xref: /openbsd/sys/netinet/tcp_usrreq.c (revision db3296cf)
1 /*	$OpenBSD: tcp_usrreq.c,v 1.71 2003/06/09 07:40:25 itojun Exp $	*/
2 /*	$NetBSD: tcp_usrreq.c,v 1.20 1996/02/13 23:44:16 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  *	@(#)COPYRIGHT	1.1 (NRL) 17 January 1995
33  *
34  * NRL grants permission for redistribution and use in source and binary
35  * forms, with or without modification, of the software and documentation
36  * created at NRL provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright
39  *    notice, this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright
41  *    notice, this list of conditions and the following disclaimer in the
42  *    documentation and/or other materials provided with the distribution.
43  * 3. All advertising materials mentioning features or use of this software
44  *    must display the following acknowledgements:
45  * 	This product includes software developed by the University of
46  * 	California, Berkeley and its contributors.
47  * 	This product includes software developed at the Information
48  * 	Technology Division, US Naval Research Laboratory.
49  * 4. Neither the name of the NRL nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THE SOFTWARE PROVIDED BY NRL IS PROVIDED BY NRL AND CONTRIBUTORS ``AS
54  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
55  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
56  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL NRL OR
57  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
58  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
59  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
60  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
61  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
62  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
63  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64  *
65  * The views and conclusions contained in the software and documentation
66  * are those of the authors and should not be interpreted as representing
67  * official policies, either expressed or implied, of the US Naval
68  * Research Laboratory (NRL).
69  */
70 
71 #include <sys/param.h>
72 #include <sys/systm.h>
73 #include <sys/mbuf.h>
74 #include <sys/socket.h>
75 #include <sys/socketvar.h>
76 #include <sys/protosw.h>
77 #include <sys/stat.h>
78 #include <sys/sysctl.h>
79 #include <sys/domain.h>
80 #include <sys/kernel.h>
81 
82 #include <net/if.h>
83 #include <net/route.h>
84 
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/ip_var.h>
91 #include <netinet/tcp.h>
92 #include <netinet/tcp_fsm.h>
93 #include <netinet/tcp_seq.h>
94 #include <netinet/tcp_timer.h>
95 #include <netinet/tcp_var.h>
96 #include <netinet/tcpip.h>
97 #include <netinet/tcp_debug.h>
98 
99 /*
100  * TCP protocol interface to socket abstraction.
101  */
102 extern	char *tcpstates[];
103 extern	int tcptv_keep_init;
104 
105 extern int tcp_rst_ppslim;
106 
107 /* from in_pcb.c */
108 extern	struct baddynamicports baddynamicports;
109 
110 struct	inpcbtable tcbtable;
111 
112 int tcp_ident(void *, size_t *, void *, size_t);
113 
114 #ifdef INET6
/*
 * User-request entry point for TCP over IPv6.  The process argument is
 * not used; everything else is handed to tcp_usrreq() unchanged and its
 * result is returned.
 */
int
tcp6_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    struct mbuf *control, struct proc *p)
{
	return (tcp_usrreq(so, req, m, nam, control));
}
125 #endif
126 
127 /*
128  * Process a TCP user request for TCP tb.  If this is a send request
129  * then m is the mbuf chain of send data.  If this is a timer expiration
130  * (called from the software clock routine), then timertype tells which timer.
131  */
132 /*ARGSUSED*/
133 int
134 tcp_usrreq(so, req, m, nam, control)
135 	struct socket *so;
136 	int req;
137 	struct mbuf *m, *nam, *control;
138 {
139 	struct sockaddr_in *sin;
140 	register struct inpcb *inp;
141 	register struct tcpcb *tp = NULL;
142 	int s;
143 	int error = 0;
144 	int ostate;
145 
146 	if (req == PRU_CONTROL) {
147 #ifdef INET6
148 		if (sotopf(so) == PF_INET6)
149 			return in6_control(so, (u_long)m, (caddr_t)nam,
150 			    (struct ifnet *)control, 0);
151 		else
152 #endif /* INET6 */
153 			return (in_control(so, (u_long)m, (caddr_t)nam,
154 			    (struct ifnet *)control));
155 	}
156 	if (control && control->m_len) {
157 		m_freem(control);
158 		if (m)
159 			m_freem(m);
160 		return (EINVAL);
161 	}
162 
163 	s = splsoftnet();
164 	inp = sotoinpcb(so);
165 	/*
166 	 * When a TCP is attached to a socket, then there will be
167 	 * a (struct inpcb) pointed at by the socket, and this
168 	 * structure will point at a subsidary (struct tcpcb).
169 	 */
170 	if (inp == 0 && req != PRU_ATTACH) {
171 		splx(s);
172 		/*
173 		 * The following corrects an mbuf leak under rare
174 		 * circumstances
175 		 */
176 		if (m && (req == PRU_SEND || req == PRU_SENDOOB))
177 			m_freem(m);
178 		return (EINVAL);		/* XXX */
179 	}
180 	if (inp) {
181 		tp = intotcpcb(inp);
182 		/* WHAT IF TP IS 0? */
183 #ifdef KPROF
184 		tcp_acounts[tp->t_state][req]++;
185 #endif
186 		ostate = tp->t_state;
187 	} else
188 		ostate = 0;
189 	switch (req) {
190 
191 	/*
192 	 * TCP attaches to socket via PRU_ATTACH, reserving space,
193 	 * and an internet control block.
194 	 */
195 	case PRU_ATTACH:
196 		if (inp) {
197 			error = EISCONN;
198 			break;
199 		}
200 		error = tcp_attach(so);
201 		if (error)
202 			break;
203 		if ((so->so_options & SO_LINGER) && so->so_linger == 0)
204 			so->so_linger = TCP_LINGERTIME;
205 		tp = sototcpcb(so);
206 		break;
207 
208 	/*
209 	 * PRU_DETACH detaches the TCP protocol from the socket.
210 	 * If the protocol state is non-embryonic, then can't
211 	 * do this directly: have to initiate a PRU_DISCONNECT,
212 	 * which may finish later; embryonic TCB's can just
213 	 * be discarded here.
214 	 */
215 	case PRU_DETACH:
216 		tp = tcp_disconnect(tp);
217 		break;
218 
219 	/*
220 	 * Give the socket an address.
221 	 */
222 	case PRU_BIND:
223 #ifdef INET6
224 		if (inp->inp_flags & INP_IPV6)
225 			error = in6_pcbbind(inp, nam);
226 		else
227 #endif
228 			error = in_pcbbind(inp, nam);
229 		if (error)
230 			break;
231 		break;
232 
233 	/*
234 	 * Prepare to accept connections.
235 	 */
236 	case PRU_LISTEN:
237 		if (inp->inp_lport == 0) {
238 #ifdef INET6
239 			if (inp->inp_flags & INP_IPV6)
240 				error = in6_pcbbind(inp, NULL);
241 			else
242 #endif
243 				error = in_pcbbind(inp, NULL);
244 		}
245 		/* If the in_pcbbind() above is called, the tp->pf
246 		   should still be whatever it was before. */
247 		if (error == 0)
248 			tp->t_state = TCPS_LISTEN;
249 		break;
250 
251 	/*
252 	 * Initiate connection to peer.
253 	 * Create a template for use in transmissions on this connection.
254 	 * Enter SYN_SENT state, and mark socket as connecting.
255 	 * Start keep-alive timer, and seed output sequence space.
256 	 * Send initial segment on connection.
257 	 */
258 	case PRU_CONNECT:
259 		sin = mtod(nam, struct sockaddr_in *);
260 
261 #ifdef INET6
262 		if (sin->sin_family == AF_INET6) {
263 			struct in6_addr *in6_addr = &mtod(nam,
264 			    struct sockaddr_in6 *)->sin6_addr;
265 
266 			if (IN6_IS_ADDR_UNSPECIFIED(in6_addr) ||
267 			    IN6_IS_ADDR_MULTICAST(in6_addr) ||
268 			    (IN6_IS_ADDR_V4MAPPED(in6_addr) &&
269 			    ((in6_addr->s6_addr32[3] == INADDR_ANY) ||
270 			    IN_MULTICAST(in6_addr->s6_addr32[3]) ||
271 			    in_broadcast(sin->sin_addr, NULL)))) {
272 				error = EINVAL;
273 				break;
274 			}
275 
276 			if (inp->inp_lport == 0) {
277 				error = in6_pcbbind(inp, NULL);
278 				if (error)
279 					break;
280 			}
281 			error = in6_pcbconnect(inp, nam);
282 		} else if (sin->sin_family == AF_INET)
283 #endif /* INET6 */
284 		{
285 			if ((sin->sin_addr.s_addr == INADDR_ANY) ||
286 			    IN_MULTICAST(sin->sin_addr.s_addr) ||
287 			    in_broadcast(sin->sin_addr, NULL)) {
288 				error = EINVAL;
289 				break;
290 			}
291 
292 			/* Trying to connect to some broadcast address */
293 			if (in_broadcast(sin->sin_addr, NULL)) {
294 				error = EINVAL;
295 				break;
296 			}
297 
298 			if (inp->inp_lport == 0) {
299 				error = in_pcbbind(inp, NULL);
300 				if (error)
301 					break;
302 			}
303 			error = in_pcbconnect(inp, nam);
304 		}
305 
306 		if (error)
307 			break;
308 
309 		tp->t_template = tcp_template(tp);
310 		if (tp->t_template == 0) {
311 			in_pcbdisconnect(inp);
312 			error = ENOBUFS;
313 			break;
314 		}
315 
316 		so->so_state |= SS_CONNECTOUT;
317 		/* Compute window scaling to request.  */
318 		tcp_rscale(tp, so->so_rcv.sb_hiwat);
319 
320 		soisconnecting(so);
321 		tcpstat.tcps_connattempt++;
322 		tp->t_state = TCPS_SYN_SENT;
323 		TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init);
324 #ifdef TCP_COMPAT_42
325 		tp->iss = tcp_iss;
326 		tcp_iss += TCP_ISSINCR/2;
327 #else  /* TCP_COMPAT_42 */
328 		tp->iss = tcp_rndiss_next();
329 #endif /* !TCP_COMPAT_42 */
330 		tcp_sendseqinit(tp);
331 #if defined(TCP_SACK)
332 		tp->snd_last = tp->snd_una;
333 #endif
334 #if defined(TCP_SACK) && defined(TCP_FACK)
335 		tp->snd_fack = tp->snd_una;
336 		tp->retran_data = 0;
337 		tp->snd_awnd = 0;
338 #endif
339 		error = tcp_output(tp);
340 		break;
341 
342 	/*
343 	 * Create a TCP connection between two sockets.
344 	 */
345 	case PRU_CONNECT2:
346 		error = EOPNOTSUPP;
347 		break;
348 
349 	/*
350 	 * Initiate disconnect from peer.
351 	 * If connection never passed embryonic stage, just drop;
352 	 * else if don't need to let data drain, then can just drop anyways,
353 	 * else have to begin TCP shutdown process: mark socket disconnecting,
354 	 * drain unread data, state switch to reflect user close, and
355 	 * send segment (e.g. FIN) to peer.  Socket will be really disconnected
356 	 * when peer sends FIN and acks ours.
357 	 *
358 	 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
359 	 */
360 	case PRU_DISCONNECT:
361 		tp = tcp_disconnect(tp);
362 		break;
363 
364 	/*
365 	 * Accept a connection.  Essentially all the work is
366 	 * done at higher levels; just return the address
367 	 * of the peer, storing through addr.
368 	 */
369 	case PRU_ACCEPT:
370 #ifdef INET6
371 		if (inp->inp_flags & INP_IPV6)
372 			in6_setpeeraddr(inp, nam);
373 		else
374 #endif
375 			in_setpeeraddr(inp, nam);
376 		break;
377 
378 	/*
379 	 * Mark the connection as being incapable of further output.
380 	 */
381 	case PRU_SHUTDOWN:
382 		if (so->so_state & SS_CANTSENDMORE)
383 			break;
384 		socantsendmore(so);
385 		tp = tcp_usrclosed(tp);
386 		if (tp)
387 			error = tcp_output(tp);
388 		break;
389 
390 	/*
391 	 * After a receive, possibly send window update to peer.
392 	 */
393 	case PRU_RCVD:
394 		/*
395 		 * soreceive() calls this function when a user receives
396 		 * ancillary data on a listening socket. We don't call
397 		 * tcp_output in such a case, since there is no header
398 		 * template for a listening socket and hence the kernel
399 		 * will panic.
400 		 */
401 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) != 0)
402 			(void) tcp_output(tp);
403 		break;
404 
405 	/*
406 	 * Do a send by putting data in output queue and updating urgent
407 	 * marker if URG set.  Possibly send more data.
408 	 */
409 	case PRU_SEND:
410 		sbappendstream(&so->so_snd, m);
411 		error = tcp_output(tp);
412 		break;
413 
414 	/*
415 	 * Abort the TCP.
416 	 */
417 	case PRU_ABORT:
418 		tp = tcp_drop(tp, ECONNABORTED);
419 		break;
420 
421 	case PRU_SENSE:
422 		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
423 		splx(s);
424 		return (0);
425 
426 	case PRU_RCVOOB:
427 		if ((so->so_oobmark == 0 &&
428 		    (so->so_state & SS_RCVATMARK) == 0) ||
429 		    so->so_options & SO_OOBINLINE ||
430 		    tp->t_oobflags & TCPOOB_HADDATA) {
431 			error = EINVAL;
432 			break;
433 		}
434 		if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
435 			error = EWOULDBLOCK;
436 			break;
437 		}
438 		m->m_len = 1;
439 		*mtod(m, caddr_t) = tp->t_iobc;
440 		if (((long)nam & MSG_PEEK) == 0)
441 			tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
442 		break;
443 
444 	case PRU_SENDOOB:
445 		if (sbspace(&so->so_snd) < -512) {
446 			m_freem(m);
447 			error = ENOBUFS;
448 			break;
449 		}
450 		/*
451 		 * According to RFC961 (Assigned Protocols),
452 		 * the urgent pointer points to the last octet
453 		 * of urgent data.  We continue, however,
454 		 * to consider it to indicate the first octet
455 		 * of data past the urgent section.
456 		 * Otherwise, snd_up should be one lower.
457 		 */
458 		sbappendstream(&so->so_snd, m);
459 		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
460 		tp->t_force = 1;
461 		error = tcp_output(tp);
462 		tp->t_force = 0;
463 		break;
464 
465 	case PRU_SOCKADDR:
466 #ifdef INET6
467 		if (inp->inp_flags & INP_IPV6)
468 			in6_setsockaddr(inp, nam);
469 		else
470 #endif
471 			in_setsockaddr(inp, nam);
472 		break;
473 
474 	case PRU_PEERADDR:
475 #ifdef INET6
476 		if (inp->inp_flags & INP_IPV6)
477 			in6_setpeeraddr(inp, nam);
478 		else
479 #endif
480 			in_setpeeraddr(inp, nam);
481 		break;
482 
483 	default:
484 		panic("tcp_usrreq");
485 	}
486 	if (tp && (so->so_options & SO_DEBUG))
487 		tcp_trace(TA_USER, ostate, tp, (caddr_t)0, req, 0);
488 	splx(s);
489 	return (error);
490 }
491 
492 int
493 tcp_ctloutput(op, so, level, optname, mp)
494 	int op;
495 	struct socket *so;
496 	int level, optname;
497 	struct mbuf **mp;
498 {
499 	int error = 0, s;
500 	struct inpcb *inp;
501 	register struct tcpcb *tp;
502 	register struct mbuf *m;
503 	register int i;
504 
505 	s = splsoftnet();
506 	inp = sotoinpcb(so);
507 	if (inp == NULL) {
508 		splx(s);
509 		if (op == PRCO_SETOPT && *mp)
510 			(void) m_free(*mp);
511 		return (ECONNRESET);
512 	}
513 #ifdef INET6
514 	tp = intotcpcb(inp);
515 #endif /* INET6 */
516 	if (level != IPPROTO_TCP) {
517 		switch (so->so_proto->pr_domain->dom_family) {
518 #ifdef INET6
519 		case PF_INET6:
520 			error = ip6_ctloutput(op, so, level, optname, mp);
521 			break;
522 #endif /* INET6 */
523 		case PF_INET:
524 			error = ip_ctloutput(op, so, level, optname, mp);
525 			break;
526 		default:
527 			error = EAFNOSUPPORT;	/*?*/
528 			break;
529 		}
530 		splx(s);
531 		return (error);
532 	}
533 #ifndef INET6
534 	tp = intotcpcb(inp);
535 #endif /* !INET6 */
536 
537 	switch (op) {
538 
539 	case PRCO_SETOPT:
540 		m = *mp;
541 		switch (optname) {
542 
543 		case TCP_NODELAY:
544 			if (m == NULL || m->m_len < sizeof (int))
545 				error = EINVAL;
546 			else if (*mtod(m, int *))
547 				tp->t_flags |= TF_NODELAY;
548 			else
549 				tp->t_flags &= ~TF_NODELAY;
550 			break;
551 
552 		case TCP_MAXSEG:
553 			if (m == NULL || m->m_len < sizeof (int)) {
554 				error = EINVAL;
555 				break;
556 			}
557 
558 			i = *mtod(m, int *);
559 			if (i > 0 && i <= tp->t_maxseg)
560 				tp->t_maxseg = i;
561 			else
562 				error = EINVAL;
563 			break;
564 
565 #ifdef TCP_SACK
566 		case TCP_SACK_DISABLE:
567 			if (m == NULL || m->m_len < sizeof (int)) {
568 				error = EINVAL;
569 				break;
570 			}
571 
572 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
573 				error = EPERM;
574 				break;
575 			}
576 
577 			if (tp->t_flags & TF_SIGNATURE) {
578 				error = EPERM;
579 				break;
580 			}
581 
582 			if (*mtod(m, int *))
583 				tp->sack_disable = 1;
584 			else
585 				tp->sack_disable = 0;
586 			break;
587 #endif
588 #ifdef TCP_SIGNATURE
589 		case TCP_SIGNATURE_ENABLE:
590 			if (m == NULL || m->m_len < sizeof (int)) {
591 				error = EINVAL;
592 				break;
593 			}
594 
595 			if (TCPS_HAVEESTABLISHED(tp->t_state)) {
596 				error = EPERM;
597 				break;
598 			}
599 
600 			if (*mtod(m, int *)) {
601 				tp->t_flags |= TF_SIGNATURE;
602 #ifdef TCP_SACK
603 				tp->sack_disable = 1;
604 #endif /* TCP_SACK */
605 			} else
606 				tp->t_flags &= ~TF_SIGNATURE;
607 			break;
608 #endif /* TCP_SIGNATURE */
609 		default:
610 			error = ENOPROTOOPT;
611 			break;
612 		}
613 		if (m)
614 			(void) m_free(m);
615 		break;
616 
617 	case PRCO_GETOPT:
618 		*mp = m = m_get(M_WAIT, MT_SOOPTS);
619 		m->m_len = sizeof(int);
620 
621 		switch (optname) {
622 		case TCP_NODELAY:
623 			*mtod(m, int *) = tp->t_flags & TF_NODELAY;
624 			break;
625 		case TCP_MAXSEG:
626 			*mtod(m, int *) = tp->t_maxseg;
627 			break;
628 #ifdef TCP_SACK
629 		case TCP_SACK_DISABLE:
630 			*mtod(m, int *) = tp->sack_disable;
631 			break;
632 #endif
633 		default:
634 			error = ENOPROTOOPT;
635 			break;
636 		}
637 		break;
638 	}
639 	splx(s);
640 	return (error);
641 }
642 
/*
 * Default send and receive buffer reservations.  tcp_attach() passes
 * them to soreserve(); tcp_sysctl() exposes them as TCPCTL_SENDSPACE
 * and TCPCTL_RECVSPACE.  The macro bodies are parenthesized so that an
 * expression such as (x / TCP_SENDSPACE) expands as intended.
 */
#ifndef TCP_SENDSPACE
#define	TCP_SENDSPACE	(1024*16)
#endif
u_int	tcp_sendspace = TCP_SENDSPACE;
#ifndef TCP_RECVSPACE
#define	TCP_RECVSPACE	(1024*16)
#endif
u_int	tcp_recvspace = TCP_RECVSPACE;
651 
652 /*
653  * Attach TCP protocol to socket, allocating
654  * internet protocol control block, tcp control block,
655  * bufer space, and entering LISTEN state if to accept connections.
656  */
657 int
658 tcp_attach(so)
659 	struct socket *so;
660 {
661 	register struct tcpcb *tp;
662 	struct inpcb *inp;
663 	int error;
664 
665 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
666 		error = soreserve(so, tcp_sendspace, tcp_recvspace);
667 		if (error)
668 			return (error);
669 	}
670 	error = in_pcballoc(so, &tcbtable);
671 	if (error)
672 		return (error);
673 	inp = sotoinpcb(so);
674 	tp = tcp_newtcpcb(inp);
675 	if (tp == NULL) {
676 		int nofd = so->so_state & SS_NOFDREF;	/* XXX */
677 
678 		so->so_state &= ~SS_NOFDREF;	/* don't free the socket yet */
679 		in_pcbdetach(inp);
680 		so->so_state |= nofd;
681 		return (ENOBUFS);
682 	}
683 	tp->t_state = TCPS_CLOSED;
684 #ifdef INET6
685 	/* we disallow IPv4 mapped address completely. */
686 	if (inp->inp_flags & INP_IPV6)
687 		tp->pf = PF_INET6;
688 	else
689 		tp->pf = PF_INET;
690 #else
691 	tp->pf = PF_INET;
692 #endif
693 	return (0);
694 }
695 
696 /*
697  * Initiate (or continue) disconnect.
698  * If embryonic state, just send reset (once).
699  * If in ``let data drain'' option and linger null, just drop.
700  * Otherwise (hard), mark socket disconnecting and drop
701  * current input data; switch states based on user close, and
702  * send segment to peer (with FIN).
703  */
704 struct tcpcb *
705 tcp_disconnect(tp)
706 	register struct tcpcb *tp;
707 {
708 	struct socket *so = tp->t_inpcb->inp_socket;
709 
710 	if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
711 		tp = tcp_close(tp);
712 	else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
713 		tp = tcp_drop(tp, 0);
714 	else {
715 		soisdisconnecting(so);
716 		sbflush(&so->so_rcv);
717 		tp = tcp_usrclosed(tp);
718 		if (tp)
719 			(void) tcp_output(tp);
720 	}
721 	return (tp);
722 }
723 
724 /*
725  * User issued close, and wish to trail through shutdown states:
726  * if never received SYN, just forget it.  If got a SYN from peer,
727  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
728  * If already got a FIN from peer, then almost done; go to LAST_ACK
729  * state.  In all other cases, have already sent FIN to peer (e.g.
730  * after PRU_SHUTDOWN), and just have to play tedious game waiting
731  * for peer to send FIN or not respond to keep-alives, etc.
732  * We can let the user exit from the close as soon as the FIN is acked.
733  */
734 struct tcpcb *
735 tcp_usrclosed(tp)
736 	register struct tcpcb *tp;
737 {
738 
739 	switch (tp->t_state) {
740 
741 	case TCPS_CLOSED:
742 	case TCPS_LISTEN:
743 	case TCPS_SYN_SENT:
744 		tp->t_state = TCPS_CLOSED;
745 		tp = tcp_close(tp);
746 		break;
747 
748 	case TCPS_SYN_RECEIVED:
749 	case TCPS_ESTABLISHED:
750 		tp->t_state = TCPS_FIN_WAIT_1;
751 		break;
752 
753 	case TCPS_CLOSE_WAIT:
754 		tp->t_state = TCPS_LAST_ACK;
755 		break;
756 	}
757 	if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
758 		soisdisconnected(tp->t_inpcb->inp_socket);
759 		/*
760 		 * If we are in FIN_WAIT_2, we arrived here because the
761 		 * application did a shutdown of the send side.  Like the
762 		 * case of a transition from FIN_WAIT_1 to FIN_WAIT_2 after
763 		 * a full close, we start a timer to make sure sockets are
764 		 * not left in FIN_WAIT_2 forever.
765 		 */
766 		if (tp->t_state == TCPS_FIN_WAIT_2)
767 			TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
768 	}
769 	return (tp);
770 }
771 
772 /*
773  * Look up a socket for ident..
774  */
775 int
776 tcp_ident(oldp, oldlenp, newp, newlen)
777 	void *oldp;
778 	size_t *oldlenp;
779 	void *newp;
780 	size_t newlen;
781 {
782 	int error = 0, s;
783 	struct tcp_ident_mapping tir;
784 	struct inpcb *inp;
785 	struct sockaddr_in *fin, *lin;
786 #ifdef INET6
787 	struct sockaddr_in6 *fin6, *lin6;
788 	struct in6_addr f6, l6;
789 #endif
790 
791 	if (oldp == NULL || newp != NULL || newlen != 0)
792 		return (EINVAL);
793 	if  (*oldlenp < sizeof(tir))
794 		return (ENOMEM);
795 	if ((error = copyin(oldp, &tir, sizeof (tir))) != 0 )
796 		return (error);
797 	switch (tir.faddr.ss_family) {
798 #ifdef INET6
799 	case AF_INET6:
800 		fin6 = (struct sockaddr_in6 *)&tir.faddr;
801 		error = in6_embedscope(&f6, fin6, NULL, NULL);
802 		if (error)
803 			return EINVAL;	/*?*/
804 		lin6 = (struct sockaddr_in6 *)&tir.laddr;
805 		error = in6_embedscope(&l6, lin6, NULL, NULL);
806 		if (error)
807 			return EINVAL;	/*?*/
808 		break;
809 #endif
810 	case AF_INET:
811 	  	fin = (struct sockaddr_in *)&tir.faddr;
812 		lin = (struct sockaddr_in *)&tir.laddr;
813 		break;
814 	default:
815 		return (EINVAL);
816 	}
817 
818 	s = splsoftnet();
819 	switch (tir.faddr.ss_family) {
820 	case AF_INET6:
821 #ifdef INET6
822 		inp = in6_pcbhashlookup(&tcbtable, &f6,
823 		    fin6->sin6_port, &l6, lin6->sin6_port);
824 		break;
825 #endif
826 	case AF_INET:
827 		inp = in_pcbhashlookup(&tcbtable,  fin->sin_addr,
828 		    fin->sin_port, lin->sin_addr, lin->sin_port);
829 		break;
830 	}
831 
832 	if (inp == NULL) {
833 		++tcpstat.tcps_pcbhashmiss;
834 		switch (tir.faddr.ss_family) {
835 #ifdef INET6
836 		case AF_INET6:
837 			inp = in_pcblookup(&tcbtable, &f6,
838 			    fin6->sin6_port, &l6, lin6->sin6_port,
839 			    INPLOOKUP_WILDCARD | INPLOOKUP_IPV6);
840 			break;
841 #endif
842 		case AF_INET:
843 			inp = in_pcblookup(&tcbtable, &fin->sin_addr,
844 			    fin->sin_port, &lin->sin_addr, lin->sin_port,
845 			    INPLOOKUP_WILDCARD);
846 			break;
847 		}
848 	}
849 
850 	if (inp != NULL && (inp->inp_socket->so_state & SS_CONNECTOUT)) {
851 		tir.ruid = inp->inp_socket->so_ruid;
852 		tir.euid = inp->inp_socket->so_euid;
853 	} else {
854 		tir.ruid = -1;
855 		tir.euid = -1;
856 	}
857 	splx(s);
858 
859 	*oldlenp = sizeof (tir);
860 	error = copyout((void *)&tir, oldp, sizeof (tir));
861 	return (error);
862 }
863 
864 /*
865  * Sysctl for tcp variables.
866  */
867 int
868 tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
869 	int *name;
870 	u_int namelen;
871 	void *oldp;
872 	size_t *oldlenp;
873 	void *newp;
874 	size_t newlen;
875 {
876 
877 	/* All sysctl names at this level are terminal. */
878 	if (namelen != 1)
879 		return (ENOTDIR);
880 
881 	switch (name[0]) {
882 	case TCPCTL_RFC1323:
883 		return (sysctl_int(oldp, oldlenp, newp, newlen,
884 		    &tcp_do_rfc1323));
885 #ifdef TCP_SACK
886 	case TCPCTL_SACK:
887 		return (sysctl_int(oldp, oldlenp, newp, newlen,
888 		    &tcp_do_sack));
889 #endif
890 	case TCPCTL_MSSDFLT:
891 		return (sysctl_int(oldp, oldlenp, newp, newlen,
892 		    &tcp_mssdflt));
893 	case TCPCTL_KEEPINITTIME:
894 		return (sysctl_int(oldp, oldlenp, newp, newlen,
895 		    &tcptv_keep_init));
896 
897 	case TCPCTL_KEEPIDLE:
898 		return (sysctl_int(oldp, oldlenp, newp, newlen,
899 		    &tcp_keepidle));
900 
901 	case TCPCTL_KEEPINTVL:
902 		return (sysctl_int(oldp, oldlenp, newp, newlen,
903 		    &tcp_keepintvl));
904 
905 	case TCPCTL_SLOWHZ:
906 		return (sysctl_rdint(oldp, oldlenp, newp, PR_SLOWHZ));
907 
908 	case TCPCTL_BADDYNAMIC:
909 		return (sysctl_struct(oldp, oldlenp, newp, newlen,
910 		    baddynamicports.tcp, sizeof(baddynamicports.tcp)));
911 
912 	case TCPCTL_RECVSPACE:
913 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_recvspace));
914 
915 	case TCPCTL_SENDSPACE:
916 		return (sysctl_int(oldp, oldlenp, newp, newlen,&tcp_sendspace));
917 	case TCPCTL_IDENT:
918 		return (tcp_ident(oldp, oldlenp, newp, newlen));
919 	case TCPCTL_RSTPPSLIMIT:
920 		return (sysctl_int(oldp, oldlenp, newp, newlen,
921 		    &tcp_rst_ppslim));
922 	case TCPCTL_ACK_ON_PUSH:
923 		return (sysctl_int(oldp, oldlenp, newp, newlen,
924 		    &tcp_ack_on_push));
925 #ifdef TCP_ECN
926 	case TCPCTL_ECN:
927 		return (sysctl_int(oldp, oldlenp, newp, newlen,
928 		   &tcp_do_ecn));
929 #endif
930 	default:
931 		return (ENOPROTOOPT);
932 	}
933 	/* NOTREACHED */
934 }
935