1 #ifdef	RCSIDENT
2 static char rcsident[] = "$Header: tcp_usrreq.c,v 1.30 85/07/31 09:43:43 walsh Exp $";
3 #endif RCSIDENT
4 
5 #include "../h/param.h"
6 #include "../h/systm.h"
7 #include "../h/mbuf.h"
8 #include "../h/socket.h"
9 #include "../h/socketvar.h"
10 #include "../h/protosw.h"
11 #include "../h/errno.h"
12 #include "../h/ioctl.h"
13 #include "../h/time.h"
14 #include "../h/kernel.h"
15 
16 #include "../net/if.h"
17 #include "../net/route.h"
18 
19 #include "../bbnnet/in.h"
20 #include "../bbnnet/in_var.h"
21 #include "../bbnnet/in_pcb.h"
22 #include "../bbnnet/net.h"
23 #include "../bbnnet/fsm.h"
24 #include "../bbnnet/tcp.h"
25 #include "../bbnnet/ip.h"
26 #include "../bbnnet/icmp.h"
27 #include "../bbnnet/macros.h"
28 #include "../bbnnet/sws.h"
29 
30 /*
31  * TCP protocol interface to socket abstraction.
32  */
33 
34 #ifdef GPROF
35 int	tcp_acounts[TCP_NSTATES][PRU_NREQ];
36 #endif
37 
38 extern tcp_pcbdisconnect();
39 extern tcp_binding_used();
40 
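/*
 * Head of the doubly linked list of TCP inpcbs (see tcp_init()); every
 * TCP socket's inpcb is chained here and is searched by
 * tcp_binding_used() and tcp_conn_used().
 */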
41 struct inpcb tcp;
42 struct tcp_stat tcpstat;
43 sequence tcp_iss;		/* tcp initial send seq # */
44 
45 struct dfilter tcp_dfilter;
46 
47 struct pr_advice tcp_advice =
48 {
49     TCP_RESERVED,	/* application reserved */
50     TCP_USERRESERVED,	/* user reserved */
51     TCP_MAXPORT,	/* max port */
52     TCP_USERRESERVED+1,	/* random last used */
53     sizeof(u_short),	/* port size */
54     tcp_binding_used,	/* confirmation routine */
55 } ;
56 
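/*
 * Turn on SO_DEBUG for this connection's socket if its addresses and
 * ports match at least filter->matches of the four fields in the
 * debugging filter (tcp_dfilter; tcp_init() disables it by requiring
 * 5 matches out of a possible 4).
 */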
57 dowedebug(inp, so, filter)
58 register struct inpcb	*inp;
59 struct socket	*so;
60 register struct dfilter *filter;
61 {
62     register int		 count;
63 
64     count = 0;
65     if (inp->inp_faddr.s_addr == filter->foreign_host.s_addr)
66 	count ++;
67     if (inp->inp_fport == filter->foreign_port)
68 	count ++;
69     if (inp->inp_laddr.s_addr == filter->local_host.s_addr)
70 	count ++;
71     if (inp->inp_lport == filter->local_port)
72 	count ++;
73 
74     if (count >= filter->matches)
75 	so->so_options |= SO_DEBUG;
76 }
77 
78 int tcp_noact = 0; /* patchable */
79 
80 /*
81  * Allocate and initialize a new TCB
82  * tcp_usrreq calls tcp_attach calls us.  tcp_usrreq splnet()'s
83  */
84 struct tcpcb *tcp_newtcpcb(inp)
85 register struct inpcb *inp;
86 {
87     register struct tcpcb  *tp;
88     register struct mbuf   *m;
89 
90     m = m_getclr(M_WAIT, MT_PCB);
91     if (m == NULL)
92 	return(NULL);
93     tp = mtod(m, struct tcpcb *);
94 
95     /* initialize non-zero tcb fields */
96 
97     tp->t_rcv_next	= (struct th *)tp;
98     tp->t_rcv_prev	= (struct th *)tp;
99     /*
     * Don't start off assuming minimum srtt/rxmitime.  If we do, and
     * TCP_tvRXMIN is small, and we decide to communicate over a
     * reliable but slow network, then we may not find true values for
     * these.  We may assume an ACK was for a retransmission that
     * we're measuring the srtt of, not for the original packet.
105      *
106      * Instead, start high and approach from above in a deterministic
107      * fashion.  We should get close to the right values fairly rapidly.
108      *
109      * 7/85: start from above by special casing first round trip time
110      * measurement.  If srtt == 0, do not reset rtt, and do not use
111      * weighted averaging.  srtt starts as time to ack(xmit [+ rxmit...])
112      * and then gets smoothed with new round trip times.  This compromise
113      * for getting to long-term srtt more quickly on LANs should work
114      * on the Internet as well.  It will only hurt Internet connections
115      * if packet loss is high, and even then would only slow getting
116      * to long term srtt.
117      * This method can be turned off by initializing srtt with a non-zero
118      * value.
119      */
120     /* tp->t_srtt   = TCP_tvMAXSRTT; */
121     tp->t_rxmitime	= TCP_tvMAXSRTT + 1;
122     tp->t_rttltimeo	= TCP_tvRTTL;
123     tp->t_xmt_val = tp->snd_end = tp->seq_fin = tp->snd_nxt =
124 	tp->snd_hi = tp->snd_una = tp->iss = tcp_iss;
125     tcp_iss += ISSINCR;
126 
127     /*
128      * Imitate Berkeley code by setting push as a default.  This should
129      * increase compatibility at the user code level.
130      */
131     tp->t_push	 = TRUE;
132 
133     /*
     * Berkeley 4.2 code sends a data byte beyond the window's edge to see
     * if the other end is up.  If the other end does not respond, the
     * connection times out and aborts.  This is dangerous since the byte
     * may make its way into the input stream if the recipient is coded
     * keeping in mind how expensive packets are.
     *
     * We'll provide for an optional method to send a well-formed ack that
     * will catch remote failure and generate a tcp reset.  Note that we
     * don't care if the other end ignores the ack; we only hope for a well
     * coded tcp to respond with a reset in the right circumstances.  This
     * sort of handshaking/probing should really be done at the application
     * level, but not all specs (e.g., SMTP) provide for such a noop.
     *
     * Optional, since some networks charge for packets and since some might
     * see this as unnecessary traffic.
     *
     * Also see tcp_ioctl().
151      */
152     if (tp->t_noact = tcp_noact)
153 	tp->t_noactprobe = TRUE;
154 
155     /* attach the tcpcb to the in_pcb */
156 
157     inp->inp_ppcb = (caddr_t)tp;
158     tp->t_in_pcb = inp;
159 
160     return(tp);
161 }
162 
163 /*
164  * Is a tcp port/address pair already in use by some socket on this machine?
165  * Passed to in_pcbbind() to help it find a port/address binding
166  * that is unique for tcp.
167  */
168 int tcp_binding_used(inp, lport, lsaddr, reuselocal)
169 struct inpcb   *inp;
170 u_short	lport;
171 u_long	lsaddr;
172 {
173     register struct inpcb *i;
174 
175     for (i = tcp.inp_next; i != &tcp; i = i->inp_next)
176     {
177 	/*
178 	 * Since our inpcb is in this linked list, don't want to know
179 	 * if we, ourselves, are already using this binding.
180 	 */
181 	if (i != inp)
182 	    if (i->inp_lport == lport)
183 		/*
184 		 * Our/His address is unbound (INADDR_ANY) iff
185 		 * not yet connected to foreign host.
186 		 */
187 		if ((i->inp_laddr.s_addr == lsaddr) ||
188 		    (i->inp_laddr.s_addr == INADDR_ANY) ||
189 		    (lsaddr == INADDR_ANY))
190 		{
191 		    if (!reuselocal)
192 			break;
193 		    if (i->inp_faddr.s_addr == INADDR_ANY)
194 			/*
195 			 * We're both waiting for foreign
196 			 * connection.  Could only re-use if
197 			 * he was already connected.
198 			 */
199 			break;
200 		}
201     }
202     return (i != &tcp);
203 }
204 
205 /*
 * Returns a (struct tcpcb *) cast to a (char *), so that in_pcbconnect()
 * can correctly handle the return value.  All other callers promptly
 * cast it back.
209  */
210 
211 char *tcp_conn_used(inp, lport, lsaddr, fport, fsaddr)
212 struct inpcb   *inp;
213 u_short	lport;
214 u_long	lsaddr;
215 u_short	fport;
216 u_long	fsaddr;
217 {
218     register struct inpcb *i;
219 
220     for (i = tcp.inp_next; i != &tcp; i = i->inp_next)
221     {
222 	/*
223 	 * Since our inpcb is in this linked list, don't want to know
	 * if we, ourselves, are already using this connection.
225 	 */
226 	if (i != inp)
227 	    if ((i->inp_lport == lport) &&
228 		(i->inp_fport == fport) &&
229 		(i->inp_laddr.s_addr == lsaddr) &&
230 		(i->inp_faddr.s_addr == fsaddr))
231 		    return((char *)i->inp_ppcb);
232     }
233     return ((char *) NULL);
234 }
235 
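/*
 * Per-connection ioctls, reached from the socket ioctl path through
 * PRU_CONTROL in tcp_usrreq().  For illustration only (the exact
 * user-level incantation is not part of this file), a program might arm
 * the no-activity probe with something like
 *
 *	u_long v = timeout_value | TCP_NOACTPROBE;
 *	ioctl(s, SIOCSNOACT, (char *)&v);
 *
 * where the timeout portion is clamped below to MAX_TCPTIMERVAL.
 */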
236 tcp_ioctl (tp, command, data)
237 struct tcpcb *tp;
238 int command;
239 caddr_t	data;
240 {
241     switch (command)
242     {
243 	/* push */
244       case SIOCSPUSH:
245 	tp->t_push = TRUE;
246 	break;
247 
248       case SIOCCPUSH:
249 	tp->t_push = FALSE;
250 	break;
251 
252 	/* no activity timer */
253       case SIOCSNOACT:
254 	{
255 	u_long	value;
256 
257 	value = *((u_long *) data);
258 	/*
259 	 * A shutdown socket should still be able to request some sort of
260 	 * check on the status of the remote end.  Also see tcp_newtcpcb().
261 	 */
262 	tp->t_noactprobe = (value & TCP_NOACTPROBE) ? TRUE : FALSE;
263 	tp->t_noactsig = (value & TCP_NOACTSIG) ? TRUE : FALSE;
264 
265 	if ((tp->t_state <= ESTAB) || (tp->t_state == CLOSE_WAIT))
266 	{
267 	    /* don't interfere with system use of timer */
268 	    value &= ~(TCP_NOACTPROBE|TCP_NOACTSIG);
269 	    tp->t_noact = MIN (MAX_TCPTIMERVAL, value);
270 	    tp->t_timers[TNOACT] = tp->t_noact;
271 	}
272 	}
273 	break;
274 
275       case SIOCGNOACT:
276 	{
277 	u_long	value;
278 
279 	value = tp->t_noact;
280 	if (tp->t_noactprobe)
281 	    value |= TCP_NOACTPROBE;
282 	if (tp->t_noactsig)
283 	    value |= TCP_NOACTSIG;
284 
285 	*((u_long *) data) = value;
286 	}
287 	break;
288 
289 	/* init timer */
290       case SIOCSINIT:
291 	tp->t_itimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data));
292 	break;
293 
294       case SIOCGINIT:
295 	*((int *) data) = tp->t_itimeo;
296 	break;
297 
298 	/* retransmit took too long timer */
299       case SIOCSRTTL:
300 	tp->t_rttltimeo = MIN (MAX_TCPTIMERVAL, *((unsigned *) data));
301 	break;
302 
303       case SIOCGRTTL:
304 	*((int *) data) = tp->t_rttltimeo;
305 	break;
306 
307       case SIOCABORT:
308 	{
309 	    struct socket *so;
310 
	    /*
	     * There really should be a generic way for
	     * a user to get to soabort().
	     */
314 
315 	    tp->usr_abort = TRUE;
316 	    /*
	     * In case we're asked to abort a LISTENing socket,
	     * don't leave unattached, unaccepted connections behind.
319 	     */
320 	    so = tp->t_in_pcb->inp_socket;
321 	    while (so->so_q0 && (so->so_q0 != so))
322 		(void) soabort(so->so_q0);
323 	    while (so->so_q  && (so->so_q  != so))
324 		(void) soabort(so->so_q);
325 
326 	    w_alloc(IUABORT, 0, tp, tp->t_in_pcb);
327 	}
328 	break;
329 
330       default:
331 	/* not our ioctl, let lower level try ioctl */
332 	return ip_ioctl (tp->t_in_pcb, command, data);
333     }
334 
335     return (0);
336 }
337 
338 
339 /*
340  * Process a TCP user request for TCP tb.  If this is a send request
341  * then m is the mbuf chain of send data.  If this is a timer expiration
342  * (called from the software clock routine), then timertype tells which timer.
343  */
344 /*ARGSUSED*/
345 tcp_usrreq(so, req, m, nam, rights)
346 struct socket *so;
347 int req;
348 struct mbuf *m, *nam, *rights;
349 {
350     register struct inpcb *inp;
351     register struct tcpcb *tp;
352     register int s;
353     register int act, newstate;
354     int error = 0;
355 
356     s = splnet();
357     inp = sotoinpcb(so);
358 
359     /* keep in mind call from ifioctl() */
360     if (rights && req != PRU_CONTROL)
361     {
362 	if (rights->m_len)
363 	{
364 	    splx(s);
365 	    return (EINVAL);
366 	}
367     }
368     /*
369      * When a TCP is attached to a socket, then there will be
370      * a (struct inpcb) pointed at by the socket, and this
     * structure will point at a subsidiary (struct tcpcb).
372      */
373     if (inp == NULL && req != PRU_ATTACH)
374     {
375 	splx(s);
376 	return (EINVAL);	/* XXX */
377     }
378     if (inp)
379     {
380 	tp = inptotcpcb(inp);
381 	/* WHAT IF TP IS 0? */
382 #ifdef GPROF
383 	tcp_acounts[tp->t_state][req]++;
384 #endif
385     }
386 
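    /*
     * Most of the requests below do their work by handing an input code
     * (IUSEND, IURECV, IUOPENA, ...) to the TCP finite state machine
     * through w_alloc()/W_ALLOC().
     */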
387     /*
388      * This switch becomes a 'caseb', so put common ones at top.
389      */
390     switch (req)
391     {
392 
393       case PRU_RCVD:
394 	/*
395 	 * After a receive, possibly send window update to peer.
396 	 */
397 	W_ALLOC(IURECV, 0, tp, NULL, so, act, newstate);
398 	break;
399 
400       case PRU_SEND:
401 	/*
402 	 * Do a send by initiating the proper entry to the FSM.
403 	 * Don't let urgent continue.
404 	 */
405 	tp->t_urg = FALSE;
406 	W_ALLOC(IUSEND, 0, tp, m, so, act, newstate);
407 	break;
408 
409 	/*
	 * TCP attaches to the socket via PRU_ATTACH, reserving buffer space
	 * and allocating an internet control block.
412 	 */
413       case PRU_ATTACH:
414 	if (inp)
415 	{
416 	    error = EISCONN;
417 	    break;
418 	}
419 	error = tcp_attach(so);
420 	if (error)
421 	    break;
422 	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
423 	    so->so_linger = T_LINGERTIME;
424 	tp = sototcpcb(so);
425 	break;
426 
427 	/*
428 	 * PRU_DETACH detaches the TCP protocol from the socket.
429 	 * This is only done after SO_ISCONNECTED has been cleared.
430 	 */
431       case PRU_DETACH:
432 	tcp_disconnect(tp);
433 	break;
434 
435 	/*
436 	 * Give the socket an address.
437 	 */
438       case PRU_BIND:
439 	error = in_pcbbind(inp, nam, &tcp_advice);
440 	break;
441 
442 	/*
443 	 * Prepare to accept connections.
444 	 */
445       case PRU_LISTEN:
446 	if (inp->inp_lport == 0)
447 	    error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice);
448 	if (error == 0)
449 	    w_alloc(IUOPENA, 0, tp, NULL);
450 	break;
451 
452 	/*
453 	 * Initiate connection to peer.
454 	 * Bind the local end if not already.
455 	 * Set the routing.
456 	 * Crank up the TCP state machine.
457 	 */
458       case PRU_CONNECT:
459 	{
460 	    struct in_addr laddr;
461 
462 	    laddr = inp->inp_laddr;
463 	    if (inp->inp_lport == 0)
464 	    {
465 		error = in_pcbbind(inp, (struct mbuf *)0, &tcp_advice);
466 		if (error)
467 		    break;
468 	    }
469 	    error = in_pcbconnect(inp, nam, tcp_conn_used);
470 	    if (error)
471 		break;
472 
473 	    if (in_broadcast(inp->inp_faddr))
474 	    {
475 		in_pcbdisconnect (inp, tcp_pcbdisconnect);
476 		inp->inp_laddr = laddr;
477 		error = EADDRNOTAVAIL;
478 		break;
479 	    }
480 
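	    /*
	     * Build the header template (t_template) used for segments on
	     * this connection; if no mbuf is available for it, undo the
	     * connect.
	     */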
481 	    if (! (tp->t_template = tcp_template(tp)))
482 	    {
483 		in_pcbdisconnect (inp, tcp_pcbdisconnect);
484 		inp->inp_laddr = laddr;
485 		error = ENOBUFS;
486 		break;
487 	    }
488 
489 	    tp->sws_qff = SWS_QFF_DEF;
490 
491 	    /*
	     * So we can debug connection problems without having to change
	     * every program or apply the debugging flag to each program
	     * every time it is run.
495 	     */
496 	    dowedebug(inp, so, &tcp_dfilter);
497 
498 	    soisconnecting(so);
499 	    w_alloc(IUOPENR, 0, tp, NULL);
500 	}
501 	break;
502 
503 	/*
504 	 * Create a TCP connection between two sockets.
505 	 */
506       case PRU_CONNECT2:
507 	error = EOPNOTSUPP;
508 	break;
509 
510 	/*
	 * Initiate disconnect from peer.
	 * If the connection never passed the embryonic stage, just drop it;
	 * else, if we don't need to let data drain, we can still just drop it;
	 * otherwise we have to begin the TCP shutdown process: mark the socket
	 * disconnecting, drain unread data, switch state to reflect the user
	 * close, and send a segment (e.g. FIN) to the peer.  The socket will be
	 * really disconnected when the peer sends its FIN and acks ours.
518 	 */
519       case PRU_DISCONNECT:
520 	tcp_disconnect(tp);
521 	break;
522 
523 	/*
524 	 * Accept a connection.  Essentially all the work is
525 	 * done at higher levels; just return the address
526 	 * of the peer, storing through addr.
527 	 *
528 	 * BBN-NOTE: upper levels do all the waiting;  this stays the same.
529 	 */
530       case PRU_ACCEPT:
531 	{
532 	    struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
533 
534 	    nam->m_len = sizeof (struct sockaddr_in);
535 	    sin->sin_family = AF_INET;
536 	    sin->sin_port = inp->inp_fport;
537 	    sin->sin_addr = inp->inp_faddr;
538 	    break;
539 	}
540 
541 	/*
542 	 * Mark the connection as being incapable of further output.
543 	 */
544       case PRU_SHUTDOWN:
545 	socantsendmore(so);
546 	if (! tp->usr_closed)
547 	    w_alloc(IUCLOSE, 0, tp, inp);
548 	break;
549 
550 	/*
551 	 * Abort the TCP.
552 	 */
553       case PRU_ABORT:
554 	w_alloc(IUABORT, 0, tp, inp);
555 	break;
556 
557       case PRU_CONTROL:
558 	error = tcp_ioctl(tp, (int) m, (caddr_t) nam);
559 	break;
560 
561 
562 /* SOME AS YET UNIMPLEMENTED HOOKS */
563       case PRU_SENSE:
564 	error = EOPNOTSUPP;
565 	break;
566 /* END UNIMPLEMENTED HOOKS */
567 
568       case PRU_RCVOOB:
569 
570 	{
571 	    int	desired;
572 
573 	    if (so->so_oobmark == 0 && (so->so_state & SS_RCVATMARK) == 0)
574 	    {
575 		error = EINVAL;
576 		break;
577 	    }
578 	    if (tp->oob_data == NULL)
579 	    {
580 		error = EWOULDBLOCK;
581 		break;
582 	    }
583 	    desired = *(mtod(m, int *));
584 
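	    /*
	     * Copy the queued out-of-band bytes into the caller's mbuf,
	     * at most MLEN per mbuf, freeing each oob_data mbuf as it is
	     * drained and chaining on fresh mbufs while more is wanted.
	     */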
585 	    while ((desired > 0) && (tp->oob_data))
586 	    {
587 		char	*p;
588 		unsigned count;
589 
590 		p = mtod(m, caddr_t);
591 		count = MIN(desired, tp->oob_data->m_len);
592 		count = MIN(count, MLEN);
593 		bcopy(mtod(tp->oob_data, caddr_t), p, count);
594 		m->m_len = count;
595 		desired -= count;
596 
597 		tp->oob_data->m_len -= count;
598 		tp->oob_data->m_off += count;
599 		if (tp->oob_data->m_len <= 0)
600 		    tp->oob_data = m_free(tp->oob_data);
601 
602 		if ((desired > 0) && (tp->oob_data))
603 		{
604 		    m->m_next = m_get(M_WAIT, MT_DATA);
605 		    m = m->m_next;
606 		}
607 	    }
608 
609 	}
610 	break;
611 
612       case PRU_SENDOOB:
	/*
	 * Allow up to MAX_TCPOOB bytes of out-of-band data even if the
	 * user has used up all his allocated send space.
	 */
617 	if (sbspace(&so->so_snd) < (- MAX_TCPOOB))
618 	{
619 	    m_freem(m);
620 	    error = ENOBUFS;
621 	    break;
622 
623 	}
624 	tp->t_urg = TRUE;
625 	w_alloc(IUSEND, 0, tp, m);
626 	break;
627 
628 	/*
629 	 * Return the address of this socket (local-side binding)
630 	 */
631       case PRU_SOCKADDR:
632 	in_setsockaddr(inp, nam);
633 	break;
634 
635       case PRU_PEERADDR:
636 	in_setpeeraddr(inp, nam);
637 	break;
638 
639 	/*
640 	 * TCP slow timer went off; run down all those timers.
641 	 */
642       case PRU_SLOWTIMO:
643 	tcp_timeo();
644 	break;
645 
646       default:
647 	panic("tcp_usrreq");
648     }
649     splx(s);
650     return (error);
651 }
652 
653 /*
654  * getsockopt() / setsockopt()
655  */
656 tcp_ctloutput (req,so,level,optname,optval)
657 int req;
658 struct socket *so;
659 int level, optname;
660 struct mbuf **optval;
661 {
662     int s = splnet(); /* like PRU/packet/timer entry into net code */
663     int error;
664     struct inpcb *inp;
665 
666     /*
667      * possibly for us?
668      * Follow Berkeley methods: level is protocol number if meant for the
669      * protocol layer.  (Why not say if=0, arp=1, ip=2, udp/tcp/rdp=3....?)
670      *
671      * Problem: tcp needs to know about IP options in order to use right
672      * maxseg.  This doesn't quite work with the layering.
673      *
     * Why not combine the ioctl/setsockopt/getsockopt paths, since an ioctl
     * can be seen as a fixed-size sockopt?  Tried at BBN; removed for 4.3.
676      */
677 
    /* should be a "mature" socket, so the pointers are all valid... */
679     inp = sotoinpcb(so);
680 
681     switch(req)
682     {
683 	case PRCO_GETOPT:
684 	    error = tcp_getopt (inp, optname, optval);
685 	    break;
686 
687 	case PRCO_SETOPT:
688 	    error = tcp_setopt (inp, optname, optval);
689 	    break;
690 
691 	default:
692 	    panic("tcp_ctloutput");
693     }
694 
695     splx(s);
696     return (error);
697 }
698 
699 tcp_getopt (inp, command, data)
struct inpcb	*inp;
int		command;
struct mbuf	**data;
702 {
    /*
     * No TCP-specific options are accessed by getsockopt() as yet;
     * let the lower level at the command.
     */
707     return ip_getopt (inp, command, data);
708 }
709 
710 tcp_setopt (inp, command, data)
struct inpcb	*inp;
int		command;
struct mbuf	**data;
713 {
714     int error;
715     struct tcpcb *tp;
716 
    /* no TCP-specific options are accessed by setsockopt() as yet */
718     tp = inptotcpcb(inp);
719 
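    /*
     * An IP source route consumes room in the IP header that would
     * otherwise carry TCP data.  Add back the length of the old option
     * before ip_setopt() installs the new one, then subtract the new
     * option length, so t_maxseg reflects the space actually left.
     */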
720     if (command == SO_IPROUTE)
721 	tp->t_maxseg += inp->inp_optlen;
722 
723     error =  ip_setopt(inp, command, data);
724 
725     if (command == SO_IPROUTE)
726 	tp->t_maxseg -= inp->inp_optlen;
727 
728     return (error);
729 }
730 
731 /*
732  * These numbers come from measurements described in the paper
733  *	"Converting the BBN TCP/IP to 4.2BSD"  (S.L.C. USENIX)
 * If your network handles packets larger than an ethernet frame, you
 * could change tcp_init back to determine the largest net's packet size,
 * multiply that by some number, and round up to a multiple of CLSIZE.
737  */
738 int	tcp_recvspace = 4096;
739 int	tcp_sendspace = 4096;
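
/*
 * A rough sketch of that alternative, for illustration only (it assumes
 * the usual 4.2BSD ifnet list and CLBYTES; the factor of 2 is arbitrary):
 *
 *	register struct ifnet *ifp;
 *	int maxpkt = 0;
 *
 *	for (ifp = ifnet; ifp; ifp = ifp->if_next)
 *		if (ifp->if_mtu > maxpkt)
 *			maxpkt = ifp->if_mtu;
 *	tcp_recvspace = tcp_sendspace =
 *	    ((2 * maxpkt + CLBYTES - 1) / CLBYTES) * CLBYTES;
 */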
740 
741 /*
742  * Attach TCP protocol to socket, allocating
743  * internet protocol control block, tcp control block, buffer space.
744  */
745 tcp_attach(so)
746 struct socket *so;
747 {
748     register struct tcpcb *tp;
749     struct inpcb *inp;
750     int error;
751 
752     if (! (error = soreserve(so, tcp_sendspace, tcp_recvspace)))
753     {
754 	if (! (error = in_pcballoc(so, &tcp)))
755 	{
756 	    inp = sotoinpcb(so);
757 	    if (tp = tcp_newtcpcb(inp))
758 	    {
759 		/*
		 * Should change the state tables to have an UNOPENED state,
		 * like the Butterfly's, which is different from SAME.
762 		 */
763 		tp->t_state = 0;
764 		return (0);
765 	    }
766 	    error = ENOBUFS;
767 	    in_pcbdetach(inp, (int (*)())0);
768 	}
769     }
770     return (error);
771 }
772 
773 /*
774  * Initiate (or continue) disconnect.
775  * If embryonic state, just send reset (once).
776  * If not in ``let data drain'' option, just drop.
777  * Otherwise (hard), mark socket disconnecting and drop
778  * current input data; switch states based on user close, and
779  * send segment to peer (with FIN).
780  */
781 
782 tcp_disconnect(tp)
783 register struct tcpcb *tp;
784 {
785     struct socket *so = tp->t_in_pcb->inp_socket;
786 
787     soisdisconnecting(so);
788     sbflush(&so->so_rcv);
789     tp->usr_abort = TRUE;
790     if (!tp->usr_closed)
791 	w_alloc(IUCLOSE, 0, tp, tp->t_in_pcb);
792 }
793 
794 tcp_init()
795 {
796     /*
     * Leave these checks in!  It's a pain in the ass to track down
     * problems caused by too-small mbufs if someone changes the
     * size of an mbuf.
800      */
801     if (sizeof(struct inpcb) > MLEN)
802 	panic("inpcb too big");
803 
804     if (sizeof(struct socket) > MLEN)
805 	panic("socket too big");
806 
807     if (sizeof(struct th) > MLEN)
808 	panic("th too big");
809 
810     if (sizeof(struct tcpcb) > MLEN)
811 	panic("tcpcb too big");
812 
813     if (sizeof(struct t_debug) > MLEN)
814 	panic("t_debug too big");
815 
816     /* init queue */
817     tcp.inp_next = tcp.inp_prev = &tcp;
818 
    /* there are only 4 things to match; requiring 5 disables the filter for now */
820     tcp_dfilter.matches = 5;
821 
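    /*
     * Seed the initial send sequence number from the time-of-day clock;
     * tcp_newtcpcb() advances tcp_iss by ISSINCR for each new connection.
     */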
822     tcp_iss = time.tv_sec;
823 
824     ipsw[IPPROTO_TCP].ipsw_hlen = sizeof(struct th);
825 }
826 
tcp_ctlinput (prc_code, arg)
int prc_code;
caddr_t arg;
829 {
830     int error;
831 
832     error = inetctlerrmap[prc_code];
833 
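    /*
     * For the ICMP-derived codes below, arg points at the ICMP message
     * and ic_iphdr holds the header of the datagram that provoked the
     * error.  That datagram was one we sent, so its source port/address
     * name our local end and its destination the foreign end; hence the
     * argument order in the tcp_conn_used() calls.
     */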
834     switch (prc_code)
835     {
836 	case PRC_UNREACH_PROTOCOL:	/* icmp message */
837 	case PRC_UNREACH_PORT:
838 	case PRC_MSGSIZE:
839 	    {
840 	    register struct th	*tp;
841 	    struct tcpcb	*t;
842 
843 	    tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
844 	    t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
845 		tp->t_src, tp->t_s.s_addr,
846 		tp->t_dst, tp->t_d.s_addr);
847 	    if (t)
848 		t_close(t, error);
849 	    }
850 	    break;
851 
852 	case PRC_UNREACH_NET:
853 	case PRC_UNREACH_HOST:
854 	    {
855 	    register struct th	*tp;
856 	    struct tcpcb	*t;
857 
858 	    tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
859 	    t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
860 		tp->t_src, tp->t_s.s_addr,
861 		tp->t_dst, tp->t_d.s_addr);
862 	    if (t)
863 	    {
864 		struct socket *so;
865 
866 		so = t->t_in_pcb->inp_socket;
867 		if ((so->so_state & SS_NOFDREF) == 0)
868 		    advise_user(so, error);
869 		else
870 		    t_close(t, error);
871 	    }
872 	    }
873 	    break;
874 
875 	case PRC_GWDOWN:
876 	    in_gdown (&tcp, (u_long) arg);
877 	    break;
878 
879 	case PRC_REDIRECT_NET:	/* icmp message */
880 	case PRC_REDIRECT_HOST:
881 	    {
882 	    struct tcpcb	*t;
883 	    register struct th	*tp;
884 
885 	    tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
886 	    t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
887 		tp->t_src, tp->t_s.s_addr,
888 		tp->t_dst, tp->t_d.s_addr);
889 	    if (t)
890 		icmp_redirect_inp(t->t_in_pcb, (struct icmp *) arg,
891 		    prc_code == PRC_REDIRECT_NET ? rtnet : rthost);
892 	    }
893 	    break;
894 
895 	case PRC_TIMXCEED_INTRANS:	/* icmp message */
896 	case PRC_TIMXCEED_REASS:
897 	case PRC_PARAMPROB:
898 	    break;
899 
900 	case PRC_QUENCH:	/* icmp message */
901 	    /*
	     * See RFC 896.  The idea is that when we get a source quench
	     * message on a connection, we should send fewer packets.  This
	     * ties in with the silly window syndrome, whose solution is to
	     * send fewer, larger packets.  Deal with quenches by altering the
	     * threshold used by the silly window syndrome code.  This is
	     * similar to acting as if the window is smaller than it actually
	     * is when deciding whether to send, except that when we do send,
	     * we use as much of the window as there really is.
909 	     */
910 	    {
911 	    register struct th	*tp;
912 	    struct tcpcb	*t;
913 
914 	    tp = (struct th *) (&((struct icmp *) arg)->ic_iphdr);
915 	    t = (struct tcpcb *)tcp_conn_used ((struct inpcb *) 0,
916 		tp->t_src, tp->t_s.s_addr,
917 		tp->t_dst, tp->t_d.s_addr);
918 	    if (t)
919 	    {
920 		t->sws_qff -= SWS_QFF_DEC;
921 		if (t->sws_qff < SWS_QFF_MIN)
922 		    t->sws_qff = SWS_QFF_MIN;
923 	    }
924 	    }
925 	    break;
926 
927 	case PRC_IFDOWN:
928 	    {
929 	    u_long addr;
930 
931 	    addr = ((struct sockaddr_in *)(arg))->sin_addr.s_addr;
932 	    inpcb_notify(&tcp, addr, (u_long) 0, error);
933 	    inpcb_notify(&tcp, (u_long) 0, addr, error);
934 	    }
935 	    break;
936 
937 	case PRC_HOSTDEAD:	/* from imp interface */
938 	case PRC_HOSTUNREACH:
939 	    /*
	     * We get the same message for destination hosts and for gateways.
941 	     */
942 	    {
943 	    u_long addr;
944 
945 	    addr = ((struct sockaddr_in *)arg)->sin_addr.s_addr;
946 	    in_gdown (&tcp, addr);
947 	    inpcb_notify(&tcp, (u_long) 0, addr, error);
948 	    }
949 	    break;
950 
951 	default:
952 	    panic("tcp_ctlinput");
953     }
954 }
955