1 /*
2 * Copyright (c) 1982, 1986, 1988, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * %sccs.include.redist.c%
6 *
7 * @(#)tcp_usrreq.c 8.5 (Berkeley) 06/21/95
8 */
9
10 #include <sys/param.h>
11 #include <sys/systm.h>
12 #include <sys/malloc.h>
13 #include <sys/mbuf.h>
14 #include <sys/socket.h>
15 #include <sys/socketvar.h>
16 #include <sys/protosw.h>
17 #include <sys/errno.h>
18 #include <sys/stat.h>
19
20 #include <net/if.h>
21 #include <net/route.h>
22
23 #include <netinet/in.h>
24 #include <netinet/in_systm.h>
25 #include <netinet/ip.h>
26 #include <netinet/in_pcb.h>
27 #include <netinet/ip_var.h>
28 #include <netinet/tcp.h>
29 #include <netinet/tcp_fsm.h>
30 #include <netinet/tcp_seq.h>
31 #include <netinet/tcp_timer.h>
32 #include <netinet/tcp_var.h>
33 #include <netinet/tcpip.h>
34 #include <netinet/tcp_debug.h>
35
36 /*
37 * TCP protocol interface to socket abstraction.
38 */
39 extern char *tcpstates[];
40
41 /*
42 * Process a TCP user request for TCP tb. If this is a send request
43 * then m is the mbuf chain of send data. If this is a timer expiration
44 * (called from the software clock routine), then timertype tells which timer.
45 */
46 /*ARGSUSED*/
47 int
tcp_usrreq(so,req,m,nam,control)48 tcp_usrreq(so, req, m, nam, control)
49 struct socket *so;
50 int req;
51 struct mbuf *m, *nam, *control;
52 {
53 register struct inpcb *inp;
54 register struct tcpcb *tp;
55 int s;
56 int error = 0;
57 int ostate;
58
59 if (req == PRU_CONTROL)
60 return (in_control(so, (u_long)m, (caddr_t)nam,
61 (struct ifnet *)control));
62 if (control && control->m_len) {
63 m_freem(control);
64 if (m)
65 m_freem(m);
66 return (EINVAL);
67 }
68
69 s = splnet();
70 inp = sotoinpcb(so);
71 /*
72 * When a TCP is attached to a socket, then there will be
73 * a (struct inpcb) pointed at by the socket, and this
74 * structure will point at a subsidary (struct tcpcb).
75 */
76 if (inp == 0 && req != PRU_ATTACH) {
77 splx(s);
78 #if 0
79 /*
80 * The following corrects an mbuf leak under rare
81 * circumstances, but has not been fully tested.
82 */
83 if (m && req != PRU_SENSE)
84 m_freem(m);
85 #else
86 /* safer version of fix for mbuf leak */
87 if (m && (req == PRU_SEND || req == PRU_SENDOOB))
88 m_freem(m);
89 #endif
90 return (EINVAL); /* XXX */
91 }
92 if (inp) {
93 tp = intotcpcb(inp);
94 /* WHAT IF TP IS 0? */
95 #ifdef KPROF
96 tcp_acounts[tp->t_state][req]++;
97 #endif
98 ostate = tp->t_state;
99 } else
100 ostate = 0;
101 switch (req) {
102
103 /*
104 * TCP attaches to socket via PRU_ATTACH, reserving space,
105 * and an internet control block.
106 */
107 case PRU_ATTACH:
108 if (inp) {
109 error = EISCONN;
110 break;
111 }
112 error = tcp_attach(so);
113 if (error)
114 break;
115 if ((so->so_options & SO_LINGER) && so->so_linger == 0)
116 so->so_linger = TCP_LINGERTIME;
117 tp = sototcpcb(so);
118 break;
119
120 /*
121 * PRU_DETACH detaches the TCP protocol from the socket.
122 * If the protocol state is non-embryonic, then can't
123 * do this directly: have to initiate a PRU_DISCONNECT,
124 * which may finish later; embryonic TCB's can just
125 * be discarded here.
126 */
127 case PRU_DETACH:
128 if (tp->t_state > TCPS_LISTEN)
129 tp = tcp_disconnect(tp);
130 else
131 tp = tcp_close(tp);
132 break;
133
134 /*
135 * Give the socket an address.
136 */
137 case PRU_BIND:
138 error = in_pcbbind(inp, nam);
139 if (error)
140 break;
141 break;
142
143 /*
144 * Prepare to accept connections.
145 */
146 case PRU_LISTEN:
147 if (inp->inp_lport == 0)
148 error = in_pcbbind(inp, (struct mbuf *)0);
149 if (error == 0)
150 tp->t_state = TCPS_LISTEN;
151 break;
152
153 /*
154 * Initiate connection to peer.
155 * Create a template for use in transmissions on this connection.
156 * Enter SYN_SENT state, and mark socket as connecting.
157 * Start keep-alive timer, and seed output sequence space.
158 * Send initial segment on connection.
159 */
160 case PRU_CONNECT:
161 if (inp->inp_lport == 0) {
162 error = in_pcbbind(inp, (struct mbuf *)0);
163 if (error)
164 break;
165 }
166 error = in_pcbconnect(inp, nam);
167 if (error)
168 break;
169 tp->t_template = tcp_template(tp);
170 if (tp->t_template == 0) {
171 in_pcbdisconnect(inp);
172 error = ENOBUFS;
173 break;
174 }
175 /* Compute window scaling to request. */
176 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
177 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
178 tp->request_r_scale++;
179 soisconnecting(so);
180 tcpstat.tcps_connattempt++;
181 tp->t_state = TCPS_SYN_SENT;
182 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
183 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/4;
184 tcp_sendseqinit(tp);
185 error = tcp_output(tp);
186 break;
187
188 /*
189 * Create a TCP connection between two sockets.
190 */
191 case PRU_CONNECT2:
192 error = EOPNOTSUPP;
193 break;
194
195 /*
196 * Initiate disconnect from peer.
197 * If connection never passed embryonic stage, just drop;
198 * else if don't need to let data drain, then can just drop anyways,
199 * else have to begin TCP shutdown process: mark socket disconnecting,
200 * drain unread data, state switch to reflect user close, and
201 * send segment (e.g. FIN) to peer. Socket will be really disconnected
202 * when peer sends FIN and acks ours.
203 *
204 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
205 */
206 case PRU_DISCONNECT:
207 tp = tcp_disconnect(tp);
208 break;
209
210 /*
211 * Accept a connection. Essentially all the work is
212 * done at higher levels; just return the address
213 * of the peer, storing through addr.
214 */
215 case PRU_ACCEPT:
216 in_setpeeraddr(inp, nam);
217 break;
218
219 /*
220 * Mark the connection as being incapable of further output.
221 */
222 case PRU_SHUTDOWN:
223 socantsendmore(so);
224 tp = tcp_usrclosed(tp);
225 if (tp)
226 error = tcp_output(tp);
227 break;
228
229 /*
230 * After a receive, possibly send window update to peer.
231 */
232 case PRU_RCVD:
233 (void) tcp_output(tp);
234 break;
235
236 /*
237 * Do a send by putting data in output queue and updating urgent
238 * marker if URG set. Possibly send more data.
239 */
240 case PRU_SEND:
241 sbappend(&so->so_snd, m);
242 error = tcp_output(tp);
243 break;
244
245 /*
246 * Abort the TCP.
247 */
248 case PRU_ABORT:
249 tp = tcp_drop(tp, ECONNABORTED);
250 break;
251
252 case PRU_SENSE:
253 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
254 (void) splx(s);
255 return (0);
256
257 case PRU_RCVOOB:
258 if ((so->so_oobmark == 0 &&
259 (so->so_state & SS_RCVATMARK) == 0) ||
260 so->so_options & SO_OOBINLINE ||
261 tp->t_oobflags & TCPOOB_HADDATA) {
262 error = EINVAL;
263 break;
264 }
265 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
266 error = EWOULDBLOCK;
267 break;
268 }
269 m->m_len = 1;
270 *mtod(m, caddr_t) = tp->t_iobc;
271 if (((int)nam & MSG_PEEK) == 0)
272 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
273 break;
274
275 case PRU_SENDOOB:
276 if (sbspace(&so->so_snd) < -512) {
277 m_freem(m);
278 error = ENOBUFS;
279 break;
280 }
281 /*
282 * According to RFC961 (Assigned Protocols),
283 * the urgent pointer points to the last octet
284 * of urgent data. We continue, however,
285 * to consider it to indicate the first octet
286 * of data past the urgent section.
287 * Otherwise, snd_up should be one lower.
288 */
289 sbappend(&so->so_snd, m);
290 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
291 tp->t_force = 1;
292 error = tcp_output(tp);
293 tp->t_force = 0;
294 break;
295
296 case PRU_SOCKADDR:
297 in_setsockaddr(inp, nam);
298 break;
299
300 case PRU_PEERADDR:
301 in_setpeeraddr(inp, nam);
302 break;
303
304 /*
305 * TCP slow timer went off; going through this
306 * routine for tracing's sake.
307 */
308 case PRU_SLOWTIMO:
309 tp = tcp_timers(tp, (int)nam);
310 req |= (int)nam << 8; /* for debug's sake */
311 break;
312
313 default:
314 panic("tcp_usrreq");
315 }
316 if (tp && (so->so_options & SO_DEBUG))
317 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
318 splx(s);
319 return (error);
320 }
321
322 int
tcp_ctloutput(op,so,level,optname,mp)323 tcp_ctloutput(op, so, level, optname, mp)
324 int op;
325 struct socket *so;
326 int level, optname;
327 struct mbuf **mp;
328 {
329 int error = 0, s;
330 struct inpcb *inp;
331 register struct tcpcb *tp;
332 register struct mbuf *m;
333 register int i;
334
335 s = splnet();
336 inp = sotoinpcb(so);
337 if (inp == NULL) {
338 splx(s);
339 if (op == PRCO_SETOPT && *mp)
340 (void) m_free(*mp);
341 return (ECONNRESET);
342 }
343 if (level != IPPROTO_TCP) {
344 error = ip_ctloutput(op, so, level, optname, mp);
345 splx(s);
346 return (error);
347 }
348 tp = intotcpcb(inp);
349
350 switch (op) {
351
352 case PRCO_SETOPT:
353 m = *mp;
354 switch (optname) {
355
356 case TCP_NODELAY:
357 if (m == NULL || m->m_len < sizeof (int))
358 error = EINVAL;
359 else if (*mtod(m, int *))
360 tp->t_flags |= TF_NODELAY;
361 else
362 tp->t_flags &= ~TF_NODELAY;
363 break;
364
365 case TCP_MAXSEG:
366 if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
367 tp->t_maxseg = i;
368 else
369 error = EINVAL;
370 break;
371
372 default:
373 error = ENOPROTOOPT;
374 break;
375 }
376 if (m)
377 (void) m_free(m);
378 break;
379
380 case PRCO_GETOPT:
381 *mp = m = m_get(M_WAIT, MT_SOOPTS);
382 m->m_len = sizeof(int);
383
384 switch (optname) {
385 case TCP_NODELAY:
386 *mtod(m, int *) = tp->t_flags & TF_NODELAY;
387 break;
388 case TCP_MAXSEG:
389 *mtod(m, int *) = tp->t_maxseg;
390 break;
391 default:
392 error = ENOPROTOOPT;
393 break;
394 }
395 break;
396 }
397 splx(s);
398 return (error);
399 }
400
401 u_long tcp_sendspace = 1024*8;
402 u_long tcp_recvspace = 1024*8;
403
404 /*
405 * Attach TCP protocol to socket, allocating
406 * internet protocol control block, tcp control block,
407 * bufer space, and entering LISTEN state if to accept connections.
408 */
409 int
tcp_attach(so)410 tcp_attach(so)
411 struct socket *so;
412 {
413 register struct tcpcb *tp;
414 struct inpcb *inp;
415 int error;
416
417 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
418 error = soreserve(so, tcp_sendspace, tcp_recvspace);
419 if (error)
420 return (error);
421 }
422 error = in_pcballoc(so, &tcb);
423 if (error)
424 return (error);
425 inp = sotoinpcb(so);
426 tp = tcp_newtcpcb(inp);
427 if (tp == 0) {
428 int nofd = so->so_state & SS_NOFDREF; /* XXX */
429
430 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
431 in_pcbdetach(inp);
432 so->so_state |= nofd;
433 return (ENOBUFS);
434 }
435 tp->t_state = TCPS_CLOSED;
436 return (0);
437 }
438
439 /*
440 * Initiate (or continue) disconnect.
441 * If embryonic state, just send reset (once).
442 * If in ``let data drain'' option and linger null, just drop.
443 * Otherwise (hard), mark socket disconnecting and drop
444 * current input data; switch states based on user close, and
445 * send segment to peer (with FIN).
446 */
447 struct tcpcb *
tcp_disconnect(tp)448 tcp_disconnect(tp)
449 register struct tcpcb *tp;
450 {
451 struct socket *so = tp->t_inpcb->inp_socket;
452
453 if (tp->t_state < TCPS_ESTABLISHED)
454 tp = tcp_close(tp);
455 else if ((so->so_options & SO_LINGER) && so->so_linger == 0)
456 tp = tcp_drop(tp, 0);
457 else {
458 soisdisconnecting(so);
459 sbflush(&so->so_rcv);
460 tp = tcp_usrclosed(tp);
461 if (tp)
462 (void) tcp_output(tp);
463 }
464 return (tp);
465 }
466
467 /*
468 * User issued close, and wish to trail through shutdown states:
469 * if never received SYN, just forget it. If got a SYN from peer,
470 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
471 * If already got a FIN from peer, then almost done; go to LAST_ACK
472 * state. In all other cases, have already sent FIN to peer (e.g.
473 * after PRU_SHUTDOWN), and just have to play tedious game waiting
474 * for peer to send FIN or not respond to keep-alives, etc.
475 * We can let the user exit from the close as soon as the FIN is acked.
476 */
477 struct tcpcb *
tcp_usrclosed(tp)478 tcp_usrclosed(tp)
479 register struct tcpcb *tp;
480 {
481
482 switch (tp->t_state) {
483
484 case TCPS_CLOSED:
485 case TCPS_LISTEN:
486 case TCPS_SYN_SENT:
487 tp->t_state = TCPS_CLOSED;
488 tp = tcp_close(tp);
489 break;
490
491 case TCPS_SYN_RECEIVED:
492 case TCPS_ESTABLISHED:
493 tp->t_state = TCPS_FIN_WAIT_1;
494 break;
495
496 case TCPS_CLOSE_WAIT:
497 tp->t_state = TCPS_LAST_ACK;
498 break;
499 }
500 if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
501 soisdisconnected(tp->t_inpcb->inp_socket);
502 return (tp);
503 }
504