1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)tcp_usrreq.c 8.4 (Berkeley) 05/24/95 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/malloc.h> 13 #include <sys/mbuf.h> 14 #include <sys/socket.h> 15 #include <sys/socketvar.h> 16 #include <sys/protosw.h> 17 #include <sys/errno.h> 18 #include <sys/stat.h> 19 20 #include <net/if.h> 21 #include <net/route.h> 22 23 #include <netinet/in.h> 24 #include <netinet/in_systm.h> 25 #include <netinet/ip.h> 26 #include <netinet/in_pcb.h> 27 #include <netinet/ip_var.h> 28 #include <netinet/tcp.h> 29 #include <netinet/tcp_fsm.h> 30 #include <netinet/tcp_seq.h> 31 #include <netinet/tcp_timer.h> 32 #include <netinet/tcp_var.h> 33 #include <netinet/tcpip.h> 34 #include <netinet/tcp_debug.h> 35 36 /* 37 * TCP protocol interface to socket abstraction. 38 */ 39 extern char *tcpstates[]; 40 41 /* 42 * Process a TCP user request for TCP tb. If this is a send request 43 * then m is the mbuf chain of send data. If this is a timer expiration 44 * (called from the software clock routine), then timertype tells which timer. 45 */ 46 /*ARGSUSED*/ 47 int 48 tcp_usrreq(so, req, m, nam, control) 49 struct socket *so; 50 int req; 51 struct mbuf *m, *nam, *control; 52 { 53 register struct inpcb *inp; 54 register struct tcpcb *tp; 55 int s; 56 int error = 0; 57 int ostate; 58 59 if (req == PRU_CONTROL) 60 return (in_control(so, (u_long)m, (caddr_t)nam, 61 (struct ifnet *)control)); 62 if (control && control->m_len) { 63 m_freem(control); 64 if (m) 65 m_freem(m); 66 return (EINVAL); 67 } 68 69 s = splnet(); 70 inp = sotoinpcb(so); 71 /* 72 * When a TCP is attached to a socket, then there will be 73 * a (struct inpcb) pointed at by the socket, and this 74 * structure will point at a subsidary (struct tcpcb). 75 */ 76 if (inp == 0 && req != PRU_ATTACH) { 77 splx(s); 78 #if 0 79 /* 80 * The following corrects an mbuf leak under rare 81 * circumstances, but has not been fully tested. 82 */ 83 if (m && req != PRU_SENSE) 84 m_freem(m); 85 #else 86 /* safer version of fix for mbuf leak */ 87 if (m && (req == PRU_SEND || req == PRU_SENDOOB)) 88 m_freem(m); 89 #endif 90 return (EINVAL); /* XXX */ 91 } 92 if (inp) { 93 tp = intotcpcb(inp); 94 /* WHAT IF TP IS 0? */ 95 #ifdef KPROF 96 tcp_acounts[tp->t_state][req]++; 97 #endif 98 ostate = tp->t_state; 99 } else 100 ostate = 0; 101 switch (req) { 102 103 /* 104 * TCP attaches to socket via PRU_ATTACH, reserving space, 105 * and an internet control block. 106 */ 107 case PRU_ATTACH: 108 if (inp) { 109 error = EISCONN; 110 break; 111 } 112 error = tcp_attach(so); 113 if (error) 114 break; 115 if ((so->so_options & SO_LINGER) && so->so_linger == 0) 116 so->so_linger = TCP_LINGERTIME; 117 tp = sototcpcb(so); 118 break; 119 120 /* 121 * PRU_DETACH detaches the TCP protocol from the socket. 122 * If the protocol state is non-embryonic, then can't 123 * do this directly: have to initiate a PRU_DISCONNECT, 124 * which may finish later; embryonic TCB's can just 125 * be discarded here. 126 */ 127 case PRU_DETACH: 128 if (tp->t_state > TCPS_LISTEN) 129 tp = tcp_disconnect(tp); 130 else 131 tp = tcp_close(tp); 132 break; 133 134 /* 135 * Give the socket an address. 136 */ 137 case PRU_BIND: 138 error = in_pcbbind(inp, nam); 139 if (error) 140 break; 141 break; 142 143 /* 144 * Prepare to accept connections. 145 */ 146 case PRU_LISTEN: 147 if (inp->inp_lport == 0) 148 error = in_pcbbind(inp, (struct mbuf *)0); 149 if (error == 0) 150 tp->t_state = TCPS_LISTEN; 151 break; 152 153 /* 154 * Initiate connection to peer. 155 * Create a template for use in transmissions on this connection. 156 * Enter SYN_SENT state, and mark socket as connecting. 157 * Start keep-alive timer, and seed output sequence space. 158 * Send initial segment on connection. 159 */ 160 case PRU_CONNECT: 161 if (inp->inp_lport == 0) { 162 error = in_pcbbind(inp, (struct mbuf *)0); 163 if (error) 164 break; 165 } 166 error = in_pcbconnect(inp, nam); 167 if (error) 168 break; 169 tp->t_template = tcp_template(tp); 170 if (tp->t_template == 0) { 171 in_pcbdisconnect(inp); 172 error = ENOBUFS; 173 break; 174 } 175 /* Compute window scaling to request. */ 176 while (tp->request_r_scale < TCP_MAX_WINSHIFT && 177 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat) 178 tp->request_r_scale++; 179 soisconnecting(so); 180 tcpstat.tcps_connattempt++; 181 tp->t_state = TCPS_SYN_SENT; 182 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT; 183 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2; 184 tcp_sendseqinit(tp); 185 error = tcp_output(tp); 186 break; 187 188 /* 189 * Create a TCP connection between two sockets. 190 */ 191 case PRU_CONNECT2: 192 error = EOPNOTSUPP; 193 break; 194 195 /* 196 * Initiate disconnect from peer. 197 * If connection never passed embryonic stage, just drop; 198 * else if don't need to let data drain, then can just drop anyways, 199 * else have to begin TCP shutdown process: mark socket disconnecting, 200 * drain unread data, state switch to reflect user close, and 201 * send segment (e.g. FIN) to peer. Socket will be really disconnected 202 * when peer sends FIN and acks ours. 203 * 204 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 205 */ 206 case PRU_DISCONNECT: 207 tp = tcp_disconnect(tp); 208 break; 209 210 /* 211 * Accept a connection. Essentially all the work is 212 * done at higher levels; just return the address 213 * of the peer, storing through addr. 214 */ 215 case PRU_ACCEPT: 216 in_setpeeraddr(inp, nam); 217 break; 218 219 /* 220 * Mark the connection as being incapable of further output. 221 */ 222 case PRU_SHUTDOWN: 223 socantsendmore(so); 224 tp = tcp_usrclosed(tp); 225 if (tp) 226 error = tcp_output(tp); 227 break; 228 229 /* 230 * After a receive, possibly send window update to peer. 231 */ 232 case PRU_RCVD: 233 (void) tcp_output(tp); 234 break; 235 236 /* 237 * Do a send by putting data in output queue and updating urgent 238 * marker if URG set. Possibly send more data. 239 */ 240 case PRU_SEND: 241 sbappend(&so->so_snd, m); 242 error = tcp_output(tp); 243 break; 244 245 /* 246 * Abort the TCP. 247 */ 248 case PRU_ABORT: 249 tp = tcp_drop(tp, ECONNABORTED); 250 break; 251 252 case PRU_SENSE: 253 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat; 254 (void) splx(s); 255 return (0); 256 257 case PRU_RCVOOB: 258 if ((so->so_oobmark == 0 && 259 (so->so_state & SS_RCVATMARK) == 0) || 260 so->so_options & SO_OOBINLINE || 261 tp->t_oobflags & TCPOOB_HADDATA) { 262 error = EINVAL; 263 break; 264 } 265 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) { 266 error = EWOULDBLOCK; 267 break; 268 } 269 m->m_len = 1; 270 *mtod(m, caddr_t) = tp->t_iobc; 271 if (((int)nam & MSG_PEEK) == 0) 272 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA); 273 break; 274 275 case PRU_SENDOOB: 276 if (sbspace(&so->so_snd) < -512) { 277 m_freem(m); 278 error = ENOBUFS; 279 break; 280 } 281 /* 282 * According to RFC961 (Assigned Protocols), 283 * the urgent pointer points to the last octet 284 * of urgent data. We continue, however, 285 * to consider it to indicate the first octet 286 * of data past the urgent section. 287 * Otherwise, snd_up should be one lower. 288 */ 289 sbappend(&so->so_snd, m); 290 tp->snd_up = tp->snd_una + so->so_snd.sb_cc; 291 tp->t_force = 1; 292 error = tcp_output(tp); 293 tp->t_force = 0; 294 break; 295 296 case PRU_SOCKADDR: 297 in_setsockaddr(inp, nam); 298 break; 299 300 case PRU_PEERADDR: 301 in_setpeeraddr(inp, nam); 302 break; 303 304 /* 305 * TCP slow timer went off; going through this 306 * routine for tracing's sake. 307 */ 308 case PRU_SLOWTIMO: 309 tp = tcp_timers(tp, (int)nam); 310 req |= (int)nam << 8; /* for debug's sake */ 311 break; 312 313 default: 314 panic("tcp_usrreq"); 315 } 316 if (tp && (so->so_options & SO_DEBUG)) 317 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req); 318 splx(s); 319 return (error); 320 } 321 322 int 323 tcp_ctloutput(op, so, level, optname, mp) 324 int op; 325 struct socket *so; 326 int level, optname; 327 struct mbuf **mp; 328 { 329 int error = 0, s; 330 struct inpcb *inp; 331 register struct tcpcb *tp; 332 register struct mbuf *m; 333 register int i; 334 335 s = splnet(); 336 inp = sotoinpcb(so); 337 if (inp == NULL) { 338 splx(s); 339 if (op == PRCO_SETOPT && *mp) 340 (void) m_free(*mp); 341 return (ECONNRESET); 342 } 343 if (level != IPPROTO_TCP) { 344 error = ip_ctloutput(op, so, level, optname, mp); 345 splx(s); 346 return (error); 347 } 348 tp = intotcpcb(inp); 349 350 switch (op) { 351 352 case PRCO_SETOPT: 353 m = *mp; 354 switch (optname) { 355 356 case TCP_NODELAY: 357 if (m == NULL || m->m_len < sizeof (int)) 358 error = EINVAL; 359 else if (*mtod(m, int *)) 360 tp->t_flags |= TF_NODELAY; 361 else 362 tp->t_flags &= ~TF_NODELAY; 363 break; 364 365 case TCP_MAXSEG: 366 if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg) 367 tp->t_maxseg = i; 368 else 369 error = EINVAL; 370 break; 371 372 default: 373 error = ENOPROTOOPT; 374 break; 375 } 376 if (m) 377 (void) m_free(m); 378 break; 379 380 case PRCO_GETOPT: 381 *mp = m = m_get(M_WAIT, MT_SOOPTS); 382 m->m_len = sizeof(int); 383 384 switch (optname) { 385 case TCP_NODELAY: 386 *mtod(m, int *) = tp->t_flags & TF_NODELAY; 387 break; 388 case TCP_MAXSEG: 389 *mtod(m, int *) = tp->t_maxseg; 390 break; 391 default: 392 error = ENOPROTOOPT; 393 break; 394 } 395 break; 396 } 397 splx(s); 398 return (error); 399 } 400 401 u_long tcp_sendspace = 1024*8; 402 u_long tcp_recvspace = 1024*8; 403 404 /* 405 * Attach TCP protocol to socket, allocating 406 * internet protocol control block, tcp control block, 407 * bufer space, and entering LISTEN state if to accept connections. 408 */ 409 int 410 tcp_attach(so) 411 struct socket *so; 412 { 413 register struct tcpcb *tp; 414 struct inpcb *inp; 415 int error; 416 417 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 418 error = soreserve(so, tcp_sendspace, tcp_recvspace); 419 if (error) 420 return (error); 421 } 422 error = in_pcballoc(so, &tcb); 423 if (error) 424 return (error); 425 inp = sotoinpcb(so); 426 tp = tcp_newtcpcb(inp); 427 if (tp == 0) { 428 int nofd = so->so_state & SS_NOFDREF; /* XXX */ 429 430 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */ 431 in_pcbdetach(inp); 432 so->so_state |= nofd; 433 return (ENOBUFS); 434 } 435 tp->t_state = TCPS_CLOSED; 436 return (0); 437 } 438 439 /* 440 * Initiate (or continue) disconnect. 441 * If embryonic state, just send reset (once). 442 * If in ``let data drain'' option and linger null, just drop. 443 * Otherwise (hard), mark socket disconnecting and drop 444 * current input data; switch states based on user close, and 445 * send segment to peer (with FIN). 446 */ 447 struct tcpcb * 448 tcp_disconnect(tp) 449 register struct tcpcb *tp; 450 { 451 struct socket *so = tp->t_inpcb->inp_socket; 452 453 if (tp->t_state < TCPS_ESTABLISHED) 454 tp = tcp_close(tp); 455 else if ((so->so_options & SO_LINGER) && so->so_linger == 0) 456 tp = tcp_drop(tp, 0); 457 else { 458 soisdisconnecting(so); 459 sbflush(&so->so_rcv); 460 tp = tcp_usrclosed(tp); 461 if (tp) 462 (void) tcp_output(tp); 463 } 464 return (tp); 465 } 466 467 /* 468 * User issued close, and wish to trail through shutdown states: 469 * if never received SYN, just forget it. If got a SYN from peer, 470 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 471 * If already got a FIN from peer, then almost done; go to LAST_ACK 472 * state. In all other cases, have already sent FIN to peer (e.g. 473 * after PRU_SHUTDOWN), and just have to play tedious game waiting 474 * for peer to send FIN or not respond to keep-alives, etc. 475 * We can let the user exit from the close as soon as the FIN is acked. 476 */ 477 struct tcpcb * 478 tcp_usrclosed(tp) 479 register struct tcpcb *tp; 480 { 481 482 switch (tp->t_state) { 483 484 case TCPS_CLOSED: 485 case TCPS_LISTEN: 486 case TCPS_SYN_SENT: 487 tp->t_state = TCPS_CLOSED; 488 tp = tcp_close(tp); 489 break; 490 491 case TCPS_SYN_RECEIVED: 492 case TCPS_ESTABLISHED: 493 tp->t_state = TCPS_FIN_WAIT_1; 494 break; 495 496 case TCPS_CLOSE_WAIT: 497 tp->t_state = TCPS_LAST_ACK; 498 break; 499 } 500 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) 501 soisdisconnected(tp->t_inpcb->inp_socket); 502 return (tp); 503 } 504