/*
 * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)uipc_socket2.c	7.16.1.1 (Berkeley) 05/09/91
 */

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "malloc.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* strings for sleep message: */
char	netio[] = "netio";
char	netcon[] = "netcon";
char	netcls[] = "netcls";

/* system-wide ceiling on a socket buffer reservation; see sbreserve() */
u_long	sb_max = SB_MAX;		/* patchable */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Mark the socket as actively trying to establish a connection
 * (called from the active side during connect() processing).
 */
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark the socket as connected.  If this socket is an in-progress
 * connection still on a listener's so_q0, move it to so_q and wake
 * the listener (making it available to accept()); otherwise wake any
 * sleepers on this socket itself.
 */
soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		/* passive side: requeue on so_q and notify the listener */
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Note that a disconnect has been initiated: no more data can be
 * sent or received; wake up anyone sleeping on the socket.
 */
soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * Note that the connection to the peer is completely severed;
 * clear all connection state and wake up sleepers.
 */
soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int soqueue = connstatus ? 1 : 0;

	/* refuse new connections once the queues exceed 150% of the limit */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
	if (so == NULL)
		return ((struct socket *)0);
	bzero((caddr_t)so, sizeof(*so));
	/* inherit type, options and limits from the listening socket */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		/* protocol refused the attach: undo the insert and free */
		(void) soqremque(so, soqueue);
		(void) free((caddr_t)so, M_SOCKET);
		return ((struct socket *)0);
	}
	if (connstatus) {
		/* already connected/confirming: notify the listener now */
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket so at the tail of head's so_q0 (q == 0, connection
 * in progress) or so_q (q != 0, ready for accept()), bumping the
 * corresponding queue length.
 */
soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	register struct socket **prev;
	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = 0;
		/* walk to the tail of the singly-linked so_q0 list */
		for (prev = &(head->so_q0); *prev; )
			prev = &((*prev)->so_q0);
	} else {
		head->so_qlen++;
		so->so_q = 0;
		for (prev = &(head->so_q); *prev; )
			prev = &((*prev)->so_q);
	}
	*prev = so;
}

/*
 * Remove socket so from its head's so_q0 (q == 0) or so_q (q != 0).
 * Returns 1 if the socket was found and removed, 0 if it was not
 * present on that queue.
 */
soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	/* scan the queue for so, keeping the predecessor for unlinking */
	for (;;) {
		next = q ? prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == 0)
			return (0);
		prev = next;
	}
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Socket select/wakeup routines.
 */

/*
 * Queue a process for a select on a socket buffer.
 */
sbselqueue(sb)
	struct sockbuf *sb;
{
	struct proc *p;

	/*
	 * If another process is already recorded as selecting here and
	 * is still asleep in select (p_wchan on selwait), note a
	 * collision; otherwise record the current process.
	 */
	if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait)
		sb->sb_flags |= SB_COLL;
	else {
		sb->sb_sel = u.u_procp;
		sb->sb_flags |= SB_SEL;
	}
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Returns any error from tsleep() (e.g. EINTR unless SB_NOINTR).
 */
sbwait(sb)
	struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	/* sleep until the current holder drops SB_LOCK, then take it */
	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netio, 0))
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 */
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p;

	if (sb->sb_sel) {
		/* wake selector(s) and clear the recorded select state */
		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
		sb->sb_sel = 0;
		sb->sb_flags &= ~(SB_SEL|SB_COLL);
	}
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		/* negative so_pgid names a process group, positive a pid */
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field.  The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1. If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive buffer space for a socket and establish
 * sensible low-water marks.  Returns 0 on success, ENOBUFS if either
 * reservation fails (releasing the send reservation on partial failure).
 */
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
373 */ 374 sbreserve(sb, cc) 375 struct sockbuf *sb; 376 u_long cc; 377 { 378 379 if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES)) 380 return (0); 381 sb->sb_hiwat = cc; 382 sb->sb_mbmax = min(cc * 2, sb_max); 383 if (sb->sb_lowat > sb->sb_hiwat) 384 sb->sb_lowat = sb->sb_hiwat; 385 return (1); 386 } 387 388 /* 389 * Free mbufs held by a socket, and reserved mbuf space. 390 */ 391 sbrelease(sb) 392 struct sockbuf *sb; 393 { 394 395 sbflush(sb); 396 sb->sb_hiwat = sb->sb_mbmax = 0; 397 } 398 399 /* 400 * Routines to add and remove 401 * data from an mbuf queue. 402 * 403 * The routines sbappend() or sbappendrecord() are normally called to 404 * append new mbufs to a socket buffer, after checking that adequate 405 * space is available, comparing the function sbspace() with the amount 406 * of data to be added. sbappendrecord() differs from sbappend() in 407 * that data supplied is treated as the beginning of a new record. 408 * To place a sender's address, optional access rights, and data in a 409 * socket receive buffer, sbappendaddr() should be used. To place 410 * access rights and data in a socket receive buffer, sbappendrights() 411 * should be used. In either case, the new data begins a new record. 412 * Note that unlike sbappend() and sbappendrecord(), these routines check 413 * for the caller that there will be enough space to store the data. 414 * Each fails if there is not enough space, or if it cannot find mbufs 415 * to store additional information in. 416 * 417 * Reliable protocols may use the socket send buffer to hold data 418 * awaiting acknowledgement. Data is normally copied from a socket 419 * send buffer in a protocol with m_copy for output to a peer, 420 * and then removing the data from the socket buffer with sbdrop() 421 * or sbdroprecord() when the data is acknowledged by the peer. 422 */ 423 424 /* 425 * Append mbuf chain m to the last record in the 426 * socket buffer sb. 
The additional space associated 427 * the mbuf chain is recorded in sb. Empty mbufs are 428 * discarded and mbufs are compacted where possible. 429 */ 430 sbappend(sb, m) 431 struct sockbuf *sb; 432 struct mbuf *m; 433 { 434 register struct mbuf *n, *n0; 435 436 if (m == 0) 437 return; 438 if (n = sb->sb_mb) { 439 while (n->m_nextpkt) 440 n = n->m_nextpkt; 441 } 442 sbcompress(sb, m, n); 443 } 444 445 #ifdef SOCKBUF_DEBUG 446 sbcheck(sb) 447 register struct sockbuf *sb; 448 { 449 register struct mbuf *m; 450 register int len = 0, mbcnt = 0; 451 452 for (m = sb->sb_mb; m; m = m->m_next) { 453 len += m->m_len; 454 mbcnt += MSIZE; 455 if (m->m_flags & M_EXT) 456 mbcnt += m->m_ext.ext_size; 457 if (m->m_nextpkt) 458 panic("sbcheck nextpkt"); 459 } 460 if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) { 461 printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc, 462 mbcnt, sb->sb_mbcnt); 463 panic("sbcheck"); 464 } 465 } 466 #endif 467 468 /* 469 * As above, except the mbuf chain 470 * begins a new record. 471 */ 472 sbappendrecord(sb, m0) 473 register struct sockbuf *sb; 474 register struct mbuf *m0; 475 { 476 register struct mbuf *m; 477 478 if (m0 == 0) 479 return; 480 if (m = sb->sb_mb) 481 while (m->m_nextpkt) 482 m = m->m_nextpkt; 483 /* 484 * Put the first mbuf on the queue. 485 * Note this permits zero length records. 486 */ 487 sballoc(sb, m0); 488 if (m) 489 m->m_nextpkt = m0; 490 else 491 sb->sb_mb = m0; 492 m = m0->m_next; 493 m0->m_next = 0; 494 sbcompress(sb, m, m0); 495 } 496 497 /* 498 * As above except that OOB data 499 * is inserted at the beginning of the sockbuf, 500 * but after any other OOB data. 
 */
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return;
	/*
	 * Skip past any leading OOB records (and the control portion of
	 * the record they head) to find the insertion point.
	 */
	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
	again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if (m = m->m_next)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	m0->m_nextpkt = *mp;
	*mp = m0;
	/* account for every mbuf in the inserted record */
	for (m = m0; m; m = m->m_next)
		sballoc(sb, m);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
sbappendaddr(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len, eor = 0;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	if (m0)
		space += m0->m_pkthdr.len;
	/* total up control data, remembering the last control mbuf */
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	/* copy the sender's address into the new name mbuf */
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* charge the whole name+control+data chain to the sockbuf */
	for (n = m; n; n = n->m_next) {
		eor |= n->m_flags & M_EOR;
		sballoc(sb, n);
	}
	m->m_flags |= eor;
	/* append the assembled record after the last existing record */
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	return (1);
}

/*
 * Append control (ancillary) data and optional data m0 as a new
 * record on sb.  Returns 0 if there is insufficient space.
 */
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0, eor = 0;

	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	/* charge the whole chain to the sockbuf, collecting M_EOR */
	for (m = control; m; m = m->m_next) {
		eor |= m->m_flags & M_EOR;
		sballoc(sb, m);
	}
	control->m_flags |= eor;
	/* append the record after the last existing record */
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
sbcompress(sb, m, n0)
	register struct sockbuf *sb;
	register struct mbuf *m;
	struct mbuf *n0;
{
	register struct mbuf *n = n0;
	register int eor = 0;

	if (n) {
		if (n->m_flags & M_EOR)
			n = 0;		/* can't merge past end-of-record */
		else while (n->m_next)
			n = n->m_next;
	}
	while (m) {
		eor |= m->m_flags & M_EOR;
		/* drop empty mbufs unless one is needed to carry M_EOR */
		if (m->m_len == 0) {
			if (eor == 0 || m->m_next || n) {
				m = m_free(m);
				continue;
			}
		}
		/* copy into the previous mbuf when it has room and matches */
		if (n && (n->m_flags & M_EXT) == 0 &&
		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		/* otherwise link the mbuf in and account for it */
		if (n == 0) {
			if (n0)
				n0->m_nextpkt = m;
			else
				sb->sb_mb = m;
			n0 = m;
		} else
			n->m_next = m;
		sballoc(sb, m);
		n = m;
		/*m->m_flags &= ~M_EOR;*/
		m = m->m_next;
		n->m_next = 0;
	}
	/* propagate end-of-record onto the record head */
	if (eor) {
		if (n0)
			n0->m_flags |= eor;
		else
			panic("sbcompress");
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 */
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			/* current record exhausted; advance to the next */
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* partial mbuf: trim from the front and stop */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* discard any now-empty mbufs at the record front */
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while (m = mn);
	}
}