/*
 * Copyright (c) 1982, 1986, 1988, 1990 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)uipc_socket2.c	7.16 (Berkeley) 02/19/91
 */

#include "param.h"
#include "systm.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "malloc.h"
#include "mbuf.h"
#include "protosw.h"
#include "socket.h"
#include "socketvar.h"

/*
 * Primitive routines for operating on sockets and socket buffers
 */

/* strings for sleep message: */
char	netio[] = "netio";
char	netcon[] = "netcon";
char	netcls[] = "netcls";

/*
 * Hard limit on a single socket buffer's high-water mark, enforced by
 * sbreserve() below; a plain global so it can be patched in a running
 * kernel or in the kernel image.
 */
u_long	sb_max = SB_MAX;		/* patchable */

/*
 * Procedures to manipulate state flags of socket
 * and do appropriate wakeups.  Normal sequence from the
 * active (originating) side is that soisconnecting() is
 * called during processing of connect() call,
 * resulting in an eventual call to soisconnected() if/when the
 * connection is established.  When the connection is torn down
 * soisdisconnecting() is called during processing of disconnect() call,
 * and soisdisconnected() is called when the connection to the peer
 * is totally severed.  The semantics of these routines are such that
 * connectionless protocols can call soisconnected() and soisdisconnected()
 * only, bypassing the in-progress calls when setting up a ``connection''
 * takes no time.
 *
 * From the passive side, a socket is created with
 * two queues of sockets: so_q0 for connections in progress
 * and so_q for connections already made and awaiting user acceptance.
 * As a protocol is preparing incoming connections, it creates a socket
 * structure queued on so_q0 by calling sonewconn().  When the connection
 * is established, soisconnected() is called, and transfers the
 * socket structure to so_q, making it available to accept().
 *
 * If a socket is closed with sockets on either
 * so_q0 or so_q, these sockets are dropped.
 *
 * If higher level protocols are implemented in
 * the kernel, the wakeups done here will sometimes
 * cause software-interrupt process scheduling.
 */

/*
 * Mark a connect() in progress: clear any stale connected/disconnecting
 * state and set SS_ISCONNECTING.  No wakeup is needed here; interested
 * parties sleep only on the later transitions.
 */
soisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= SS_ISCONNECTING;
}

/*
 * Mark the socket connected.  If it is an incoming connection still on a
 * listening socket's incomplete queue (so_q0), move it to the completed
 * queue (so_q) and wake the listener so accept() can return it; otherwise
 * wake anyone sleeping on this socket's own timeout channel and both
 * buffers (connect() sleeps on so_timeo).
 */
soisconnected(so)
	register struct socket *so;
{
	register struct socket *head = so->so_head;

	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
	so->so_state |= SS_ISCONNECTED;
	if (head && soqremque(so, 0)) {
		soqinsque(head, so, 1);
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
	} else {
		wakeup((caddr_t)&so->so_timeo);
		sorwakeup(so);
		sowwakeup(so);
	}
}

/*
 * Disconnect is under way: no further data may be sent or received.
 * Wake sleepers on the timeout channel and both buffers so they can
 * notice the state change.
 */
soisdisconnecting(so)
	register struct socket *so;
{

	so->so_state &= ~SS_ISCONNECTING;
	so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * The connection to the peer is fully severed; clear all connection
 * state and wake sleepers as above.
 */
soisdisconnected(so)
	register struct socket *so;
{

	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
	wakeup((caddr_t)&so->so_timeo);
	sowwakeup(so);
	sorwakeup(so);
}

/*
 * When an attempt at a new connection is noted on a socket
 * which accepts connections, sonewconn is called.  If the
 * connection is possible (subject to space constraints, etc.)
 * then we allocate a new structure, properly linked into the
 * data structure of the original socket, and return this.
 * Connstatus may be 0, or SO_ISCONFIRMING, or SO_ISCONNECTED.
 *
 * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
 * to catch calls that are missing the (new) second parameter.
 */
struct socket *
sonewconn1(head, connstatus)
	register struct socket *head;
	int connstatus;
{
	register struct socket *so;
	int soqueue = connstatus ? 1 : 0;	/* completed queue iff already connected/confirming */

	/* Refuse if both queues together exceed 1.5x the listen backlog. */
	if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2)
		return ((struct socket *)0);
	MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
	if (so == NULL)
		return ((struct socket *)0);
	bzero((caddr_t)so, sizeof(*so));
	/* Inherit type, options (minus accept), limits and pgid from the listener. */
	so->so_type = head->so_type;
	so->so_options = head->so_options &~ SO_ACCEPTCONN;
	so->so_linger = head->so_linger;
	so->so_state = head->so_state | SS_NOFDREF;
	so->so_proto = head->so_proto;
	so->so_timeo = head->so_timeo;
	so->so_pgid = head->so_pgid;
	(void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat);
	soqinsque(head, so, soqueue);
	/* Give the protocol a chance to attach; undo everything on failure. */
	if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
	    (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
		(void) soqremque(so, soqueue);
		(void) free((caddr_t)so, M_SOCKET);
		return ((struct socket *)0);
	}
	if (connstatus) {
		sorwakeup(head);
		wakeup((caddr_t)&head->so_timeo);
		so->so_state |= connstatus;
	}
	return (so);
}

/*
 * Insert socket so at the tail of head's incomplete (q == 0, so_q0)
 * or completed (q != 0, so_q) connection queue, and record head as
 * so's listening socket.
 */
soqinsque(head, so, q)
	register struct socket *head, *so;
	int q;
{

	register struct socket **prev;
	so->so_head = head;
	if (q == 0) {
		head->so_q0len++;
		so->so_q0 = 0;
		/* Walk to the tail; *prev ends up at the terminating null link. */
		for (prev = &(head->so_q0); *prev; )
			prev = &((*prev)->so_q0);
	} else {
		head->so_qlen++;
		so->so_q = 0;
		for (prev = &(head->so_q); *prev; )
			prev = &((*prev)->so_q);
	}
	*prev = so;
}

/*
 * Remove socket so from its head socket's incomplete (q == 0) or
 * completed (q != 0) queue.  Returns 1 on success, 0 if so was not
 * found on the indicated queue.
 */
soqremque(so, q)
	register struct socket *so;
	int q;
{
	register struct socket *head, *prev, *next;

	head = so->so_head;
	prev = head;
	for (;;) {
		next = q ?
prev->so_q : prev->so_q0;
		if (next == so)
			break;
		if (next == 0)
			return (0);
		prev = next;
	}
	/* Unlink so and fix the queue length on the listening socket. */
	if (q == 0) {
		prev->so_q0 = next->so_q0;
		head->so_q0len--;
	} else {
		prev->so_q = next->so_q;
		head->so_qlen--;
	}
	next->so_q0 = next->so_q = 0;
	next->so_head = 0;
	return (1);
}

/*
 * Socantsendmore indicates that no more data will be sent on the
 * socket; it would normally be applied to a socket when the user
 * informs the system that no more data is to be sent, by the protocol
 * code (in case PRU_SHUTDOWN).  Socantrcvmore indicates that no more data
 * will be received, and will normally be applied to the socket by a
 * protocol when it detects that the peer will send no more data.
 * Data queued for reading in the socket may yet be read.
 */

socantsendmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTSENDMORE;
	sowwakeup(so);
}

socantrcvmore(so)
	struct socket *so;
{

	so->so_state |= SS_CANTRCVMORE;
	sorwakeup(so);
}

/*
 * Socket select/wakeup routines.
 */

/*
 * Queue a process for a select on a socket buffer.
 * Only one process can be recorded per buffer; if another process is
 * already recorded and still asleep in select (p_wchan == &selwait),
 * note a collision instead so sowakeup() can wake everyone.
 */
sbselqueue(sb)
	struct sockbuf *sb;
{
	struct proc *p;

	if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait)
		sb->sb_flags |= SB_COLL;
	else {
		sb->sb_sel = u.u_procp;
		sb->sb_flags |= SB_SEL;
	}
}

/*
 * Wait for data to arrive at/drain from a socket buffer.
 * Sleeps on the buffer's character count; the sleep is interruptible
 * by signals (PCATCH) unless SB_NOINTR is set.  Returns the tsleep()
 * result (0, or an error such as EINTR/EWOULDBLOCK).
 */
sbwait(sb)
	struct sockbuf *sb;
{

	sb->sb_flags |= SB_WAIT;
	return (tsleep((caddr_t)&sb->sb_cc,
	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio,
	    sb->sb_timeo));
}

/*
 * Lock a sockbuf already known to be locked;
 * return any error returned from sleep (EINTR).
 */
sb_lock(sb)
	register struct sockbuf *sb;
{
	int error;

	/* Sleep until the current holder drops SB_LOCK, then take it. */
	while (sb->sb_flags & SB_LOCK) {
		sb->sb_flags |= SB_WANT;
		if (error = tsleep((caddr_t)&sb->sb_flags,
		    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
		    netio, 0))
			return (error);
	}
	sb->sb_flags |= SB_LOCK;
	return (0);
}

/*
 * Wakeup processes waiting on a socket buffer.
 * Do asynchronous notification via SIGIO
 * if the socket has the SS_ASYNC flag set.
 * A negative so_pgid names a process group; a positive one a single
 * process (looked up with pfind()).
 */
sowakeup(so, sb)
	register struct socket *so;
	register struct sockbuf *sb;
{
	struct proc *p;

	if (sb->sb_sel) {
		selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL);
		sb->sb_sel = 0;
		sb->sb_flags &= ~(SB_SEL|SB_COLL);
	}
	if (sb->sb_flags & SB_WAIT) {
		sb->sb_flags &= ~SB_WAIT;
		wakeup((caddr_t)&sb->sb_cc);
	}
	if (so->so_state & SS_ASYNC) {
		if (so->so_pgid < 0)
			gsignal(-so->so_pgid, SIGIO);
		else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
			psignal(p, SIGIO);
	}
}

/*
 * Socket buffer (struct sockbuf) utility routines.
 *
 * Each socket contains two socket buffers: one for sending data and
 * one for receiving data.  Each buffer contains a queue of mbufs,
 * information about the number of mbufs and amount of data in the
 * queue, and other fields allowing select() statements and notification
 * on data availability to be implemented.
 *
 * Data stored in a socket buffer is maintained as a list of records.
 * Each record is a list of mbufs chained together with the m_next
 * field.  Records are chained together with the m_nextpkt field. The upper
 * level routine soreceive() expects the following conventions to be
 * observed when placing information in the receive buffer:
 *
 * 1.
If the protocol requires each message be preceded by the sender's
 *    name, then a record containing that name must be present before
 *    any associated data (mbuf's must be of type MT_SONAME).
 * 2. If the protocol supports the exchange of ``access rights'' (really
 *    just additional data associated with the message), and there are
 *    ``rights'' to be received, then a record containing this data
 *    should be present (mbuf's must be of type MT_RIGHTS).
 * 3. If a name or rights record exists, then it must be followed by
 *    a data record, perhaps of zero length.
 *
 * Before using a new socket structure it is first necessary to reserve
 * buffer space to the socket, by calling sbreserve().  This should commit
 * some of the available buffer space in the system buffer pool for the
 * socket (currently, it does nothing but enforce limits).  The space
 * should be released by calling sbrelease() when the socket is destroyed.
 */

/*
 * Reserve send and receive space for a socket, and establish sane
 * low-water marks (1 byte for receive, one cluster for send).
 * Returns 0 on success, ENOBUFS if either reservation fails; on
 * failure of the receive reservation the send reservation is undone.
 */
soreserve(so, sndcc, rcvcc)
	register struct socket *so;
	u_long sndcc, rcvcc;
{

	if (sbreserve(&so->so_snd, sndcc) == 0)
		goto bad;
	if (sbreserve(&so->so_rcv, rcvcc) == 0)
		goto bad2;
	if (so->so_rcv.sb_lowat == 0)
		so->so_rcv.sb_lowat = 1;
	if (so->so_snd.sb_lowat == 0)
		so->so_snd.sb_lowat = MCLBYTES;
	if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat)
		so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
	return (0);
bad2:
	sbrelease(&so->so_snd);
bad:
	return (ENOBUFS);
}

/*
 * Allot mbufs to a sockbuf.
 * Attempt to scale mbmax so that mbcnt doesn't become limiting
 * if buffering efficiency is near the normal case.
 */
sbreserve(sb, cc)
	struct sockbuf *sb;
	u_long cc;
{

	/* Reject requests beyond sb_max scaled by cluster overhead. */
	if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
		return (0);
	sb->sb_hiwat = cc;
	sb->sb_mbmax = min(cc * 2, sb_max);
	if (sb->sb_lowat > sb->sb_hiwat)
		sb->sb_lowat = sb->sb_hiwat;
	return (1);
}

/*
 * Free mbufs held by a socket, and reserved mbuf space.
 */
sbrelease(sb)
	struct sockbuf *sb;
{

	sbflush(sb);
	sb->sb_hiwat = sb->sb_mbmax = 0;
}

/*
 * Routines to add and remove
 * data from an mbuf queue.
 *
 * The routines sbappend() or sbappendrecord() are normally called to
 * append new mbufs to a socket buffer, after checking that adequate
 * space is available, comparing the function sbspace() with the amount
 * of data to be added.  sbappendrecord() differs from sbappend() in
 * that data supplied is treated as the beginning of a new record.
 * To place a sender's address, optional access rights, and data in a
 * socket receive buffer, sbappendaddr() should be used.  To place
 * access rights and data in a socket receive buffer, sbappendrights()
 * should be used.  In either case, the new data begins a new record.
 * Note that unlike sbappend() and sbappendrecord(), these routines check
 * for the caller that there will be enough space to store the data.
 * Each fails if there is not enough space, or if it cannot find mbufs
 * to store additional information in.
 *
 * Reliable protocols may use the socket send buffer to hold data
 * awaiting acknowledgement.  Data is normally copied from a socket
 * send buffer in a protocol with m_copy for output to a peer,
 * and then removing the data from the socket buffer with sbdrop()
 * or sbdroprecord() when the data is acknowledged by the peer.
 */

/*
 * Append mbuf chain m to the last record in the
 * socket buffer sb.
The additional space associated
 * the mbuf chain is recorded in sb.  Empty mbufs are
 * discarded and mbufs are compacted where possible.
 */
sbappend(sb, m)
	struct sockbuf *sb;
	struct mbuf *m;
{
	register struct mbuf *n;

	if (m == 0)
		return;
	if (n = sb->sb_mb) {
		/* Find the last mbuf of the last record. */
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		do {
			if (n->m_flags & M_EOR) {
				/* Last record already ended; start a new one. */
				sbappendrecord(sb, m); /* XXXXXX!!!! */
				return;
			}
		} while (n->m_next && (n = n->m_next));
	}
	sbcompress(sb, m, n);
}

#ifdef SOCKBUF_DEBUG
/*
 * Consistency check: recompute byte and mbuf-space counts over the
 * buffer's mbuf chain and panic if they disagree with the cached
 * sb_cc/sb_mbcnt, or if any mbuf unexpectedly carries m_nextpkt.
 */
sbcheck(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m;
	register int len = 0, mbcnt = 0;

	for (m = sb->sb_mb; m; m = m->m_next) {
		len += m->m_len;
		mbcnt += MSIZE;
		if (m->m_flags & M_EXT)
			mbcnt += m->m_ext.ext_size;
		if (m->m_nextpkt)
			panic("sbcheck nextpkt");
	}
	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
		printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc,
		    mbcnt, sb->sb_mbcnt);
		panic("sbcheck");
	}
}
#endif

/*
 * As above, except the mbuf chain
 * begins a new record.
 */
sbappendrecord(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;

	if (m0 == 0)
		return;
	if (m = sb->sb_mb)
		while (m->m_nextpkt)
			m = m->m_nextpkt;
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	if (m)
		m->m_nextpkt = m0;
	else
		sb->sb_mb = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If m0 carried M_EOR, move it to the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * As above except that OOB data
 * is inserted at the beginning of the sockbuf,
 * but after any other OOB data.
 */
sbinsertoob(sb, m0)
	register struct sockbuf *sb;
	register struct mbuf *m0;
{
	register struct mbuf *m;
	register struct mbuf **mp;

	if (m0 == 0)
		return;
	/* Skip over leading OOB records (and control preceding OOB data). */
	for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) {
	    again:
		switch (m->m_type) {

		case MT_OOBDATA:
			continue;		/* WANT next train */

		case MT_CONTROL:
			if (m = m->m_next)
				goto again;	/* inspect THIS train further */
		}
		break;
	}
	/*
	 * Put the first mbuf on the queue.
	 * Note this permits zero length records.
	 */
	sballoc(sb, m0);
	m0->m_nextpkt = *mp;
	*mp = m0;
	m = m0->m_next;
	m0->m_next = 0;
	/* If m0 carried M_EOR, move it to the rest of the chain. */
	if (m && (m0->m_flags & M_EOR)) {
		m0->m_flags &= ~M_EOR;
		m->m_flags |= M_EOR;
	}
	sbcompress(sb, m, m0);
}

/*
 * Append address and data, and optionally, control (ancillary) data
 * to the receive queue of a socket.  If present,
 * m0 must include a packet header with total length.
 * Returns 0 if no space in sockbuf or insufficient mbufs.
 */
sbappendaddr(sb, asa, m0, control)
	register struct sockbuf *sb;
	struct sockaddr *asa;
	struct mbuf *m0, *control;
{
	register struct mbuf *m, *n;
	int space = asa->sa_len;

	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
		panic("sbappendaddr");
	if (m0)
		space += m0->m_pkthdr.len;
	for (n = control; n; n = n->m_next) {
		space += n->m_len;
		if (n->m_next == 0)	/* keep pointer to last control buf */
			break;
	}
	if (space > sbspace(sb))
		return (0);
	/* The address must fit in a single small mbuf. */
	if (asa->sa_len > MLEN)
		return (0);
	MGET(m, M_DONTWAIT, MT_SONAME);
	if (m == 0)
		return (0);
	m->m_len = asa->sa_len;
	bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
	if (n)
		n->m_next = m0;		/* concatenate data to control */
	else
		control = m0;
	m->m_next = control;
	/* Record name -> control -> data as one record; account each mbuf. */
	for (n = m; n; n = n->m_next)
		sballoc(sb, n);
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = m;
	} else
		sb->sb_mb = m;
	return (1);
}

/*
 * Append control data (required) followed by optional data m0 as a new
 * record on the receive queue.  Returns 0 if there is no space, 1 on
 * success.  Unlike sbappendaddr(), no address mbuf is prepended.
 */
sbappendcontrol(sb, m0, control)
	struct sockbuf *sb;
	struct mbuf *control, *m0;
{
	register struct mbuf *m, *n;
	int space = 0;

	if (control == 0)
		panic("sbappendcontrol");
	for (m = control; ; m = m->m_next) {
		space += m->m_len;
		if (m->m_next == 0)
			break;
	}
	n = m;			/* save pointer to last control buffer */
	for (m = m0; m; m = m->m_next)
		space += m->m_len;
	if (space > sbspace(sb))
		return (0);
	n->m_next = m0;			/* concatenate data to control */
	for (m = control; m; m = m->m_next)
		sballoc(sb, m);
	if (n = sb->sb_mb) {
		while (n->m_nextpkt)
			n = n->m_nextpkt;
		n->m_nextpkt = control;
	} else
		sb->sb_mb = control;
	return (1);
}

/*
 * Compress mbuf chain m into the socket
 * buffer sb following mbuf n.  If n
 * is null, the buffer is presumed empty.
 */
sbcompress(sb, m, n)
	register struct sockbuf *sb;
	register struct mbuf *m, *n;
{
	register int eor = 0;
	register struct mbuf *o;

	while (m) {
		eor |= m->m_flags & M_EOR;
		/*
		 * Drop an empty mbuf unless it ends a record and there is
		 * no same-type mbuf (next in chain, or n) to carry M_EOR.
		 */
		if (m->m_len == 0 &&
		    (eor == 0 ||
		     (((o = m->m_next) || (o = n)) &&
		      o->m_type == m->m_type))) {
			m = m_free(m);
			continue;
		}
		/*
		 * If m's data fits in n's internal buffer (n is not a
		 * cluster and does not end a record), copy it there and
		 * free m instead of linking it in.
		 */
		if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 &&
		    (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
		    n->m_type == m->m_type) {
			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
			    (unsigned)m->m_len);
			n->m_len += m->m_len;
			sb->sb_cc += m->m_len;
			m = m_free(m);
			continue;
		}
		if (n)
			n->m_next = m;
		else
			sb->sb_mb = m;
		sballoc(sb, m);
		n = m;
		m->m_flags &= ~M_EOR;
		m = m->m_next;
		n->m_next = 0;
	}
	/* Re-apply any M_EOR seen above to the last mbuf kept. */
	if (eor) {
		if (n)
			n->m_flags |= eor;
		else
			printf("semi-panic: sbcompress\n");
	}
}

/*
 * Free all mbufs in a sockbuf.
 * Check that all resources are reclaimed.
 */
sbflush(sb)
	register struct sockbuf *sb;
{

	if (sb->sb_flags & SB_LOCK)
		panic("sbflush");
	while (sb->sb_mbcnt)
		sbdrop(sb, (int)sb->sb_cc);
	if (sb->sb_cc || sb->sb_mb)
		panic("sbflush 2");
}

/*
 * Drop data from (the front of) a sockbuf.
 * len bytes are removed, crossing record boundaries as needed; a
 * partially consumed mbuf has its data pointer and length adjusted
 * in place.  Trailing empty mbufs are also freed.
 */
sbdrop(sb, len)
	register struct sockbuf *sb;
	register int len;
{
	register struct mbuf *m, *mn;
	struct mbuf *next;

	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
	while (len > 0) {
		if (m == 0) {
			if (next == 0)
				panic("sbdrop");
			/* Current record exhausted; move to the next one. */
			m = next;
			next = m->m_nextpkt;
			continue;
		}
		if (m->m_len > len) {
			/* Partial mbuf: trim from the front and stop. */
			m->m_len -= len;
			m->m_data += len;
			sb->sb_cc -= len;
			break;
		}
		len -= m->m_len;
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	while (m && m->m_len == 0) {
		sbfree(sb, m);
		MFREE(m, mn);
		m = mn;
	}
	/* Reattach the remainder (if any) as the head record. */
	if (m) {
		sb->sb_mb = m;
		m->m_nextpkt = next;
	} else
		sb->sb_mb = next;
}

/*
 * Drop a record off the front of a sockbuf
 * and move the next record to the front.
 */
sbdroprecord(sb)
	register struct sockbuf *sb;
{
	register struct mbuf *m, *mn;

	m = sb->sb_mb;
	if (m) {
		sb->sb_mb = m->m_nextpkt;
		do {
			sbfree(sb, m);
			MFREE(m, mn);
		} while (m = mn);
	}
}