1 /* 2 * Copyright (c) 1982, 1986, 1988 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)uipc_socket2.c 7.8 (Berkeley) 04/08/89 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "dir.h" 23 #include "user.h" 24 #include "proc.h" 25 #include "file.h" 26 #include "inode.h" 27 #include "buf.h" 28 #include "malloc.h" 29 #include "mbuf.h" 30 #include "protosw.h" 31 #include "socket.h" 32 #include "socketvar.h" 33 34 /* 35 * Primitive routines for operating on sockets and socket buffers 36 */ 37 38 /* 39 * Procedures to manipulate state flags of socket 40 * and do appropriate wakeups. Normal sequence from the 41 * active (originating) side is that soisconnecting() is 42 * called during processing of connect() call, 43 * resulting in an eventual call to soisconnected() if/when the 44 * connection is established. When the connection is torn down 45 * soisdisconnecting() is called during processing of disconnect() call, 46 * and soisdisconnected() is called when the connection to the peer 47 * is totally severed. The semantics of these routines are such that 48 * connectionless protocols can call soisconnected() and soisdisconnected() 49 * only, bypassing the in-progress calls when setting up a ``connection'' 50 * takes no time. 51 * 52 * From the passive side, a socket is created with 53 * two queues of sockets: so_q0 for connections in progress 54 * and so_q for connections already made and awaiting user acceptance. 55 * As a protocol is preparing incoming connections, it creates a socket 56 * structure queued on so_q0 by calling sonewconn(). When the connection 57 * is established, soisconnected() is called, and transfers the 58 * socket structure to so_q, making it available to accept(). 59 * 60 * If a socket is closed with sockets on either 61 * so_q0 or so_q, these sockets are dropped. 62 * 63 * If higher level protocols are implemented in 64 * the kernel, the wakeups done here will sometimes 65 * cause software-interrupt process scheduling. 66 */ 67 68 soisconnecting(so) 69 register struct socket *so; 70 { 71 72 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 73 so->so_state |= SS_ISCONNECTING; 74 wakeup((caddr_t)&so->so_timeo); 75 } 76 77 soisconnected(so) 78 register struct socket *so; 79 { 80 register struct socket *head = so->so_head; 81 82 if (head) { 83 if (soqremque(so, 0) == 0) 84 panic("soisconnected"); 85 soqinsque(head, so, 1); 86 sorwakeup(head); 87 wakeup((caddr_t)&head->so_timeo); 88 } 89 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 90 so->so_state |= SS_ISCONNECTED; 91 wakeup((caddr_t)&so->so_timeo); 92 sorwakeup(so); 93 sowwakeup(so); 94 } 95 96 soisdisconnecting(so) 97 register struct socket *so; 98 { 99 100 so->so_state &= ~SS_ISCONNECTING; 101 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 102 wakeup((caddr_t)&so->so_timeo); 103 sowwakeup(so); 104 sorwakeup(so); 105 } 106 107 soisdisconnected(so) 108 register struct socket *so; 109 { 110 111 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 112 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 113 wakeup((caddr_t)&so->so_timeo); 114 sowwakeup(so); 115 sorwakeup(so); 116 } 117 118 /* 119 * When an attempt at a new connection is noted on a socket 120 * which accepts connections, sonewconn is called. If the 121 * connection is possible (subject to space constraints, etc.) 122 * then we allocate a new structure, propoerly linked into the 123 * data structure of the original socket, and return this. 124 */ 125 struct socket * 126 sonewconn(head) 127 register struct socket *head; 128 { 129 register struct socket *so; 130 131 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 132 return ((struct socket *)0); 133 MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT); 134 if (so == NULL) 135 return ((struct socket *)0); 136 bzero((caddr_t)so, sizeof(*so)); 137 so->so_type = head->so_type; 138 so->so_options = head->so_options &~ SO_ACCEPTCONN; 139 so->so_linger = head->so_linger; 140 so->so_state = head->so_state | SS_NOFDREF; 141 so->so_proto = head->so_proto; 142 so->so_timeo = head->so_timeo; 143 so->so_pgid = head->so_pgid; 144 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); 145 soqinsque(head, so, 0); 146 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 147 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 148 (void) soqremque(so, 0); 149 (void) free((caddr_t)so, M_SOCKET); 150 return ((struct socket *)0); 151 } 152 return (so); 153 } 154 155 soqinsque(head, so, q) 156 register struct socket *head, *so; 157 int q; 158 { 159 160 so->so_head = head; 161 if (q == 0) { 162 head->so_q0len++; 163 so->so_q0 = head->so_q0; 164 head->so_q0 = so; 165 } else { 166 head->so_qlen++; 167 so->so_q = head->so_q; 168 head->so_q = so; 169 } 170 } 171 172 soqremque(so, q) 173 register struct socket *so; 174 int q; 175 { 176 register struct socket *head, *prev, *next; 177 178 head = so->so_head; 179 prev = head; 180 for (;;) { 181 next = q ? prev->so_q : prev->so_q0; 182 if (next == so) 183 break; 184 if (next == head) 185 return (0); 186 prev = next; 187 } 188 if (q == 0) { 189 prev->so_q0 = next->so_q0; 190 head->so_q0len--; 191 } else { 192 prev->so_q = next->so_q; 193 head->so_qlen--; 194 } 195 next->so_q0 = next->so_q = 0; 196 next->so_head = 0; 197 return (1); 198 } 199 200 /* 201 * Socantsendmore indicates that no more data will be sent on the 202 * socket; it would normally be applied to a socket when the user 203 * informs the system that no more data is to be sent, by the protocol 204 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 205 * will be received, and will normally be applied to the socket by a 206 * protocol when it detects that the peer will send no more data. 207 * Data queued for reading in the socket may yet be read. 208 */ 209 210 socantsendmore(so) 211 struct socket *so; 212 { 213 214 so->so_state |= SS_CANTSENDMORE; 215 sowwakeup(so); 216 } 217 218 socantrcvmore(so) 219 struct socket *so; 220 { 221 222 so->so_state |= SS_CANTRCVMORE; 223 sorwakeup(so); 224 } 225 226 /* 227 * Socket select/wakeup routines. 228 */ 229 230 /* 231 * Queue a process for a select on a socket buffer. 232 */ 233 sbselqueue(sb) 234 struct sockbuf *sb; 235 { 236 struct proc *p; 237 238 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 239 sb->sb_flags |= SB_COLL; 240 else 241 sb->sb_sel = u.u_procp; 242 } 243 244 /* 245 * Wait for data to arrive at/drain from a socket buffer. 246 */ 247 sbwait(sb) 248 struct sockbuf *sb; 249 { 250 251 sb->sb_flags |= SB_WAIT; 252 sleep((caddr_t)&sb->sb_cc, PZERO+1); 253 } 254 255 /* 256 * Wakeup processes waiting on a socket buffer. 257 * Do asynchronous notification via SIGIO 258 * if the socket has the SS_ASYNC flag set. 259 */ 260 sowakeup(so, sb) 261 register struct socket *so; 262 register struct sockbuf *sb; 263 { 264 struct proc *p; 265 266 if (sb->sb_sel) { 267 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 268 sb->sb_sel = 0; 269 sb->sb_flags &= ~SB_COLL; 270 } 271 if (sb->sb_flags & SB_WAIT) { 272 sb->sb_flags &= ~SB_WAIT; 273 wakeup((caddr_t)&sb->sb_cc); 274 } 275 if (so->so_state & SS_ASYNC) { 276 if (so->so_pgid < 0) 277 gsignal(-so->so_pgid, SIGIO); 278 else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) 279 psignal(p, SIGIO); 280 } 281 } 282 283 /* 284 * Socket buffer (struct sockbuf) utility routines. 285 * 286 * Each socket contains two socket buffers: one for sending data and 287 * one for receiving data. Each buffer contains a queue of mbufs, 288 * information about the number of mbufs and amount of data in the 289 * queue, and other fields allowing select() statements and notification 290 * on data availability to be implemented. 291 * 292 * Data stored in a socket buffer is maintained as a list of records. 293 * Each record is a list of mbufs chained together with the m_next 294 * field. Records are chained together with the m_nextpkt field. The upper 295 * level routine soreceive() expects the following conventions to be 296 * observed when placing information in the receive buffer: 297 * 298 * 1. If the protocol requires each message be preceded by the sender's 299 * name, then a record containing that name must be present before 300 * any associated data (mbuf's must be of type MT_SONAME). 301 * 2. If the protocol supports the exchange of ``access rights'' (really 302 * just additional data associated with the message), and there are 303 * ``rights'' to be received, then a record containing this data 304 * should be present (mbuf's must be of type MT_RIGHTS). 305 * 3. If a name or rights record exists, then it must be followed by 306 * a data record, perhaps of zero length. 307 * 308 * Before using a new socket structure it is first necessary to reserve 309 * buffer space to the socket, by calling sbreserve(). This should commit 310 * some of the available buffer space in the system buffer pool for the 311 * socket (currently, it does nothing but enforce limits). The space 312 * should be released by calling sbrelease() when the socket is destroyed. 313 */ 314 315 soreserve(so, sndcc, rcvcc) 316 register struct socket *so; 317 u_long sndcc, rcvcc; 318 { 319 320 if (sbreserve(&so->so_snd, sndcc) == 0) 321 goto bad; 322 if (sbreserve(&so->so_rcv, rcvcc) == 0) 323 goto bad2; 324 return (0); 325 bad2: 326 sbrelease(&so->so_snd); 327 bad: 328 return (ENOBUFS); 329 } 330 331 /* 332 * Allot mbufs to a sockbuf. 333 * Attempt to scale cc so that mbcnt doesn't become limiting 334 * if buffering efficiency is near the normal case. 335 */ 336 sbreserve(sb, cc) 337 struct sockbuf *sb; 338 u_long cc; 339 { 340 341 if (cc > (u_long)SB_MAX * MCLBYTES / (2 * MSIZE + MCLBYTES)) 342 return (0); 343 sb->sb_hiwat = cc; 344 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 345 return (1); 346 } 347 348 /* 349 * Free mbufs held by a socket, and reserved mbuf space. 350 */ 351 sbrelease(sb) 352 struct sockbuf *sb; 353 { 354 355 sbflush(sb); 356 sb->sb_hiwat = sb->sb_mbmax = 0; 357 } 358 359 /* 360 * Routines to add and remove 361 * data from an mbuf queue. 362 * 363 * The routines sbappend() or sbappendrecord() are normally called to 364 * append new mbufs to a socket buffer, after checking that adequate 365 * space is available, comparing the function sbspace() with the amount 366 * of data to be added. sbappendrecord() differs from sbappend() in 367 * that data supplied is treated as the beginning of a new record. 368 * To place a sender's address, optional access rights, and data in a 369 * socket receive buffer, sbappendaddr() should be used. To place 370 * access rights and data in a socket receive buffer, sbappendrights() 371 * should be used. In either case, the new data begins a new record. 372 * Note that unlike sbappend() and sbappendrecord(), these routines check 373 * for the caller that there will be enough space to store the data. 374 * Each fails if there is not enough space, or if it cannot find mbufs 375 * to store additional information in. 376 * 377 * Reliable protocols may use the socket send buffer to hold data 378 * awaiting acknowledgement. Data is normally copied from a socket 379 * send buffer in a protocol with m_copy for output to a peer, 380 * and then removing the data from the socket buffer with sbdrop() 381 * or sbdroprecord() when the data is acknowledged by the peer. 382 */ 383 384 /* 385 * Append mbuf chain m to the last record in the 386 * socket buffer sb. The additional space associated 387 * the mbuf chain is recorded in sb. Empty mbufs are 388 * discarded and mbufs are compacted where possible. 389 */ 390 sbappend(sb, m) 391 struct sockbuf *sb; 392 struct mbuf *m; 393 { 394 register struct mbuf *n; 395 396 if (m == 0) 397 return; 398 if (n = sb->sb_mb) { 399 while (n->m_nextpkt) 400 n = n->m_nextpkt; 401 while (n->m_next) 402 n = n->m_next; 403 } 404 sbcompress(sb, m, n); 405 } 406 407 /* 408 * As above, except the mbuf chain 409 * begins a new record. 410 */ 411 sbappendrecord(sb, m0) 412 register struct sockbuf *sb; 413 register struct mbuf *m0; 414 { 415 register struct mbuf *m; 416 417 if (m0 == 0) 418 return; 419 if (m = sb->sb_mb) 420 while (m->m_nextpkt) 421 m = m->m_nextpkt; 422 /* 423 * Put the first mbuf on the queue. 424 * Note this permits zero length records. 425 */ 426 sballoc(sb, m0); 427 if (m) 428 m->m_nextpkt = m0; 429 else 430 sb->sb_mb = m0; 431 m = m0->m_next; 432 m0->m_next = 0; 433 if (m && (m0->m_flags & M_EOR)) { 434 m0->m_flags &= ~M_EOR; 435 m->m_flags |= M_EOR; 436 } 437 sbcompress(sb, m, m0); 438 } 439 440 /* 441 * As above except that OOB data 442 * is inserted at the beginning of the sockbuf, 443 * but after any other OOB data. 444 */ 445 sbinsertoob(sb, m0) 446 register struct sockbuf *sb; 447 register struct mbuf *m0; 448 { 449 register struct mbuf *m; 450 register struct mbuf **mp; 451 452 if (m0 == 0) 453 return; 454 for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) { 455 again: 456 switch (m->m_type) { 457 458 case MT_OOBDATA: 459 continue; /* WANT next train */ 460 461 case MT_CONTROL: 462 if (m = m->m_next) 463 goto again; /* inspect THIS train further */ 464 } 465 break; 466 } 467 /* 468 * Put the first mbuf on the queue. 469 * Note this permits zero length records. 470 */ 471 sballoc(sb, m0); 472 m0->m_nextpkt = *mp; 473 *mp = m0; 474 m = m0->m_next; 475 m0->m_next = 0; 476 if (m && (m0->m_flags & M_EOR)) { 477 m0->m_flags &= ~M_EOR; 478 m->m_flags |= M_EOR; 479 } 480 sbcompress(sb, m, m0); 481 } 482 483 /* 484 * Append address and data, and optionally, rights 485 * to the receive queue of a socket. If present, 486 * m0 Return 0 if 487 * no space in sockbuf or insufficient mbufs. 488 */ 489 sbappendaddr(sb, asa, m0, rights0) 490 register struct sockbuf *sb; 491 struct sockaddr *asa; 492 struct mbuf *m0, *rights0; 493 { 494 register struct mbuf *m, *n; 495 int space = asa->sa_len; 496 497 if (m0 && (m0->m_flags & M_PKTHDR) == 0) 498 panic("sbappendaddr"); 499 if (m0) 500 space += m0->m_pkthdr.len; 501 if (rights0) 502 space += rights0->m_len; 503 if (space > sbspace(sb)) 504 return (0); 505 MGET(m, M_DONTWAIT, MT_SONAME); 506 if (m == 0) 507 return (0); 508 if (asa->sa_len > MLEN) { 509 (void) m_free(m); 510 return (0); 511 } 512 m->m_len = asa->sa_len; 513 bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len); 514 if (rights0 && rights0->m_len) { 515 m->m_next = m_copy(rights0, 0, rights0->m_len); 516 if (m->m_next == 0) { 517 m_freem(m); 518 return (0); 519 } 520 sballoc(sb, m->m_next); 521 } 522 sballoc(sb, m); 523 if (n = sb->sb_mb) { 524 while (n->m_nextpkt) 525 n = n->m_nextpkt; 526 n->m_nextpkt = m; 527 } else 528 sb->sb_mb = m; 529 if (m->m_next) 530 m = m->m_next; 531 if (m0) 532 sbcompress(sb, m0, m); 533 return (1); 534 } 535 536 sbappendrights(sb, m0, rights) 537 struct sockbuf *sb; 538 struct mbuf *rights, *m0; 539 { 540 register struct mbuf *m, *n; 541 int space = 0; 542 543 if (rights == 0) 544 panic("sbappendrights"); 545 for (m = m0; m; m = m->m_next) 546 space += m->m_len; 547 space += rights->m_len; 548 if (space > sbspace(sb)) 549 return (0); 550 m = m_copy(rights, 0, rights->m_len); 551 if (m == 0) 552 return (0); 553 sballoc(sb, m); 554 if (n = sb->sb_mb) { 555 while (n->m_nextpkt) 556 n = n->m_nextpkt; 557 n->m_nextpkt = m; 558 } else 559 sb->sb_mb = m; 560 if (m0) 561 sbcompress(sb, m0, m); 562 return (1); 563 } 564 565 /* 566 * Compress mbuf chain m into the socket 567 * buffer sb following mbuf n. If n 568 * is null, the buffer is presumed empty. 569 */ 570 sbcompress(sb, m, n) 571 register struct sockbuf *sb; 572 register struct mbuf *m, *n; 573 { 574 575 register int eor = 0; 576 while (m) { 577 eor |= m->m_flags & M_EOR; 578 if (m->m_len == 0) { 579 m = m_free(m); 580 continue; 581 } 582 if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && 583 (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] && 584 n->m_type == m->m_type) { 585 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 586 (unsigned)m->m_len); 587 n->m_len += m->m_len; 588 sb->sb_cc += m->m_len; 589 m = m_free(m); 590 continue; 591 } 592 if (n) 593 n->m_next = m; 594 else 595 sb->sb_mb = m; 596 sballoc(sb, m); 597 n = m; 598 m->m_flags &= ~M_EOR; 599 m = m->m_next; 600 n->m_next = 0; 601 } 602 if (n) 603 n->m_flags |= eor; 604 } 605 606 /* 607 * Free all mbufs in a sockbuf. 608 * Check that all resources are reclaimed. 609 */ 610 sbflush(sb) 611 register struct sockbuf *sb; 612 { 613 614 if (sb->sb_flags & SB_LOCK) 615 panic("sbflush"); 616 while (sb->sb_mbcnt) 617 sbdrop(sb, (int)sb->sb_cc); 618 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 619 panic("sbflush 2"); 620 } 621 622 /* 623 * Drop data from (the front of) a sockbuf. 624 */ 625 sbdrop(sb, len) 626 register struct sockbuf *sb; 627 register int len; 628 { 629 register struct mbuf *m, *mn; 630 struct mbuf *next; 631 632 next = (m = sb->sb_mb) ? m->m_nextpkt : 0; 633 while (len > 0) { 634 if (m == 0) { 635 if (next == 0) 636 panic("sbdrop"); 637 m = next; 638 next = m->m_nextpkt; 639 continue; 640 } 641 if (m->m_len > len) { 642 m->m_len -= len; 643 m->m_data += len; 644 sb->sb_cc -= len; 645 break; 646 } 647 len -= m->m_len; 648 sbfree(sb, m); 649 MFREE(m, mn); 650 m = mn; 651 } 652 while (m && m->m_len == 0) { 653 sbfree(sb, m); 654 MFREE(m, mn); 655 m = mn; 656 } 657 if (m) { 658 sb->sb_mb = m; 659 m->m_nextpkt = next; 660 } else 661 sb->sb_mb = next; 662 } 663 664 /* 665 * Drop a record off the front of a sockbuf 666 * and move the next record to the front. 667 */ 668 sbdroprecord(sb) 669 register struct sockbuf *sb; 670 { 671 register struct mbuf *m, *mn; 672 673 m = sb->sb_mb; 674 if (m) { 675 sb->sb_mb = m->m_nextpkt; 676 do { 677 sbfree(sb, m); 678 MFREE(m, mn); 679 } while (m = mn); 680 } 681 } 682