1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)uipc_socket2.c 7.5 (Berkeley) 06/29/88 18 */ 19 20 #include "param.h" 21 #include "systm.h" 22 #include "dir.h" 23 #include "user.h" 24 #include "proc.h" 25 #include "file.h" 26 #include "inode.h" 27 #include "buf.h" 28 #include "mbuf.h" 29 #include "protosw.h" 30 #include "socket.h" 31 #include "socketvar.h" 32 33 /* 34 * Primitive routines for operating on sockets and socket buffers 35 */ 36 37 /* 38 * Procedures to manipulate state flags of socket 39 * and do appropriate wakeups. Normal sequence from the 40 * active (originating) side is that soisconnecting() is 41 * called during processing of connect() call, 42 * resulting in an eventual call to soisconnected() if/when the 43 * connection is established. When the connection is torn down 44 * soisdisconnecting() is called during processing of disconnect() call, 45 * and soisdisconnected() is called when the connection to the peer 46 * is totally severed. The semantics of these routines are such that 47 * connectionless protocols can call soisconnected() and soisdisconnected() 48 * only, bypassing the in-progress calls when setting up a ``connection'' 49 * takes no time. 50 * 51 * From the passive side, a socket is created with 52 * two queues of sockets: so_q0 for connections in progress 53 * and so_q for connections already made and awaiting user acceptance. 54 * As a protocol is preparing incoming connections, it creates a socket 55 * structure queued on so_q0 by calling sonewconn(). When the connection 56 * is established, soisconnected() is called, and transfers the 57 * socket structure to so_q, making it available to accept(). 58 * 59 * If a socket is closed with sockets on either 60 * so_q0 or so_q, these sockets are dropped. 61 * 62 * If higher level protocols are implemented in 63 * the kernel, the wakeups done here will sometimes 64 * cause software-interrupt process scheduling. 65 */ 66 67 soisconnecting(so) 68 register struct socket *so; 69 { 70 71 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 72 so->so_state |= SS_ISCONNECTING; 73 wakeup((caddr_t)&so->so_timeo); 74 } 75 76 soisconnected(so) 77 register struct socket *so; 78 { 79 register struct socket *head = so->so_head; 80 81 if (head) { 82 if (soqremque(so, 0) == 0) 83 panic("soisconnected"); 84 soqinsque(head, so, 1); 85 sorwakeup(head); 86 wakeup((caddr_t)&head->so_timeo); 87 } 88 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 89 so->so_state |= SS_ISCONNECTED; 90 wakeup((caddr_t)&so->so_timeo); 91 sorwakeup(so); 92 sowwakeup(so); 93 } 94 95 soisdisconnecting(so) 96 register struct socket *so; 97 { 98 99 so->so_state &= ~SS_ISCONNECTING; 100 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 101 wakeup((caddr_t)&so->so_timeo); 102 sowwakeup(so); 103 sorwakeup(so); 104 } 105 106 soisdisconnected(so) 107 register struct socket *so; 108 { 109 110 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 111 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 112 wakeup((caddr_t)&so->so_timeo); 113 sowwakeup(so); 114 sorwakeup(so); 115 } 116 117 /* 118 * When an attempt at a new connection is noted on a socket 119 * which accepts connections, sonewconn is called. If the 120 * connection is possible (subject to space constraints, etc.) 121 * then we allocate a new structure, propoerly linked into the 122 * data structure of the original socket, and return this. 123 */ 124 struct socket * 125 sonewconn(head) 126 register struct socket *head; 127 { 128 register struct socket *so; 129 register struct mbuf *m; 130 131 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 132 goto bad; 133 m = m_getclr(M_DONTWAIT, MT_SOCKET); 134 if (m == NULL) 135 goto bad; 136 so = mtod(m, struct socket *); 137 so->so_type = head->so_type; 138 so->so_options = head->so_options &~ SO_ACCEPTCONN; 139 so->so_linger = head->so_linger; 140 so->so_state = head->so_state | SS_NOFDREF; 141 so->so_proto = head->so_proto; 142 so->so_timeo = head->so_timeo; 143 so->so_pgrp = head->so_pgrp; 144 (void) soreserve(so, head->so_snd.sb_hiwat, head->so_snd.sb_hiwat); 145 soqinsque(head, so, 0); 146 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 147 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 148 (void) soqremque(so, 0); 149 (void) m_free(m); 150 goto bad; 151 } 152 return (so); 153 bad: 154 return ((struct socket *)0); 155 } 156 157 soqinsque(head, so, q) 158 register struct socket *head, *so; 159 int q; 160 { 161 162 so->so_head = head; 163 if (q == 0) { 164 head->so_q0len++; 165 so->so_q0 = head->so_q0; 166 head->so_q0 = so; 167 } else { 168 head->so_qlen++; 169 so->so_q = head->so_q; 170 head->so_q = so; 171 } 172 } 173 174 soqremque(so, q) 175 register struct socket *so; 176 int q; 177 { 178 register struct socket *head, *prev, *next; 179 180 head = so->so_head; 181 prev = head; 182 for (;;) { 183 next = q ? prev->so_q : prev->so_q0; 184 if (next == so) 185 break; 186 if (next == head) 187 return (0); 188 prev = next; 189 } 190 if (q == 0) { 191 prev->so_q0 = next->so_q0; 192 head->so_q0len--; 193 } else { 194 prev->so_q = next->so_q; 195 head->so_qlen--; 196 } 197 next->so_q0 = next->so_q = 0; 198 next->so_head = 0; 199 return (1); 200 } 201 202 /* 203 * Socantsendmore indicates that no more data will be sent on the 204 * socket; it would normally be applied to a socket when the user 205 * informs the system that no more data is to be sent, by the protocol 206 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 207 * will be received, and will normally be applied to the socket by a 208 * protocol when it detects that the peer will send no more data. 209 * Data queued for reading in the socket may yet be read. 210 */ 211 212 socantsendmore(so) 213 struct socket *so; 214 { 215 216 so->so_state |= SS_CANTSENDMORE; 217 sowwakeup(so); 218 } 219 220 socantrcvmore(so) 221 struct socket *so; 222 { 223 224 so->so_state |= SS_CANTRCVMORE; 225 sorwakeup(so); 226 } 227 228 /* 229 * Socket select/wakeup routines. 230 */ 231 232 /* 233 * Queue a process for a select on a socket buffer. 234 */ 235 sbselqueue(sb) 236 struct sockbuf *sb; 237 { 238 register struct proc *p; 239 240 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 241 sb->sb_flags |= SB_COLL; 242 else 243 sb->sb_sel = u.u_procp; 244 } 245 246 /* 247 * Wait for data to arrive at/drain from a socket buffer. 248 */ 249 sbwait(sb) 250 struct sockbuf *sb; 251 { 252 253 sb->sb_flags |= SB_WAIT; 254 sleep((caddr_t)&sb->sb_cc, PZERO+1); 255 } 256 257 /* 258 * Wakeup processes waiting on a socket buffer. 259 */ 260 sbwakeup(sb) 261 register struct sockbuf *sb; 262 { 263 264 if (sb->sb_sel) { 265 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 266 sb->sb_sel = 0; 267 sb->sb_flags &= ~SB_COLL; 268 } 269 if (sb->sb_flags & SB_WAIT) { 270 sb->sb_flags &= ~SB_WAIT; 271 wakeup((caddr_t)&sb->sb_cc); 272 } 273 } 274 275 /* 276 * Wakeup socket readers and writers. 277 * Do asynchronous notification via SIGIO 278 * if the socket has the SS_ASYNC flag set. 279 */ 280 sowakeup(so, sb) 281 register struct socket *so; 282 struct sockbuf *sb; 283 { 284 register struct proc *p; 285 286 sbwakeup(sb); 287 if (so->so_state & SS_ASYNC) { 288 if (so->so_pgrp < 0) 289 gsignal(-so->so_pgrp, SIGIO); 290 else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0) 291 psignal(p, SIGIO); 292 } 293 } 294 295 /* 296 * Socket buffer (struct sockbuf) utility routines. 297 * 298 * Each socket contains two socket buffers: one for sending data and 299 * one for receiving data. Each buffer contains a queue of mbufs, 300 * information about the number of mbufs and amount of data in the 301 * queue, and other fields allowing select() statements and notification 302 * on data availability to be implemented. 303 * 304 * Data stored in a socket buffer is maintained as a list of records. 305 * Each record is a list of mbufs chained together with the m_next 306 * field. Records are chained together with the m_act field. The upper 307 * level routine soreceive() expects the following conventions to be 308 * observed when placing information in the receive buffer: 309 * 310 * 1. If the protocol requires each message be preceded by the sender's 311 * name, then a record containing that name must be present before 312 * any associated data (mbuf's must be of type MT_SONAME). 313 * 2. If the protocol supports the exchange of ``access rights'' (really 314 * just additional data associated with the message), and there are 315 * ``rights'' to be received, then a record containing this data 316 * should be present (mbuf's must be of type MT_RIGHTS). 317 * 3. If a name or rights record exists, then it must be followed by 318 * a data record, perhaps of zero length. 319 * 320 * Before using a new socket structure it is first necessary to reserve 321 * buffer space to the socket, by calling sbreserve(). This should commit 322 * some of the available buffer space in the system buffer pool for the 323 * socket (currently, it does nothing but enforce limits). The space 324 * should be released by calling sbrelease() when the socket is destroyed. 325 */ 326 327 soreserve(so, sndcc, rcvcc) 328 register struct socket *so; 329 u_long sndcc, rcvcc; 330 { 331 332 if (sbreserve(&so->so_snd, sndcc) == 0) 333 goto bad; 334 if (sbreserve(&so->so_rcv, rcvcc) == 0) 335 goto bad2; 336 return (0); 337 bad2: 338 sbrelease(&so->so_snd); 339 bad: 340 return (ENOBUFS); 341 } 342 343 /* 344 * Allot mbufs to a sockbuf. 345 * Attempt to scale cc so that mbcnt doesn't become limiting 346 * if buffering efficiency is near the normal case. 347 */ 348 sbreserve(sb, cc) 349 struct sockbuf *sb; 350 u_long cc; 351 { 352 353 if (cc > (u_long)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES)) 354 return (0); 355 sb->sb_hiwat = cc; 356 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 357 return (1); 358 } 359 360 /* 361 * Free mbufs held by a socket, and reserved mbuf space. 362 */ 363 sbrelease(sb) 364 struct sockbuf *sb; 365 { 366 367 sbflush(sb); 368 sb->sb_hiwat = sb->sb_mbmax = 0; 369 } 370 371 /* 372 * Routines to add and remove 373 * data from an mbuf queue. 374 * 375 * The routines sbappend() or sbappendrecord() are normally called to 376 * append new mbufs to a socket buffer, after checking that adequate 377 * space is available, comparing the function sbspace() with the amount 378 * of data to be added. sbappendrecord() differs from sbappend() in 379 * that data supplied is treated as the beginning of a new record. 380 * To place a sender's address, optional access rights, and data in a 381 * socket receive buffer, sbappendaddr() should be used. To place 382 * access rights and data in a socket receive buffer, sbappendrights() 383 * should be used. In either case, the new data begins a new record. 384 * Note that unlike sbappend() and sbappendrecord(), these routines check 385 * for the caller that there will be enough space to store the data. 386 * Each fails if there is not enough space, or if it cannot find mbufs 387 * to store additional information in. 388 * 389 * Reliable protocols may use the socket send buffer to hold data 390 * awaiting acknowledgement. Data is normally copied from a socket 391 * send buffer in a protocol with m_copy for output to a peer, 392 * and then removing the data from the socket buffer with sbdrop() 393 * or sbdroprecord() when the data is acknowledged by the peer. 394 */ 395 396 /* 397 * Append mbuf chain m to the last record in the 398 * socket buffer sb. The additional space associated 399 * the mbuf chain is recorded in sb. Empty mbufs are 400 * discarded and mbufs are compacted where possible. 401 */ 402 sbappend(sb, m) 403 struct sockbuf *sb; 404 struct mbuf *m; 405 { 406 register struct mbuf *n; 407 408 if (m == 0) 409 return; 410 if (n = sb->sb_mb) { 411 while (n->m_act) 412 n = n->m_act; 413 while (n->m_next) 414 n = n->m_next; 415 } 416 sbcompress(sb, m, n); 417 } 418 419 /* 420 * As above, except the mbuf chain 421 * begins a new record. 422 */ 423 sbappendrecord(sb, m0) 424 register struct sockbuf *sb; 425 register struct mbuf *m0; 426 { 427 register struct mbuf *m; 428 429 if (m0 == 0) 430 return; 431 if (m = sb->sb_mb) 432 while (m->m_act) 433 m = m->m_act; 434 /* 435 * Put the first mbuf on the queue. 436 * Note this permits zero length records. 437 */ 438 sballoc(sb, m0); 439 if (m) 440 m->m_act = m0; 441 else 442 sb->sb_mb = m0; 443 m = m0->m_next; 444 m0->m_next = 0; 445 sbcompress(sb, m, m0); 446 } 447 448 /* 449 * Append address and data, and optionally, rights 450 * to the receive queue of a socket. Return 0 if 451 * no space in sockbuf or insufficient mbufs. 452 */ 453 sbappendaddr(sb, asa, m0, rights0) 454 register struct sockbuf *sb; 455 struct sockaddr *asa; 456 struct mbuf *rights0, *m0; 457 { 458 register struct mbuf *m, *n; 459 int space = sizeof (*asa); 460 461 for (m = m0; m; m = m->m_next) 462 space += m->m_len; 463 if (rights0) 464 space += rights0->m_len; 465 if (space > sbspace(sb)) 466 return (0); 467 MGET(m, M_DONTWAIT, MT_SONAME); 468 if (m == 0) 469 return (0); 470 *mtod(m, struct sockaddr *) = *asa; 471 m->m_len = sizeof (*asa); 472 if (rights0 && rights0->m_len) { 473 m->m_next = m_copy(rights0, 0, rights0->m_len); 474 if (m->m_next == 0) { 475 m_freem(m); 476 return (0); 477 } 478 sballoc(sb, m->m_next); 479 } 480 sballoc(sb, m); 481 if (n = sb->sb_mb) { 482 while (n->m_act) 483 n = n->m_act; 484 n->m_act = m; 485 } else 486 sb->sb_mb = m; 487 if (m->m_next) 488 m = m->m_next; 489 if (m0) 490 sbcompress(sb, m0, m); 491 return (1); 492 } 493 494 sbappendrights(sb, m0, rights) 495 struct sockbuf *sb; 496 struct mbuf *rights, *m0; 497 { 498 register struct mbuf *m, *n; 499 int space = 0; 500 501 if (rights == 0) 502 panic("sbappendrights"); 503 for (m = m0; m; m = m->m_next) 504 space += m->m_len; 505 space += rights->m_len; 506 if (space > sbspace(sb)) 507 return (0); 508 m = m_copy(rights, 0, rights->m_len); 509 if (m == 0) 510 return (0); 511 sballoc(sb, m); 512 if (n = sb->sb_mb) { 513 while (n->m_act) 514 n = n->m_act; 515 n->m_act = m; 516 } else 517 sb->sb_mb = m; 518 if (m0) 519 sbcompress(sb, m0, m); 520 return (1); 521 } 522 523 /* 524 * Compress mbuf chain m into the socket 525 * buffer sb following mbuf n. If n 526 * is null, the buffer is presumed empty. 527 */ 528 sbcompress(sb, m, n) 529 register struct sockbuf *sb; 530 register struct mbuf *m, *n; 531 { 532 533 while (m) { 534 if (m->m_len == 0) { 535 m = m_free(m); 536 continue; 537 } 538 if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF && 539 (n->m_off + n->m_len + m->m_len) <= MMAXOFF && 540 n->m_type == m->m_type) { 541 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 542 (unsigned)m->m_len); 543 n->m_len += m->m_len; 544 sb->sb_cc += m->m_len; 545 m = m_free(m); 546 continue; 547 } 548 sballoc(sb, m); 549 if (n) 550 n->m_next = m; 551 else 552 sb->sb_mb = m; 553 n = m; 554 m = m->m_next; 555 n->m_next = 0; 556 } 557 } 558 559 /* 560 * Free all mbufs in a sockbuf. 561 * Check that all resources are reclaimed. 562 */ 563 sbflush(sb) 564 register struct sockbuf *sb; 565 { 566 567 if (sb->sb_flags & SB_LOCK) 568 panic("sbflush"); 569 while (sb->sb_mbcnt) 570 sbdrop(sb, (int)sb->sb_cc); 571 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 572 panic("sbflush 2"); 573 } 574 575 /* 576 * Drop data from (the front of) a sockbuf. 577 */ 578 sbdrop(sb, len) 579 register struct sockbuf *sb; 580 register int len; 581 { 582 register struct mbuf *m, *mn; 583 struct mbuf *next; 584 585 next = (m = sb->sb_mb) ? m->m_act : 0; 586 while (len > 0) { 587 if (m == 0) { 588 if (next == 0) 589 panic("sbdrop"); 590 m = next; 591 next = m->m_act; 592 continue; 593 } 594 if (m->m_len > len) { 595 m->m_len -= len; 596 m->m_off += len; 597 sb->sb_cc -= len; 598 break; 599 } 600 len -= m->m_len; 601 sbfree(sb, m); 602 MFREE(m, mn); 603 m = mn; 604 } 605 while (m && m->m_len == 0) { 606 sbfree(sb, m); 607 MFREE(m, mn); 608 m = mn; 609 } 610 if (m) { 611 sb->sb_mb = m; 612 m->m_act = next; 613 } else 614 sb->sb_mb = next; 615 } 616 617 /* 618 * Drop a record off the front of a sockbuf 619 * and move the next record to the front. 620 */ 621 sbdroprecord(sb) 622 register struct sockbuf *sb; 623 { 624 register struct mbuf *m, *mn; 625 626 m = sb->sb_mb; 627 if (m) { 628 sb->sb_mb = m->m_act; 629 do { 630 sbfree(sb, m); 631 MFREE(m, mn); 632 } while (m = mn); 633 } 634 } 635