1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that this notice is preserved and that due credit is given 7 * to the University of California at Berkeley. The name of the University 8 * may not be used to endorse or promote products derived from this 9 * software without specific prior written permission. This software 10 * is provided ``as is'' without express or implied warranty. 11 * 12 * @(#)uipc_socket2.c 7.3 (Berkeley) 01/28/88 13 */ 14 15 #include "param.h" 16 #include "systm.h" 17 #include "dir.h" 18 #include "user.h" 19 #include "proc.h" 20 #include "file.h" 21 #include "inode.h" 22 #include "buf.h" 23 #include "mbuf.h" 24 #include "protosw.h" 25 #include "socket.h" 26 #include "socketvar.h" 27 28 /* 29 * Primitive routines for operating on sockets and socket buffers 30 */ 31 32 /* 33 * Procedures to manipulate state flags of socket 34 * and do appropriate wakeups. Normal sequence from the 35 * active (originating) side is that soisconnecting() is 36 * called during processing of connect() call, 37 * resulting in an eventual call to soisconnected() if/when the 38 * connection is established. When the connection is torn down 39 * soisdisconnecting() is called during processing of disconnect() call, 40 * and soisdisconnected() is called when the connection to the peer 41 * is totally severed. The semantics of these routines are such that 42 * connectionless protocols can call soisconnected() and soisdisconnected() 43 * only, bypassing the in-progress calls when setting up a ``connection'' 44 * takes no time. 45 * 46 * From the passive side, a socket is created with 47 * two queues of sockets: so_q0 for connections in progress 48 * and so_q for connections already made and awaiting user acceptance. 49 * As a protocol is preparing incoming connections, it creates a socket 50 * structure queued on so_q0 by calling sonewconn(). When the connection 51 * is established, soisconnected() is called, and transfers the 52 * socket structure to so_q, making it available to accept(). 53 * 54 * If a socket is closed with sockets on either 55 * so_q0 or so_q, these sockets are dropped. 56 * 57 * If higher level protocols are implemented in 58 * the kernel, the wakeups done here will sometimes 59 * cause software-interrupt process scheduling. 60 */ 61 62 soisconnecting(so) 63 register struct socket *so; 64 { 65 66 so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); 67 so->so_state |= SS_ISCONNECTING; 68 wakeup((caddr_t)&so->so_timeo); 69 } 70 71 soisconnected(so) 72 register struct socket *so; 73 { 74 register struct socket *head = so->so_head; 75 76 if (head) { 77 if (soqremque(so, 0) == 0) 78 panic("soisconnected"); 79 soqinsque(head, so, 1); 80 sorwakeup(head); 81 wakeup((caddr_t)&head->so_timeo); 82 } 83 so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); 84 so->so_state |= SS_ISCONNECTED; 85 wakeup((caddr_t)&so->so_timeo); 86 sorwakeup(so); 87 sowwakeup(so); 88 } 89 90 soisdisconnecting(so) 91 register struct socket *so; 92 { 93 94 so->so_state &= ~SS_ISCONNECTING; 95 so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE); 96 wakeup((caddr_t)&so->so_timeo); 97 sowwakeup(so); 98 sorwakeup(so); 99 } 100 101 soisdisconnected(so) 102 register struct socket *so; 103 { 104 105 so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); 106 so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE); 107 wakeup((caddr_t)&so->so_timeo); 108 sowwakeup(so); 109 sorwakeup(so); 110 } 111 112 /* 113 * When an attempt at a new connection is noted on a socket 114 * which accepts connections, sonewconn is called. If the 115 * connection is possible (subject to space constraints, etc.) 116 * then we allocate a new structure, propoerly linked into the 117 * data structure of the original socket, and return this. 118 */ 119 struct socket * 120 sonewconn(head) 121 register struct socket *head; 122 { 123 register struct socket *so; 124 register struct mbuf *m; 125 126 if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) 127 goto bad; 128 m = m_getclr(M_DONTWAIT, MT_SOCKET); 129 if (m == NULL) 130 goto bad; 131 so = mtod(m, struct socket *); 132 so->so_type = head->so_type; 133 so->so_options = head->so_options &~ SO_ACCEPTCONN; 134 so->so_linger = head->so_linger; 135 so->so_state = head->so_state | SS_NOFDREF; 136 so->so_proto = head->so_proto; 137 so->so_timeo = head->so_timeo; 138 so->so_pgrp = head->so_pgrp; 139 soqinsque(head, so, 0); 140 if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH, 141 (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) { 142 (void) soqremque(so, 0); 143 (void) m_free(m); 144 goto bad; 145 } 146 return (so); 147 bad: 148 return ((struct socket *)0); 149 } 150 151 soqinsque(head, so, q) 152 register struct socket *head, *so; 153 int q; 154 { 155 156 so->so_head = head; 157 if (q == 0) { 158 head->so_q0len++; 159 so->so_q0 = head->so_q0; 160 head->so_q0 = so; 161 } else { 162 head->so_qlen++; 163 so->so_q = head->so_q; 164 head->so_q = so; 165 } 166 } 167 168 soqremque(so, q) 169 register struct socket *so; 170 int q; 171 { 172 register struct socket *head, *prev, *next; 173 174 head = so->so_head; 175 prev = head; 176 for (;;) { 177 next = q ? prev->so_q : prev->so_q0; 178 if (next == so) 179 break; 180 if (next == head) 181 return (0); 182 prev = next; 183 } 184 if (q == 0) { 185 prev->so_q0 = next->so_q0; 186 head->so_q0len--; 187 } else { 188 prev->so_q = next->so_q; 189 head->so_qlen--; 190 } 191 next->so_q0 = next->so_q = 0; 192 next->so_head = 0; 193 return (1); 194 } 195 196 /* 197 * Socantsendmore indicates that no more data will be sent on the 198 * socket; it would normally be applied to a socket when the user 199 * informs the system that no more data is to be sent, by the protocol 200 * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data 201 * will be received, and will normally be applied to the socket by a 202 * protocol when it detects that the peer will send no more data. 203 * Data queued for reading in the socket may yet be read. 204 */ 205 206 socantsendmore(so) 207 struct socket *so; 208 { 209 210 so->so_state |= SS_CANTSENDMORE; 211 sowwakeup(so); 212 } 213 214 socantrcvmore(so) 215 struct socket *so; 216 { 217 218 so->so_state |= SS_CANTRCVMORE; 219 sorwakeup(so); 220 } 221 222 /* 223 * Socket select/wakeup routines. 224 */ 225 226 /* 227 * Queue a process for a select on a socket buffer. 228 */ 229 sbselqueue(sb) 230 struct sockbuf *sb; 231 { 232 register struct proc *p; 233 234 if ((p = sb->sb_sel) && p->p_wchan == (caddr_t)&selwait) 235 sb->sb_flags |= SB_COLL; 236 else 237 sb->sb_sel = u.u_procp; 238 } 239 240 /* 241 * Wait for data to arrive at/drain from a socket buffer. 242 */ 243 sbwait(sb) 244 struct sockbuf *sb; 245 { 246 247 sb->sb_flags |= SB_WAIT; 248 sleep((caddr_t)&sb->sb_cc, PZERO+1); 249 } 250 251 /* 252 * Wakeup processes waiting on a socket buffer. 253 */ 254 sbwakeup(sb) 255 register struct sockbuf *sb; 256 { 257 258 if (sb->sb_sel) { 259 selwakeup(sb->sb_sel, sb->sb_flags & SB_COLL); 260 sb->sb_sel = 0; 261 sb->sb_flags &= ~SB_COLL; 262 } 263 if (sb->sb_flags & SB_WAIT) { 264 sb->sb_flags &= ~SB_WAIT; 265 wakeup((caddr_t)&sb->sb_cc); 266 } 267 } 268 269 /* 270 * Wakeup socket readers and writers. 271 * Do asynchronous notification via SIGIO 272 * if the socket has the SS_ASYNC flag set. 273 */ 274 sowakeup(so, sb) 275 register struct socket *so; 276 struct sockbuf *sb; 277 { 278 register struct proc *p; 279 280 sbwakeup(sb); 281 if (so->so_state & SS_ASYNC) { 282 if (so->so_pgrp < 0) 283 gsignal(-so->so_pgrp, SIGIO); 284 else if (so->so_pgrp > 0 && (p = pfind(so->so_pgrp)) != 0) 285 psignal(p, SIGIO); 286 } 287 } 288 289 /* 290 * Socket buffer (struct sockbuf) utility routines. 291 * 292 * Each socket contains two socket buffers: one for sending data and 293 * one for receiving data. Each buffer contains a queue of mbufs, 294 * information about the number of mbufs and amount of data in the 295 * queue, and other fields allowing select() statements and notification 296 * on data availability to be implemented. 297 * 298 * Data stored in a socket buffer is maintained as a list of records. 299 * Each record is a list of mbufs chained together with the m_next 300 * field. Records are chained together with the m_act field. The upper 301 * level routine soreceive() expects the following conventions to be 302 * observed when placing information in the receive buffer: 303 * 304 * 1. If the protocol requires each message be preceded by the sender's 305 * name, then a record containing that name must be present before 306 * any associated data (mbuf's must be of type MT_SONAME). 307 * 2. If the protocol supports the exchange of ``access rights'' (really 308 * just additional data associated with the message), and there are 309 * ``rights'' to be received, then a record containing this data 310 * should be present (mbuf's must be of type MT_RIGHTS). 311 * 3. If a name or rights record exists, then it must be followed by 312 * a data record, perhaps of zero length. 313 * 314 * Before using a new socket structure it is first necessary to reserve 315 * buffer space to the socket, by calling sbreserve(). This should commit 316 * some of the available buffer space in the system buffer pool for the 317 * socket (currently, it does nothing but enforce limits). The space 318 * should be released by calling sbrelease() when the socket is destroyed. 319 */ 320 321 soreserve(so, sndcc, rcvcc) 322 register struct socket *so; 323 u_long sndcc, rcvcc; 324 { 325 326 if (sbreserve(&so->so_snd, sndcc) == 0) 327 goto bad; 328 if (sbreserve(&so->so_rcv, rcvcc) == 0) 329 goto bad2; 330 return (0); 331 bad2: 332 sbrelease(&so->so_snd); 333 bad: 334 return (ENOBUFS); 335 } 336 337 /* 338 * Allot mbufs to a sockbuf. 339 * Attempt to scale cc so that mbcnt doesn't become limiting 340 * if buffering efficiency is near the normal case. 341 */ 342 sbreserve(sb, cc) 343 struct sockbuf *sb; 344 u_long cc; 345 { 346 347 if (cc > (u_long)SB_MAX * CLBYTES / (2 * MSIZE + CLBYTES)) 348 return (0); 349 sb->sb_hiwat = cc; 350 sb->sb_mbmax = MIN(cc * 2, SB_MAX); 351 return (1); 352 } 353 354 /* 355 * Free mbufs held by a socket, and reserved mbuf space. 356 */ 357 sbrelease(sb) 358 struct sockbuf *sb; 359 { 360 361 sbflush(sb); 362 sb->sb_hiwat = sb->sb_mbmax = 0; 363 } 364 365 /* 366 * Routines to add and remove 367 * data from an mbuf queue. 368 * 369 * The routines sbappend() or sbappendrecord() are normally called to 370 * append new mbufs to a socket buffer, after checking that adequate 371 * space is available, comparing the function sbspace() with the amount 372 * of data to be added. sbappendrecord() differs from sbappend() in 373 * that data supplied is treated as the beginning of a new record. 374 * To place a sender's address, optional access rights, and data in a 375 * socket receive buffer, sbappendaddr() should be used. To place 376 * access rights and data in a socket receive buffer, sbappendrights() 377 * should be used. In either case, the new data begins a new record. 378 * Note that unlike sbappend() and sbappendrecord(), these routines check 379 * for the caller that there will be enough space to store the data. 380 * Each fails if there is not enough space, or if it cannot find mbufs 381 * to store additional information in. 382 * 383 * Reliable protocols may use the socket send buffer to hold data 384 * awaiting acknowledgement. Data is normally copied from a socket 385 * send buffer in a protocol with m_copy for output to a peer, 386 * and then removing the data from the socket buffer with sbdrop() 387 * or sbdroprecord() when the data is acknowledged by the peer. 388 */ 389 390 /* 391 * Append mbuf chain m to the last record in the 392 * socket buffer sb. The additional space associated 393 * the mbuf chain is recorded in sb. Empty mbufs are 394 * discarded and mbufs are compacted where possible. 395 */ 396 sbappend(sb, m) 397 struct sockbuf *sb; 398 struct mbuf *m; 399 { 400 register struct mbuf *n; 401 402 if (m == 0) 403 return; 404 if (n = sb->sb_mb) { 405 while (n->m_act) 406 n = n->m_act; 407 while (n->m_next) 408 n = n->m_next; 409 } 410 sbcompress(sb, m, n); 411 } 412 413 /* 414 * As above, except the mbuf chain 415 * begins a new record. 416 */ 417 sbappendrecord(sb, m0) 418 register struct sockbuf *sb; 419 register struct mbuf *m0; 420 { 421 register struct mbuf *m; 422 423 if (m0 == 0) 424 return; 425 if (m = sb->sb_mb) 426 while (m->m_act) 427 m = m->m_act; 428 /* 429 * Put the first mbuf on the queue. 430 * Note this permits zero length records. 431 */ 432 sballoc(sb, m0); 433 if (m) 434 m->m_act = m0; 435 else 436 sb->sb_mb = m0; 437 m = m0->m_next; 438 m0->m_next = 0; 439 sbcompress(sb, m, m0); 440 } 441 442 /* 443 * Append address and data, and optionally, rights 444 * to the receive queue of a socket. Return 0 if 445 * no space in sockbuf or insufficient mbufs. 446 */ 447 sbappendaddr(sb, asa, m0, rights0) 448 register struct sockbuf *sb; 449 struct sockaddr *asa; 450 struct mbuf *rights0, *m0; 451 { 452 register struct mbuf *m, *n; 453 int space = sizeof (*asa); 454 455 for (m = m0; m; m = m->m_next) 456 space += m->m_len; 457 if (rights0) 458 space += rights0->m_len; 459 if (space > sbspace(sb)) 460 return (0); 461 MGET(m, M_DONTWAIT, MT_SONAME); 462 if (m == 0) 463 return (0); 464 *mtod(m, struct sockaddr *) = *asa; 465 m->m_len = sizeof (*asa); 466 if (rights0 && rights0->m_len) { 467 m->m_next = m_copy(rights0, 0, rights0->m_len); 468 if (m->m_next == 0) { 469 m_freem(m); 470 return (0); 471 } 472 sballoc(sb, m->m_next); 473 } 474 sballoc(sb, m); 475 if (n = sb->sb_mb) { 476 while (n->m_act) 477 n = n->m_act; 478 n->m_act = m; 479 } else 480 sb->sb_mb = m; 481 if (m->m_next) 482 m = m->m_next; 483 if (m0) 484 sbcompress(sb, m0, m); 485 return (1); 486 } 487 488 sbappendrights(sb, m0, rights) 489 struct sockbuf *sb; 490 struct mbuf *rights, *m0; 491 { 492 register struct mbuf *m, *n; 493 int space = 0; 494 495 if (rights == 0) 496 panic("sbappendrights"); 497 for (m = m0; m; m = m->m_next) 498 space += m->m_len; 499 space += rights->m_len; 500 if (space > sbspace(sb)) 501 return (0); 502 m = m_copy(rights, 0, rights->m_len); 503 if (m == 0) 504 return (0); 505 sballoc(sb, m); 506 if (n = sb->sb_mb) { 507 while (n->m_act) 508 n = n->m_act; 509 n->m_act = m; 510 } else 511 sb->sb_mb = m; 512 if (m0) 513 sbcompress(sb, m0, m); 514 return (1); 515 } 516 517 /* 518 * Compress mbuf chain m into the socket 519 * buffer sb following mbuf n. If n 520 * is null, the buffer is presumed empty. 521 */ 522 sbcompress(sb, m, n) 523 register struct sockbuf *sb; 524 register struct mbuf *m, *n; 525 { 526 527 while (m) { 528 if (m->m_len == 0) { 529 m = m_free(m); 530 continue; 531 } 532 if (n && n->m_off <= MMAXOFF && m->m_off <= MMAXOFF && 533 (n->m_off + n->m_len + m->m_len) <= MMAXOFF && 534 n->m_type == m->m_type) { 535 bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len, 536 (unsigned)m->m_len); 537 n->m_len += m->m_len; 538 sb->sb_cc += m->m_len; 539 m = m_free(m); 540 continue; 541 } 542 sballoc(sb, m); 543 if (n) 544 n->m_next = m; 545 else 546 sb->sb_mb = m; 547 n = m; 548 m = m->m_next; 549 n->m_next = 0; 550 } 551 } 552 553 /* 554 * Free all mbufs in a sockbuf. 555 * Check that all resources are reclaimed. 556 */ 557 sbflush(sb) 558 register struct sockbuf *sb; 559 { 560 561 if (sb->sb_flags & SB_LOCK) 562 panic("sbflush"); 563 while (sb->sb_mbcnt) 564 sbdrop(sb, (int)sb->sb_cc); 565 if (sb->sb_cc || sb->sb_mbcnt || sb->sb_mb) 566 panic("sbflush 2"); 567 } 568 569 /* 570 * Drop data from (the front of) a sockbuf. 571 */ 572 sbdrop(sb, len) 573 register struct sockbuf *sb; 574 register int len; 575 { 576 register struct mbuf *m, *mn; 577 struct mbuf *next; 578 579 next = (m = sb->sb_mb) ? m->m_act : 0; 580 while (len > 0) { 581 if (m == 0) { 582 if (next == 0) 583 panic("sbdrop"); 584 m = next; 585 next = m->m_act; 586 continue; 587 } 588 if (m->m_len > len) { 589 m->m_len -= len; 590 m->m_off += len; 591 sb->sb_cc -= len; 592 break; 593 } 594 len -= m->m_len; 595 sbfree(sb, m); 596 MFREE(m, mn); 597 m = mn; 598 } 599 while (m && m->m_len == 0) { 600 sbfree(sb, m); 601 MFREE(m, mn); 602 m = mn; 603 } 604 if (m) { 605 sb->sb_mb = m; 606 m->m_act = next; 607 } else 608 sb->sb_mb = next; 609 } 610 611 /* 612 * Drop a record off the front of a sockbuf 613 * and move the next record to the front. 614 */ 615 sbdroprecord(sb) 616 register struct sockbuf *sb; 617 { 618 register struct mbuf *m, *mn; 619 620 m = sb->sb_mb; 621 if (m) { 622 sb->sb_mb = m->m_act; 623 do { 624 sbfree(sb, m); 625 MFREE(m, mn); 626 } while (m = mn); 627 } 628 } 629