1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
27 * Copyright 2022 Garrett D'Amore
28 * Copyright 2024 Oxide Computer Company
29 */
30
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/buf.h>
36 #include <sys/conf.h>
37 #include <sys/cred.h>
38 #include <sys/kmem.h>
39 #include <sys/sysmacros.h>
40 #include <sys/vfs.h>
41 #include <sys/vfs_opreg.h>
42 #include <sys/vnode.h>
43 #include <sys/debug.h>
44 #include <sys/errno.h>
45 #include <sys/time.h>
46 #include <sys/file.h>
47 #include <sys/open.h>
48 #include <sys/user.h>
49 #include <sys/termios.h>
50 #include <sys/stream.h>
51 #include <sys/strsubr.h>
52 #include <sys/strsun.h>
53 #include <sys/esunddi.h>
54 #include <sys/flock.h>
55 #include <sys/modctl.h>
56 #include <sys/cmn_err.h>
57 #include <sys/mkdev.h>
58 #include <sys/pathname.h>
59 #include <sys/ddi.h>
60 #include <sys/stat.h>
61 #include <sys/fs/snode.h>
62 #include <sys/fs/dv_node.h>
63 #include <sys/zone.h>
64
65 #include <sys/socket.h>
66 #include <sys/socketvar.h>
67 #include <netinet/in.h>
68 #include <sys/un.h>
69 #include <sys/ucred.h>
70
71 #include <sys/tiuser.h>
72 #define _SUN_TPI_VERSION 2
73 #include <sys/tihdr.h>
74
75 #include <c2/audit.h>
76
77 #include <fs/sockfs/sockcommon.h>
78 #include <fs/sockfs/sockfilter_impl.h>
79 #include <fs/sockfs/socktpi.h>
80 #include <fs/sockfs/socktpi_impl.h>
81 #include <fs/sockfs/sodirect.h>
82
/*
 * Macros that operate on struct cmsghdr.
 * The CMSG_VALID macro does not assume that the last option buffer is padded.
 */
/* Address of the payload, i.e. the first byte past the cmsghdr itself. */
#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
/* Payload size in bytes: cmsg_len minus the size of the header. */
#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
/*
 * True when cmsg is aligned, lies within [start, end), has a cmsg_len that
 * covers at least the header, and does not extend past end.
 */
#define	CMSG_VALID(cmsg, start, end)					\
	(ISALIGNED_cmsghdr(cmsg) &&					\
	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
	((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&	\
	((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
/* Value passed to cv_wait_stop() by so_lock_single() and so_lock_read(). */
#define	SO_LOCK_WAKEUP_TIME	3000	/* Wakeup time in milliseconds */
96
/* Assigned in sockinit() from getudev(), falling back to major 0. */
dev_t sockdev;	/* For fsid in getattr */

/* Global socket list; its sl_lock mutex is initialized in sockinit(). */
struct socklist socklist;

/* kmem cache sized for struct sonode; created in sockinit(). */
struct kmem_cache *socket_cache;

/*
 * sockconf_lock protects the socket configuration (socket types and
 * socket filters) which is changed via the sockconfig system call.
 */
krwlock_t sockconf_lock;

/* NOTE(review): presumably the kstat callbacks defined later in this file. */
static int sockfs_update(kstat_t *, int);
static int sockfs_snapshot(kstat_t *, void *, int);
/* Produces the smod entry that sockinit() registers with smod_add(). */
extern smod_info_t *sotpi_smod_create(void);

/* Called once from sockinit(). */
extern void sendfile_init();

extern int modrootloaded;
116
117 /*
118 * Translate from a device pathname (e.g. "/dev/tcp") to a vnode.
119 * Returns with the vnode held.
120 */
121 int
sogetvp(char * devpath,vnode_t ** vpp,int uioflag)122 sogetvp(char *devpath, vnode_t **vpp, int uioflag)
123 {
124 struct snode *csp;
125 vnode_t *vp, *dvp;
126 major_t maj;
127 int error;
128
129 ASSERT(uioflag == UIO_SYSSPACE || uioflag == UIO_USERSPACE);
130
131 /*
132 * Lookup the underlying filesystem vnode.
133 */
134 error = lookupname(devpath, uioflag, FOLLOW, NULLVPP, &vp);
135 if (error)
136 return (error);
137
138 /* Check that it is the correct vnode */
139 if (vp->v_type != VCHR) {
140 VN_RELE(vp);
141 return (ENOTSOCK);
142 }
143
144 /*
145 * If devpath went through devfs, the device should already
146 * be configured. If devpath is a mknod file, however, we
147 * need to make sure the device is properly configured.
148 * To do this, we do something similar to spec_open()
149 * except that we resolve to the minor/leaf level since
150 * we need to return a vnode.
151 */
152 csp = VTOS(VTOS(vp)->s_commonvp);
153 if (!(csp->s_flag & SDIPSET)) {
154 char *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
155 error = ddi_dev_pathname(vp->v_rdev, S_IFCHR, pathname);
156 if (error == 0)
157 error = devfs_lookupname(pathname, NULLVPP, &dvp);
158 VN_RELE(vp);
159 kmem_free(pathname, MAXPATHLEN);
160 if (error != 0)
161 return (ENXIO);
162 vp = dvp; /* use the devfs vp */
163 }
164
165 /* device is configured at this point */
166 maj = getmajor(vp->v_rdev);
167 if (!STREAMSTAB(maj)) {
168 VN_RELE(vp);
169 return (ENOSTR);
170 }
171
172 *vpp = vp;
173 return (0);
174 }
175
176 /*
177 * Update the accessed, updated, or changed times in an sonode
178 * with the current time.
179 *
180 * Note that both SunOS 4.X and 4.4BSD sockets do not present reasonable
181 * attributes in a fstat call. (They return the current time and 0 for
182 * all timestamps, respectively.) We maintain the current timestamps
183 * here primarily so that should sockmod be popped the resulting
184 * file descriptor will behave like a stream w.r.t. the timestamps.
185 */
186 void
so_update_attrs(struct sonode * so,int flag)187 so_update_attrs(struct sonode *so, int flag)
188 {
189 time_t now = gethrestime_sec();
190
191 if (SOCK_IS_NONSTR(so))
192 return;
193
194 mutex_enter(&so->so_lock);
195 so->so_flag |= flag;
196 if (flag & SOACC)
197 SOTOTPI(so)->sti_atime = now;
198 if (flag & SOMOD)
199 SOTOTPI(so)->sti_mtime = now;
200 mutex_exit(&so->so_lock);
201 }
202
203 extern so_create_func_t sock_comm_create_function;
204 extern so_destroy_func_t sock_comm_destroy_function;
205 /*
206 * Init function called when sockfs is loaded.
207 */
208 int
sockinit(int fstype,char * name)209 sockinit(int fstype, char *name)
210 {
211 static const fs_operation_def_t sock_vfsops_template[] = {
212 NULL, NULL
213 };
214 int error;
215 major_t dev;
216 char *err_str;
217
218 error = vfs_setfsops(fstype, sock_vfsops_template, NULL);
219 if (error != 0) {
220 zcmn_err(GLOBAL_ZONEID, CE_WARN,
221 "sockinit: bad vfs ops template");
222 return (error);
223 }
224
225 error = vn_make_ops(name, socket_vnodeops_template,
226 &socket_vnodeops);
227 if (error != 0) {
228 err_str = "sockinit: bad socket vnode ops template";
229 /* vn_make_ops() does not reset socktpi_vnodeops on failure. */
230 socket_vnodeops = NULL;
231 goto failure;
232 }
233
234 socket_cache = kmem_cache_create("socket_cache",
235 sizeof (struct sonode), 0, sonode_constructor,
236 sonode_destructor, NULL, NULL, NULL, 0);
237
238 rw_init(&sockconf_lock, NULL, RW_DEFAULT, NULL);
239
240 error = socktpi_init();
241 if (error != 0) {
242 err_str = NULL;
243 goto failure;
244 }
245
246 error = sod_init();
247 if (error != 0) {
248 err_str = NULL;
249 goto failure;
250 }
251
252 /*
253 * Set up the default create and destroy functions
254 */
255 sock_comm_create_function = socket_sonode_create;
256 sock_comm_destroy_function = socket_sonode_destroy;
257
258 /*
259 * Build initial list mapping socket parameters to vnode.
260 */
261 smod_init();
262 smod_add(sotpi_smod_create());
263
264 sockparams_init();
265
266 /*
267 * If sockets are needed before init runs /sbin/soconfig
268 * it is possible to preload the sockparams list here using
269 * calls like:
270 * sockconfig(1,2,3, "/dev/tcp", 0);
271 */
272
273 /*
274 * Create a unique dev_t for use in so_fsid.
275 */
276
277 if ((dev = getudev()) == (major_t)-1)
278 dev = 0;
279 sockdev = makedevice(dev, 0);
280
281 mutex_init(&socklist.sl_lock, NULL, MUTEX_DEFAULT, NULL);
282 sendfile_init();
283
284 /* Initialize socket filters */
285 sof_init();
286
287 return (0);
288
289 failure:
290 (void) vfs_freevfsops_by_type(fstype);
291 if (socket_vnodeops != NULL)
292 vn_freevnodeops(socket_vnodeops);
293 if (err_str != NULL)
294 zcmn_err(GLOBAL_ZONEID, CE_WARN, err_str);
295 return (error);
296 }
297
298 /*
299 * Caller must hold the mutex. Used to set SOLOCKED.
300 */
301 void
so_lock_single(struct sonode * so)302 so_lock_single(struct sonode *so)
303 {
304 ASSERT(MUTEX_HELD(&so->so_lock));
305
306 while (so->so_flag & (SOLOCKED | SOASYNC_UNBIND)) {
307 cv_wait_stop(&so->so_single_cv, &so->so_lock,
308 SO_LOCK_WAKEUP_TIME);
309 }
310 so->so_flag |= SOLOCKED;
311 }
312
313 /*
314 * Caller must hold the mutex and pass in SOLOCKED or SOASYNC_UNBIND.
315 * Used to clear SOLOCKED or SOASYNC_UNBIND.
316 */
317 void
so_unlock_single(struct sonode * so,int flag)318 so_unlock_single(struct sonode *so, int flag)
319 {
320 ASSERT(MUTEX_HELD(&so->so_lock));
321 ASSERT(flag & (SOLOCKED|SOASYNC_UNBIND));
322 ASSERT((flag & ~(SOLOCKED|SOASYNC_UNBIND)) == 0);
323 ASSERT(so->so_flag & flag);
324 /*
325 * Process the T_DISCON_IND on sti_discon_ind_mp.
326 *
327 * Call to so_drain_discon_ind will result in so_lock
328 * being dropped and re-acquired later.
329 */
330 if (!SOCK_IS_NONSTR(so)) {
331 sotpi_info_t *sti = SOTOTPI(so);
332
333 if (sti->sti_discon_ind_mp != NULL)
334 so_drain_discon_ind(so);
335 }
336
337 cv_signal(&so->so_single_cv);
338 so->so_flag &= ~flag;
339 }
340
341 /*
342 * Caller must hold the mutex. Used to set SOREADLOCKED.
343 * If the caller wants nonblocking behavior it should set fmode.
344 */
345 int
so_lock_read(struct sonode * so,int fmode)346 so_lock_read(struct sonode *so, int fmode)
347 {
348 ASSERT(MUTEX_HELD(&so->so_lock));
349
350 while (so->so_flag & SOREADLOCKED) {
351 if (fmode & (FNDELAY|FNONBLOCK))
352 return (EWOULDBLOCK);
353 cv_wait_stop(&so->so_read_cv, &so->so_lock,
354 SO_LOCK_WAKEUP_TIME);
355 }
356 so->so_flag |= SOREADLOCKED;
357 return (0);
358 }
359
360 /*
361 * Like so_lock_read above but allows signals.
362 */
363 int
so_lock_read_intr(struct sonode * so,int fmode)364 so_lock_read_intr(struct sonode *so, int fmode)
365 {
366 ASSERT(MUTEX_HELD(&so->so_lock));
367
368 while (so->so_flag & SOREADLOCKED) {
369 if (fmode & (FNDELAY|FNONBLOCK))
370 return (EWOULDBLOCK);
371 if (!cv_wait_sig(&so->so_read_cv, &so->so_lock))
372 return (EINTR);
373 }
374 so->so_flag |= SOREADLOCKED;
375 return (0);
376 }
377
378 /*
379 * Caller must hold the mutex. Used to clear SOREADLOCKED,
380 * set in so_lock_read() or so_lock_read_intr().
381 */
382 void
so_unlock_read(struct sonode * so)383 so_unlock_read(struct sonode *so)
384 {
385 ASSERT(MUTEX_HELD(&so->so_lock));
386 ASSERT(so->so_flag & SOREADLOCKED);
387
388 cv_signal(&so->so_read_cv);
389 so->so_flag &= ~SOREADLOCKED;
390 }
391
392 /*
393 * Verify that the specified offset falls within the mblk and
394 * that the resulting pointer is aligned.
395 * Returns NULL if not.
396 */
397 void *
sogetoff(mblk_t * mp,t_uscalar_t offset,t_uscalar_t length,uint_t align_size)398 sogetoff(mblk_t *mp, t_uscalar_t offset,
399 t_uscalar_t length, uint_t align_size)
400 {
401 uintptr_t ptr1, ptr2;
402
403 ASSERT(mp && mp->b_wptr >= mp->b_rptr);
404 ptr1 = (uintptr_t)mp->b_rptr + offset;
405 ptr2 = (uintptr_t)ptr1 + length;
406 if (ptr1 < (uintptr_t)mp->b_rptr || ptr2 > (uintptr_t)mp->b_wptr) {
407 eprintline(0);
408 return (NULL);
409 }
410 if ((ptr1 & (align_size - 1)) != 0) {
411 eprintline(0);
412 return (NULL);
413 }
414 return ((void *)ptr1);
415 }
416
417 /*
418 * Return the AF_UNIX underlying filesystem vnode matching a given name.
419 * Makes sure the sending and the destination sonodes are compatible.
420 * The vnode is returned held.
421 *
422 * The underlying filesystem VSOCK vnode has a v_stream pointer that
423 * references the actual stream head (hence indirectly the actual sonode).
424 */
425 static int
so_ux_lookup(struct sonode * so,struct sockaddr_un * soun,int checkaccess,vnode_t ** vpp)426 so_ux_lookup(struct sonode *so, struct sockaddr_un *soun, int checkaccess,
427 vnode_t **vpp)
428 {
429 vnode_t *vp; /* Underlying filesystem vnode */
430 vnode_t *rvp; /* real vnode */
431 vnode_t *svp; /* sockfs vnode */
432 struct sonode *so2;
433 int error;
434
435 dprintso(so, 1, ("so_ux_lookup(%p) name <%s>\n", (void *)so,
436 soun->sun_path));
437
438 error = lookupname(soun->sun_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
439 if (error) {
440 eprintsoline(so, error);
441 return (error);
442 }
443
444 /*
445 * Traverse lofs mounts get the real vnode
446 */
447 if (VOP_REALVP(vp, &rvp, NULL) == 0) {
448 VN_HOLD(rvp); /* hold the real vnode */
449 VN_RELE(vp); /* release hold from lookup */
450 vp = rvp;
451 }
452
453 if (vp->v_type != VSOCK) {
454 error = ENOTSOCK;
455 eprintsoline(so, error);
456 goto done2;
457 }
458
459 if (checkaccess) {
460 /*
461 * Check that we have permissions to access the destination
462 * vnode. This check is not done in BSD but it is required
463 * by X/Open.
464 */
465 error = VOP_ACCESS(vp, VREAD|VWRITE, 0, CRED(), NULL);
466 if (error != 0) {
467 eprintsoline(so, error);
468 goto done2;
469 }
470 }
471
472 /*
473 * Check if the remote socket has been closed.
474 *
475 * Synchronize with vn_rele_stream by holding v_lock while traversing
476 * v_stream->sd_vnode.
477 */
478 mutex_enter(&vp->v_lock);
479 if (vp->v_stream == NULL) {
480 mutex_exit(&vp->v_lock);
481 if (so->so_type == SOCK_DGRAM)
482 error = EDESTADDRREQ;
483 else
484 error = ECONNREFUSED;
485
486 eprintsoline(so, error);
487 goto done2;
488 }
489 ASSERT(vp->v_stream->sd_vnode);
490 svp = vp->v_stream->sd_vnode;
491 /*
492 * holding v_lock on underlying filesystem vnode and acquiring
493 * it on sockfs vnode. Assumes that no code ever attempts to
494 * acquire these locks in the reverse order.
495 */
496 VN_HOLD(svp);
497 mutex_exit(&vp->v_lock);
498
499 if (svp->v_type != VSOCK) {
500 error = ENOTSOCK;
501 eprintsoline(so, error);
502 goto done;
503 }
504
505 so2 = VTOSO(svp);
506
507 if (so->so_type != so2->so_type) {
508 error = EPROTOTYPE;
509 eprintsoline(so, error);
510 goto done;
511 }
512
513 VN_RELE(svp);
514 *vpp = vp;
515 return (0);
516
517 done:
518 VN_RELE(svp);
519 done2:
520 VN_RELE(vp);
521 return (error);
522 }
523
524 /*
525 * Verify peer address for connect and sendto/sendmsg.
526 * Since sendto/sendmsg would not get synchronous errors from the transport
527 * provider we have to do these ugly checks in the socket layer to
528 * preserve compatibility with SunOS 4.X.
529 */
530 int
so_addr_verify(struct sonode * so,const struct sockaddr * name,socklen_t namelen)531 so_addr_verify(struct sonode *so, const struct sockaddr *name,
532 socklen_t namelen)
533 {
534 int family;
535
536 dprintso(so, 1, ("so_addr_verify(%p, %p, %d)\n",
537 (void *)so, (void *)name, namelen));
538
539 ASSERT(name != NULL);
540
541 family = so->so_family;
542 switch (family) {
543 case AF_INET:
544 if (name->sa_family != family) {
545 eprintsoline(so, EAFNOSUPPORT);
546 return (EAFNOSUPPORT);
547 }
548 if (namelen != (socklen_t)sizeof (struct sockaddr_in)) {
549 eprintsoline(so, EINVAL);
550 return (EINVAL);
551 }
552 break;
553 case AF_INET6: {
554 #ifdef DEBUG
555 struct sockaddr_in6 *sin6;
556 #endif /* DEBUG */
557
558 if (name->sa_family != family) {
559 eprintsoline(so, EAFNOSUPPORT);
560 return (EAFNOSUPPORT);
561 }
562 if (namelen != (socklen_t)sizeof (struct sockaddr_in6)) {
563 eprintsoline(so, EINVAL);
564 return (EINVAL);
565 }
566 #ifdef DEBUG
567 /* Verify that apps don't forget to clear sin6_scope_id etc */
568 sin6 = (struct sockaddr_in6 *)name;
569 if (sin6->sin6_scope_id != 0 &&
570 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
571 zcmn_err(getzoneid(), CE_WARN,
572 "connect/send* with uninitialized sin6_scope_id "
573 "(%d) on socket. Pid = %d\n",
574 (int)sin6->sin6_scope_id, (int)curproc->p_pid);
575 }
576 #endif /* DEBUG */
577 break;
578 }
579 case AF_UNIX:
580 if (SOTOTPI(so)->sti_faddr_noxlate) {
581 return (0);
582 }
583 if (namelen < (socklen_t)sizeof (short)) {
584 eprintsoline(so, ENOENT);
585 return (ENOENT);
586 }
587 if (name->sa_family != family) {
588 eprintsoline(so, EAFNOSUPPORT);
589 return (EAFNOSUPPORT);
590 }
591 /* MAXPATHLEN + soun_family + nul termination */
592 if (namelen > (socklen_t)(MAXPATHLEN + sizeof (short) + 1)) {
593 eprintsoline(so, ENAMETOOLONG);
594 return (ENAMETOOLONG);
595 }
596
597 break;
598
599 default:
600 /*
601 * Default is don't do any length or sa_family check
602 * to allow non-sockaddr style addresses.
603 */
604 break;
605 }
606
607 return (0);
608 }
609
610
611 /*
612 * Translate an AF_UNIX sockaddr_un to the transport internal name.
613 * Assumes caller has called so_addr_verify first. The translated
614 * (internal form) address is stored in sti->sti_ux_taddr.
615 */
616 /*ARGSUSED*/
617 int
so_ux_addr_xlate(struct sonode * so,struct sockaddr * name,socklen_t namelen,int checkaccess,void ** addrp,socklen_t * addrlenp)618 so_ux_addr_xlate(struct sonode *so, struct sockaddr *name,
619 socklen_t namelen, int checkaccess,
620 void **addrp, socklen_t *addrlenp)
621 {
622 int error;
623 struct sockaddr_un *soun;
624 vnode_t *vp;
625 void *addr;
626 socklen_t addrlen;
627 sotpi_info_t *sti = SOTOTPI(so);
628
629 dprintso(so, 1, ("so_ux_addr_xlate(%p, %p, %d, %d)\n",
630 (void *)so, (void *)name, namelen, checkaccess));
631
632 ASSERT(name != NULL);
633 ASSERT(so->so_family == AF_UNIX);
634 ASSERT(!sti->sti_faddr_noxlate);
635 ASSERT(namelen >= (socklen_t)sizeof (short));
636 ASSERT(name->sa_family == AF_UNIX);
637 soun = (struct sockaddr_un *)name;
638 /*
639 * Lookup vnode for the specified path name and verify that
640 * it is a socket.
641 */
642 error = so_ux_lookup(so, soun, checkaccess, &vp);
643 if (error) {
644 eprintsoline(so, error);
645 return (error);
646 }
647 /*
648 * Use the address of the peer vnode as the address to send
649 * to. We release the peer vnode here. In case it has been
650 * closed by the time the T_CONN_REQ or T_UNITDATA_REQ reaches the
651 * transport the message will get an error or be dropped.
652 * Note that that soua_vp is never dereferenced; it's just a
653 * convenient value by which we can identify the peer.
654 */
655 sti->sti_ux_taddr.soua_vp = vp;
656 sti->sti_ux_taddr.soua_magic = SOU_MAGIC_EXPLICIT;
657 addr = &sti->sti_ux_taddr;
658 addrlen = (socklen_t)sizeof (sti->sti_ux_taddr);
659 dprintso(so, 1, ("ux_xlate UNIX: addrlen %d, vp %p\n",
660 addrlen, (void *)vp));
661 VN_RELE(vp);
662 *addrp = addr;
663 *addrlenp = (socklen_t)addrlen;
664 return (0);
665 }
666
667 /*
668 * Esballoc free function for messages that contain SO_FILEP option.
669 * Decrement the reference count on the file pointers using closef.
670 */
671 void
fdbuf_free(struct fdbuf * fdbuf)672 fdbuf_free(struct fdbuf *fdbuf)
673 {
674 int i;
675 struct file *fp;
676
677 dprint(1, ("fdbuf_free: %d fds\n", fdbuf->fd_numfd));
678 for (i = 0; i < fdbuf->fd_numfd; i++) {
679 /*
680 * We need pointer size alignment for fd_fds. On a LP64
681 * kernel, the required alignment is 8 bytes while
682 * the option headers and values are only 4 bytes
683 * aligned. So its safer to do a bcopy compared to
684 * assigning fdbuf->fd_fds[i] to fp.
685 */
686 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
687 dprint(1, ("fdbuf_free: [%d] = %p\n", i, (void *)fp));
688 (void) closef(fp);
689 }
690 if (fdbuf->fd_ebuf != NULL)
691 kmem_free(fdbuf->fd_ebuf, fdbuf->fd_ebuflen);
692 kmem_free(fdbuf, fdbuf->fd_size);
693 }
694
695 /*
696 * Allocate an esballoc'ed message for AF_UNIX file descriptor passing.
697 * Waits if memory is not available.
698 */
699 mblk_t *
fdbuf_allocmsg(int size,struct fdbuf * fdbuf)700 fdbuf_allocmsg(int size, struct fdbuf *fdbuf)
701 {
702 uchar_t *buf;
703 mblk_t *mp;
704
705 dprint(1, ("fdbuf_allocmsg: size %d, %d fds\n", size, fdbuf->fd_numfd));
706 buf = kmem_alloc(size, KM_SLEEP);
707 fdbuf->fd_ebuf = (caddr_t)buf;
708 fdbuf->fd_ebuflen = size;
709 fdbuf->fd_frtn.free_func = fdbuf_free;
710 fdbuf->fd_frtn.free_arg = (caddr_t)fdbuf;
711
712 mp = esballoc_wait(buf, size, BPRI_MED, &fdbuf->fd_frtn);
713 mp->b_datap->db_type = M_PROTO;
714 return (mp);
715 }
716
717 /*
718 * Extract file descriptors from a fdbuf.
719 * Return list in rights/rightslen.
720 */
721 /*ARGSUSED*/
722 static int
fdbuf_extract(struct fdbuf * fdbuf,void * rights,int rightslen,int msg_flags)723 fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen, int msg_flags)
724 {
725 int i, fd;
726 int *rp;
727 struct file *fp;
728 int numfd;
729
730 dprint(1, ("fdbuf_extract: %d fds, len %d\n",
731 fdbuf->fd_numfd, rightslen));
732
733 numfd = fdbuf->fd_numfd;
734 ASSERT(rightslen == numfd * (int)sizeof (int));
735
736 /*
737 * Allocate a file descriptor and increment the f_count.
738 * The latter is needed since we always call fdbuf_free
739 * which performs a closef.
740 */
741 rp = (int *)rights;
742 for (i = 0; i < numfd; i++) {
743 if ((fd = ufalloc(0)) == -1)
744 goto cleanup;
745 /*
746 * We need pointer size alignment for fd_fds. On a LP64
747 * kernel, the required alignment is 8 bytes while
748 * the option headers and values are only 4 bytes
749 * aligned. So its safer to do a bcopy compared to
750 * assigning fdbuf->fd_fds[i] to fp.
751 */
752 bcopy((char *)&fdbuf->fd_fds[i], (char *)&fp, sizeof (fp));
753 mutex_enter(&fp->f_tlock);
754 fp->f_count++;
755 mutex_exit(&fp->f_tlock);
756 setf(fd, fp);
757 if ((msg_flags & MSG_CMSG_CLOEXEC) != 0) {
758 f_setfd_or(fd, FD_CLOEXEC);
759 }
760 if ((msg_flags & MSG_CMSG_CLOFORK) != 0) {
761 f_setfd_or(fd, FD_CLOFORK);
762 }
763 *rp++ = fd;
764 if (AU_AUDITING())
765 audit_fdrecv(fd, fp);
766 dprint(1, ("fdbuf_extract: [%d] = %d, %p refcnt %d\n",
767 i, fd, (void *)fp, fp->f_count));
768 }
769 return (0);
770
771 cleanup:
772 /*
773 * Undo whatever partial work the loop above has done.
774 */
775 {
776 int j;
777
778 rp = (int *)rights;
779 for (j = 0; j < i; j++) {
780 dprint(0,
781 ("fdbuf_extract: cleanup[%d] = %d\n", j, *rp));
782 (void) closeandsetf(*rp++, NULL);
783 }
784 }
785
786 return (EMFILE);
787 }
788
789 /*
790 * Insert file descriptors into an fdbuf.
791 * Returns a kmem_alloc'ed fdbuf. The fdbuf should be freed
792 * by calling fdbuf_free().
793 */
794 int
fdbuf_create(void * rights,int rightslen,struct fdbuf ** fdbufp)795 fdbuf_create(void *rights, int rightslen, struct fdbuf **fdbufp)
796 {
797 int numfd, i;
798 int *fds;
799 struct file *fp;
800 struct fdbuf *fdbuf;
801 int fdbufsize;
802
803 dprint(1, ("fdbuf_create: len %d\n", rightslen));
804
805 numfd = rightslen / (int)sizeof (int);
806
807 fdbufsize = (int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *));
808 fdbuf = kmem_alloc(fdbufsize, KM_SLEEP);
809 fdbuf->fd_size = fdbufsize;
810 fdbuf->fd_numfd = 0;
811 fdbuf->fd_ebuf = NULL;
812 fdbuf->fd_ebuflen = 0;
813 fds = (int *)rights;
814 for (i = 0; i < numfd; i++) {
815 if ((fp = getf(fds[i])) == NULL) {
816 fdbuf_free(fdbuf);
817 return (EBADF);
818 }
819 dprint(1, ("fdbuf_create: [%d] = %d, %p refcnt %d\n",
820 i, fds[i], (void *)fp, fp->f_count));
821 mutex_enter(&fp->f_tlock);
822 fp->f_count++;
823 mutex_exit(&fp->f_tlock);
824 /*
825 * The maximum alignment for fdbuf (or any option header
826 * and its value) it 4 bytes. On a LP64 kernel, the alignment
827 * is not sufficient for pointers (fd_fds in this case). Since
828 * we just did a kmem_alloc (we get a double word alignment),
829 * we don't need to do anything on the send side (we loose
830 * the double word alignment because fdbuf goes after an
831 * option header (eg T_unitdata_req) which is only 4 byte
832 * aligned). We take care of this when we extract the file
833 * descriptor in fdbuf_extract or fdbuf_free.
834 */
835 fdbuf->fd_fds[i] = fp;
836 fdbuf->fd_numfd++;
837 releasef(fds[i]);
838 if (AU_AUDITING())
839 audit_fdsend(fds[i], fp, 0);
840 }
841 *fdbufp = fdbuf;
842 return (0);
843 }
844
845 static int
fdbuf_optlen(int rightslen)846 fdbuf_optlen(int rightslen)
847 {
848 int numfd;
849
850 numfd = rightslen / (int)sizeof (int);
851
852 return ((int)FDBUF_HDRSIZE + (numfd * (int)sizeof (struct file *)));
853 }
854
855 static t_uscalar_t
fdbuf_cmsglen(int fdbuflen)856 fdbuf_cmsglen(int fdbuflen)
857 {
858 return (t_uscalar_t)((fdbuflen - FDBUF_HDRSIZE) /
859 (int)sizeof (struct file *) * (int)sizeof (int));
860 }
861
862
863 /*
864 * Return non-zero if the mblk and fdbuf are consistent.
865 */
866 static int
fdbuf_verify(mblk_t * mp,struct fdbuf * fdbuf,int fdbuflen)867 fdbuf_verify(mblk_t *mp, struct fdbuf *fdbuf, int fdbuflen)
868 {
869 if (fdbuflen >= FDBUF_HDRSIZE &&
870 fdbuflen == fdbuf->fd_size) {
871 frtn_t *frp = mp->b_datap->db_frtnp;
872 /*
873 * Check that the SO_FILEP portion of the
874 * message has not been modified by
875 * the loopback transport. The sending sockfs generates
876 * a message that is esballoc'ed with the free function
877 * being fdbuf_free() and where free_arg contains the
878 * identical information as the SO_FILEP content.
879 *
880 * If any of these constraints are not satisfied we
881 * silently ignore the option.
882 */
883 ASSERT(mp);
884 if (frp != NULL &&
885 frp->free_func == fdbuf_free &&
886 frp->free_arg != NULL &&
887 bcmp(frp->free_arg, fdbuf, fdbuflen) == 0) {
888 dprint(1, ("fdbuf_verify: fdbuf %p len %d\n",
889 (void *)fdbuf, fdbuflen));
890 return (1);
891 } else {
892 zcmn_err(getzoneid(), CE_WARN,
893 "sockfs: mismatched fdbuf content (%p)",
894 (void *)mp);
895 return (0);
896 }
897 } else {
898 zcmn_err(getzoneid(), CE_WARN,
899 "sockfs: mismatched fdbuf len %d, %d\n",
900 fdbuflen, fdbuf->fd_size);
901 return (0);
902 }
903 }
904
905 /*
906 * When the file descriptors returned by sorecvmsg can not be passed
907 * to the application this routine will cleanup the references on
908 * the files. Start at startoff bytes into the buffer.
909 */
910 static void
close_fds(void * fdbuf,int fdbuflen,int startoff)911 close_fds(void *fdbuf, int fdbuflen, int startoff)
912 {
913 int *fds = (int *)fdbuf;
914 int numfd = fdbuflen / (int)sizeof (int);
915 int i;
916
917 dprint(1, ("close_fds(%p, %d, %d)\n", fdbuf, fdbuflen, startoff));
918
919 for (i = 0; i < numfd; i++) {
920 if (startoff < 0)
921 startoff = 0;
922 if (startoff < (int)sizeof (int)) {
923 /*
924 * This file descriptor is partially or fully after
925 * the offset
926 */
927 dprint(0,
928 ("close_fds: cleanup[%d] = %d\n", i, fds[i]));
929 (void) closeandsetf(fds[i], NULL);
930 }
931 startoff -= (int)sizeof (int);
932 }
933 }
934
935 /*
936 * Close all file descriptors contained in the control part starting at
937 * the startoffset.
938 */
939 void
so_closefds(void * control,t_uscalar_t controllen,int oldflg,int startoff)940 so_closefds(void *control, t_uscalar_t controllen, int oldflg,
941 int startoff)
942 {
943 struct cmsghdr *cmsg;
944
945 if (control == NULL)
946 return;
947
948 if (oldflg) {
949 close_fds(control, controllen, startoff);
950 return;
951 }
952 /* Scan control part for file descriptors. */
953 for (cmsg = (struct cmsghdr *)control;
954 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
955 cmsg = CMSG_NEXT(cmsg)) {
956 if (cmsg->cmsg_level == SOL_SOCKET &&
957 cmsg->cmsg_type == SCM_RIGHTS) {
958 close_fds(CMSG_CONTENT(cmsg),
959 (int)CMSG_CONTENTLEN(cmsg),
960 startoff - (int)sizeof (struct cmsghdr));
961 }
962 startoff -= ROUNDUP_cmsglen(cmsg->cmsg_len);
963 }
964 }
965
966 /*
967 * Handle truncation of a cmsg when the receive buffer is not big enough.
968 * Adjust the cmsg_len header field in the last cmsg that will be included in
969 * the buffer to reflect the number of bytes included.
970 */
971 void
so_truncatecmsg(void * control,t_uscalar_t controllen,uint_t maxlen)972 so_truncatecmsg(void *control, t_uscalar_t controllen, uint_t maxlen)
973 {
974 struct cmsghdr *cmsg;
975 uint_t len = 0;
976
977 if (control == NULL)
978 return;
979
980 for (cmsg = control;
981 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
982 cmsg = CMSG_NEXT(cmsg)) {
983
984 len += ROUNDUP_cmsglen(cmsg->cmsg_len);
985
986 if (len > maxlen) {
987 /*
988 * This cmsg is the last one that will be included in
989 * the truncated buffer.
990 */
991 socklen_t diff = len - maxlen;
992
993 if (diff < CMSG_CONTENTLEN(cmsg)) {
994 dprint(1, ("so_truncatecmsg: %d -> %d\n",
995 cmsg->cmsg_len, cmsg->cmsg_len - diff));
996 cmsg->cmsg_len -= diff;
997 } else {
998 cmsg->cmsg_len = sizeof (struct cmsghdr);
999 }
1000 break;
1001 }
1002 }
1003 }
1004
1005 /*
1006 * Returns a pointer/length for the file descriptors contained
1007 * in the control buffer. Returns with *fdlenp == -1 if there are no
1008 * file descriptor options present. This is different than there being
1009 * a zero-length file descriptor option.
1010 * Fail if there are multiple SCM_RIGHT cmsgs.
1011 */
1012 int
so_getfdopt(void * control,t_uscalar_t controllen,int oldflg,void ** fdsp,int * fdlenp)1013 so_getfdopt(void *control, t_uscalar_t controllen, int oldflg,
1014 void **fdsp, int *fdlenp)
1015 {
1016 struct cmsghdr *cmsg;
1017 void *fds;
1018 int fdlen;
1019
1020 if (control == NULL) {
1021 *fdsp = NULL;
1022 *fdlenp = -1;
1023 return (0);
1024 }
1025
1026 if (oldflg) {
1027 *fdsp = control;
1028 if (controllen == 0)
1029 *fdlenp = -1;
1030 else
1031 *fdlenp = controllen;
1032 dprint(1, ("so_getfdopt: old %d\n", *fdlenp));
1033 return (0);
1034 }
1035
1036 fds = NULL;
1037 fdlen = 0;
1038
1039 for (cmsg = (struct cmsghdr *)control;
1040 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1041 cmsg = CMSG_NEXT(cmsg)) {
1042 if (cmsg->cmsg_level == SOL_SOCKET &&
1043 cmsg->cmsg_type == SCM_RIGHTS) {
1044 if (fds != NULL)
1045 return (EINVAL);
1046 fds = CMSG_CONTENT(cmsg);
1047 fdlen = (int)CMSG_CONTENTLEN(cmsg);
1048 dprint(1, ("so_getfdopt: new %lu\n",
1049 (size_t)CMSG_CONTENTLEN(cmsg)));
1050 }
1051 }
1052 if (fds == NULL) {
1053 dprint(1, ("so_getfdopt: NONE\n"));
1054 *fdlenp = -1;
1055 } else
1056 *fdlenp = fdlen;
1057 *fdsp = fds;
1058 return (0);
1059 }
1060
1061 /*
1062 * Return the length of the options including any file descriptor options.
1063 */
1064 t_uscalar_t
so_optlen(void * control,t_uscalar_t controllen,int oldflg)1065 so_optlen(void *control, t_uscalar_t controllen, int oldflg)
1066 {
1067 struct cmsghdr *cmsg;
1068 t_uscalar_t optlen = 0;
1069 t_uscalar_t len;
1070
1071 if (control == NULL)
1072 return (0);
1073
1074 if (oldflg)
1075 return ((t_uscalar_t)(sizeof (struct T_opthdr) +
1076 fdbuf_optlen(controllen)));
1077
1078 for (cmsg = (struct cmsghdr *)control;
1079 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1080 cmsg = CMSG_NEXT(cmsg)) {
1081 if (cmsg->cmsg_level == SOL_SOCKET &&
1082 cmsg->cmsg_type == SCM_RIGHTS) {
1083 len = fdbuf_optlen((int)CMSG_CONTENTLEN(cmsg));
1084 } else {
1085 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1086 }
1087 optlen += (t_uscalar_t)(_TPI_ALIGN_TOPT(len) +
1088 sizeof (struct T_opthdr));
1089 }
1090 dprint(1, ("so_optlen: controllen %d, flg %d -> optlen %d\n",
1091 controllen, oldflg, optlen));
1092 return (optlen);
1093 }
1094
1095 /*
1096 * Copy options from control to the mblk. Skip any file descriptor options.
1097 */
1098 void
so_cmsg2opt(void * control,t_uscalar_t controllen,int oldflg,mblk_t * mp)1099 so_cmsg2opt(void *control, t_uscalar_t controllen, int oldflg, mblk_t *mp)
1100 {
1101 struct T_opthdr toh;
1102 struct cmsghdr *cmsg;
1103
1104 if (control == NULL)
1105 return;
1106
1107 if (oldflg) {
1108 /* No real options - caller has handled file descriptors */
1109 return;
1110 }
1111 for (cmsg = (struct cmsghdr *)control;
1112 CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
1113 cmsg = CMSG_NEXT(cmsg)) {
1114 /*
1115 * Note: The caller handles file descriptors prior
1116 * to calling this function.
1117 */
1118 t_uscalar_t len;
1119
1120 if (cmsg->cmsg_level == SOL_SOCKET &&
1121 cmsg->cmsg_type == SCM_RIGHTS)
1122 continue;
1123
1124 len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
1125 toh.level = cmsg->cmsg_level;
1126 toh.name = cmsg->cmsg_type;
1127 toh.len = len + (t_uscalar_t)sizeof (struct T_opthdr);
1128 toh.status = 0;
1129
1130 soappendmsg(mp, &toh, sizeof (toh));
1131 soappendmsg(mp, CMSG_CONTENT(cmsg), len);
1132 mp->b_wptr += _TPI_ALIGN_TOPT(len) - len;
1133 ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1134 }
1135 }
1136
1137 /*
1138 * Return the length of the control message derived from the options.
1139 * Exclude SO_SRCADDR and SO_UNIX_CLOSE options. Include SO_FILEP.
1140 * When oldflg is set only include SO_FILEP.
1141 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1142 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1143 * also be checked for any possible impacts.
1144 */
1145 t_uscalar_t
so_cmsglen(mblk_t * mp,void * opt,t_uscalar_t optlen,int oldflg)1146 so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg)
1147 {
1148 t_uscalar_t cmsglen = 0;
1149 struct T_opthdr *tohp;
1150 t_uscalar_t len;
1151 t_uscalar_t last_roundup = 0;
1152
1153 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1154
1155 for (tohp = (struct T_opthdr *)opt;
1156 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1157 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1158 dprint(1, ("so_cmsglen: level 0x%x, name %d, len %d\n",
1159 tohp->level, tohp->name, tohp->len));
1160 if (tohp->level == SOL_SOCKET &&
1161 (tohp->name == SO_SRCADDR ||
1162 tohp->name == SO_UNIX_CLOSE)) {
1163 continue;
1164 }
1165 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1166 struct fdbuf *fdbuf;
1167 int fdbuflen;
1168
1169 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1170 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1171
1172 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1173 continue;
1174 if (oldflg) {
1175 cmsglen += fdbuf_cmsglen(fdbuflen);
1176 continue;
1177 }
1178 len = fdbuf_cmsglen(fdbuflen);
1179 } else if (tohp->level == SOL_SOCKET &&
1180 tohp->name == SCM_TIMESTAMP) {
1181 if (oldflg)
1182 continue;
1183
1184 if (get_udatamodel() == DATAMODEL_NATIVE) {
1185 len = sizeof (struct timeval);
1186 } else {
1187 len = sizeof (struct timeval32);
1188 }
1189 } else {
1190 if (oldflg)
1191 continue;
1192 len = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1193 }
1194 /*
1195 * Exclude roundup for last option to not set
1196 * MSG_CTRUNC when the cmsg fits but the padding doesn't fit.
1197 */
1198 last_roundup = (t_uscalar_t)
1199 (ROUNDUP_cmsglen(len + (int)sizeof (struct cmsghdr)) -
1200 (len + (int)sizeof (struct cmsghdr)));
1201 cmsglen += (t_uscalar_t)(len + (int)sizeof (struct cmsghdr)) +
1202 last_roundup;
1203 }
1204 cmsglen -= last_roundup;
1205 dprint(1, ("so_cmsglen: optlen %d, flg %d -> cmsglen %d\n",
1206 optlen, oldflg, cmsglen));
1207 return (cmsglen);
1208 }
1209
1210 /*
1211 * Copy options from options to the control. Convert SO_FILEP to
1212 * file descriptors.
1213 * Returns errno or zero.
1214 * so_opt2cmsg and so_cmsglen are inter-related since so_cmsglen
1215 * allocates the space that so_opt2cmsg fills. If one changes, the other should
1216 * also be checked for any possible impacts.
1217 */
1218 int
so_opt2cmsg(mblk_t * mp,void * opt,t_uscalar_t optlen,int msg_flags,void * control,t_uscalar_t controllen)1219 so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int msg_flags,
1220 void *control, t_uscalar_t controllen)
1221 {
1222 struct T_opthdr *tohp;
1223 struct cmsghdr *cmsg;
1224 struct fdbuf *fdbuf;
1225 int fdbuflen;
1226 int error;
1227 int oldflg = (msg_flags & MSG_XPG4_2) == 0;
1228 #if defined(DEBUG) || defined(__lint)
1229 struct cmsghdr *cend = (struct cmsghdr *)
1230 (((uint8_t *)control) + ROUNDUP_cmsglen(controllen));
1231 #endif
1232 cmsg = (struct cmsghdr *)control;
1233
1234 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1235
1236 for (tohp = (struct T_opthdr *)opt;
1237 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1238 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1239 dprint(1, ("so_opt2cmsg: level 0x%x, name %d, len %d\n",
1240 tohp->level, tohp->name, tohp->len));
1241
1242 if (tohp->level == SOL_SOCKET &&
1243 (tohp->name == SO_SRCADDR ||
1244 tohp->name == SO_UNIX_CLOSE)) {
1245 continue;
1246 }
1247 ASSERT((uintptr_t)cmsg <= (uintptr_t)control + controllen);
1248 if (tohp->level == SOL_SOCKET && tohp->name == SO_FILEP) {
1249 fdbuf = (struct fdbuf *)_TPI_TOPT_DATA(tohp);
1250 fdbuflen = (int)_TPI_TOPT_DATALEN(tohp);
1251
1252 if (!fdbuf_verify(mp, fdbuf, fdbuflen))
1253 return (EPROTO);
1254 if (oldflg) {
1255 error = fdbuf_extract(fdbuf, control,
1256 (int)controllen, msg_flags);
1257 if (error != 0)
1258 return (error);
1259 continue;
1260 } else {
1261 int fdlen;
1262
1263 fdlen = (int)fdbuf_cmsglen(
1264 (int)_TPI_TOPT_DATALEN(tohp));
1265
1266 cmsg->cmsg_level = tohp->level;
1267 cmsg->cmsg_type = SCM_RIGHTS;
1268 cmsg->cmsg_len = (socklen_t)(fdlen +
1269 sizeof (struct cmsghdr));
1270
1271 error = fdbuf_extract(fdbuf,
1272 CMSG_CONTENT(cmsg), fdlen, msg_flags);
1273 if (error != 0)
1274 return (error);
1275 }
1276 } else if (tohp->level == SOL_SOCKET &&
1277 tohp->name == SCM_TIMESTAMP) {
1278 timestruc_t *timestamp;
1279
1280 if (oldflg)
1281 continue;
1282
1283 cmsg->cmsg_level = tohp->level;
1284 cmsg->cmsg_type = tohp->name;
1285
1286 timestamp =
1287 (timestruc_t *)P2ROUNDUP((intptr_t)&tohp[1],
1288 sizeof (intptr_t));
1289
1290 if (get_udatamodel() == DATAMODEL_NATIVE) {
1291 struct timeval tv;
1292
1293 cmsg->cmsg_len = sizeof (struct timeval) +
1294 sizeof (struct cmsghdr);
1295 tv.tv_sec = timestamp->tv_sec;
1296 tv.tv_usec = timestamp->tv_nsec /
1297 (NANOSEC / MICROSEC);
1298 /*
1299 * on LP64 systems, the struct timeval in
1300 * the destination will not be 8-byte aligned,
1301 * so use bcopy to avoid alignment trouble
1302 */
1303 bcopy(&tv, CMSG_CONTENT(cmsg), sizeof (tv));
1304 } else {
1305 struct timeval32 *time32;
1306
1307 cmsg->cmsg_len = sizeof (struct timeval32) +
1308 sizeof (struct cmsghdr);
1309 time32 = (struct timeval32 *)CMSG_CONTENT(cmsg);
1310 time32->tv_sec = (time32_t)timestamp->tv_sec;
1311 time32->tv_usec =
1312 (int32_t)(timestamp->tv_nsec /
1313 (NANOSEC / MICROSEC));
1314 }
1315
1316 } else {
1317 if (oldflg)
1318 continue;
1319
1320 cmsg->cmsg_level = tohp->level;
1321 cmsg->cmsg_type = tohp->name;
1322 cmsg->cmsg_len = (socklen_t)sizeof (struct cmsghdr);
1323 if (tohp->level == IPPROTO_IP &&
1324 (tohp->name == IP_RECVTOS ||
1325 tohp->name == IP_RECVTTL)) {
1326 /*
1327 * The data for these is a uint8_t but, in
1328 * order to maintain alignment for any
1329 * following TPI primitives in the message,
1330 * there will be some trailing padding bytes
1331 * which are included in the TPI_TOPT_DATALEN.
1332 * For these types, we set the cmsg_len
1333 * explicitly to the correct value.
1334 */
1335 cmsg->cmsg_len += (socklen_t)sizeof (uint8_t);
1336 } else {
1337 cmsg->cmsg_len +=
1338 (socklen_t)(_TPI_TOPT_DATALEN(tohp));
1339 }
1340
1341 /* copy content to control data part */
1342 bcopy(&tohp[1], CMSG_CONTENT(cmsg),
1343 CMSG_CONTENTLEN(cmsg));
1344 }
1345 /* move to next CMSG structure! */
1346 cmsg = CMSG_NEXT(cmsg);
1347 }
1348 dprint(1, ("so_opt2cmsg: buf %p len %d; cend %p; final cmsg %p\n",
1349 control, controllen, (void *)cend, (void *)cmsg));
1350 ASSERT(cmsg <= cend);
1351 return (0);
1352 }
1353
1354 /*
1355 * Extract the SO_SRCADDR option value if present.
1356 */
1357 void
so_getopt_srcaddr(void * opt,t_uscalar_t optlen,void ** srcp,t_uscalar_t * srclenp)1358 so_getopt_srcaddr(void *opt, t_uscalar_t optlen, void **srcp,
1359 t_uscalar_t *srclenp)
1360 {
1361 struct T_opthdr *tohp;
1362
1363 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1364
1365 ASSERT(srcp != NULL && srclenp != NULL);
1366 *srcp = NULL;
1367 *srclenp = 0;
1368
1369 for (tohp = (struct T_opthdr *)opt;
1370 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1371 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1372 dprint(1, ("so_getopt_srcaddr: level 0x%x, name %d, len %d\n",
1373 tohp->level, tohp->name, tohp->len));
1374 if (tohp->level == SOL_SOCKET &&
1375 tohp->name == SO_SRCADDR) {
1376 *srcp = _TPI_TOPT_DATA(tohp);
1377 *srclenp = (t_uscalar_t)_TPI_TOPT_DATALEN(tohp);
1378 }
1379 }
1380 }
1381
1382 /*
1383 * Verify if the SO_UNIX_CLOSE option is present.
1384 */
1385 int
so_getopt_unix_close(void * opt,t_uscalar_t optlen)1386 so_getopt_unix_close(void *opt, t_uscalar_t optlen)
1387 {
1388 struct T_opthdr *tohp;
1389
1390 ASSERT(__TPI_TOPT_ISALIGNED(opt));
1391
1392 for (tohp = (struct T_opthdr *)opt;
1393 tohp && _TPI_TOPT_VALID(tohp, opt, (uintptr_t)opt + optlen);
1394 tohp = _TPI_TOPT_NEXTHDR(opt, optlen, tohp)) {
1395 dprint(1,
1396 ("so_getopt_unix_close: level 0x%x, name %d, len %d\n",
1397 tohp->level, tohp->name, tohp->len));
1398 if (tohp->level == SOL_SOCKET &&
1399 tohp->name == SO_UNIX_CLOSE)
1400 return (1);
1401 }
1402 return (0);
1403 }
1404
1405 /*
1406 * Allocate an M_PROTO message.
1407 *
1408 * If allocation fails the behavior depends on sleepflg:
1409 * _ALLOC_NOSLEEP fail immediately
1410 * _ALLOC_INTR sleep for memory until a signal is caught
1411 * _ALLOC_SLEEP sleep forever. Don't return NULL.
1412 */
1413 mblk_t *
soallocproto(size_t size,int sleepflg,cred_t * cr)1414 soallocproto(size_t size, int sleepflg, cred_t *cr)
1415 {
1416 mblk_t *mp;
1417
1418 /* Round up size for reuse */
1419 size = MAX(size, 64);
1420 if (cr != NULL)
1421 mp = allocb_cred(size, cr, curproc->p_pid);
1422 else
1423 mp = allocb(size, BPRI_MED);
1424
1425 if (mp == NULL) {
1426 int error; /* Dummy - error not returned to caller */
1427
1428 switch (sleepflg) {
1429 case _ALLOC_SLEEP:
1430 if (cr != NULL) {
1431 mp = allocb_cred_wait(size, STR_NOSIG, &error,
1432 cr, curproc->p_pid);
1433 } else {
1434 mp = allocb_wait(size, BPRI_MED, STR_NOSIG,
1435 &error);
1436 }
1437 ASSERT(mp);
1438 break;
1439 case _ALLOC_INTR:
1440 if (cr != NULL) {
1441 mp = allocb_cred_wait(size, 0, &error, cr,
1442 curproc->p_pid);
1443 } else {
1444 mp = allocb_wait(size, BPRI_MED, 0, &error);
1445 }
1446 if (mp == NULL) {
1447 /* Caught signal while sleeping for memory */
1448 eprintline(ENOBUFS);
1449 return (NULL);
1450 }
1451 break;
1452 case _ALLOC_NOSLEEP:
1453 default:
1454 eprintline(ENOBUFS);
1455 return (NULL);
1456 }
1457 }
1458 DB_TYPE(mp) = M_PROTO;
1459 return (mp);
1460 }
1461
1462 /*
1463 * Allocate an M_PROTO message with a single component.
1464 * len is the length of buf. size is the amount to allocate.
1465 *
1466 * buf can be NULL with a non-zero len.
1467 * This results in a bzero'ed chunk being placed the message.
1468 */
1469 mblk_t *
soallocproto1(const void * buf,ssize_t len,ssize_t size,int sleepflg,cred_t * cr)1470 soallocproto1(const void *buf, ssize_t len, ssize_t size, int sleepflg,
1471 cred_t *cr)
1472 {
1473 mblk_t *mp;
1474
1475 if (size == 0)
1476 size = len;
1477
1478 ASSERT(size >= len);
1479 /* Round up size for reuse */
1480 size = MAX(size, 64);
1481 mp = soallocproto(size, sleepflg, cr);
1482 if (mp == NULL)
1483 return (NULL);
1484 mp->b_datap->db_type = M_PROTO;
1485 if (len != 0) {
1486 if (buf != NULL)
1487 bcopy(buf, mp->b_wptr, len);
1488 else
1489 bzero(mp->b_wptr, len);
1490 mp->b_wptr += len;
1491 }
1492 return (mp);
1493 }
1494
1495 /*
1496 * Append buf/len to mp.
1497 * The caller has to ensure that there is enough room in the mblk.
1498 *
1499 * buf can be NULL with a non-zero len.
1500 * This results in a bzero'ed chunk being placed the message.
1501 */
1502 void
soappendmsg(mblk_t * mp,const void * buf,ssize_t len)1503 soappendmsg(mblk_t *mp, const void *buf, ssize_t len)
1504 {
1505 ASSERT(mp);
1506
1507 if (len != 0) {
1508 /* Assert for room left */
1509 ASSERT(mp->b_datap->db_lim - mp->b_wptr >= len);
1510 if (buf != NULL)
1511 bcopy(buf, mp->b_wptr, len);
1512 else
1513 bzero(mp->b_wptr, len);
1514 }
1515 mp->b_wptr += len;
1516 }
1517
1518 /*
1519 * Create a message using two kernel buffers.
1520 * If size is set that will determine the allocation size (e.g. for future
1521 * soappendmsg calls). If size is zero it is derived from the buffer
1522 * lengths.
1523 */
1524 mblk_t *
soallocproto2(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,ssize_t size,int sleepflg,cred_t * cr)1525 soallocproto2(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1526 ssize_t size, int sleepflg, cred_t *cr)
1527 {
1528 mblk_t *mp;
1529
1530 if (size == 0)
1531 size = len1 + len2;
1532 ASSERT(size >= len1 + len2);
1533
1534 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1535 if (mp)
1536 soappendmsg(mp, buf2, len2);
1537 return (mp);
1538 }
1539
1540 /*
1541 * Create a message using three kernel buffers.
1542 * If size is set that will determine the allocation size (for future
1543 * soappendmsg calls). If size is zero it is derived from the buffer
1544 * lengths.
1545 */
1546 mblk_t *
soallocproto3(const void * buf1,ssize_t len1,const void * buf2,ssize_t len2,const void * buf3,ssize_t len3,ssize_t size,int sleepflg,cred_t * cr)1547 soallocproto3(const void *buf1, ssize_t len1, const void *buf2, ssize_t len2,
1548 const void *buf3, ssize_t len3, ssize_t size, int sleepflg, cred_t *cr)
1549 {
1550 mblk_t *mp;
1551
1552 if (size == 0)
1553 size = len1 + len2 +len3;
1554 ASSERT(size >= len1 + len2 + len3);
1555
1556 mp = soallocproto1(buf1, len1, size, sleepflg, cr);
1557 if (mp != NULL) {
1558 soappendmsg(mp, buf2, len2);
1559 soappendmsg(mp, buf3, len3);
1560 }
1561 return (mp);
1562 }
1563
1564 #ifdef DEBUG
1565 char *
pr_state(uint_t state,uint_t mode)1566 pr_state(uint_t state, uint_t mode)
1567 {
1568 static char buf[1024];
1569
1570 buf[0] = 0;
1571 if (state & SS_ISCONNECTED)
1572 (void) strcat(buf, "ISCONNECTED ");
1573 if (state & SS_ISCONNECTING)
1574 (void) strcat(buf, "ISCONNECTING ");
1575 if (state & SS_ISDISCONNECTING)
1576 (void) strcat(buf, "ISDISCONNECTING ");
1577 if (state & SS_CANTSENDMORE)
1578 (void) strcat(buf, "CANTSENDMORE ");
1579
1580 if (state & SS_CANTRCVMORE)
1581 (void) strcat(buf, "CANTRCVMORE ");
1582 if (state & SS_ISBOUND)
1583 (void) strcat(buf, "ISBOUND ");
1584 if (state & SS_NDELAY)
1585 (void) strcat(buf, "NDELAY ");
1586 if (state & SS_NONBLOCK)
1587 (void) strcat(buf, "NONBLOCK ");
1588
1589 if (state & SS_ASYNC)
1590 (void) strcat(buf, "ASYNC ");
1591 if (state & SS_ACCEPTCONN)
1592 (void) strcat(buf, "ACCEPTCONN ");
1593 if (state & SS_SAVEDEOR)
1594 (void) strcat(buf, "SAVEDEOR ");
1595
1596 if (state & SS_RCVATMARK)
1597 (void) strcat(buf, "RCVATMARK ");
1598 if (state & SS_OOBPEND)
1599 (void) strcat(buf, "OOBPEND ");
1600 if (state & SS_HAVEOOBDATA)
1601 (void) strcat(buf, "HAVEOOBDATA ");
1602 if (state & SS_HADOOBDATA)
1603 (void) strcat(buf, "HADOOBDATA ");
1604
1605 if (mode & SM_PRIV)
1606 (void) strcat(buf, "PRIV ");
1607 if (mode & SM_ATOMIC)
1608 (void) strcat(buf, "ATOMIC ");
1609 if (mode & SM_ADDR)
1610 (void) strcat(buf, "ADDR ");
1611 if (mode & SM_CONNREQUIRED)
1612 (void) strcat(buf, "CONNREQUIRED ");
1613
1614 if (mode & SM_FDPASSING)
1615 (void) strcat(buf, "FDPASSING ");
1616 if (mode & SM_EXDATA)
1617 (void) strcat(buf, "EXDATA ");
1618 if (mode & SM_OPTDATA)
1619 (void) strcat(buf, "OPTDATA ");
1620 if (mode & SM_BYTESTREAM)
1621 (void) strcat(buf, "BYTESTREAM ");
1622 return (buf);
1623 }
1624
1625 char *
pr_addr(int family,struct sockaddr * addr,t_uscalar_t addrlen)1626 pr_addr(int family, struct sockaddr *addr, t_uscalar_t addrlen)
1627 {
1628 static char buf[1024];
1629
1630 if (addr == NULL || addrlen == 0) {
1631 (void) sprintf(buf, "(len %d) %p", addrlen, (void *)addr);
1632 return (buf);
1633 }
1634 switch (family) {
1635 case AF_INET: {
1636 struct sockaddr_in sin;
1637
1638 bcopy(addr, &sin, sizeof (sin));
1639
1640 (void) sprintf(buf, "(len %d) %x/%d",
1641 addrlen, ntohl(sin.sin_addr.s_addr), ntohs(sin.sin_port));
1642 break;
1643 }
1644 case AF_INET6: {
1645 struct sockaddr_in6 sin6;
1646 uint16_t *piece = (uint16_t *)&sin6.sin6_addr;
1647
1648 bcopy((char *)addr, (char *)&sin6, sizeof (sin6));
1649 (void) sprintf(buf, "(len %d) %x:%x:%x:%x:%x:%x:%x:%x/%d",
1650 addrlen,
1651 ntohs(piece[0]), ntohs(piece[1]),
1652 ntohs(piece[2]), ntohs(piece[3]),
1653 ntohs(piece[4]), ntohs(piece[5]),
1654 ntohs(piece[6]), ntohs(piece[7]),
1655 ntohs(sin6.sin6_port));
1656 break;
1657 }
1658 case AF_UNIX: {
1659 struct sockaddr_un *soun = (struct sockaddr_un *)addr;
1660
1661 (void) sprintf(buf, "(len %d) %s", addrlen,
1662 (soun == NULL) ? "(none)" : soun->sun_path);
1663 break;
1664 }
1665 default:
1666 (void) sprintf(buf, "(unknown af %d)", family);
1667 break;
1668 }
1669 return (buf);
1670 }
1671
1672 /* The logical equivalence operator (a if-and-only-if b) */
1673 #define EQUIVALENT(a, b) (((a) && (b)) || (!(a) && (!(b))))
1674
1675 /*
1676 * Verify limitations and invariants on oob state.
1677 * Return 1 if OK, otherwise 0 so that it can be used as
1678 * ASSERT(verify_oobstate(so));
1679 */
1680 int
so_verify_oobstate(struct sonode * so)1681 so_verify_oobstate(struct sonode *so)
1682 {
1683 boolean_t havemark;
1684
1685 ASSERT(MUTEX_HELD(&so->so_lock));
1686
1687 /*
1688 * The possible state combinations are:
1689 * 0
1690 * SS_OOBPEND
1691 * SS_OOBPEND|SS_HAVEOOBDATA
1692 * SS_OOBPEND|SS_HADOOBDATA
1693 * SS_HADOOBDATA
1694 */
1695 switch (so->so_state & (SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA)) {
1696 case 0:
1697 case SS_OOBPEND:
1698 case SS_OOBPEND|SS_HAVEOOBDATA:
1699 case SS_OOBPEND|SS_HADOOBDATA:
1700 case SS_HADOOBDATA:
1701 break;
1702 default:
1703 printf("Bad oob state 1 (%p): state %s\n",
1704 (void *)so, pr_state(so->so_state, so->so_mode));
1705 return (0);
1706 }
1707
1708 /* SS_RCVATMARK should only be set when SS_OOBPEND is set */
1709 if ((so->so_state & (SS_RCVATMARK|SS_OOBPEND)) == SS_RCVATMARK) {
1710 printf("Bad oob state 2 (%p): state %s\n",
1711 (void *)so, pr_state(so->so_state, so->so_mode));
1712 return (0);
1713 }
1714
1715 /*
1716 * (havemark != 0 or SS_RCVATMARK) iff SS_OOBPEND
1717 * For TPI, the presence of a "mark" is indicated by sti_oobsigcnt.
1718 */
1719 havemark = (SOCK_IS_NONSTR(so)) ? so->so_oobmark > 0 :
1720 SOTOTPI(so)->sti_oobsigcnt > 0;
1721
1722 if (!EQUIVALENT(havemark || (so->so_state & SS_RCVATMARK),
1723 so->so_state & SS_OOBPEND)) {
1724 printf("Bad oob state 3 (%p): state %s\n",
1725 (void *)so, pr_state(so->so_state, so->so_mode));
1726 return (0);
1727 }
1728
1729 /*
1730 * Unless SO_OOBINLINE we have so_oobmsg != NULL iff SS_HAVEOOBDATA
1731 */
1732 if (!(so->so_options & SO_OOBINLINE) &&
1733 !EQUIVALENT(so->so_oobmsg != NULL, so->so_state & SS_HAVEOOBDATA)) {
1734 printf("Bad oob state 4 (%p): state %s\n",
1735 (void *)so, pr_state(so->so_state, so->so_mode));
1736 return (0);
1737 }
1738
1739 if (!SOCK_IS_NONSTR(so) &&
1740 SOTOTPI(so)->sti_oobsigcnt < SOTOTPI(so)->sti_oobcnt) {
1741 printf("Bad oob state 5 (%p): counts %d/%d state %s\n",
1742 (void *)so, SOTOTPI(so)->sti_oobsigcnt,
1743 SOTOTPI(so)->sti_oobcnt,
1744 pr_state(so->so_state, so->so_mode));
1745 return (0);
1746 }
1747
1748 return (1);
1749 }
1750 #undef EQUIVALENT
1751 #endif /* DEBUG */
1752
1753 /* initialize sockfs zone specific kstat related items */
1754 void *
sock_kstat_init(zoneid_t zoneid)1755 sock_kstat_init(zoneid_t zoneid)
1756 {
1757 kstat_t *ksp;
1758
1759 ksp = kstat_create_zone("sockfs", 0, "sock_unix_list", "misc",
1760 KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VAR_SIZE|KSTAT_FLAG_VIRTUAL, zoneid);
1761
1762 if (ksp != NULL) {
1763 ksp->ks_update = sockfs_update;
1764 ksp->ks_snapshot = sockfs_snapshot;
1765 ksp->ks_lock = &socklist.sl_lock;
1766 ksp->ks_private = (void *)(uintptr_t)zoneid;
1767 kstat_install(ksp);
1768 }
1769
1770 return (ksp);
1771 }
1772
1773 /* tear down sockfs zone specific kstat related items */
1774 /*ARGSUSED*/
1775 void
sock_kstat_fini(zoneid_t zoneid,void * arg)1776 sock_kstat_fini(zoneid_t zoneid, void *arg)
1777 {
1778 kstat_t *ksp = (kstat_t *)arg;
1779
1780 if (ksp != NULL) {
1781 ASSERT(zoneid == (zoneid_t)(uintptr_t)ksp->ks_private);
1782 kstat_delete(ksp);
1783 }
1784 }
1785
1786 /*
1787 * Zones:
1788 * Note that nactive is going to be different for each zone.
1789 * This means we require kstat to call sockfs_update and then sockfs_snapshot
1790 * for the same zone, or sockfs_snapshot will be taken into the wrong size
1791 * buffer. This is safe, but if the buffer is too small, user will not be
1792 * given details of all sockets. However, as this kstat has a ks_lock, kstat
1793 * driver will keep it locked between the update and the snapshot, so no
1794 * other process (zone) can currently get inbetween resulting in a wrong size
1795 * buffer allocation.
1796 */
1797 static int
sockfs_update(kstat_t * ksp,int rw)1798 sockfs_update(kstat_t *ksp, int rw)
1799 {
1800 uint_t nactive = 0; /* # of active AF_UNIX sockets */
1801 struct sonode *so; /* current sonode on socklist */
1802 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1803
1804 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1805
1806 if (rw == KSTAT_WRITE) { /* bounce all writes */
1807 return (EACCES);
1808 }
1809
1810 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1811 if (so->so_count != 0 && so->so_zoneid == myzoneid) {
1812 nactive++;
1813 }
1814 }
1815 ksp->ks_ndata = nactive;
1816 ksp->ks_data_size = nactive * sizeof (struct sockinfo);
1817
1818 return (0);
1819 }
1820
1821 static int
sockfs_snapshot(kstat_t * ksp,void * buf,int rw)1822 sockfs_snapshot(kstat_t *ksp, void *buf, int rw)
1823 {
1824 int ns; /* # of sonodes we've copied */
1825 struct sonode *so; /* current sonode on socklist */
1826 struct sockinfo *psi; /* where we put sockinfo data */
1827 t_uscalar_t sn_len; /* soa_len */
1828 zoneid_t myzoneid = (zoneid_t)(uintptr_t)ksp->ks_private;
1829 sotpi_info_t *sti;
1830
1831 ASSERT((zoneid_t)(uintptr_t)ksp->ks_private == getzoneid());
1832
1833 ksp->ks_snaptime = gethrtime();
1834
1835 if (rw == KSTAT_WRITE) { /* bounce all writes */
1836 return (EACCES);
1837 }
1838
1839 /*
1840 * For each sonode on the socklist, we massage the important
1841 * info into buf, in sockinfo format.
1842 */
1843 psi = (struct sockinfo *)buf;
1844 ns = 0;
1845 for (so = socklist.sl_list; so != NULL; so = SOTOTPI(so)->sti_next_so) {
1846 vattr_t attr;
1847
1848 /* only stuff active sonodes and the same zone: */
1849 if (so->so_count == 0 || so->so_zoneid != myzoneid) {
1850 continue;
1851 }
1852
1853 /*
1854 * If the sonode was activated between the update and the
1855 * snapshot, we're done - as this is only a snapshot.
1856 */
1857 if ((caddr_t)(psi) >= (caddr_t)buf + ksp->ks_data_size) {
1858 break;
1859 }
1860
1861 sti = SOTOTPI(so);
1862 /* copy important info into buf: */
1863 psi->si_size = sizeof (struct sockinfo);
1864 psi->si_family = so->so_family;
1865 psi->si_type = so->so_type;
1866 psi->si_flag = so->so_flag;
1867 psi->si_state = so->so_state;
1868 psi->si_serv_type = sti->sti_serv_type;
1869 psi->si_ux_laddr_sou_magic = sti->sti_ux_laddr.soua_magic;
1870 psi->si_ux_faddr_sou_magic = sti->sti_ux_faddr.soua_magic;
1871 psi->si_laddr_soa_len = sti->sti_laddr.soa_len;
1872 psi->si_faddr_soa_len = sti->sti_faddr.soa_len;
1873 psi->si_szoneid = so->so_zoneid;
1874 psi->si_faddr_noxlate = sti->sti_faddr_noxlate;
1875
1876 /*
1877 * Grab the inode, if possible.
1878 * This must be done before entering so_lock as VOP_GETATTR
1879 * will acquire it.
1880 */
1881 if (so->so_vnode == NULL ||
1882 VOP_GETATTR(so->so_vnode, &attr, 0, CRED(), NULL) != 0)
1883 attr.va_nodeid = 0;
1884
1885 psi->si_inode = attr.va_nodeid;
1886
1887 mutex_enter(&so->so_lock);
1888
1889 if (sti->sti_laddr_sa != NULL) {
1890 ASSERT(sti->sti_laddr_sa->sa_data != NULL);
1891 sn_len = sti->sti_laddr_len;
1892 ASSERT(sn_len <= sizeof (short) +
1893 sizeof (psi->si_laddr_sun_path));
1894
1895 psi->si_laddr_family =
1896 sti->sti_laddr_sa->sa_family;
1897 if (sn_len != 0) {
1898 /* AF_UNIX socket names are NULL terminated */
1899 (void) strncpy(psi->si_laddr_sun_path,
1900 sti->sti_laddr_sa->sa_data,
1901 sizeof (psi->si_laddr_sun_path));
1902 sn_len = strlen(psi->si_laddr_sun_path);
1903 }
1904 psi->si_laddr_sun_path[sn_len] = 0;
1905 }
1906
1907 if (sti->sti_faddr_sa != NULL) {
1908 ASSERT(sti->sti_faddr_sa->sa_data != NULL);
1909 sn_len = sti->sti_faddr_len;
1910 ASSERT(sn_len <= sizeof (short) +
1911 sizeof (psi->si_faddr_sun_path));
1912
1913 psi->si_faddr_family =
1914 sti->sti_faddr_sa->sa_family;
1915 if (sn_len != 0) {
1916 (void) strncpy(psi->si_faddr_sun_path,
1917 sti->sti_faddr_sa->sa_data,
1918 sizeof (psi->si_faddr_sun_path));
1919 sn_len = strlen(psi->si_faddr_sun_path);
1920 }
1921 psi->si_faddr_sun_path[sn_len] = 0;
1922 }
1923
1924 mutex_exit(&so->so_lock);
1925
1926 (void) snprintf(psi->si_son_straddr,
1927 sizeof (psi->si_son_straddr), "%p", (void *)so);
1928 (void) snprintf(psi->si_lvn_straddr,
1929 sizeof (psi->si_lvn_straddr), "%p",
1930 (void *)sti->sti_ux_laddr.soua_vp);
1931 (void) snprintf(psi->si_fvn_straddr,
1932 sizeof (psi->si_fvn_straddr), "%p",
1933 (void *)sti->sti_ux_faddr.soua_vp);
1934
1935 ns++;
1936 psi++;
1937 }
1938
1939 ksp->ks_ndata = ns;
1940 return (0);
1941 }
1942
1943 ssize_t
soreadfile(file_t * fp,uchar_t * buf,u_offset_t fileoff,int * err,size_t size)1944 soreadfile(file_t *fp, uchar_t *buf, u_offset_t fileoff, int *err, size_t size)
1945 {
1946 struct uio auio;
1947 struct iovec aiov[1];
1948 register vnode_t *vp;
1949 int ioflag, rwflag;
1950 ssize_t cnt;
1951 int error = 0;
1952 int iovcnt = 0;
1953 short fflag;
1954
1955 vp = fp->f_vnode;
1956 fflag = fp->f_flag;
1957
1958 rwflag = 0;
1959 aiov[0].iov_base = (caddr_t)buf;
1960 aiov[0].iov_len = size;
1961 iovcnt = 1;
1962 cnt = (ssize_t)size;
1963 (void) VOP_RWLOCK(vp, rwflag, NULL);
1964
1965 auio.uio_loffset = fileoff;
1966 auio.uio_iov = aiov;
1967 auio.uio_iovcnt = iovcnt;
1968 auio.uio_resid = cnt;
1969 auio.uio_segflg = UIO_SYSSPACE;
1970 auio.uio_llimit = MAXOFFSET_T;
1971 auio.uio_fmode = fflag;
1972 auio.uio_extflg = UIO_COPY_CACHED;
1973
1974 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1975
1976 /* If read sync is not asked for, filter sync flags */
1977 if ((ioflag & FRSYNC) == 0)
1978 ioflag &= ~(FSYNC|FDSYNC);
1979 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1980 cnt -= auio.uio_resid;
1981
1982 VOP_RWUNLOCK(vp, rwflag, NULL);
1983
1984 if (error == EINTR && cnt != 0)
1985 error = 0;
1986
1987 if (error != 0) {
1988 *err = error;
1989 return (0);
1990 } else {
1991 *err = 0;
1992 return (cnt);
1993 }
1994 }
1995
/*
 * Copy size bytes from from to to. If fromkernel is non-zero the source
 * is copied with bcopy and zero is returned; otherwise the copy is done
 * with xcopyin and its return value is passed back.
 */
int
so_copyin(const void *from, void *to, size_t size, int fromkernel)
{
	if (!fromkernel)
		return (xcopyin(from, to, size));

	bcopy(from, to, size);
	return (0);
}
2005
/*
 * Copy size bytes from from to to. If tokernel is non-zero the
 * destination is written with bcopy and zero is returned; otherwise the
 * copy is done with xcopyout and its return value is passed back.
 */
int
so_copyout(const void *from, void *to, size_t size, int tokernel)
{
	if (!tokernel)
		return (xcopyout(from, to, size));

	bcopy(from, to, size);
	return (0);
}
2015