1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* 27 * Copyright (c) 2017 by Delphix. All rights reserved. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/t_lock.h> 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/bitmap.h> 35 #include <sys/debug.h> 36 #include <sys/errno.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/sysmacros.h> 40 #include <sys/filio.h> 41 #include <sys/flock.h> 42 #include <sys/stat.h> 43 #include <sys/share.h> 44 45 #include <sys/vfs.h> 46 #include <sys/vfs_opreg.h> 47 48 #include <sys/sockio.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/strsun.h> 52 53 #include <fs/sockfs/sockcommon.h> 54 #include <fs/sockfs/socktpi.h> 55 56 /* 57 * Generic vnode ops 58 */ 59 static int socket_vop_open(struct vnode **, int, struct cred *, 60 caller_context_t *); 61 static int socket_vop_close(struct vnode *, int, int, offset_t, 62 struct cred *, caller_context_t *); 63 static int socket_vop_read(struct vnode *, struct uio *, int, 64 struct cred *, caller_context_t *); 65 static int socket_vop_write(struct vnode *, struct uio *, int, 66 struct cred *, caller_context_t *); 67 static int socket_vop_ioctl(struct vnode *, int, intptr_t, int, 68 struct cred *, int32_t *, caller_context_t *); 69 static int socket_vop_setfl(struct vnode *, int, int, cred_t *, 70 caller_context_t *); 71 static int socket_vop_getattr(struct vnode *, struct vattr *, int, 72 struct cred *, caller_context_t *); 73 static int socket_vop_setattr(struct vnode *, struct vattr *, int, 74 struct cred *, caller_context_t *); 75 static int socket_vop_access(struct vnode *, int, int, struct cred *, 76 caller_context_t *); 77 static int socket_vop_fsync(struct vnode *, int, struct cred *, 78 caller_context_t *); 79 static void socket_vop_inactive(struct vnode *, struct cred *, 80 caller_context_t *); 81 static int socket_vop_fid(struct vnode *, struct fid *, 82 caller_context_t *); 83 static int socket_vop_seek(struct vnode *, offset_t, offset_t *, 84 caller_context_t *); 85 static int socket_vop_poll(struct vnode *, short, int, short *, 86 struct pollhead **, caller_context_t *); 87 88 extern int socket_close_internal(struct sonode *, int, cred_t *); 89 extern void socket_destroy_internal(struct sonode *, cred_t *); 90 91 struct vnodeops *socket_vnodeops; 92 const fs_operation_def_t socket_vnodeops_template[] = { 93 VOPNAME_OPEN, { .vop_open = socket_vop_open }, 94 VOPNAME_CLOSE, { .vop_close = socket_vop_close }, 95 VOPNAME_READ, { .vop_read = socket_vop_read }, 96 VOPNAME_WRITE, { .vop_write = socket_vop_write }, 97 VOPNAME_IOCTL, { .vop_ioctl = socket_vop_ioctl }, 98 VOPNAME_SETFL, { .vop_setfl = socket_vop_setfl }, 99 VOPNAME_GETATTR, { .vop_getattr = socket_vop_getattr }, 100 VOPNAME_SETATTR, { .vop_setattr = socket_vop_setattr }, 101 VOPNAME_ACCESS, { .vop_access = socket_vop_access }, 102 VOPNAME_FSYNC, { .vop_fsync = socket_vop_fsync }, 103 VOPNAME_INACTIVE, { .vop_inactive = socket_vop_inactive }, 104 VOPNAME_FID, { .vop_fid = socket_vop_fid }, 105 VOPNAME_SEEK, { .vop_seek = socket_vop_seek }, 106 VOPNAME_POLL, { .vop_poll = socket_vop_poll }, 107 VOPNAME_DISPOSE, { .error = fs_error }, 108 NULL, NULL 109 }; 110 111 112 /* 113 * generic vnode ops 114 */ 115 116 /*ARGSUSED*/ 117 static int 118 socket_vop_open(struct vnode **vpp, int flag, struct cred *cr, 119 caller_context_t *ct) 120 { 121 struct vnode *vp = *vpp; 122 struct sonode *so = VTOSO(vp); 123 124 flag &= ~FCREAT; /* paranoia */ 125 mutex_enter(&so->so_lock); 126 so->so_count++; 127 mutex_exit(&so->so_lock); 128 129 ASSERT(so->so_count != 0); /* wraparound */ 130 ASSERT(vp->v_type == VSOCK); 131 132 return (0); 133 } 134 135 /*ARGSUSED*/ 136 static int 137 socket_vop_close(struct vnode *vp, int flag, int count, offset_t offset, 138 struct cred *cr, caller_context_t *ct) 139 { 140 struct sonode *so; 141 int error = 0; 142 143 so = VTOSO(vp); 144 ASSERT(vp->v_type == VSOCK); 145 146 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 147 cleanshares(vp, ttoproc(curthread)->p_pid); 148 149 if (vp->v_stream) 150 strclean(vp); 151 152 if (count > 1) { 153 dprint(2, ("socket_vop_close: count %d\n", count)); 154 return (0); 155 } 156 157 mutex_enter(&so->so_lock); 158 if (--so->so_count == 0) { 159 /* 160 * Initiate connection shutdown. 161 */ 162 mutex_exit(&so->so_lock); 163 error = socket_close_internal(so, flag, cr); 164 } else { 165 mutex_exit(&so->so_lock); 166 } 167 168 return (error); 169 } 170 171 /*ARGSUSED2*/ 172 static int 173 socket_vop_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cr, 174 caller_context_t *ct) 175 { 176 struct sonode *so = VTOSO(vp); 177 struct nmsghdr lmsg; 178 179 ASSERT(vp->v_type == VSOCK); 180 bzero((void *)&lmsg, sizeof (lmsg)); 181 182 return (socket_recvmsg(so, &lmsg, uiop, cr)); 183 } 184 185 /*ARGSUSED2*/ 186 static int 187 socket_vop_write(struct vnode *vp, struct uio *uiop, int ioflag, 188 struct cred *cr, caller_context_t *ct) 189 { 190 struct sonode *so = VTOSO(vp); 191 struct nmsghdr lmsg; 192 193 ASSERT(vp->v_type == VSOCK); 194 bzero((void *)&lmsg, sizeof (lmsg)); 195 196 if (!(so->so_mode & SM_BYTESTREAM)) { 197 /* 198 * If the socket is not byte stream set MSG_EOR 199 */ 200 lmsg.msg_flags = MSG_EOR; 201 } 202 203 return (socket_sendmsg(so, &lmsg, uiop, cr)); 204 } 205 206 /*ARGSUSED4*/ 207 static int 208 socket_vop_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, 209 struct cred *cr, int32_t *rvalp, caller_context_t *ct) 210 { 211 struct sonode *so = VTOSO(vp); 212 213 ASSERT(vp->v_type == VSOCK); 214 215 return (socket_ioctl(so, cmd, arg, mode, cr, rvalp)); 216 } 217 218 /* 219 * Allow any flags. Record FNDELAY and FNONBLOCK so that they can be inherited 220 * from listener to acceptor. 221 */ 222 /* ARGSUSED */ 223 static int 224 socket_vop_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, 225 caller_context_t *ct) 226 { 227 struct sonode *so = VTOSO(vp); 228 int error = 0; 229 230 ASSERT(vp->v_type == VSOCK); 231 232 mutex_enter(&so->so_lock); 233 if (nflags & FNDELAY) 234 so->so_state |= SS_NDELAY; 235 else 236 so->so_state &= ~SS_NDELAY; 237 if (nflags & FNONBLOCK) 238 so->so_state |= SS_NONBLOCK; 239 else 240 so->so_state &= ~SS_NONBLOCK; 241 mutex_exit(&so->so_lock); 242 243 if (so->so_state & SS_ASYNC) 244 oflags |= FASYNC; 245 /* 246 * Sets/clears the SS_ASYNC flag based on the presence/absence 247 * of the FASYNC flag passed to fcntl(F_SETFL). 248 * This exists solely for BSD fcntl() FASYNC compatibility. 249 */ 250 if ((oflags ^ nflags) & FASYNC && so->so_version != SOV_STREAM) { 251 int async = nflags & FASYNC; 252 int32_t rv; 253 254 /* 255 * For non-TPI sockets all we have to do is set/remove the 256 * SS_ASYNC bit, but for TPI it is more involved. For that 257 * reason we delegate the job to the protocol's ioctl handler. 258 */ 259 error = socket_ioctl(so, FIOASYNC, (intptr_t)&async, FKIOCTL, 260 cr, &rv); 261 } 262 return (error); 263 } 264 265 266 /* 267 * Get the made up attributes for the vnode. 268 * 4.3BSD returns the current time for all the timestamps. 269 * 4.4BSD returns 0 for all the timestamps. 270 * Here we use the access and modified times recorded in the sonode. 271 * 272 * Just like in BSD there is not effect on the underlying file system node 273 * bound to an AF_UNIX pathname. 274 * 275 * When sockmod has been popped this will act just like a stream. Since 276 * a socket is always a clone there is no need to inspect the attributes 277 * of the "realvp". 278 */ 279 /* ARGSUSED */ 280 int 281 socket_vop_getattr(struct vnode *vp, struct vattr *vap, int flags, 282 struct cred *cr, caller_context_t *ct) 283 { 284 dev_t fsid; 285 struct sonode *so; 286 static int sonode_shift = 0; 287 288 /* 289 * Calculate the amount of bitshift to a sonode pointer which will 290 * still keep it unique. See below. 291 */ 292 if (sonode_shift == 0) 293 sonode_shift = highbit(sizeof (struct sonode)); 294 ASSERT(sonode_shift > 0); 295 296 so = VTOSO(vp); 297 fsid = sockdev; 298 299 if (so->so_version == SOV_STREAM) { 300 /* 301 * The imaginary "sockmod" has been popped - act 302 * as a stream 303 */ 304 vap->va_type = VCHR; 305 vap->va_mode = 0; 306 } else { 307 vap->va_type = vp->v_type; 308 vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP| 309 S_IROTH|S_IWOTH; 310 } 311 vap->va_uid = vap->va_gid = 0; 312 vap->va_fsid = fsid; 313 /* 314 * If the va_nodeid is > MAX_USHORT, then i386 stats might fail. 315 * So we shift down the sonode pointer to try and get the most 316 * uniqueness into 16-bits. 317 */ 318 vap->va_nodeid = ((ino_t)so >> sonode_shift) & 0xFFFF; 319 vap->va_nlink = 0; 320 vap->va_size = 0; 321 322 /* 323 * We need to zero out the va_rdev to avoid some fstats getting 324 * EOVERFLOW. This also mimics SunOS 4.x and BSD behavior. 325 */ 326 vap->va_rdev = (dev_t)0; 327 vap->va_blksize = MAXBSIZE; 328 vap->va_nblocks = btod(vap->va_size); 329 330 if (!SOCK_IS_NONSTR(so)) { 331 sotpi_info_t *sti = SOTOTPI(so); 332 333 mutex_enter(&so->so_lock); 334 vap->va_atime.tv_sec = sti->sti_atime; 335 vap->va_mtime.tv_sec = sti->sti_mtime; 336 vap->va_ctime.tv_sec = sti->sti_ctime; 337 mutex_exit(&so->so_lock); 338 } else { 339 vap->va_atime.tv_sec = 0; 340 vap->va_mtime.tv_sec = 0; 341 vap->va_ctime.tv_sec = 0; 342 } 343 344 vap->va_atime.tv_nsec = 0; 345 vap->va_mtime.tv_nsec = 0; 346 vap->va_ctime.tv_nsec = 0; 347 vap->va_seq = 0; 348 349 return (0); 350 } 351 352 /* 353 * Set attributes. 354 * Just like in BSD there is not effect on the underlying file system node 355 * bound to an AF_UNIX pathname. 356 * 357 * When sockmod has been popped this will act just like a stream. Since 358 * a socket is always a clone there is no need to modify the attributes 359 * of the "realvp". 360 */ 361 /* ARGSUSED */ 362 int 363 socket_vop_setattr(struct vnode *vp, struct vattr *vap, int flags, 364 struct cred *cr, caller_context_t *ct) 365 { 366 struct sonode *so = VTOSO(vp); 367 368 /* 369 * If times were changed, and we have a STREAMS socket, then update 370 * the sonode. 371 */ 372 if (!SOCK_IS_NONSTR(so)) { 373 sotpi_info_t *sti = SOTOTPI(so); 374 375 mutex_enter(&so->so_lock); 376 if (vap->va_mask & AT_ATIME) 377 sti->sti_atime = vap->va_atime.tv_sec; 378 if (vap->va_mask & AT_MTIME) { 379 sti->sti_mtime = vap->va_mtime.tv_sec; 380 sti->sti_ctime = gethrestime_sec(); 381 } 382 mutex_exit(&so->so_lock); 383 } 384 385 return (0); 386 } 387 388 /* 389 * Check if user is allowed to access vp. For non-STREAMS based sockets, 390 * there might not be a device attached to the file system. So for those 391 * types of sockets there are no permissions to check. 392 * 393 * XXX Should there be some other mechanism to check access rights? 394 */ 395 /*ARGSUSED*/ 396 int 397 socket_vop_access(struct vnode *vp, int mode, int flags, struct cred *cr, 398 caller_context_t *ct) 399 { 400 struct sonode *so = VTOSO(vp); 401 402 if (!SOCK_IS_NONSTR(so)) { 403 ASSERT(so->so_sockparams->sp_sdev_info.sd_vnode != NULL); 404 return (VOP_ACCESS(so->so_sockparams->sp_sdev_info.sd_vnode, 405 mode, flags, cr, NULL)); 406 } 407 return (0); 408 } 409 410 /* 411 * 4.3BSD and 4.4BSD fail a fsync on a socket with EINVAL. 412 * This code does the same to be compatible and also to not give an 413 * application the impression that the data has actually been "synced" 414 * to the other end of the connection. 415 */ 416 /* ARGSUSED */ 417 int 418 socket_vop_fsync(struct vnode *vp, int syncflag, struct cred *cr, 419 caller_context_t *ct) 420 { 421 return (EINVAL); 422 } 423 424 /*ARGSUSED*/ 425 static void 426 socket_vop_inactive(struct vnode *vp, struct cred *cr, caller_context_t *ct) 427 { 428 struct sonode *so = VTOSO(vp); 429 430 ASSERT(vp->v_type == VSOCK); 431 432 mutex_enter(&vp->v_lock); 433 /* 434 * If no one has reclaimed the vnode, remove from the 435 * cache now. 436 */ 437 if (vp->v_count < 1) 438 cmn_err(CE_PANIC, "socket_inactive: Bad v_count"); 439 440 VN_RELE_LOCKED(vp); 441 if (vp->v_count != 0) { 442 mutex_exit(&vp->v_lock); 443 return; 444 } 445 mutex_exit(&vp->v_lock); 446 447 448 ASSERT(!vn_has_cached_data(vp)); 449 450 /* socket specfic clean-up */ 451 socket_destroy_internal(so, cr); 452 } 453 454 /* ARGSUSED */ 455 int 456 socket_vop_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 457 { 458 return (EINVAL); 459 } 460 461 /* 462 * Sockets are not seekable. 463 * (and there is a bug to fix STREAMS to make them fail this as well). 464 */ 465 /*ARGSUSED*/ 466 int 467 socket_vop_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 468 caller_context_t *ct) 469 { 470 return (ESPIPE); 471 } 472 473 /*ARGSUSED*/ 474 static int 475 socket_vop_poll(struct vnode *vp, short events, int anyyet, short *reventsp, 476 struct pollhead **phpp, caller_context_t *ct) 477 { 478 struct sonode *so = VTOSO(vp); 479 480 ASSERT(vp->v_type == VSOCK); 481 482 return (socket_poll(so, events, anyyet, reventsp, phpp)); 483 } 484