1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * %sccs.include.redist.c% 9 * 10 * @(#)nfs_vfsops.c 7.39 (Berkeley) 06/05/92 11 */ 12 13 #include "param.h" 14 #include "conf.h" 15 #include "ioctl.h" 16 #include "signal.h" 17 #include "proc.h" 18 #include "namei.h" 19 #include "vnode.h" 20 #include "kernel.h" 21 #include "mount.h" 22 #include "buf.h" 23 #include "mbuf.h" 24 #include "socket.h" 25 #include "systm.h" 26 27 #include "net/if.h" 28 #include "net/route.h" 29 #include "netinet/in.h" 30 31 #include "rpcv2.h" 32 #include "nfsv2.h" 33 #include "nfsnode.h" 34 #include "nfsmount.h" 35 #include "nfs.h" 36 #include "xdr_subs.h" 37 #include "nfsm_subs.h" 38 #include "nfsdiskless.h" 39 #include "nqnfs.h" 40 41 /* 42 * nfs vfs operations. 43 */ 44 struct vfsops nfs_vfsops = { 45 nfs_mount, 46 nfs_start, 47 nfs_unmount, 48 nfs_root, 49 nfs_quotactl, 50 nfs_statfs, 51 nfs_sync, 52 nfs_fhtovp, 53 nfs_vptofh, 54 nfs_init, 55 }; 56 57 /* 58 * This structure must be filled in by a primary bootstrap or bootstrap 59 * server for a diskless/dataless machine. It is initialized below just 60 * to ensure that it is allocated to initialized data (.data not .bss). 61 */ 62 struct nfs_diskless nfs_diskless = { 0 }; 63 64 static u_char nfs_mntid; 65 extern u_long nfs_procids[NFS_NPROCS]; 66 extern u_long nfs_prog, nfs_vers; 67 void nfs_disconnect(), nfsargs_ntoh(); 68 69 #define TRUE 1 70 #define FALSE 0 71 72 /* 73 * nfs statfs call 74 */ 75 nfs_statfs(mp, sbp, p) 76 struct mount *mp; 77 register struct statfs *sbp; 78 struct proc *p; 79 { 80 register struct vnode *vp; 81 register struct nfsv2_statfs *sfp; 82 register caddr_t cp; 83 register long t1; 84 caddr_t bpos, dpos, cp2; 85 u_long xid; 86 int error = 0; 87 struct mbuf *mreq, *mrep, *md, *mb, *mb2; 88 struct nfsmount *nmp; 89 struct ucred *cred; 90 struct nfsnode *np; 91 92 nmp = VFSTONFS(mp); 93 if (error = nfs_nget(mp, &nmp->nm_fh, &np)) 94 return (error); 95 vp = NFSTOV(np); 96 nfsstats.rpccnt[NFSPROC_STATFS]++; 97 cred = crget(); 98 cred->cr_ngroups = 1; 99 nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH); 100 nfsm_fhtom(vp); 101 nfsm_request(vp, NFSPROC_STATFS, p, cred); 102 nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS); 103 sbp->f_type = MOUNT_NFS; 104 sbp->f_flags = nmp->nm_flag; 105 sbp->f_iosize = NFS_MAXDGRAMDATA; 106 sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize); 107 sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks); 108 sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree); 109 sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail); 110 sbp->f_files = 0; 111 sbp->f_ffree = 0; 112 if (sbp != &mp->mnt_stat) { 113 bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); 114 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 115 } 116 nfsm_reqdone; 117 vrele(vp); 118 crfree(cred); 119 return (error); 120 } 121 122 /* 123 * Mount a remote root fs via. nfs. This depends on the info in the 124 * nfs_diskless structure that has been filled in properly by some primary 125 * bootstrap. 126 * It goes something like this: 127 * - do enough of "ifconfig" by calling ifioctl() so that the system 128 * can talk to the server 129 * - If nfs_diskless.mygateway is filled in, use that address as 130 * a default gateway. 131 * - hand craft the swap nfs vnode hanging off a fake mount point 132 * if swdevt[0].sw_dev == NODEV 133 * - build the rootfs mount point and call mountnfs() to do the rest. 134 */ 135 nfs_mountroot() 136 { 137 register struct mount *mp; 138 register struct mbuf *m; 139 struct socket *so; 140 struct vnode *vp; 141 int error, i; 142 143 /* 144 * Do enough of ifconfig(8) so that the critical net interface can 145 * talk to the server. 146 */ 147 if (socreate(nfs_diskless.myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0)) 148 panic("nfs ifconf"); 149 if (ifioctl(so, SIOCAIFADDR, &nfs_diskless.myif, curproc)) /* XXX */ 150 panic("nfs ifconf2"); 151 soclose(so); 152 153 /* 154 * If the gateway field is filled in, set it as the default route. 155 */ 156 if (nfs_diskless.mygateway.sin_len != 0) { 157 struct sockaddr_in sin; 158 extern struct sockaddr_in icmpmask; 159 160 sin.sin_len = sizeof (struct sockaddr_in); 161 sin.sin_family = AF_INET; 162 sin.sin_addr.s_addr = 0; /* default */ 163 in_sockmaskof(sin.sin_addr, &icmpmask); 164 if (rtrequest(RTM_ADD, (struct sockaddr *)&sin, 165 (struct sockaddr *)&nfs_diskless.mygateway, 166 (struct sockaddr *)&icmpmask, 167 RTF_UP | RTF_GATEWAY, (struct rtentry **)0)) 168 panic("nfs root route"); 169 } 170 171 /* 172 * If swapping to an nfs node (indicated by swdevt[0].sw_dev == NODEV): 173 * Create a fake mount point just for the swap vnode so that the 174 * swap file can be on a different server from the rootfs. 175 */ 176 if (swdevt[0].sw_dev == NODEV) { 177 mp = (struct mount *)malloc((u_long)sizeof(struct mount), 178 M_MOUNT, M_NOWAIT); 179 if (mp == NULL) 180 panic("nfs root mount"); 181 mp->mnt_op = &nfs_vfsops; 182 mp->mnt_flag = 0; 183 mp->mnt_mounth = NULLVP; 184 185 /* 186 * Set up the diskless nfs_args for the swap mount point 187 * and then call mountnfs() to mount it. 188 * Since the swap file is not the root dir of a file system, 189 * hack it to a regular file. 190 */ 191 nfs_diskless.swap_args.fh = (nfsv2fh_t *)nfs_diskless.swap_fh; 192 MGET(m, MT_SONAME, M_DONTWAIT); 193 if (m == NULL) 194 panic("nfs root mbuf"); 195 bcopy((caddr_t)&nfs_diskless.swap_saddr, mtod(m, caddr_t), 196 nfs_diskless.swap_saddr.sin_len); 197 m->m_len = (int)nfs_diskless.swap_saddr.sin_len; 198 nfsargs_ntoh(&nfs_diskless.swap_args); 199 if (mountnfs(&nfs_diskless.swap_args, mp, m, "/swap", 200 nfs_diskless.swap_hostnam, &vp)) 201 panic("nfs swap"); 202 vp->v_type = VREG; 203 vp->v_flag = 0; 204 swapdev_vp = vp; 205 VREF(vp); 206 swdevt[0].sw_vp = vp; 207 swdevt[0].sw_nblks = ntohl(nfs_diskless.swap_nblks); 208 } 209 210 /* 211 * Create the rootfs mount point. 212 */ 213 mp = (struct mount *)malloc((u_long)sizeof(struct mount), 214 M_MOUNT, M_NOWAIT); 215 if (mp == NULL) 216 panic("nfs root mount2"); 217 mp->mnt_op = &nfs_vfsops; 218 mp->mnt_flag = MNT_RDONLY; 219 mp->mnt_mounth = NULLVP; 220 221 /* 222 * Set up the root fs args and call mountnfs() to do the rest. 223 */ 224 nfs_diskless.root_args.fh = (nfsv2fh_t *)nfs_diskless.root_fh; 225 MGET(m, MT_SONAME, M_DONTWAIT); 226 if (m == NULL) 227 panic("nfs root mbuf2"); 228 bcopy((caddr_t)&nfs_diskless.root_saddr, mtod(m, caddr_t), 229 nfs_diskless.root_saddr.sin_len); 230 m->m_len = (int)nfs_diskless.root_saddr.sin_len; 231 nfsargs_ntoh(&nfs_diskless.root_args); 232 if (mountnfs(&nfs_diskless.root_args, mp, m, "/", 233 nfs_diskless.root_hostnam, &vp)) 234 panic("nfs root"); 235 if (vfs_lock(mp)) 236 panic("nfs root2"); 237 rootfs = mp; 238 mp->mnt_next = mp; 239 mp->mnt_prev = mp; 240 mp->mnt_vnodecovered = NULLVP; 241 vfs_unlock(mp); 242 rootvp = vp; 243 244 /* 245 * This is not really an nfs issue, but it is much easier to 246 * set hostname here and then let the "/etc/rc.xxx" files 247 * mount the right /var based upon its preset value. 248 */ 249 bcopy(nfs_diskless.my_hostnam, hostname, MAXHOSTNAMELEN); 250 hostname[MAXHOSTNAMELEN - 1] = '\0'; 251 for (i = 0; i < MAXHOSTNAMELEN; i++) 252 if (hostname[i] == '\0') 253 break; 254 hostnamelen = i; 255 inittodr((time_t)0); /* There is no time in the nfs fsstat so ?? */ 256 return (0); 257 } 258 259 /* 260 * Convert the integer fields of the nfs_args structure from net byte order 261 * to host byte order. Called by nfs_mountroot() above. 262 */ 263 void 264 nfsargs_ntoh(nfsp) 265 register struct nfs_args *nfsp; 266 { 267 268 NTOHL(nfsp->sotype); 269 NTOHL(nfsp->proto); 270 NTOHL(nfsp->flags); 271 NTOHL(nfsp->wsize); 272 NTOHL(nfsp->rsize); 273 NTOHL(nfsp->timeo); 274 NTOHL(nfsp->retrans); 275 NTOHL(nfsp->maxgrouplist); 276 NTOHL(nfsp->readahead); 277 NTOHL(nfsp->leaseterm); 278 NTOHL(nfsp->deadthresh); 279 } 280 281 /* 282 * VFS Operations. 283 * 284 * mount system call 285 * It seems a bit dumb to copyinstr() the host and path here and then 286 * bcopy() them in mountnfs(), but I wanted to detect errors before 287 * doing the sockargs() call because sockargs() allocates an mbuf and 288 * an error after that means that I have to release the mbuf. 289 */ 290 /* ARGSUSED */ 291 nfs_mount(mp, path, data, ndp, p) 292 struct mount *mp; 293 char *path; 294 caddr_t data; 295 struct nameidata *ndp; 296 struct proc *p; 297 { 298 int error; 299 struct nfs_args args; 300 struct mbuf *nam; 301 struct vnode *vp; 302 char pth[MNAMELEN], hst[MNAMELEN]; 303 u_int len; 304 nfsv2fh_t nfh; 305 306 if (mp->mnt_flag & MNT_UPDATE) 307 return (0); 308 if (error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args))) 309 return (error); 310 if (error = copyin((caddr_t)args.fh, (caddr_t)&nfh, sizeof (nfsv2fh_t))) 311 return (error); 312 if (error = copyinstr(path, pth, MNAMELEN-1, &len)) 313 return (error); 314 bzero(&pth[len], MNAMELEN - len); 315 if (error = copyinstr(args.hostname, hst, MNAMELEN-1, &len)) 316 return (error); 317 bzero(&hst[len], MNAMELEN - len); 318 /* sockargs() call must be after above copyin() calls */ 319 if (error = sockargs(&nam, (caddr_t)args.addr, 320 args.addrlen, MT_SONAME)) 321 return (error); 322 args.fh = &nfh; 323 error = mountnfs(&args, mp, nam, pth, hst, &vp); 324 return (error); 325 } 326 327 /* 328 * Common code for mount and mountroot 329 */ 330 mountnfs(argp, mp, nam, pth, hst, vpp) 331 register struct nfs_args *argp; 332 register struct mount *mp; 333 struct mbuf *nam; 334 char *pth, *hst; 335 struct vnode **vpp; 336 { 337 register struct nfsmount *nmp; 338 struct nfsnode *np; 339 int error; 340 fsid_t tfsid; 341 342 MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount), M_NFSMNT, 343 M_WAITOK); 344 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 345 mp->mnt_data = (qaddr_t)nmp; 346 getnewfsid(mp, MOUNT_NFS); 347 nmp->nm_mountp = mp; 348 nmp->nm_flag = argp->flags; 349 if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) == 350 (NFSMNT_NQNFS | NFSMNT_MYWRITE)) { 351 error = EPERM; 352 goto bad; 353 } 354 if ((nmp->nm_flag & (NFSMNT_RDIRALOOK | NFSMNT_LEASETERM)) && 355 (nmp->nm_flag & NFSMNT_NQNFS) == 0) { 356 error = EPERM; 357 goto bad; 358 } 359 nmp->nm_timeo = NFS_TIMEO; 360 nmp->nm_retry = NFS_RETRANS; 361 nmp->nm_wsize = NFS_WSIZE; 362 nmp->nm_rsize = NFS_RSIZE; 363 nmp->nm_numgrps = NFS_MAXGRPS; 364 nmp->nm_readahead = NFS_DEFRAHEAD; 365 nmp->nm_leaseterm = NQ_DEFLEASE; 366 nmp->nm_deadthresh = NQ_DEADTHRESH; 367 nmp->nm_tnext = (struct nfsnode *)nmp; 368 nmp->nm_tprev = (struct nfsnode *)nmp; 369 nmp->nm_inprog = NULLVP; 370 bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t)); 371 mp->mnt_stat.f_type = MOUNT_NFS; 372 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 373 bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN); 374 nmp->nm_nam = nam; 375 376 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 377 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 378 if (nmp->nm_timeo < NFS_MINTIMEO) 379 nmp->nm_timeo = NFS_MINTIMEO; 380 else if (nmp->nm_timeo > NFS_MAXTIMEO) 381 nmp->nm_timeo = NFS_MAXTIMEO; 382 } 383 384 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 385 nmp->nm_retry = argp->retrans; 386 if (nmp->nm_retry > NFS_MAXREXMIT) 387 nmp->nm_retry = NFS_MAXREXMIT; 388 } 389 390 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 391 nmp->nm_wsize = argp->wsize; 392 /* Round down to multiple of blocksize */ 393 nmp->nm_wsize &= ~0x1ff; 394 if (nmp->nm_wsize <= 0) 395 nmp->nm_wsize = 512; 396 else if (nmp->nm_wsize > NFS_MAXDATA) 397 nmp->nm_wsize = NFS_MAXDATA; 398 } 399 if (nmp->nm_wsize > MAXBSIZE) 400 nmp->nm_wsize = MAXBSIZE; 401 402 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 403 nmp->nm_rsize = argp->rsize; 404 /* Round down to multiple of blocksize */ 405 nmp->nm_rsize &= ~0x1ff; 406 if (nmp->nm_rsize <= 0) 407 nmp->nm_rsize = 512; 408 else if (nmp->nm_rsize > NFS_MAXDATA) 409 nmp->nm_rsize = NFS_MAXDATA; 410 } 411 if (nmp->nm_rsize > MAXBSIZE) 412 nmp->nm_rsize = MAXBSIZE; 413 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 && 414 argp->maxgrouplist <= NFS_MAXGRPS) 415 nmp->nm_numgrps = argp->maxgrouplist; 416 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 && 417 argp->readahead <= NFS_MAXRAHEAD) 418 nmp->nm_readahead = argp->readahead; 419 if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 && 420 argp->leaseterm <= NQ_MAXLEASE) 421 nmp->nm_leaseterm = argp->leaseterm; 422 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 && 423 argp->deadthresh <= NQ_NEVERDEAD) 424 nmp->nm_deadthresh = argp->deadthresh; 425 /* Set up the sockets and per-host congestion */ 426 nmp->nm_sotype = argp->sotype; 427 nmp->nm_soproto = argp->proto; 428 429 /* 430 * For Connection based sockets (TCP,...) defer the connect until 431 * the first request, in case the server is not responding. 432 */ 433 if (nmp->nm_sotype == SOCK_DGRAM && 434 (error = nfs_connect(nmp, (struct nfsreq *)0))) 435 goto bad; 436 437 /* 438 * This is silly, but it has to be set so that vinifod() works. 439 * We do not want to do an nfs_statfs() here since we can get 440 * stuck on a dead server and we are holding a lock on the mount 441 * point. 442 */ 443 mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA; 444 /* 445 * A reference count is needed on the nfsnode representing the 446 * remote root. If this object is not persistent, then backward 447 * traversals of the mount point (i.e. "..") will not work if 448 * the nfsnode gets flushed out of the cache. Ufs does not have 449 * this problem, because one can identify root inodes by their 450 * number == ROOTINO (2). 451 */ 452 if (error = nfs_nget(mp, &nmp->nm_fh, &np)) 453 goto bad; 454 *vpp = NFSTOV(np); 455 456 return (0); 457 bad: 458 nfs_disconnect(nmp); 459 free((caddr_t)nmp, M_NFSMNT); 460 m_freem(nam); 461 return (error); 462 } 463 464 /* 465 * unmount system call 466 */ 467 nfs_unmount(mp, mntflags, p) 468 struct mount *mp; 469 int mntflags; 470 struct proc *p; 471 { 472 register struct nfsmount *nmp; 473 struct nfsnode *np; 474 struct vnode *vp; 475 int error, flags = 0; 476 extern int doforce; 477 478 if (mntflags & MNT_FORCE) { 479 if (!doforce || mp == rootfs) 480 return (EINVAL); 481 flags |= FORCECLOSE; 482 } 483 nmp = VFSTONFS(mp); 484 /* 485 * Clear out the buffer cache 486 */ 487 mntflushbuf(mp, 0); 488 if (mntinvalbuf(mp)) 489 return (EBUSY); 490 /* 491 * Goes something like this.. 492 * - Check for activity on the root vnode (other than ourselves). 493 * - Call vflush() to clear out vnodes for this file system, 494 * except for the root vnode. 495 * - Decrement reference on the vnode representing remote root. 496 * - Close the socket 497 * - Free up the data structures 498 */ 499 /* 500 * We need to decrement the ref. count on the nfsnode representing 501 * the remote root. See comment in mountnfs(). The VFS unmount() 502 * has done vput on this vnode, otherwise we would get deadlock! 503 */ 504 if (error = nfs_nget(mp, &nmp->nm_fh, &np)) 505 return(error); 506 vp = NFSTOV(np); 507 if (vp->v_usecount > 2) { 508 vput(vp); 509 return (EBUSY); 510 } 511 512 /* 513 * Must handshake with nqnfs_clientd() if it is active. 514 */ 515 nmp->nm_flag |= NFSMNT_DISMINPROG; 516 while (nmp->nm_inprog != NULLVP) 517 (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); 518 if (error = vflush(mp, vp, flags)) { 519 vput(vp); 520 nmp->nm_flag &= ~NFSMNT_DISMINPROG; 521 return (error); 522 } 523 524 /* 525 * We are now committed to the unmount. 526 * For NQNFS, let the server daemon free the nfsmount structure. 527 */ 528 if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) 529 nmp->nm_flag |= NFSMNT_DISMNT; 530 531 /* 532 * There are two reference counts to get rid of here. 533 */ 534 vrele(vp); 535 vrele(vp); 536 vgone(vp); 537 nfs_disconnect(nmp); 538 m_freem(nmp->nm_nam); 539 540 if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) 541 free((caddr_t)nmp, M_NFSMNT); 542 return (0); 543 } 544 545 /* 546 * Return root of a filesystem 547 */ 548 nfs_root(mp, vpp) 549 struct mount *mp; 550 struct vnode **vpp; 551 { 552 register struct vnode *vp; 553 struct nfsmount *nmp; 554 struct nfsnode *np; 555 int error; 556 557 nmp = VFSTONFS(mp); 558 if (error = nfs_nget(mp, &nmp->nm_fh, &np)) 559 return (error); 560 vp = NFSTOV(np); 561 vp->v_type = VDIR; 562 vp->v_flag = VROOT; 563 *vpp = vp; 564 return (0); 565 } 566 567 extern int syncprt; 568 569 /* 570 * Flush out the buffer cache 571 */ 572 /* ARGSUSED */ 573 nfs_sync(mp, waitfor) 574 struct mount *mp; 575 int waitfor; 576 { 577 if (syncprt) 578 ufs_bufstats(); 579 /* 580 * Force stale buffer cache information to be flushed. 581 */ 582 mntflushbuf(mp, waitfor == MNT_WAIT ? B_SYNC : 0); 583 return (0); 584 } 585 586 /* 587 * At this point, this should never happen 588 */ 589 /* ARGSUSED */ 590 nfs_fhtovp(mp, fhp, setgen, vpp) 591 struct mount *mp; 592 struct fid *fhp; 593 int setgen; 594 struct vnode **vpp; 595 { 596 597 return (EINVAL); 598 } 599 600 /* 601 * Vnode pointer to File handle, should never happen either 602 */ 603 /* ARGSUSED */ 604 nfs_vptofh(vp, fhp) 605 struct vnode *vp; 606 struct fid *fhp; 607 { 608 609 return (EINVAL); 610 } 611 612 /* 613 * Vfs start routine, a no-op. 614 */ 615 /* ARGSUSED */ 616 nfs_start(mp, flags, p) 617 struct mount *mp; 618 int flags; 619 struct proc *p; 620 { 621 622 return (0); 623 } 624 625 /* 626 * Do operations associated with quotas, not supported 627 */ 628 /* ARGSUSED */ 629 nfs_quotactl(mp, cmd, uid, arg, p) 630 struct mount *mp; 631 int cmd; 632 u_int uid; 633 caddr_t arg; 634 struct proc *p; 635 { 636 637 return (EOPNOTSUPP); 638 } 639