1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 37 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $ 38 * $DragonFly: src/sys/vfs/nfs/nfs_vfsops.c,v 1.54 2008/07/31 20:23:40 swildner Exp $ 39 */ 40 41 #include "opt_bootp.h" 42 #include "opt_nfsroot.h" 43 44 #include <sys/param.h> 45 #include <sys/sockio.h> 46 #include <sys/proc.h> 47 #include <sys/vnode.h> 48 #include <sys/fcntl.h> 49 #include <sys/kernel.h> 50 #include <sys/sysctl.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mbuf.h> 54 #include <sys/socket.h> 55 #include <sys/socketvar.h> 56 #include <sys/systm.h> 57 #include <sys/objcache.h> 58 59 #include <vm/vm.h> 60 #include <vm/vm_extern.h> 61 62 #include <net/if.h> 63 #include <net/route.h> 64 #include <netinet/in.h> 65 66 #include <sys/thread2.h> 67 #include <sys/mutex2.h> 68 69 #include "rpcv2.h" 70 #include "nfsproto.h" 71 #include "nfs.h" 72 #include "nfsmount.h" 73 #include "nfsnode.h" 74 #include "xdr_subs.h" 75 #include "nfsm_subs.h" 76 #include "nfsdiskless.h" 77 #include "nfsmountrpc.h" 78 79 extern int nfs_mountroot(struct mount *mp); 80 extern void bootpc_init(void); 81 82 extern struct vop_ops nfsv2_vnode_vops; 83 extern struct vop_ops nfsv2_fifo_vops; 84 extern struct vop_ops nfsv2_spec_vops; 85 86 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header"); 87 MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle"); 88 MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure"); 89 MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data"); 90 MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor"); 91 MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure"); 92 MALLOC_DEFINE(M_NFSHASH, "NFS hash", "NFS hash tables"); 93 94 struct objcache *nfsmount_objcache; 95 96 struct nfsstats nfsstats; 97 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); 98 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, 99 "Nfs stats structure"); 100 static int nfs_ip_paranoia = 1; 101 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, 102 "Enable no-connection mode for protocols that support no-connection mode"); 103 #ifdef NFS_DEBUG 104 int nfs_debug; 105 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); 106 #endif 107 108 /* 109 * Tunable to determine the Read/Write unit size. Maximum value 110 * is NFS_MAXDATA. We also default to NFS_MAXDATA. 111 */ 112 static int nfs_io_size = NFS_MAXDATA; 113 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_io_size, CTLFLAG_RW, 114 &nfs_io_size, 0, "NFS optimal I/O unit size"); 115 116 static void nfs_decode_args (struct nfsmount *nmp, 117 struct nfs_args *argp); 118 static int mountnfs (struct nfs_args *,struct mount *, 119 struct sockaddr *,char *,char *,struct vnode **); 120 static int nfs_mount ( struct mount *mp, char *path, caddr_t data, 121 struct ucred *cred); 122 static int nfs_unmount ( struct mount *mp, int mntflags); 123 static int nfs_root ( struct mount *mp, struct vnode **vpp); 124 static int nfs_statfs ( struct mount *mp, struct statfs *sbp, 125 struct ucred *cred); 126 static int nfs_statvfs(struct mount *mp, struct statvfs *sbp, 127 struct ucred *cred); 128 static int nfs_sync ( struct mount *mp, int waitfor); 129 130 /* 131 * nfs vfs operations. 132 */ 133 static struct vfsops nfs_vfsops = { 134 .vfs_mount = nfs_mount, 135 .vfs_unmount = nfs_unmount, 136 .vfs_root = nfs_root, 137 .vfs_statfs = nfs_statfs, 138 .vfs_statvfs = nfs_statvfs, 139 .vfs_sync = nfs_sync, 140 .vfs_init = nfs_init, 141 .vfs_uninit = nfs_uninit 142 }; 143 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); 144 MODULE_VERSION(nfs, 1); 145 146 /* 147 * This structure must be filled in by a primary bootstrap or bootstrap 148 * server for a diskless/dataless machine. It is initialized below just 149 * to ensure that it is allocated to initialized data (.data not .bss). 150 */ 151 struct nfs_diskless nfs_diskless = { { { 0 } } }; 152 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; 153 int nfs_diskless_valid = 0; 154 155 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 156 &nfs_diskless_valid, 0, 157 "NFS diskless params were obtained"); 158 159 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 160 nfsv3_diskless.root_hostnam, 0, 161 "Host name for mount point"); 162 163 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 164 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, 165 "%Ssockaddr_in", "Address of root server"); 166 167 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD, 168 nfsv3_diskless.swap_hostnam, 0, 169 "Host name for mount ppoint"); 170 171 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD, 172 &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr, 173 "%Ssockaddr_in", "Address of swap server"); 174 175 176 void nfsargs_ntoh (struct nfs_args *); 177 static int nfs_mountdiskless (char *, char *, int, 178 struct sockaddr_in *, struct nfs_args *, 179 struct thread *, struct vnode **, 180 struct mount **); 181 static void nfs_convert_diskless (void); 182 static void nfs_convert_oargs (struct nfs_args *args, 183 struct onfs_args *oargs); 184 185 /* 186 * Calculate the buffer I/O block size to use. The maximum V2 block size 187 * is typically 8K, the maximum datagram size is typically 16K, and the 188 * maximum V3 block size is typically 32K. The buffer cache tends to work 189 * best with 16K blocks but we allow 32K for TCP connections. 190 * 191 * We force the block size to be at least a page for buffer cache efficiency. 192 */ 193 static int 194 nfs_iosize(int v3, int sotype) 195 { 196 int iosize; 197 int iomax; 198 199 if (v3) { 200 if (sotype == SOCK_STREAM) 201 iomax = NFS_MAXDATA; 202 else 203 iomax = NFS_MAXDGRAMDATA; 204 } else { 205 iomax = NFS_V2MAXDATA; 206 } 207 if ((iosize = nfs_io_size) > iomax) 208 iosize = iomax; 209 if (iosize < PAGE_SIZE) 210 iosize = PAGE_SIZE; 211 212 /* 213 * This is an aweful hack but until the buffer cache is rewritten 214 * we need it. The problem is that when you combine write() with 215 * mmap() the vm_page->valid bits can become weird looking 216 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers 217 * at the file EOF. To solve the problem the BIO system needs to 218 * be guarenteed that the NFS iosize for regular files will be a 219 * multiple of PAGE_SIZE so it can invalidate the whole page 220 * rather then just the piece of it owned by the buffer when 221 * NFS does vinvalbuf() calls. 222 */ 223 if (iosize & PAGE_MASK) 224 iosize = (iosize & ~PAGE_MASK) + PAGE_SIZE; 225 return iosize; 226 } 227 228 static void 229 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 230 { 231 args->version = NFS_ARGSVERSION; 232 args->addr = oargs->addr; 233 args->addrlen = oargs->addrlen; 234 args->sotype = oargs->sotype; 235 args->proto = oargs->proto; 236 args->fh = oargs->fh; 237 args->fhsize = oargs->fhsize; 238 args->flags = oargs->flags; 239 args->wsize = oargs->wsize; 240 args->rsize = oargs->rsize; 241 args->readdirsize = oargs->readdirsize; 242 args->timeo = oargs->timeo; 243 args->retrans = oargs->retrans; 244 args->maxgrouplist = oargs->maxgrouplist; 245 args->readahead = oargs->readahead; 246 args->deadthresh = oargs->deadthresh; 247 args->hostname = oargs->hostname; 248 } 249 250 static void 251 nfs_convert_diskless(void) 252 { 253 int i; 254 255 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, 256 sizeof(struct ifaliasreq)); 257 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, 258 sizeof(struct sockaddr_in)); 259 nfs_convert_oargs(&nfsv3_diskless.swap_args, &nfs_diskless.swap_args); 260 261 /* 262 * Copy the NFS handle passed from the diskless code. 263 * 264 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which 265 * will fail utterly with HAMMER due to limitations with NFSv2 266 * directory cookies. 267 */ 268 bcopy(nfs_diskless.swap_fh, nfsv3_diskless.swap_fh, NFSX_V2FH); 269 nfsv3_diskless.swap_fhsize = NFSX_V2FH; 270 for (i = NFSX_V2FH - 1; i >= 0; --i) { 271 if (nfs_diskless.swap_fh[i]) 272 break; 273 } 274 if (i < 0) 275 nfsv3_diskless.swap_fhsize = 0; 276 nfsv3_diskless.swap_fhsize = 0; /* FORCE DISABLE */ 277 278 bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr, 279 sizeof(struct sockaddr_in)); 280 bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN); 281 nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks; 282 bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred, 283 sizeof(struct ucred)); 284 nfs_convert_oargs(&nfsv3_diskless.root_args, &nfs_diskless.root_args); 285 286 /* 287 * Copy the NFS handle passed from the diskless code. 288 * 289 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which 290 * will fail utterly with HAMMER due to limitations with NFSv2 291 * directory cookies. 292 */ 293 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); 294 nfsv3_diskless.root_fhsize = NFSX_V2FH; 295 for (i = NFSX_V2FH - 1; i >= 0; --i) { 296 if (nfs_diskless.root_fh[i]) 297 break; 298 } 299 if (i < 0) 300 nfsv3_diskless.root_fhsize = 0; 301 nfsv3_diskless.root_fhsize = 0; /* FORCE DISABLE */ 302 303 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, 304 sizeof(struct sockaddr_in)); 305 bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN); 306 nfsv3_diskless.root_time = nfs_diskless.root_time; 307 bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam, 308 MAXHOSTNAMELEN); 309 nfs_diskless_valid = 3; 310 } 311 312 /* 313 * nfs statfs call 314 */ 315 int 316 nfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 317 { 318 struct vnode *vp; 319 struct nfs_statfs *sfp; 320 struct nfsmount *nmp = VFSTONFS(mp); 321 thread_t td = curthread; 322 int error = 0, retattr; 323 struct nfsnode *np; 324 u_quad_t tquad; 325 struct nfsm_info info; 326 327 info.mrep = NULL; 328 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3); 329 330 lwkt_gettoken(&nmp->nm_token); 331 332 #ifndef nolint 333 sfp = NULL; 334 #endif 335 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); 336 if (error) { 337 lwkt_reltoken(&nmp->nm_token); 338 return (error); 339 } 340 vp = NFSTOV(np); 341 /* ignore the passed cred */ 342 cred = crget(); 343 cred->cr_ngroups = 1; 344 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 345 (void)nfs_fsinfo(nmp, vp, td); 346 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 347 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3)); 348 ERROROUT(nfsm_fhtom(&info, vp)); 349 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error)); 350 if (info.v3) { 351 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, 352 NFS_LATTR_NOSHRINK)); 353 } 354 if (error) { 355 if (info.mrep != NULL) 356 m_freem(info.mrep); 357 goto nfsmout; 358 } 359 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3))); 360 sbp->f_flags = nmp->nm_flag; 361 362 if (info.v3) { 363 sbp->f_bsize = NFS_FABLKSIZE; 364 tquad = fxdr_hyper(&sfp->sf_tbytes); 365 sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 366 tquad = fxdr_hyper(&sfp->sf_fbytes); 367 sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 368 tquad = fxdr_hyper(&sfp->sf_abytes); 369 sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 370 sbp->f_files = (fxdr_unsigned(int32_t, 371 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); 372 sbp->f_ffree = (fxdr_unsigned(int32_t, 373 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); 374 } else { 375 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 376 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 377 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 378 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 379 sbp->f_files = 0; 380 sbp->f_ffree = 0; 381 } 382 383 /* 384 * Some values are pre-set in mnt_stat. Note in particular f_iosize 385 * cannot be changed once the filesystem is mounted as it is used 386 * as the basis for BIOs. 387 */ 388 if (sbp != &mp->mnt_stat) { 389 sbp->f_type = mp->mnt_vfc->vfc_typenum; 390 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 391 sbp->f_iosize = mp->mnt_stat.f_iosize; 392 } 393 m_freem(info.mrep); 394 info.mrep = NULL; 395 nfsmout: 396 vput(vp); 397 crfree(cred); 398 lwkt_reltoken(&nmp->nm_token); 399 return (error); 400 } 401 402 static int 403 nfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 404 { 405 struct vnode *vp; 406 struct nfs_statfs *sfp; 407 struct nfsmount *nmp = VFSTONFS(mp); 408 thread_t td = curthread; 409 int error = 0, retattr; 410 struct nfsnode *np; 411 struct nfsm_info info; 412 413 info.mrep = NULL; 414 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3); 415 lwkt_gettoken(&nmp->nm_token); 416 417 #ifndef nolint 418 sfp = NULL; 419 #endif 420 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); 421 if (error) { 422 lwkt_reltoken(&nmp->nm_token); 423 return (error); 424 } 425 vp = NFSTOV(np); 426 /* ignore the passed cred */ 427 cred = crget(); 428 cred->cr_ngroups = 1; 429 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 430 (void)nfs_fsinfo(nmp, vp, td); 431 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 432 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3)); 433 ERROROUT(nfsm_fhtom(&info, vp)); 434 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error)); 435 if (info.v3) { 436 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, 437 NFS_LATTR_NOSHRINK)); 438 } 439 if (error) { 440 if (info.mrep != NULL) 441 m_freem(info.mrep); 442 goto nfsmout; 443 } 444 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3))); 445 sbp->f_flag = nmp->nm_flag; 446 sbp->f_owner = nmp->nm_cred->cr_ruid; 447 448 if (info.v3) { 449 sbp->f_bsize = NFS_FABLKSIZE; 450 sbp->f_frsize = NFS_FABLKSIZE; 451 sbp->f_blocks = (fxdr_hyper(&sfp->sf_tbytes) / 452 ((u_quad_t)NFS_FABLKSIZE)); 453 sbp->f_bfree = (fxdr_hyper(&sfp->sf_fbytes) / 454 ((u_quad_t)NFS_FABLKSIZE)); 455 sbp->f_bavail = (fxdr_hyper(&sfp->sf_abytes) / 456 ((u_quad_t)NFS_FABLKSIZE)); 457 sbp->f_files = fxdr_hyper(&sfp->sf_tfiles); 458 sbp->f_ffree = fxdr_hyper(&sfp->sf_ffiles); 459 sbp->f_favail = fxdr_hyper(&sfp->sf_afiles); 460 } else { 461 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 462 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 463 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 464 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 465 sbp->f_files = 0; 466 sbp->f_ffree = 0; 467 sbp->f_favail = 0; 468 } 469 sbp->f_syncreads = 0; 470 sbp->f_syncwrites = 0; 471 sbp->f_asyncreads = 0; 472 sbp->f_asyncwrites = 0; 473 sbp->f_type = mp->mnt_vfc->vfc_typenum; 474 475 m_freem(info.mrep); 476 info.mrep = NULL; 477 nfsmout: 478 vput(vp); 479 crfree(cred); 480 lwkt_reltoken(&nmp->nm_token); 481 return (error); 482 } 483 484 /* 485 * nfs version 3 fsinfo rpc call 486 */ 487 int 488 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct thread *td) 489 { 490 struct nfsv3_fsinfo *fsp; 491 u_int32_t pref, max; 492 int error = 0, retattr; 493 u_int64_t maxfsize; 494 struct nfsm_info info; 495 496 info.v3 = 1; 497 nfsstats.rpccnt[NFSPROC_FSINFO]++; 498 nfsm_reqhead(&info, vp, NFSPROC_FSINFO, NFSX_FH(1)); 499 ERROROUT(nfsm_fhtom(&info, vp)); 500 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSINFO, td, 501 nfs_vpcred(vp, ND_READ), &error)); 502 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, NFS_LATTR_NOSHRINK)); 503 if (error == 0) { 504 NULLOUT(fsp = nfsm_dissect(&info, NFSX_V3FSINFO)); 505 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); 506 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) 507 nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & 508 ~(NFS_FABLKSIZE - 1); 509 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); 510 if (max < nmp->nm_wsize && max > 0) { 511 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); 512 if (nmp->nm_wsize == 0) 513 nmp->nm_wsize = max; 514 } 515 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); 516 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) 517 nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & 518 ~(NFS_FABLKSIZE - 1); 519 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); 520 if (max < nmp->nm_rsize && max > 0) { 521 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); 522 if (nmp->nm_rsize == 0) 523 nmp->nm_rsize = max; 524 } 525 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); 526 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) 527 nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & 528 ~(NFS_DIRBLKSIZ - 1); 529 if (max < nmp->nm_readdirsize && max > 0) { 530 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); 531 if (nmp->nm_readdirsize == 0) 532 nmp->nm_readdirsize = max; 533 } 534 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); 535 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) 536 nmp->nm_maxfilesize = maxfsize; 537 nmp->nm_state |= NFSSTA_GOTFSINFO; 538 539 /* 540 * Use the smaller of rsize/wsize for the biosize. 541 */ 542 if (nmp->nm_rsize < nmp->nm_wsize) 543 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_rsize; 544 else 545 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_wsize; 546 } 547 m_freem(info.mrep); 548 info.mrep = NULL; 549 nfsmout: 550 return (error); 551 } 552 553 /* 554 * Mount a remote root fs via. nfs. This depends on the info in the 555 * nfs_diskless structure that has been filled in properly by some primary 556 * bootstrap. 557 * It goes something like this: 558 * - do enough of "ifconfig" by calling ifioctl() so that the system 559 * can talk to the server 560 * - If nfs_diskless.mygateway is filled in, use that address as 561 * a default gateway. 562 * - build the rootfs mount point and call mountnfs() to do the rest. 563 */ 564 int 565 nfs_mountroot(struct mount *mp) 566 { 567 struct mount *swap_mp; 568 struct nfsv3_diskless *nd = &nfsv3_diskless; 569 struct socket *so; 570 struct vnode *vp; 571 struct thread *td = curthread; /* XXX */ 572 int error, i; 573 u_long l; 574 char buf[128]; 575 576 #if defined(BOOTP_NFSROOT) && defined(BOOTP) 577 bootpc_init(); /* use bootp to get nfs_diskless filled in */ 578 #endif 579 580 /* 581 * XXX time must be non-zero when we init the interface or else 582 * the arp code will wedge... 583 */ 584 while (mycpu->gd_time_seconds == 0) 585 tsleep(mycpu, 0, "arpkludge", 10); 586 587 /* 588 * The boot code may have passed us a diskless structure. 589 */ 590 kprintf("DISKLESS %d\n", nfs_diskless_valid); 591 if (nfs_diskless_valid == 1) 592 nfs_convert_diskless(); 593 594 /* 595 * NFSv3 is required. 596 */ 597 nd->root_args.flags |= NFSMNT_NFSV3 | NFSMNT_RDIRPLUS; 598 nd->swap_args.flags |= NFSMNT_NFSV3; 599 600 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr)) 601 kprintf("nfs_mountroot: interface %s ip %s", 602 nd->myif.ifra_name, 603 inet_ntoa(SINP(&nd->myif.ifra_addr)->sin_addr)); 604 kprintf(" bcast %s", 605 inet_ntoa(SINP(&nd->myif.ifra_broadaddr)->sin_addr)); 606 kprintf(" mask %s\n", 607 inet_ntoa(SINP(&nd->myif.ifra_mask)->sin_addr)); 608 #undef SINP 609 610 /* 611 * XXX splnet, so networks will receive... 612 */ 613 crit_enter(); 614 615 /* 616 * BOOTP does not necessarily have to be compiled into the kernel 617 * for an NFS root to work. If we inherited the network 618 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured 619 * out our interface for us and all we need to do is ifconfig the 620 * interface. We only do this if the interface has not already been 621 * ifconfig'd by e.g. BOOTP. 622 */ 623 error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, td); 624 if (error) { 625 panic("nfs_mountroot: socreate(%04x): %d", 626 nd->myif.ifra_addr.sa_family, error); 627 } 628 629 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, proc0.p_ucred); 630 if (error) 631 panic("nfs_mountroot: SIOCAIFADDR: %d", error); 632 633 soclose(so, FNONBLOCK); 634 635 /* 636 * If the gateway field is filled in, set it as the default route. 637 */ 638 if (nd->mygateway.sin_len != 0) { 639 struct sockaddr_in mask, sin; 640 641 bzero((caddr_t)&mask, sizeof(mask)); 642 sin = mask; 643 sin.sin_family = AF_INET; 644 sin.sin_len = sizeof(sin); 645 kprintf("nfs_mountroot: gateway %s\n", 646 inet_ntoa(nd->mygateway.sin_addr)); 647 error = rtrequest_global(RTM_ADD, (struct sockaddr *)&sin, 648 (struct sockaddr *)&nd->mygateway, 649 (struct sockaddr *)&mask, 650 RTF_UP | RTF_GATEWAY); 651 if (error) 652 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error); 653 } 654 655 /* 656 * Create the rootfs mount point. 657 */ 658 nd->root_args.fh = nd->root_fh; 659 nd->root_args.fhsize = nd->root_fhsize; 660 l = ntohl(nd->root_saddr.sin_addr.s_addr); 661 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 662 (l >> 24) & 0xff, (l >> 16) & 0xff, 663 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam); 664 kprintf("NFS_ROOT: %s\n",buf); 665 error = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr, 666 &nd->root_args, td, &vp, &mp); 667 if (error) { 668 mp->mnt_vfc->vfc_refcount--; 669 crit_exit(); 670 return (error); 671 } 672 673 swap_mp = NULL; 674 if (nd->swap_nblks) { 675 676 /* Convert to DEV_BSIZE instead of Kilobyte */ 677 nd->swap_nblks *= 2; 678 679 /* 680 * Create a fake mount point just for the swap vnode so that the 681 * swap file can be on a different server from the rootfs. 682 */ 683 nd->swap_args.fh = nd->swap_fh; 684 nd->swap_args.fhsize = nd->swap_fhsize; 685 l = ntohl(nd->swap_saddr.sin_addr.s_addr); 686 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 687 (l >> 24) & 0xff, (l >> 16) & 0xff, 688 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam); 689 kprintf("NFS SWAP: %s\n",buf); 690 error = nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr, 691 &nd->swap_args, td, &vp, &swap_mp); 692 if (error) { 693 crit_exit(); 694 return (error); 695 } 696 vfs_unbusy(swap_mp); 697 698 VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size = 699 nd->swap_nblks * DEV_BSIZE ; 700 701 /* 702 * Since the swap file is not the root dir of a file system, 703 * hack it to a regular file. 704 */ 705 vclrflags(vp, VROOT); 706 vref(vp); 707 nfs_setvtype(vp, VREG); 708 swaponvp(td, vp, nd->swap_nblks); 709 } 710 711 mp->mnt_flag |= MNT_ROOTFS; 712 vfs_unbusy(mp); 713 714 /* 715 * This is not really an nfs issue, but it is much easier to 716 * set hostname here and then let the "/etc/rc.xxx" files 717 * mount the right /var based upon its preset value. 718 */ 719 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); 720 hostname[MAXHOSTNAMELEN - 1] = '\0'; 721 for (i = 0; i < MAXHOSTNAMELEN; i++) 722 if (hostname[i] == '\0') 723 break; 724 inittodr(ntohl(nd->root_time)); 725 crit_exit(); 726 return (0); 727 } 728 729 /* 730 * Internal version of mount system call for diskless setup. 731 */ 732 static int 733 nfs_mountdiskless(char *path, char *which, int mountflag, 734 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 735 struct vnode **vpp, struct mount **mpp) 736 { 737 struct mount *mp; 738 struct sockaddr *nam; 739 int didalloc = 0; 740 int error; 741 742 mp = *mpp; 743 744 if (mp == NULL) { 745 if ((error = vfs_rootmountalloc("nfs", path, &mp)) != 0) { 746 kprintf("nfs_mountroot: NFS not configured"); 747 return (error); 748 } 749 didalloc = 1; 750 } 751 mp->mnt_kern_flag = 0; 752 mp->mnt_flag = mountflag; 753 nam = dup_sockaddr((struct sockaddr *)sin); 754 755 #if defined(BOOTP) || defined(NFS_ROOT) 756 if (args->fhsize == 0) { 757 char *xpath = path; 758 759 kprintf("NFS_ROOT: No FH passed from loader, attempting " 760 "mount rpc..."); 761 while (*xpath && *xpath != ':') 762 ++xpath; 763 if (*xpath) 764 ++xpath; 765 args->fhsize = 0; 766 error = md_mount(sin, xpath, args->fh, &args->fhsize, args, td); 767 if (error) { 768 kprintf("failed error %d.\n", error); 769 goto haderror; 770 } 771 kprintf("success!\n"); 772 } 773 #endif 774 775 if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) { 776 #if defined(BOOTP) || defined(NFS_ROOT) 777 haderror: 778 #endif 779 kprintf("nfs_mountroot: mount %s on %s: %d", path, which, error); 780 mp->mnt_vfc->vfc_refcount--; 781 vfs_unbusy(mp); 782 if (didalloc) 783 kfree(mp, M_MOUNT); 784 FREE(nam, M_SONAME); 785 return (error); 786 } 787 *mpp = mp; 788 return (0); 789 } 790 791 static void 792 nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp) 793 { 794 int adjsock; 795 int maxio; 796 797 crit_enter(); 798 /* 799 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 800 * no sense in that context. 801 */ 802 if (nmp->nm_sotype == SOCK_STREAM) { 803 nmp->nm_flag &= ~NFSMNT_NOCONN; 804 argp->flags &= ~NFSMNT_NOCONN; 805 } 806 807 /* 808 * readdirplus is NFSv3 only. 809 */ 810 if ((argp->flags & NFSMNT_NFSV3) == 0) { 811 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 812 argp->flags &= ~NFSMNT_RDIRPLUS; 813 } 814 815 /* 816 * Re-bind if rsrvd port flag has changed 817 */ 818 adjsock = (nmp->nm_flag & NFSMNT_RESVPORT) != 819 (argp->flags & NFSMNT_RESVPORT); 820 821 /* Update flags atomically. Don't change the lock bits. */ 822 nmp->nm_flag = argp->flags | nmp->nm_flag; 823 crit_exit(); 824 825 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 826 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 827 if (nmp->nm_timeo < NFS_MINTIMEO) 828 nmp->nm_timeo = NFS_MINTIMEO; 829 else if (nmp->nm_timeo > NFS_MAXTIMEO) 830 nmp->nm_timeo = NFS_MAXTIMEO; 831 } 832 833 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 834 nmp->nm_retry = argp->retrans; 835 if (nmp->nm_retry > NFS_MAXREXMIT) 836 nmp->nm_retry = NFS_MAXREXMIT; 837 } 838 839 /* 840 * These parameters effect the buffer cache and cannot be changed 841 * once we've successfully mounted. 842 */ 843 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 844 maxio = nfs_iosize(argp->flags & NFSMNT_NFSV3, nmp->nm_sotype); 845 846 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 847 nmp->nm_wsize = argp->wsize; 848 /* Round down to multiple of blocksize */ 849 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 850 if (nmp->nm_wsize <= 0) 851 nmp->nm_wsize = NFS_FABLKSIZE; 852 } 853 if (nmp->nm_wsize > maxio) 854 nmp->nm_wsize = maxio; 855 if (nmp->nm_wsize > MAXBSIZE) 856 nmp->nm_wsize = MAXBSIZE; 857 858 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 859 nmp->nm_rsize = argp->rsize; 860 /* Round down to multiple of blocksize */ 861 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 862 if (nmp->nm_rsize <= 0) 863 nmp->nm_rsize = NFS_FABLKSIZE; 864 } 865 if (nmp->nm_rsize > maxio) 866 nmp->nm_rsize = maxio; 867 if (nmp->nm_rsize > MAXBSIZE) 868 nmp->nm_rsize = MAXBSIZE; 869 870 if ((argp->flags & NFSMNT_READDIRSIZE) && 871 argp->readdirsize > 0) { 872 nmp->nm_readdirsize = argp->readdirsize; 873 } 874 if (nmp->nm_readdirsize > maxio) 875 nmp->nm_readdirsize = maxio; 876 if (nmp->nm_readdirsize > nmp->nm_rsize) 877 nmp->nm_readdirsize = nmp->nm_rsize; 878 } 879 880 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 881 nmp->nm_acregmin = argp->acregmin; 882 else 883 nmp->nm_acregmin = NFS_MINATTRTIMO; 884 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 885 nmp->nm_acregmax = argp->acregmax; 886 else 887 nmp->nm_acregmax = NFS_MAXATTRTIMO; 888 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 889 nmp->nm_acdirmin = argp->acdirmin; 890 else 891 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 892 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 893 nmp->nm_acdirmax = argp->acdirmax; 894 else 895 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 896 if (nmp->nm_acdirmin > nmp->nm_acdirmax) 897 nmp->nm_acdirmin = nmp->nm_acdirmax; 898 if (nmp->nm_acregmin > nmp->nm_acregmax) 899 nmp->nm_acregmin = nmp->nm_acregmax; 900 901 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { 902 if (argp->maxgrouplist <= NFS_MAXGRPS) 903 nmp->nm_numgrps = argp->maxgrouplist; 904 else 905 nmp->nm_numgrps = NFS_MAXGRPS; 906 } 907 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 908 if (argp->readahead <= NFS_MAXRAHEAD) 909 nmp->nm_readahead = argp->readahead; 910 else 911 nmp->nm_readahead = NFS_MAXRAHEAD; 912 } 913 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) { 914 if (argp->deadthresh <= NFS_NEVERDEAD) 915 nmp->nm_deadthresh = argp->deadthresh; 916 else 917 nmp->nm_deadthresh = NFS_NEVERDEAD; 918 } 919 920 if (nmp->nm_so && adjsock) { 921 nfs_safedisconnect(nmp); 922 if (nmp->nm_sotype == SOCK_DGRAM) 923 while (nfs_connect(nmp, NULL)) { 924 kprintf("nfs_args: retrying connect\n"); 925 (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0); 926 } 927 } 928 } 929 930 /* 931 * VFS Operations. 932 * 933 * mount system call 934 * It seems a bit dumb to copyinstr() the host and path here and then 935 * bcopy() them in mountnfs(), but I wanted to detect errors before 936 * doing the sockargs() call because sockargs() allocates an mbuf and 937 * an error after that means that I have to release the mbuf. 938 */ 939 /* ARGSUSED */ 940 static int 941 nfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 942 { 943 int error; 944 struct nfs_args args; 945 struct sockaddr *nam; 946 struct vnode *vp; 947 char pth[MNAMELEN], hst[MNAMELEN]; 948 size_t len; 949 u_char nfh[NFSX_V3FHMAX]; 950 951 if (path == NULL) { 952 nfs_mountroot(mp); 953 return (0); 954 } 955 error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); 956 if (error) 957 return (error); 958 if (args.version != NFS_ARGSVERSION) { 959 #ifdef COMPAT_PRELITE2 960 /* 961 * If the argument version is unknown, then assume the 962 * caller is a pre-lite2 4.4BSD client and convert its 963 * arguments. 964 */ 965 struct onfs_args oargs; 966 error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args)); 967 if (error) 968 return (error); 969 nfs_convert_oargs(&args,&oargs); 970 #else /* !COMPAT_PRELITE2 */ 971 return (EPROGMISMATCH); 972 #endif /* COMPAT_PRELITE2 */ 973 } 974 if (mp->mnt_flag & MNT_UPDATE) { 975 struct nfsmount *nmp = VFSTONFS(mp); 976 977 if (nmp == NULL) 978 return (EIO); 979 /* 980 * When doing an update, we can't change from or to 981 * v3, or change cookie translation, or rsize or wsize. 982 */ 983 args.flags &= ~(NFSMNT_NFSV3 | NFSMNT_RSIZE | NFSMNT_WSIZE); 984 args.flags |= nmp->nm_flag & (NFSMNT_NFSV3); 985 nfs_decode_args(nmp, &args); 986 return (0); 987 } 988 989 /* 990 * Make the nfs_ip_paranoia sysctl serve as the default connection 991 * or no-connection mode for those protocols that support 992 * no-connection mode (the flag will be cleared later for protocols 993 * that do not support no-connection mode). This will allow a client 994 * to receive replies from a different IP then the request was 995 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 996 * not 0. 997 */ 998 if (nfs_ip_paranoia == 0) 999 args.flags |= NFSMNT_NOCONN; 1000 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) 1001 return (EINVAL); 1002 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); 1003 if (error) 1004 return (error); 1005 error = copyinstr(path, pth, MNAMELEN-1, &len); 1006 if (error) 1007 return (error); 1008 bzero(&pth[len], MNAMELEN - len); 1009 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 1010 if (error) 1011 return (error); 1012 bzero(&hst[len], MNAMELEN - len); 1013 /* sockargs() call must be after above copyin() calls */ 1014 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); 1015 if (error) 1016 return (error); 1017 args.fh = nfh; 1018 error = mountnfs(&args, mp, nam, pth, hst, &vp); 1019 return (error); 1020 } 1021 1022 /* 1023 * Common code for mount and mountroot 1024 */ 1025 static int 1026 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 1027 char *pth, char *hst, struct vnode **vpp) 1028 { 1029 struct nfsmount *nmp; 1030 struct nfsnode *np; 1031 int error; 1032 int rxcpu; 1033 int txcpu; 1034 1035 if (mp->mnt_flag & MNT_UPDATE) { 1036 nmp = VFSTONFS(mp); 1037 /* update paths, file handles, etc, here XXX */ 1038 FREE(nam, M_SONAME); 1039 return (0); 1040 } else { 1041 nmp = objcache_get(nfsmount_objcache, M_WAITOK); 1042 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 1043 mtx_init(&nmp->nm_rxlock); 1044 mtx_init(&nmp->nm_txlock); 1045 TAILQ_INIT(&nmp->nm_uidlruhead); 1046 TAILQ_INIT(&nmp->nm_bioq); 1047 TAILQ_INIT(&nmp->nm_reqq); 1048 TAILQ_INIT(&nmp->nm_reqtxq); 1049 TAILQ_INIT(&nmp->nm_reqrxq); 1050 mp->mnt_data = (qaddr_t)nmp; 1051 lwkt_token_init(&nmp->nm_token, "nfs_token"); 1052 } 1053 vfs_getnewfsid(mp); 1054 nmp->nm_mountp = mp; 1055 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 1056 1057 lwkt_gettoken(&nmp->nm_token); 1058 1059 /* 1060 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 1061 * high, depending on whether we end up with negative offsets in 1062 * the client or server somewhere. 2GB-1 may be safer. 1063 * 1064 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum 1065 * that we can handle until we find out otherwise. Note that seek 1066 * offsets are signed. 1067 */ 1068 if ((argp->flags & NFSMNT_NFSV3) == 0) 1069 nmp->nm_maxfilesize = 0xffffffffLL; 1070 else 1071 nmp->nm_maxfilesize = 0x7fffffffffffffffLL; 1072 1073 nmp->nm_timeo = NFS_TIMEO; 1074 nmp->nm_retry = NFS_RETRANS; 1075 nmp->nm_wsize = nfs_iosize(argp->flags & NFSMNT_NFSV3, argp->sotype); 1076 nmp->nm_rsize = nmp->nm_wsize; 1077 nmp->nm_readdirsize = NFS_READDIRSIZE; 1078 nmp->nm_numgrps = NFS_MAXGRPS; 1079 nmp->nm_readahead = NFS_DEFRAHEAD; 1080 nmp->nm_deadthresh = NFS_DEADTHRESH; 1081 nmp->nm_fhsize = argp->fhsize; 1082 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 1083 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 1084 nmp->nm_nam = nam; 1085 /* Set up the sockets and per-host congestion */ 1086 nmp->nm_sotype = argp->sotype; 1087 nmp->nm_soproto = argp->proto; 1088 nmp->nm_cred = crhold(proc0.p_ucred); 1089 1090 nfs_decode_args(nmp, argp); 1091 1092 /* 1093 * For Connection based sockets (TCP,...) defer the connect until 1094 * the first request, in case the server is not responding. 1095 */ 1096 if (nmp->nm_sotype == SOCK_DGRAM && 1097 (error = nfs_connect(nmp, NULL))) 1098 goto bad; 1099 1100 /* 1101 * This is silly, but it has to be set so that vinifod() works. 1102 * We do not want to do an nfs_statfs() here since we can get 1103 * stuck on a dead server and we are holding a lock on the mount 1104 * point. 1105 */ 1106 mp->mnt_stat.f_iosize = 1107 nfs_iosize(nmp->nm_flag & NFSMNT_NFSV3, nmp->nm_sotype); 1108 1109 /* 1110 * Install vop_ops for our vnops 1111 */ 1112 vfs_add_vnodeops(mp, &nfsv2_vnode_vops, &mp->mnt_vn_norm_ops); 1113 vfs_add_vnodeops(mp, &nfsv2_spec_vops, &mp->mnt_vn_spec_ops); 1114 vfs_add_vnodeops(mp, &nfsv2_fifo_vops, &mp->mnt_vn_fifo_ops); 1115 1116 /* 1117 * A reference count is needed on the nfsnode representing the 1118 * remote root. If this object is not persistent, then backward 1119 * traversals of the mount point (i.e. "..") will not work if 1120 * the nfsnode gets flushed out of the cache. Ufs does not have 1121 * this problem, because one can identify root inodes by their 1122 * number == ROOTINO (2). 1123 */ 1124 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); 1125 if (error) 1126 goto bad; 1127 *vpp = NFSTOV(np); 1128 1129 /* 1130 * Retrieval of mountpoint attributes is delayed until nfs_rot 1131 * or nfs_statfs are first called. This will happen either when 1132 * we first traverse the mount point or if somebody does a df(1). 1133 * 1134 * NFSSTA_GOTFSINFO is used to flag if we have successfully 1135 * retrieved mountpoint attributes. In the case of NFSv3 we 1136 * also flag static fsinfo. 1137 */ 1138 if (*vpp != NULL) 1139 (*vpp)->v_type = VNON; 1140 1141 /* 1142 * Lose the lock but keep the ref. 1143 */ 1144 vn_unlock(*vpp); 1145 lwkt_gettoken(&nfs_token); 1146 TAILQ_INSERT_TAIL(&nfs_mountq, nmp, nm_entry); 1147 lwkt_reltoken(&nfs_token); 1148 1149 #ifdef SMP 1150 switch(ncpus) { 1151 case 0: 1152 case 1: 1153 rxcpu = 0; 1154 txcpu = 0; 1155 break; 1156 case 2: 1157 rxcpu = 0; 1158 txcpu = 1; 1159 break; 1160 default: 1161 rxcpu = -1; 1162 txcpu = -1; 1163 break; 1164 } 1165 #else 1166 rxcpu = 0; 1167 txcpu = 0; 1168 #endif 1169 1170 /* 1171 * Start the reader and writer threads. 1172 */ 1173 lwkt_create(nfssvc_iod_reader, nmp, &nmp->nm_rxthread, 1174 NULL, 0, rxcpu, "nfsiod_rx"); 1175 lwkt_create(nfssvc_iod_writer, nmp, &nmp->nm_txthread, 1176 NULL, 0, txcpu, "nfsiod_tx"); 1177 lwkt_reltoken(&nmp->nm_token); 1178 return (0); 1179 bad: 1180 nfs_disconnect(nmp); 1181 lwkt_reltoken(&nmp->nm_token); 1182 nfs_free_mount(nmp); 1183 return (error); 1184 } 1185 1186 /* 1187 * unmount system call 1188 */ 1189 static int 1190 nfs_unmount(struct mount *mp, int mntflags) 1191 { 1192 struct nfsmount *nmp; 1193 int error, flags = 0; 1194 1195 nmp = VFSTONFS(mp); 1196 lwkt_gettoken(&nmp->nm_token); 1197 if (mntflags & MNT_FORCE) { 1198 flags |= FORCECLOSE; 1199 nmp->nm_flag |= NFSMNT_FORCE; 1200 } 1201 1202 /* 1203 * Goes something like this.. 1204 * - Call vflush() to clear out vnodes for this file system 1205 * - Close the socket 1206 * - Free up the data structures 1207 */ 1208 /* In the forced case, cancel any outstanding requests. */ 1209 if (flags & FORCECLOSE) { 1210 error = nfs_nmcancelreqs(nmp); 1211 if (error) { 1212 kprintf("NFS: %s: Unable to cancel all requests\n", 1213 mp->mnt_stat.f_mntfromname); 1214 /* continue anyway */ 1215 } 1216 } 1217 1218 /* 1219 * Must handshake with nfs_clientd() if it is active. XXX 1220 */ 1221 nmp->nm_state |= NFSSTA_DISMINPROG; 1222 1223 /* 1224 * We hold 1 extra ref on the root vnode; see comment in mountnfs(). 1225 * 1226 * If this doesn't work and we are doing a forced unmount we continue 1227 * anyway. 1228 */ 1229 error = vflush(mp, 1, flags); 1230 if (error) { 1231 nmp->nm_state &= ~NFSSTA_DISMINPROG; 1232 if ((flags & FORCECLOSE) == 0) { 1233 lwkt_reltoken(&nmp->nm_token); 1234 return (error); 1235 } 1236 } 1237 1238 /* 1239 * We are now committed to the unmount. 1240 * For NQNFS, let the server daemon free the nfsmount structure. 1241 */ 1242 if (nmp->nm_flag & NFSMNT_KERB) 1243 nmp->nm_state |= NFSSTA_DISMNT; 1244 nfssvc_iod_stop1(nmp); 1245 nfs_disconnect(nmp); 1246 nfssvc_iod_stop2(nmp); 1247 1248 lwkt_gettoken(&nfs_token); 1249 TAILQ_REMOVE(&nfs_mountq, nmp, nm_entry); 1250 lwkt_reltoken(&nfs_token); 1251 1252 lwkt_reltoken(&nmp->nm_token); 1253 1254 if ((nmp->nm_flag & NFSMNT_KERB) == 0) { 1255 nfs_free_mount(nmp); 1256 } 1257 return (0); 1258 } 1259 1260 void 1261 nfs_free_mount(struct nfsmount *nmp) 1262 { 1263 if (nmp->nm_cred) { 1264 crfree(nmp->nm_cred); 1265 nmp->nm_cred = NULL; 1266 } 1267 if (nmp->nm_nam) { 1268 FREE(nmp->nm_nam, M_SONAME); 1269 nmp->nm_nam = NULL; 1270 } 1271 objcache_put(nfsmount_objcache, nmp); 1272 } 1273 1274 /* 1275 * Return root of a filesystem 1276 */ 1277 static int 1278 nfs_root(struct mount *mp, struct vnode **vpp) 1279 { 1280 struct vnode *vp; 1281 struct nfsmount *nmp; 1282 struct vattr attrs; 1283 struct nfsnode *np; 1284 int error; 1285 1286 nmp = VFSTONFS(mp); 1287 lwkt_gettoken(&nmp->nm_token); 1288 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np); 1289 if (error) { 1290 lwkt_reltoken(&nmp->nm_token); 1291 return (error); 1292 } 1293 vp = NFSTOV(np); 1294 1295 /* 1296 * Get transfer parameters and root vnode attributes 1297 * 1298 * NOTE: nfs_fsinfo() is expected to override the default 1299 * f_iosize we set. 1300 */ 1301 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 1302 if (nmp->nm_flag & NFSMNT_NFSV3) { 1303 mp->mnt_stat.f_iosize = nfs_iosize(1, nmp->nm_sotype); 1304 error = nfs_fsinfo(nmp, vp, curthread); 1305 } else { 1306 if ((error = VOP_GETATTR(vp, &attrs)) == 0) 1307 nmp->nm_state |= NFSSTA_GOTFSINFO; 1308 1309 } 1310 } else { 1311 /* 1312 * The root vnode is usually cached by the namecache so do not 1313 * try to avoid going over the wire even if we have previous 1314 * information cached. A stale NFS mount can loop 1315 * forever resolving the root vnode if we return no-error when 1316 * there is in fact an error. 1317 */ 1318 np->n_attrstamp = 0; 1319 error = VOP_GETATTR(vp, &attrs); 1320 } 1321 if (vp->v_type == VNON) 1322 nfs_setvtype(vp, VDIR); 1323 vsetflags(vp, VROOT); 1324 if (error) 1325 vput(vp); 1326 else 1327 *vpp = vp; 1328 lwkt_reltoken(&nmp->nm_token); 1329 return (error); 1330 } 1331 1332 struct scaninfo { 1333 int rescan; 1334 int waitfor; 1335 int allerror; 1336 }; 1337 1338 static int nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data); 1339 static int nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 1340 1341 /* 1342 * Flush out the buffer cache 1343 */ 1344 /* ARGSUSED */ 1345 static int 1346 nfs_sync(struct mount *mp, int waitfor) 1347 { 1348 struct nfsmount *nmp = VFSTONFS(mp); 1349 struct scaninfo scaninfo; 1350 int error; 1351 1352 scaninfo.rescan = 1; 1353 scaninfo.waitfor = waitfor; 1354 scaninfo.allerror = 0; 1355 1356 /* 1357 * Force stale buffer cache information to be flushed. 1358 */ 1359 lwkt_gettoken(&nmp->nm_token); 1360 error = 0; 1361 while (error == 0 && scaninfo.rescan) { 1362 scaninfo.rescan = 0; 1363 error = vmntvnodescan(mp, VMSC_GETVP, nfs_sync_scan1, 1364 nfs_sync_scan2, &scaninfo); 1365 } 1366 lwkt_reltoken(&nmp->nm_token); 1367 return(error); 1368 } 1369 1370 static int 1371 nfs_sync_scan1(struct mount *mp, struct vnode *vp, void *data) 1372 { 1373 struct scaninfo *info = data; 1374 1375 if (vn_islocked(vp) || RB_EMPTY(&vp->v_rbdirty_tree)) 1376 return(-1); 1377 if (info->waitfor & MNT_LAZY) 1378 return(-1); 1379 return(0); 1380 } 1381 1382 static int 1383 nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 1384 { 1385 struct scaninfo *info = data; 1386 int error; 1387 1388 error = VOP_FSYNC(vp, info->waitfor, 0); 1389 if (error) 1390 info->allerror = error; 1391 return(0); 1392 } 1393 1394