1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Rick Macklem at The University of Guelph. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 33 * $FreeBSD: src/sys/nfs/nfs_vfsops.c,v 1.91.2.7 2003/01/27 20:04:08 dillon Exp $ 34 */ 35 36 #include "opt_bootp.h" 37 #include "opt_nfsroot.h" 38 39 #include <sys/param.h> 40 #include <sys/sockio.h> 41 #include <sys/proc.h> 42 #include <sys/vnode.h> 43 #include <sys/fcntl.h> 44 #include <sys/kernel.h> 45 #include <sys/sysctl.h> 46 #include <sys/malloc.h> 47 #include <sys/mount.h> 48 #include <sys/mbuf.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/systm.h> 52 #include <sys/objcache.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_extern.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/route.h> 60 #include <netinet/in.h> 61 62 #include <sys/thread2.h> 63 #include <sys/mutex2.h> 64 65 #include "rpcv2.h" 66 #include "nfsproto.h" 67 #include "nfs.h" 68 #include "nfsmount.h" 69 #include "nfsnode.h" 70 #include "xdr_subs.h" 71 #include "nfsm_subs.h" 72 #include "nfsdiskless.h" 73 #include "nfsmountrpc.h" 74 75 extern int nfs_mountroot(struct mount *mp); 76 extern void bootpc_init(void); 77 78 extern struct vop_ops nfsv2_vnode_vops; 79 extern struct vop_ops nfsv2_fifo_vops; 80 extern struct vop_ops nfsv2_spec_vops; 81 82 MALLOC_DEFINE(M_NFSREQ, "NFS req", "NFS request header"); 83 MALLOC_DEFINE(M_NFSBIGFH, "NFSV3 bigfh", "NFS version 3 file handle"); 84 MALLOC_DEFINE(M_NFSD, "NFS daemon", "Nfs server daemon structure"); 85 MALLOC_DEFINE(M_NFSDIROFF, "NFSV3 diroff", "NFS directory offset data"); 86 MALLOC_DEFINE(M_NFSRVDESC, "NFSV3 srvdesc", "NFS server socket descriptor"); 87 MALLOC_DEFINE(M_NFSUID, "NFS uid", "Nfs uid mapping structure"); 88 MALLOC_DEFINE(M_NFSHASH, "NFS hash", "NFS hash tables"); 89 90 struct objcache *nfsmount_objcache; 91 92 struct nfsstats nfsstats; 93 SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); 94 SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RD, &nfsstats, nfsstats, 95 "Nfs stats structure"); 96 static int nfs_ip_paranoia = 1; 97 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, 98 "Enable no-connection mode for protocols that support no-connection mode"); 99 #ifdef NFS_DEBUG 100 int nfs_debug; 101 SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); 102 #endif 103 104 /* 105 * Tunable to determine the Read/Write unit size. Maximum value 106 * is NFS_MAXDATA. We also default to NFS_MAXDATA. 107 */ 108 static int nfs_io_size = NFS_MAXDATA; 109 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_io_size, CTLFLAG_RW, 110 &nfs_io_size, 0, "NFS optimal I/O unit size"); 111 112 static void nfs_decode_args (struct nfsmount *nmp, 113 struct nfs_args *argp); 114 static int mountnfs (struct nfs_args *,struct mount *, 115 struct sockaddr *,char *,char *,struct vnode **); 116 static int nfs_mount ( struct mount *mp, char *path, caddr_t data, 117 struct ucred *cred); 118 static int nfs_unmount ( struct mount *mp, int mntflags); 119 static int nfs_root ( struct mount *mp, struct vnode **vpp); 120 static int nfs_statfs ( struct mount *mp, struct statfs *sbp, 121 struct ucred *cred); 122 static int nfs_statvfs(struct mount *mp, struct statvfs *sbp, 123 struct ucred *cred); 124 static int nfs_sync ( struct mount *mp, int waitfor); 125 126 /* 127 * nfs vfs operations. 128 */ 129 static struct vfsops nfs_vfsops = { 130 .vfs_mount = nfs_mount, 131 .vfs_unmount = nfs_unmount, 132 .vfs_root = nfs_root, 133 .vfs_statfs = nfs_statfs, 134 .vfs_statvfs = nfs_statvfs, 135 .vfs_sync = nfs_sync, 136 .vfs_init = nfs_init, 137 .vfs_uninit = nfs_uninit 138 }; 139 VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_MPSAFE); 140 MODULE_VERSION(nfs, 1); 141 142 /* 143 * This structure must be filled in by a primary bootstrap or bootstrap 144 * server for a diskless/dataless machine. It is initialized below just 145 * to ensure that it is allocated to initialized data (.data not .bss). 146 */ 147 struct nfs_diskless nfs_diskless = { { { 0 } } }; 148 struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; 149 int nfs_diskless_valid = 0; 150 151 SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, 152 &nfs_diskless_valid, 0, 153 "NFS diskless params were obtained"); 154 155 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, 156 nfsv3_diskless.root_hostnam, 0, 157 "Host name for mount point"); 158 159 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, 160 &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, 161 "%Ssockaddr_in", "Address of root server"); 162 163 SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_swappath, CTLFLAG_RD, 164 nfsv3_diskless.swap_hostnam, 0, 165 "Host name for mount ppoint"); 166 167 SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_swapaddr, CTLFLAG_RD, 168 &nfsv3_diskless.swap_saddr, sizeof nfsv3_diskless.swap_saddr, 169 "%Ssockaddr_in", "Address of swap server"); 170 171 172 void nfsargs_ntoh (struct nfs_args *); 173 static int nfs_mountdiskless (char *, char *, int, 174 struct sockaddr_in *, struct nfs_args *, 175 struct thread *, struct vnode **, 176 struct mount **); 177 static void nfs_convert_diskless (void); 178 static void nfs_convert_oargs (struct nfs_args *args, 179 struct onfs_args *oargs); 180 181 /* 182 * Calculate the buffer I/O block size to use. The maximum V2 block size 183 * is typically 8K, the maximum datagram size is typically 16K, and the 184 * maximum V3 block size is typically 32K. The buffer cache tends to work 185 * best with 16K blocks but we allow 32K for TCP connections. 186 * 187 * We force the block size to be at least a page for buffer cache efficiency. 188 */ 189 static int 190 nfs_iosize(int v3, int sotype) 191 { 192 int iosize; 193 int iomax; 194 195 if (v3) { 196 if (sotype == SOCK_STREAM) 197 iomax = NFS_MAXDATA; 198 else 199 iomax = NFS_MAXDGRAMDATA; 200 } else { 201 iomax = NFS_V2MAXDATA; 202 } 203 if ((iosize = nfs_io_size) > iomax) 204 iosize = iomax; 205 if (iosize < PAGE_SIZE) 206 iosize = PAGE_SIZE; 207 208 /* 209 * This is an aweful hack but until the buffer cache is rewritten 210 * we need it. The problem is that when you combine write() with 211 * mmap() the vm_page->valid bits can become weird looking 212 * (e.g. 0xfc). This occurs because NFS uses piecemeal buffers 213 * at the file EOF. To solve the problem the BIO system needs to 214 * be guarenteed that the NFS iosize for regular files will be a 215 * multiple of PAGE_SIZE so it can invalidate the whole page 216 * rather then just the piece of it owned by the buffer when 217 * NFS does vinvalbuf() calls. 218 */ 219 if (iosize & PAGE_MASK) 220 iosize = (iosize & ~PAGE_MASK) + PAGE_SIZE; 221 return iosize; 222 } 223 224 static void 225 nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) 226 { 227 args->version = NFS_ARGSVERSION; 228 args->addr = oargs->addr; 229 args->addrlen = oargs->addrlen; 230 args->sotype = oargs->sotype; 231 args->proto = oargs->proto; 232 args->fh = oargs->fh; 233 args->fhsize = oargs->fhsize; 234 args->flags = oargs->flags; 235 args->wsize = oargs->wsize; 236 args->rsize = oargs->rsize; 237 args->readdirsize = oargs->readdirsize; 238 args->timeo = oargs->timeo; 239 args->retrans = oargs->retrans; 240 args->maxgrouplist = oargs->maxgrouplist; 241 args->readahead = oargs->readahead; 242 args->deadthresh = oargs->deadthresh; 243 args->hostname = oargs->hostname; 244 } 245 246 static void 247 nfs_convert_diskless(void) 248 { 249 int i; 250 251 bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, 252 sizeof(struct ifaliasreq)); 253 bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, 254 sizeof(struct sockaddr_in)); 255 nfs_convert_oargs(&nfsv3_diskless.swap_args, &nfs_diskless.swap_args); 256 257 /* 258 * Copy the NFS handle passed from the diskless code. 259 * 260 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which 261 * will fail utterly with HAMMER due to limitations with NFSv2 262 * directory cookies. 263 */ 264 bcopy(nfs_diskless.swap_fh, nfsv3_diskless.swap_fh, NFSX_V2FH); 265 nfsv3_diskless.swap_fhsize = NFSX_V2FH; 266 for (i = NFSX_V2FH - 1; i >= 0; --i) { 267 if (nfs_diskless.swap_fh[i]) 268 break; 269 } 270 if (i < 0) 271 nfsv3_diskless.swap_fhsize = 0; 272 nfsv3_diskless.swap_fhsize = 0; /* FORCE DISABLE */ 273 274 bcopy(&nfs_diskless.swap_saddr,&nfsv3_diskless.swap_saddr, 275 sizeof(struct sockaddr_in)); 276 bcopy(nfs_diskless.swap_hostnam,nfsv3_diskless.swap_hostnam, MNAMELEN); 277 nfsv3_diskless.swap_nblks = nfs_diskless.swap_nblks; 278 bcopy(&nfs_diskless.swap_ucred, &nfsv3_diskless.swap_ucred, 279 sizeof(struct ucred)); 280 nfs_convert_oargs(&nfsv3_diskless.root_args, &nfs_diskless.root_args); 281 282 /* 283 * Copy the NFS handle passed from the diskless code. 284 * 285 * XXX CURRENTLY DISABLED - bootp passes us a NFSv2 handle which 286 * will fail utterly with HAMMER due to limitations with NFSv2 287 * directory cookies. 288 */ 289 bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); 290 nfsv3_diskless.root_fhsize = NFSX_V2FH; 291 for (i = NFSX_V2FH - 1; i >= 0; --i) { 292 if (nfs_diskless.root_fh[i]) 293 break; 294 } 295 if (i < 0) 296 nfsv3_diskless.root_fhsize = 0; 297 nfsv3_diskless.root_fhsize = 0; /* FORCE DISABLE */ 298 299 bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, 300 sizeof(struct sockaddr_in)); 301 bcopy(nfs_diskless.root_hostnam,nfsv3_diskless.root_hostnam, MNAMELEN); 302 nfsv3_diskless.root_time = nfs_diskless.root_time; 303 bcopy(nfs_diskless.my_hostnam,nfsv3_diskless.my_hostnam, 304 MAXHOSTNAMELEN); 305 nfs_diskless_valid = 3; 306 } 307 308 /* 309 * nfs statfs call 310 */ 311 int 312 nfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 313 { 314 struct vnode *vp; 315 struct nfs_statfs *sfp; 316 struct nfsmount *nmp = VFSTONFS(mp); 317 thread_t td = curthread; 318 int error = 0, retattr; 319 struct nfsnode *np; 320 u_quad_t tquad; 321 struct nfsm_info info; 322 323 info.mrep = NULL; 324 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3); 325 326 lwkt_gettoken(&nmp->nm_token); 327 328 #ifndef nolint 329 sfp = NULL; 330 #endif 331 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL); 332 if (error) { 333 lwkt_reltoken(&nmp->nm_token); 334 return (error); 335 } 336 vp = NFSTOV(np); 337 /* ignore the passed cred */ 338 cred = crget(); 339 cred->cr_ngroups = 1; 340 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 341 (void)nfs_fsinfo(nmp, vp, td); 342 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 343 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3)); 344 ERROROUT(nfsm_fhtom(&info, vp)); 345 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error)); 346 if (info.v3) { 347 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, 348 NFS_LATTR_NOSHRINK)); 349 } 350 if (error) { 351 if (info.mrep != NULL) 352 m_freem(info.mrep); 353 goto nfsmout; 354 } 355 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3))); 356 sbp->f_flags = nmp->nm_flag; 357 358 if (info.v3) { 359 sbp->f_bsize = NFS_FABLKSIZE; 360 tquad = fxdr_hyper(&sfp->sf_tbytes); 361 sbp->f_blocks = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 362 tquad = fxdr_hyper(&sfp->sf_fbytes); 363 sbp->f_bfree = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 364 tquad = fxdr_hyper(&sfp->sf_abytes); 365 sbp->f_bavail = (long)(tquad / ((u_quad_t)NFS_FABLKSIZE)); 366 sbp->f_files = (fxdr_unsigned(int32_t, 367 sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); 368 sbp->f_ffree = (fxdr_unsigned(int32_t, 369 sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); 370 } else { 371 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 372 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 373 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 374 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 375 sbp->f_files = 0; 376 sbp->f_ffree = 0; 377 } 378 379 /* 380 * Some values are pre-set in mnt_stat. Note in particular f_iosize 381 * cannot be changed once the filesystem is mounted as it is used 382 * as the basis for BIOs. 383 */ 384 if (sbp != &mp->mnt_stat) { 385 sbp->f_type = mp->mnt_vfc->vfc_typenum; 386 bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); 387 sbp->f_iosize = mp->mnt_stat.f_iosize; 388 } 389 m_freem(info.mrep); 390 info.mrep = NULL; 391 nfsmout: 392 vput(vp); 393 crfree(cred); 394 lwkt_reltoken(&nmp->nm_token); 395 return (error); 396 } 397 398 static int 399 nfs_statvfs(struct mount *mp, struct statvfs *sbp, struct ucred *cred) 400 { 401 struct vnode *vp; 402 struct nfs_statfs *sfp; 403 struct nfsmount *nmp = VFSTONFS(mp); 404 thread_t td = curthread; 405 int error = 0, retattr; 406 struct nfsnode *np; 407 struct nfsm_info info; 408 409 info.mrep = NULL; 410 info.v3 = (nmp->nm_flag & NFSMNT_NFSV3); 411 lwkt_gettoken(&nmp->nm_token); 412 413 #ifndef nolint 414 sfp = NULL; 415 #endif 416 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL); 417 if (error) { 418 lwkt_reltoken(&nmp->nm_token); 419 return (error); 420 } 421 vp = NFSTOV(np); 422 /* ignore the passed cred */ 423 cred = crget(); 424 cred->cr_ngroups = 1; 425 if (info.v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) 426 (void)nfs_fsinfo(nmp, vp, td); 427 nfsstats.rpccnt[NFSPROC_FSSTAT]++; 428 nfsm_reqhead(&info, vp, NFSPROC_FSSTAT, NFSX_FH(info.v3)); 429 ERROROUT(nfsm_fhtom(&info, vp)); 430 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSSTAT, td, cred, &error)); 431 if (info.v3) { 432 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, 433 NFS_LATTR_NOSHRINK)); 434 } 435 if (error) { 436 if (info.mrep != NULL) 437 m_freem(info.mrep); 438 goto nfsmout; 439 } 440 NULLOUT(sfp = nfsm_dissect(&info, NFSX_STATFS(info.v3))); 441 sbp->f_flag = nmp->nm_flag; 442 sbp->f_owner = nmp->nm_cred->cr_ruid; 443 444 if (info.v3) { 445 sbp->f_bsize = NFS_FABLKSIZE; 446 sbp->f_frsize = NFS_FABLKSIZE; 447 sbp->f_blocks = (fxdr_hyper(&sfp->sf_tbytes) / 448 ((u_quad_t)NFS_FABLKSIZE)); 449 sbp->f_bfree = (fxdr_hyper(&sfp->sf_fbytes) / 450 ((u_quad_t)NFS_FABLKSIZE)); 451 sbp->f_bavail = (fxdr_hyper(&sfp->sf_abytes) / 452 ((u_quad_t)NFS_FABLKSIZE)); 453 sbp->f_files = fxdr_hyper(&sfp->sf_tfiles); 454 sbp->f_ffree = fxdr_hyper(&sfp->sf_ffiles); 455 sbp->f_favail = fxdr_hyper(&sfp->sf_afiles); 456 } else { 457 sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); 458 sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); 459 sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); 460 sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); 461 sbp->f_files = 0; 462 sbp->f_ffree = 0; 463 sbp->f_favail = 0; 464 } 465 sbp->f_syncreads = 0; 466 sbp->f_syncwrites = 0; 467 sbp->f_asyncreads = 0; 468 sbp->f_asyncwrites = 0; 469 sbp->f_type = mp->mnt_vfc->vfc_typenum; 470 471 m_freem(info.mrep); 472 info.mrep = NULL; 473 nfsmout: 474 vput(vp); 475 crfree(cred); 476 lwkt_reltoken(&nmp->nm_token); 477 return (error); 478 } 479 480 /* 481 * nfs version 3 fsinfo rpc call 482 */ 483 int 484 nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct thread *td) 485 { 486 struct nfsv3_fsinfo *fsp; 487 u_int32_t pref, max; 488 int error = 0, retattr; 489 u_int64_t maxfsize; 490 struct nfsm_info info; 491 492 info.v3 = 1; 493 nfsstats.rpccnt[NFSPROC_FSINFO]++; 494 nfsm_reqhead(&info, vp, NFSPROC_FSINFO, NFSX_FH(1)); 495 ERROROUT(nfsm_fhtom(&info, vp)); 496 NEGKEEPOUT(nfsm_request(&info, vp, NFSPROC_FSINFO, td, 497 nfs_vpcred(vp, ND_READ), &error)); 498 ERROROUT(nfsm_postop_attr(&info, vp, &retattr, NFS_LATTR_NOSHRINK)); 499 if (error == 0) { 500 NULLOUT(fsp = nfsm_dissect(&info, NFSX_V3FSINFO)); 501 pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); 502 if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) 503 nmp->nm_wsize = roundup2(pref, NFS_FABLKSIZE); 504 max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); 505 if (max < nmp->nm_wsize && max > 0) { 506 nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); 507 if (nmp->nm_wsize == 0) 508 nmp->nm_wsize = max; 509 } 510 pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); 511 if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) 512 nmp->nm_rsize = roundup2(pref, NFS_FABLKSIZE); 513 max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); 514 if (max < nmp->nm_rsize && max > 0) { 515 nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); 516 if (nmp->nm_rsize == 0) 517 nmp->nm_rsize = max; 518 } 519 pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); 520 if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) 521 nmp->nm_readdirsize = roundup2(pref, NFS_DIRBLKSIZ); 522 if (max < nmp->nm_readdirsize && max > 0) { 523 nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); 524 if (nmp->nm_readdirsize == 0) 525 nmp->nm_readdirsize = max; 526 } 527 maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); 528 if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) 529 nmp->nm_maxfilesize = maxfsize; 530 nmp->nm_state |= NFSSTA_GOTFSINFO; 531 532 /* 533 * Use the smaller of rsize/wsize for the biosize. 534 */ 535 if (nmp->nm_rsize < nmp->nm_wsize) 536 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_rsize; 537 else 538 nmp->nm_mountp->mnt_stat.f_iosize = nmp->nm_wsize; 539 } 540 m_freem(info.mrep); 541 info.mrep = NULL; 542 nfsmout: 543 return (error); 544 } 545 546 /* 547 * Mount a remote root fs via. nfs. This depends on the info in the 548 * nfs_diskless structure that has been filled in properly by some primary 549 * bootstrap. 550 * It goes something like this: 551 * - do enough of "ifconfig" by calling ifioctl() so that the system 552 * can talk to the server 553 * - If nfs_diskless.mygateway is filled in, use that address as 554 * a default gateway. 555 * - build the rootfs mount point and call mountnfs() to do the rest. 556 */ 557 int 558 nfs_mountroot(struct mount *mp) 559 { 560 struct mount *swap_mp; 561 struct nfsv3_diskless *nd = &nfsv3_diskless; 562 struct socket *so; 563 struct vnode *vp; 564 struct thread *td = curthread; /* XXX */ 565 int error, i; 566 u_long l; 567 char buf[128], addr[INET_ADDRSTRLEN]; 568 569 #if defined(BOOTP_NFSROOT) && defined(BOOTP) 570 bootpc_init(); /* use bootp to get nfs_diskless filled in */ 571 #endif 572 573 /* 574 * XXX time must be non-zero when we init the interface or else 575 * the arp code will wedge... 576 */ 577 while (mycpu->gd_time_seconds == 0) 578 tsleep(mycpu, 0, "arpkludge", 10); 579 580 /* 581 * The boot code may have passed us a diskless structure. 582 */ 583 kprintf("DISKLESS %d\n", nfs_diskless_valid); 584 if (nfs_diskless_valid == 1) 585 nfs_convert_diskless(); 586 587 /* 588 * NFSv3 is required. 589 */ 590 nd->root_args.flags |= NFSMNT_NFSV3 | NFSMNT_RDIRPLUS; 591 nd->swap_args.flags |= NFSMNT_NFSV3; 592 593 #define SINP(sockaddr) ((struct sockaddr_in *)(sockaddr)) 594 kprintf("nfs_mountroot: interface %s ip %s", 595 nd->myif.ifra_name, 596 kinet_ntoa(SINP(&nd->myif.ifra_addr)->sin_addr, addr)); 597 kprintf(" bcast %s", 598 kinet_ntoa(SINP(&nd->myif.ifra_broadaddr)->sin_addr, addr)); 599 kprintf(" mask %s\n", 600 kinet_ntoa(SINP(&nd->myif.ifra_mask)->sin_addr, addr)); 601 #undef SINP 602 603 /* 604 * XXX splnet, so networks will receive... 605 */ 606 crit_enter(); 607 608 /* 609 * BOOTP does not necessarily have to be compiled into the kernel 610 * for an NFS root to work. If we inherited the network 611 * configuration for PXEBOOT then pxe_setup_nfsdiskless() has figured 612 * out our interface for us and all we need to do is ifconfig the 613 * interface. We only do this if the interface has not already been 614 * ifconfig'd by e.g. BOOTP. 615 */ 616 error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0, td); 617 if (error) { 618 panic("nfs_mountroot: socreate(%04x): %d", 619 nd->myif.ifra_addr.sa_family, error); 620 } 621 622 error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, proc0.p_ucred); 623 if (error) 624 panic("nfs_mountroot: SIOCAIFADDR: %d", error); 625 626 soclose(so, FNONBLOCK); 627 628 /* 629 * If the gateway field is filled in, set it as the default route. 630 */ 631 if (nd->mygateway.sin_len != 0) { 632 struct sockaddr_in mask, sin; 633 634 bzero((caddr_t)&mask, sizeof(mask)); 635 sin = mask; 636 sin.sin_family = AF_INET; 637 sin.sin_len = sizeof(sin); 638 kprintf("nfs_mountroot: gateway %s\n", 639 kinet_ntoa(nd->mygateway.sin_addr, addr)); 640 error = rtrequest_global(RTM_ADD, (struct sockaddr *)&sin, 641 (struct sockaddr *)&nd->mygateway, 642 (struct sockaddr *)&mask, 643 RTF_UP | RTF_GATEWAY); 644 if (error) 645 kprintf("nfs_mountroot: unable to set gateway, error %d, continuing anyway\n", error); 646 } 647 648 /* 649 * Create the rootfs mount point. 650 */ 651 nd->root_args.fh = nd->root_fh; 652 nd->root_args.fhsize = nd->root_fhsize; 653 l = ntohl(nd->root_saddr.sin_addr.s_addr); 654 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 655 (l >> 24) & 0xff, (l >> 16) & 0xff, 656 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->root_hostnam); 657 kprintf("NFS_ROOT: %s\n",buf); 658 error = nfs_mountdiskless(buf, "/", MNT_RDONLY, &nd->root_saddr, 659 &nd->root_args, td, &vp, &mp); 660 if (error) { 661 mp->mnt_vfc->vfc_refcount--; 662 crit_exit(); 663 return (error); 664 } 665 666 swap_mp = NULL; 667 if (nd->swap_nblks) { 668 669 /* Convert to DEV_BSIZE instead of Kilobyte */ 670 nd->swap_nblks *= 2; 671 672 /* 673 * Create a fake mount point just for the swap vnode so that the 674 * swap file can be on a different server from the rootfs. 675 */ 676 nd->swap_args.fh = nd->swap_fh; 677 nd->swap_args.fhsize = nd->swap_fhsize; 678 l = ntohl(nd->swap_saddr.sin_addr.s_addr); 679 ksnprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", 680 (l >> 24) & 0xff, (l >> 16) & 0xff, 681 (l >> 8) & 0xff, (l >> 0) & 0xff,nd->swap_hostnam); 682 kprintf("NFS SWAP: %s\n",buf); 683 vp = NULL; /* avoid gcc warnings */ 684 error = nfs_mountdiskless(buf, "/swap", 0, &nd->swap_saddr, 685 &nd->swap_args, td, &vp, &swap_mp); 686 if (error) { 687 crit_exit(); 688 return (error); 689 } 690 vfs_unbusy(swap_mp); 691 692 VTONFS(vp)->n_size = VTONFS(vp)->n_vattr.va_size = 693 nd->swap_nblks * DEV_BSIZE ; 694 695 /* 696 * Since the swap file is not the root dir of a file system, 697 * hack it to a regular file. 698 */ 699 vclrflags(vp, VROOT); 700 vref(vp); 701 nfs_setvtype(vp, VREG); 702 swaponvp(td, vp, nd->swap_nblks); 703 } 704 705 mp->mnt_flag |= MNT_ROOTFS; 706 707 /* 708 * This is not really an nfs issue, but it is much easier to 709 * set hostname here and then let the "/etc/rc.xxx" files 710 * mount the right /var based upon its preset value. 711 */ 712 bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); 713 hostname[MAXHOSTNAMELEN - 1] = '\0'; 714 for (i = 0; i < MAXHOSTNAMELEN; i++) 715 if (hostname[i] == '\0') 716 break; 717 inittodr(ntohl(nd->root_time)); 718 crit_exit(); 719 return (0); 720 } 721 722 /* 723 * Internal version of mount system call for diskless setup. 724 */ 725 static int 726 nfs_mountdiskless(char *path, char *which, int mountflag, 727 struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, 728 struct vnode **vpp, struct mount **mpp) 729 { 730 struct mount *mp; 731 struct sockaddr *nam; 732 int didalloc = 0; 733 int error; 734 735 mp = *mpp; 736 737 if (mp == NULL) { 738 if ((error = vfs_rootmountalloc("nfs", path, &mp)) != 0) { 739 kprintf("nfs_mountroot: NFS not configured"); 740 return (error); 741 } 742 didalloc = 1; 743 } 744 mp->mnt_kern_flag = 0; 745 mp->mnt_flag = mountflag; 746 nam = dup_sockaddr((struct sockaddr *)sin); 747 748 #if defined(BOOTP) || defined(NFS_ROOT) 749 if (args->fhsize == 0) { 750 char *xpath = path; 751 752 kprintf("NFS_ROOT: No FH passed from loader, attempting " 753 "mount rpc..."); 754 while (*xpath && *xpath != ':') 755 ++xpath; 756 if (*xpath) 757 ++xpath; 758 args->fhsize = 0; 759 error = md_mount(sin, xpath, args->fh, &args->fhsize, args, td); 760 if (error) { 761 kprintf("failed error %d.\n", error); 762 goto haderror; 763 } 764 kprintf("success!\n"); 765 } 766 #endif 767 768 if ((error = mountnfs(args, mp, nam, which, path, vpp)) != 0) { 769 #if defined(BOOTP) || defined(NFS_ROOT) 770 haderror: 771 #endif 772 kprintf("nfs_mountroot: mount %s on %s: %d", path, which, error); 773 mp->mnt_vfc->vfc_refcount--; 774 if (didalloc) 775 kfree(mp, M_MOUNT); 776 kfree(nam, M_SONAME); 777 return (error); 778 } 779 *mpp = mp; 780 return (0); 781 } 782 783 static void 784 nfs_decode_args(struct nfsmount *nmp, struct nfs_args *argp) 785 { 786 int adjsock; 787 int maxio; 788 789 crit_enter(); 790 /* 791 * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes 792 * no sense in that context. 793 */ 794 if (nmp->nm_sotype == SOCK_STREAM) { 795 nmp->nm_flag &= ~NFSMNT_NOCONN; 796 argp->flags &= ~NFSMNT_NOCONN; 797 } 798 799 /* 800 * readdirplus is NFSv3 only. 801 */ 802 if ((argp->flags & NFSMNT_NFSV3) == 0) { 803 nmp->nm_flag &= ~NFSMNT_RDIRPLUS; 804 argp->flags &= ~NFSMNT_RDIRPLUS; 805 } 806 807 /* 808 * Re-bind if rsrvd port flag has changed 809 */ 810 adjsock = (nmp->nm_flag & NFSMNT_RESVPORT) != 811 (argp->flags & NFSMNT_RESVPORT); 812 813 /* Update flags atomically. Don't change the lock bits. */ 814 nmp->nm_flag = argp->flags | nmp->nm_flag; 815 crit_exit(); 816 817 if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { 818 nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; 819 if (nmp->nm_timeo < NFS_MINTIMEO) 820 nmp->nm_timeo = NFS_MINTIMEO; 821 else if (nmp->nm_timeo > NFS_MAXTIMEO) 822 nmp->nm_timeo = NFS_MAXTIMEO; 823 } 824 825 if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { 826 nmp->nm_retry = argp->retrans; 827 if (nmp->nm_retry > NFS_MAXREXMIT) 828 nmp->nm_retry = NFS_MAXREXMIT; 829 } 830 831 /* 832 * These parameters effect the buffer cache and cannot be changed 833 * once we've successfully mounted. 834 */ 835 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 836 maxio = nfs_iosize(argp->flags & NFSMNT_NFSV3, nmp->nm_sotype); 837 838 if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { 839 nmp->nm_wsize = argp->wsize; 840 /* Round down to multiple of blocksize */ 841 nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); 842 if (nmp->nm_wsize <= 0) 843 nmp->nm_wsize = NFS_FABLKSIZE; 844 } 845 if (nmp->nm_wsize > maxio) 846 nmp->nm_wsize = maxio; 847 if (nmp->nm_wsize > MAXBSIZE) 848 nmp->nm_wsize = MAXBSIZE; 849 850 if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { 851 nmp->nm_rsize = argp->rsize; 852 /* Round down to multiple of blocksize */ 853 nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); 854 if (nmp->nm_rsize <= 0) 855 nmp->nm_rsize = NFS_FABLKSIZE; 856 } 857 if (nmp->nm_rsize > maxio) 858 nmp->nm_rsize = maxio; 859 if (nmp->nm_rsize > MAXBSIZE) 860 nmp->nm_rsize = MAXBSIZE; 861 862 if ((argp->flags & NFSMNT_READDIRSIZE) && 863 argp->readdirsize > 0) { 864 nmp->nm_readdirsize = argp->readdirsize; 865 } 866 if (nmp->nm_readdirsize > maxio) 867 nmp->nm_readdirsize = maxio; 868 if (nmp->nm_readdirsize > nmp->nm_rsize) 869 nmp->nm_readdirsize = nmp->nm_rsize; 870 } 871 872 if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) 873 nmp->nm_acregmin = argp->acregmin; 874 else 875 nmp->nm_acregmin = NFS_MINATTRTIMO; 876 if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) 877 nmp->nm_acregmax = argp->acregmax; 878 else 879 nmp->nm_acregmax = NFS_MAXATTRTIMO; 880 if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) 881 nmp->nm_acdirmin = argp->acdirmin; 882 else 883 nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; 884 if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) 885 nmp->nm_acdirmax = argp->acdirmax; 886 else 887 nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; 888 if (nmp->nm_acdirmin > nmp->nm_acdirmax) 889 nmp->nm_acdirmin = nmp->nm_acdirmax; 890 if (nmp->nm_acregmin > nmp->nm_acregmax) 891 nmp->nm_acregmin = nmp->nm_acregmax; 892 893 if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { 894 if (argp->maxgrouplist <= NFS_MAXGRPS) 895 nmp->nm_numgrps = argp->maxgrouplist; 896 else 897 nmp->nm_numgrps = NFS_MAXGRPS; 898 } 899 if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { 900 if (argp->readahead <= NFS_MAXRAHEAD) 901 nmp->nm_readahead = argp->readahead; 902 else 903 nmp->nm_readahead = NFS_MAXRAHEAD; 904 } 905 if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) { 906 if (argp->deadthresh <= NFS_NEVERDEAD) 907 nmp->nm_deadthresh = argp->deadthresh; 908 else 909 nmp->nm_deadthresh = NFS_NEVERDEAD; 910 } 911 912 if (nmp->nm_so && adjsock) { 913 nfs_safedisconnect(nmp); 914 if (nmp->nm_sotype == SOCK_DGRAM) 915 while (nfs_connect(nmp, NULL)) { 916 kprintf("nfs_args: retrying connect\n"); 917 (void) tsleep((caddr_t)&lbolt, 0, "nfscon", 0); 918 } 919 } 920 } 921 922 /* 923 * VFS Operations. 924 * 925 * mount system call 926 * It seems a bit dumb to copyinstr() the host and path here and then 927 * bcopy() them in mountnfs(), but I wanted to detect errors before 928 * doing the sockargs() call because sockargs() allocates an mbuf and 929 * an error after that means that I have to release the mbuf. 930 */ 931 /* ARGSUSED */ 932 static int 933 nfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 934 { 935 int error; 936 struct nfs_args args; 937 struct sockaddr *nam; 938 struct vnode *vp; 939 char pth[MNAMELEN], hst[MNAMELEN]; 940 size_t len; 941 u_char nfh[NFSX_V3FHMAX]; 942 943 if (path == NULL) { 944 nfs_mountroot(mp); 945 return (0); 946 } 947 error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args)); 948 if (error) 949 return (error); 950 if (args.version != NFS_ARGSVERSION) { 951 #ifdef COMPAT_PRELITE2 952 /* 953 * If the argument version is unknown, then assume the 954 * caller is a pre-lite2 4.4BSD client and convert its 955 * arguments. 956 */ 957 struct onfs_args oargs; 958 error = copyin(data, (caddr_t)&oargs, sizeof (struct onfs_args)); 959 if (error) 960 return (error); 961 nfs_convert_oargs(&args,&oargs); 962 #else /* !COMPAT_PRELITE2 */ 963 return (EPROGMISMATCH); 964 #endif /* COMPAT_PRELITE2 */ 965 } 966 if (mp->mnt_flag & MNT_UPDATE) { 967 struct nfsmount *nmp = VFSTONFS(mp); 968 969 if (nmp == NULL) 970 return (EIO); 971 /* 972 * When doing an update, we can't change from or to 973 * v3, or change cookie translation, or rsize or wsize. 974 */ 975 args.flags &= ~(NFSMNT_NFSV3 | NFSMNT_RSIZE | NFSMNT_WSIZE); 976 args.flags |= nmp->nm_flag & (NFSMNT_NFSV3); 977 nfs_decode_args(nmp, &args); 978 return (0); 979 } 980 981 /* 982 * Make the nfs_ip_paranoia sysctl serve as the default connection 983 * or no-connection mode for those protocols that support 984 * no-connection mode (the flag will be cleared later for protocols 985 * that do not support no-connection mode). This will allow a client 986 * to receive replies from a different IP then the request was 987 * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), 988 * not 0. 989 */ 990 if (nfs_ip_paranoia == 0) 991 args.flags |= NFSMNT_NOCONN; 992 if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) 993 return (EINVAL); 994 error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); 995 if (error) 996 return (error); 997 error = copyinstr(path, pth, MNAMELEN-1, &len); 998 if (error) 999 return (error); 1000 bzero(&pth[len], MNAMELEN - len); 1001 error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); 1002 if (error) 1003 return (error); 1004 bzero(&hst[len], MNAMELEN - len); 1005 /* sockargs() call must be after above copyin() calls */ 1006 error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); 1007 if (error) 1008 return (error); 1009 args.fh = nfh; 1010 error = mountnfs(&args, mp, nam, pth, hst, &vp); 1011 return (error); 1012 } 1013 1014 /* 1015 * Common code for mount and mountroot 1016 */ 1017 static int 1018 mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, 1019 char *pth, char *hst, struct vnode **vpp) 1020 { 1021 struct nfsmount *nmp; 1022 struct nfsnode *np; 1023 int error; 1024 int rxcpu; 1025 int txcpu; 1026 1027 if (mp->mnt_flag & MNT_UPDATE) { 1028 nmp = VFSTONFS(mp); 1029 /* update paths, file handles, etc, here XXX */ 1030 kfree(nam, M_SONAME); 1031 return (0); 1032 } else { 1033 nmp = objcache_get(nfsmount_objcache, M_WAITOK); 1034 bzero((caddr_t)nmp, sizeof (struct nfsmount)); 1035 mtx_init_flags(&nmp->nm_rxlock, "nfsrx", MTXF_NOCOLLSTATS); 1036 mtx_init_flags(&nmp->nm_txlock, "nfstx", MTXF_NOCOLLSTATS); 1037 TAILQ_INIT(&nmp->nm_uidlruhead); 1038 TAILQ_INIT(&nmp->nm_bioq); 1039 TAILQ_INIT(&nmp->nm_reqq); 1040 TAILQ_INIT(&nmp->nm_reqtxq); 1041 TAILQ_INIT(&nmp->nm_reqrxq); 1042 mp->mnt_data = (qaddr_t)nmp; 1043 lwkt_token_init(&nmp->nm_token, "nfs_token"); 1044 } 1045 vfs_getnewfsid(mp); 1046 nmp->nm_mountp = mp; 1047 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 1048 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 1049 1050 lwkt_gettoken(&nmp->nm_token); 1051 1052 /* 1053 * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too 1054 * high, depending on whether we end up with negative offsets in 1055 * the client or server somewhere. 2GB-1 may be safer. 1056 * 1057 * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum 1058 * that we can handle until we find out otherwise. Note that seek 1059 * offsets are signed. 1060 */ 1061 if ((argp->flags & NFSMNT_NFSV3) == 0) 1062 nmp->nm_maxfilesize = 0xffffffffLL; 1063 else 1064 nmp->nm_maxfilesize = 0x7fffffffffffffffLL; 1065 1066 nmp->nm_timeo = NFS_TIMEO; 1067 nmp->nm_retry = NFS_RETRANS; 1068 nmp->nm_wsize = nfs_iosize(argp->flags & NFSMNT_NFSV3, argp->sotype); 1069 nmp->nm_rsize = nmp->nm_wsize; 1070 nmp->nm_readdirsize = NFS_READDIRSIZE; 1071 nmp->nm_numgrps = NFS_MAXGRPS; 1072 nmp->nm_readahead = NFS_DEFRAHEAD; 1073 nmp->nm_deadthresh = NFS_DEADTHRESH; 1074 nmp->nm_fhsize = argp->fhsize; 1075 bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); 1076 bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); 1077 nmp->nm_nam = nam; 1078 /* Set up the sockets and per-host congestion */ 1079 nmp->nm_sotype = argp->sotype; 1080 nmp->nm_soproto = argp->proto; 1081 nmp->nm_cred = crhold(proc0.p_ucred); 1082 1083 nfs_decode_args(nmp, argp); 1084 1085 /* 1086 * For Connection based sockets (TCP,...) defer the connect until 1087 * the first request, in case the server is not responding. 1088 */ 1089 if (nmp->nm_sotype == SOCK_DGRAM && 1090 (error = nfs_connect(nmp, NULL))) 1091 goto bad; 1092 1093 /* 1094 * This is silly, but it has to be set so that vinifod() works. 1095 * We do not want to do an nfs_statfs() here since we can get 1096 * stuck on a dead server and we are holding a lock on the mount 1097 * point. 1098 */ 1099 mp->mnt_stat.f_iosize = 1100 nfs_iosize(nmp->nm_flag & NFSMNT_NFSV3, nmp->nm_sotype); 1101 1102 /* 1103 * Install vop_ops for our vnops 1104 */ 1105 vfs_add_vnodeops(mp, &nfsv2_vnode_vops, &mp->mnt_vn_norm_ops); 1106 vfs_add_vnodeops(mp, &nfsv2_spec_vops, &mp->mnt_vn_spec_ops); 1107 vfs_add_vnodeops(mp, &nfsv2_fifo_vops, &mp->mnt_vn_fifo_ops); 1108 1109 /* 1110 * A reference count is needed on the nfsnode representing the 1111 * remote root. If this object is not persistent, then backward 1112 * traversals of the mount point (i.e. "..") will not work if 1113 * the nfsnode gets flushed out of the cache. Ufs does not have 1114 * this problem, because one can identify root inodes by their 1115 * number == UFS_ROOTINO (2). 1116 */ 1117 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL); 1118 if (error) 1119 goto bad; 1120 *vpp = NFSTOV(np); 1121 1122 /* 1123 * Retrieval of mountpoint attributes is delayed until nfs_rot 1124 * or nfs_statfs are first called. This will happen either when 1125 * we first traverse the mount point or if somebody does a df(1). 1126 * 1127 * NFSSTA_GOTFSINFO is used to flag if we have successfully 1128 * retrieved mountpoint attributes. In the case of NFSv3 we 1129 * also flag static fsinfo. 1130 */ 1131 if (*vpp != NULL) 1132 (*vpp)->v_type = VNON; 1133 1134 /* 1135 * Lose the lock but keep the ref. 1136 */ 1137 vn_unlock(*vpp); 1138 lwkt_gettoken(&nfs_token); 1139 TAILQ_INSERT_TAIL(&nfs_mountq, nmp, nm_entry); 1140 lwkt_reltoken(&nfs_token); 1141 1142 switch(ncpus) { 1143 case 0: 1144 case 1: 1145 rxcpu = 0; 1146 txcpu = 0; 1147 break; 1148 case 2: 1149 rxcpu = 0; 1150 txcpu = 1; 1151 break; 1152 default: 1153 rxcpu = -1; 1154 txcpu = -1; 1155 break; 1156 } 1157 1158 /* 1159 * Start the reader and writer threads. 1160 */ 1161 lwkt_create(nfssvc_iod_reader, nmp, &nmp->nm_rxthread, 1162 NULL, 0, rxcpu, "nfsiod_rx"); 1163 lwkt_create(nfssvc_iod_writer, nmp, &nmp->nm_txthread, 1164 NULL, 0, txcpu, "nfsiod_tx"); 1165 lwkt_reltoken(&nmp->nm_token); 1166 return (0); 1167 bad: 1168 nfs_disconnect(nmp); 1169 lwkt_reltoken(&nmp->nm_token); 1170 nfs_free_mount(nmp); 1171 return (error); 1172 } 1173 1174 /* 1175 * unmount system call 1176 */ 1177 static int 1178 nfs_unmount(struct mount *mp, int mntflags) 1179 { 1180 struct nfsmount *nmp; 1181 int error, flags = 0; 1182 1183 nmp = VFSTONFS(mp); 1184 lwkt_gettoken(&nmp->nm_token); 1185 if (mntflags & MNT_FORCE) { 1186 flags |= FORCECLOSE; 1187 nmp->nm_flag |= NFSMNT_FORCE; 1188 } 1189 1190 /* 1191 * Goes something like this.. 1192 * - Call vflush() to clear out vnodes for this file system 1193 * - Close the socket 1194 * - Free up the data structures 1195 */ 1196 /* In the forced case, cancel any outstanding requests. */ 1197 if (flags & FORCECLOSE) { 1198 error = nfs_nmcancelreqs(nmp); 1199 if (error) { 1200 kprintf("NFS: %s: Unable to cancel all requests\n", 1201 mp->mnt_stat.f_mntfromname); 1202 /* continue anyway */ 1203 } 1204 } 1205 1206 /* 1207 * Must handshake with nfs_clientd() if it is active. XXX 1208 */ 1209 nmp->nm_state |= NFSSTA_DISMINPROG; 1210 1211 /* 1212 * We hold 1 extra ref on the root vnode; see comment in mountnfs(). 1213 * 1214 * If this doesn't work and we are doing a forced unmount we continue 1215 * anyway. 1216 */ 1217 error = vflush(mp, 1, flags); 1218 if (error) { 1219 nmp->nm_state &= ~NFSSTA_DISMINPROG; 1220 if ((flags & FORCECLOSE) == 0) { 1221 lwkt_reltoken(&nmp->nm_token); 1222 return (error); 1223 } 1224 } 1225 1226 /* 1227 * We are now committed to the unmount. 1228 * For NQNFS, let the server daemon free the nfsmount structure. 1229 */ 1230 if (nmp->nm_flag & NFSMNT_KERB) 1231 nmp->nm_state |= NFSSTA_DISMNT; 1232 nfssvc_iod_stop1(nmp); 1233 nfs_disconnect(nmp); 1234 nfssvc_iod_stop2(nmp); 1235 1236 lwkt_gettoken(&nfs_token); 1237 TAILQ_REMOVE(&nfs_mountq, nmp, nm_entry); 1238 lwkt_reltoken(&nfs_token); 1239 1240 lwkt_reltoken(&nmp->nm_token); 1241 1242 if ((nmp->nm_flag & NFSMNT_KERB) == 0) { 1243 nfs_free_mount(nmp); 1244 } 1245 return (0); 1246 } 1247 1248 void 1249 nfs_free_mount(struct nfsmount *nmp) 1250 { 1251 if (nmp->nm_cred) { 1252 crfree(nmp->nm_cred); 1253 nmp->nm_cred = NULL; 1254 } 1255 if (nmp->nm_nam) { 1256 kfree(nmp->nm_nam, M_SONAME); 1257 nmp->nm_nam = NULL; 1258 } 1259 objcache_put(nfsmount_objcache, nmp); 1260 } 1261 1262 /* 1263 * Return root of a filesystem 1264 */ 1265 static int 1266 nfs_root(struct mount *mp, struct vnode **vpp) 1267 { 1268 struct vnode *vp; 1269 struct nfsmount *nmp; 1270 struct vattr attrs; 1271 struct nfsnode *np; 1272 int error; 1273 1274 nmp = VFSTONFS(mp); 1275 lwkt_gettoken(&nmp->nm_token); 1276 error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, NULL); 1277 if (error) { 1278 lwkt_reltoken(&nmp->nm_token); 1279 return (error); 1280 } 1281 vp = NFSTOV(np); 1282 1283 /* 1284 * Get transfer parameters and root vnode attributes 1285 * 1286 * NOTE: nfs_fsinfo() is expected to override the default 1287 * f_iosize we set. 1288 */ 1289 if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { 1290 if (nmp->nm_flag & NFSMNT_NFSV3) { 1291 mp->mnt_stat.f_iosize = nfs_iosize(1, nmp->nm_sotype); 1292 error = nfs_fsinfo(nmp, vp, curthread); 1293 } else { 1294 if ((error = VOP_GETATTR(vp, &attrs)) == 0) 1295 nmp->nm_state |= NFSSTA_GOTFSINFO; 1296 1297 } 1298 } else { 1299 /* 1300 * The root vnode is usually cached by the namecache so do not 1301 * try to avoid going over the wire even if we have previous 1302 * information cached. A stale NFS mount can loop 1303 * forever resolving the root vnode if we return no-error when 1304 * there is in fact an error. 1305 */ 1306 np->n_attrstamp = 0; 1307 error = VOP_GETATTR(vp, &attrs); 1308 } 1309 if (vp->v_type == VNON) 1310 nfs_setvtype(vp, VDIR); 1311 vsetflags(vp, VROOT); 1312 if (error) 1313 vput(vp); 1314 else 1315 *vpp = vp; 1316 lwkt_reltoken(&nmp->nm_token); 1317 return (error); 1318 } 1319 1320 struct scaninfo { 1321 int rescan; 1322 int waitfor; 1323 int allerror; 1324 }; 1325 1326 static int nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data); 1327 1328 /* 1329 * Flush out the buffer cache 1330 */ 1331 /* ARGSUSED */ 1332 static int 1333 nfs_sync(struct mount *mp, int waitfor) 1334 { 1335 struct nfsmount *nmp = VFSTONFS(mp); 1336 struct scaninfo scaninfo; 1337 int error; 1338 1339 scaninfo.rescan = 1; 1340 scaninfo.waitfor = waitfor; 1341 scaninfo.allerror = 0; 1342 1343 /* 1344 * Force stale buffer cache information to be flushed. 1345 */ 1346 lwkt_gettoken(&nmp->nm_token); 1347 error = 0; 1348 if ((waitfor & MNT_LAZY) == 0) { 1349 while (error == 0 && scaninfo.rescan) { 1350 scaninfo.rescan = 0; 1351 error = vsyncscan(mp, VMSC_GETVP, 1352 nfs_sync_scan2, &scaninfo); 1353 } 1354 } 1355 lwkt_reltoken(&nmp->nm_token); 1356 return(error); 1357 } 1358 1359 static int 1360 nfs_sync_scan2(struct mount *mp, struct vnode *vp, void *data) 1361 { 1362 struct scaninfo *info = data; 1363 int error; 1364 1365 if (vp->v_type == VNON || vp->v_type == VBAD) 1366 return(0); 1367 error = VOP_FSYNC(vp, info->waitfor, 0); 1368 if (error) 1369 info->allerror = error; 1370 return(0); 1371 } 1372 1373