/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2009 Rick Macklem, University of Guelph
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 #include <sys/extattr.h>
34 #include <fs/nfs/nfsport.h>
35
36 int nfsrv_issuedelegs = 0;
37 int nfsrv_dolocallocks = 0;
38 struct nfsv4lock nfsv4rootfs_lock;
39 time_t nfsdev_time = 0;
40 int nfsrv_layouthashsize;
41 volatile int nfsrv_layoutcnt = 0;
42
43 NFSD_VNET_DEFINE(struct nfsrv_stablefirst, nfsrv_stablefirst);
44
45 NFSD_VNET_DECLARE(int, nfsrv_numnfsd);
46 NFSD_VNET_DECLARE(struct nfsstatsv1 *, nfsstatsv1_p);
47
48 extern uint32_t nfs_srvmaxio;
49 extern int nfsrv_lease;
50 extern struct timeval nfsboottime;
51 extern u_int32_t newnfs_true, newnfs_false;
52 extern struct mtx nfsrv_dslock_mtx;
53 extern struct mtx nfsrv_recalllock_mtx;
54 extern struct mtx nfsrv_dontlistlock_mtx;
55 extern int nfsd_debuglevel;
56 extern u_int nfsrv_dsdirsize;
57 extern struct nfsdevicehead nfsrv_devidhead;
58 extern int nfsrv_doflexfile;
59 extern int nfsrv_maxpnfsmirror;
60 NFSV4ROOTLOCKMUTEX;
61 NFSSTATESPINLOCK;
62 extern struct nfsdontlisthead nfsrv_dontlisthead;
63 extern volatile int nfsrv_devidcnt;
64 extern struct nfslayouthead nfsrv_recalllisthead;
65 extern char *nfsrv_zeropnfsdat;
66
67 SYSCTL_DECL(_vfs_nfsd);
68 int nfsrv_statehashsize = NFSSTATEHASHSIZE;
69 SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN,
70 &nfsrv_statehashsize, 0,
71 "Size of state hash table set via loader.conf");
72
73 int nfsrv_clienthashsize = NFSCLIENTHASHSIZE;
74 SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN,
75 &nfsrv_clienthashsize, 0,
76 "Size of client hash table set via loader.conf");
77
78 int nfsrv_lockhashsize = NFSLOCKHASHSIZE;
79 SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN,
80 &nfsrv_lockhashsize, 0,
81 "Size of file handle hash table set via loader.conf");
82
83 int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE;
84 SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN,
85 &nfsrv_sessionhashsize, 0,
86 "Size of session hash table set via loader.conf");
87
88 int nfsrv_layouthighwater = NFSLAYOUTHIGHWATER;
89 SYSCTL_INT(_vfs_nfsd, OID_AUTO, layouthighwater, CTLFLAG_RDTUN,
90 &nfsrv_layouthighwater, 0,
91 "High water mark for number of layouts set via loader.conf");
92
93 static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT;
94 SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN,
95 &nfsrv_v4statelimit, 0,
96 "High water limit for NFSv4 opens+locks+delegations");
97
98 static int nfsrv_writedelegifpos = 0;
99 SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW,
100 &nfsrv_writedelegifpos, 0,
101 "Issue a write delegation for read opens if possible");
102
103 static int nfsrv_allowreadforwriteopen = 1;
104 SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW,
105 &nfsrv_allowreadforwriteopen, 0,
106 "Allow Reads to be done with Write Access StateIDs");
107
108 int nfsrv_pnfsatime = 0;
109 SYSCTL_INT(_vfs_nfsd, OID_AUTO, pnfsstrictatime, CTLFLAG_RW,
110 &nfsrv_pnfsatime, 0,
111 "For pNFS service, do Getattr ops to keep atime up-to-date");
112
113 int nfsrv_flexlinuxhack = 0;
114 SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW,
115 &nfsrv_flexlinuxhack, 0,
116 "For Linux clients, hack around Flex File Layout bug");
117
118 /*
119 * Hash lists for nfs V4.
120 */
121 NFSD_VNET_DEFINE(struct nfsclienthashhead *, nfsclienthash);
122 NFSD_VNET_DEFINE(struct nfslockhashhead *, nfslockhash);
123 NFSD_VNET_DEFINE(struct nfssessionhash *, nfssessionhash);
124
125 struct nfslayouthash *nfslayouthash;
126 volatile int nfsrv_dontlistlen = 0;
127
128 static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0;
129 static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0;
130 static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER;
131 static int nfsrv_nogsscallback = 0;
132 static volatile int nfsrv_writedelegcnt = 0;
133 static int nfsrv_faildscnt;
134
135 NFSD_VNET_DEFINE_STATIC(time_t, nfsrvboottime);
136
137 /* local functions */
138 static void nfsrv_dumpaclient(struct nfsclient *clp,
139 struct nfsd_dumpclients *dumpp);
140 static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep,
141 NFSPROC_T *p);
142 static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep,
143 NFSPROC_T *p);
144 static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
145 NFSPROC_T *p);
146 static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp,
147 int cansleep, NFSPROC_T *p);
148 static void nfsrv_freenfslock(struct nfslock *lop);
149 static void nfsrv_freenfslockfile(struct nfslockfile *lfp);
150 static void nfsrv_freedeleg(struct nfsstate *);
151 static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
152 u_int32_t flags, struct nfsstate **stpp);
153 static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
154 struct nfsstate **stpp);
155 static int nfsrv_getlockfh(vnode_t vp, u_short flags,
156 struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p);
157 static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
158 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit);
159 static void nfsrv_insertlock(struct nfslock *new_lop,
160 struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp);
161 static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
162 struct nfslock **other_lopp, struct nfslockfile *lfp);
163 static int nfsrv_getipnumber(u_char *cp);
164 static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
165 nfsv4stateid_t *stateidp, int specialid);
166 static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
167 u_int32_t flags);
168 static int nfsrv_docallback(struct nfsclient *clp, int procnum,
169 nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp,
170 struct nfsvattr *nap, nfsattrbit_t *attrbitp, int laytype, NFSPROC_T *p);
171 static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
172 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
173 int *slotposp);
174 static u_int32_t nfsrv_nextclientindex(void);
175 static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp);
176 static void nfsrv_markstable(struct nfsclient *clp);
177 static void nfsrv_markreclaim(struct nfsclient *clp);
178 static int nfsrv_checkstable(struct nfsclient *clp);
179 static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct
180 vnode *vp, NFSPROC_T *p);
181 static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp,
182 NFSPROC_T *p, vnode_t vp);
183 static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
184 struct nfsclient *clp, int *haslockp, NFSPROC_T *p);
185 static int nfsrv_notsamecredname(int op, struct nfsrv_descript *nd,
186 struct nfsclient *clp);
187 static time_t nfsrv_leaseexpiry(void);
188 static void nfsrv_delaydelegtimeout(struct nfsstate *stp);
189 static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
190 struct nfsstate *stp, struct nfsrvcache *op);
191 static int nfsrv_nootherstate(struct nfsstate *stp);
192 static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
193 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p);
194 static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp,
195 uint64_t init_first, uint64_t init_end, NFSPROC_T *p);
196 static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags,
197 int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp,
198 NFSPROC_T *p);
199 static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp,
200 NFSPROC_T *p);
201 static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags,
202 uint64_t first, uint64_t end);
203 static void nfsrv_locklf(struct nfslockfile *lfp);
204 static void nfsrv_unlocklf(struct nfslockfile *lfp);
205 static struct nfsdsession *nfsrv_findsession(uint8_t *sessionid);
206 static int nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
207 uint8_t *sessionid);
208 static int nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
209 int dont_replycache, struct nfsdsession **sepp, int *slotposp);
210 static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp);
211 static int nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
212 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p);
213 static void nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp);
214 static void nfsrv_freelayoutlist(nfsquad_t clientid);
215 static void nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype,
216 int iomode);
217 static void nfsrv_freealllayouts(void);
218 static void nfsrv_freedevid(struct nfsdevice *ds);
219 static int nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
220 struct nfsdevice **dsp);
221 static void nfsrv_deleteds(struct nfsdevice *fndds);
222 static void nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost);
223 static void nfsrv_freealldevids(void);
224 static void nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp,
225 int maxcnt, NFSPROC_T *p);
226 static int nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp,
227 fhandle_t *fhp, struct nfslayout *lyp, int changed, int laytype,
228 NFSPROC_T *p);
229 static int nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
230 NFSPROC_T *, struct nfslayout **lypp);
231 static int nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt);
232 static struct nfslayout *nfsrv_filelayout(struct nfsrv_descript *nd, int iomode,
233 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
234 static struct nfslayout *nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode,
235 int mirrorcnt, fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs);
236 static int nfsrv_dontlayout(fhandle_t *fhp);
237 static int nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
238 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
239 vnode_t *tvpp);
240 static struct nfsdevice *nfsrv_findmirroredds(struct nfsmount *nmp);
241 static int nfsrv_checkmachcred(int op, struct nfsrv_descript *nd,
242 struct nfsclient *clp);
243 static void nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
244 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
245 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
246 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
247 nfsv4stateid_t *delegstateidp);
248
249 /*
250 * Scan the client list for a match and either return the current one,
251 * create a new entry or return an error.
252 * If returning a non-error, the clp structure must either be linked into
253 * the client list or free'd.
254 */
255 int
nfsrv_setclient(struct nfsrv_descript * nd,struct nfsclient ** new_clpp,nfsquad_t * clientidp,nfsquad_t * confirmp,NFSPROC_T * p)256 nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
257 nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
258 {
259 struct nfsclient *clp = NULL, *new_clp = *new_clpp;
260 int i, error = 0, ret;
261 struct nfsstate *stp, *tstp;
262 #ifdef INET
263 struct sockaddr_in *sin, *rin;
264 #endif
265 #ifdef INET6
266 struct sockaddr_in6 *sin6, *rin6;
267 #endif
268 struct nfsdsession *sep, *nsep;
269 int zapit = 0, gotit, hasstate = 0, igotlock;
270 static u_int64_t confirm_index = 0;
271
272 /*
273 * Check for state resource limit exceeded.
274 */
275 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
276 error = NFSERR_RESOURCE;
277 goto out;
278 }
279
280 if (nfsrv_issuedelegs == 0 ||
281 ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
282 /*
283 * Don't do callbacks when delegations are disabled or
284 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
285 * If establishing a callback connection is attempted
286 * when a firewall is blocking the callback path, the
287 * server may wait too long for the connect attempt to
288 * succeed during the Open. Some clients, such as Linux,
289 * may timeout and give up on the Open before the server
290 * replies. Also, since AUTH_GSS callbacks are not
291 * yet interoperability tested, they might cause the
292 * server to crap out, if they get past the Init call to
293 * the client.
294 */
295 new_clp->lc_program = 0;
296
297 /* Lock out other nfsd threads */
298 NFSLOCKV4ROOTMUTEX();
299 nfsv4_relref(&nfsv4rootfs_lock);
300 do {
301 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
302 NFSV4ROOTLOCKMUTEXPTR, NULL);
303 } while (!igotlock);
304 NFSUNLOCKV4ROOTMUTEX();
305
306 /*
307 * Search for a match in the client list.
308 */
309 gotit = i = 0;
310 while (i < nfsrv_clienthashsize && !gotit) {
311 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
312 if (new_clp->lc_idlen == clp->lc_idlen &&
313 !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
314 gotit = 1;
315 break;
316 }
317 }
318 if (gotit == 0)
319 i++;
320 }
321 if (!gotit ||
322 (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
323 if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
324 /*
325 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
326 * client is trying to update a confirmed clientid.
327 */
328 NFSLOCKV4ROOTMUTEX();
329 nfsv4_unlock(&nfsv4rootfs_lock, 1);
330 NFSUNLOCKV4ROOTMUTEX();
331 confirmp->lval[1] = 0;
332 error = NFSERR_NOENT;
333 goto out;
334 }
335 /*
336 * Get rid of the old one.
337 */
338 if (i != nfsrv_clienthashsize) {
339 LIST_REMOVE(clp, lc_hash);
340 nfsrv_cleanclient(clp, p);
341 nfsrv_freedeleglist(&clp->lc_deleg);
342 nfsrv_freedeleglist(&clp->lc_olddeleg);
343 zapit = 1;
344 }
345 /*
346 * Add it after assigning a client id to it.
347 */
348 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
349 if ((nd->nd_flag & ND_NFSV41) != 0) {
350 confirmp->lval[0] = ++confirm_index;
351 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
352 } else
353 confirmp->qval = new_clp->lc_confirm.qval =
354 ++confirm_index;
355 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
356 NFSD_VNET(nfsrvboottime);
357 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
358 nfsrv_nextclientindex();
359 new_clp->lc_stateindex = 0;
360 new_clp->lc_statemaxindex = 0;
361 new_clp->lc_prevsess = 0;
362 new_clp->lc_cbref = 0;
363 new_clp->lc_expiry = nfsrv_leaseexpiry();
364 LIST_INIT(&new_clp->lc_open);
365 LIST_INIT(&new_clp->lc_deleg);
366 LIST_INIT(&new_clp->lc_olddeleg);
367 LIST_INIT(&new_clp->lc_session);
368 for (i = 0; i < nfsrv_statehashsize; i++)
369 LIST_INIT(&new_clp->lc_stateid[i]);
370 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
371 lc_hash);
372 NFSD_VNET(nfsstatsv1_p)->srvclients++;
373 nfsrv_openpluslock++;
374 nfsrv_clients++;
375 NFSLOCKV4ROOTMUTEX();
376 nfsv4_unlock(&nfsv4rootfs_lock, 1);
377 NFSUNLOCKV4ROOTMUTEX();
378 if (zapit)
379 nfsrv_zapclient(clp, p);
380 *new_clpp = NULL;
381 goto out;
382 }
383
384 /*
385 * Now, handle the cases where the id is already issued.
386 */
387 if (nfsrv_notsamecredname(NFSV4OP_EXCHANGEID, nd, clp)) {
388 /*
389 * Check to see if there is expired state that should go away.
390 */
391 if (clp->lc_expiry < NFSD_MONOSEC &&
392 (!LIST_EMPTY(&clp->lc_open) || !LIST_EMPTY(&clp->lc_deleg))) {
393 nfsrv_cleanclient(clp, p);
394 nfsrv_freedeleglist(&clp->lc_deleg);
395 }
396
397 /*
398 * If there is outstanding state, then reply NFSERR_CLIDINUSE per
399 * RFC3530 Sec. 8.1.2 last para.
400 */
401 if (!LIST_EMPTY(&clp->lc_deleg)) {
402 hasstate = 1;
403 } else if (LIST_EMPTY(&clp->lc_open)) {
404 hasstate = 0;
405 } else {
406 hasstate = 0;
407 /* Look for an Open on the OpenOwner */
408 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
409 if (!LIST_EMPTY(&stp->ls_open)) {
410 hasstate = 1;
411 break;
412 }
413 }
414 }
415 if (hasstate) {
416 /*
417 * If the uid doesn't match, return NFSERR_CLIDINUSE after
418 * filling out the correct ipaddr and portnum.
419 */
420 switch (clp->lc_req.nr_nam->sa_family) {
421 #ifdef INET
422 case AF_INET:
423 sin = (struct sockaddr_in *)new_clp->lc_req.nr_nam;
424 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
425 sin->sin_addr.s_addr = rin->sin_addr.s_addr;
426 sin->sin_port = rin->sin_port;
427 break;
428 #endif
429 #ifdef INET6
430 case AF_INET6:
431 sin6 = (struct sockaddr_in6 *)new_clp->lc_req.nr_nam;
432 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
433 sin6->sin6_addr = rin6->sin6_addr;
434 sin6->sin6_port = rin6->sin6_port;
435 break;
436 #endif
437 }
438 NFSLOCKV4ROOTMUTEX();
439 nfsv4_unlock(&nfsv4rootfs_lock, 1);
440 NFSUNLOCKV4ROOTMUTEX();
441 error = NFSERR_CLIDINUSE;
442 goto out;
443 }
444 }
445
446 if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
447 /*
448 * If the verifier has changed, the client has rebooted
449 * and a new client id is issued. The old state info
450 * can be thrown away once the SetClientID_Confirm or
451 * Create_Session that confirms the clientid occurs.
452 */
453 LIST_REMOVE(clp, lc_hash);
454
455 /* Get rid of all sessions on this clientid. */
456 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep) {
457 ret = nfsrv_freesession(NULL, sep, NULL);
458 if (ret != 0)
459 printf("nfsrv_setclient: verifier changed free"
460 " session failed=%d\n", ret);
461 }
462
463 new_clp->lc_flags |= LCL_NEEDSCONFIRM;
464 if ((nd->nd_flag & ND_NFSV41) != 0) {
465 confirmp->lval[0] = ++confirm_index;
466 new_clp->lc_confirm.lval[0] = confirmp->lval[0] - 1;
467 } else
468 confirmp->qval = new_clp->lc_confirm.qval =
469 ++confirm_index;
470 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
471 NFSD_VNET(nfsrvboottime);
472 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
473 nfsrv_nextclientindex();
474 new_clp->lc_stateindex = 0;
475 new_clp->lc_statemaxindex = 0;
476 new_clp->lc_prevsess = 0;
477 new_clp->lc_cbref = 0;
478 new_clp->lc_expiry = nfsrv_leaseexpiry();
479
480 /*
481 * Save the state until confirmed.
482 */
483 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
484 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
485 tstp->ls_clp = new_clp;
486 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
487 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
488 tstp->ls_clp = new_clp;
489 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
490 ls_list);
491 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
492 tstp->ls_clp = new_clp;
493 for (i = 0; i < nfsrv_statehashsize; i++) {
494 LIST_NEWHEAD(&new_clp->lc_stateid[i],
495 &clp->lc_stateid[i], ls_hash);
496 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
497 tstp->ls_clp = new_clp;
498 }
499 LIST_INIT(&new_clp->lc_session);
500 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
501 lc_hash);
502 NFSD_VNET(nfsstatsv1_p)->srvclients++;
503 nfsrv_openpluslock++;
504 nfsrv_clients++;
505 NFSLOCKV4ROOTMUTEX();
506 nfsv4_unlock(&nfsv4rootfs_lock, 1);
507 NFSUNLOCKV4ROOTMUTEX();
508
509 /*
510 * Must wait until any outstanding callback on the old clp
511 * completes.
512 */
513 NFSLOCKSTATE();
514 while (clp->lc_cbref) {
515 clp->lc_flags |= LCL_WAKEUPWANTED;
516 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
517 "nfsd clp", 10 * hz);
518 }
519 NFSUNLOCKSTATE();
520 nfsrv_zapclient(clp, p);
521 *new_clpp = NULL;
522 goto out;
523 }
524
525 /* For NFSv4.1, mark that we found a confirmed clientid. */
526 if ((nd->nd_flag & ND_NFSV41) != 0) {
527 clientidp->lval[0] = clp->lc_clientid.lval[0];
528 clientidp->lval[1] = clp->lc_clientid.lval[1];
529 confirmp->lval[0] = 0; /* Ignored by client */
530 confirmp->lval[1] = 1;
531 } else {
532 /*
533 * id and verifier match, so update the net address info
534 * and get rid of any existing callback authentication
535 * handle, so a new one will be acquired.
536 */
537 LIST_REMOVE(clp, lc_hash);
538 new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
539 new_clp->lc_expiry = nfsrv_leaseexpiry();
540 confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
541 clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
542 clp->lc_clientid.lval[0];
543 clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
544 clp->lc_clientid.lval[1];
545 new_clp->lc_delegtime = clp->lc_delegtime;
546 new_clp->lc_stateindex = clp->lc_stateindex;
547 new_clp->lc_statemaxindex = clp->lc_statemaxindex;
548 new_clp->lc_cbref = 0;
549 LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
550 LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
551 tstp->ls_clp = new_clp;
552 LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
553 LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
554 tstp->ls_clp = new_clp;
555 LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg, ls_list);
556 LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
557 tstp->ls_clp = new_clp;
558 for (i = 0; i < nfsrv_statehashsize; i++) {
559 LIST_NEWHEAD(&new_clp->lc_stateid[i],
560 &clp->lc_stateid[i], ls_hash);
561 LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
562 tstp->ls_clp = new_clp;
563 }
564 LIST_INIT(&new_clp->lc_session);
565 LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
566 lc_hash);
567 NFSD_VNET(nfsstatsv1_p)->srvclients++;
568 nfsrv_openpluslock++;
569 nfsrv_clients++;
570 }
571 NFSLOCKV4ROOTMUTEX();
572 nfsv4_unlock(&nfsv4rootfs_lock, 1);
573 NFSUNLOCKV4ROOTMUTEX();
574
575 if ((nd->nd_flag & ND_NFSV41) == 0) {
576 /*
577 * Must wait until any outstanding callback on the old clp
578 * completes.
579 */
580 NFSLOCKSTATE();
581 while (clp->lc_cbref) {
582 clp->lc_flags |= LCL_WAKEUPWANTED;
583 (void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
584 "nfsdclp", 10 * hz);
585 }
586 NFSUNLOCKSTATE();
587 nfsrv_zapclient(clp, p);
588 *new_clpp = NULL;
589 }
590
591 out:
592 NFSEXITCODE2(error, nd);
593 return (error);
594 }
595
596 /*
597 * Check to see if the client id exists and optionally confirm it.
598 */
599 int
nfsrv_getclient(nfsquad_t clientid,int opflags,struct nfsclient ** clpp,struct nfsdsession * nsep,nfsquad_t confirm,uint32_t cbprogram,struct nfsrv_descript * nd,NFSPROC_T * p)600 nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
601 struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
602 struct nfsrv_descript *nd, NFSPROC_T *p)
603 {
604 struct nfsclient *clp;
605 struct nfsstate *stp;
606 int i;
607 struct nfsclienthashhead *hp;
608 int error = 0, igotlock, doneok;
609 struct nfssessionhash *shp;
610 struct nfsdsession *sep;
611 uint64_t sessid[2];
612 bool sess_replay;
613 static uint64_t next_sess = 0;
614
615 if (clpp)
616 *clpp = NULL;
617 if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
618 opflags != CLOPS_RENEW) && NFSD_VNET(nfsrvboottime) !=
619 clientid.lval[0]) {
620 error = NFSERR_STALECLIENTID;
621 goto out;
622 }
623
624 /*
625 * If called with opflags == CLOPS_RENEW, the State Lock is
626 * already held. Otherwise, we need to get either that or,
627 * for the case of Confirm, lock out the nfsd threads.
628 */
629 if (opflags & CLOPS_CONFIRM) {
630 NFSLOCKV4ROOTMUTEX();
631 nfsv4_relref(&nfsv4rootfs_lock);
632 do {
633 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
634 NFSV4ROOTLOCKMUTEXPTR, NULL);
635 } while (!igotlock);
636 /*
637 * Create a new sessionid here, since we need to do it where
638 * there is a mutex held to serialize update of next_sess.
639 */
640 if ((nd->nd_flag & ND_NFSV41) != 0) {
641 sessid[0] = ++next_sess;
642 sessid[1] = clientid.qval;
643 }
644 NFSUNLOCKV4ROOTMUTEX();
645 } else if (opflags != CLOPS_RENEW) {
646 NFSLOCKSTATE();
647 }
648
649 /* For NFSv4.1, the clp is acquired from the associated session. */
650 if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
651 opflags == CLOPS_RENEW) {
652 clp = NULL;
653 if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
654 shp = NFSSESSIONHASH(nd->nd_sessionid);
655 NFSLOCKSESSION(shp);
656 sep = nfsrv_findsession(nd->nd_sessionid);
657 if (sep != NULL)
658 clp = sep->sess_clp;
659 NFSUNLOCKSESSION(shp);
660 }
661 } else {
662 hp = NFSCLIENTHASH(clientid);
663 LIST_FOREACH(clp, hp, lc_hash) {
664 if (clp->lc_clientid.lval[1] == clientid.lval[1])
665 break;
666 }
667 }
668 if (clp == NULL) {
669 if (opflags & CLOPS_CONFIRM)
670 error = NFSERR_STALECLIENTID;
671 else
672 error = NFSERR_EXPIRED;
673 } else if (clp->lc_flags & LCL_ADMINREVOKED) {
674 /*
675 * If marked admin revoked, just return the error.
676 */
677 error = NFSERR_ADMINREVOKED;
678 }
679 if (error) {
680 if (opflags & CLOPS_CONFIRM) {
681 NFSLOCKV4ROOTMUTEX();
682 nfsv4_unlock(&nfsv4rootfs_lock, 1);
683 NFSUNLOCKV4ROOTMUTEX();
684 } else if (opflags != CLOPS_RENEW) {
685 NFSUNLOCKSTATE();
686 }
687 goto out;
688 }
689
690 /*
691 * Perform any operations specified by the opflags.
692 */
693 if (opflags & CLOPS_CONFIRM) {
694 sess_replay = false;
695 if ((nd->nd_flag & ND_NFSV41) != 0) {
696 /*
697 * For the case where lc_confirm.lval[0] == confirm.lval[0],
698 * use the new session, but with the previous sessionid.
699 * This is not exactly what the RFC describes, but should
700 * result in the same reply as the previous CreateSession.
701 */
702 if (clp->lc_confirm.lval[0] + 1 == confirm.lval[0]) {
703 clp->lc_confirm.lval[0] = confirm.lval[0];
704 clp->lc_prevsess = sessid[0];
705 } else if (clp->lc_confirm.lval[0] == confirm.lval[0]) {
706 if (clp->lc_prevsess == 0)
707 error = NFSERR_SEQMISORDERED;
708 else
709 sessid[0] = clp->lc_prevsess;
710 sess_replay = true;
711 } else
712 error = NFSERR_SEQMISORDERED;
713 } else if ((nd->nd_flag & ND_NFSV41) == 0 &&
714 clp->lc_confirm.qval != confirm.qval)
715 error = NFSERR_STALECLIENTID;
716 if (error == 0 && nfsrv_notsamecredname(NFSV4OP_CREATESESSION,
717 nd, clp))
718 error = NFSERR_CLIDINUSE;
719
720 if (!error) {
721 if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
722 LCL_NEEDSCONFIRM) {
723 /*
724 * Hang onto the delegations (as old delegations)
725 * for an Open with CLAIM_DELEGATE_PREV unless in
726 * grace, but get rid of the rest of the state.
727 */
728 nfsrv_cleanclient(clp, p);
729 nfsrv_freedeleglist(&clp->lc_olddeleg);
730 if (nfsrv_checkgrace(nd, clp, 0)) {
731 /* In grace, so just delete delegations */
732 nfsrv_freedeleglist(&clp->lc_deleg);
733 } else {
734 LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
735 stp->ls_flags |= NFSLCK_OLDDELEG;
736 clp->lc_delegtime = NFSD_MONOSEC +
737 nfsrv_lease + NFSRV_LEASEDELTA;
738 LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
739 ls_list);
740 }
741 if ((nd->nd_flag & ND_NFSV41) != 0)
742 clp->lc_program = cbprogram;
743 }
744 clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
745 if (clp->lc_program)
746 clp->lc_flags |= LCL_NEEDSCBNULL;
747 /* For NFSv4.1, link the session onto the client. */
748 if (nsep != NULL) {
749 /* Hold a reference on the xprt for a backchannel. */
750 if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN)
751 != 0 && !sess_replay) {
752 if (clp->lc_req.nr_client == NULL)
753 clp->lc_req.nr_client = (struct __rpc_client *)
754 clnt_bck_create(nd->nd_xprt->xp_socket,
755 cbprogram, NFSV4_CBVERS);
756 if (clp->lc_req.nr_client != NULL) {
757 SVC_ACQUIRE(nd->nd_xprt);
758 CLNT_ACQUIRE(clp->lc_req.nr_client);
759 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
760 /* Disable idle timeout. */
761 nd->nd_xprt->xp_idletimeout = 0;
762 nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
763 } else
764 nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
765 }
766 NFSBCOPY(sessid, nsep->sess_sessionid,
767 NFSX_V4SESSIONID);
768 NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
769 NFSX_V4SESSIONID);
770 if (!sess_replay) {
771 shp = NFSSESSIONHASH(nsep->sess_sessionid);
772 NFSLOCKSTATE();
773 NFSLOCKSESSION(shp);
774 LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
775 LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
776 nsep->sess_clp = clp;
777 NFSUNLOCKSESSION(shp);
778 NFSUNLOCKSTATE();
779 }
780 }
781 }
782 } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
783 error = NFSERR_EXPIRED;
784 }
785
786 /*
787 * If called by the Renew Op, we must check the principal.
788 */
789 if (!error && (opflags & CLOPS_RENEWOP)) {
790 if (nfsrv_notsamecredname(0, nd, clp)) {
791 doneok = 0;
792 for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
793 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
794 if ((stp->ls_flags & NFSLCK_OPEN) &&
795 stp->ls_uid == nd->nd_cred->cr_uid) {
796 doneok = 1;
797 break;
798 }
799 }
800 }
801 if (!doneok)
802 error = NFSERR_ACCES;
803 }
804 if (!error && (clp->lc_flags & LCL_CBDOWN))
805 error = NFSERR_CBPATHDOWN;
806 }
807 if ((!error || error == NFSERR_CBPATHDOWN) &&
808 (opflags & CLOPS_RENEW)) {
809 clp->lc_expiry = nfsrv_leaseexpiry();
810 }
811 if (opflags & CLOPS_CONFIRM) {
812 NFSLOCKV4ROOTMUTEX();
813 nfsv4_unlock(&nfsv4rootfs_lock, 1);
814 NFSUNLOCKV4ROOTMUTEX();
815 } else if (opflags != CLOPS_RENEW) {
816 NFSUNLOCKSTATE();
817 }
818 if (clpp)
819 *clpp = clp;
820
821 out:
822 NFSEXITCODE2(error, nd);
823 return (error);
824 }
825
826 /*
827 * Perform the NFSv4.1 destroy clientid.
828 */
829 int
nfsrv_destroyclient(struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)830 nfsrv_destroyclient(struct nfsrv_descript *nd, nfsquad_t clientid, NFSPROC_T *p)
831 {
832 struct nfsclient *clp;
833 struct nfsclienthashhead *hp;
834 int error = 0, i, igotlock;
835
836 if (NFSD_VNET(nfsrvboottime) != clientid.lval[0]) {
837 error = NFSERR_STALECLIENTID;
838 goto out;
839 }
840
841 /* Lock out other nfsd threads */
842 NFSLOCKV4ROOTMUTEX();
843 nfsv4_relref(&nfsv4rootfs_lock);
844 do {
845 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
846 NFSV4ROOTLOCKMUTEXPTR, NULL);
847 } while (igotlock == 0);
848 NFSUNLOCKV4ROOTMUTEX();
849
850 hp = NFSCLIENTHASH(clientid);
851 LIST_FOREACH(clp, hp, lc_hash) {
852 if (clp->lc_clientid.lval[1] == clientid.lval[1])
853 break;
854 }
855 if (clp == NULL) {
856 NFSLOCKV4ROOTMUTEX();
857 nfsv4_unlock(&nfsv4rootfs_lock, 1);
858 NFSUNLOCKV4ROOTMUTEX();
859 /* Just return ok, since it is gone. */
860 goto out;
861 }
862
863 /* Check for the SP4_MACH_CRED case. */
864 error = nfsrv_checkmachcred(NFSV4OP_DESTROYCLIENTID, nd, clp);
865 if (error != 0) {
866 NFSLOCKV4ROOTMUTEX();
867 nfsv4_unlock(&nfsv4rootfs_lock, 1);
868 NFSUNLOCKV4ROOTMUTEX();
869 goto out;
870 }
871
872 /*
873 * Free up all layouts on the clientid. Should the client return the
874 * layouts?
875 */
876 nfsrv_freelayoutlist(clientid);
877
878 /* Scan for state on the clientid. */
879 for (i = 0; i < nfsrv_statehashsize; i++)
880 if (!LIST_EMPTY(&clp->lc_stateid[i])) {
881 NFSLOCKV4ROOTMUTEX();
882 nfsv4_unlock(&nfsv4rootfs_lock, 1);
883 NFSUNLOCKV4ROOTMUTEX();
884 error = NFSERR_CLIENTIDBUSY;
885 goto out;
886 }
887 if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
888 NFSLOCKV4ROOTMUTEX();
889 nfsv4_unlock(&nfsv4rootfs_lock, 1);
890 NFSUNLOCKV4ROOTMUTEX();
891 error = NFSERR_CLIENTIDBUSY;
892 goto out;
893 }
894
895 /* Destroy the clientid and return ok. */
896 nfsrv_cleanclient(clp, p);
897 nfsrv_freedeleglist(&clp->lc_deleg);
898 nfsrv_freedeleglist(&clp->lc_olddeleg);
899 LIST_REMOVE(clp, lc_hash);
900 NFSLOCKV4ROOTMUTEX();
901 nfsv4_unlock(&nfsv4rootfs_lock, 1);
902 NFSUNLOCKV4ROOTMUTEX();
903 nfsrv_zapclient(clp, p);
904 out:
905 NFSEXITCODE2(error, nd);
906 return (error);
907 }
908
909 /*
910 * Called from the new nfssvc syscall to admin revoke a clientid.
911 * Returns 0 for success, error otherwise.
912 */
913 int
nfsrv_adminrevoke(struct nfsd_clid * revokep,NFSPROC_T * p)914 nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
915 {
916 struct nfsclient *clp = NULL;
917 int i, error = 0;
918 int gotit, igotlock;
919
920 /*
921 * First, lock out the nfsd so that state won't change while the
922 * revocation record is being written to the stable storage restart
923 * file.
924 */
925 NFSLOCKV4ROOTMUTEX();
926 do {
927 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
928 NFSV4ROOTLOCKMUTEXPTR, NULL);
929 } while (!igotlock);
930 NFSUNLOCKV4ROOTMUTEX();
931
932 /*
933 * Search for a match in the client list.
934 */
935 gotit = i = 0;
936 while (i < nfsrv_clienthashsize && !gotit) {
937 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
938 if (revokep->nclid_idlen == clp->lc_idlen &&
939 !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
940 gotit = 1;
941 break;
942 }
943 }
944 i++;
945 }
946 if (!gotit) {
947 NFSLOCKV4ROOTMUTEX();
948 nfsv4_unlock(&nfsv4rootfs_lock, 0);
949 NFSUNLOCKV4ROOTMUTEX();
950 error = EPERM;
951 goto out;
952 }
953
954 /*
955 * Now, write out the revocation record
956 */
957 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
958 nfsrv_backupstable();
959
960 /*
961 * and clear out the state, marking the clientid revoked.
962 */
963 clp->lc_flags &= ~LCL_CALLBACKSON;
964 clp->lc_flags |= LCL_ADMINREVOKED;
965 nfsrv_cleanclient(clp, p);
966 nfsrv_freedeleglist(&clp->lc_deleg);
967 nfsrv_freedeleglist(&clp->lc_olddeleg);
968 NFSLOCKV4ROOTMUTEX();
969 nfsv4_unlock(&nfsv4rootfs_lock, 0);
970 NFSUNLOCKV4ROOTMUTEX();
971
972 out:
973 NFSEXITCODE(error);
974 return (error);
975 }
976
977 /*
978 * Dump out stats for all clients. Called from nfssvc(2), that is used
979 * nfsstatsv1.
980 */
981 void
nfsrv_dumpclients(struct nfsd_dumpclients * dumpp,int maxcnt)982 nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
983 {
984 struct nfsclient *clp;
985 int i = 0, cnt = 0;
986
987 /*
988 * First, get a reference on the nfsv4rootfs_lock so that an
989 * exclusive lock cannot be acquired while dumping the clients.
990 */
991 NFSLOCKV4ROOTMUTEX();
992 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
993 NFSUNLOCKV4ROOTMUTEX();
994 NFSLOCKSTATE();
995 /*
996 * Rattle through the client lists until done.
997 */
998 while (i < nfsrv_clienthashsize && cnt < maxcnt) {
999 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1000 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i]) && cnt <
1001 maxcnt) {
1002 nfsrv_dumpaclient(clp, &dumpp[cnt]);
1003 cnt++;
1004 clp = LIST_NEXT(clp, lc_hash);
1005 }
1006 i++;
1007 }
1008 if (cnt < maxcnt)
1009 dumpp[cnt].ndcl_clid.nclid_idlen = 0;
1010 NFSUNLOCKSTATE();
1011 NFSLOCKV4ROOTMUTEX();
1012 nfsv4_relref(&nfsv4rootfs_lock);
1013 NFSUNLOCKV4ROOTMUTEX();
1014 }
1015
1016 /*
1017 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
1018 */
1019 static void
nfsrv_dumpaclient(struct nfsclient * clp,struct nfsd_dumpclients * dumpp)1020 nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
1021 {
1022 struct nfsstate *stp, *openstp, *lckownstp;
1023 struct nfslock *lop;
1024 sa_family_t af;
1025 #ifdef INET
1026 struct sockaddr_in *rin;
1027 #endif
1028 #ifdef INET6
1029 struct sockaddr_in6 *rin6;
1030 #endif
1031
1032 dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
1033 dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
1034 dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
1035 dumpp->ndcl_flags = clp->lc_flags;
1036 dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
1037 NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
1038 af = clp->lc_req.nr_nam->sa_family;
1039 dumpp->ndcl_addrfam = af;
1040 switch (af) {
1041 #ifdef INET
1042 case AF_INET:
1043 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
1044 dumpp->ndcl_cbaddr.sin_addr = rin->sin_addr;
1045 break;
1046 #endif
1047 #ifdef INET6
1048 case AF_INET6:
1049 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
1050 dumpp->ndcl_cbaddr.sin6_addr = rin6->sin6_addr;
1051 break;
1052 #endif
1053 }
1054
1055 /*
1056 * Now, scan the state lists and total up the opens and locks.
1057 */
1058 LIST_FOREACH(stp, &clp->lc_open, ls_list) {
1059 dumpp->ndcl_nopenowners++;
1060 LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
1061 dumpp->ndcl_nopens++;
1062 LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
1063 dumpp->ndcl_nlockowners++;
1064 LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
1065 dumpp->ndcl_nlocks++;
1066 }
1067 }
1068 }
1069 }
1070
1071 /*
1072 * and the delegation lists.
1073 */
1074 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
1075 dumpp->ndcl_ndelegs++;
1076 }
1077 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
1078 dumpp->ndcl_nolddelegs++;
1079 }
1080 }
1081
1082 /*
1083 * Dump out lock stats for a file.
1084 */
1085 void
nfsrv_dumplocks(vnode_t vp,struct nfsd_dumplocks * ldumpp,int maxcnt,NFSPROC_T * p)1086 nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
1087 NFSPROC_T *p)
1088 {
1089 struct nfsstate *stp;
1090 struct nfslock *lop;
1091 int cnt = 0;
1092 struct nfslockfile *lfp;
1093 sa_family_t af;
1094 #ifdef INET
1095 struct sockaddr_in *rin;
1096 #endif
1097 #ifdef INET6
1098 struct sockaddr_in6 *rin6;
1099 #endif
1100 int ret;
1101 fhandle_t nfh;
1102
1103 ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
1104 /*
1105 * First, get a reference on the nfsv4rootfs_lock so that an
1106 * exclusive lock on it cannot be acquired while dumping the locks.
1107 */
1108 NFSLOCKV4ROOTMUTEX();
1109 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
1110 NFSUNLOCKV4ROOTMUTEX();
1111 NFSLOCKSTATE();
1112 if (!ret)
1113 ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
1114 if (ret) {
1115 ldumpp[0].ndlck_clid.nclid_idlen = 0;
1116 NFSUNLOCKSTATE();
1117 NFSLOCKV4ROOTMUTEX();
1118 nfsv4_relref(&nfsv4rootfs_lock);
1119 NFSUNLOCKV4ROOTMUTEX();
1120 return;
1121 }
1122
1123 /*
1124 * For each open share on file, dump it out.
1125 */
1126 stp = LIST_FIRST(&lfp->lf_open);
1127 while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
1128 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1129 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1130 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1131 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1132 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1133 ldumpp[cnt].ndlck_owner.nclid_idlen =
1134 stp->ls_openowner->ls_ownerlen;
1135 NFSBCOPY(stp->ls_openowner->ls_owner,
1136 ldumpp[cnt].ndlck_owner.nclid_id,
1137 stp->ls_openowner->ls_ownerlen);
1138 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1139 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1140 stp->ls_clp->lc_idlen);
1141 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1142 ldumpp[cnt].ndlck_addrfam = af;
1143 switch (af) {
1144 #ifdef INET
1145 case AF_INET:
1146 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1147 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1148 break;
1149 #endif
1150 #ifdef INET6
1151 case AF_INET6:
1152 rin6 = (struct sockaddr_in6 *)
1153 stp->ls_clp->lc_req.nr_nam;
1154 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1155 break;
1156 #endif
1157 }
1158 stp = LIST_NEXT(stp, ls_file);
1159 cnt++;
1160 }
1161
1162 /*
1163 * and all locks.
1164 */
1165 lop = LIST_FIRST(&lfp->lf_lock);
1166 while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
1167 stp = lop->lo_stp;
1168 ldumpp[cnt].ndlck_flags = lop->lo_flags;
1169 ldumpp[cnt].ndlck_first = lop->lo_first;
1170 ldumpp[cnt].ndlck_end = lop->lo_end;
1171 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1172 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1173 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1174 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1175 ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
1176 NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
1177 stp->ls_ownerlen);
1178 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1179 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1180 stp->ls_clp->lc_idlen);
1181 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1182 ldumpp[cnt].ndlck_addrfam = af;
1183 switch (af) {
1184 #ifdef INET
1185 case AF_INET:
1186 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1187 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1188 break;
1189 #endif
1190 #ifdef INET6
1191 case AF_INET6:
1192 rin6 = (struct sockaddr_in6 *)
1193 stp->ls_clp->lc_req.nr_nam;
1194 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1195 break;
1196 #endif
1197 }
1198 lop = LIST_NEXT(lop, lo_lckfile);
1199 cnt++;
1200 }
1201
1202 /*
1203 * and the delegations.
1204 */
1205 stp = LIST_FIRST(&lfp->lf_deleg);
1206 while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
1207 ldumpp[cnt].ndlck_flags = stp->ls_flags;
1208 ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
1209 ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
1210 ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
1211 ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
1212 ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
1213 ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
1214 NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
1215 stp->ls_clp->lc_idlen);
1216 af = stp->ls_clp->lc_req.nr_nam->sa_family;
1217 ldumpp[cnt].ndlck_addrfam = af;
1218 switch (af) {
1219 #ifdef INET
1220 case AF_INET:
1221 rin = (struct sockaddr_in *)stp->ls_clp->lc_req.nr_nam;
1222 ldumpp[cnt].ndlck_cbaddr.sin_addr = rin->sin_addr;
1223 break;
1224 #endif
1225 #ifdef INET6
1226 case AF_INET6:
1227 rin6 = (struct sockaddr_in6 *)
1228 stp->ls_clp->lc_req.nr_nam;
1229 ldumpp[cnt].ndlck_cbaddr.sin6_addr = rin6->sin6_addr;
1230 break;
1231 #endif
1232 }
1233 stp = LIST_NEXT(stp, ls_file);
1234 cnt++;
1235 }
1236
1237 /*
1238 * If list isn't full, mark end of list by setting the client name
1239 * to zero length.
1240 */
1241 if (cnt < maxcnt)
1242 ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
1243 NFSUNLOCKSTATE();
1244 NFSLOCKV4ROOTMUTEX();
1245 nfsv4_relref(&nfsv4rootfs_lock);
1246 NFSUNLOCKV4ROOTMUTEX();
1247 }
1248
1249 /*
1250 * Server timer routine. It can scan any linked list, so long
1251 * as it holds the spin/mutex lock and there is no exclusive lock on
1252 * nfsv4rootfs_lock.
1253 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
1254 * to do this from a callout, since the spin locks work. For
1255 * Darwin, I'm not sure what will work correctly yet.)
1256 * Should be called once per second.
1257 */
1258 void
nfsrv_servertimer(void * arg __unused)1259 nfsrv_servertimer(void *arg __unused)
1260 {
1261 struct nfsclient *clp, *nclp;
1262 struct nfsstate *stp, *nstp;
1263 int got_ref, i;
1264
1265 /*
1266 * Make sure nfsboottime is set. This is used by V3 as well
1267 * as V4. Note that nfsboottime is not nfsrvboottime, which is
1268 * only used by the V4 server for leases.
1269 */
1270 if (nfsboottime.tv_sec == 0)
1271 NFSSETBOOTTIME(nfsboottime);
1272
1273 /*
1274 * If server hasn't started yet, just return.
1275 */
1276 NFSLOCKSTATE();
1277 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce == 0) {
1278 NFSUNLOCKSTATE();
1279 return;
1280 }
1281 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE)) {
1282 if (!(NFSD_VNET(nfsrv_stablefirst).nsf_flags &
1283 NFSNSF_GRACEOVER) &&
1284 NFSD_MONOSEC > NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
1285 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1286 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
1287 NFSUNLOCKSTATE();
1288 return;
1289 }
1290
1291 /*
1292 * Try and get a reference count on the nfsv4rootfs_lock so that
1293 * no nfsd thread can acquire an exclusive lock on it before this
1294 * call is done. If it is already exclusively locked, just return.
1295 */
1296 NFSLOCKV4ROOTMUTEX();
1297 got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
1298 NFSUNLOCKV4ROOTMUTEX();
1299 if (got_ref == 0) {
1300 NFSUNLOCKSTATE();
1301 return;
1302 }
1303
1304 /*
1305 * For each client...
1306 */
1307 for (i = 0; i < nfsrv_clienthashsize; i++) {
1308 clp = LIST_FIRST(&NFSD_VNET(nfsclienthash)[i]);
1309 while (clp != LIST_END(&NFSD_VNET(nfsclienthash)[i])) {
1310 nclp = LIST_NEXT(clp, lc_hash);
1311 if (!(clp->lc_flags & LCL_EXPIREIT)) {
1312 if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
1313 && ((LIST_EMPTY(&clp->lc_deleg)
1314 && LIST_EMPTY(&clp->lc_open)) ||
1315 nfsrv_clients > nfsrv_clienthighwater)) ||
1316 (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
1317 (clp->lc_expiry < NFSD_MONOSEC &&
1318 (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
1319 /*
1320 * Lease has expired several nfsrv_lease times ago:
1321 * PLUS
1322 * - no state is associated with it
1323 * OR
1324 * - above high water mark for number of clients
1325 * (nfsrv_clienthighwater should be large enough
1326 * that this only occurs when clients fail to
1327 * use the same nfs_client_id4.id. Maybe somewhat
1328 * higher that the maximum number of clients that
1329 * will mount this server?)
1330 * OR
1331 * Lease has expired a very long time ago
1332 * OR
1333 * Lease has expired PLUS the number of opens + locks
1334 * has exceeded 90% of capacity
1335 *
1336 * --> Mark for expiry. The actual expiry will be done
1337 * by an nfsd sometime soon.
1338 */
1339 clp->lc_flags |= LCL_EXPIREIT;
1340 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1341 (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
1342 } else {
1343 /*
1344 * If there are no opens, increment no open tick cnt
1345 * If time exceeds NFSNOOPEN, mark it to be thrown away
1346 * otherwise, if there is an open, reset no open time
1347 * Hopefully, this will avoid excessive re-creation
1348 * of open owners and subsequent open confirms.
1349 */
1350 stp = LIST_FIRST(&clp->lc_open);
1351 while (stp != LIST_END(&clp->lc_open)) {
1352 nstp = LIST_NEXT(stp, ls_list);
1353 if (LIST_EMPTY(&stp->ls_open)) {
1354 stp->ls_noopens++;
1355 if (stp->ls_noopens > NFSNOOPEN ||
1356 (nfsrv_openpluslock * 2) >
1357 nfsrv_v4statelimit)
1358 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
1359 NFSNSF_NOOPENS;
1360 } else {
1361 stp->ls_noopens = 0;
1362 }
1363 stp = nstp;
1364 }
1365 }
1366 }
1367 clp = nclp;
1368 }
1369 }
1370 NFSUNLOCKSTATE();
1371 NFSLOCKV4ROOTMUTEX();
1372 nfsv4_relref(&nfsv4rootfs_lock);
1373 NFSUNLOCKV4ROOTMUTEX();
1374 }
1375
1376 /*
1377 * The following set of functions free up the various data structures.
1378 */
1379 /*
1380 * Clear out all open/lock state related to this nfsclient.
1381 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
1382 * there are no other active nfsd threads.
1383 */
1384 void
nfsrv_cleanclient(struct nfsclient * clp,NFSPROC_T * p)1385 nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
1386 {
1387 struct nfsstate *stp, *nstp;
1388 struct nfsdsession *sep, *nsep;
1389
1390 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
1391 nfsrv_freeopenowner(stp, 1, p);
1392 if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
1393 LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
1394 (void)nfsrv_freesession(NULL, sep, NULL);
1395 }
1396
1397 /*
1398 * Free a client that has been cleaned. It should also already have been
1399 * removed from the lists.
1400 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
1401 * softclock interrupts are enabled.)
1402 */
1403 void
nfsrv_zapclient(struct nfsclient * clp,NFSPROC_T * p)1404 nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
1405 {
1406
1407 #ifdef notyet
1408 if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
1409 (LCL_GSS | LCL_CALLBACKSON) &&
1410 (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
1411 clp->lc_handlelen > 0) {
1412 clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
1413 clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
1414 (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
1415 NULL, 0, NULL, NULL, NULL, 0, p);
1416 }
1417 #endif
1418 newnfs_disconnect(NULL, &clp->lc_req);
1419 free(clp->lc_req.nr_nam, M_SONAME);
1420 NFSFREEMUTEX(&clp->lc_req.nr_mtx);
1421 free(clp->lc_stateid, M_NFSDCLIENT);
1422 free(clp, M_NFSDCLIENT);
1423 NFSLOCKSTATE();
1424 NFSD_VNET(nfsstatsv1_p)->srvclients--;
1425 nfsrv_openpluslock--;
1426 nfsrv_clients--;
1427 NFSUNLOCKSTATE();
1428 }
1429
1430 /*
1431 * Free a list of delegation state structures.
1432 * (This function will also free all nfslockfile structures that no
1433 * longer have associated state.)
1434 */
1435 void
nfsrv_freedeleglist(struct nfsstatehead * sthp)1436 nfsrv_freedeleglist(struct nfsstatehead *sthp)
1437 {
1438 struct nfsstate *stp, *nstp;
1439
1440 LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
1441 nfsrv_freedeleg(stp);
1442 }
1443 LIST_INIT(sthp);
1444 }
1445
1446 /*
1447 * Free up a delegation.
1448 */
1449 static void
nfsrv_freedeleg(struct nfsstate * stp)1450 nfsrv_freedeleg(struct nfsstate *stp)
1451 {
1452 struct nfslockfile *lfp;
1453
1454 LIST_REMOVE(stp, ls_hash);
1455 LIST_REMOVE(stp, ls_list);
1456 LIST_REMOVE(stp, ls_file);
1457 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
1458 nfsrv_writedelegcnt--;
1459 lfp = stp->ls_lfp;
1460 if (LIST_EMPTY(&lfp->lf_open) &&
1461 LIST_EMPTY(&lfp->lf_lock) && LIST_EMPTY(&lfp->lf_deleg) &&
1462 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1463 lfp->lf_usecount == 0 &&
1464 nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
1465 nfsrv_freenfslockfile(lfp);
1466 free(stp, M_NFSDSTATE);
1467 NFSD_VNET(nfsstatsv1_p)->srvdelegates--;
1468 nfsrv_openpluslock--;
1469 nfsrv_delegatecnt--;
1470 }
1471
1472 /*
1473 * This function frees an open owner and all associated opens.
1474 */
1475 static void
nfsrv_freeopenowner(struct nfsstate * stp,int cansleep,NFSPROC_T * p)1476 nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
1477 {
1478 struct nfsstate *nstp, *tstp;
1479
1480 LIST_REMOVE(stp, ls_list);
1481 /*
1482 * Now, free all associated opens.
1483 */
1484 nstp = LIST_FIRST(&stp->ls_open);
1485 while (nstp != LIST_END(&stp->ls_open)) {
1486 tstp = nstp;
1487 nstp = LIST_NEXT(nstp, ls_list);
1488 (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
1489 }
1490 if (stp->ls_op)
1491 nfsrvd_derefcache(stp->ls_op);
1492 free(stp, M_NFSDSTATE);
1493 NFSD_VNET(nfsstatsv1_p)->srvopenowners--;
1494 nfsrv_openpluslock--;
1495 }
1496
1497 /*
1498 * This function frees an open (nfsstate open structure) with all associated
1499 * lock_owners and locks. It also frees the nfslockfile structure iff there
1500 * are no other opens on the file.
1501 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
1502 */
1503 static int
nfsrv_freeopen(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1504 nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
1505 {
1506 struct nfsstate *nstp, *tstp;
1507 struct nfslockfile *lfp;
1508 int ret;
1509
1510 LIST_REMOVE(stp, ls_hash);
1511 LIST_REMOVE(stp, ls_list);
1512 LIST_REMOVE(stp, ls_file);
1513
1514 lfp = stp->ls_lfp;
1515 /*
1516 * Now, free all lockowners associated with this open.
1517 */
1518 LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
1519 nfsrv_freelockowner(tstp, vp, cansleep, p);
1520
1521 /*
1522 * The nfslockfile is freed here if there are no locks
1523 * associated with the open.
1524 * If there are locks associated with the open, the
1525 * nfslockfile structure can be freed via nfsrv_freelockowner().
1526 * Acquire the state mutex to avoid races with calls to
1527 * nfsrv_getlockfile().
1528 */
1529 if (cansleep != 0)
1530 NFSLOCKSTATE();
1531 if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
1532 LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
1533 LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
1534 lfp->lf_usecount == 0 &&
1535 (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
1536 nfsrv_freenfslockfile(lfp);
1537 ret = 1;
1538 } else
1539 ret = 0;
1540 if (cansleep != 0)
1541 NFSUNLOCKSTATE();
1542 free(stp, M_NFSDSTATE);
1543 NFSD_VNET(nfsstatsv1_p)->srvopens--;
1544 nfsrv_openpluslock--;
1545 return (ret);
1546 }
1547
1548 /*
1549 * Frees a lockowner and all associated locks.
1550 */
1551 static void
nfsrv_freelockowner(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1552 nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
1553 NFSPROC_T *p)
1554 {
1555
1556 LIST_REMOVE(stp, ls_hash);
1557 LIST_REMOVE(stp, ls_list);
1558 nfsrv_freeallnfslocks(stp, vp, cansleep, p);
1559 if (stp->ls_op)
1560 nfsrvd_derefcache(stp->ls_op);
1561 free(stp, M_NFSDSTATE);
1562 NFSD_VNET(nfsstatsv1_p)->srvlockowners--;
1563 nfsrv_openpluslock--;
1564 }
1565
1566 /*
1567 * Free all the nfs locks on a lockowner.
1568 */
1569 static void
nfsrv_freeallnfslocks(struct nfsstate * stp,vnode_t vp,int cansleep,NFSPROC_T * p)1570 nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
1571 NFSPROC_T *p)
1572 {
1573 struct nfslock *lop, *nlop;
1574 struct nfsrollback *rlp, *nrlp;
1575 struct nfslockfile *lfp = NULL;
1576 int gottvp = 0;
1577 vnode_t tvp = NULL;
1578 uint64_t first, end;
1579
1580 if (vp != NULL)
1581 ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
1582 lop = LIST_FIRST(&stp->ls_lock);
1583 while (lop != LIST_END(&stp->ls_lock)) {
1584 nlop = LIST_NEXT(lop, lo_lckowner);
1585 /*
1586 * Since all locks should be for the same file, lfp should
1587 * not change.
1588 */
1589 if (lfp == NULL)
1590 lfp = lop->lo_lfp;
1591 else if (lfp != lop->lo_lfp)
1592 panic("allnfslocks");
1593 /*
1594 * If vp is NULL and cansleep != 0, a vnode must be acquired
1595 * from the file handle. This only occurs when called from
1596 * nfsrv_cleanclient().
1597 */
1598 if (gottvp == 0) {
1599 if (nfsrv_dolocallocks == 0)
1600 tvp = NULL;
1601 else if (vp == NULL && cansleep != 0) {
1602 tvp = nfsvno_getvp(&lfp->lf_fh);
1603 if (tvp != NULL)
1604 NFSVOPUNLOCK(tvp);
1605 } else
1606 tvp = vp;
1607 gottvp = 1;
1608 }
1609
1610 if (tvp != NULL) {
1611 if (cansleep == 0)
1612 panic("allnfs2");
1613 first = lop->lo_first;
1614 end = lop->lo_end;
1615 nfsrv_freenfslock(lop);
1616 nfsrv_localunlock(tvp, lfp, first, end, p);
1617 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
1618 nrlp)
1619 free(rlp, M_NFSDROLLBACK);
1620 LIST_INIT(&lfp->lf_rollback);
1621 } else
1622 nfsrv_freenfslock(lop);
1623 lop = nlop;
1624 }
1625 if (vp == NULL && tvp != NULL)
1626 vrele(tvp);
1627 }
1628
1629 /*
1630 * Free an nfslock structure.
1631 */
1632 static void
nfsrv_freenfslock(struct nfslock * lop)1633 nfsrv_freenfslock(struct nfslock *lop)
1634 {
1635
1636 if (lop->lo_lckfile.le_prev != NULL) {
1637 LIST_REMOVE(lop, lo_lckfile);
1638 NFSD_VNET(nfsstatsv1_p)->srvlocks--;
1639 nfsrv_openpluslock--;
1640 }
1641 LIST_REMOVE(lop, lo_lckowner);
1642 free(lop, M_NFSDLOCK);
1643 }
1644
1645 /*
1646 * This function frees an nfslockfile structure.
1647 */
1648 static void
nfsrv_freenfslockfile(struct nfslockfile * lfp)1649 nfsrv_freenfslockfile(struct nfslockfile *lfp)
1650 {
1651
1652 LIST_REMOVE(lfp, lf_hash);
1653 free(lfp, M_NFSDLOCKFILE);
1654 }
1655
1656 /*
1657 * This function looks up an nfsstate structure via stateid.
1658 */
1659 static int
nfsrv_getstate(struct nfsclient * clp,nfsv4stateid_t * stateidp,__unused u_int32_t flags,struct nfsstate ** stpp)1660 nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, __unused u_int32_t flags,
1661 struct nfsstate **stpp)
1662 {
1663 struct nfsstate *stp;
1664 struct nfsstatehead *hp;
1665 int error = 0;
1666
1667 *stpp = NULL;
1668 hp = NFSSTATEHASH(clp, *stateidp);
1669 LIST_FOREACH(stp, hp, ls_hash) {
1670 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
1671 NFSX_STATEIDOTHER))
1672 break;
1673 }
1674
1675 /*
1676 * If no state id in list, return NFSERR_BADSTATEID.
1677 */
1678 if (stp == LIST_END(hp)) {
1679 error = NFSERR_BADSTATEID;
1680 goto out;
1681 }
1682 *stpp = stp;
1683
1684 out:
1685 NFSEXITCODE(error);
1686 return (error);
1687 }
1688
1689 /*
1690 * This function gets an nfsstate structure via owner string.
1691 */
1692 static void
nfsrv_getowner(struct nfsstatehead * hp,struct nfsstate * new_stp,struct nfsstate ** stpp)1693 nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
1694 struct nfsstate **stpp)
1695 {
1696 struct nfsstate *stp;
1697
1698 *stpp = NULL;
1699 LIST_FOREACH(stp, hp, ls_list) {
1700 if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
1701 !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
1702 *stpp = stp;
1703 return;
1704 }
1705 }
1706 }
1707
1708 /*
1709 * Lock control function called to update lock status.
1710 * Returns 0 upon success, -1 if there is no lock and the flags indicate
1711 * that one isn't to be created and an NFSERR_xxx for other errors.
1712 * The structures new_stp and new_lop are passed in as pointers that should
1713 * be set to NULL if the structure is used and shouldn't be free'd.
1714 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
1715 * never used and can safely be allocated on the stack. For all other
1716 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
1717 * in case they are used.
1718 */
1719 int
nfsrv_lockctrl(vnode_t vp,struct nfsstate ** new_stpp,struct nfslock ** new_lopp,struct nfslockconflict * cfp,nfsquad_t clientid,nfsv4stateid_t * stateidp,__unused struct nfsexstuff * exp,struct nfsrv_descript * nd,NFSPROC_T * p)1720 nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
1721 struct nfslock **new_lopp, struct nfslockconflict *cfp,
1722 nfsquad_t clientid, nfsv4stateid_t *stateidp,
1723 __unused struct nfsexstuff *exp,
1724 struct nfsrv_descript *nd, NFSPROC_T *p)
1725 {
1726 struct nfslock *lop;
1727 struct nfsstate *new_stp = *new_stpp;
1728 struct nfslock *new_lop = *new_lopp;
1729 struct nfsstate *tstp, *mystp, *nstp;
1730 int specialid = 0;
1731 struct nfslockfile *lfp;
1732 struct nfslock *other_lop = NULL;
1733 struct nfsstate *stp, *lckstp = NULL;
1734 struct nfsclient *clp = NULL;
1735 u_int32_t bits;
1736 int error = 0, haslock = 0, ret, reterr;
1737 int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
1738 fhandle_t nfh;
1739 uint64_t first, end;
1740 uint32_t lock_flags;
1741
1742 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
1743 /*
1744 * Note the special cases of "all 1s" or "all 0s" stateids and
1745 * let reads with all 1s go ahead.
1746 */
1747 if (new_stp->ls_stateid.seqid == 0x0 &&
1748 new_stp->ls_stateid.other[0] == 0x0 &&
1749 new_stp->ls_stateid.other[1] == 0x0 &&
1750 new_stp->ls_stateid.other[2] == 0x0)
1751 specialid = 1;
1752 else if (new_stp->ls_stateid.seqid == 0xffffffff &&
1753 new_stp->ls_stateid.other[0] == 0xffffffff &&
1754 new_stp->ls_stateid.other[1] == 0xffffffff &&
1755 new_stp->ls_stateid.other[2] == 0xffffffff)
1756 specialid = 2;
1757 }
1758
1759 /*
1760 * Check for restart conditions (client and server).
1761 */
1762 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
1763 &new_stp->ls_stateid, specialid);
1764 if (error)
1765 goto out;
1766
1767 /*
1768 * Check for state resource limit exceeded.
1769 */
1770 if ((new_stp->ls_flags & NFSLCK_LOCK) &&
1771 nfsrv_openpluslock > nfsrv_v4statelimit) {
1772 error = NFSERR_RESOURCE;
1773 goto out;
1774 }
1775
1776 /*
1777 * For the lock case, get another nfslock structure,
1778 * just in case we need it.
1779 * Malloc now, before we start sifting through the linked lists,
1780 * in case we have to wait for memory.
1781 */
1782 tryagain:
1783 if (new_stp->ls_flags & NFSLCK_LOCK)
1784 other_lop = malloc(sizeof (struct nfslock),
1785 M_NFSDLOCK, M_WAITOK);
1786 filestruct_locked = 0;
1787 reterr = 0;
1788 lfp = NULL;
1789
1790 /*
1791 * Get the lockfile structure for CFH now, so we can do a sanity
1792 * check against the stateid, before incrementing the seqid#, since
1793 * we want to return NFSERR_BADSTATEID on failure and the seqid#
1794 * shouldn't be incremented for this case.
1795 * If nfsrv_getlockfile() returns -1, it means "not found", which
1796 * will be handled later.
1797 * If we are doing Lock/LockU and local locking is enabled, sleep
1798 * lock the nfslockfile structure.
1799 */
1800 getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
1801 NFSLOCKSTATE();
1802 if (getlckret == 0) {
1803 if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
1804 nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
1805 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1806 &lfp, &nfh, 1);
1807 if (getlckret == 0)
1808 filestruct_locked = 1;
1809 } else
1810 getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
1811 &lfp, &nfh, 0);
1812 }
1813 if (getlckret != 0 && getlckret != -1)
1814 reterr = getlckret;
1815
1816 if (filestruct_locked != 0) {
1817 LIST_INIT(&lfp->lf_rollback);
1818 if ((new_stp->ls_flags & NFSLCK_LOCK)) {
1819 /*
1820 * For local locking, do the advisory locking now, so
1821 * that any conflict can be detected. A failure later
1822 * can be rolled back locally. If an error is returned,
1823 * struct nfslockfile has been unlocked and any local
1824 * locking rolled back.
1825 */
1826 NFSUNLOCKSTATE();
1827 if (vnode_unlocked == 0) {
1828 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
1829 vnode_unlocked = 1;
1830 NFSVOPUNLOCK(vp);
1831 }
1832 reterr = nfsrv_locallock(vp, lfp,
1833 (new_lop->lo_flags & (NFSLCK_READ | NFSLCK_WRITE)),
1834 new_lop->lo_first, new_lop->lo_end, cfp, p);
1835 NFSLOCKSTATE();
1836 }
1837 }
1838
1839 if (specialid == 0) {
1840 if (new_stp->ls_flags & NFSLCK_TEST) {
1841 /*
1842 * RFC 3530 does not list LockT as an op that renews a
1843 * lease, but the consensus seems to be that it is ok
1844 * for a server to do so.
1845 */
1846 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1847 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1848
1849 /*
1850 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
1851 * error returns for LockT, just go ahead and test for a lock,
1852 * since there are no locks for this client, but other locks
1853 * can conflict. (ie. same client will always be false)
1854 */
1855 if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
1856 error = 0;
1857 lckstp = new_stp;
1858 } else {
1859 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
1860 (nfsquad_t)((u_quad_t)0), 0, nd, p);
1861 if (error == 0)
1862 /*
1863 * Look up the stateid
1864 */
1865 error = nfsrv_getstate(clp, &new_stp->ls_stateid,
1866 new_stp->ls_flags, &stp);
1867 /*
1868 * do some sanity checks for an unconfirmed open or a
1869 * stateid that refers to the wrong file, for an open stateid
1870 */
1871 if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
1872 ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
1873 (getlckret == 0 && stp->ls_lfp != lfp))){
1874 /*
1875 * NFSLCK_SETATTR should return OK rather than NFSERR_BADSTATEID
1876 * The only exception is using SETATTR with SIZE.
1877 * */
1878 if ((new_stp->ls_flags &
1879 (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
1880 error = NFSERR_BADSTATEID;
1881 }
1882
1883 if (error == 0 &&
1884 (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
1885 getlckret == 0 && stp->ls_lfp != lfp)
1886 error = NFSERR_BADSTATEID;
1887
1888 /*
1889 * If the lockowner stateid doesn't refer to the same file,
1890 * I believe that is considered ok, since some clients will
1891 * only create a single lockowner and use that for all locks
1892 * on all files.
1893 * For now, log it as a diagnostic, instead of considering it
1894 * a BadStateid.
1895 */
1896 if (error == 0 && (stp->ls_flags &
1897 (NFSLCK_OPEN | NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
1898 getlckret == 0 && stp->ls_lfp != lfp) {
1899 #ifdef DIAGNOSTIC
1900 printf("Got a lock statid for different file open\n");
1901 #endif
1902 /*
1903 error = NFSERR_BADSTATEID;
1904 */
1905 }
1906
1907 if (error == 0) {
1908 if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
1909 /*
1910 * If haslock set, we've already checked the seqid.
1911 */
1912 if (!haslock) {
1913 if (stp->ls_flags & NFSLCK_OPEN)
1914 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1915 stp->ls_openowner, new_stp->ls_op);
1916 else
1917 error = NFSERR_BADSTATEID;
1918 }
1919 if (!error)
1920 nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
1921 if (lckstp) {
1922 /*
1923 * For NFSv4.1 and NFSv4.2 allow an
1924 * open_to_lock_owner when the lock_owner already
1925 * exists. Just clear NFSLCK_OPENTOLOCK so that
1926 * a new lock_owner will not be created.
1927 * RFC7530 states that the error for NFSv4.0
1928 * is NFS4ERR_BAD_SEQID.
1929 */
1930 if ((nd->nd_flag & ND_NFSV41) != 0)
1931 new_stp->ls_flags &= ~NFSLCK_OPENTOLOCK;
1932 else
1933 error = NFSERR_BADSEQID;
1934 } else
1935 lckstp = new_stp;
1936 } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
1937 /*
1938 * If haslock set, ditto above.
1939 */
1940 if (!haslock) {
1941 if (stp->ls_flags & NFSLCK_OPEN)
1942 error = NFSERR_BADSTATEID;
1943 else
1944 error = nfsrv_checkseqid(nd, new_stp->ls_seq,
1945 stp, new_stp->ls_op);
1946 }
1947 lckstp = stp;
1948 } else {
1949 lckstp = stp;
1950 }
1951 }
1952 /*
1953 * If the seqid part of the stateid isn't the same, return
1954 * NFSERR_OLDSTATEID for cases other than I/O Ops.
1955 * For I/O Ops, only return NFSERR_OLDSTATEID if
1956 * nfsrv_returnoldstateid is set. (The consensus on the email
1957 * list was that most clients would prefer to not receive
1958 * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
1959 * is what will happen, so I use the nfsrv_returnoldstateid to
1960 * allow for either server configuration.)
1961 */
1962 if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
1963 (((nd->nd_flag & ND_NFSV41) == 0 &&
1964 (!(new_stp->ls_flags & NFSLCK_CHECK) ||
1965 nfsrv_returnoldstateid)) ||
1966 ((nd->nd_flag & ND_NFSV41) != 0 &&
1967 new_stp->ls_stateid.seqid != 0)))
1968 error = NFSERR_OLDSTATEID;
1969 }
1970 }
1971
1972 /*
1973 * Now we can check for grace.
1974 */
1975 if (!error)
1976 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
1977 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
1978 nfsrv_checkstable(clp))
1979 error = NFSERR_NOGRACE;
1980 /*
1981 * If we successfully Reclaimed state, note that.
1982 */
1983 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
1984 nfsrv_markstable(clp);
1985
1986 /*
1987 * At this point, either error == NFSERR_BADSTATEID or the
1988 * seqid# has been updated, so we can return any error.
1989 * If error == 0, there may be an error in:
1990 * nd_repstat - Set by the calling function.
1991 * reterr - Set above, if getting the nfslockfile structure
1992 * or acquiring the local lock failed.
1993 * (If both of these are set, nd_repstat should probably be
1994 * returned, since that error was detected before this
1995 * function call.)
1996 */
1997 if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
1998 if (error == 0) {
1999 if (nd->nd_repstat != 0)
2000 error = nd->nd_repstat;
2001 else
2002 error = reterr;
2003 }
2004 if (filestruct_locked != 0) {
2005 /* Roll back local locks. */
2006 NFSUNLOCKSTATE();
2007 if (vnode_unlocked == 0) {
2008 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
2009 vnode_unlocked = 1;
2010 NFSVOPUNLOCK(vp);
2011 }
2012 nfsrv_locallock_rollback(vp, lfp, p);
2013 NFSLOCKSTATE();
2014 nfsrv_unlocklf(lfp);
2015 }
2016 NFSUNLOCKSTATE();
2017 goto out;
2018 }
2019
2020 /*
2021 * Check the nfsrv_getlockfile return.
2022 * Returned -1 if no structure found.
2023 */
2024 if (getlckret == -1) {
2025 error = NFSERR_EXPIRED;
2026 /*
2027 * Called from lockt, so no lock is OK.
2028 */
2029 if (new_stp->ls_flags & NFSLCK_TEST) {
2030 error = 0;
2031 } else if (new_stp->ls_flags &
2032 (NFSLCK_CHECK | NFSLCK_SETATTR)) {
2033 /*
2034 * Called to check for a lock, OK if the stateid is all
2035 * 1s or all 0s, but there should be an nfsstate
2036 * otherwise.
2037 * (ie. If there is no open, I'll assume no share
2038 * deny bits.)
2039 */
2040 if (specialid)
2041 error = 0;
2042 else
2043 error = NFSERR_BADSTATEID;
2044 }
2045 NFSUNLOCKSTATE();
2046 goto out;
2047 }
2048
2049 /*
2050 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
2051 * For NFSLCK_CHECK, allow a read if write access is granted,
2052 * but check for a deny. For NFSLCK_LOCK, require correct access,
2053 * which implies a conflicting deny can't exist.
2054 */
2055 if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
2056 /*
2057 * Four kinds of state id:
2058 * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
2059 * - stateid for an open
2060 * - stateid for a delegation
2061 * - stateid for a lock owner
2062 */
2063 if (!specialid) {
2064 if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
2065 delegation = 1;
2066 mystp = stp;
2067 nfsrv_delaydelegtimeout(stp);
2068 } else if (stp->ls_flags & NFSLCK_OPEN) {
2069 mystp = stp;
2070 } else {
2071 mystp = stp->ls_openstp;
2072 }
2073 /*
2074 * If locking or checking, require correct access
2075 * bit set.
2076 */
2077 if (((new_stp->ls_flags & NFSLCK_LOCK) &&
2078 !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
2079 mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
2080 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
2081 (NFSLCK_CHECK | NFSLCK_READACCESS) &&
2082 !(mystp->ls_flags & NFSLCK_READACCESS) &&
2083 nfsrv_allowreadforwriteopen == 0) ||
2084 ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
2085 (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
2086 !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
2087 if (filestruct_locked != 0) {
2088 /* Roll back local locks. */
2089 NFSUNLOCKSTATE();
2090 if (vnode_unlocked == 0) {
2091 ASSERT_VOP_ELOCKED(vp,
2092 "nfsrv_lockctrl3");
2093 vnode_unlocked = 1;
2094 NFSVOPUNLOCK(vp);
2095 }
2096 nfsrv_locallock_rollback(vp, lfp, p);
2097 NFSLOCKSTATE();
2098 nfsrv_unlocklf(lfp);
2099 }
2100 NFSUNLOCKSTATE();
2101 error = NFSERR_OPENMODE;
2102 goto out;
2103 }
2104 } else
2105 mystp = NULL;
2106 if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
2107 /*
2108 * Check for a conflicting deny bit.
2109 */
2110 LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
2111 if (tstp != mystp) {
2112 bits = tstp->ls_flags;
2113 bits >>= NFSLCK_SHIFT;
2114 if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
2115 KASSERT(vnode_unlocked == 0,
2116 ("nfsrv_lockctrl: vnode unlocked1"));
2117 ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
2118 vp, p);
2119 if (ret == 1) {
2120 /*
2121 * nfsrv_clientconflict unlocks state
2122 * when it returns non-zero.
2123 */
2124 lckstp = NULL;
2125 goto tryagain;
2126 }
2127 if (ret == 0)
2128 NFSUNLOCKSTATE();
2129 if (ret == 2)
2130 error = NFSERR_PERM;
2131 else
2132 error = NFSERR_OPENMODE;
2133 goto out;
2134 }
2135 }
2136 }
2137
2138 /* We're outta here */
2139 NFSUNLOCKSTATE();
2140 goto out;
2141 }
2142 }
2143
2144 /*
2145 * For setattr, just get rid of all the Delegations for other clients.
2146 */
2147 if (new_stp->ls_flags & NFSLCK_SETATTR) {
2148 KASSERT(vnode_unlocked == 0,
2149 ("nfsrv_lockctrl: vnode unlocked2"));
2150 ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
2151 if (ret) {
2152 /*
2153 * nfsrv_cleandeleg() unlocks state when it
2154 * returns non-zero.
2155 */
2156 if (ret == -1) {
2157 lckstp = NULL;
2158 goto tryagain;
2159 }
2160 error = ret;
2161 goto out;
2162 }
2163 if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
2164 (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
2165 LIST_EMPTY(&lfp->lf_deleg))) {
2166 NFSUNLOCKSTATE();
2167 goto out;
2168 }
2169 }
2170
2171 /*
2172 * Check for a conflicting delegation. If one is found, call
2173 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2174 * been set yet, it will get the lock. Otherwise, it will recall
2175 * the delegation. Then, we try try again...
2176 * I currently believe the conflict algorithm to be:
2177 * For Lock Ops (Lock/LockT/LockU)
2178 * - there is a conflict iff a different client has a write delegation
2179 * For Reading (Read Op)
2180 * - there is a conflict iff a different client has a write delegation
2181 * (the specialids are always a different client)
2182 * For Writing (Write/Setattr of size)
2183 * - there is a conflict if a different client has any delegation
2184 * - there is a conflict if the same client has a read delegation
2185 * (I don't understand why this isn't allowed, but that seems to be
2186 * the current consensus?)
2187 */
2188 tstp = LIST_FIRST(&lfp->lf_deleg);
2189 while (tstp != LIST_END(&lfp->lf_deleg)) {
2190 nstp = LIST_NEXT(tstp, ls_file);
2191 if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
2192 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2193 (new_lop->lo_flags & NFSLCK_READ))) &&
2194 clp != tstp->ls_clp &&
2195 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
2196 ((new_stp->ls_flags & NFSLCK_CHECK) &&
2197 (new_lop->lo_flags & NFSLCK_WRITE) &&
2198 (clp != tstp->ls_clp ||
2199 (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
2200 ret = 0;
2201 if (filestruct_locked != 0) {
2202 /* Roll back local locks. */
2203 NFSUNLOCKSTATE();
2204 if (vnode_unlocked == 0) {
2205 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
2206 NFSVOPUNLOCK(vp);
2207 }
2208 nfsrv_locallock_rollback(vp, lfp, p);
2209 NFSLOCKSTATE();
2210 nfsrv_unlocklf(lfp);
2211 NFSUNLOCKSTATE();
2212 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2213 vnode_unlocked = 0;
2214 if (VN_IS_DOOMED(vp))
2215 ret = NFSERR_SERVERFAULT;
2216 NFSLOCKSTATE();
2217 }
2218 if (ret == 0)
2219 ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
2220 if (ret) {
2221 /*
2222 * nfsrv_delegconflict unlocks state when it
2223 * returns non-zero, which it always does.
2224 */
2225 if (other_lop) {
2226 free(other_lop, M_NFSDLOCK);
2227 other_lop = NULL;
2228 }
2229 if (ret == -1) {
2230 lckstp = NULL;
2231 goto tryagain;
2232 }
2233 error = ret;
2234 goto out;
2235 }
2236 /* Never gets here. */
2237 }
2238 tstp = nstp;
2239 }
2240
2241 /*
2242 * Handle the unlock case by calling nfsrv_updatelock().
2243 * (Should I have done some access checking above for unlock? For now,
2244 * just let it happen.)
2245 */
2246 if (new_stp->ls_flags & NFSLCK_UNLOCK) {
2247 first = new_lop->lo_first;
2248 end = new_lop->lo_end;
2249 nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
2250 stateidp->seqid = ++(stp->ls_stateid.seqid);
2251 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2252 stateidp->seqid = stp->ls_stateid.seqid = 1;
2253 stateidp->other[0] = stp->ls_stateid.other[0];
2254 stateidp->other[1] = stp->ls_stateid.other[1];
2255 stateidp->other[2] = stp->ls_stateid.other[2];
2256 if (filestruct_locked != 0) {
2257 NFSUNLOCKSTATE();
2258 if (vnode_unlocked == 0) {
2259 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
2260 vnode_unlocked = 1;
2261 NFSVOPUNLOCK(vp);
2262 }
2263 /* Update the local locks. */
2264 nfsrv_localunlock(vp, lfp, first, end, p);
2265 NFSLOCKSTATE();
2266 nfsrv_unlocklf(lfp);
2267 }
2268 NFSUNLOCKSTATE();
2269 goto out;
2270 }
2271
2272 /*
2273 * Search for a conflicting lock. A lock conflicts if:
2274 * - the lock range overlaps and
2275 * - at least one lock is a write lock and
2276 * - it is not owned by the same lock owner
2277 */
2278 if (!delegation) {
2279 LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
2280 if (new_lop->lo_end > lop->lo_first &&
2281 new_lop->lo_first < lop->lo_end &&
2282 (new_lop->lo_flags == NFSLCK_WRITE ||
2283 lop->lo_flags == NFSLCK_WRITE) &&
2284 lckstp != lop->lo_stp &&
2285 (clp != lop->lo_stp->ls_clp ||
2286 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
2287 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
2288 lckstp->ls_ownerlen))) {
2289 if (other_lop) {
2290 free(other_lop, M_NFSDLOCK);
2291 other_lop = NULL;
2292 }
2293 if (vnode_unlocked != 0)
2294 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2295 NULL, p);
2296 else
2297 ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
2298 vp, p);
2299 if (ret == 1) {
2300 if (filestruct_locked != 0) {
2301 if (vnode_unlocked == 0) {
2302 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
2303 NFSVOPUNLOCK(vp);
2304 }
2305 /* Roll back local locks. */
2306 nfsrv_locallock_rollback(vp, lfp, p);
2307 NFSLOCKSTATE();
2308 nfsrv_unlocklf(lfp);
2309 NFSUNLOCKSTATE();
2310 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2311 vnode_unlocked = 0;
2312 if (VN_IS_DOOMED(vp)) {
2313 error = NFSERR_SERVERFAULT;
2314 goto out;
2315 }
2316 }
2317 /*
2318 * nfsrv_clientconflict() unlocks state when it
2319 * returns non-zero.
2320 */
2321 lckstp = NULL;
2322 goto tryagain;
2323 }
2324 /*
2325 * Found a conflicting lock, so record the conflict and
2326 * return the error.
2327 */
2328 if (cfp != NULL && ret == 0) {
2329 cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
2330 cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
2331 cfp->cl_first = lop->lo_first;
2332 cfp->cl_end = lop->lo_end;
2333 cfp->cl_flags = lop->lo_flags;
2334 cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
2335 NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
2336 cfp->cl_ownerlen);
2337 }
2338 if (ret == 2)
2339 error = NFSERR_PERM;
2340 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2341 error = NFSERR_RECLAIMCONFLICT;
2342 else if (new_stp->ls_flags & NFSLCK_CHECK)
2343 error = NFSERR_LOCKED;
2344 else
2345 error = NFSERR_DENIED;
2346 if (filestruct_locked != 0 && ret == 0) {
2347 /* Roll back local locks. */
2348 NFSUNLOCKSTATE();
2349 if (vnode_unlocked == 0) {
2350 ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
2351 vnode_unlocked = 1;
2352 NFSVOPUNLOCK(vp);
2353 }
2354 nfsrv_locallock_rollback(vp, lfp, p);
2355 NFSLOCKSTATE();
2356 nfsrv_unlocklf(lfp);
2357 }
2358 if (ret == 0)
2359 NFSUNLOCKSTATE();
2360 goto out;
2361 }
2362 }
2363 }
2364
2365 /*
2366 * We only get here if there was no lock that conflicted.
2367 */
2368 if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
2369 NFSUNLOCKSTATE();
2370 goto out;
2371 }
2372
2373 /*
2374 * We only get here when we are creating or modifying a lock.
2375 * There are two variants:
2376 * - exist_lock_owner where lock_owner exists
2377 * - open_to_lock_owner with new lock_owner
2378 */
2379 first = new_lop->lo_first;
2380 end = new_lop->lo_end;
2381 lock_flags = new_lop->lo_flags;
2382 if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
2383 nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
2384 stateidp->seqid = ++(lckstp->ls_stateid.seqid);
2385 if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
2386 stateidp->seqid = lckstp->ls_stateid.seqid = 1;
2387 stateidp->other[0] = lckstp->ls_stateid.other[0];
2388 stateidp->other[1] = lckstp->ls_stateid.other[1];
2389 stateidp->other[2] = lckstp->ls_stateid.other[2];
2390 } else {
2391 /*
2392 * The new open_to_lock_owner case.
2393 * Link the new nfsstate into the lists.
2394 */
2395 new_stp->ls_seq = new_stp->ls_opentolockseq;
2396 nfsrvd_refcache(new_stp->ls_op);
2397 stateidp->seqid = new_stp->ls_stateid.seqid = 1;
2398 stateidp->other[0] = new_stp->ls_stateid.other[0] =
2399 clp->lc_clientid.lval[0];
2400 stateidp->other[1] = new_stp->ls_stateid.other[1] =
2401 clp->lc_clientid.lval[1];
2402 stateidp->other[2] = new_stp->ls_stateid.other[2] =
2403 nfsrv_nextstateindex(clp);
2404 new_stp->ls_clp = clp;
2405 LIST_INIT(&new_stp->ls_lock);
2406 new_stp->ls_openstp = stp;
2407 new_stp->ls_lfp = lfp;
2408 nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
2409 lfp);
2410 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
2411 new_stp, ls_hash);
2412 LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
2413 *new_lopp = NULL;
2414 *new_stpp = NULL;
2415 NFSD_VNET(nfsstatsv1_p)->srvlockowners++;
2416 nfsrv_openpluslock++;
2417 }
2418 if (filestruct_locked != 0) {
2419 NFSUNLOCKSTATE();
2420 nfsrv_locallock_commit(lfp, lock_flags, first, end);
2421 NFSLOCKSTATE();
2422 nfsrv_unlocklf(lfp);
2423 }
2424 NFSUNLOCKSTATE();
2425
2426 out:
2427 if (haslock) {
2428 NFSLOCKV4ROOTMUTEX();
2429 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2430 NFSUNLOCKV4ROOTMUTEX();
2431 }
2432 if (vnode_unlocked != 0) {
2433 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
2434 if (error == 0 && VN_IS_DOOMED(vp))
2435 error = NFSERR_SERVERFAULT;
2436 }
2437 if (other_lop)
2438 free(other_lop, M_NFSDLOCK);
2439 NFSEXITCODE2(error, nd);
2440 return (error);
2441 }
2442
2443 /*
2444 * Check for state errors for Open.
2445 * repstat is passed back out as an error if more critical errors
2446 * are not detected.
2447 */
2448 int
nfsrv_opencheck(nfsquad_t clientid,nfsv4stateid_t * stateidp,struct nfsstate * new_stp,vnode_t vp,struct nfsrv_descript * nd,NFSPROC_T * p,int repstat)2449 nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
2450 struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
2451 NFSPROC_T *p, int repstat)
2452 {
2453 struct nfsstate *stp, *nstp;
2454 struct nfsclient *clp;
2455 struct nfsstate *ownerstp;
2456 struct nfslockfile *lfp, *new_lfp;
2457 int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;
2458
2459 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2460 readonly = 1;
2461 /*
2462 * Check for restart conditions (client and server).
2463 */
2464 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2465 &new_stp->ls_stateid, 0);
2466 if (error)
2467 goto out;
2468
2469 /*
2470 * Check for state resource limit exceeded.
2471 * Technically this should be SMP protected, but the worst
2472 * case error is "out by one or two" on the count when it
2473 * returns NFSERR_RESOURCE and the limit is just a rather
2474 * arbitrary high water mark, so no harm is done.
2475 */
2476 if (nfsrv_openpluslock > nfsrv_v4statelimit) {
2477 error = NFSERR_RESOURCE;
2478 goto out;
2479 }
2480
2481 tryagain:
2482 new_lfp = malloc(sizeof (struct nfslockfile),
2483 M_NFSDLOCKFILE, M_WAITOK);
2484 if (vp)
2485 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2486 NULL, p);
2487 NFSLOCKSTATE();
2488 /*
2489 * Get the nfsclient structure.
2490 */
2491 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2492 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2493
2494 /*
2495 * Look up the open owner. See if it needs confirmation and
2496 * check the seq#, as required.
2497 */
2498 if (!error)
2499 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2500
2501 if (!error && ownerstp) {
2502 error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
2503 new_stp->ls_op);
2504 /*
2505 * If the OpenOwner hasn't been confirmed, assume the
2506 * old one was a replay and this one is ok.
2507 * See: RFC3530 Sec. 14.2.18.
2508 */
2509 if (error == NFSERR_BADSEQID &&
2510 (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
2511 error = 0;
2512 }
2513
2514 /*
2515 * Check for grace.
2516 */
2517 if (!error)
2518 error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
2519 if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
2520 nfsrv_checkstable(clp))
2521 error = NFSERR_NOGRACE;
2522
2523 /*
2524 * If none of the above errors occurred, let repstat be
2525 * returned.
2526 */
2527 if (repstat && !error)
2528 error = repstat;
2529 if (error) {
2530 NFSUNLOCKSTATE();
2531 if (haslock) {
2532 NFSLOCKV4ROOTMUTEX();
2533 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2534 NFSUNLOCKV4ROOTMUTEX();
2535 }
2536 free(new_lfp, M_NFSDLOCKFILE);
2537 goto out;
2538 }
2539
2540 /*
2541 * If vp == NULL, the file doesn't exist yet, so return ok.
2542 * (This always happens on the first pass, so haslock must be 0.)
2543 */
2544 if (vp == NULL) {
2545 NFSUNLOCKSTATE();
2546 free(new_lfp, M_NFSDLOCKFILE);
2547 goto out;
2548 }
2549
2550 /*
2551 * Get the structure for the underlying file.
2552 */
2553 if (getfhret)
2554 error = getfhret;
2555 else
2556 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2557 NULL, 0);
2558 if (new_lfp)
2559 free(new_lfp, M_NFSDLOCKFILE);
2560 if (error) {
2561 NFSUNLOCKSTATE();
2562 if (haslock) {
2563 NFSLOCKV4ROOTMUTEX();
2564 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2565 NFSUNLOCKV4ROOTMUTEX();
2566 }
2567 goto out;
2568 }
2569
2570 /*
2571 * Search for a conflicting open/share.
2572 */
2573 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2574 /*
2575 * For Delegate_Cur, search for the matching Delegation,
2576 * which indicates no conflict.
2577 * An old delegation should have been recovered by the
2578 * client doing a Claim_DELEGATE_Prev, so I won't let
2579 * it match and return NFSERR_EXPIRED. Should I let it
2580 * match?
2581 */
2582 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2583 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2584 (((nd->nd_flag & ND_NFSV41) != 0 &&
2585 stateidp->seqid == 0) ||
2586 stateidp->seqid == stp->ls_stateid.seqid) &&
2587 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2588 NFSX_STATEIDOTHER))
2589 break;
2590 }
2591 if (stp == LIST_END(&lfp->lf_deleg) ||
2592 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2593 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2594 NFSUNLOCKSTATE();
2595 if (haslock) {
2596 NFSLOCKV4ROOTMUTEX();
2597 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2598 NFSUNLOCKV4ROOTMUTEX();
2599 }
2600 error = NFSERR_EXPIRED;
2601 goto out;
2602 }
2603 }
2604
2605 /*
2606 * Check for access/deny bit conflicts. I check for the same
2607 * owner as well, in case the client didn't bother.
2608 */
2609 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2610 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
2611 (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2612 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2613 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2614 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
2615 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2616 if (ret == 1) {
2617 /*
2618 * nfsrv_clientconflict() unlocks
2619 * state when it returns non-zero.
2620 */
2621 goto tryagain;
2622 }
2623 if (ret == 2)
2624 error = NFSERR_PERM;
2625 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2626 error = NFSERR_RECLAIMCONFLICT;
2627 else
2628 error = NFSERR_SHAREDENIED;
2629 if (ret == 0)
2630 NFSUNLOCKSTATE();
2631 if (haslock) {
2632 NFSLOCKV4ROOTMUTEX();
2633 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2634 NFSUNLOCKV4ROOTMUTEX();
2635 }
2636 goto out;
2637 }
2638 }
2639
2640 /*
2641 * Check for a conflicting delegation. If one is found, call
2642 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2643 * been set yet, it will get the lock. Otherwise, it will recall
2644 * the delegation. Then, we try try again...
2645 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2646 * isn't a conflict.)
2647 * I currently believe the conflict algorithm to be:
2648 * For Open with Read Access and Deny None
2649 * - there is a conflict iff a different client has a write delegation
2650 * For Open with other Write Access or any Deny except None
2651 * - there is a conflict if a different client has any delegation
2652 * - there is a conflict if the same client has a read delegation
2653 * (The current consensus is that this last case should be
2654 * considered a conflict since the client with a read delegation
2655 * could have done an Open with ReadAccess and WriteDeny
2656 * locally and then not have checked for the WriteDeny.)
2657 * The exception is a NFSv4.1/4.2 client that has requested
2658 * an atomic upgrade to a write delegation.
2659 * Don't check for a Reclaim, since that will be dealt with
2660 * by nfsrv_openctrl().
2661 */
2662 if (!(new_stp->ls_flags &
2663 (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
2664 stp = LIST_FIRST(&lfp->lf_deleg);
2665 while (stp != LIST_END(&lfp->lf_deleg)) {
2666 nstp = LIST_NEXT(stp, ls_file);
2667 if ((readonly && stp->ls_clp != clp &&
2668 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
2669 (!readonly && (stp->ls_clp != clp ||
2670 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
2671 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
2672 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2673 if (ret) {
2674 /*
2675 * nfsrv_delegconflict() unlocks state
2676 * when it returns non-zero.
2677 */
2678 if (ret == -1)
2679 goto tryagain;
2680 error = ret;
2681 goto out;
2682 }
2683 }
2684 stp = nstp;
2685 }
2686 }
2687 NFSUNLOCKSTATE();
2688 if (haslock) {
2689 NFSLOCKV4ROOTMUTEX();
2690 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2691 NFSUNLOCKV4ROOTMUTEX();
2692 }
2693
2694 out:
2695 NFSEXITCODE2(error, nd);
2696 return (error);
2697 }
2698
2699 /*
2700 * Open control function to create/update open state for an open.
2701 */
2702 int
nfsrv_openctrl(struct nfsrv_descript * nd,vnode_t vp,struct nfsstate ** new_stpp,nfsquad_t clientid,nfsv4stateid_t * stateidp,nfsv4stateid_t * delegstateidp,u_int32_t * rflagsp,struct nfsexstuff * exp,NFSPROC_T * p,u_quad_t filerev)2703 nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp,
2704 struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp,
2705 nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp,
2706 NFSPROC_T *p, u_quad_t filerev)
2707 {
2708 struct nfsstate *new_stp = *new_stpp;
2709 struct nfsstate *stp, *nstp;
2710 struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg;
2711 struct nfslockfile *lfp, *new_lfp;
2712 struct nfsclient *clp;
2713 int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1;
2714 int readonly = 0, cbret = 1, getfhret = 0;
2715 int gotstate = 0, len = 0;
2716 u_char *clidp = NULL;
2717
2718 if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
2719 readonly = 1;
2720 /*
2721 * Check for restart conditions (client and server).
2722 * (Paranoia, should have been detected by nfsrv_opencheck().)
2723 * If an error does show up, return NFSERR_EXPIRED, since the
2724 * the seqid# has already been incremented.
2725 */
2726 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
2727 &new_stp->ls_stateid, 0);
2728 if (error) {
2729 printf("Nfsd: openctrl unexpected restart err=%d\n",
2730 error);
2731 error = NFSERR_EXPIRED;
2732 goto out;
2733 }
2734
2735 clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
2736 tryagain:
2737 new_lfp = malloc(sizeof (struct nfslockfile),
2738 M_NFSDLOCKFILE, M_WAITOK);
2739 new_open = malloc(sizeof (struct nfsstate),
2740 M_NFSDSTATE, M_WAITOK);
2741 new_deleg = malloc(sizeof (struct nfsstate),
2742 M_NFSDSTATE, M_WAITOK);
2743 getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
2744 NULL, p);
2745 NFSLOCKSTATE();
2746 /*
2747 * Get the client structure. Since the linked lists could be changed
2748 * by other nfsd processes if this process does a tsleep(), one of
2749 * two things must be done.
2750 * 1 - don't tsleep()
2751 * or
2752 * 2 - get the nfsv4_lock() { indicated by haslock == 1 }
2753 * before using the lists, since this lock stops the other
2754 * nfsd. This should only be used for rare cases, since it
2755 * essentially single threads the nfsd.
2756 * At this time, it is only done for cases where the stable
2757 * storage file must be written prior to completion of state
2758 * expiration.
2759 */
2760 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
2761 (nfsquad_t)((u_quad_t)0), 0, nd, p);
2762 if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) &&
2763 clp->lc_program) {
2764 /*
2765 * This happens on the first open for a client
2766 * that supports callbacks.
2767 */
2768 NFSUNLOCKSTATE();
2769 /*
2770 * Although nfsrv_docallback() will sleep, clp won't
2771 * go away, since they are only removed when the
2772 * nfsv4_lock() has blocked the nfsd threads. The
2773 * fields in clp can change, but having multiple
2774 * threads do this Null callback RPC should be
2775 * harmless.
2776 */
2777 cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL,
2778 NULL, 0, NULL, NULL, NULL, 0, p);
2779 NFSLOCKSTATE();
2780 clp->lc_flags &= ~LCL_NEEDSCBNULL;
2781 if (!cbret)
2782 clp->lc_flags |= LCL_CALLBACKSON;
2783 }
2784
2785 /*
2786 * Look up the open owner. See if it needs confirmation and
2787 * check the seq#, as required.
2788 */
2789 if (!error)
2790 nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
2791
2792 if (error) {
2793 NFSUNLOCKSTATE();
2794 printf("Nfsd: openctrl unexpected state err=%d\n",
2795 error);
2796 free(new_lfp, M_NFSDLOCKFILE);
2797 free(new_open, M_NFSDSTATE);
2798 free(new_deleg, M_NFSDSTATE);
2799 if (haslock) {
2800 NFSLOCKV4ROOTMUTEX();
2801 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2802 NFSUNLOCKV4ROOTMUTEX();
2803 }
2804 error = NFSERR_EXPIRED;
2805 goto out;
2806 }
2807
2808 if (new_stp->ls_flags & NFSLCK_RECLAIM)
2809 nfsrv_markstable(clp);
2810
2811 /*
2812 * Get the structure for the underlying file.
2813 */
2814 if (getfhret)
2815 error = getfhret;
2816 else
2817 error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
2818 NULL, 0);
2819 if (new_lfp)
2820 free(new_lfp, M_NFSDLOCKFILE);
2821 if (error) {
2822 NFSUNLOCKSTATE();
2823 printf("Nfsd openctrl unexpected getlockfile err=%d\n",
2824 error);
2825 free(new_open, M_NFSDSTATE);
2826 free(new_deleg, M_NFSDSTATE);
2827 if (haslock) {
2828 NFSLOCKV4ROOTMUTEX();
2829 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2830 NFSUNLOCKV4ROOTMUTEX();
2831 }
2832 goto out;
2833 }
2834
2835 /*
2836 * Search for a conflicting open/share.
2837 */
2838 if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
2839 /*
2840 * For Delegate_Cur, search for the matching Delegation,
2841 * which indicates no conflict.
2842 * An old delegation should have been recovered by the
2843 * client doing a Claim_DELEGATE_Prev, so I won't let
2844 * it match and return NFSERR_EXPIRED. Should I let it
2845 * match?
2846 */
2847 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
2848 if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
2849 (((nd->nd_flag & ND_NFSV41) != 0 &&
2850 stateidp->seqid == 0) ||
2851 stateidp->seqid == stp->ls_stateid.seqid) &&
2852 !NFSBCMP(stateidp->other, stp->ls_stateid.other,
2853 NFSX_STATEIDOTHER))
2854 break;
2855 }
2856 if (stp == LIST_END(&lfp->lf_deleg) ||
2857 ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
2858 (stp->ls_flags & NFSLCK_DELEGREAD))) {
2859 NFSUNLOCKSTATE();
2860 printf("Nfsd openctrl unexpected expiry\n");
2861 free(new_open, M_NFSDSTATE);
2862 free(new_deleg, M_NFSDSTATE);
2863 if (haslock) {
2864 NFSLOCKV4ROOTMUTEX();
2865 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2866 NFSUNLOCKV4ROOTMUTEX();
2867 }
2868 error = NFSERR_EXPIRED;
2869 goto out;
2870 }
2871
2872 /*
2873 * Don't issue a Delegation, since one already exists and
2874 * delay delegation timeout, as required.
2875 */
2876 delegate = 0;
2877 nfsrv_delaydelegtimeout(stp);
2878 }
2879
2880 /*
2881 * Check for access/deny bit conflicts. I also check for the
2882 * same owner, since the client might not have bothered to check.
2883 * Also, note an open for the same file and owner, if found,
2884 * which is all we do here for Delegate_Cur, since conflict
2885 * checking is already done.
2886 */
2887 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
2888 if (ownerstp && stp->ls_openowner == ownerstp)
2889 openstp = stp;
2890 if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) {
2891 /*
2892 * If another client has the file open, the only
2893 * delegation that can be issued is a Read delegation
2894 * and only if it is a Read open with Deny none.
2895 */
2896 if (clp != stp->ls_clp) {
2897 if ((stp->ls_flags & NFSLCK_SHAREBITS) ==
2898 NFSLCK_READACCESS)
2899 writedeleg = 0;
2900 else
2901 delegate = 0;
2902 }
2903 if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
2904 ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
2905 ((stp->ls_flags & NFSLCK_ACCESSBITS) &
2906 ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){
2907 ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
2908 if (ret == 1) {
2909 /*
2910 * nfsrv_clientconflict() unlocks state
2911 * when it returns non-zero.
2912 */
2913 free(new_open, M_NFSDSTATE);
2914 free(new_deleg, M_NFSDSTATE);
2915 openstp = NULL;
2916 goto tryagain;
2917 }
2918 if (ret == 2)
2919 error = NFSERR_PERM;
2920 else if (new_stp->ls_flags & NFSLCK_RECLAIM)
2921 error = NFSERR_RECLAIMCONFLICT;
2922 else
2923 error = NFSERR_SHAREDENIED;
2924 if (ret == 0)
2925 NFSUNLOCKSTATE();
2926 if (haslock) {
2927 NFSLOCKV4ROOTMUTEX();
2928 nfsv4_unlock(&nfsv4rootfs_lock, 1);
2929 NFSUNLOCKV4ROOTMUTEX();
2930 }
2931 free(new_open, M_NFSDSTATE);
2932 free(new_deleg, M_NFSDSTATE);
2933 printf("nfsd openctrl unexpected client cnfl\n");
2934 goto out;
2935 }
2936 }
2937 }
2938
2939 /*
2940 * Check for a conflicting delegation. If one is found, call
2941 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
2942 * been set yet, it will get the lock. Otherwise, it will recall
2943 * the delegation. Then, we try try again...
2944 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
2945 * isn't a conflict.)
2946 * I currently believe the conflict algorithm to be:
2947 * For Open with Read Access and Deny None
2948 * - there is a conflict iff a different client has a write delegation
2949 * For Open with other Write Access or any Deny except None
2950 * - there is a conflict if a different client has any delegation
2951 * - there is a conflict if the same client has a read delegation
2952 * (The current consensus is that this last case should be
2953 * considered a conflict since the client with a read delegation
2954 * could have done an Open with ReadAccess and WriteDeny
2955 * locally and then not have checked for the WriteDeny.)
2956 * The exception is a NFSv4.1/4.2 client that has requested
2957 * an atomic upgrade to a write delegation.
2958 */
2959 if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) {
2960 stp = LIST_FIRST(&lfp->lf_deleg);
2961 while (stp != LIST_END(&lfp->lf_deleg)) {
2962 nstp = LIST_NEXT(stp, ls_file);
2963 if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD))
2964 writedeleg = 0;
2965 else if (stp->ls_clp != clp ||
2966 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0 ||
2967 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)
2968 delegate = 0;
2969 if ((readonly && stp->ls_clp != clp &&
2970 (stp->ls_flags & NFSLCK_DELEGWRITE) != 0) ||
2971 (!readonly && (stp->ls_clp != clp ||
2972 ((stp->ls_flags & NFSLCK_DELEGREAD) != 0 &&
2973 (new_stp->ls_flags & NFSLCK_WANTWDELEG) == 0)))) {
2974 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
2975 delegate = 2;
2976 } else {
2977 ret = nfsrv_delegconflict(stp, &haslock, p, vp);
2978 if (ret) {
2979 /*
2980 * nfsrv_delegconflict() unlocks state
2981 * when it returns non-zero.
2982 */
2983 printf("Nfsd openctrl unexpected deleg cnfl\n");
2984 free(new_open, M_NFSDSTATE);
2985 free(new_deleg, M_NFSDSTATE);
2986 if (ret == -1) {
2987 openstp = NULL;
2988 goto tryagain;
2989 }
2990 error = ret;
2991 goto out;
2992 }
2993 }
2994 }
2995 stp = nstp;
2996 }
2997 }
2998
2999 /*
3000 * We only get here if there was no open that conflicted.
	 * If an open for the owner exists, bitwise-OR in the access/deny bits.
3002 * Otherwise it is a new open. If the open_owner hasn't been
3003 * confirmed, replace the open with the new one needing confirmation,
3004 * otherwise add the open.
3005 */
3006 if (new_stp->ls_flags & NFSLCK_DELEGPREV) {
3007 /*
3008 * Handle NFSLCK_DELEGPREV by searching the old delegations for
3009 * a match. If found, just move the old delegation to the current
3010 * delegation list and issue open. If not found, return
3011 * NFSERR_EXPIRED.
3012 */
3013 LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
3014 if (stp->ls_lfp == lfp) {
3015 /* Found it */
3016 if (stp->ls_clp != clp)
3017 panic("olddeleg clp");
3018 LIST_REMOVE(stp, ls_list);
3019 LIST_REMOVE(stp, ls_hash);
3020 stp->ls_flags &= ~NFSLCK_OLDDELEG;
3021 stp->ls_stateid.seqid = delegstateidp->seqid = 1;
3022 stp->ls_stateid.other[0] = delegstateidp->other[0] =
3023 clp->lc_clientid.lval[0];
3024 stp->ls_stateid.other[1] = delegstateidp->other[1] =
3025 clp->lc_clientid.lval[1];
3026 stp->ls_stateid.other[2] = delegstateidp->other[2] =
3027 nfsrv_nextstateindex(clp);
3028 stp->ls_compref = nd->nd_compref;
3029 LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list);
3030 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3031 stp->ls_stateid), stp, ls_hash);
3032 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3033 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3034 else
3035 *rflagsp |= NFSV4OPEN_READDELEGATE;
3036 clp->lc_delegtime = NFSD_MONOSEC +
3037 nfsrv_lease + NFSRV_LEASEDELTA;
3038
3039 /*
3040 * Now, do the associated open.
3041 */
3042 new_open->ls_stateid.seqid = 1;
3043 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3044 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3045 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3046 new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)|
3047 NFSLCK_OPEN;
3048 if (stp->ls_flags & NFSLCK_DELEGWRITE)
3049 new_open->ls_flags |= (NFSLCK_READACCESS |
3050 NFSLCK_WRITEACCESS);
3051 else
3052 new_open->ls_flags |= NFSLCK_READACCESS;
3053 new_open->ls_uid = new_stp->ls_uid;
3054 new_open->ls_lfp = lfp;
3055 new_open->ls_clp = clp;
3056 LIST_INIT(&new_open->ls_open);
3057 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3058 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3059 new_open, ls_hash);
3060 /*
3061 * and handle the open owner
3062 */
3063 if (ownerstp) {
3064 new_open->ls_openowner = ownerstp;
3065 LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list);
3066 } else {
3067 new_open->ls_openowner = new_stp;
3068 new_stp->ls_flags = 0;
3069 nfsrvd_refcache(new_stp->ls_op);
3070 new_stp->ls_noopens = 0;
3071 LIST_INIT(&new_stp->ls_open);
3072 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3073 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3074 *new_stpp = NULL;
3075 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3076 nfsrv_openpluslock++;
3077 }
3078 openstp = new_open;
3079 new_open = NULL;
3080 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3081 nfsrv_openpluslock++;
3082 break;
3083 }
3084 }
3085 if (stp == LIST_END(&clp->lc_olddeleg))
3086 error = NFSERR_EXPIRED;
3087 } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
3088 /*
		 * Scan to check that a delegation for this client and file
		 * doesn't already exist.
3091 * There also shouldn't yet be an Open for this file and
3092 * openowner.
3093 */
3094 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
3095 if (stp->ls_clp == clp)
3096 break;
3097 }
3098 if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) {
3099 /*
3100 * This is the Claim_Previous case with a delegation
3101 * type != Delegate_None.
3102 */
3103 /*
3104 * First, add the delegation. (Although we must issue the
3105 * delegation, we can also ask for an immediate return.)
3106 */
3107 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
3108 new_deleg->ls_stateid.other[0] = delegstateidp->other[0] =
3109 clp->lc_clientid.lval[0];
3110 new_deleg->ls_stateid.other[1] = delegstateidp->other[1] =
3111 clp->lc_clientid.lval[1];
3112 new_deleg->ls_stateid.other[2] = delegstateidp->other[2] =
3113 nfsrv_nextstateindex(clp);
3114 if (new_stp->ls_flags & NFSLCK_DELEGWRITE) {
3115 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
3116 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
3117 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
3118 nfsrv_writedelegcnt++;
3119 } else {
3120 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
3121 NFSLCK_READACCESS);
3122 *rflagsp |= NFSV4OPEN_READDELEGATE;
3123 }
3124 new_deleg->ls_uid = new_stp->ls_uid;
3125 new_deleg->ls_lfp = lfp;
3126 new_deleg->ls_clp = clp;
3127 new_deleg->ls_filerev = filerev;
3128 new_deleg->ls_compref = nd->nd_compref;
3129 new_deleg->ls_lastrecall = 0;
3130 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
3131 LIST_INSERT_HEAD(NFSSTATEHASH(clp,
3132 new_deleg->ls_stateid), new_deleg, ls_hash);
3133 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
3134 new_deleg = NULL;
3135 if (delegate == 2 || nfsrv_issuedelegs == 0 ||
3136 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
3137 LCL_CALLBACKSON ||
3138 NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) ||
3139 !NFSVNO_DELEGOK(vp))
3140 *rflagsp |= NFSV4OPEN_RECALL;
3141 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
3142 nfsrv_openpluslock++;
3143 nfsrv_delegatecnt++;
3144
3145 /*
3146 * Now, do the associated open.
3147 */
3148 new_open->ls_stateid.seqid = 1;
3149 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3150 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3151 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3152 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) |
3153 NFSLCK_OPEN;
3154 if (new_stp->ls_flags & NFSLCK_DELEGWRITE)
3155 new_open->ls_flags |= (NFSLCK_READACCESS |
3156 NFSLCK_WRITEACCESS);
3157 else
3158 new_open->ls_flags |= NFSLCK_READACCESS;
3159 new_open->ls_uid = new_stp->ls_uid;
3160 new_open->ls_lfp = lfp;
3161 new_open->ls_clp = clp;
3162 LIST_INIT(&new_open->ls_open);
3163 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3164 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3165 new_open, ls_hash);
3166 /*
3167 * and handle the open owner
3168 */
3169 if (ownerstp) {
3170 new_open->ls_openowner = ownerstp;
3171 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3172 } else {
3173 new_open->ls_openowner = new_stp;
3174 new_stp->ls_flags = 0;
3175 nfsrvd_refcache(new_stp->ls_op);
3176 new_stp->ls_noopens = 0;
3177 LIST_INIT(&new_stp->ls_open);
3178 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3179 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3180 *new_stpp = NULL;
3181 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3182 nfsrv_openpluslock++;
3183 }
3184 openstp = new_open;
3185 new_open = NULL;
3186 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3187 nfsrv_openpluslock++;
3188 } else {
3189 error = NFSERR_RECLAIMCONFLICT;
3190 }
3191 } else if (ownerstp) {
3192 if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) {
3193 /* Replace the open */
3194 if (ownerstp->ls_op)
3195 nfsrvd_derefcache(ownerstp->ls_op);
3196 ownerstp->ls_op = new_stp->ls_op;
3197 nfsrvd_refcache(ownerstp->ls_op);
3198 ownerstp->ls_seq = new_stp->ls_seq;
3199 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3200 stp = LIST_FIRST(&ownerstp->ls_open);
3201 stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3202 NFSLCK_OPEN;
3203 stp->ls_stateid.seqid = 1;
3204 stp->ls_uid = new_stp->ls_uid;
3205 if (lfp != stp->ls_lfp) {
3206 LIST_REMOVE(stp, ls_file);
3207 LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file);
3208 stp->ls_lfp = lfp;
3209 }
3210 openstp = stp;
3211 } else if (openstp) {
3212 openstp->ls_flags |= (new_stp->ls_flags & NFSLCK_SHAREBITS);
3213 openstp->ls_stateid.seqid++;
3214 if ((nd->nd_flag & ND_NFSV41) != 0 &&
3215 openstp->ls_stateid.seqid == 0)
3216 openstp->ls_stateid.seqid = 1;
3217
3218 /*
3219 * This is where we can choose to issue a delegation.
3220 */
3221 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3222 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3223 new_stp, lfp, rflagsp, delegstateidp);
3224 } else {
3225 new_open->ls_stateid.seqid = 1;
3226 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3227 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3228 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3229 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)|
3230 NFSLCK_OPEN;
3231 new_open->ls_uid = new_stp->ls_uid;
3232 new_open->ls_openowner = ownerstp;
3233 new_open->ls_lfp = lfp;
3234 new_open->ls_clp = clp;
3235 LIST_INIT(&new_open->ls_open);
3236 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3237 LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list);
3238 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3239 new_open, ls_hash);
3240 openstp = new_open;
3241 new_open = NULL;
3242 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3243 nfsrv_openpluslock++;
3244
3245 /*
3246 * This is where we can choose to issue a delegation.
3247 */
3248 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3249 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3250 new_stp, lfp, rflagsp, delegstateidp);
3251 }
3252 } else {
3253 /*
3254 * New owner case. Start the open_owner sequence with a
3255 * Needs confirmation (unless a reclaim) and hang the
3256 * new open off it.
3257 */
3258 new_open->ls_stateid.seqid = 1;
3259 new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0];
3260 new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1];
3261 new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp);
3262 new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) |
3263 NFSLCK_OPEN;
3264 new_open->ls_uid = new_stp->ls_uid;
3265 LIST_INIT(&new_open->ls_open);
3266 new_open->ls_openowner = new_stp;
3267 new_open->ls_lfp = lfp;
3268 new_open->ls_clp = clp;
3269 LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file);
3270 if (new_stp->ls_flags & NFSLCK_RECLAIM) {
3271 new_stp->ls_flags = 0;
3272 } else if ((nd->nd_flag & ND_NFSV41) != 0) {
3273 /*
3274 * This is where we can choose to issue a delegation.
3275 */
3276 nfsrv_issuedelegation(vp, clp, nd, delegate, writedeleg,
3277 readonly, filerev, NFSVNO_EXRDONLY(exp), &new_deleg,
3278 new_stp, lfp, rflagsp, delegstateidp);
3279 /* NFSv4.1 never needs confirmation. */
3280 new_stp->ls_flags = 0;
3281
3282 /*
3283 * Since NFSv4.1 never does an OpenConfirm, the first
3284 * open state will be acquired here.
3285 */
3286 if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
3287 clp->lc_flags |= LCL_STAMPEDSTABLE;
3288 len = clp->lc_idlen;
3289 NFSBCOPY(clp->lc_id, clidp, len);
3290 gotstate = 1;
3291 }
3292 } else {
3293 *rflagsp |= NFSV4OPEN_RESULTCONFIRM;
3294 new_stp->ls_flags = NFSLCK_NEEDSCONFIRM;
3295 }
3296 nfsrvd_refcache(new_stp->ls_op);
3297 new_stp->ls_noopens = 0;
3298 LIST_INIT(&new_stp->ls_open);
3299 LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list);
3300 LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list);
3301 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid),
3302 new_open, ls_hash);
3303 openstp = new_open;
3304 new_open = NULL;
3305 *new_stpp = NULL;
3306 NFSD_VNET(nfsstatsv1_p)->srvopens++;
3307 nfsrv_openpluslock++;
3308 NFSD_VNET(nfsstatsv1_p)->srvopenowners++;
3309 nfsrv_openpluslock++;
3310 }
3311 if (!error) {
3312 stateidp->seqid = openstp->ls_stateid.seqid;
3313 stateidp->other[0] = openstp->ls_stateid.other[0];
3314 stateidp->other[1] = openstp->ls_stateid.other[1];
3315 stateidp->other[2] = openstp->ls_stateid.other[2];
3316 }
3317 NFSUNLOCKSTATE();
3318 if (haslock) {
3319 NFSLOCKV4ROOTMUTEX();
3320 nfsv4_unlock(&nfsv4rootfs_lock, 1);
3321 NFSUNLOCKV4ROOTMUTEX();
3322 }
3323 if (new_open)
3324 free(new_open, M_NFSDSTATE);
3325 if (new_deleg)
3326 free(new_deleg, M_NFSDSTATE);
3327
3328 /*
3329 * If the NFSv4.1 client just acquired its first open, write a timestamp
3330 * to the stable storage file.
3331 */
3332 if (gotstate != 0) {
3333 nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
3334 nfsrv_backupstable();
3335 }
3336
3337 out:
3338 free(clidp, M_TEMP);
3339 NFSEXITCODE2(error, nd);
3340 return (error);
3341 }
3342
/*
 * Open update. Does the confirm, downgrade and close.
 *
 * The operation is selected by bits in new_stp->ls_flags: NFSLCK_CONFIRM
 * confirms the open, NFSLCK_CLOSE frees the open and anything else is
 * handled as a downgrade of the open's share bits.
 *
 * vp - vnode the open applies to. It must be VREG, otherwise NFSERR_ISDIR
 *	(for VDIR) or NFSERR_INVAL is returned.
 * new_stp - carries the request (ls_flags, ls_stateid, ls_seq and ls_op).
 * clientid - used to look up the client via nfsrv_getclient().
 * stateidp - filled in with the open's stateid, with seqid advanced by
 *	one, once the sanity checks have passed.
 * nd, p - handed to the helper functions; nd->nd_flag is tested for
 *	ND_NFSV41.
 * retwriteaccessp - if not NULL, set in the close case to 1 when the open
 *	had NFSLCK_WRITEACCESS and to 0 otherwise.
 *
 * Returns 0 or an NFSERR_xxx value. The state lock taken by NFSLOCKSTATE()
 * is released on every path before returning.
 */
int
nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p,
    int *retwriteaccessp)
{
	struct nfsstate *stp;
	struct nfsclient *clp;
	struct nfslockfile *lfp;
	u_int32_t bits;
	int error = 0, gotstate = 0, len = 0;
	u_char *clidp = NULL;

	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, 0);
	if (error)
		goto out;

	/*
	 * Scratch buffer for a copy of the client id. It is filled in while
	 * the state lock is held and written to stable storage after the
	 * lock has been released. It is allocated before taking the lock.
	 */
	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
	NFSLOCKSTATE();
	/*
	 * Get the open structure via clientid and stateid.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
	if (!error)
		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
		    new_stp->ls_flags, &stp);

	/*
	 * Sanity check the open.
	 * It must be an open, and a confirm is only valid for an openowner
	 * that needs confirmation, while the other operations are only
	 * valid for one that does not.
	 */
	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
	    (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
	     (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
	    ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
	     (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
		error = NFSERR_BADSTATEID;

	if (!error)
		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
		    stp->ls_openowner, new_stp->ls_op);
	/*
	 * A stateid seqid mismatch is an old stateid, except for a
	 * non-NFSv4.1 confirm and for an NFSv4.1 request that uses a
	 * seqid of 0.
	 */
	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
	    (((nd->nd_flag & ND_NFSV41) == 0 &&
	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
	     ((nd->nd_flag & ND_NFSV41) != 0 &&
	      new_stp->ls_stateid.seqid != 0)))
		error = NFSERR_OLDSTATEID;
	if (!error && vp->v_type != VREG) {
		if (vp->v_type == VDIR)
			error = NFSERR_ISDIR;
		else
			error = NFSERR_INVAL;
	}

	if (error) {
		/*
		 * If a client tries to confirm an Open with a bad
		 * seqid# and there are no byte range locks or other Opens
		 * on the openowner, just throw it away, so the next use of the
		 * openowner will start a fresh seq#.
		 *
		 * NOTE(review): stp is only set when nfsrv_getstate()
		 * succeeded, so this relies on NFSERR_BADSEQID coming only
		 * from nfsrv_checkseqid() above -- confirm.
		 */
		if (error == NFSERR_BADSEQID &&
		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
		    nfsrv_nootherstate(stp))
			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Set the return stateid.
	 * For NFSv4.1 a seqid that wraps to 0 is bumped to 1.
	 */
	stateidp->seqid = stp->ls_stateid.seqid + 1;
	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
		stateidp->seqid = 1;
	stateidp->other[0] = stp->ls_stateid.other[0];
	stateidp->other[1] = stp->ls_stateid.other[1];
	stateidp->other[2] = stp->ls_stateid.other[2];
	/*
	 * Now, handle the three cases.
	 */
	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
		/*
		 * If the open doesn't need confirmation, it seems to me that
		 * there is a client error, but I'll just log it and keep going?
		 */
		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
			printf("Nfsv4d: stray open confirm\n");
		/* Clearing the flags marks the openowner as confirmed. */
		stp->ls_openowner->ls_flags = 0;
		stp->ls_stateid.seqid++;
		if ((nd->nd_flag & ND_NFSV41) != 0 &&
		    stp->ls_stateid.seqid == 0)
			stp->ls_stateid.seqid = 1;
		/*
		 * First state for this client: copy the client id so that
		 * it can be written to stable storage after unlocking.
		 */
		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
			clp->lc_flags |= LCL_STAMPEDSTABLE;
			len = clp->lc_idlen;
			NFSBCOPY(clp->lc_id, clidp, len);
			gotstate = 1;
		}
		NFSUNLOCKSTATE();
	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
		lfp = stp->ls_lfp;
		if (retwriteaccessp != NULL) {
			if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0)
				*retwriteaccessp = 1;
			else
				*retwriteaccessp = 0;
		}
		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
			/* Get the lf lock */
			nfsrv_locklf(lfp);
			NFSUNLOCKSTATE();
			/*
			 * The vnode is unlocked around nfsrv_freeopen() and
			 * exclusively relocked afterwards.
			 */
			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
			NFSVOPUNLOCK(vp);
			/*
			 * The lf lock is only dropped here when
			 * nfsrv_freeopen() returns 0.
			 * NOTE(review): presumably a non-zero return means
			 * the nfslockfile was freed -- confirm against
			 * nfsrv_freeopen().
			 */
			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
				NFSLOCKSTATE();
				nfsrv_unlocklf(lfp);
				NFSUNLOCKSTATE();
			}
			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		} else {
			(void) nfsrv_freeopen(stp, NULL, 0, p);
			NFSUNLOCKSTATE();
		}
	} else {
		/*
		 * Update the share bits, making sure that the new set are a
		 * subset of the old ones.
		 */
		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
		if (~(stp->ls_flags) & bits) {
			NFSUNLOCKSTATE();
			error = NFSERR_INVAL;
			goto out;
		}
		stp->ls_flags = (bits | NFSLCK_OPEN);
		stp->ls_stateid.seqid++;
		if ((nd->nd_flag & ND_NFSV41) != 0 &&
		    stp->ls_stateid.seqid == 0)
			stp->ls_stateid.seqid = 1;
		NFSUNLOCKSTATE();
	}

	/*
	 * If the client just confirmed its first open, write a timestamp
	 * to the stable storage file.
	 */
	if (gotstate != 0) {
		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
		nfsrv_backupstable();
	}

out:
	/* clidp is still NULL if the restart check failed. */
	free(clidp, M_TEMP);
	NFSEXITCODE2(error, nd);
	return (error);
}
3506
3507 /*
3508 * Delegation update. Does the purge and return.
3509 */
3510 int
nfsrv_delegupdate(struct nfsrv_descript * nd,nfsquad_t clientid,nfsv4stateid_t * stateidp,vnode_t vp,int op,struct ucred * cred,NFSPROC_T * p,int * retwriteaccessp)3511 nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
3512 nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
3513 NFSPROC_T *p, int *retwriteaccessp)
3514 {
3515 struct nfsstate *stp;
3516 struct nfsclient *clp;
3517 int error = 0;
3518 fhandle_t fh;
3519
3520 /*
3521 * Do a sanity check against the file handle for DelegReturn.
3522 */
3523 if (vp) {
3524 error = nfsvno_getfh(vp, &fh, p);
3525 if (error)
3526 goto out;
3527 }
3528 /*
3529 * Check for restart conditions (client and server).
3530 */
3531 if (op == NFSV4OP_DELEGRETURN)
3532 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
3533 stateidp, 0);
3534 else
3535 error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
3536 stateidp, 0);
3537
3538 NFSLOCKSTATE();
3539 /*
3540 * Get the open structure via clientid and stateid.
3541 */
3542 if (!error)
3543 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3544 (nfsquad_t)((u_quad_t)0), 0, nd, p);
3545 if (error) {
3546 if (error == NFSERR_CBPATHDOWN)
3547 error = 0;
3548 if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
3549 error = NFSERR_STALESTATEID;
3550 }
3551 if (!error && op == NFSV4OP_DELEGRETURN) {
3552 error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
3553 if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
3554 ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
3555 error = NFSERR_OLDSTATEID;
3556 }
3557 /*
3558 * NFSERR_EXPIRED means that the state has gone away,
3559 * so Delegations have been purged. Just return ok.
3560 */
3561 if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) {
3562 NFSUNLOCKSTATE();
3563 error = 0;
3564 goto out;
3565 }
3566 if (error) {
3567 NFSUNLOCKSTATE();
3568 goto out;
3569 }
3570
3571 if (op == NFSV4OP_DELEGRETURN) {
3572 if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh,
3573 sizeof (fhandle_t))) {
3574 NFSUNLOCKSTATE();
3575 error = NFSERR_BADSTATEID;
3576 goto out;
3577 }
3578 if (retwriteaccessp != NULL) {
3579 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0)
3580 *retwriteaccessp = 1;
3581 else
3582 *retwriteaccessp = 0;
3583 }
3584 nfsrv_freedeleg(stp);
3585 } else {
3586 nfsrv_freedeleglist(&clp->lc_olddeleg);
3587 }
3588 NFSUNLOCKSTATE();
3589 error = 0;
3590
3591 out:
3592 NFSEXITCODE(error);
3593 return (error);
3594 }
3595
3596 /*
3597 * Release lock owner.
3598 */
3599 int
nfsrv_releaselckown(struct nfsstate * new_stp,nfsquad_t clientid,NFSPROC_T * p)3600 nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid,
3601 NFSPROC_T *p)
3602 {
3603 struct nfsstate *stp, *nstp, *openstp, *ownstp;
3604 struct nfsclient *clp;
3605 int error = 0;
3606
3607 /*
3608 * Check for restart conditions (client and server).
3609 */
3610 error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
3611 &new_stp->ls_stateid, 0);
3612 if (error)
3613 goto out;
3614
3615 NFSLOCKSTATE();
3616 /*
3617 * Get the lock owner by name.
3618 */
3619 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
3620 (nfsquad_t)((u_quad_t)0), 0, NULL, p);
3621 if (error) {
3622 NFSUNLOCKSTATE();
3623 goto out;
3624 }
3625 LIST_FOREACH(ownstp, &clp->lc_open, ls_list) {
3626 LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) {
3627 stp = LIST_FIRST(&openstp->ls_open);
3628 while (stp != LIST_END(&openstp->ls_open)) {
3629 nstp = LIST_NEXT(stp, ls_list);
3630 /*
3631 * If the owner matches, check for locks and
3632 * then free or return an error.
3633 */
3634 if (stp->ls_ownerlen == new_stp->ls_ownerlen &&
3635 !NFSBCMP(stp->ls_owner, new_stp->ls_owner,
3636 stp->ls_ownerlen)){
3637 if (LIST_EMPTY(&stp->ls_lock)) {
3638 nfsrv_freelockowner(stp, NULL, 0, p);
3639 } else {
3640 NFSUNLOCKSTATE();
3641 error = NFSERR_LOCKSHELD;
3642 goto out;
3643 }
3644 }
3645 stp = nstp;
3646 }
3647 }
3648 }
3649 NFSUNLOCKSTATE();
3650
3651 out:
3652 NFSEXITCODE(error);
3653 return (error);
3654 }
3655
3656 /*
3657 * Get the file handle for a lock structure.
3658 */
3659 static int
nfsrv_getlockfh(vnode_t vp,u_short flags,struct nfslockfile * new_lfp,fhandle_t * nfhp,NFSPROC_T * p)3660 nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp,
3661 fhandle_t *nfhp, NFSPROC_T *p)
3662 {
3663 fhandle_t *fhp = NULL;
3664 int error;
3665
3666 /*
3667 * For lock, use the new nfslock structure, otherwise just
3668 * a fhandle_t on the stack.
3669 */
3670 if (flags & NFSLCK_OPEN) {
3671 KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL"));
3672 fhp = &new_lfp->lf_fh;
3673 } else if (nfhp) {
3674 fhp = nfhp;
3675 } else {
3676 panic("nfsrv_getlockfh");
3677 }
3678 error = nfsvno_getfh(vp, fhp, p);
3679 NFSEXITCODE(error);
3680 return (error);
3681 }
3682
3683 /*
3684 * Get an nfs lock structure. Allocate one, as required, and return a
3685 * pointer to it.
3686 * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock.
3687 */
3688 static int
nfsrv_getlockfile(u_short flags,struct nfslockfile ** new_lfpp,struct nfslockfile ** lfpp,fhandle_t * nfhp,int lockit)3689 nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp,
3690 struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit)
3691 {
3692 struct nfslockfile *lfp;
3693 fhandle_t *fhp = NULL, *tfhp;
3694 struct nfslockhashhead *hp;
3695 struct nfslockfile *new_lfp = NULL;
3696
3697 /*
3698 * For lock, use the new nfslock structure, otherwise just
3699 * a fhandle_t on the stack.
3700 */
3701 if (flags & NFSLCK_OPEN) {
3702 new_lfp = *new_lfpp;
3703 fhp = &new_lfp->lf_fh;
3704 } else if (nfhp) {
3705 fhp = nfhp;
3706 } else {
3707 panic("nfsrv_getlockfile");
3708 }
3709
3710 hp = NFSLOCKHASH(fhp);
3711 LIST_FOREACH(lfp, hp, lf_hash) {
3712 tfhp = &lfp->lf_fh;
3713 if (NFSVNO_CMPFH(fhp, tfhp)) {
3714 if (lockit)
3715 nfsrv_locklf(lfp);
3716 *lfpp = lfp;
3717 return (0);
3718 }
3719 }
3720 if (!(flags & NFSLCK_OPEN))
3721 return (-1);
3722
3723 /*
3724 * No match, so chain the new one into the list.
3725 */
3726 LIST_INIT(&new_lfp->lf_open);
3727 LIST_INIT(&new_lfp->lf_lock);
3728 LIST_INIT(&new_lfp->lf_deleg);
3729 LIST_INIT(&new_lfp->lf_locallock);
3730 LIST_INIT(&new_lfp->lf_rollback);
3731 new_lfp->lf_locallock_lck.nfslock_usecnt = 0;
3732 new_lfp->lf_locallock_lck.nfslock_lock = 0;
3733 new_lfp->lf_usecount = 0;
3734 LIST_INSERT_HEAD(hp, new_lfp, lf_hash);
3735 *lfpp = new_lfp;
3736 *new_lfpp = NULL;
3737 return (0);
3738 }
3739
3740 /*
3741 * This function adds a nfslock lock structure to the list for the associated
3742 * nfsstate and nfslockfile structures. It will be inserted after the
3743 * entry pointed at by insert_lop.
3744 */
3745 static void
nfsrv_insertlock(struct nfslock * new_lop,struct nfslock * insert_lop,struct nfsstate * stp,struct nfslockfile * lfp)3746 nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop,
3747 struct nfsstate *stp, struct nfslockfile *lfp)
3748 {
3749 struct nfslock *lop, *nlop;
3750
3751 new_lop->lo_stp = stp;
3752 new_lop->lo_lfp = lfp;
3753
3754 if (stp != NULL) {
3755 /* Insert in increasing lo_first order */
3756 lop = LIST_FIRST(&lfp->lf_lock);
3757 if (lop == LIST_END(&lfp->lf_lock) ||
3758 new_lop->lo_first <= lop->lo_first) {
3759 LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile);
3760 } else {
3761 nlop = LIST_NEXT(lop, lo_lckfile);
3762 while (nlop != LIST_END(&lfp->lf_lock) &&
3763 nlop->lo_first < new_lop->lo_first) {
3764 lop = nlop;
3765 nlop = LIST_NEXT(lop, lo_lckfile);
3766 }
3767 LIST_INSERT_AFTER(lop, new_lop, lo_lckfile);
3768 }
3769 } else {
3770 new_lop->lo_lckfile.le_prev = NULL; /* list not used */
3771 }
3772
3773 /*
3774 * Insert after insert_lop, which is overloaded as stp or lfp for
3775 * an empty list.
3776 */
3777 if (stp == NULL && (struct nfslockfile *)insert_lop == lfp)
3778 LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner);
3779 else if ((struct nfsstate *)insert_lop == stp)
3780 LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner);
3781 else
3782 LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner);
3783 if (stp != NULL) {
3784 NFSD_VNET(nfsstatsv1_p)->srvlocks++;
3785 nfsrv_openpluslock++;
3786 }
3787 }
3788
/*
 * This function updates the locking for a lock owner and given file. It
 * maintains a list of lock ranges ordered on increasing file offset that
 * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style).
 * It adds new_lop to the list, unless new_lop is an unlock (NFSLCK_UNLOCK
 * set in lo_flags), and sometimes uses the one pointed at by other_lopp.
 *
 * stp - the lock owner's state, whose ls_lock list is updated. When it is
 *	NULL, the lfp->lf_locallock list is updated instead.
 * new_lopp - the new lock or unlock range. *new_lopp is set to NULL when
 *	the structure has been linked into a list.
 * other_lopp - spare structure used for the second piece when an existing
 *	lock is split in two. *other_lopp is set to NULL when that happens.
 * lfp - the file that the range applies to.
 *
 * NOTE(review): presumably the caller frees whatever *new_lopp and
 * *other_lopp still point at on return -- confirm against the callers.
 */
static void
nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp,
    struct nfslock **other_lopp, struct nfslockfile *lfp)
{
	struct nfslock *new_lop = *new_lopp;
	struct nfslock *lop, *tlop, *ilop;
	struct nfslock *other_lop = *other_lopp;
	int unlock = 0, myfile = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->lo_flags & NFSLCK_UNLOCK)
		unlock = 1;
	/*
	 * ilop is the entry to insert after. It starts out as the list
	 * owner (stp or lfp) cast to a lock pointer, which
	 * nfsrv_insertlock() treats as "insert at the head".
	 */
	if (stp != NULL) {
		ilop = (struct nfslock *)stp;
		lop = LIST_FIRST(&stp->ls_lock);
	} else {
		ilop = (struct nfslock *)lfp;
		lop = LIST_FIRST(&lfp->lf_locallock);
	}
	while (lop != NULL) {
		/*
		 * Only check locks for this file that aren't before the start of
		 * new lock's range.
		 */
		if (lop->lo_lfp == lfp) {
			/* Note that a lock for this file has been seen. */
			myfile = 1;
			if (lop->lo_end >= new_lop->lo_first) {
				if (new_lop->lo_end < lop->lo_first) {
					/*
					 * If the new lock ends before the start of the
					 * current lock's range, no merge, just insert
					 * the new lock.
					 */
					break;
				}
				if (new_lop->lo_flags == lop->lo_flags ||
				    (new_lop->lo_first <= lop->lo_first &&
				     new_lop->lo_end >= lop->lo_end)) {
					/*
					 * This lock can be absorbed by the new lock/unlock.
					 * This happens when it covers the entire range
					 * of the old lock or is contiguous
					 * with the old lock and is of the same type or an
					 * unlock.
					 */
					if (lop->lo_first < new_lop->lo_first)
						new_lop->lo_first = lop->lo_first;
					if (lop->lo_end > new_lop->lo_end)
						new_lop->lo_end = lop->lo_end;
					/* Step to the next entry before freeing this one. */
					tlop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					nfsrv_freenfslock(tlop);
					continue;
				}

				/*
				 * All these cases are for contiguous locks that are not the
				 * same type, so they can't be merged.
				 */
				if (new_lop->lo_first <= lop->lo_first) {
					/*
					 * This case is where the new lock overlaps with the
					 * first part of the old lock. Move the start of the
					 * old lock to just past the end of the new lock. The
					 * new lock will be inserted in front of the old, since
					 * ilop hasn't been updated. (We are done now.)
					 */
					lop->lo_first = new_lop->lo_end;
					break;
				}
				if (new_lop->lo_end >= lop->lo_end) {
					/*
					 * This case is where the new lock overlaps with the
					 * end of the old lock's range. Move the old lock's
					 * end to just before the new lock's first and insert
					 * the new lock after the old lock.
					 * Might not be done yet, since the new lock could
					 * overlap further locks with higher ranges.
					 */
					lop->lo_end = new_lop->lo_first;
					ilop = lop;
					lop = LIST_NEXT(lop, lo_lckowner);
					continue;
				}
				/*
				 * The final case is where the new lock's range is in the
				 * middle of the current lock's and splits the current lock
				 * up. Use *other_lopp to handle the second part of the
				 * split old lock range. (We are done now.)
				 * For unlock, we use new_lop as other_lop and tmp, since
				 * other_lop and new_lop are the same for this case.
				 * We noted the unlock case above, so we don't need
				 * new_lop->lo_flags any longer.
				 */
				/* Saved since new_lop may be overwritten below. */
				tmp = new_lop->lo_first;
				if (other_lop == NULL) {
					if (!unlock)
						panic("nfsd srv update unlock");
					other_lop = new_lop;
					*new_lopp = NULL;
				}
				/* other_lop becomes the piece after the new range. */
				other_lop->lo_first = new_lop->lo_end;
				other_lop->lo_end = lop->lo_end;
				other_lop->lo_flags = lop->lo_flags;
				other_lop->lo_stp = stp;
				other_lop->lo_lfp = lfp;
				/* lop is trimmed to the piece before the new range. */
				lop->lo_end = tmp;
				nfsrv_insertlock(other_lop, lop, stp, lfp);
				*other_lopp = NULL;
				ilop = lop;
				break;
			}
		}
		ilop = lop;
		lop = LIST_NEXT(lop, lo_lckowner);
		/*
		 * Stop once the locks for this file have been passed, i.e.
		 * one has been seen and the next entry is for another file
		 * or is the end of the list.
		 */
		if (myfile && (lop == NULL || lop->lo_lfp != lfp))
			break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 * An unlock is not inserted, so *new_lopp is left alone for it.
	 */
	if (!unlock) {
		nfsrv_insertlock(new_lop, ilop, stp, lfp);
		*new_lopp = NULL;
	}
}
3926
3927 /*
3928 * This function handles sequencing of locks, etc.
3929 * It returns an error that indicates what the caller should do.
3930 */
3931 static int
nfsrv_checkseqid(struct nfsrv_descript * nd,u_int32_t seqid,struct nfsstate * stp,struct nfsrvcache * op)3932 nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid,
3933 struct nfsstate *stp, struct nfsrvcache *op)
3934 {
3935 int error = 0;
3936
3937 if ((nd->nd_flag & ND_NFSV41) != 0)
3938 /* NFSv4.1 ignores the open_seqid and lock_seqid. */
3939 goto out;
3940 if (op != nd->nd_rp)
3941 panic("nfsrvstate checkseqid");
3942 if (!(op->rc_flag & RC_INPROG))
3943 panic("nfsrvstate not inprog");
3944 if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) {
3945 printf("refcnt=%d\n", stp->ls_op->rc_refcnt);
3946 panic("nfsrvstate op refcnt");
3947 }
3948
3949 /* If ND_ERELOOKUP is set, the seqid has already been handled. */
3950 if ((nd->nd_flag & ND_ERELOOKUP) != 0)
3951 goto out;
3952
3953 if ((stp->ls_seq + 1) == seqid) {
3954 if (stp->ls_op)
3955 nfsrvd_derefcache(stp->ls_op);
3956 stp->ls_op = op;
3957 nfsrvd_refcache(op);
3958 stp->ls_seq = seqid;
3959 goto out;
3960 } else if (stp->ls_seq == seqid && stp->ls_op &&
3961 op->rc_xid == stp->ls_op->rc_xid &&
3962 op->rc_refcnt == 0 &&
3963 op->rc_reqlen == stp->ls_op->rc_reqlen &&
3964 op->rc_cksum == stp->ls_op->rc_cksum) {
3965 if (stp->ls_op->rc_flag & RC_INPROG) {
3966 error = NFSERR_DONTREPLY;
3967 goto out;
3968 }
3969 nd->nd_rp = stp->ls_op;
3970 nd->nd_rp->rc_flag |= RC_INPROG;
3971 nfsrvd_delcache(op);
3972 error = NFSERR_REPLYFROMCACHE;
3973 goto out;
3974 }
3975 error = NFSERR_BADSEQID;
3976
3977 out:
3978 NFSEXITCODE2(error, nd);
3979 return (error);
3980 }
3981
3982 /*
3983 * Get the client ip address for callbacks. If the strings can't be parsed,
3984 * just set lc_program to 0 to indicate no callbacks are possible.
3985 * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set
3986 * the address to the client's transport address. This won't be used
3987 * for callbacks, but can be printed out by nfsstats for info.)
3988 * Return error if the xdr can't be parsed, 0 otherwise.
3989 */
3990 int
nfsrv_getclientipaddr(struct nfsrv_descript * nd,struct nfsclient * clp)3991 nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp)
3992 {
3993 u_int32_t *tl;
3994 u_char *cp, *cp2;
3995 int i, j, maxalen = 0, minalen = 0;
3996 sa_family_t af;
3997 #ifdef INET
3998 struct sockaddr_in *rin = NULL, *sin;
3999 #endif
4000 #ifdef INET6
4001 struct sockaddr_in6 *rin6 = NULL, *sin6;
4002 #endif
4003 u_char *addr;
4004 int error = 0, cantparse = 0;
4005 union {
4006 in_addr_t ival;
4007 u_char cval[4];
4008 } ip;
4009 union {
4010 in_port_t sval;
4011 u_char cval[2];
4012 } port;
4013
4014 /* 8 is the maximum length of the port# string. */
4015 addr = malloc(INET6_ADDRSTRLEN + 8, M_TEMP, M_WAITOK);
4016 clp->lc_req.nr_client = NULL;
4017 clp->lc_req.nr_lock = 0;
4018 af = AF_UNSPEC;
4019 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4020 i = fxdr_unsigned(int, *tl);
4021 if (i >= 3 && i <= 4) {
4022 error = nfsrv_mtostr(nd, addr, i);
4023 if (error)
4024 goto nfsmout;
4025 #ifdef INET
4026 if (!strcmp(addr, "tcp")) {
4027 clp->lc_flags |= LCL_TCPCALLBACK;
4028 clp->lc_req.nr_sotype = SOCK_STREAM;
4029 clp->lc_req.nr_soproto = IPPROTO_TCP;
4030 af = AF_INET;
4031 } else if (!strcmp(addr, "udp")) {
4032 clp->lc_req.nr_sotype = SOCK_DGRAM;
4033 clp->lc_req.nr_soproto = IPPROTO_UDP;
4034 af = AF_INET;
4035 }
4036 #endif
4037 #ifdef INET6
4038 if (af == AF_UNSPEC) {
4039 if (!strcmp(addr, "tcp6")) {
4040 clp->lc_flags |= LCL_TCPCALLBACK;
4041 clp->lc_req.nr_sotype = SOCK_STREAM;
4042 clp->lc_req.nr_soproto = IPPROTO_TCP;
4043 af = AF_INET6;
4044 } else if (!strcmp(addr, "udp6")) {
4045 clp->lc_req.nr_sotype = SOCK_DGRAM;
4046 clp->lc_req.nr_soproto = IPPROTO_UDP;
4047 af = AF_INET6;
4048 }
4049 }
4050 #endif
4051 if (af == AF_UNSPEC) {
4052 cantparse = 1;
4053 }
4054 } else {
4055 cantparse = 1;
4056 if (i > 0) {
4057 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4058 if (error)
4059 goto nfsmout;
4060 }
4061 }
4062 /*
4063 * The caller has allocated clp->lc_req.nr_nam to be large enough
4064 * for either AF_INET or AF_INET6 and zeroed out the contents.
4065 * maxalen is set to the maximum length of the host IP address string
4066 * plus 8 for the maximum length of the port#.
4067 * minalen is set to the minimum length of the host IP address string
4068 * plus 4 for the minimum length of the port#.
4069 * These lengths do not include NULL termination,
4070 * so INET[6]_ADDRSTRLEN - 1 is used in the calculations.
4071 */
4072 switch (af) {
4073 #ifdef INET
4074 case AF_INET:
4075 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4076 rin->sin_family = AF_INET;
4077 rin->sin_len = sizeof(struct sockaddr_in);
4078 maxalen = INET_ADDRSTRLEN - 1 + 8;
4079 minalen = 7 + 4;
4080 break;
4081 #endif
4082 #ifdef INET6
4083 case AF_INET6:
4084 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4085 rin6->sin6_family = AF_INET6;
4086 rin6->sin6_len = sizeof(struct sockaddr_in6);
4087 maxalen = INET6_ADDRSTRLEN - 1 + 8;
4088 minalen = 3 + 4;
4089 break;
4090 #endif
4091 }
4092 NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
4093 i = fxdr_unsigned(int, *tl);
4094 if (i < 0) {
4095 error = NFSERR_BADXDR;
4096 goto nfsmout;
4097 } else if (i == 0) {
4098 cantparse = 1;
4099 } else if (!cantparse && i <= maxalen && i >= minalen) {
4100 error = nfsrv_mtostr(nd, addr, i);
4101 if (error)
4102 goto nfsmout;
4103
4104 /*
4105 * Parse out the address fields. We expect 6 decimal numbers
4106 * separated by '.'s for AF_INET and two decimal numbers
4107 * preceeded by '.'s for AF_INET6.
4108 */
4109 cp = NULL;
4110 switch (af) {
4111 #ifdef INET6
4112 /*
4113 * For AF_INET6, first parse the host address.
4114 */
4115 case AF_INET6:
4116 cp = strchr(addr, '.');
4117 if (cp != NULL) {
4118 *cp++ = '\0';
4119 if (inet_pton(af, addr, &rin6->sin6_addr) == 1)
4120 i = 4;
4121 else {
4122 cp = NULL;
4123 cantparse = 1;
4124 }
4125 }
4126 break;
4127 #endif
4128 #ifdef INET
4129 case AF_INET:
4130 cp = addr;
4131 i = 0;
4132 break;
4133 #endif
4134 }
4135 while (cp != NULL && *cp && i < 6) {
4136 cp2 = cp;
4137 while (*cp2 && *cp2 != '.')
4138 cp2++;
4139 if (*cp2)
4140 *cp2++ = '\0';
4141 else if (i != 5) {
4142 cantparse = 1;
4143 break;
4144 }
4145 j = nfsrv_getipnumber(cp);
4146 if (j >= 0) {
4147 if (i < 4)
4148 ip.cval[3 - i] = j;
4149 else
4150 port.cval[5 - i] = j;
4151 } else {
4152 cantparse = 1;
4153 break;
4154 }
4155 cp = cp2;
4156 i++;
4157 }
4158 if (!cantparse) {
4159 /*
4160 * The host address INADDR_ANY is (mis)used to indicate
4161 * "there is no valid callback address".
4162 */
4163 switch (af) {
4164 #ifdef INET6
4165 case AF_INET6:
4166 if (!IN6_ARE_ADDR_EQUAL(&rin6->sin6_addr,
4167 &in6addr_any))
4168 rin6->sin6_port = htons(port.sval);
4169 else
4170 cantparse = 1;
4171 break;
4172 #endif
4173 #ifdef INET
4174 case AF_INET:
4175 if (ip.ival != INADDR_ANY) {
4176 rin->sin_addr.s_addr = htonl(ip.ival);
4177 rin->sin_port = htons(port.sval);
4178 } else {
4179 cantparse = 1;
4180 }
4181 break;
4182 #endif
4183 }
4184 }
4185 } else {
4186 cantparse = 1;
4187 if (i > 0) {
4188 error = nfsm_advance(nd, NFSM_RNDUP(i), -1);
4189 if (error)
4190 goto nfsmout;
4191 }
4192 }
4193 if (cantparse) {
4194 switch (nd->nd_nam->sa_family) {
4195 #ifdef INET
4196 case AF_INET:
4197 sin = (struct sockaddr_in *)nd->nd_nam;
4198 rin = (struct sockaddr_in *)clp->lc_req.nr_nam;
4199 rin->sin_family = AF_INET;
4200 rin->sin_len = sizeof(struct sockaddr_in);
4201 rin->sin_addr.s_addr = sin->sin_addr.s_addr;
4202 rin->sin_port = 0x0;
4203 break;
4204 #endif
4205 #ifdef INET6
4206 case AF_INET6:
4207 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
4208 rin6 = (struct sockaddr_in6 *)clp->lc_req.nr_nam;
4209 rin6->sin6_family = AF_INET6;
4210 rin6->sin6_len = sizeof(struct sockaddr_in6);
4211 rin6->sin6_addr = sin6->sin6_addr;
4212 rin6->sin6_port = 0x0;
4213 break;
4214 #endif
4215 }
4216 clp->lc_program = 0;
4217 }
4218 nfsmout:
4219 free(addr, M_TEMP);
4220 NFSEXITCODE2(error, nd);
4221 return (error);
4222 }
4223
/*
 * Convert a NUL terminated string of at most three decimal digits to its
 * value. Returns -1 if the string holds a non-digit, is longer than three
 * characters or the value does not fit in a byte (is greater than 255).
 */
static int
nfsrv_getipnumber(u_char *cp)
{
	int ndigits, val;

	val = 0;
	for (ndigits = 0; cp[ndigits] != '\0'; ndigits++) {
		if (ndigits >= 3 || cp[ndigits] < '0' || cp[ndigits] > '9')
			return (-1);
		val = val * 10 + (cp[ndigits] - '0');
	}
	return (val < 256 ? val : -1);
}
4245
4246 /*
4247 * This function checks for restart conditions.
4248 */
4249 static int
nfsrv_checkrestart(nfsquad_t clientid,u_int32_t flags,nfsv4stateid_t * stateidp,int specialid)4250 nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags,
4251 nfsv4stateid_t *stateidp, int specialid)
4252 {
4253 int ret = 0;
4254
4255 /*
4256 * First check for a server restart. Open, LockT, ReleaseLockOwner
4257 * and DelegPurge have a clientid, the rest a stateid.
4258 */
4259 if (flags &
4260 (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) {
4261 if (clientid.lval[0] != NFSD_VNET(nfsrvboottime)) {
4262 ret = NFSERR_STALECLIENTID;
4263 goto out;
4264 }
4265 } else if (stateidp->other[0] != NFSD_VNET(nfsrvboottime) &&
4266 specialid == 0) {
4267 ret = NFSERR_STALESTATEID;
4268 goto out;
4269 }
4270
4271 /*
4272 * Read, Write, Setattr and LockT can return NFSERR_GRACE and do
4273 * not use a lock/open owner seqid#, so the check can be done now.
4274 * (The others will be checked, as required, later.)
4275 */
4276 if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST)))
4277 goto out;
4278
4279 NFSLOCKSTATE();
4280 ret = nfsrv_checkgrace(NULL, NULL, flags);
4281 NFSUNLOCKSTATE();
4282
4283 out:
4284 NFSEXITCODE(ret);
4285 return (ret);
4286 }
4287
4288 /*
4289 * Check for grace.
4290 */
4291 static int
nfsrv_checkgrace(struct nfsrv_descript * nd,struct nfsclient * clp,u_int32_t flags)4292 nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp,
4293 u_int32_t flags)
4294 {
4295 int error = 0, notreclaimed;
4296 struct nfsrv_stable *sp;
4297
4298 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & (NFSNSF_UPDATEDONE |
4299 NFSNSF_GRACEOVER)) == 0) {
4300 /*
4301 * First, check to see if all of the clients have done a
4302 * ReclaimComplete. If so, grace can end now.
4303 */
4304 notreclaimed = 0;
4305 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head,
4306 nst_list) {
4307 if ((sp->nst_flag & NFSNST_RECLAIMED) == 0) {
4308 notreclaimed = 1;
4309 break;
4310 }
4311 }
4312 if (notreclaimed == 0)
4313 NFSD_VNET(nfsrv_stablefirst).nsf_flags |=
4314 (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4315 }
4316
4317 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_GRACEOVER) != 0) {
4318 if (flags & NFSLCK_RECLAIM) {
4319 error = NFSERR_NOGRACE;
4320 goto out;
4321 }
4322 } else {
4323 if (!(flags & NFSLCK_RECLAIM)) {
4324 error = NFSERR_GRACE;
4325 goto out;
4326 }
4327 if (nd != NULL && clp != NULL &&
4328 (nd->nd_flag & ND_NFSV41) != 0 &&
4329 (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) {
4330 error = NFSERR_NOGRACE;
4331 goto out;
4332 }
4333
4334 /*
4335 * If grace is almost over and we are still getting Reclaims,
4336 * extend grace a bit.
4337 */
4338 if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) >
4339 NFSD_VNET(nfsrv_stablefirst).nsf_eograce)
4340 NFSD_VNET(nfsrv_stablefirst).nsf_eograce =
4341 NFSD_MONOSEC + NFSRV_LEASEDELTA;
4342 }
4343
4344 out:
4345 NFSEXITCODE(error);
4346 return (error);
4347 }
4348
/*
 * Do a server callback.
 * The "trunc" argument is slightly overloaded and refers to different
 * boolean arguments for CBRECALL and CBLAYOUTRECALL.
 *
 * procnum selects the request that is built: NFSV4OP_CBGETATTR,
 * NFSV4OP_CBRECALL, NFSV4OP_CBLAYOUTRECALL or NFSV4PROC_CBNULL. Any other
 * value results in NFSERR_SERVERFAULT.
 * stateidp is used for CBRECALL and CBLAYOUTRECALL, fhp for all three
 * compound callbacks, attrbitp and nap (where the reply attributes are
 * loaded) for CBGETATTR and laytype for CBLAYOUTRECALL.
 *
 * Returns 0 upon success, the error from setting up/doing the RPC or,
 * when the RPC worked, the nd_repstat from the client's reply.
 * As a side effect, LCL_CBDOWN is set in clp->lc_flags when the RPC could
 * not be done and, when an RPC works while LCL_CBDOWN was set, both
 * LCL_CBDOWN and LCL_CALLBACKSON are cleared.
 * clp->lc_cbref is held incremented for the duration of the call.
 */
static int
nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp,
    int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp,
    int laytype, NFSPROC_T *p)
{
	struct mbuf *m;
	u_int32_t *tl;
	struct nfsrv_descript *nd;
	struct ucred *cred;
	int error = 0, slotpos;
	u_int32_t callback;
	struct nfsdsession *sep = NULL;
	uint64_t tval;
	bool dotls;

	nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO);
	cred = newnfs_getcred();
	NFSLOCKSTATE();	/* mostly for lc_cbref++ */
	/* A callback must never be done for an unconfirmed client. */
	if (clp->lc_flags & LCL_NEEDSCONFIRM) {
		NFSUNLOCKSTATE();
		panic("docallb");
	}
	clp->lc_cbref++;

	/*
	 * Fill the callback program# and version into the request
	 * structure for newnfs_connect() to use.
	 */
	clp->lc_req.nr_prog = clp->lc_program;
#ifdef notnow
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		clp->lc_req.nr_vers = NFSV41_CBVERS;
	else
#endif
		clp->lc_req.nr_vers = NFSV4_CBVERS;

	/*
	 * First, fill in some of the fields of nd and cr.
	 * (The values are copied out of clp while the state lock is held.)
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	if ((clp->lc_flags & LCL_NFSV42) != 0)
		nd->nd_flag |= ND_NFSV42;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	m->m_len = 0;
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = mtod(m, caddr_t);

	/*
	 * and build the callback request.
	 * For every failure while building, the request mbuf chain is
	 * freed here before jumping to errout.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		/* Arguments are the file handle and the attribute bits. */
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		/* Arguments are the stateid, "trunc" and the file handle. */
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep, &slotpos);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(NULL, nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4OP_CBLAYOUTRECALL) {
		NFSD_DEBUG(4, "docallback layout recall\n");
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback,
		    NFSV4OP_CBLAYOUTRECALL, "CB Reclayout", &sep, &slotpos);
		NFSD_DEBUG(4, "aft cbcallargs=%d\n", error);
		if (error != 0) {
			m_freem(nd->nd_mreq);
			goto errout;
		}
		/*
		 * Layout type, iomode, the "trunc" boolean and the
		 * recall type come first, followed by the file handle.
		 */
		NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
		*tl++ = txdr_unsigned(laytype);
		*tl++ = txdr_unsigned(NFSLAYOUTIOMODE_ANY);
		if (trunc)
			*tl++ = newnfs_true;
		else
			*tl++ = newnfs_false;
		*tl = txdr_unsigned(NFSV4LAYOUTRET_FILE);
		(void)nfsm_fhtom(NULL, nd, (uint8_t *)fhp, NFSX_MYFH, 0);
		/* Two hypers (0 and UINT64_MAX) and then the stateid. */
		NFSM_BUILD(tl, u_int32_t *, 2 * NFSX_HYPER + NFSX_STATEID);
		tval = 0;
		txdr_hyper(tval, tl); tl += 2;
		tval = UINT64_MAX;
		txdr_hyper(tval, tl); tl += 2;
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY(stateidp->other, tl, NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		NFSD_DEBUG(4, "aft args\n");
	} else if (procnum == NFSV4PROC_CBNULL) {
		/* No arguments, but NFSv4.1 still needs the session. */
		nd->nd_procnum = NFSV4PROC_CBNULL;
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				m_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		m_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 * For NFSv4.1 no connect is attempted. If there is no nr_client,
	 * the callback fails with ECONNREFUSED and the slot (except for
	 * CBNULL, which did not get one) and session are released.
	 * NOTE(review): when error is set in this section, nd_mreq is not
	 * freed by this function. Confirm whether that is a leak or
	 * whether ownership is handled elsewhere.
	 */
	dotls = false;
	if ((clp->lc_flags & LCL_TLSCB) != 0)
		dotls = true;
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = ECONNREFUSED;
			if (procnum != NFSV4PROC_CBNULL)
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			nfsrv_freesession(NULL, sep, NULL);
		} else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1, dotls, &clp->lc_req.nr_client);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3, dotls, &clp->lc_req.nr_client);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	NFSD_DEBUG(4, "aft sndunlock=%d\n", error);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			NFSD_DEBUG(4, "aft newnfs_request=%d\n", error);
			if (error != 0 && procnum != NFSV4PROC_CBNULL) {
				/*
				 * It is likely that the callback was never
				 * processed by the client and, as such,
				 * the sequence# for the session slot needs
				 * to be backed up by one to avoid a
				 * NFSERR_SEQMISORDERED error reply.
				 * For the unlikely case where the callback
				 * was processed by the client, this will
				 * make the next callback on the slot
				 * appear to be a retry.
				 * Since callbacks never specify that the
				 * reply be cached, this "apparent retry"
				 * should not be a problem.
				 */
				nfsv4_freeslot(&sep->sess_cbsess, slotpos,
				    true);
			}
			/* Done with the session reference in sep. */
			nfsrv_freesession(NULL, sep, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 *  but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disabled issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
		 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		/*
		 * The client's reply status becomes the return value.
		 * For a successful CBGETATTR, load the attributes from
		 * the reply into nap.
		 */
		if (nd->nd_repstat) {
			error = nd->nd_repstat;
			NFSD_DEBUG(1, "nfsrv_docallback op=%d err=%d\n",
			    procnum, error);
		} else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		m_freem(nd->nd_mrep);
	}
	/*
	 * Release the callback reference and wake up a thread that
	 * requested a wakeup once the last reference is gone.
	 */
	NFSLOCKSTATE();
	clp->lc_cbref--;
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	free(nd, M_TEMP);
	NFSEXITCODE(error);
	return (error);
}
4607
4608 /*
4609 * Set up the compound RPC for the callback.
4610 */
4611 static int
nfsrv_cbcallargs(struct nfsrv_descript * nd,struct nfsclient * clp,uint32_t callback,int op,const char * optag,struct nfsdsession ** sepp,int * slotposp)4612 nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
4613 uint32_t callback, int op, const char *optag, struct nfsdsession **sepp,
4614 int *slotposp)
4615 {
4616 uint32_t *tl;
4617 int error, len;
4618
4619 len = strlen(optag);
4620 (void)nfsm_strtom(nd, optag, len);
4621 NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
4622 if ((nd->nd_flag & ND_NFSV41) != 0) {
4623 if ((nd->nd_flag & ND_NFSV42) != 0)
4624 *tl++ = txdr_unsigned(NFSV42_MINORVERSION);
4625 else
4626 *tl++ = txdr_unsigned(NFSV41_MINORVERSION);
4627 *tl++ = txdr_unsigned(callback);
4628 *tl++ = txdr_unsigned(2);
4629 *tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
4630 error = nfsv4_setcbsequence(nd, clp, 1, sepp, slotposp);
4631 if (error != 0)
4632 return (error);
4633 NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
4634 *tl = txdr_unsigned(op);
4635 } else {
4636 *tl++ = txdr_unsigned(NFSV4_MINORVERSION);
4637 *tl++ = txdr_unsigned(callback);
4638 *tl++ = txdr_unsigned(1);
4639 *tl = txdr_unsigned(op);
4640 }
4641 return (0);
4642 }
4643
/*
 * Return the next index# for a clientid, which is simply the previous one
 * plus one. Should the 32bit unsigned counter ever wrap around to 0, a
 * message is printed and 0 is returned.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	if (++client_index == 0)
		printf("%s: out of clientids\n", __func__);
	return (client_index);
}
4664
4665 /*
4666 * Return the next index# for a stateid. Mostly just increment and return
4667 * the next one, but... if the 32bit unsigned does actually wrap around
4668 * (will a BSD server stay up that long?), find
4669 * new start and end values.
4670 */
4671 static u_int32_t
nfsrv_nextstateindex(struct nfsclient * clp)4672 nfsrv_nextstateindex(struct nfsclient *clp)
4673 {
4674 struct nfsstate *stp;
4675 int i;
4676 u_int32_t canuse, min_index, max_index;
4677
4678 if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
4679 clp->lc_stateindex++;
4680 if (clp->lc_stateindex != clp->lc_statemaxindex)
4681 return (clp->lc_stateindex);
4682 }
4683
4684 /*
4685 * Yuck, we've hit the end.
4686 * Look for a new min and max.
4687 */
4688 min_index = 0;
4689 max_index = 0xffffffff;
4690 for (i = 0; i < nfsrv_statehashsize; i++) {
4691 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4692 if (stp->ls_stateid.other[2] > 0x80000000) {
4693 if (stp->ls_stateid.other[2] < max_index)
4694 max_index = stp->ls_stateid.other[2];
4695 } else {
4696 if (stp->ls_stateid.other[2] > min_index)
4697 min_index = stp->ls_stateid.other[2];
4698 }
4699 }
4700 }
4701
4702 /*
4703 * Yikes, highly unlikely, but I'll handle it anyhow.
4704 */
4705 if (min_index == 0x80000000 && max_index == 0x80000001) {
4706 canuse = 0;
4707 /*
4708 * Loop around until we find an unused entry. Return that
4709 * and set LCL_INDEXNOTOK, so the search will continue next time.
4710 * (This is one of those rare cases where a goto is the
4711 * cleanest way to code the loop.)
4712 */
4713 tryagain:
4714 for (i = 0; i < nfsrv_statehashsize; i++) {
4715 LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
4716 if (stp->ls_stateid.other[2] == canuse) {
4717 canuse++;
4718 goto tryagain;
4719 }
4720 }
4721 }
4722 clp->lc_flags |= LCL_INDEXNOTOK;
4723 return (canuse);
4724 }
4725
4726 /*
4727 * Ok to start again from min + 1.
4728 */
4729 clp->lc_stateindex = min_index + 1;
4730 clp->lc_statemaxindex = max_index;
4731 clp->lc_flags &= ~LCL_INDEXNOTOK;
4732 return (clp->lc_stateindex);
4733 }
4734
4735 /*
4736 * The following functions handle the stable storage file that deals with
4737 * the edge conditions described in RFC3530 Sec. 8.6.3.
4738 * The file is as follows:
4739 * - a single record at the beginning that has the lease time of the
4740 * previous server instance (before the last reboot) and the nfsrvboottime
4741 * values for the previous server boots.
4742 * These previous boot times are used to ensure that the current
4743 * nfsrvboottime does not, somehow, get set to a previous one.
4744 * (This is important so that Stale ClientIDs and StateIDs can
4745 * be recognized.)
 *   The number of previous nfsrvboottime values precedes the list.
4747 * - followed by some number of appended records with:
4748 * - client id string
4749 * - flag that indicates it is a record revoking state via lease
4750 * expiration or similar
4751 * OR has successfully acquired state.
4752 * These structures vary in length, with the client string at the end, up
4753 * to NFSV4_OPAQUELIMIT in size.
4754 *
4755 * At the end of the grace period, the file is truncated, the first
4756 * record is rewritten with updated information and any acquired state
4757 * records for successful reclaims of state are written.
4758 *
4759 * Subsequent records are appended when the first state is issued to
4760 * a client and when state is revoked for a client.
4761 *
4762 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological order.
4764 * If, for some reason, the file can't be read, the grace period is
4765 * immediately terminated and all reclaims get NFSERR_NOGRACE.
4766 */
4767
4768 /*
4769 * Read in the stable storage file. Called by nfssvc() before the nfsd
4770 * processes start servicing requests.
4771 */
4772 void
nfsrv_setupstable(NFSPROC_T * p)4773 nfsrv_setupstable(NFSPROC_T *p)
4774 {
4775 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4776 struct nfsrv_stable *sp, *nsp;
4777 struct nfst_rec *tsp;
4778 int error, i, tryagain;
4779 off_t off = 0;
4780 ssize_t aresid, len;
4781
4782 /*
4783 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
4784 * a reboot, so state has not been lost.
4785 */
4786 if (sf->nsf_flags & NFSNSF_UPDATEDONE)
4787 return;
4788 /*
4789 * Set Grace over just until the file reads successfully.
4790 */
4791 NFSD_VNET(nfsrvboottime) = time_second;
4792 LIST_INIT(&sf->nsf_head);
4793 sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
4794 sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
4795 if (sf->nsf_fp == NULL)
4796 return;
4797 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4798 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
4799 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4800 if (error || aresid || sf->nsf_numboots == 0 ||
4801 sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
4802 return;
4803
4804 /*
4805 * Now, read in the boottimes.
4806 */
4807 sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
4808 sizeof(time_t), M_TEMP, M_WAITOK);
4809 off = sizeof (struct nfsf_rec);
4810 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4811 (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
4812 UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4813 if (error || aresid) {
4814 free(sf->nsf_bootvals, M_TEMP);
4815 sf->nsf_bootvals = NULL;
4816 return;
4817 }
4818
4819 /*
4820 * Make sure this nfsrvboottime is different from all recorded
4821 * previous ones.
4822 */
4823 do {
4824 tryagain = 0;
4825 for (i = 0; i < sf->nsf_numboots; i++) {
4826 if (NFSD_VNET(nfsrvboottime) == sf->nsf_bootvals[i]) {
4827 NFSD_VNET(nfsrvboottime)++;
4828 tryagain = 1;
4829 break;
4830 }
4831 }
4832 } while (tryagain);
4833
4834 sf->nsf_flags |= NFSNSF_OK;
4835 off += (sf->nsf_numboots * sizeof (time_t));
4836
4837 /*
4838 * Read through the file, building a list of records for grace
4839 * checking.
4840 * Each record is between sizeof (struct nfst_rec) and
4841 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
4842 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
4843 */
4844 tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
4845 NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
4846 do {
4847 error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
4848 (caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
4849 off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
4850 len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
4851 if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
4852 len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
4853 /*
4854 * Yuck, the file has been corrupted, so just return
4855 * after clearing out any restart state, so the grace period
4856 * is over.
4857 */
4858 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4859 LIST_REMOVE(sp, nst_list);
4860 free(sp, M_TEMP);
4861 }
4862 free(tsp, M_TEMP);
4863 sf->nsf_flags &= ~NFSNSF_OK;
4864 free(sf->nsf_bootvals, M_TEMP);
4865 sf->nsf_bootvals = NULL;
4866 return;
4867 }
4868 if (len > 0) {
4869 off += sizeof (struct nfst_rec) + tsp->len - 1;
4870 /*
4871 * Search the list for a matching client.
4872 */
4873 LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
4874 if (tsp->len == sp->nst_len &&
4875 !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
4876 break;
4877 }
4878 if (sp == LIST_END(&sf->nsf_head)) {
4879 sp = (struct nfsrv_stable *)malloc(tsp->len +
4880 sizeof (struct nfsrv_stable) - 1, M_TEMP,
4881 M_WAITOK);
4882 NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
4883 sizeof (struct nfst_rec) + tsp->len - 1);
4884 LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
4885 } else {
4886 if (tsp->flag == NFSNST_REVOKE)
4887 sp->nst_flag |= NFSNST_REVOKE;
4888 else
4889 /*
4890 * A subsequent timestamp indicates the client
4891 * did a setclientid/confirm and any previous
4892 * revoke is no longer relevant.
4893 */
4894 sp->nst_flag &= ~NFSNST_REVOKE;
4895 }
4896 }
4897 } while (len > 0);
4898 free(tsp, M_TEMP);
4899 sf->nsf_flags = NFSNSF_OK;
4900 sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
4901 NFSRV_LEASEDELTA;
4902 }
4903
4904 /*
4905 * Update the stable storage file, now that the grace period is over.
4906 */
4907 void
nfsrv_updatestable(NFSPROC_T * p)4908 nfsrv_updatestable(NFSPROC_T *p)
4909 {
4910 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4911 struct nfsrv_stable *sp, *nsp;
4912 int i;
4913 struct nfsvattr nva;
4914 vnode_t vp;
4915 #if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
4916 mount_t mp = NULL;
4917 #endif
4918 int error;
4919
4920 if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
4921 return;
4922 sf->nsf_flags |= NFSNSF_UPDATEDONE;
4923 /*
4924 * Ok, we need to rewrite the stable storage file.
4925 * - truncate to 0 length
4926 * - write the new first structure
4927 * - loop through the data structures, writing out any that
4928 * have timestamps older than the old boot
4929 */
4930 if (sf->nsf_bootvals) {
4931 sf->nsf_numboots++;
4932 for (i = sf->nsf_numboots - 2; i >= 0; i--)
4933 sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
4934 } else {
4935 sf->nsf_numboots = 1;
4936 sf->nsf_bootvals = (time_t *)malloc(sizeof(time_t),
4937 M_TEMP, M_WAITOK);
4938 }
4939 sf->nsf_bootvals[0] = NFSD_VNET(nfsrvboottime);
4940 sf->nsf_lease = nfsrv_lease;
4941 NFSVNO_ATTRINIT(&nva);
4942 NFSVNO_SETATTRVAL(&nva, size, 0);
4943 vp = NFSFPVNODE(sf->nsf_fp);
4944 vn_start_write(vp, &mp, V_WAIT);
4945 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
4946 error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
4947 NULL);
4948 NFSVOPUNLOCK(vp);
4949 } else
4950 error = EPERM;
4951 vn_finished_write(mp);
4952 if (!error)
4953 error = NFSD_RDWR(UIO_WRITE, vp,
4954 (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
4955 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4956 if (!error)
4957 error = NFSD_RDWR(UIO_WRITE, vp,
4958 (caddr_t)sf->nsf_bootvals,
4959 sf->nsf_numboots * sizeof (time_t),
4960 (off_t)(sizeof (struct nfsf_rec)),
4961 UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
4962 free(sf->nsf_bootvals, M_TEMP);
4963 sf->nsf_bootvals = NULL;
4964 if (error) {
4965 sf->nsf_flags &= ~NFSNSF_OK;
4966 printf("EEK! Can't write NfsV4 stable storage file\n");
4967 return;
4968 }
4969 sf->nsf_flags |= NFSNSF_OK;
4970
4971 /*
4972 * Loop through the list and write out timestamp records for
4973 * any clients that successfully reclaimed state.
4974 */
4975 LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
4976 if (sp->nst_flag & NFSNST_GOTSTATE) {
4977 nfsrv_writestable(sp->nst_client, sp->nst_len,
4978 NFSNST_NEWSTATE, p);
4979 sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
4980 }
4981 LIST_REMOVE(sp, nst_list);
4982 free(sp, M_TEMP);
4983 }
4984 nfsrv_backupstable();
4985 }
4986
4987 /*
4988 * Append a record to the stable storage file.
4989 */
4990 void
nfsrv_writestable(u_char * client,int len,int flag,NFSPROC_T * p)4991 nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
4992 {
4993 struct nfsrv_stablefirst *sf = &NFSD_VNET(nfsrv_stablefirst);
4994 struct nfst_rec *sp;
4995 int error;
4996
4997 if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
4998 return;
4999 sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
5000 len - 1, M_TEMP, M_WAITOK);
5001 sp->len = len;
5002 NFSBCOPY(client, sp->client, len);
5003 sp->flag = flag;
5004 error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
5005 (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
5006 UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL, p);
5007 free(sp, M_TEMP);
5008 if (error) {
5009 sf->nsf_flags &= ~NFSNSF_OK;
5010 printf("EEK! Can't write NfsV4 stable storage file\n");
5011 }
5012 }
5013
5014 /*
5015 * This function is called during the grace period to mark a client
5016 * that successfully reclaimed state.
5017 */
5018 static void
nfsrv_markstable(struct nfsclient * clp)5019 nfsrv_markstable(struct nfsclient *clp)
5020 {
5021 struct nfsrv_stable *sp;
5022
5023 /*
5024 * First find the client structure.
5025 */
5026 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5027 if (sp->nst_len == clp->lc_idlen &&
5028 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5029 break;
5030 }
5031 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5032 return;
5033
5034 /*
5035 * Now, just mark it and set the nfsclient back pointer.
5036 */
5037 sp->nst_flag |= NFSNST_GOTSTATE;
5038 sp->nst_clp = clp;
5039 }
5040
5041 /*
5042 * This function is called when a NFSv4.1 client does a ReclaimComplete.
5043 * Very similar to nfsrv_markstable(), except for the flag being set.
5044 */
5045 static void
nfsrv_markreclaim(struct nfsclient * clp)5046 nfsrv_markreclaim(struct nfsclient *clp)
5047 {
5048 struct nfsrv_stable *sp;
5049
5050 /*
5051 * First find the client structure.
5052 */
5053 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5054 if (sp->nst_len == clp->lc_idlen &&
5055 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5056 break;
5057 }
5058 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head))
5059 return;
5060
5061 /*
5062 * Now, just set the flag.
5063 */
5064 sp->nst_flag |= NFSNST_RECLAIMED;
5065
5066 /*
5067 * Free up any old delegations.
5068 */
5069 nfsrv_freedeleglist(&clp->lc_olddeleg);
5070 }
5071
5072 /*
5073 * This function is called for a reclaim, to see if it gets grace.
5074 * It returns 0 if a reclaim is allowed, 1 otherwise.
5075 */
5076 static int
nfsrv_checkstable(struct nfsclient * clp)5077 nfsrv_checkstable(struct nfsclient *clp)
5078 {
5079 struct nfsrv_stable *sp;
5080
5081 /*
5082 * First, find the entry for the client.
5083 */
5084 LIST_FOREACH(sp, &NFSD_VNET(nfsrv_stablefirst).nsf_head, nst_list) {
5085 if (sp->nst_len == clp->lc_idlen &&
5086 !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
5087 break;
5088 }
5089
5090 /*
5091 * If not in the list, state was revoked or no state was issued
5092 * since the previous reboot, a reclaim is denied.
5093 */
5094 if (sp == LIST_END(&NFSD_VNET(nfsrv_stablefirst).nsf_head) ||
5095 (sp->nst_flag & NFSNST_REVOKE) ||
5096 !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_OK))
5097 return (1);
5098 return (0);
5099 }
5100
5101 /*
5102 * Test for and try to clear out a conflicting client. This is called by
5103 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
5104 * a found.
5105 * The trick here is that it can't revoke a conflicting client with an
5106 * expired lease unless it holds the v4root lock, so...
5107 * If no v4root lock, get the lock and return 1 to indicate "try again".
5108 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
5109 * the revocation worked and the conflicting client is "bye, bye", so it
5110 * can be tried again.
5111 * Return 2 to indicate that the vnode is VIRF_DOOMED after NFSVOPLOCK().
5112 * Unlocks State before a non-zero value is returned.
5113 */
5114 static int
nfsrv_clientconflict(struct nfsclient * clp,int * haslockp,vnode_t vp,NFSPROC_T * p)5115 nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
5116 NFSPROC_T *p)
5117 {
5118 int gotlock, lktype = 0;
5119
5120 /*
5121 * If lease hasn't expired, we can't fix it.
5122 */
5123 if (clp->lc_expiry >= NFSD_MONOSEC ||
5124 !(NFSD_VNET(nfsrv_stablefirst).nsf_flags & NFSNSF_UPDATEDONE))
5125 return (0);
5126 if (*haslockp == 0) {
5127 NFSUNLOCKSTATE();
5128 if (vp != NULL) {
5129 lktype = NFSVOPISLOCKED(vp);
5130 NFSVOPUNLOCK(vp);
5131 }
5132 NFSLOCKV4ROOTMUTEX();
5133 nfsv4_relref(&nfsv4rootfs_lock);
5134 do {
5135 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5136 NFSV4ROOTLOCKMUTEXPTR, NULL);
5137 } while (!gotlock);
5138 NFSUNLOCKV4ROOTMUTEX();
5139 *haslockp = 1;
5140 if (vp != NULL) {
5141 NFSVOPLOCK(vp, lktype | LK_RETRY);
5142 if (VN_IS_DOOMED(vp))
5143 return (2);
5144 }
5145 return (1);
5146 }
5147 NFSUNLOCKSTATE();
5148
5149 /*
5150 * Ok, we can expire the conflicting client.
5151 */
5152 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5153 nfsrv_backupstable();
5154 nfsrv_cleanclient(clp, p);
5155 nfsrv_freedeleglist(&clp->lc_deleg);
5156 nfsrv_freedeleglist(&clp->lc_olddeleg);
5157 LIST_REMOVE(clp, lc_hash);
5158 nfsrv_zapclient(clp, p);
5159 return (1);
5160 }
5161
5162 /*
5163 * Resolve a delegation conflict.
5164 * Returns 0 to indicate the conflict was resolved without sleeping.
5165 * Return -1 to indicate that the caller should check for conflicts again.
5166 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
5167 *
5168 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
5169 * for a return of 0, since there was no sleep and it could be required
5170 * later. It is released for a return of NFSERR_DELAY, since the caller
5171 * will return that error. It is released when a sleep was done waiting
5172 * for the delegation to be returned or expire (so that other nfsds can
5173 * handle ops). Then, it must be acquired for the write to stable storage.
5174 * (This function is somewhat similar to nfsrv_clientconflict(), but
5175 * the semantics differ in a couple of subtle ways. The return of 0
5176 * indicates the conflict was resolved without sleeping here, not
5177 * that the conflict can't be resolved and the handling of nfsv4root_lock
5178 * differs, as noted above.)
5179 * Unlocks State before returning a non-zero value.
5180 */
5181 static int
nfsrv_delegconflict(struct nfsstate * stp,int * haslockp,NFSPROC_T * p,vnode_t vp)5182 nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
5183 vnode_t vp)
5184 {
5185 struct nfsclient *clp = stp->ls_clp;
5186 int gotlock, error, lktype = 0, retrycnt, zapped_clp;
5187 nfsv4stateid_t tstateid;
5188 fhandle_t tfh;
5189
5190 /*
5191 * If the conflict is with an old delegation...
5192 */
5193 if (stp->ls_flags & NFSLCK_OLDDELEG) {
5194 /*
5195 * You can delete it, if it has expired.
5196 */
5197 if (clp->lc_delegtime < NFSD_MONOSEC) {
5198 nfsrv_freedeleg(stp);
5199 NFSUNLOCKSTATE();
5200 error = -1;
5201 goto out;
5202 }
5203 NFSUNLOCKSTATE();
5204 /*
5205 * During this delay, the old delegation could expire or it
5206 * could be recovered by the client via an Open with
5207 * CLAIM_DELEGATE_PREV.
5208 * Release the nfsv4root_lock, if held.
5209 */
5210 if (*haslockp) {
5211 *haslockp = 0;
5212 NFSLOCKV4ROOTMUTEX();
5213 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5214 NFSUNLOCKV4ROOTMUTEX();
5215 }
5216 error = NFSERR_DELAY;
5217 goto out;
5218 }
5219
5220 /*
5221 * It's a current delegation, so:
5222 * - check to see if the delegation has expired
5223 * - if so, get the v4root lock and then expire it
5224 */
5225 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0 || (stp->ls_lastrecall <
5226 NFSD_MONOSEC && clp->lc_expiry >= NFSD_MONOSEC &&
5227 stp->ls_delegtime >= NFSD_MONOSEC)) {
5228 /*
5229 * - do a recall callback, since not yet done
5230 * For now, never allow truncate to be set. To use
5231 * truncate safely, it must be guaranteed that the
5232 * Remove, Rename or Setattr with size of 0 will
5233 * succeed and that would require major changes to
5234 * the VFS/Vnode OPs.
5235 * Set the expiry time large enough so that it won't expire
5236 * until after the callback, then set it correctly, once
5237 * the callback is done. (The delegation will now time
5238 * out whether or not the Recall worked ok. The timeout
5239 * will be extended when ops are done on the delegation
5240 * stateid, up to the timelimit.)
5241 */
5242 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0) {
5243 stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
5244 NFSRV_LEASEDELTA;
5245 stp->ls_delegtimelimit = NFSD_MONOSEC + (6 *
5246 nfsrv_lease) + NFSRV_LEASEDELTA;
5247 stp->ls_flags |= NFSLCK_DELEGRECALL;
5248 }
5249 stp->ls_lastrecall = time_uptime + 1;
5250
5251 /*
5252 * Loop NFSRV_CBRETRYCNT times while the CBRecall replies
5253 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
5254 * in order to try and avoid a race that could happen
5255 * when a CBRecall request passed the Open reply with
5256 * the delegation in it when transitting the network.
5257 * Since nfsrv_docallback will sleep, don't use stp after
5258 * the call.
5259 */
5260 NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
5261 sizeof (tstateid));
5262 NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
5263 sizeof (tfh));
5264 NFSUNLOCKSTATE();
5265 if (*haslockp) {
5266 *haslockp = 0;
5267 NFSLOCKV4ROOTMUTEX();
5268 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5269 NFSUNLOCKV4ROOTMUTEX();
5270 }
5271 retrycnt = 0;
5272 do {
5273 error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
5274 &tstateid, 0, &tfh, NULL, NULL, 0, p);
5275 retrycnt++;
5276 } while ((error == NFSERR_BADSTATEID ||
5277 error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
5278 error = NFSERR_DELAY;
5279 goto out;
5280 }
5281
5282 if (clp->lc_expiry >= NFSD_MONOSEC &&
5283 stp->ls_delegtime >= NFSD_MONOSEC) {
5284 NFSUNLOCKSTATE();
5285 /*
5286 * A recall has been done, but it has not yet expired.
5287 * So, RETURN_DELAY.
5288 */
5289 if (*haslockp) {
5290 *haslockp = 0;
5291 NFSLOCKV4ROOTMUTEX();
5292 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5293 NFSUNLOCKV4ROOTMUTEX();
5294 }
5295 error = NFSERR_DELAY;
5296 goto out;
5297 }
5298
5299 /*
5300 * If we don't yet have the lock, just get it and then return,
5301 * since we need that before deleting expired state, such as
5302 * this delegation.
5303 * When getting the lock, unlock the vnode, so other nfsds that
5304 * are in progress, won't get stuck waiting for the vnode lock.
5305 */
5306 if (*haslockp == 0) {
5307 NFSUNLOCKSTATE();
5308 if (vp != NULL) {
5309 lktype = NFSVOPISLOCKED(vp);
5310 NFSVOPUNLOCK(vp);
5311 }
5312 NFSLOCKV4ROOTMUTEX();
5313 nfsv4_relref(&nfsv4rootfs_lock);
5314 do {
5315 gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
5316 NFSV4ROOTLOCKMUTEXPTR, NULL);
5317 } while (!gotlock);
5318 NFSUNLOCKV4ROOTMUTEX();
5319 *haslockp = 1;
5320 if (vp != NULL) {
5321 NFSVOPLOCK(vp, lktype | LK_RETRY);
5322 if (VN_IS_DOOMED(vp)) {
5323 *haslockp = 0;
5324 NFSLOCKV4ROOTMUTEX();
5325 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5326 NFSUNLOCKV4ROOTMUTEX();
5327 error = NFSERR_PERM;
5328 goto out;
5329 }
5330 }
5331 error = -1;
5332 goto out;
5333 }
5334
5335 NFSUNLOCKSTATE();
5336 /*
5337 * Ok, we can delete the expired delegation.
5338 * First, write the Revoke record to stable storage and then
5339 * clear out the conflict.
5340 * Since all other nfsd threads are now blocked, we can safely
5341 * sleep without the state changing.
5342 */
5343 nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
5344 nfsrv_backupstable();
5345 if (clp->lc_expiry < NFSD_MONOSEC) {
5346 nfsrv_cleanclient(clp, p);
5347 nfsrv_freedeleglist(&clp->lc_deleg);
5348 nfsrv_freedeleglist(&clp->lc_olddeleg);
5349 LIST_REMOVE(clp, lc_hash);
5350 zapped_clp = 1;
5351 } else {
5352 nfsrv_freedeleg(stp);
5353 zapped_clp = 0;
5354 }
5355 if (zapped_clp)
5356 nfsrv_zapclient(clp, p);
5357 error = -1;
5358
5359 out:
5360 NFSEXITCODE(error);
5361 return (error);
5362 }
5363
5364 /*
5365 * Check for a remove allowed, if remove is set to 1 and get rid of
5366 * delegations.
5367 */
5368 int
nfsrv_checkremove(vnode_t vp,int remove,struct nfsrv_descript * nd,nfsquad_t clientid,NFSPROC_T * p)5369 nfsrv_checkremove(vnode_t vp, int remove, struct nfsrv_descript *nd,
5370 nfsquad_t clientid, NFSPROC_T *p)
5371 {
5372 struct nfsclient *clp;
5373 struct nfsstate *stp;
5374 struct nfslockfile *lfp;
5375 int error, haslock = 0;
5376 fhandle_t nfh;
5377
5378 clp = NULL;
5379 /*
5380 * First, get the lock file structure.
5381 * (A return of -1 means no associated state, so remove ok.)
5382 */
5383 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5384 tryagain:
5385 NFSLOCKSTATE();
5386 if (error == 0 && clientid.qval != 0)
5387 error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
5388 (nfsquad_t)((u_quad_t)0), 0, nd, p);
5389 if (!error)
5390 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5391 if (error) {
5392 NFSUNLOCKSTATE();
5393 if (haslock) {
5394 NFSLOCKV4ROOTMUTEX();
5395 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5396 NFSUNLOCKV4ROOTMUTEX();
5397 }
5398 if (error == -1)
5399 error = 0;
5400 goto out;
5401 }
5402
5403 /*
5404 * Now, we must Recall any delegations.
5405 */
5406 error = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
5407 if (error) {
5408 /*
5409 * nfsrv_cleandeleg() unlocks state for non-zero
5410 * return.
5411 */
5412 if (error == -1)
5413 goto tryagain;
5414 if (haslock) {
5415 NFSLOCKV4ROOTMUTEX();
5416 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5417 NFSUNLOCKV4ROOTMUTEX();
5418 }
5419 goto out;
5420 }
5421
5422 /*
5423 * Now, look for a conflicting open share.
5424 */
5425 if (remove) {
5426 /*
5427 * If the entry in the directory was the last reference to the
5428 * corresponding filesystem object, the object can be destroyed
5429 * */
5430 if(lfp->lf_usecount>1)
5431 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
5432 if (stp->ls_flags & NFSLCK_WRITEDENY) {
5433 error = NFSERR_FILEOPEN;
5434 break;
5435 }
5436 }
5437 }
5438
5439 NFSUNLOCKSTATE();
5440 if (haslock) {
5441 NFSLOCKV4ROOTMUTEX();
5442 nfsv4_unlock(&nfsv4rootfs_lock, 1);
5443 NFSUNLOCKV4ROOTMUTEX();
5444 }
5445
5446 out:
5447 NFSEXITCODE(error);
5448 return (error);
5449 }
5450
5451 /*
5452 * Clear out all delegations for the file referred to by lfp.
5453 * May return NFSERR_DELAY, if there will be a delay waiting for
5454 * delegations to expire.
5455 * Returns -1 to indicate it slept while recalling a delegation.
5456 * This function has the side effect of deleting the nfslockfile structure,
5457 * if it no longer has associated state and didn't have to sleep.
5458 * Unlocks State before a non-zero value is returned.
5459 */
5460 static int
nfsrv_cleandeleg(vnode_t vp,struct nfslockfile * lfp,struct nfsclient * clp,int * haslockp,NFSPROC_T * p)5461 nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
5462 struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
5463 {
5464 struct nfsstate *stp, *nstp;
5465 int ret = 0;
5466
5467 stp = LIST_FIRST(&lfp->lf_deleg);
5468 while (stp != LIST_END(&lfp->lf_deleg)) {
5469 nstp = LIST_NEXT(stp, ls_file);
5470 if (stp->ls_clp != clp) {
5471 ret = nfsrv_delegconflict(stp, haslockp, p, vp);
5472 if (ret) {
5473 /*
5474 * nfsrv_delegconflict() unlocks state
5475 * when it returns non-zero.
5476 */
5477 goto out;
5478 }
5479 }
5480 stp = nstp;
5481 }
5482 out:
5483 NFSEXITCODE(ret);
5484 return (ret);
5485 }
5486
5487 /*
5488 * There are certain operations that, when being done outside of NFSv4,
5489 * require that any NFSv4 delegation for the file be recalled.
5490 * This function is to be called for those cases:
5491 * VOP_RENAME() - When a delegation is being recalled for any reason,
5492 * the client may have to do Opens against the server, using the file's
5493 * final component name. If the file has been renamed on the server,
5494 * that component name will be incorrect and the Open will fail.
5495 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
5496 * been removed on the server, if there is a delegation issued to
5497 * that client for the file. I say "theoretically" since clients
5498 * normally do an Access Op before the Open and that Access Op will
5499 * fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
5500 * they will detect the file's removal in the same manner. (There is
5501 * one case where RFC3530 allows a client to do an Open without first
5502 * doing an Access Op, which is passage of a check against the ACE
5503 * returned with a Write delegation, but current practice is to ignore
5504 * the ACE and always do an Access Op.)
5505 * Since the functions can only be called with an unlocked vnode, this
5506 * can't be done at this time.
5507 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
5508 * locks locally in the client, which are not visible to the server. To
5509 * deal with this, issuing of delegations for a vnode must be disabled
5510 * and all delegations for the vnode recalled. This is done via the
5511 * second function, using the VV_DISABLEDELEG vflag on the vnode.
5512 */
5513 void
nfsd_recalldelegation(vnode_t vp,NFSPROC_T * p)5514 nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
5515 {
5516 time_t starttime;
5517 int error;
5518
5519 /*
5520 * First, check to see if the server is currently running and it has
5521 * been called for a regular file when issuing delegations.
5522 */
5523 if (NFSD_VNET(nfsrv_numnfsd) == 0 || vp->v_type != VREG ||
5524 nfsrv_issuedelegs == 0)
5525 return;
5526
5527 KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));
5528 /*
5529 * First, get a reference on the nfsv4rootfs_lock so that an
5530 * exclusive lock cannot be acquired by another thread.
5531 */
5532 NFSLOCKV4ROOTMUTEX();
5533 nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
5534 NFSUNLOCKV4ROOTMUTEX();
5535
5536 /*
5537 * Now, call nfsrv_checkremove() in a loop while it returns
5538 * NFSERR_DELAY. Return upon any other error or when timed out.
5539 */
5540 starttime = NFSD_MONOSEC;
5541 do {
5542 if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
5543 error = nfsrv_checkremove(vp, 0, NULL,
5544 (nfsquad_t)((u_quad_t)0), p);
5545 NFSVOPUNLOCK(vp);
5546 } else
5547 error = EPERM;
5548 if (error == NFSERR_DELAY) {
5549 if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
5550 break;
5551 /* Sleep for a short period of time */
5552 (void) nfs_catnap(PZERO, 0, "nfsremove");
5553 }
5554 } while (error == NFSERR_DELAY);
5555 NFSLOCKV4ROOTMUTEX();
5556 nfsv4_relref(&nfsv4rootfs_lock);
5557 NFSUNLOCKV4ROOTMUTEX();
5558 }
5559
5560 void
nfsd_disabledelegation(vnode_t vp,NFSPROC_T * p)5561 nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
5562 {
5563
5564 #ifdef VV_DISABLEDELEG
5565 /*
5566 * First, flag issuance of delegations disabled.
5567 */
5568 atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
5569 #endif
5570
5571 /*
5572 * Then call nfsd_recalldelegation() to get rid of all extant
5573 * delegations.
5574 */
5575 nfsd_recalldelegation(vp, p);
5576 }
5577
5578 /*
5579 * Check for conflicting locks, etc. and then get rid of delegations.
5580 * (At one point I thought that I should get rid of delegations for any
5581 * Setattr, since it could potentially disallow the I/O op (read or write)
5582 * allowed by the delegation. However, Setattr Ops that aren't changing
5583 * the size get a stateid of all 0s, so you can't tell if it is a delegation
5584 * for the same client or a different one, so I decided to only get rid
5585 * of delegations for other clients when the size is being changed.)
5586 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
5587 * as Write backs, even if there is no delegation, so it really isn't any
5588 * different?)
5589 */
5590 int
nfsrv_checksetattr(vnode_t vp,struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,struct nfsexstuff * exp,NFSPROC_T * p)5591 nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
5592 nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
5593 struct nfsexstuff *exp, NFSPROC_T *p)
5594 {
5595 struct nfsstate st, *stp = &st;
5596 struct nfslock lo, *lop = &lo;
5597 int error = 0;
5598 nfsquad_t clientid;
5599
5600 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
5601 stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
5602 lop->lo_first = nvap->na_size;
5603 } else {
5604 stp->ls_flags = 0;
5605 lop->lo_first = 0;
5606 }
5607 if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
5608 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
5609 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
5610 NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
5611 stp->ls_flags |= NFSLCK_SETATTR;
5612 if (stp->ls_flags == 0)
5613 goto out;
5614 lop->lo_end = NFS64BITSSET;
5615 lop->lo_flags = NFSLCK_WRITE;
5616 stp->ls_ownerlen = 0;
5617 stp->ls_op = NULL;
5618 stp->ls_uid = nd->nd_cred->cr_uid;
5619 stp->ls_stateid.seqid = stateidp->seqid;
5620 clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
5621 clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
5622 stp->ls_stateid.other[2] = stateidp->other[2];
5623 error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
5624 stateidp, exp, nd, p);
5625
5626 out:
5627 NFSEXITCODE2(error, nd);
5628 return (error);
5629 }
5630
5631 /*
5632 * Check for a write delegation and do a CBGETATTR if there is one, updating
5633 * the attributes, as required.
5634 * Should I return an error if I can't get the attributes? (For now, I'll
5635 * just return ok.
5636 */
5637 int
nfsrv_checkgetattr(struct nfsrv_descript * nd,vnode_t vp,struct nfsvattr * nvap,nfsattrbit_t * attrbitp,NFSPROC_T * p)5638 nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
5639 struct nfsvattr *nvap, nfsattrbit_t *attrbitp, NFSPROC_T *p)
5640 {
5641 struct nfsstate *stp;
5642 struct nfslockfile *lfp;
5643 struct nfsclient *clp;
5644 struct nfsvattr nva;
5645 fhandle_t nfh;
5646 int error = 0;
5647 nfsattrbit_t cbbits;
5648 u_quad_t delegfilerev;
5649
5650 NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
5651 if (!NFSNONZERO_ATTRBIT(&cbbits))
5652 goto out;
5653 if (nfsrv_writedelegcnt == 0)
5654 goto out;
5655
5656 /*
5657 * Get the lock file structure.
5658 * (A return of -1 means no associated state, so return ok.)
5659 */
5660 error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
5661 NFSLOCKSTATE();
5662 if (!error)
5663 error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
5664 if (error) {
5665 NFSUNLOCKSTATE();
5666 if (error == -1)
5667 error = 0;
5668 goto out;
5669 }
5670
5671 /*
5672 * Now, look for a write delegation.
5673 */
5674 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
5675 if (stp->ls_flags & NFSLCK_DELEGWRITE)
5676 break;
5677 }
5678 if (stp == LIST_END(&lfp->lf_deleg)) {
5679 NFSUNLOCKSTATE();
5680 goto out;
5681 }
5682 clp = stp->ls_clp;
5683
5684 /* If the clientid is not confirmed, ignore the delegation. */
5685 if (clp->lc_flags & LCL_NEEDSCONFIRM) {
5686 NFSUNLOCKSTATE();
5687 goto out;
5688 }
5689
5690 delegfilerev = stp->ls_filerev;
5691 /*
5692 * If the Write delegation was issued as a part of this Compound RPC
5693 * or if we have an Implied Clientid (used in a previous Op in this
5694 * compound) and it is the client the delegation was issued to,
5695 * just return ok.
5696 * I also assume that it is from the same client iff the network
5697 * host IP address is the same as the callback address. (Not
5698 * exactly correct by the RFC, but avoids a lot of Getattr
5699 * callbacks.)
5700 */
5701 if (nd->nd_compref == stp->ls_compref ||
5702 ((nd->nd_flag & ND_IMPLIEDCLID) &&
5703 clp->lc_clientid.qval == nd->nd_clientid.qval) ||
5704 nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
5705 NFSUNLOCKSTATE();
5706 goto out;
5707 }
5708
5709 /*
5710 * We are now done with the delegation state structure,
5711 * so the statelock can be released and we can now tsleep().
5712 */
5713
5714 /*
5715 * Now, we must do the CB Getattr callback, to see if Change or Size
5716 * has changed.
5717 */
5718 if (clp->lc_expiry >= NFSD_MONOSEC) {
5719 NFSUNLOCKSTATE();
5720 NFSVNO_ATTRINIT(&nva);
5721 nva.na_filerev = NFS64BITSSET;
5722 error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL,
5723 0, &nfh, &nva, &cbbits, 0, p);
5724 if (!error) {
5725 if ((nva.na_filerev != NFS64BITSSET &&
5726 nva.na_filerev > delegfilerev) ||
5727 (NFSVNO_ISSETSIZE(&nva) &&
5728 nva.na_size != nvap->na_size)) {
5729 error = nfsvno_updfilerev(vp, nvap, nd, p);
5730 if (NFSVNO_ISSETSIZE(&nva))
5731 nvap->na_size = nva.na_size;
5732 }
5733 } else
5734 error = 0; /* Ignore callback errors for now. */
5735 } else {
5736 NFSUNLOCKSTATE();
5737 }
5738
5739 out:
5740 NFSEXITCODE2(error, nd);
5741 return (error);
5742 }
5743
5744 /*
5745 * This function looks for openowners that haven't had any opens for
5746 * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS
5747 * is set.
5748 */
5749 void
nfsrv_throwawayopens(NFSPROC_T * p)5750 nfsrv_throwawayopens(NFSPROC_T *p)
5751 {
5752 struct nfsclient *clp, *nclp;
5753 struct nfsstate *stp, *nstp;
5754 int i;
5755
5756 NFSLOCKSTATE();
5757 NFSD_VNET(nfsrv_stablefirst).nsf_flags &= ~NFSNSF_NOOPENS;
5758 /*
5759 * For each client...
5760 */
5761 for (i = 0; i < nfsrv_clienthashsize; i++) {
5762 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
5763 nclp) {
5764 LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) {
5765 if (LIST_EMPTY(&stp->ls_open) &&
5766 (stp->ls_noopens > NFSNOOPEN ||
5767 (nfsrv_openpluslock * 2) >
5768 nfsrv_v4statelimit))
5769 nfsrv_freeopenowner(stp, 0, p);
5770 }
5771 }
5772 }
5773 NFSUNLOCKSTATE();
5774 }
5775
5776 /*
5777 * This function checks to see if the credentials are the same.
5778 * The check for same credentials is needed for state management operations
5779 * for NFSv4.0 or NFSv4.1/4.2 when SP4_MACH_CRED is configured via
5780 * ExchangeID.
5781 * Returns 1 for not same, 0 otherwise.
5782 */
5783 static int
nfsrv_notsamecredname(int op,struct nfsrv_descript * nd,struct nfsclient * clp)5784 nfsrv_notsamecredname(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
5785 {
5786
5787 /* Check for the SP4_MACH_CRED case. */
5788 if (op != 0 && nfsrv_checkmachcred(op, nd, clp) != 0)
5789 return (1);
5790
5791 /* For NFSv4.1/4.2, SP4_NONE always allows this. */
5792 if ((nd->nd_flag & ND_NFSV41) != 0)
5793 return (0);
5794
5795 if (nd->nd_flag & ND_GSS) {
5796 if (!(clp->lc_flags & LCL_GSS))
5797 return (1);
5798 if (clp->lc_flags & LCL_NAME) {
5799 if (nd->nd_princlen != clp->lc_namelen ||
5800 NFSBCMP(nd->nd_principal, clp->lc_name,
5801 clp->lc_namelen))
5802 return (1);
5803 else
5804 return (0);
5805 }
5806 if (nd->nd_cred->cr_uid == clp->lc_uid)
5807 return (0);
5808 else
5809 return (1);
5810 } else if (clp->lc_flags & LCL_GSS)
5811 return (1);
5812 /*
5813 * For AUTH_SYS, allow the same uid or root. (This is underspecified
5814 * in RFC3530, which talks about principals, but doesn't say anything
5815 * about uids for AUTH_SYS.)
5816 */
5817 if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
5818 return (0);
5819 else
5820 return (1);
5821 }
5822
5823 /*
5824 * Calculate the lease expiry time.
5825 */
5826 static time_t
nfsrv_leaseexpiry(void)5827 nfsrv_leaseexpiry(void)
5828 {
5829
5830 if (NFSD_VNET(nfsrv_stablefirst).nsf_eograce > NFSD_MONOSEC)
5831 return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
5832 return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
5833 }
5834
5835 /*
5836 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
5837 */
5838 static void
nfsrv_delaydelegtimeout(struct nfsstate * stp)5839 nfsrv_delaydelegtimeout(struct nfsstate *stp)
5840 {
5841
5842 if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
5843 return;
5844
5845 if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
5846 stp->ls_delegtime < stp->ls_delegtimelimit) {
5847 stp->ls_delegtime += nfsrv_lease;
5848 if (stp->ls_delegtime > stp->ls_delegtimelimit)
5849 stp->ls_delegtime = stp->ls_delegtimelimit;
5850 }
5851 }
5852
5853 /*
5854 * This function checks to see if there is any other state associated
5855 * with the openowner for this Open.
5856 * It returns 1 if there is no other state, 0 otherwise.
5857 */
5858 static int
nfsrv_nootherstate(struct nfsstate * stp)5859 nfsrv_nootherstate(struct nfsstate *stp)
5860 {
5861 struct nfsstate *tstp;
5862
5863 LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
5864 if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
5865 return (0);
5866 }
5867 return (1);
5868 }
5869
5870 /*
5871 * Create a list of lock deltas (changes to local byte range locking
5872 * that can be rolled back using the list) and apply the changes via
5873 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
5874 * the rollback or update function will be called after this.
5875 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
5876 * call fails. If it returns an error, it will unlock the list.
5877 */
5878 static int
nfsrv_locallock(vnode_t vp,struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)5879 nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
5880 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
5881 {
5882 struct nfslock *lop, *nlop;
5883 int error = 0;
5884
5885 /* Loop through the list of locks. */
5886 lop = LIST_FIRST(&lfp->lf_locallock);
5887 while (first < end && lop != NULL) {
5888 nlop = LIST_NEXT(lop, lo_lckowner);
5889 if (first >= lop->lo_end) {
5890 /* not there yet */
5891 lop = nlop;
5892 } else if (first < lop->lo_first) {
5893 /* new one starts before entry in list */
5894 if (end <= lop->lo_first) {
5895 /* no overlap between old and new */
5896 error = nfsrv_dolocal(vp, lfp, flags,
5897 NFSLCK_UNLOCK, first, end, cfp, p);
5898 if (error != 0)
5899 break;
5900 first = end;
5901 } else {
5902 /* handle fragment overlapped with new one */
5903 error = nfsrv_dolocal(vp, lfp, flags,
5904 NFSLCK_UNLOCK, first, lop->lo_first, cfp,
5905 p);
5906 if (error != 0)
5907 break;
5908 first = lop->lo_first;
5909 }
5910 } else {
5911 /* new one overlaps this entry in list */
5912 if (end <= lop->lo_end) {
5913 /* overlaps all of new one */
5914 error = nfsrv_dolocal(vp, lfp, flags,
5915 lop->lo_flags, first, end, cfp, p);
5916 if (error != 0)
5917 break;
5918 first = end;
5919 } else {
5920 /* handle fragment overlapped with new one */
5921 error = nfsrv_dolocal(vp, lfp, flags,
5922 lop->lo_flags, first, lop->lo_end, cfp, p);
5923 if (error != 0)
5924 break;
5925 first = lop->lo_end;
5926 lop = nlop;
5927 }
5928 }
5929 }
5930 if (first < end && error == 0)
5931 /* handle fragment past end of list */
5932 error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first,
5933 end, cfp, p);
5934
5935 NFSEXITCODE(error);
5936 return (error);
5937 }
5938
/*
 * Local lock unlock. Unlock all byte ranges that are no longer locked
 * by NFSv4. To do this, unlock any subranges of first-->end that
 * do not overlap with the byte ranges of any lock in the lfp->lf_lock
 * list. This list has all locks for the file held by other
 * <clientid, lockowner> tuples. The list is ordered by increasing
 * lo_first value, but may have entries that overlap each other, for
 * the case of read locks.
 *
 * vp - vnode the local locks are on
 * lfp - lock file structure whose lf_lock list is scanned
 * init_first, init_end - the byte range being released
 * p - passed through to nfsrv_dolocal()
 *
 * Each uncovered segment is unlocked via nfsrv_dolocal() and then recorded
 * with nfsrv_locallock_commit(). Nothing is returned; the return value of
 * nfsrv_dolocal() is ignored.
 */
static void
nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first,
    uint64_t init_end, NFSPROC_T *p)
{
	struct nfslock *lop;
	/* prevfirst is only referenced by the KASSERT()s below. */
	uint64_t first, end, prevfirst __unused;

	first = init_first;
	end = init_end;
	/* Each pass finds and unlocks at most one uncovered segment. */
	while (first < init_end) {
		/* Loop through all nfs locks, adjusting first and end */
		prevfirst = 0;
		LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
			/* Sanity check the list ordering and the entry. */
			KASSERT(prevfirst <= lop->lo_first,
			    ("nfsv4 locks out of order"));
			KASSERT(lop->lo_first < lop->lo_end,
			    ("nfsv4 bogus lock"));
			prevfirst = lop->lo_first;
			if (first >= lop->lo_first &&
			    first < lop->lo_end)
				/*
				 * Overlaps with initial part, so trim
				 * off that initial part by moving first past
				 * it.
				 */
				first = lop->lo_end;
			else if (end > lop->lo_first &&
			    lop->lo_first > first) {
				/*
				 * This lock defines the end of the
				 * segment to unlock, so set end to the
				 * start of it and break out of the loop.
				 */
				end = lop->lo_first;
				break;
			}
			if (first >= end)
				/*
				 * There is no segment left to do, so
				 * break out of this loop and then exit
				 * the outer while() since first will be set
				 * to end, which must equal init_end here.
				 */
				break;
		}
		if (first < end) {
			/*
			 * Unlock this segment.  The new type is unlock and
			 * the old type is passed as read, so nfsrv_dolocal()
			 * sees a change and calls nfsvno_advlock().
			 */
			(void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK,
			    NFSLCK_READ, first, end, NULL, p);
			nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK,
			    first, end);
		}
		/*
		 * Now move past this segment and look for any further
		 * segment in the range, if there is one.
		 */
		first = end;
		end = init_end;
	}
}
6008
6009 /*
6010 * Do the local lock operation and update the rollback list, as required.
6011 * Perform the rollback and return the error if nfsvno_advlock() fails.
6012 */
6013 static int
nfsrv_dolocal(vnode_t vp,struct nfslockfile * lfp,int flags,int oldflags,uint64_t first,uint64_t end,struct nfslockconflict * cfp,NFSPROC_T * p)6014 nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
6015 uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
6016 {
6017 struct nfsrollback *rlp;
6018 int error = 0, ltype, oldltype;
6019
6020 if (flags & NFSLCK_WRITE)
6021 ltype = F_WRLCK;
6022 else if (flags & NFSLCK_READ)
6023 ltype = F_RDLCK;
6024 else
6025 ltype = F_UNLCK;
6026 if (oldflags & NFSLCK_WRITE)
6027 oldltype = F_WRLCK;
6028 else if (oldflags & NFSLCK_READ)
6029 oldltype = F_RDLCK;
6030 else
6031 oldltype = F_UNLCK;
6032 if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
6033 /* nothing to do */
6034 goto out;
6035 error = nfsvno_advlock(vp, ltype, first, end, p);
6036 if (error != 0) {
6037 if (cfp != NULL) {
6038 cfp->cl_clientid.lval[0] = 0;
6039 cfp->cl_clientid.lval[1] = 0;
6040 cfp->cl_first = 0;
6041 cfp->cl_end = NFS64BITSSET;
6042 cfp->cl_flags = NFSLCK_WRITE;
6043 cfp->cl_ownerlen = 5;
6044 NFSBCOPY("LOCAL", cfp->cl_owner, 5);
6045 }
6046 nfsrv_locallock_rollback(vp, lfp, p);
6047 } else if (ltype != F_UNLCK) {
6048 rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
6049 M_WAITOK);
6050 rlp->rlck_first = first;
6051 rlp->rlck_end = end;
6052 rlp->rlck_type = oldltype;
6053 LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
6054 }
6055
6056 out:
6057 NFSEXITCODE(error);
6058 return (error);
6059 }
6060
6061 /*
6062 * Roll back local lock changes and free up the rollback list.
6063 */
6064 static void
nfsrv_locallock_rollback(vnode_t vp,struct nfslockfile * lfp,NFSPROC_T * p)6065 nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
6066 {
6067 struct nfsrollback *rlp, *nrlp;
6068
6069 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
6070 (void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
6071 rlp->rlck_end, p);
6072 free(rlp, M_NFSDROLLBACK);
6073 }
6074 LIST_INIT(&lfp->lf_rollback);
6075 }
6076
6077 /*
6078 * Update local lock list and delete rollback list (ie now committed to the
6079 * local locks). Most of the work is done by the internal function.
6080 */
6081 static void
nfsrv_locallock_commit(struct nfslockfile * lfp,int flags,uint64_t first,uint64_t end)6082 nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
6083 uint64_t end)
6084 {
6085 struct nfsrollback *rlp, *nrlp;
6086 struct nfslock *new_lop, *other_lop;
6087
6088 new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
6089 if (flags & (NFSLCK_READ | NFSLCK_WRITE))
6090 other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
6091 M_WAITOK);
6092 else
6093 other_lop = NULL;
6094 new_lop->lo_flags = flags;
6095 new_lop->lo_first = first;
6096 new_lop->lo_end = end;
6097 nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
6098 if (new_lop != NULL)
6099 free(new_lop, M_NFSDLOCK);
6100 if (other_lop != NULL)
6101 free(other_lop, M_NFSDLOCK);
6102
6103 /* and get rid of the rollback list */
6104 LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
6105 free(rlp, M_NFSDROLLBACK);
6106 LIST_INIT(&lfp->lf_rollback);
6107 }
6108
6109 /*
6110 * Lock the struct nfslockfile for local lock updating.
6111 */
6112 static void
nfsrv_locklf(struct nfslockfile * lfp)6113 nfsrv_locklf(struct nfslockfile *lfp)
6114 {
6115 int gotlock;
6116
6117 /* lf_usecount ensures *lfp won't be free'd */
6118 lfp->lf_usecount++;
6119 do {
6120 gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
6121 NFSSTATEMUTEXPTR, NULL);
6122 } while (gotlock == 0);
6123 lfp->lf_usecount--;
6124 }
6125
6126 /*
6127 * Unlock the struct nfslockfile after local lock updating.
6128 */
6129 static void
nfsrv_unlocklf(struct nfslockfile * lfp)6130 nfsrv_unlocklf(struct nfslockfile *lfp)
6131 {
6132
6133 nfsv4_unlock(&lfp->lf_locallock_lck, 0);
6134 }
6135
6136 /*
6137 * Clear out all state for the NFSv4 server.
6138 * Must be called by a thread that can sleep when no nfsds are running.
6139 */
6140 void
nfsrv_throwawayallstate(NFSPROC_T * p)6141 nfsrv_throwawayallstate(NFSPROC_T *p)
6142 {
6143 struct nfsclient *clp, *nclp;
6144 struct nfslockfile *lfp, *nlfp;
6145 int i;
6146
6147 /*
6148 * For each client, clean out the state and then free the structure.
6149 */
6150 for (i = 0; i < nfsrv_clienthashsize; i++) {
6151 LIST_FOREACH_SAFE(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash,
6152 nclp) {
6153 nfsrv_cleanclient(clp, p);
6154 nfsrv_freedeleglist(&clp->lc_deleg);
6155 nfsrv_freedeleglist(&clp->lc_olddeleg);
6156 free(clp->lc_stateid, M_NFSDCLIENT);
6157 free(clp, M_NFSDCLIENT);
6158 }
6159 }
6160
6161 /*
6162 * Also, free up any remaining lock file structures.
6163 */
6164 for (i = 0; i < nfsrv_lockhashsize; i++) {
6165 LIST_FOREACH_SAFE(lfp, &NFSD_VNET(nfslockhash)[i], lf_hash,
6166 nlfp) {
6167 printf("nfsd unload: fnd a lock file struct\n");
6168 nfsrv_freenfslockfile(lfp);
6169 }
6170 }
6171
6172 /* And get rid of the deviceid structures and layouts. */
6173 nfsrv_freealllayoutsanddevids();
6174 }
6175
6176 /*
6177 * Check the sequence# for the session and slot provided as an argument.
6178 * Also, renew the lease if the session will return NFS_OK.
6179 */
6180 int
nfsrv_checksequence(struct nfsrv_descript * nd,uint32_t sequenceid,uint32_t * highest_slotidp,uint32_t * target_highest_slotidp,int cache_this,uint32_t * sflagsp,NFSPROC_T * p)6181 nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid,
6182 uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this,
6183 uint32_t *sflagsp, NFSPROC_T *p)
6184 {
6185 struct nfsdsession *sep;
6186 struct nfssessionhash *shp;
6187 int error;
6188
6189 shp = NFSSESSIONHASH(nd->nd_sessionid);
6190 NFSLOCKSESSION(shp);
6191 sep = nfsrv_findsession(nd->nd_sessionid);
6192 if (sep == NULL) {
6193 NFSUNLOCKSESSION(shp);
6194 return (NFSERR_BADSESSION);
6195 }
6196 error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp,
6197 sep->sess_slots, NULL, NFSV4_SLOTS - 1);
6198 if (error != 0) {
6199 NFSUNLOCKSESSION(shp);
6200 return (error);
6201 }
6202 if (cache_this != 0)
6203 nd->nd_flag |= ND_SAVEREPLY;
6204 /* Renew the lease. */
6205 sep->sess_clp->lc_expiry = nfsrv_leaseexpiry();
6206 nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval;
6207 nd->nd_flag |= ND_IMPLIEDCLID;
6208
6209 /* Handle the SP4_MECH_CRED case for NFSv4.1/4.2. */
6210 if ((sep->sess_clp->lc_flags & LCL_MACHCRED) != 0 &&
6211 (nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
6212 nd->nd_princlen == sep->sess_clp->lc_namelen &&
6213 !NFSBCMP(sep->sess_clp->lc_name, nd->nd_principal,
6214 nd->nd_princlen)) {
6215 nd->nd_flag |= ND_MACHCRED;
6216 NFSSET_OPBIT(&nd->nd_allowops, &sep->sess_clp->lc_allowops);
6217 }
6218
6219 /* Save maximum request and reply sizes. */
6220 nd->nd_maxreq = sep->sess_maxreq;
6221 nd->nd_maxresp = sep->sess_maxresp;
6222
6223 *sflagsp = 0;
6224 if (sep->sess_clp->lc_req.nr_client == NULL ||
6225 (sep->sess_clp->lc_flags & LCL_CBDOWN) != 0)
6226 *sflagsp |= NFSV4SEQ_CBPATHDOWN;
6227 NFSUNLOCKSESSION(shp);
6228 if (error == NFSERR_EXPIRED) {
6229 *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED;
6230 error = 0;
6231 } else if (error == NFSERR_ADMINREVOKED) {
6232 *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED;
6233 error = 0;
6234 }
6235 *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1;
6236 return (0);
6237 }
6238
6239 /*
6240 * Check/set reclaim complete for this session/clientid.
6241 */
6242 int
nfsrv_checkreclaimcomplete(struct nfsrv_descript * nd,int onefs)6243 nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd, int onefs)
6244 {
6245 struct nfsdsession *sep;
6246 struct nfssessionhash *shp;
6247 int error = 0;
6248
6249 shp = NFSSESSIONHASH(nd->nd_sessionid);
6250 NFSLOCKSTATE();
6251 NFSLOCKSESSION(shp);
6252 sep = nfsrv_findsession(nd->nd_sessionid);
6253 if (sep == NULL) {
6254 NFSUNLOCKSESSION(shp);
6255 NFSUNLOCKSTATE();
6256 return (NFSERR_BADSESSION);
6257 }
6258
6259 if (onefs != 0)
6260 sep->sess_clp->lc_flags |= LCL_RECLAIMONEFS;
6261 /* Check to see if reclaim complete has already happened. */
6262 else if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0)
6263 error = NFSERR_COMPLETEALREADY;
6264 else {
6265 sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE;
6266 nfsrv_markreclaim(sep->sess_clp);
6267 }
6268 NFSUNLOCKSESSION(shp);
6269 NFSUNLOCKSTATE();
6270 return (error);
6271 }
6272
6273 /*
6274 * Cache the reply in a session slot.
6275 */
6276 void
nfsrv_cache_session(struct nfsrv_descript * nd,struct mbuf ** m)6277 nfsrv_cache_session(struct nfsrv_descript *nd, struct mbuf **m)
6278 {
6279 struct nfsdsession *sep;
6280 struct nfssessionhash *shp;
6281 char *buf, *cp;
6282 #ifdef INET
6283 struct sockaddr_in *sin;
6284 #endif
6285 #ifdef INET6
6286 struct sockaddr_in6 *sin6;
6287 #endif
6288
6289 shp = NFSSESSIONHASH(nd->nd_sessionid);
6290 NFSLOCKSESSION(shp);
6291 sep = nfsrv_findsession(nd->nd_sessionid);
6292 if (sep == NULL) {
6293 NFSUNLOCKSESSION(shp);
6294 if ((NFSD_VNET(nfsrv_stablefirst).nsf_flags &
6295 NFSNSF_GRACEOVER) != 0) {
6296 buf = malloc(INET6_ADDRSTRLEN, M_TEMP, M_WAITOK);
6297 switch (nd->nd_nam->sa_family) {
6298 #ifdef INET
6299 case AF_INET:
6300 sin = (struct sockaddr_in *)nd->nd_nam;
6301 cp = inet_ntop(sin->sin_family,
6302 &sin->sin_addr.s_addr, buf,
6303 INET6_ADDRSTRLEN);
6304 break;
6305 #endif
6306 #ifdef INET6
6307 case AF_INET6:
6308 sin6 = (struct sockaddr_in6 *)nd->nd_nam;
6309 cp = inet_ntop(sin6->sin6_family,
6310 &sin6->sin6_addr, buf, INET6_ADDRSTRLEN);
6311 break;
6312 #endif
6313 default:
6314 cp = NULL;
6315 }
6316 if (cp != NULL)
6317 printf("nfsrv_cache_session: no session "
6318 "IPaddr=%s, check NFS clients for unique "
6319 "/etc/hostid's\n", cp);
6320 else
6321 printf("nfsrv_cache_session: no session, "
6322 "check NFS clients for unique "
6323 "/etc/hostid's\n");
6324 free(buf, M_TEMP);
6325 }
6326 m_freem(*m);
6327 return;
6328 }
6329 nfsv4_seqsess_cacherep(nd->nd_slotid, sep->sess_slots, nd->nd_repstat,
6330 m);
6331 NFSUNLOCKSESSION(shp);
6332 }
6333
6334 /*
6335 * Search for a session that matches the sessionid.
6336 */
6337 static struct nfsdsession *
nfsrv_findsession(uint8_t * sessionid)6338 nfsrv_findsession(uint8_t *sessionid)
6339 {
6340 struct nfsdsession *sep;
6341 struct nfssessionhash *shp;
6342
6343 shp = NFSSESSIONHASH(sessionid);
6344 LIST_FOREACH(sep, &shp->list, sess_hash) {
6345 if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID))
6346 break;
6347 }
6348 return (sep);
6349 }
6350
6351 /*
6352 * Destroy a session.
6353 */
6354 int
nfsrv_destroysession(struct nfsrv_descript * nd,uint8_t * sessionid)6355 nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid)
6356 {
6357 int error, igotlock, samesess;
6358
6359 samesess = 0;
6360 if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID) &&
6361 (nd->nd_flag & ND_HASSEQUENCE) != 0) {
6362 samesess = 1;
6363 if ((nd->nd_flag & ND_LASTOP) == 0)
6364 return (NFSERR_BADSESSION);
6365 }
6366
6367 /* Lock out other nfsd threads */
6368 NFSLOCKV4ROOTMUTEX();
6369 nfsv4_relref(&nfsv4rootfs_lock);
6370 do {
6371 igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
6372 NFSV4ROOTLOCKMUTEXPTR, NULL);
6373 } while (igotlock == 0);
6374 NFSUNLOCKV4ROOTMUTEX();
6375
6376 error = nfsrv_freesession(nd, NULL, sessionid);
6377 if (error == 0 && samesess != 0)
6378 nd->nd_flag &= ~ND_HASSEQUENCE;
6379
6380 NFSLOCKV4ROOTMUTEX();
6381 nfsv4_unlock(&nfsv4rootfs_lock, 1);
6382 NFSUNLOCKV4ROOTMUTEX();
6383 return (error);
6384 }
6385
6386 /*
6387 * Bind a connection to a session.
6388 * For now, only certain variants are supported, since the current session
6389 * structure can only handle a single backchannel entry, which will be
6390 * applied to all connections if it is set.
6391 */
6392 int
nfsrv_bindconnsess(struct nfsrv_descript * nd,uint8_t * sessionid,int * foreaftp)6393 nfsrv_bindconnsess(struct nfsrv_descript *nd, uint8_t *sessionid, int *foreaftp)
6394 {
6395 struct nfssessionhash *shp;
6396 struct nfsdsession *sep;
6397 struct nfsclient *clp;
6398 SVCXPRT *savxprt;
6399 int error;
6400
6401 error = 0;
6402 savxprt = NULL;
6403 shp = NFSSESSIONHASH(sessionid);
6404 NFSLOCKSTATE();
6405 NFSLOCKSESSION(shp);
6406 sep = nfsrv_findsession(sessionid);
6407 if (sep != NULL) {
6408 clp = sep->sess_clp;
6409 error = nfsrv_checkmachcred(NFSV4OP_BINDCONNTOSESS, nd, clp);
6410 if (error != 0)
6411 goto out;
6412 if (*foreaftp == NFSCDFC4_BACK ||
6413 *foreaftp == NFSCDFC4_BACK_OR_BOTH ||
6414 *foreaftp == NFSCDFC4_FORE_OR_BOTH) {
6415 /* Try to set up a backchannel. */
6416 if (clp->lc_req.nr_client == NULL) {
6417 NFSD_DEBUG(2, "nfsrv_bindconnsess: acquire "
6418 "backchannel\n");
6419 clp->lc_req.nr_client = (struct __rpc_client *)
6420 clnt_bck_create(nd->nd_xprt->xp_socket,
6421 sep->sess_cbprogram, NFSV4_CBVERS);
6422 }
6423 if (clp->lc_req.nr_client != NULL) {
6424 NFSD_DEBUG(2, "nfsrv_bindconnsess: set up "
6425 "backchannel\n");
6426 savxprt = sep->sess_cbsess.nfsess_xprt;
6427 SVC_ACQUIRE(nd->nd_xprt);
6428 CLNT_ACQUIRE(clp->lc_req.nr_client);
6429 nd->nd_xprt->xp_p2 = clp->lc_req.nr_client;
6430 /* Disable idle timeout. */
6431 nd->nd_xprt->xp_idletimeout = 0;
6432 sep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
6433 sep->sess_crflags |= NFSV4CRSESS_CONNBACKCHAN;
6434 clp->lc_flags |= LCL_DONEBINDCONN |
6435 LCL_NEEDSCBNULL;
6436 clp->lc_flags &= ~LCL_CBDOWN;
6437 if (*foreaftp == NFSCDFS4_BACK)
6438 *foreaftp = NFSCDFS4_BACK;
6439 else
6440 *foreaftp = NFSCDFS4_BOTH;
6441 } else if (*foreaftp != NFSCDFC4_BACK) {
6442 NFSD_DEBUG(2, "nfsrv_bindconnsess: can't set "
6443 "up backchannel\n");
6444 sep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
6445 clp->lc_flags |= LCL_DONEBINDCONN;
6446 *foreaftp = NFSCDFS4_FORE;
6447 } else {
6448 error = NFSERR_NOTSUPP;
6449 printf("nfsrv_bindconnsess: Can't add "
6450 "backchannel\n");
6451 }
6452 } else {
6453 NFSD_DEBUG(2, "nfsrv_bindconnsess: Set forechannel\n");
6454 clp->lc_flags |= LCL_DONEBINDCONN;
6455 *foreaftp = NFSCDFS4_FORE;
6456 }
6457 } else
6458 error = NFSERR_BADSESSION;
6459 out:
6460 NFSUNLOCKSESSION(shp);
6461 NFSUNLOCKSTATE();
6462 if (savxprt != NULL)
6463 SVC_RELEASE(savxprt);
6464 return (error);
6465 }
6466
6467 /*
6468 * Free up a session structure.
6469 */
6470 static int
nfsrv_freesession(struct nfsrv_descript * nd,struct nfsdsession * sep,uint8_t * sessionid)6471 nfsrv_freesession(struct nfsrv_descript *nd, struct nfsdsession *sep,
6472 uint8_t *sessionid)
6473 {
6474 struct nfssessionhash *shp;
6475 int i;
6476
6477 NFSLOCKSTATE();
6478 if (sep == NULL) {
6479 shp = NFSSESSIONHASH(sessionid);
6480 NFSLOCKSESSION(shp);
6481 sep = nfsrv_findsession(sessionid);
6482 } else {
6483 shp = NFSSESSIONHASH(sep->sess_sessionid);
6484 NFSLOCKSESSION(shp);
6485 }
6486 if (sep != NULL) {
6487 /* Check for the SP4_MACH_CRED case. */
6488 if (nd != NULL && nfsrv_checkmachcred(NFSV4OP_DESTROYSESSION,
6489 nd, sep->sess_clp) != 0) {
6490 NFSUNLOCKSESSION(shp);
6491 NFSUNLOCKSTATE();
6492 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
6493 }
6494
6495 sep->sess_refcnt--;
6496 if (sep->sess_refcnt > 0) {
6497 NFSUNLOCKSESSION(shp);
6498 NFSUNLOCKSTATE();
6499 return (NFSERR_BACKCHANBUSY);
6500 }
6501 LIST_REMOVE(sep, sess_hash);
6502 LIST_REMOVE(sep, sess_list);
6503 }
6504 NFSUNLOCKSESSION(shp);
6505 NFSUNLOCKSTATE();
6506 if (sep == NULL)
6507 return (NFSERR_BADSESSION);
6508 for (i = 0; i < NFSV4_SLOTS; i++)
6509 if (sep->sess_slots[i].nfssl_reply != NULL)
6510 m_freem(sep->sess_slots[i].nfssl_reply);
6511 if (sep->sess_cbsess.nfsess_xprt != NULL)
6512 SVC_RELEASE(sep->sess_cbsess.nfsess_xprt);
6513 free(sep, M_NFSDSESSION);
6514 return (0);
6515 }
6516
6517 /*
6518 * Free a stateid.
6519 * RFC5661 says that it should fail when there are associated opens, locks
6520 * or delegations. Since stateids represent opens, I don't see how you can
6521 * free an open stateid (it will be free'd when closed), so this function
6522 * only works for lock stateids (freeing the lock_owner) or delegations.
6523 */
6524 int
nfsrv_freestateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6525 nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6526 NFSPROC_T *p)
6527 {
6528 struct nfsclient *clp;
6529 struct nfsstate *stp;
6530 int error;
6531
6532 NFSLOCKSTATE();
6533 /*
6534 * Look up the stateid
6535 */
6536 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6537 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6538 if (error == 0) {
6539 /* First, check for a delegation. */
6540 LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
6541 if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
6542 NFSX_STATEIDOTHER))
6543 break;
6544 }
6545 if (stp != NULL) {
6546 nfsrv_freedeleg(stp);
6547 NFSUNLOCKSTATE();
6548 return (error);
6549 }
6550 }
6551 /* Not a delegation, try for a lock_owner. */
6552 if (error == 0)
6553 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6554 if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
6555 NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
6556 /* Not a lock_owner stateid. */
6557 error = NFSERR_LOCKSHELD;
6558 if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
6559 error = NFSERR_LOCKSHELD;
6560 if (error == 0)
6561 nfsrv_freelockowner(stp, NULL, 0, p);
6562 NFSUNLOCKSTATE();
6563 return (error);
6564 }
6565
6566 /*
6567 * Test a stateid.
6568 */
6569 int
nfsrv_teststateid(struct nfsrv_descript * nd,nfsv4stateid_t * stateidp,NFSPROC_T * p)6570 nfsrv_teststateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp,
6571 NFSPROC_T *p)
6572 {
6573 struct nfsclient *clp;
6574 struct nfsstate *stp;
6575 int error;
6576
6577 NFSLOCKSTATE();
6578 /*
6579 * Look up the stateid
6580 */
6581 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
6582 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
6583 if (error == 0)
6584 error = nfsrv_getstate(clp, stateidp, 0, &stp);
6585 if (error == 0 && stateidp->seqid != 0 &&
6586 SEQ_LT(stateidp->seqid, stp->ls_stateid.seqid))
6587 error = NFSERR_OLDSTATEID;
6588 NFSUNLOCKSTATE();
6589 return (error);
6590 }
6591
6592 /*
6593 * Generate the xdr for an NFSv4.1 CBSequence Operation.
6594 */
6595 static int
nfsv4_setcbsequence(struct nfsrv_descript * nd,struct nfsclient * clp,int dont_replycache,struct nfsdsession ** sepp,int * slotposp)6596 nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
6597 int dont_replycache, struct nfsdsession **sepp, int *slotposp)
6598 {
6599 struct nfsdsession *sep;
6600 uint32_t *tl, slotseq = 0;
6601 int maxslot;
6602 uint8_t sessionid[NFSX_V4SESSIONID];
6603 int error;
6604
6605 error = nfsv4_getcbsession(clp, sepp);
6606 if (error != 0)
6607 return (error);
6608 sep = *sepp;
6609 nfsv4_sequencelookup(NULL, &sep->sess_cbsess, slotposp, &maxslot,
6610 &slotseq, sessionid, true);
6611 KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));
6612
6613 /* Build the Sequence arguments. */
6614 NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
6615 bcopy(sessionid, tl, NFSX_V4SESSIONID);
6616 tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
6617 nd->nd_slotseq = tl;
6618 nd->nd_slotid = *slotposp;
6619 nd->nd_flag |= ND_HASSLOTID;
6620 *tl++ = txdr_unsigned(slotseq);
6621 *tl++ = txdr_unsigned(*slotposp);
6622 *tl++ = txdr_unsigned(maxslot);
6623 if (dont_replycache == 0)
6624 *tl++ = newnfs_true;
6625 else
6626 *tl++ = newnfs_false;
6627 *tl = 0; /* No referring call list, for now. */
6628 nd->nd_flag |= ND_HASSEQUENCE;
6629 return (0);
6630 }
6631
6632 /*
6633 * Get a session for the callback.
6634 */
6635 static int
nfsv4_getcbsession(struct nfsclient * clp,struct nfsdsession ** sepp)6636 nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp)
6637 {
6638 struct nfsdsession *sep;
6639
6640 NFSLOCKSTATE();
6641 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6642 if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0)
6643 break;
6644 }
6645 if (sep == NULL) {
6646 NFSUNLOCKSTATE();
6647 return (NFSERR_BADSESSION);
6648 }
6649 sep->sess_refcnt++;
6650 *sepp = sep;
6651 NFSUNLOCKSTATE();
6652 return (0);
6653 }
6654
6655 /*
6656 * Free up all backchannel xprts. This needs to be done when the nfsd threads
6657 * exit, since those transports will all be going away.
6658 * This is only called after all the nfsd threads are done performing RPCs,
6659 * so locking shouldn't be an issue.
6660 */
6661 void
nfsrv_freeallbackchannel_xprts(void)6662 nfsrv_freeallbackchannel_xprts(void)
6663 {
6664 struct nfsdsession *sep;
6665 struct nfsclient *clp;
6666 SVCXPRT *xprt;
6667 int i;
6668
6669 for (i = 0; i < nfsrv_clienthashsize; i++) {
6670 LIST_FOREACH(clp, &NFSD_VNET(nfsclienthash)[i], lc_hash) {
6671 LIST_FOREACH(sep, &clp->lc_session, sess_list) {
6672 xprt = sep->sess_cbsess.nfsess_xprt;
6673 sep->sess_cbsess.nfsess_xprt = NULL;
6674 if (xprt != NULL)
6675 SVC_RELEASE(xprt);
6676 }
6677 }
6678 }
6679 }
6680
6681 /*
6682 * Do a layout commit. Actually just call nfsrv_updatemdsattr().
6683 * I have no idea if the rest of these arguments will ever be useful?
6684 */
6685 int
nfsrv_layoutcommit(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int hasnewoff,uint64_t newoff,uint64_t offset,uint64_t len,int hasnewmtime,struct timespec * newmtimep,int reclaim,nfsv4stateid_t * stateidp,int maxcnt,char * layp,int * hasnewsizep,uint64_t * newsizep,struct ucred * cred,NFSPROC_T * p)6686 nfsrv_layoutcommit(struct nfsrv_descript *nd, vnode_t vp, int layouttype,
6687 int hasnewoff, uint64_t newoff, uint64_t offset, uint64_t len,
6688 int hasnewmtime, struct timespec *newmtimep, int reclaim,
6689 nfsv4stateid_t *stateidp, int maxcnt, char *layp, int *hasnewsizep,
6690 uint64_t *newsizep, struct ucred *cred, NFSPROC_T *p)
6691 {
6692 struct nfsvattr na;
6693 int error;
6694
6695 error = nfsrv_updatemdsattr(vp, &na, p);
6696 if (error == 0) {
6697 *hasnewsizep = 1;
6698 *newsizep = na.na_size;
6699 }
6700 return (error);
6701 }
6702
/*
 * Try and get a layout.
 *
 * nd - request descriptor; nd_clientid identifies the client
 * vp - vnode of the file the layout is for
 * exp - export information, used to check for a read-only export
 * layouttype - NFSLAYOUT_NFSV4_1_FILES for a File Layout, anything else is
 *	handled as a Flex File Layout
 * iomode - in: requested I/O mode; out: NFSLAYOUTIOMODE_RW or
 *	NFSLAYOUTIOMODE_READ
 * offset, len - out: always set to 0 and UINT64_MAX (whole file)
 * minlen - not used here
 * stateidp - stateid from the request; the seqid is updated when an
 *	existing layout is returned and it is also passed to
 *	nfsrv_addlayout()
 * maxcnt - maximum size of the layout the client will accept
 * retonclose - out: always set to 0
 * layoutlenp, layp - out: length of the layout and the layout xdr
 * cred - not used here
 *
 * Returns 0 upon success or an NFSERR_xxx/errno value.
 */
int
nfsrv_layoutget(struct nfsrv_descript *nd, vnode_t vp, struct nfsexstuff *exp,
    int layouttype, int *iomode, uint64_t *offset, uint64_t *len,
    uint64_t minlen, nfsv4stateid_t *stateidp, int maxcnt, int *retonclose,
    int *layoutlenp, char *layp, struct ucred *cred, NFSPROC_T *p)
{
	struct nfslayouthash *lhyp;
	struct nfslayout *lyp;
	char *devid;
	fhandle_t fh, *dsfhp;
	int error, mirrorcnt;

	/* Without any device ids, no layouts can be issued. */
	if (nfsrv_devidcnt == 0)
		return (NFSERR_UNKNLAYOUTTYPE);

	/* Only whole file layouts are issued, so note a non-zero offset. */
	if (*offset != 0)
		printf("nfsrv_layoutget: off=%ju len=%ju\n", (uintmax_t)*offset,
		    (uintmax_t)*len);
	error = nfsvno_getfh(vp, &fh, p);
	NFSD_DEBUG(4, "layoutget getfh=%d\n", error);
	if (error != 0)
		return (error);

	/*
	 * For now, all layouts are for entire files.
	 * Only issue Read/Write layouts if requested for a non-readonly fs.
	 */
	if (NFSVNO_EXRDONLY(exp)) {
		if (*iomode == NFSLAYOUTIOMODE_RW)
			return (NFSERR_LAYOUTTRYLATER);
		*iomode = NFSLAYOUTIOMODE_READ;
	}
	/* Any I/O mode other than RW is handled as READ. */
	if (*iomode != NFSLAYOUTIOMODE_RW)
		*iomode = NFSLAYOUTIOMODE_READ;

	/*
	 * Check to see if a write layout can be issued for this file.
	 * This is used during mirror recovery to avoid RW layouts being
	 * issued for a file while it is being copied to the recovered
	 * mirror.
	 */
	if (*iomode == NFSLAYOUTIOMODE_RW && nfsrv_dontlayout(&fh) != 0)
		return (NFSERR_LAYOUTTRYLATER);

	*retonclose = 0;
	*offset = 0;
	*len = UINT64_MAX;

	/* First, see if a layout already exists and return if found. */
	lhyp = NFSLAYOUTHASH(&fh);
	NFSLOCKLAYOUT(lhyp);
	error = nfsrv_findlayout(&nd->nd_clientid, &fh, layouttype, p, &lyp);
	NFSD_DEBUG(4, "layoutget findlay=%d\n", error);
	/*
	 * Not sure if the seqid must be the same, so I won't check it.
	 */
	if (error == 0 && (stateidp->other[0] != lyp->lay_stateid.other[0] ||
	    stateidp->other[1] != lyp->lay_stateid.other[1] ||
	    stateidp->other[2] != lyp->lay_stateid.other[2])) {
		if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
			NFSUNLOCKLAYOUT(lhyp);
			NFSD_DEBUG(1, "ret bad stateid\n");
			return (NFSERR_BADSTATEID);
		}
		/*
		 * I believe we get here because there is a race between
		 * the client processing the CBLAYOUTRECALL and the layout
		 * being deleted here on the server.
		 * The client has now done a LayoutGet with a non-layout
		 * stateid, as it would when there is no layout.
		 * As such, free this layout and set error == NFSERR_BADSTATEID
		 * so the code below will create a new layout structure as
		 * would happen if no layout was found.
		 * "lyp" will be set before being used below, but set it NULL
		 * as a safety belt.
		 */
		nfsrv_freelayout(&lhyp->list, lyp);
		lyp = NULL;
		error = NFSERR_BADSTATEID;
	}
	if (error == 0) {
		/* An existing layout with a matching stateid was found. */
		if (lyp->lay_layoutlen > maxcnt) {
			NFSUNLOCKLAYOUT(lhyp);
			NFSD_DEBUG(1, "ret layout too small\n");
			return (NFSERR_TOOSMALL);
		}
		if (*iomode == NFSLAYOUTIOMODE_RW) {
			if ((lyp->lay_flags & NFSLAY_NOSPC) != 0) {
				NFSUNLOCKLAYOUT(lhyp);
				NFSD_DEBUG(1, "ret layout nospace\n");
				return (NFSERR_NOSPC);
			}
			lyp->lay_flags |= NFSLAY_RW;
		} else
			lyp->lay_flags |= NFSLAY_READ;
		NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
		*layoutlenp = lyp->lay_layoutlen;
		/* Bump the seqid, skipping over 0 when it wraps around. */
		if (++lyp->lay_stateid.seqid == 0)
			lyp->lay_stateid.seqid = 1;
		stateidp->seqid = lyp->lay_stateid.seqid;
		NFSUNLOCKLAYOUT(lhyp);
		NFSD_DEBUG(4, "ret fnd layout\n");
		return (0);
	}
	NFSUNLOCKLAYOUT(lhyp);

	/*
	 * Find the device id and file handle.
	 * The temporary arrays are sized for NFSDEV_MAXMIRRORS entries and
	 * are always freed below, whether or not a layout gets built.
	 */
	dsfhp = malloc(sizeof(fhandle_t) * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
	devid = malloc(NFSX_V4DEVICEID * NFSDEV_MAXMIRRORS, M_TEMP, M_WAITOK);
	error = nfsrv_dsgetdevandfh(vp, p, &mirrorcnt, dsfhp, devid);
	NFSD_DEBUG(4, "layoutget devandfh=%d\n", error);
	if (error == 0) {
		/* Build the layout, but only if it fits in maxcnt. */
		if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
			if (NFSX_V4FILELAYOUT > maxcnt)
				error = NFSERR_TOOSMALL;
			else
				lyp = nfsrv_filelayout(nd, *iomode, &fh, dsfhp,
				    devid, vp->v_mount->mnt_stat.f_fsid);
		} else {
			if (NFSX_V4FLEXLAYOUT(mirrorcnt) > maxcnt)
				error = NFSERR_TOOSMALL;
			else
				lyp = nfsrv_flexlayout(nd, *iomode, mirrorcnt,
				    &fh, dsfhp, devid,
				    vp->v_mount->mnt_stat.f_fsid);
		}
	}
	free(dsfhp, M_TEMP);
	free(devid, M_TEMP);
	if (error != 0)
		return (error);

	/*
	 * Now, add this layout to the list.
	 */
	error = nfsrv_addlayout(nd, &lyp, stateidp, layp, layoutlenp, p);
	NFSD_DEBUG(4, "layoutget addl=%d\n", error);
	/*
	 * The lyp will be set to NULL by nfsrv_addlayout() if it
	 * linked the new structure into the lists.
	 */
	free(lyp, M_NFSDSTATE);
	return (error);
}
6850
6851 /*
6852 * Generate a File Layout.
6853 */
6854 static struct nfslayout *
nfsrv_filelayout(struct nfsrv_descript * nd,int iomode,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6855 nfsrv_filelayout(struct nfsrv_descript *nd, int iomode, fhandle_t *fhp,
6856 fhandle_t *dsfhp, char *devid, fsid_t fs)
6857 {
6858 uint32_t *tl;
6859 struct nfslayout *lyp;
6860 uint64_t pattern_offset;
6861
6862 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FILELAYOUT, M_NFSDSTATE,
6863 M_WAITOK | M_ZERO);
6864 lyp->lay_type = NFSLAYOUT_NFSV4_1_FILES;
6865 if (iomode == NFSLAYOUTIOMODE_RW)
6866 lyp->lay_flags = NFSLAY_RW;
6867 else
6868 lyp->lay_flags = NFSLAY_READ;
6869 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6870 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6871 lyp->lay_fsid = fs;
6872 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6873
6874 /* Fill in the xdr for the files layout. */
6875 tl = (uint32_t *)lyp->lay_xdr;
6876 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6877 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6878
6879 /* Set the stripe size to the maximum I/O size. */
6880 *tl++ = txdr_unsigned(nfs_srvmaxio & NFSFLAYUTIL_STRIPE_MASK);
6881 *tl++ = 0; /* 1st stripe index. */
6882 pattern_offset = 0;
6883 txdr_hyper(pattern_offset, tl); tl += 2; /* Pattern offset. */
6884 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6885 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6886 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6887 lyp->lay_layoutlen = NFSX_V4FILELAYOUT;
6888 return (lyp);
6889 }
6890
6891 #define FLEX_OWNERID "999"
6892 #define FLEX_UID0 "0"
6893 /*
6894 * Generate a Flex File Layout.
6895 * The FLEX_OWNERID can be any string of 3 decimal digits. Although this
6896 * string goes on the wire, it isn't supposed to be used by the client,
6897 * since this server uses tight coupling.
6898 * Although not recommended by the spec., if vfs.nfsd.flexlinuxhack=1 use
6899 * a string of "0". This works around the Linux Flex File Layout driver bug
6900 * which uses the synthetic uid/gid strings for the "tightly coupled" case.
6901 */
6902 static struct nfslayout *
nfsrv_flexlayout(struct nfsrv_descript * nd,int iomode,int mirrorcnt,fhandle_t * fhp,fhandle_t * dsfhp,char * devid,fsid_t fs)6903 nfsrv_flexlayout(struct nfsrv_descript *nd, int iomode, int mirrorcnt,
6904 fhandle_t *fhp, fhandle_t *dsfhp, char *devid, fsid_t fs)
6905 {
6906 uint32_t *tl;
6907 struct nfslayout *lyp;
6908 uint64_t lenval;
6909 int i;
6910
6911 lyp = malloc(sizeof(struct nfslayout) + NFSX_V4FLEXLAYOUT(mirrorcnt),
6912 M_NFSDSTATE, M_WAITOK | M_ZERO);
6913 lyp->lay_type = NFSLAYOUT_FLEXFILE;
6914 if (iomode == NFSLAYOUTIOMODE_RW)
6915 lyp->lay_flags = NFSLAY_RW;
6916 else
6917 lyp->lay_flags = NFSLAY_READ;
6918 NFSBCOPY(fhp, &lyp->lay_fh, sizeof(*fhp));
6919 lyp->lay_clientid.qval = nd->nd_clientid.qval;
6920 lyp->lay_fsid = fs;
6921 lyp->lay_mirrorcnt = mirrorcnt;
6922 NFSBCOPY(devid, lyp->lay_deviceid, NFSX_V4DEVICEID);
6923
6924 /* Fill in the xdr for the files layout. */
6925 tl = (uint32_t *)lyp->lay_xdr;
6926 lenval = 0;
6927 txdr_hyper(lenval, tl); tl += 2; /* Stripe unit. */
6928 *tl++ = txdr_unsigned(mirrorcnt); /* # of mirrors. */
6929 for (i = 0; i < mirrorcnt; i++) {
6930 *tl++ = txdr_unsigned(1); /* One stripe. */
6931 NFSBCOPY(devid, tl, NFSX_V4DEVICEID); /* Device ID. */
6932 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6933 devid += NFSX_V4DEVICEID;
6934 *tl++ = txdr_unsigned(1); /* Efficiency. */
6935 *tl++ = 0; /* Proxy Stateid. */
6936 *tl++ = 0x55555555;
6937 *tl++ = 0x55555555;
6938 *tl++ = 0x55555555;
6939 *tl++ = txdr_unsigned(1); /* 1 file handle. */
6940 *tl++ = txdr_unsigned(NFSX_V4PNFSFH);
6941 NFSBCOPY(dsfhp, tl, sizeof(*dsfhp));
6942 tl += (NFSM_RNDUP(NFSX_V4PNFSFH) / NFSX_UNSIGNED);
6943 dsfhp++;
6944 if (nfsrv_flexlinuxhack != 0) {
6945 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
6946 *tl = 0; /* 0 pad string. */
6947 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
6948 *tl++ = txdr_unsigned(strlen(FLEX_UID0));
6949 *tl = 0; /* 0 pad string. */
6950 NFSBCOPY(FLEX_UID0, tl++, strlen(FLEX_UID0));
6951 } else {
6952 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
6953 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
6954 *tl++ = txdr_unsigned(strlen(FLEX_OWNERID));
6955 NFSBCOPY(FLEX_OWNERID, tl++, NFSX_UNSIGNED);
6956 }
6957 }
6958 *tl++ = txdr_unsigned(0); /* ff_flags. */
6959 *tl = txdr_unsigned(60); /* Status interval hint. */
6960 lyp->lay_layoutlen = NFSX_V4FLEXLAYOUT(mirrorcnt);
6961 return (lyp);
6962 }
6963
6964 /*
6965 * Parse and process Flex File errors returned via LayoutReturn.
6966 */
6967 static void
nfsrv_flexlayouterr(struct nfsrv_descript * nd,uint32_t * layp,int maxcnt,NFSPROC_T * p)6968 nfsrv_flexlayouterr(struct nfsrv_descript *nd, uint32_t *layp, int maxcnt,
6969 NFSPROC_T *p)
6970 {
6971 uint32_t *tl;
6972 int cnt, errcnt, i, j, opnum, stat;
6973 char devid[NFSX_V4DEVICEID];
6974
6975 tl = layp;
6976 maxcnt -= NFSX_UNSIGNED;
6977 if (maxcnt > 0)
6978 cnt = fxdr_unsigned(int, *tl++);
6979 else
6980 cnt = 0;
6981 NFSD_DEBUG(4, "flexlayouterr cnt=%d\n", cnt);
6982 for (i = 0; i < cnt; i++) {
6983 maxcnt -= NFSX_STATEID + 2 * NFSX_HYPER +
6984 NFSX_UNSIGNED;
6985 if (maxcnt <= 0)
6986 break;
6987 /* Skip offset, length and stateid for now. */
6988 tl += (4 + NFSX_STATEID / NFSX_UNSIGNED);
6989 errcnt = fxdr_unsigned(int, *tl++);
6990 NFSD_DEBUG(4, "flexlayouterr errcnt=%d\n", errcnt);
6991 for (j = 0; j < errcnt; j++) {
6992 maxcnt -= NFSX_V4DEVICEID + 2 * NFSX_UNSIGNED;
6993 if (maxcnt < 0)
6994 break;
6995 NFSBCOPY(tl, devid, NFSX_V4DEVICEID);
6996 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED);
6997 stat = fxdr_unsigned(int, *tl++);
6998 opnum = fxdr_unsigned(int, *tl++);
6999 NFSD_DEBUG(4, "flexlayouterr op=%d stat=%d\n", opnum,
7000 stat);
7001 /*
7002 * Except for NFSERR_ACCES, NFSERR_STALE and
7003 * NFSERR_NOSPC errors, disable the mirror.
7004 */
7005 if (stat != NFSERR_ACCES && stat != NFSERR_STALE &&
7006 stat != NFSERR_NOSPC)
7007 nfsrv_delds(devid, p);
7008
7009 /* For NFSERR_NOSPC, mark all devids and layouts. */
7010 if (stat == NFSERR_NOSPC)
7011 nfsrv_marknospc(devid, true);
7012 }
7013 }
7014 }
7015
7016 /*
7017 * This function removes all flex file layouts which has a mirror with
7018 * a device id that matches the argument.
7019 * Called when the DS represented by the device id has failed.
7020 */
7021 void
nfsrv_flexmirrordel(char * devid,NFSPROC_T * p)7022 nfsrv_flexmirrordel(char *devid, NFSPROC_T *p)
7023 {
7024 uint32_t *tl;
7025 struct nfslayout *lyp, *nlyp;
7026 struct nfslayouthash *lhyp;
7027 struct nfslayouthead loclyp;
7028 int i, j;
7029
7030 NFSD_DEBUG(4, "flexmirrordel\n");
7031 /* Move all layouts found onto a local list. */
7032 TAILQ_INIT(&loclyp);
7033 for (i = 0; i < nfsrv_layouthashsize; i++) {
7034 lhyp = &nfslayouthash[i];
7035 NFSLOCKLAYOUT(lhyp);
7036 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7037 if (lyp->lay_type == NFSLAYOUT_FLEXFILE &&
7038 lyp->lay_mirrorcnt > 1) {
7039 NFSD_DEBUG(4, "possible match\n");
7040 tl = lyp->lay_xdr;
7041 tl += 3;
7042 for (j = 0; j < lyp->lay_mirrorcnt; j++) {
7043 tl++;
7044 if (NFSBCMP(devid, tl, NFSX_V4DEVICEID)
7045 == 0) {
7046 /* Found one. */
7047 NFSD_DEBUG(4, "fnd one\n");
7048 TAILQ_REMOVE(&lhyp->list, lyp,
7049 lay_list);
7050 TAILQ_INSERT_HEAD(&loclyp, lyp,
7051 lay_list);
7052 break;
7053 }
7054 tl += (NFSX_V4DEVICEID / NFSX_UNSIGNED +
7055 NFSM_RNDUP(NFSX_V4PNFSFH) /
7056 NFSX_UNSIGNED + 11 * NFSX_UNSIGNED);
7057 }
7058 }
7059 }
7060 NFSUNLOCKLAYOUT(lhyp);
7061 }
7062
7063 /* Now, try to do a Layout recall for each one found. */
7064 TAILQ_FOREACH_SAFE(lyp, &loclyp, lay_list, nlyp) {
7065 NFSD_DEBUG(4, "do layout recall\n");
7066 /*
7067 * The layout stateid.seqid needs to be incremented
7068 * before doing a LAYOUT_RECALL callback.
7069 */
7070 if (++lyp->lay_stateid.seqid == 0)
7071 lyp->lay_stateid.seqid = 1;
7072 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
7073 &lyp->lay_fh, lyp, 1, lyp->lay_type, p);
7074 nfsrv_freelayout(&loclyp, lyp);
7075 }
7076 }
7077
7078 /*
7079 * Do a recall callback to the client for this layout.
7080 */
7081 static int
nfsrv_recalllayout(nfsquad_t clid,nfsv4stateid_t * stateidp,fhandle_t * fhp,struct nfslayout * lyp,int changed,int laytype,NFSPROC_T * p)7082 nfsrv_recalllayout(nfsquad_t clid, nfsv4stateid_t *stateidp, fhandle_t *fhp,
7083 struct nfslayout *lyp, int changed, int laytype, NFSPROC_T *p)
7084 {
7085 struct nfsclient *clp;
7086 int error;
7087
7088 NFSD_DEBUG(4, "nfsrv_recalllayout\n");
7089 error = nfsrv_getclient(clid, 0, &clp, NULL, (nfsquad_t)((u_quad_t)0),
7090 0, NULL, p);
7091 NFSD_DEBUG(4, "aft nfsrv_getclient=%d\n", error);
7092 if (error != 0) {
7093 printf("nfsrv_recalllayout: getclient err=%d\n", error);
7094 return (error);
7095 }
7096 if ((clp->lc_flags & LCL_NFSV41) != 0) {
7097 error = nfsrv_docallback(clp, NFSV4OP_CBLAYOUTRECALL,
7098 stateidp, changed, fhp, NULL, NULL, laytype, p);
7099 /* If lyp != NULL, handle an error return here. */
7100 if (error != 0 && lyp != NULL) {
7101 NFSDRECALLLOCK();
7102 /*
7103 * Mark it returned, since no layout recall
7104 * has been done.
7105 * All errors seem to be non-recoverable, although
7106 * NFSERR_NOMATCHLAYOUT is a normal event.
7107 */
7108 if ((lyp->lay_flags & NFSLAY_RECALL) != 0) {
7109 lyp->lay_flags |= NFSLAY_RETURNED;
7110 wakeup(lyp);
7111 }
7112 NFSDRECALLUNLOCK();
7113 if (error != NFSERR_NOMATCHLAYOUT)
7114 printf("nfsrv_recalllayout: err=%d\n", error);
7115 }
7116 } else
7117 printf("nfsrv_recalllayout: clp not NFSv4.1\n");
7118 return (error);
7119 }
7120
7121 /*
7122 * Find a layout to recall when we exceed our high water mark.
7123 */
7124 void
nfsrv_recalloldlayout(NFSPROC_T * p)7125 nfsrv_recalloldlayout(NFSPROC_T *p)
7126 {
7127 struct nfslayouthash *lhyp;
7128 struct nfslayout *lyp;
7129 nfsquad_t clientid;
7130 nfsv4stateid_t stateid;
7131 fhandle_t fh;
7132 int error, laytype = 0, ret;
7133
7134 lhyp = &nfslayouthash[arc4random() % nfsrv_layouthashsize];
7135 NFSLOCKLAYOUT(lhyp);
7136 TAILQ_FOREACH_REVERSE(lyp, &lhyp->list, nfslayouthead, lay_list) {
7137 if ((lyp->lay_flags & NFSLAY_CALLB) == 0) {
7138 lyp->lay_flags |= NFSLAY_CALLB;
7139 /*
7140 * The layout stateid.seqid needs to be incremented
7141 * before doing a LAYOUT_RECALL callback.
7142 */
7143 if (++lyp->lay_stateid.seqid == 0)
7144 lyp->lay_stateid.seqid = 1;
7145 clientid = lyp->lay_clientid;
7146 stateid = lyp->lay_stateid;
7147 NFSBCOPY(&lyp->lay_fh, &fh, sizeof(fh));
7148 laytype = lyp->lay_type;
7149 break;
7150 }
7151 }
7152 NFSUNLOCKLAYOUT(lhyp);
7153 if (lyp != NULL) {
7154 error = nfsrv_recalllayout(clientid, &stateid, &fh, NULL, 0,
7155 laytype, p);
7156 if (error != 0 && error != NFSERR_NOMATCHLAYOUT)
7157 NFSD_DEBUG(4, "recallold=%d\n", error);
7158 if (error != 0) {
7159 NFSLOCKLAYOUT(lhyp);
7160 /*
7161 * Since the hash list was unlocked, we need to
7162 * find it again.
7163 */
7164 ret = nfsrv_findlayout(&clientid, &fh, laytype, p,
7165 &lyp);
7166 if (ret == 0 &&
7167 (lyp->lay_flags & NFSLAY_CALLB) != 0 &&
7168 lyp->lay_stateid.other[0] == stateid.other[0] &&
7169 lyp->lay_stateid.other[1] == stateid.other[1] &&
7170 lyp->lay_stateid.other[2] == stateid.other[2]) {
7171 /*
7172 * The client no longer knows this layout, so
7173 * it can be free'd now.
7174 */
7175 if (error == NFSERR_NOMATCHLAYOUT)
7176 nfsrv_freelayout(&lhyp->list, lyp);
7177 else {
7178 /*
7179 * Leave it to be tried later by
7180 * clearing NFSLAY_CALLB and moving
7181 * it to the head of the list, so it
7182 * won't be tried again for a while.
7183 */
7184 lyp->lay_flags &= ~NFSLAY_CALLB;
7185 TAILQ_REMOVE(&lhyp->list, lyp,
7186 lay_list);
7187 TAILQ_INSERT_HEAD(&lhyp->list, lyp,
7188 lay_list);
7189 }
7190 }
7191 NFSUNLOCKLAYOUT(lhyp);
7192 }
7193 }
7194 }
7195
7196 /*
7197 * Try and return layout(s).
7198 */
7199 int
nfsrv_layoutreturn(struct nfsrv_descript * nd,vnode_t vp,int layouttype,int iomode,uint64_t offset,uint64_t len,int reclaim,int kind,nfsv4stateid_t * stateidp,int maxcnt,uint32_t * layp,int * fndp,struct ucred * cred,NFSPROC_T * p)7200 nfsrv_layoutreturn(struct nfsrv_descript *nd, vnode_t vp,
7201 int layouttype, int iomode, uint64_t offset, uint64_t len, int reclaim,
7202 int kind, nfsv4stateid_t *stateidp, int maxcnt, uint32_t *layp, int *fndp,
7203 struct ucred *cred, NFSPROC_T *p)
7204 {
7205 struct nfsvattr na;
7206 struct nfslayouthash *lhyp;
7207 struct nfslayout *lyp;
7208 fhandle_t fh;
7209 int error = 0;
7210
7211 *fndp = 0;
7212 if (kind == NFSV4LAYOUTRET_FILE) {
7213 error = nfsvno_getfh(vp, &fh, p);
7214 if (error == 0) {
7215 error = nfsrv_updatemdsattr(vp, &na, p);
7216 if (error != 0)
7217 printf("nfsrv_layoutreturn: updatemdsattr"
7218 " failed=%d\n", error);
7219 }
7220 if (error == 0) {
7221 if (reclaim == newnfs_true) {
7222 error = nfsrv_checkgrace(NULL, NULL,
7223 NFSLCK_RECLAIM);
7224 if (error != NFSERR_NOGRACE)
7225 error = 0;
7226 return (error);
7227 }
7228 lhyp = NFSLAYOUTHASH(&fh);
7229 NFSDRECALLLOCK();
7230 NFSLOCKLAYOUT(lhyp);
7231 error = nfsrv_findlayout(&nd->nd_clientid, &fh,
7232 layouttype, p, &lyp);
7233 NFSD_DEBUG(4, "layoutret findlay=%d\n", error);
7234 if (error == 0 &&
7235 stateidp->other[0] == lyp->lay_stateid.other[0] &&
7236 stateidp->other[1] == lyp->lay_stateid.other[1] &&
7237 stateidp->other[2] == lyp->lay_stateid.other[2]) {
7238 NFSD_DEBUG(4, "nfsrv_layoutreturn: stateid %d"
7239 " %x %x %x laystateid %d %x %x %x"
7240 " off=%ju len=%ju flgs=0x%x\n",
7241 stateidp->seqid, stateidp->other[0],
7242 stateidp->other[1], stateidp->other[2],
7243 lyp->lay_stateid.seqid,
7244 lyp->lay_stateid.other[0],
7245 lyp->lay_stateid.other[1],
7246 lyp->lay_stateid.other[2],
7247 (uintmax_t)offset, (uintmax_t)len,
7248 lyp->lay_flags);
7249 if (++lyp->lay_stateid.seqid == 0)
7250 lyp->lay_stateid.seqid = 1;
7251 stateidp->seqid = lyp->lay_stateid.seqid;
7252 if (offset == 0 && len == UINT64_MAX) {
7253 if ((iomode & NFSLAYOUTIOMODE_READ) !=
7254 0)
7255 lyp->lay_flags &= ~NFSLAY_READ;
7256 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7257 lyp->lay_flags &= ~NFSLAY_RW;
7258 if ((lyp->lay_flags & (NFSLAY_READ |
7259 NFSLAY_RW)) == 0)
7260 nfsrv_freelayout(&lhyp->list,
7261 lyp);
7262 else
7263 *fndp = 1;
7264 } else
7265 *fndp = 1;
7266 }
7267 NFSUNLOCKLAYOUT(lhyp);
7268 /* Search the nfsrv_recalllist for a match. */
7269 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
7270 if (NFSBCMP(&lyp->lay_fh, &fh,
7271 sizeof(fh)) == 0 &&
7272 lyp->lay_clientid.qval ==
7273 nd->nd_clientid.qval &&
7274 stateidp->other[0] ==
7275 lyp->lay_stateid.other[0] &&
7276 stateidp->other[1] ==
7277 lyp->lay_stateid.other[1] &&
7278 stateidp->other[2] ==
7279 lyp->lay_stateid.other[2]) {
7280 lyp->lay_flags |= NFSLAY_RETURNED;
7281 wakeup(lyp);
7282 error = 0;
7283 }
7284 }
7285 NFSDRECALLUNLOCK();
7286 }
7287 if (layouttype == NFSLAYOUT_FLEXFILE && layp != NULL)
7288 nfsrv_flexlayouterr(nd, layp, maxcnt, p);
7289 } else if (kind == NFSV4LAYOUTRET_FSID)
7290 nfsrv_freelayouts(&nd->nd_clientid,
7291 &vp->v_mount->mnt_stat.f_fsid, layouttype, iomode);
7292 else if (kind == NFSV4LAYOUTRET_ALL)
7293 nfsrv_freelayouts(&nd->nd_clientid, NULL, layouttype, iomode);
7294 else
7295 error = NFSERR_INVAL;
7296 if (error == -1)
7297 error = 0;
7298 return (error);
7299 }
7300
7301 /*
7302 * Look for an existing layout.
7303 */
7304 static int
nfsrv_findlayout(nfsquad_t * clientidp,fhandle_t * fhp,int laytype,NFSPROC_T * p,struct nfslayout ** lypp)7305 nfsrv_findlayout(nfsquad_t *clientidp, fhandle_t *fhp, int laytype,
7306 NFSPROC_T *p, struct nfslayout **lypp)
7307 {
7308 struct nfslayouthash *lhyp;
7309 struct nfslayout *lyp;
7310 int ret;
7311
7312 *lypp = NULL;
7313 ret = 0;
7314 lhyp = NFSLAYOUTHASH(fhp);
7315 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
7316 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7317 lyp->lay_clientid.qval == clientidp->qval &&
7318 lyp->lay_type == laytype)
7319 break;
7320 }
7321 if (lyp != NULL)
7322 *lypp = lyp;
7323 else
7324 ret = -1;
7325 return (ret);
7326 }
7327
7328 /*
7329 * Add the new layout, as required.
7330 */
7331 static int
nfsrv_addlayout(struct nfsrv_descript * nd,struct nfslayout ** lypp,nfsv4stateid_t * stateidp,char * layp,int * layoutlenp,NFSPROC_T * p)7332 nfsrv_addlayout(struct nfsrv_descript *nd, struct nfslayout **lypp,
7333 nfsv4stateid_t *stateidp, char *layp, int *layoutlenp, NFSPROC_T *p)
7334 {
7335 struct nfsclient *clp;
7336 struct nfslayouthash *lhyp;
7337 struct nfslayout *lyp, *nlyp;
7338 fhandle_t *fhp;
7339 int error;
7340
7341 KASSERT((nd->nd_flag & ND_IMPLIEDCLID) != 0,
7342 ("nfsrv_layoutget: no nd_clientid\n"));
7343 lyp = *lypp;
7344 fhp = &lyp->lay_fh;
7345 NFSLOCKSTATE();
7346 error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp,
7347 NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p);
7348 if (error != 0) {
7349 NFSUNLOCKSTATE();
7350 return (error);
7351 }
7352 lyp->lay_stateid.seqid = stateidp->seqid = 1;
7353 lyp->lay_stateid.other[0] = stateidp->other[0] =
7354 clp->lc_clientid.lval[0];
7355 lyp->lay_stateid.other[1] = stateidp->other[1] =
7356 clp->lc_clientid.lval[1];
7357 lyp->lay_stateid.other[2] = stateidp->other[2] =
7358 nfsrv_nextstateindex(clp);
7359 NFSUNLOCKSTATE();
7360
7361 lhyp = NFSLAYOUTHASH(fhp);
7362 NFSLOCKLAYOUT(lhyp);
7363 TAILQ_FOREACH(nlyp, &lhyp->list, lay_list) {
7364 if (NFSBCMP(&nlyp->lay_fh, fhp, sizeof(*fhp)) == 0 &&
7365 nlyp->lay_clientid.qval == nd->nd_clientid.qval)
7366 break;
7367 }
7368 if (nlyp != NULL) {
7369 /* A layout already exists, so use it. */
7370 nlyp->lay_flags |= (lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW));
7371 NFSBCOPY(nlyp->lay_xdr, layp, nlyp->lay_layoutlen);
7372 *layoutlenp = nlyp->lay_layoutlen;
7373 if (++nlyp->lay_stateid.seqid == 0)
7374 nlyp->lay_stateid.seqid = 1;
7375 stateidp->seqid = nlyp->lay_stateid.seqid;
7376 stateidp->other[0] = nlyp->lay_stateid.other[0];
7377 stateidp->other[1] = nlyp->lay_stateid.other[1];
7378 stateidp->other[2] = nlyp->lay_stateid.other[2];
7379 NFSUNLOCKLAYOUT(lhyp);
7380 return (0);
7381 }
7382
7383 /* Insert the new layout in the lists. */
7384 *lypp = NULL;
7385 atomic_add_int(&nfsrv_layoutcnt, 1);
7386 NFSD_VNET(nfsstatsv1_p)->srvlayouts++;
7387 NFSBCOPY(lyp->lay_xdr, layp, lyp->lay_layoutlen);
7388 *layoutlenp = lyp->lay_layoutlen;
7389 TAILQ_INSERT_HEAD(&lhyp->list, lyp, lay_list);
7390 NFSUNLOCKLAYOUT(lhyp);
7391 return (0);
7392 }
7393
7394 /*
7395 * Get the devinfo for a deviceid.
7396 */
7397 int
nfsrv_getdevinfo(char * devid,int layouttype,uint32_t * maxcnt,uint32_t * notify,int * devaddrlen,char ** devaddr)7398 nfsrv_getdevinfo(char *devid, int layouttype, uint32_t *maxcnt,
7399 uint32_t *notify, int *devaddrlen, char **devaddr)
7400 {
7401 struct nfsdevice *ds;
7402
7403 if ((layouttype != NFSLAYOUT_NFSV4_1_FILES && layouttype !=
7404 NFSLAYOUT_FLEXFILE) ||
7405 (nfsrv_maxpnfsmirror > 1 && layouttype == NFSLAYOUT_NFSV4_1_FILES))
7406 return (NFSERR_UNKNLAYOUTTYPE);
7407
7408 /*
7409 * Now, search for the device id. Note that the structures won't go
7410 * away, but the order changes in the list. As such, the lock only
7411 * needs to be held during the search through the list.
7412 */
7413 NFSDDSLOCK();
7414 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7415 if (NFSBCMP(devid, ds->nfsdev_deviceid, NFSX_V4DEVICEID) == 0 &&
7416 ds->nfsdev_nmp != NULL)
7417 break;
7418 }
7419 NFSDDSUNLOCK();
7420 if (ds == NULL)
7421 return (NFSERR_NOENT);
7422
7423 /* If the correct nfsdev_XXXXaddrlen is > 0, we have the device info. */
7424 *devaddrlen = 0;
7425 if (layouttype == NFSLAYOUT_NFSV4_1_FILES) {
7426 *devaddrlen = ds->nfsdev_fileaddrlen;
7427 *devaddr = ds->nfsdev_fileaddr;
7428 } else if (layouttype == NFSLAYOUT_FLEXFILE) {
7429 *devaddrlen = ds->nfsdev_flexaddrlen;
7430 *devaddr = ds->nfsdev_flexaddr;
7431 }
7432 if (*devaddrlen == 0)
7433 return (NFSERR_UNKNLAYOUTTYPE);
7434
7435 /*
7436 * The XDR overhead is 3 unsigned values: layout_type,
7437 * length_of_address and notify bitmap.
7438 * If the notify array is changed to not all zeros, the
7439 * count of unsigned values must be increased.
7440 */
7441 if (*maxcnt > 0 && *maxcnt < NFSM_RNDUP(*devaddrlen) +
7442 3 * NFSX_UNSIGNED) {
7443 *maxcnt = NFSM_RNDUP(*devaddrlen) + 3 * NFSX_UNSIGNED;
7444 return (NFSERR_TOOSMALL);
7445 }
7446 return (0);
7447 }
7448
7449 /*
7450 * Free a list of layout state structures.
7451 */
7452 static void
nfsrv_freelayoutlist(nfsquad_t clientid)7453 nfsrv_freelayoutlist(nfsquad_t clientid)
7454 {
7455 struct nfslayouthash *lhyp;
7456 struct nfslayout *lyp, *nlyp;
7457 int i;
7458
7459 for (i = 0; i < nfsrv_layouthashsize; i++) {
7460 lhyp = &nfslayouthash[i];
7461 NFSLOCKLAYOUT(lhyp);
7462 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7463 if (lyp->lay_clientid.qval == clientid.qval)
7464 nfsrv_freelayout(&lhyp->list, lyp);
7465 }
7466 NFSUNLOCKLAYOUT(lhyp);
7467 }
7468 }
7469
7470 /*
7471 * Free up a layout.
7472 */
7473 static void
nfsrv_freelayout(struct nfslayouthead * lhp,struct nfslayout * lyp)7474 nfsrv_freelayout(struct nfslayouthead *lhp, struct nfslayout *lyp)
7475 {
7476
7477 NFSD_DEBUG(4, "Freelayout=%p\n", lyp);
7478 atomic_add_int(&nfsrv_layoutcnt, -1);
7479 NFSD_VNET(nfsstatsv1_p)->srvlayouts--;
7480 TAILQ_REMOVE(lhp, lyp, lay_list);
7481 free(lyp, M_NFSDSTATE);
7482 }
7483
7484 /*
7485 * Free up a device id.
7486 */
7487 void
nfsrv_freeonedevid(struct nfsdevice * ds)7488 nfsrv_freeonedevid(struct nfsdevice *ds)
7489 {
7490 int i;
7491
7492 atomic_add_int(&nfsrv_devidcnt, -1);
7493 vrele(ds->nfsdev_dvp);
7494 for (i = 0; i < nfsrv_dsdirsize; i++)
7495 if (ds->nfsdev_dsdir[i] != NULL)
7496 vrele(ds->nfsdev_dsdir[i]);
7497 free(ds->nfsdev_fileaddr, M_NFSDSTATE);
7498 free(ds->nfsdev_flexaddr, M_NFSDSTATE);
7499 free(ds->nfsdev_host, M_NFSDSTATE);
7500 free(ds, M_NFSDSTATE);
7501 }
7502
7503 /*
7504 * Free up a device id and its mirrors.
7505 */
7506 static void
nfsrv_freedevid(struct nfsdevice * ds)7507 nfsrv_freedevid(struct nfsdevice *ds)
7508 {
7509
7510 TAILQ_REMOVE(&nfsrv_devidhead, ds, nfsdev_list);
7511 nfsrv_freeonedevid(ds);
7512 }
7513
7514 /*
7515 * Free all layouts and device ids.
7516 * Done when the nfsd threads are shut down since there may be a new
7517 * modified device id list created when the nfsd is restarted.
7518 */
7519 void
nfsrv_freealllayoutsanddevids(void)7520 nfsrv_freealllayoutsanddevids(void)
7521 {
7522 struct nfsdontlist *mrp, *nmrp;
7523 struct nfslayout *lyp, *nlyp;
7524
7525 /* Get rid of the deviceid structures. */
7526 nfsrv_freealldevids();
7527 TAILQ_INIT(&nfsrv_devidhead);
7528 nfsrv_devidcnt = 0;
7529
7530 /* Get rid of all layouts. */
7531 nfsrv_freealllayouts();
7532
7533 /* Get rid of any nfsdontlist entries. */
7534 LIST_FOREACH_SAFE(mrp, &nfsrv_dontlisthead, nfsmr_list, nmrp)
7535 free(mrp, M_NFSDSTATE);
7536 LIST_INIT(&nfsrv_dontlisthead);
7537 nfsrv_dontlistlen = 0;
7538
7539 /* Free layouts in the recall list. */
7540 TAILQ_FOREACH_SAFE(lyp, &nfsrv_recalllisthead, lay_list, nlyp)
7541 nfsrv_freelayout(&nfsrv_recalllisthead, lyp);
7542 TAILQ_INIT(&nfsrv_recalllisthead);
7543 }
7544
7545 /*
7546 * Free layouts that match the arguments.
7547 */
7548 static void
nfsrv_freelayouts(nfsquad_t * clid,fsid_t * fs,int laytype,int iomode)7549 nfsrv_freelayouts(nfsquad_t *clid, fsid_t *fs, int laytype, int iomode)
7550 {
7551 struct nfslayouthash *lhyp;
7552 struct nfslayout *lyp, *nlyp;
7553 int i;
7554
7555 for (i = 0; i < nfsrv_layouthashsize; i++) {
7556 lhyp = &nfslayouthash[i];
7557 NFSLOCKLAYOUT(lhyp);
7558 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7559 if (clid->qval != lyp->lay_clientid.qval)
7560 continue;
7561 if (fs != NULL && fsidcmp(fs, &lyp->lay_fsid) != 0)
7562 continue;
7563 if (laytype != lyp->lay_type)
7564 continue;
7565 if ((iomode & NFSLAYOUTIOMODE_READ) != 0)
7566 lyp->lay_flags &= ~NFSLAY_READ;
7567 if ((iomode & NFSLAYOUTIOMODE_RW) != 0)
7568 lyp->lay_flags &= ~NFSLAY_RW;
7569 if ((lyp->lay_flags & (NFSLAY_READ | NFSLAY_RW)) == 0)
7570 nfsrv_freelayout(&lhyp->list, lyp);
7571 }
7572 NFSUNLOCKLAYOUT(lhyp);
7573 }
7574 }
7575
7576 /*
7577 * Free all layouts for the argument file.
7578 */
7579 void
nfsrv_freefilelayouts(fhandle_t * fhp)7580 nfsrv_freefilelayouts(fhandle_t *fhp)
7581 {
7582 struct nfslayouthash *lhyp;
7583 struct nfslayout *lyp, *nlyp;
7584
7585 lhyp = NFSLAYOUTHASH(fhp);
7586 NFSLOCKLAYOUT(lhyp);
7587 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
7588 if (NFSBCMP(&lyp->lay_fh, fhp, sizeof(*fhp)) == 0)
7589 nfsrv_freelayout(&lhyp->list, lyp);
7590 }
7591 NFSUNLOCKLAYOUT(lhyp);
7592 }
7593
7594 /*
7595 * Free all layouts.
7596 */
7597 static void
nfsrv_freealllayouts(void)7598 nfsrv_freealllayouts(void)
7599 {
7600 struct nfslayouthash *lhyp;
7601 struct nfslayout *lyp, *nlyp;
7602 int i;
7603
7604 for (i = 0; i < nfsrv_layouthashsize; i++) {
7605 lhyp = &nfslayouthash[i];
7606 NFSLOCKLAYOUT(lhyp);
7607 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp)
7608 nfsrv_freelayout(&lhyp->list, lyp);
7609 NFSUNLOCKLAYOUT(lhyp);
7610 }
7611 }
7612
7613 /*
7614 * Look up the mount path for the DS server.
7615 */
7616 static int
nfsrv_setdsserver(char * dspathp,char * mdspathp,NFSPROC_T * p,struct nfsdevice ** dsp)7617 nfsrv_setdsserver(char *dspathp, char *mdspathp, NFSPROC_T *p,
7618 struct nfsdevice **dsp)
7619 {
7620 struct nameidata nd;
7621 struct nfsdevice *ds;
7622 struct mount *mp;
7623 int error, i;
7624 char *dsdirpath;
7625 size_t dsdirsize;
7626
7627 NFSD_DEBUG(4, "setdssrv path=%s\n", dspathp);
7628 *dsp = NULL;
7629 if (jailed(p->td_ucred)) {
7630 printf("A pNFS nfsd cannot run in a jail\n");
7631 return (EPERM);
7632 }
7633 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
7634 dspathp);
7635 error = namei(&nd);
7636 NFSD_DEBUG(4, "lookup=%d\n", error);
7637 if (error != 0)
7638 return (error);
7639 if (nd.ni_vp->v_type != VDIR) {
7640 vput(nd.ni_vp);
7641 NFSD_DEBUG(4, "dspath not dir\n");
7642 return (ENOTDIR);
7643 }
7644 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7645 vput(nd.ni_vp);
7646 NFSD_DEBUG(4, "dspath not an NFS mount\n");
7647 return (ENXIO);
7648 }
7649
7650 /*
7651 * Allocate a DS server structure with the NFS mounted directory
7652 * vnode reference counted, so that a non-forced dismount will
7653 * fail with EBUSY.
7654 * This structure is always linked into the list, even if an error
7655 * is being returned. The caller will free the entire list upon
7656 * an error return.
7657 */
7658 *dsp = ds = malloc(sizeof(*ds) + nfsrv_dsdirsize * sizeof(vnode_t),
7659 M_NFSDSTATE, M_WAITOK | M_ZERO);
7660 ds->nfsdev_dvp = nd.ni_vp;
7661 ds->nfsdev_nmp = VFSTONFS(nd.ni_vp->v_mount);
7662 NFSVOPUNLOCK(nd.ni_vp);
7663
7664 dsdirsize = strlen(dspathp) + 16;
7665 dsdirpath = malloc(dsdirsize, M_TEMP, M_WAITOK);
7666 /* Now, create the DS directory structures. */
7667 for (i = 0; i < nfsrv_dsdirsize; i++) {
7668 snprintf(dsdirpath, dsdirsize, "%s/ds%d", dspathp, i);
7669 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7670 UIO_SYSSPACE, dsdirpath);
7671 error = namei(&nd);
7672 NFSD_DEBUG(4, "dsdirpath=%s lookup=%d\n", dsdirpath, error);
7673 if (error != 0)
7674 break;
7675 if (nd.ni_vp->v_type != VDIR) {
7676 vput(nd.ni_vp);
7677 error = ENOTDIR;
7678 NFSD_DEBUG(4, "dsdirpath not a VDIR\n");
7679 break;
7680 }
7681 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
7682 vput(nd.ni_vp);
7683 error = ENXIO;
7684 NFSD_DEBUG(4, "dsdirpath not an NFS mount\n");
7685 break;
7686 }
7687 ds->nfsdev_dsdir[i] = nd.ni_vp;
7688 NFSVOPUNLOCK(nd.ni_vp);
7689 }
7690 free(dsdirpath, M_TEMP);
7691
7692 if (strlen(mdspathp) > 0) {
7693 /*
7694 * This DS stores file for a specific MDS exported file
7695 * system.
7696 */
7697 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
7698 UIO_SYSSPACE, mdspathp);
7699 error = namei(&nd);
7700 NFSD_DEBUG(4, "mds lookup=%d\n", error);
7701 if (error != 0)
7702 goto out;
7703 if (nd.ni_vp->v_type != VDIR) {
7704 vput(nd.ni_vp);
7705 error = ENOTDIR;
7706 NFSD_DEBUG(4, "mdspath not dir\n");
7707 goto out;
7708 }
7709 mp = nd.ni_vp->v_mount;
7710 if ((mp->mnt_flag & MNT_EXPORTED) == 0) {
7711 vput(nd.ni_vp);
7712 error = ENXIO;
7713 NFSD_DEBUG(4, "mdspath not an exported fs\n");
7714 goto out;
7715 }
7716 ds->nfsdev_mdsfsid = mp->mnt_stat.f_fsid;
7717 ds->nfsdev_mdsisset = 1;
7718 vput(nd.ni_vp);
7719 }
7720
7721 out:
7722 TAILQ_INSERT_TAIL(&nfsrv_devidhead, ds, nfsdev_list);
7723 atomic_add_int(&nfsrv_devidcnt, 1);
7724 return (error);
7725 }
7726
7727 /*
7728 * Look up the mount path for the DS server and delete it.
7729 */
7730 int
nfsrv_deldsserver(int op,char * dspathp,NFSPROC_T * p)7731 nfsrv_deldsserver(int op, char *dspathp, NFSPROC_T *p)
7732 {
7733 struct mount *mp;
7734 struct nfsmount *nmp;
7735 struct nfsdevice *ds;
7736 int error;
7737
7738 NFSD_DEBUG(4, "deldssrv path=%s\n", dspathp);
7739 /*
7740 * Search for the path in the mount list. Avoid looking the path
7741 * up, since this mount point may be hung, with associated locked
7742 * vnodes, etc.
7743 * Set NFSMNTP_CANCELRPCS so that any forced dismount will be blocked
7744 * until this completes.
7745 * As noted in the man page, this should be done before any forced
7746 * dismount on the mount point, but at least the handshake on
7747 * NFSMNTP_CANCELRPCS should make it safe.
7748 */
7749 error = 0;
7750 ds = NULL;
7751 nmp = NULL;
7752 mtx_lock(&mountlist_mtx);
7753 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
7754 if (strcmp(mp->mnt_stat.f_mntonname, dspathp) == 0 &&
7755 strcmp(mp->mnt_stat.f_fstypename, "nfs") == 0 &&
7756 mp->mnt_data != NULL) {
7757 nmp = VFSTONFS(mp);
7758 NFSLOCKMNT(nmp);
7759 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7760 NFSMNTP_CANCELRPCS)) == 0) {
7761 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7762 NFSUNLOCKMNT(nmp);
7763 } else {
7764 NFSUNLOCKMNT(nmp);
7765 nmp = NULL;
7766 }
7767 break;
7768 }
7769 }
7770 mtx_unlock(&mountlist_mtx);
7771
7772 if (nmp != NULL) {
7773 ds = nfsrv_deldsnmp(op, nmp, p);
7774 NFSD_DEBUG(4, "deldsnmp=%p\n", ds);
7775 if (ds != NULL) {
7776 nfsrv_killrpcs(nmp);
7777 NFSD_DEBUG(4, "aft killrpcs\n");
7778 } else
7779 error = ENXIO;
7780 NFSLOCKMNT(nmp);
7781 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7782 wakeup(nmp);
7783 NFSUNLOCKMNT(nmp);
7784 } else
7785 error = EINVAL;
7786 return (error);
7787 }
7788
7789 /*
7790 * Search for and remove a DS entry which matches the "nmp" argument.
7791 * The nfsdevice structure pointer is returned so that the caller can
7792 * free it via nfsrv_freeonedevid().
7793 * For the forced case, do not try to do LayoutRecalls, since the server
7794 * must be shut down now anyhow.
7795 */
7796 struct nfsdevice *
nfsrv_deldsnmp(int op,struct nfsmount * nmp,NFSPROC_T * p)7797 nfsrv_deldsnmp(int op, struct nfsmount *nmp, NFSPROC_T *p)
7798 {
7799 struct nfsdevice *fndds;
7800
7801 NFSD_DEBUG(4, "deldsdvp\n");
7802 NFSDDSLOCK();
7803 if (op == PNFSDOP_FORCEDELDS)
7804 fndds = nfsv4_findmirror(nmp);
7805 else
7806 fndds = nfsrv_findmirroredds(nmp);
7807 if (fndds != NULL)
7808 nfsrv_deleteds(fndds);
7809 NFSDDSUNLOCK();
7810 if (fndds != NULL) {
7811 if (op != PNFSDOP_FORCEDELDS)
7812 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7813 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7814 }
7815 return (fndds);
7816 }
7817
7818 /*
7819 * Similar to nfsrv_deldsnmp(), except that the DS is indicated by deviceid.
7820 * This function also calls nfsrv_killrpcs() to unblock RPCs on the mount
7821 * point.
7822 * Also, returns an error instead of the nfsdevice found.
7823 */
7824 int
nfsrv_delds(char * devid,NFSPROC_T * p)7825 nfsrv_delds(char *devid, NFSPROC_T *p)
7826 {
7827 struct nfsdevice *ds, *fndds;
7828 struct nfsmount *nmp;
7829 int fndmirror;
7830
7831 NFSD_DEBUG(4, "delds\n");
7832 /*
7833 * Search the DS server list for a match with devid.
7834 * Remove the DS entry if found and there is a mirror.
7835 */
7836 fndds = NULL;
7837 nmp = NULL;
7838 fndmirror = 0;
7839 NFSDDSLOCK();
7840 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7841 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0 &&
7842 ds->nfsdev_nmp != NULL) {
7843 NFSD_DEBUG(4, "fnd main ds\n");
7844 fndds = ds;
7845 break;
7846 }
7847 }
7848 if (fndds == NULL) {
7849 NFSDDSUNLOCK();
7850 return (ENXIO);
7851 }
7852 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
7853 fndmirror = 1;
7854 else if (fndds->nfsdev_mdsisset != 0) {
7855 /* For the fsid is set case, search for a mirror. */
7856 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
7857 if (ds != fndds && ds->nfsdev_nmp != NULL &&
7858 ds->nfsdev_mdsisset != 0 &&
7859 fsidcmp(&ds->nfsdev_mdsfsid,
7860 &fndds->nfsdev_mdsfsid) == 0) {
7861 fndmirror = 1;
7862 break;
7863 }
7864 }
7865 }
7866 if (fndmirror != 0) {
7867 nmp = fndds->nfsdev_nmp;
7868 NFSLOCKMNT(nmp);
7869 if ((nmp->nm_privflag & (NFSMNTP_FORCEDISM |
7870 NFSMNTP_CANCELRPCS)) == 0) {
7871 nmp->nm_privflag |= NFSMNTP_CANCELRPCS;
7872 NFSUNLOCKMNT(nmp);
7873 nfsrv_deleteds(fndds);
7874 } else {
7875 NFSUNLOCKMNT(nmp);
7876 nmp = NULL;
7877 }
7878 }
7879 NFSDDSUNLOCK();
7880 if (nmp != NULL) {
7881 nfsrv_flexmirrordel(fndds->nfsdev_deviceid, p);
7882 printf("pNFS server: mirror %s failed\n", fndds->nfsdev_host);
7883 nfsrv_killrpcs(nmp);
7884 NFSLOCKMNT(nmp);
7885 nmp->nm_privflag &= ~NFSMNTP_CANCELRPCS;
7886 wakeup(nmp);
7887 NFSUNLOCKMNT(nmp);
7888 return (0);
7889 }
7890 return (ENXIO);
7891 }
7892
7893 /*
7894 * Mark a DS as disabled by setting nfsdev_nmp = NULL.
7895 */
7896 static void
nfsrv_deleteds(struct nfsdevice * fndds)7897 nfsrv_deleteds(struct nfsdevice *fndds)
7898 {
7899
7900 NFSD_DEBUG(4, "deleteds: deleting a mirror\n");
7901 fndds->nfsdev_nmp = NULL;
7902 if (fndds->nfsdev_mdsisset == 0)
7903 nfsrv_faildscnt--;
7904 }
7905
7906 /*
7907 * Fill in the addr structures for the File and Flex File layouts.
7908 */
7909 static void
nfsrv_allocdevid(struct nfsdevice * ds,char * addr,char * dnshost)7910 nfsrv_allocdevid(struct nfsdevice *ds, char *addr, char *dnshost)
7911 {
7912 uint32_t *tl;
7913 char *netprot;
7914 int addrlen;
7915 static uint64_t new_devid = 0;
7916
7917 if (strchr(addr, ':') != NULL)
7918 netprot = "tcp6";
7919 else
7920 netprot = "tcp";
7921
7922 /* Fill in the device id. */
7923 NFSBCOPY(&nfsdev_time, ds->nfsdev_deviceid, sizeof(nfsdev_time));
7924 new_devid++;
7925 NFSBCOPY(&new_devid, &ds->nfsdev_deviceid[sizeof(nfsdev_time)],
7926 sizeof(new_devid));
7927
7928 /*
7929 * Fill in the file addr (actually the nfsv4_file_layout_ds_addr4
7930 * as defined in RFC5661) in XDR.
7931 */
7932 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
7933 6 * NFSX_UNSIGNED;
7934 NFSD_DEBUG(4, "hn=%s addr=%s netprot=%s\n", dnshost, addr, netprot);
7935 ds->nfsdev_fileaddrlen = addrlen;
7936 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
7937 ds->nfsdev_fileaddr = (char *)tl;
7938 *tl++ = txdr_unsigned(1); /* One stripe with index 0. */
7939 *tl++ = 0;
7940 *tl++ = txdr_unsigned(1); /* One multipath list */
7941 *tl++ = txdr_unsigned(1); /* with one entry in it. */
7942 /* The netaddr for this one entry. */
7943 *tl++ = txdr_unsigned(strlen(netprot));
7944 NFSBCOPY(netprot, tl, strlen(netprot));
7945 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
7946 *tl++ = txdr_unsigned(strlen(addr));
7947 NFSBCOPY(addr, tl, strlen(addr));
7948
7949 /*
7950 * Fill in the flex file addr (actually the ff_device_addr4
7951 * as defined for Flexible File Layout) in XDR.
7952 */
7953 addrlen = NFSM_RNDUP(strlen(addr)) + NFSM_RNDUP(strlen(netprot)) +
7954 14 * NFSX_UNSIGNED;
7955 ds->nfsdev_flexaddrlen = addrlen;
7956 tl = malloc(addrlen, M_NFSDSTATE, M_WAITOK | M_ZERO);
7957 ds->nfsdev_flexaddr = (char *)tl;
7958 *tl++ = txdr_unsigned(1); /* One multipath entry. */
7959 /* The netaddr for this one entry. */
7960 *tl++ = txdr_unsigned(strlen(netprot));
7961 NFSBCOPY(netprot, tl, strlen(netprot));
7962 tl += (NFSM_RNDUP(strlen(netprot)) / NFSX_UNSIGNED);
7963 *tl++ = txdr_unsigned(strlen(addr));
7964 NFSBCOPY(addr, tl, strlen(addr));
7965 tl += (NFSM_RNDUP(strlen(addr)) / NFSX_UNSIGNED);
7966 *tl++ = txdr_unsigned(2); /* Two NFS Versions. */
7967 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
7968 *tl++ = txdr_unsigned(NFSV42_MINORVERSION); /* Minor version 2. */
7969 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
7970 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
7971 *tl++ = newnfs_true; /* Tightly coupled. */
7972 *tl++ = txdr_unsigned(NFS_VER4); /* NFSv4. */
7973 *tl++ = txdr_unsigned(NFSV41_MINORVERSION); /* Minor version 1. */
7974 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max rsize. */
7975 *tl++ = txdr_unsigned(nfs_srvmaxio); /* DS max wsize. */
7976 *tl = newnfs_true; /* Tightly coupled. */
7977
7978 ds->nfsdev_hostnamelen = strlen(dnshost);
7979 ds->nfsdev_host = malloc(ds->nfsdev_hostnamelen + 1, M_NFSDSTATE,
7980 M_WAITOK);
7981 NFSBCOPY(dnshost, ds->nfsdev_host, ds->nfsdev_hostnamelen + 1);
7982 }
7983
7984 /*
7985 * Create the device id list.
7986 * Return 0 if the nfsd threads are to run and ENXIO if the "-p" argument
7987 * is misconfigured.
7988 */
7989 int
nfsrv_createdevids(struct nfsd_nfsd_args * args,NFSPROC_T * p)7990 nfsrv_createdevids(struct nfsd_nfsd_args *args, NFSPROC_T *p)
7991 {
7992 struct nfsdevice *ds;
7993 char *addrp, *dnshostp, *dspathp, *mdspathp;
7994 int error, i;
7995
7996 addrp = args->addr;
7997 dnshostp = args->dnshost;
7998 dspathp = args->dspath;
7999 mdspathp = args->mdspath;
8000 nfsrv_maxpnfsmirror = args->mirrorcnt;
8001 if (addrp == NULL || dnshostp == NULL || dspathp == NULL ||
8002 mdspathp == NULL)
8003 return (0);
8004
8005 /*
8006 * Loop around for each nul-terminated string in args->addr,
8007 * args->dnshost, args->dnspath and args->mdspath.
8008 */
8009 while (addrp < (args->addr + args->addrlen) &&
8010 dnshostp < (args->dnshost + args->dnshostlen) &&
8011 dspathp < (args->dspath + args->dspathlen) &&
8012 mdspathp < (args->mdspath + args->mdspathlen)) {
8013 error = nfsrv_setdsserver(dspathp, mdspathp, p, &ds);
8014 if (error != 0) {
8015 /* Free all DS servers. */
8016 nfsrv_freealldevids();
8017 nfsrv_devidcnt = 0;
8018 return (ENXIO);
8019 }
8020 nfsrv_allocdevid(ds, addrp, dnshostp);
8021 addrp += (strlen(addrp) + 1);
8022 dnshostp += (strlen(dnshostp) + 1);
8023 dspathp += (strlen(dspathp) + 1);
8024 mdspathp += (strlen(mdspathp) + 1);
8025 }
8026 if (nfsrv_devidcnt < nfsrv_maxpnfsmirror) {
8027 /* Free all DS servers. */
8028 nfsrv_freealldevids();
8029 nfsrv_devidcnt = 0;
8030 nfsrv_maxpnfsmirror = 1;
8031 return (ENXIO);
8032 }
8033 /* We can fail at most one less DS than the mirror level. */
8034 nfsrv_faildscnt = nfsrv_maxpnfsmirror - 1;
8035
8036 /*
8037 * Allocate the nfslayout hash table now, since this is a pNFS server.
8038 * Make it 1% of the high water mark and at least 100.
8039 */
8040 if (nfslayouthash == NULL) {
8041 nfsrv_layouthashsize = nfsrv_layouthighwater / 100;
8042 if (nfsrv_layouthashsize < 100)
8043 nfsrv_layouthashsize = 100;
8044 nfslayouthash = mallocarray(nfsrv_layouthashsize,
8045 sizeof(struct nfslayouthash), M_NFSDSESSION, M_WAITOK |
8046 M_ZERO);
8047 for (i = 0; i < nfsrv_layouthashsize; i++) {
8048 mtx_init(&nfslayouthash[i].mtx, "nfslm", NULL, MTX_DEF);
8049 TAILQ_INIT(&nfslayouthash[i].list);
8050 }
8051 }
8052 return (0);
8053 }
8054
8055 /*
8056 * Free all device ids.
8057 */
8058 static void
nfsrv_freealldevids(void)8059 nfsrv_freealldevids(void)
8060 {
8061 struct nfsdevice *ds, *nds;
8062
8063 TAILQ_FOREACH_SAFE(ds, &nfsrv_devidhead, nfsdev_list, nds)
8064 nfsrv_freedevid(ds);
8065 }
8066
8067 /*
8068 * Check to see if there is a Read/Write Layout plus either:
8069 * - A Write Delegation
8070 * or
8071 * - An Open with Write_access.
8072 * Return 1 if this is the case and 0 otherwise.
8073 * This function is used by nfsrv_proxyds() to decide if doing a Proxy
8074 * Getattr RPC to the Data Server (DS) is necessary.
8075 */
8076 #define NFSCLIDVECSIZE 6
8077 int
nfsrv_checkdsattr(vnode_t vp,NFSPROC_T * p)8078 nfsrv_checkdsattr(vnode_t vp, NFSPROC_T *p)
8079 {
8080 fhandle_t fh, *tfhp;
8081 struct nfsstate *stp;
8082 struct nfslayout *lyp;
8083 struct nfslayouthash *lhyp;
8084 struct nfslockhashhead *hp;
8085 struct nfslockfile *lfp;
8086 nfsquad_t clid[NFSCLIDVECSIZE];
8087 int clidcnt, ret;
8088
8089 ret = nfsvno_getfh(vp, &fh, p);
8090 if (ret != 0)
8091 return (0);
8092
8093 /* First check for a Read/Write Layout. */
8094 clidcnt = 0;
8095 lhyp = NFSLAYOUTHASH(&fh);
8096 NFSLOCKLAYOUT(lhyp);
8097 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8098 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8099 ((lyp->lay_flags & NFSLAY_RW) != 0 ||
8100 ((lyp->lay_flags & NFSLAY_READ) != 0 &&
8101 nfsrv_pnfsatime != 0))) {
8102 if (clidcnt < NFSCLIDVECSIZE)
8103 clid[clidcnt].qval = lyp->lay_clientid.qval;
8104 clidcnt++;
8105 }
8106 }
8107 NFSUNLOCKLAYOUT(lhyp);
8108 if (clidcnt == 0) {
8109 /* None found, so return 0. */
8110 return (0);
8111 }
8112
8113 /* Get the nfslockfile for this fh. */
8114 NFSLOCKSTATE();
8115 hp = NFSLOCKHASH(&fh);
8116 LIST_FOREACH(lfp, hp, lf_hash) {
8117 tfhp = &lfp->lf_fh;
8118 if (NFSVNO_CMPFH(&fh, tfhp))
8119 break;
8120 }
8121 if (lfp == NULL) {
8122 /* None found, so return 0. */
8123 NFSUNLOCKSTATE();
8124 return (0);
8125 }
8126
8127 /* Now, look for a Write delegation for this clientid. */
8128 LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
8129 if ((stp->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8130 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8131 break;
8132 }
8133 if (stp != NULL) {
8134 /* Found one, so return 1. */
8135 NFSUNLOCKSTATE();
8136 return (1);
8137 }
8138
8139 /* No Write delegation, so look for an Open with Write_access. */
8140 LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
8141 KASSERT((stp->ls_flags & NFSLCK_OPEN) != 0,
8142 ("nfsrv_checkdsattr: Non-open in Open list\n"));
8143 if ((stp->ls_flags & NFSLCK_WRITEACCESS) != 0 &&
8144 nfsrv_fndclid(clid, stp->ls_clp->lc_clientid, clidcnt) != 0)
8145 break;
8146 }
8147 NFSUNLOCKSTATE();
8148 if (stp != NULL)
8149 return (1);
8150 return (0);
8151 }
8152
8153 /*
8154 * Look for a matching clientid in the vector. Return 1 if one might match.
8155 */
8156 static int
nfsrv_fndclid(nfsquad_t * clidvec,nfsquad_t clid,int clidcnt)8157 nfsrv_fndclid(nfsquad_t *clidvec, nfsquad_t clid, int clidcnt)
8158 {
8159 int i;
8160
8161 /* If too many for the vector, return 1 since there might be a match. */
8162 if (clidcnt > NFSCLIDVECSIZE)
8163 return (1);
8164
8165 for (i = 0; i < clidcnt; i++)
8166 if (clidvec[i].qval == clid.qval)
8167 return (1);
8168 return (0);
8169 }
8170
8171 /*
8172 * Check the don't list for "vp" and see if issuing an rw layout is allowed.
8173 * Return 1 if issuing an rw layout isn't allowed, 0 otherwise.
8174 */
8175 static int
nfsrv_dontlayout(fhandle_t * fhp)8176 nfsrv_dontlayout(fhandle_t *fhp)
8177 {
8178 struct nfsdontlist *mrp;
8179 int ret;
8180
8181 if (nfsrv_dontlistlen == 0)
8182 return (0);
8183 ret = 0;
8184 NFSDDONTLISTLOCK();
8185 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8186 if (NFSBCMP(fhp, &mrp->nfsmr_fh, sizeof(*fhp)) == 0 &&
8187 (mrp->nfsmr_flags & NFSMR_DONTLAYOUT) != 0) {
8188 ret = 1;
8189 break;
8190 }
8191 }
8192 NFSDDONTLISTUNLOCK();
8193 return (ret);
8194 }
8195
8196 #define PNFSDS_COPYSIZ 65536
8197 /*
8198 * Create a new file on a DS and copy the contents of an extant DS file to it.
8199 * This can be used for recovery of a DS file onto a recovered DS.
8200 * The steps are:
8201 * - When called, the MDS file's vnode is locked, blocking LayoutGet operations.
8202 * - Disable issuing of read/write layouts for the file via the nfsdontlist,
8203 * so that they will be disabled after the MDS file's vnode is unlocked.
8204 * - Set up the nfsrv_recalllist so that recall of read/write layouts can
8205 * be done.
8206 * - Unlock the MDS file's vnode, so that the client(s) can perform proxied
8207 * writes, LayoutCommits and LayoutReturns for the file when completing the
8208 * LayoutReturn requested by the LayoutRecall callback.
8209 * - Issue a LayoutRecall callback for all read/write layouts and wait for
8210 * them to be returned. (If the LayoutRecall callback replies
8211 * NFSERR_NOMATCHLAYOUT, they are gone and no LayoutReturn is needed.)
8212 * - Exclusively lock the MDS file's vnode. This ensures that no proxied
8213 * writes are in progress or can occur during the DS file copy.
8214 * It also blocks Setattr operations.
8215 * - Create the file on the recovered mirror.
8216 * - Copy the file from the operational DS.
8217 * - Copy any ACL from the MDS file to the new DS file.
8218 * - Set the modify time of the new DS file to that of the MDS file.
8219 * - Update the extended attribute for the MDS file.
8220 * - Enable issuing of rw layouts by deleting the nfsdontlist entry.
8221 * - The caller will unlock the MDS file's vnode allowing operations
8222 * to continue normally, since it is now on the mirror again.
 *
 * Arguments, as used by the code below:
 * - vp is the MDS file's vnode and fvp is the vnode that is read from;
 *   both are asserted to be locked on entry.
 * - dvp, ds and pf are handed to nfsrv_createdsfile() to create the new
 *   DS file; dvp->v_mount is also used for vn_start_write().
 * - wpf and mirrorcnt give the data (sizeof(*wpf) times mirrorcnt bytes)
 *   stored in the "pnfsd.dsfile" extended attribute of vp.
 * - cred and p are passed on to the VOP, vn_rdwr() and helper calls.
 * Returns 0 on success, otherwise an error number. ENXIO is returned when
 * the file handle is already on the nfsdontlist or when a layout has not
 * been returned once its lay_trycnt reaches 60; ESTALE is returned when
 * vp is found to be doomed after it has been relocked.
8223 */
8224 int
nfsrv_copymr(vnode_t vp,vnode_t fvp,vnode_t dvp,struct nfsdevice * ds,struct pnfsdsfile * pf,struct pnfsdsfile * wpf,int mirrorcnt,struct ucred * cred,NFSPROC_T * p)8225 nfsrv_copymr(vnode_t vp, vnode_t fvp, vnode_t dvp, struct nfsdevice *ds,
8226 struct pnfsdsfile *pf, struct pnfsdsfile *wpf, int mirrorcnt,
8227 struct ucred *cred, NFSPROC_T *p)
8228 {
8229 struct nfsdontlist *mrp, *nmrp;
8230 struct nfslayouthash *lhyp;
8231 struct nfslayout *lyp, *nlyp;
8232 struct nfslayouthead thl;
8233 struct mount *mp, *tvmp;
8234 struct acl *aclp;
8235 struct vattr va;
8236 struct timespec mtime;
8237 fhandle_t fh;
8238 vnode_t tvp;
8239 off_t rdpos, wrpos;
8240 ssize_t aresid;
8241 char *dat;
8242 int didprintf, ret, retacl, xfer;
8243
8244 ASSERT_VOP_LOCKED(fvp, "nfsrv_copymr fvp");
8245 ASSERT_VOP_LOCKED(vp, "nfsrv_copymr vp");
8246 /*
8247 * Allocate a nfsdontlist entry and set the NFSMR_DONTLAYOUT flag
8248 * so that no more RW layouts will get issued.
8249 */
8250 ret = nfsvno_getfh(vp, &fh, p);
8251 if (ret != 0) {
8252 NFSD_DEBUG(4, "nfsrv_copymr: getfh=%d\n", ret);
8253 return (ret);
8254 }
/*
 * The new entry is allocated before the dontlist lock is taken; it is
 * freed again below if an entry for this file handle already exists.
 */
8255 nmrp = malloc(sizeof(*nmrp), M_NFSDSTATE, M_WAITOK);
8256 nmrp->nfsmr_flags = NFSMR_DONTLAYOUT;
8257 NFSBCOPY(&fh, &nmrp->nfsmr_fh, sizeof(fh));
8258 NFSDDONTLISTLOCK();
8259 LIST_FOREACH(mrp, &nfsrv_dontlisthead, nfsmr_list) {
8260 if (NFSBCMP(&fh, &mrp->nfsmr_fh, sizeof(fh)) == 0)
8261 break;
8262 }
8263 if (mrp == NULL) {
8264 LIST_INSERT_HEAD(&nfsrv_dontlisthead, nmrp, nfsmr_list);
8265 mrp = nmrp;
8266 nmrp = NULL;
/*
 * NOTE(review): nfsrv_dontlistlen is incremented here but is not
 * decremented when the entry is removed at the end of this function;
 * confirm that this is intended.
 */
8267 nfsrv_dontlistlen++;
8268 NFSD_DEBUG(4, "nfsrv_copymr: in dontlist\n");
8269 } else {
8270 NFSDDONTLISTUNLOCK();
8271 free(nmrp, M_NFSDSTATE);
8272 NFSD_DEBUG(4, "nfsrv_copymr: dup dontlist\n");
8273 return (ENXIO);
8274 }
8275 NFSDDONTLISTUNLOCK();
8276
8277 /*
8278 * Search for all RW layouts for this file. Move them to the
8279 * recall list, so they can be recalled and their return noted.
8280 */
8281 lhyp = NFSLAYOUTHASH(&fh);
8282 NFSDRECALLLOCK();
8283 NFSLOCKLAYOUT(lhyp);
8284 TAILQ_FOREACH_SAFE(lyp, &lhyp->list, lay_list, nlyp) {
8285 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8286 (lyp->lay_flags & NFSLAY_RW) != 0) {
8287 TAILQ_REMOVE(&lhyp->list, lyp, lay_list);
8288 TAILQ_INSERT_HEAD(&nfsrv_recalllisthead, lyp, lay_list);
8289 lyp->lay_trycnt = 0;
8290 }
8291 }
8292 NFSUNLOCKLAYOUT(lhyp);
8293 NFSDRECALLUNLOCK();
8294
8295 ret = 0;
8296 mp = tvmp = NULL;
8297 didprintf = 0;
8298 TAILQ_INIT(&thl);
8299 /* Unlock the MDS vp, so that a LayoutReturn can be done on it. */
8300 NFSVOPUNLOCK(vp);
8301 /* Now, do a recall for all layouts not yet recalled. */
/*
 * The recall lock is dropped around each nfsrv_recalllayout() call, so
 * the list scan is restarted from the top after every recall.
 */
8302 tryagain:
8303 NFSDRECALLLOCK();
8304 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8305 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0 &&
8306 (lyp->lay_flags & NFSLAY_RECALL) == 0) {
8307 lyp->lay_flags |= NFSLAY_RECALL;
8308 /*
8309 * The layout stateid.seqid needs to be incremented
8310 * before doing a LAYOUT_RECALL callback.
8311 */
8312 if (++lyp->lay_stateid.seqid == 0)
8313 lyp->lay_stateid.seqid = 1;
8314 NFSDRECALLUNLOCK();
8315 nfsrv_recalllayout(lyp->lay_clientid, &lyp->lay_stateid,
8316 &lyp->lay_fh, lyp, 0, lyp->lay_type, p);
8317 NFSD_DEBUG(4, "nfsrv_copymr: recalled layout\n");
8318 goto tryagain;
8319 }
8320 }
8321
8322 /* Now wait for them to be returned. */
/*
 * The recall lock taken at tryagain is still held on entry to this loop
 * and is only released by the NFSDRECALLUNLOCK() that follows it.
 * Returned layouts are moved to the local list thl so that they can be
 * freed once the lock has been dropped.
 */
8323 tryagain2:
8324 TAILQ_FOREACH(lyp, &nfsrv_recalllisthead, lay_list) {
8325 if (NFSBCMP(&lyp->lay_fh, &fh, sizeof(fh)) == 0) {
8326 if ((lyp->lay_flags & NFSLAY_RETURNED) != 0) {
8327 TAILQ_REMOVE(&nfsrv_recalllisthead, lyp,
8328 lay_list);
8329 TAILQ_INSERT_HEAD(&thl, lyp, lay_list);
8330 NFSD_DEBUG(4,
8331 "nfsrv_copymr: layout returned\n");
8332 } else {
/*
 * Sleep on the layout with a timeout of hz ticks.
 * NOTE(review): ret keeps the result of the last mtx_sleep() when
 * the loop finishes; if that sleep timed out but the layout is
 * then found returned on the re-scan, ret is presumably still
 * non-zero and the steps guarded by "ret == 0" below are skipped.
 * Confirm whether that can happen.
 */
8333 lyp->lay_trycnt++;
8334 ret = mtx_sleep(lyp, NFSDRECALLMUTEXPTR,
8335 PVFS | PCATCH, "nfsmrl", hz);
8336 NFSD_DEBUG(4, "nfsrv_copymr: aft sleep=%d\n",
8337 ret);
8338 if (ret == EINTR || ret == ERESTART)
8339 break;
8340 if ((lyp->lay_flags & NFSLAY_RETURNED) == 0) {
8341 /*
8342 * Give up after 60sec and return
8343 * ENXIO, failing the copymr.
8344 * This layout will remain on the
8345 * recalllist. It can only be cleared
8346 * by restarting the nfsd.
8347 * This seems the safe way to handle
8348 * it, since it cannot be safely copied
8349 * with an outstanding RW layout.
8350 */
8351 if (lyp->lay_trycnt >= 60) {
8352 ret = ENXIO;
8353 break;
8354 }
8355 if (didprintf == 0) {
8356 printf("nfsrv_copymr: layout "
8357 "not returned\n");
8358 didprintf = 1;
8359 }
8360 }
8361 }
8362 goto tryagain2;
8363 }
8364 }
8365 NFSDRECALLUNLOCK();
8366 /* We can now get rid of the layouts that have been returned. */
8367 TAILQ_FOREACH_SAFE(lyp, &thl, lay_list, nlyp)
8368 nfsrv_freelayout(&thl, lyp);
8369
8370 /*
8371 * Do the vn_start_write() calls here, before the MDS vnode is
8372 * locked and the tvp is created (locked) in the NFS file system
8373 * that dvp is in.
8374 * For tvmp, this probably isn't necessary, since it will be an
8375 * NFS mount and they are not suspendable at this time.
8376 */
8377 if (ret == 0)
8378 ret = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
8379 if (ret == 0) {
8380 tvmp = dvp->v_mount;
8381 ret = vn_start_write(NULL, &tvmp, V_WAIT | V_PCATCH);
8382 }
8383
8384 /*
8385 * LK_EXCLUSIVE lock the MDS vnode, so that any
8386 * proxied writes through the MDS will be blocked until we have
8387 * completed the copy and update of the extended attributes.
8388 * This will also ensure that any attributes and ACL will not be
8389 * changed until the copy is complete.
 * The lock is taken even when ret is already non-zero, so vp is
 * locked again on every return path from here on.
8390 */
8391 NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
8392 if (ret == 0 && VN_IS_DOOMED(vp)) {
8393 NFSD_DEBUG(4, "nfsrv_copymr: lk_exclusive doomed\n");
8394 ret = ESTALE;
8395 }
8396
8397 /* Create the data file on the recovered DS. */
8398 if (ret == 0)
8399 ret = nfsrv_createdsfile(vp, &fh, pf, dvp, ds, cred, p, &tvp);
8400
8401 /* Copy the DS file, if created successfully. */
8402 if (ret == 0) {
8403 /*
8404 * Get any NFSv4 ACL on the MDS file, so that it can be set
8405 * on the new DS file.
8406 */
8407 aclp = acl_alloc(M_WAITOK | M_ZERO);
8408 retacl = VOP_GETACL(vp, ACL_TYPE_NFS4, aclp, cred, p);
8409 if (retacl != 0 && retacl != ENOATTR)
8410 NFSD_DEBUG(1, "nfsrv_copymr: vop_getacl=%d\n", retacl);
8411 dat = malloc(PNFSDS_COPYSIZ, M_TEMP, M_WAITOK);
8412 /* Malloc a block of 0s used to check for holes. */
8413 if (nfsrv_zeropnfsdat == NULL)
8414 nfsrv_zeropnfsdat = malloc(PNFSDS_COPYSIZ, M_TEMP,
8415 M_WAITOK | M_ZERO);
8416 rdpos = wrpos = 0;
8417 ret = VOP_GETATTR(fvp, &va, cred);
8418 aresid = 0;
/*
 * Copy PNFSDS_COPYSIZ bytes at a time. The loop stops on an error or
 * after a short read (aresid != 0). wrpos advances together with
 * rdpos, including for full blocks of zeros whose write is skipped.
 */
8419 while (ret == 0 && aresid == 0) {
8420 ret = vn_rdwr(UIO_READ, fvp, dat, PNFSDS_COPYSIZ,
8421 rdpos, UIO_SYSSPACE, IO_NODELOCKED, cred, NULL,
8422 &aresid, p);
8423 xfer = PNFSDS_COPYSIZ - aresid;
8424 if (ret == 0 && xfer > 0) {
8425 rdpos += xfer;
8426 /*
8427 * Skip the write for holes, except for the
8428 * last block.
8429 */
8430 if (xfer < PNFSDS_COPYSIZ || rdpos ==
8431 va.va_size || NFSBCMP(dat,
8432 nfsrv_zeropnfsdat, PNFSDS_COPYSIZ) != 0)
8433 ret = vn_rdwr(UIO_WRITE, tvp, dat, xfer,
8434 wrpos, UIO_SYSSPACE, IO_NODELOCKED,
8435 cred, NULL, NULL, p);
8436 if (ret == 0)
8437 wrpos += xfer;
8438 }
8439 }
8440
8441 /* If there is an ACL and the copy succeeded, set the ACL. */
8442 if (ret == 0 && retacl == 0) {
8443 ret = VOP_SETACL(tvp, ACL_TYPE_NFS4, aclp, cred, p);
8444 /*
8445 * Don't consider these as errors, since VOP_GETACL()
8446 * can return an ACL when they are not actually
8447 * supported. For example, for UFS, VOP_GETACL()
8448 * will return a trivial ACL based on the uid/gid/mode
8449 * when there is no ACL on the file.
8450 * This case should be recognized as a trivial ACL
8451 * by UFS's VOP_SETACL() and succeed, but...
8452 */
8453 if (ret == ENOATTR || ret == EOPNOTSUPP || ret == EPERM)
8454 ret = 0;
8455 }
8456
8457 if (ret == 0)
8458 ret = VOP_FSYNC(tvp, MNT_WAIT, p);
8459
8460 /* Set the DS data file's modify time that of the MDS file. */
8461 if (ret == 0)
8462 ret = VOP_GETATTR(vp, &va, cred);
8463 if (ret == 0) {
8464 mtime = va.va_mtime;
8465 VATTR_NULL(&va);
8466 va.va_mtime = mtime;
8467 ret = VOP_SETATTR(tvp, &va, cred);
8468 }
8469
/* Release the new DS file's vnode and the temporary buffers. */
8470 vput(tvp);
8471 acl_free(aclp);
8472 free(dat, M_TEMP);
8473 }
8474 if (tvmp != NULL)
8475 vn_finished_write(tvmp);
8476
8477 /* Update the extended attributes for the newly created DS file. */
8478 if (ret == 0)
8479 ret = vn_extattr_set(vp, IO_NODELOCKED,
8480 EXTATTR_NAMESPACE_SYSTEM, "pnfsd.dsfile",
8481 sizeof(*wpf) * mirrorcnt, (char *)wpf, p);
8482 if (mp != NULL)
8483 vn_finished_write(mp);
8484
8485 /* Get rid of the dontlist entry, so that Layouts can be issued. */
8486 NFSDDONTLISTLOCK();
8487 LIST_REMOVE(mrp, nfsmr_list);
8488 NFSDDONTLISTUNLOCK();
8489 free(mrp, M_NFSDSTATE);
8490 return (ret);
8491 }
8492
8493 /*
8494 * Create a data storage file on the recovered DS.
8495 */
8496 static int
nfsrv_createdsfile(vnode_t vp,fhandle_t * fhp,struct pnfsdsfile * pf,vnode_t dvp,struct nfsdevice * ds,struct ucred * cred,NFSPROC_T * p,vnode_t * tvpp)8497 nfsrv_createdsfile(vnode_t vp, fhandle_t *fhp, struct pnfsdsfile *pf,
8498 vnode_t dvp, struct nfsdevice *ds, struct ucred *cred, NFSPROC_T *p,
8499 vnode_t *tvpp)
8500 {
8501 struct vattr va, nva;
8502 int error;
8503
8504 /* Make data file name based on FH. */
8505 error = VOP_GETATTR(vp, &va, cred);
8506 if (error == 0) {
8507 /* Set the attributes for "vp" to Setattr the DS vp. */
8508 VATTR_NULL(&nva);
8509 nva.va_uid = va.va_uid;
8510 nva.va_gid = va.va_gid;
8511 nva.va_mode = va.va_mode;
8512 nva.va_size = 0;
8513 VATTR_NULL(&va);
8514 va.va_type = VREG;
8515 va.va_mode = nva.va_mode;
8516 NFSD_DEBUG(4, "nfsrv_dscreatefile: dvp=%p pf=%p\n", dvp, pf);
8517 error = nfsrv_dscreate(dvp, &va, &nva, fhp, pf, NULL,
8518 pf->dsf_filename, cred, p, tvpp);
8519 }
8520 return (error);
8521 }
8522
8523 /*
8524 * Look up the MDS file shared locked, and then get the extended attribute
8525 * to find the extant DS file to be copied to the new mirror.
8526 * If successful, *vpp is set to the MDS file's vp and *nvpp is
8527 * set to a DS data file for the MDS file, both exclusively locked.
8528 * The "buf" argument has the pnfsdsfile structure from the MDS file
8529 * in it and buflen is set to its length.
 *
 * NOTE(review): the lookup of the MDS file below uses LOCKSHARED and a
 * comment in the body says the lock is upgraded later, which does not
 * match "both exclusively locked" above; confirm which is accurate.
 *
 * Arguments, as used by the code below:
 * - mdspathp names the MDS file; it must be a regular file (else EISDIR).
 * - dspathp, when not NULL, names a directory on an "nfs" mount whose
 *   nfsdevice is returned in *dsp.
 * - curdspathp, when not NULL, names a directory on an "nfs" mount whose
 *   nfsdevice is returned in *fdsp; it requires dspathp != NULL (else
 *   EPERM).
 * - buf, buflenp and fname are passed to nfsrv_dsgetsockmnt(). On success
 *   buf is treated as an array of struct pnfsdsfile and *pfp points at the
 *   entry selected by ippos. When ippos is -1 an entry is appended and
 *   *buflenp grows by sizeof(struct pnfsdsfile).
 * - *vpp and *pfp are only set when 0 is returned; *dsp and *fdsp are set
 *   to NULL on entry.
 * Returns 0 on success, otherwise an error number. Every error return
 * after the first lookup has succeeded does a vput() of the MDS vnode.
8530 */
8531 int
nfsrv_mdscopymr(char * mdspathp,char * dspathp,char * curdspathp,char * buf,int * buflenp,char * fname,NFSPROC_T * p,struct vnode ** vpp,struct vnode ** nvpp,struct pnfsdsfile ** pfp,struct nfsdevice ** dsp,struct nfsdevice ** fdsp)8532 nfsrv_mdscopymr(char *mdspathp, char *dspathp, char *curdspathp, char *buf,
8533 int *buflenp, char *fname, NFSPROC_T *p, struct vnode **vpp,
8534 struct vnode **nvpp, struct pnfsdsfile **pfp, struct nfsdevice **dsp,
8535 struct nfsdevice **fdsp)
8536 {
8537 struct nameidata nd;
8538 struct vnode *vp, *curvp;
8539 struct pnfsdsfile *pf;
8540 struct nfsmount *nmp, *curnmp;
8541 int dsdir, error, mirrorcnt, ippos;
8542
8543 vp = NULL;
8544 curvp = NULL;
8545 curnmp = NULL;
8546 *dsp = NULL;
8547 *fdsp = NULL;
/* A current DS path without a new DS path is rejected. */
8548 if (dspathp == NULL && curdspathp != NULL)
8549 return (EPERM);
8550
8551 /*
8552 * Look up the MDS file shared locked. The lock will be upgraded
8553 * to an exclusive lock after any rw layouts have been returned.
8554 */
8555 NFSD_DEBUG(4, "mdsopen path=%s\n", mdspathp);
8556 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF, UIO_SYSSPACE,
8557 mdspathp);
8558 error = namei(&nd);
8559 NFSD_DEBUG(4, "lookup=%d\n", error);
8560 if (error != 0)
8561 return (error);
8562 if (nd.ni_vp->v_type != VREG) {
8563 vput(nd.ni_vp);
8564 NFSD_DEBUG(4, "mdspath not reg\n");
8565 return (EISDIR);
8566 }
8567 vp = nd.ni_vp;
8568
8569 if (curdspathp != NULL) {
8570 /*
8571 * Look up the current DS path and find the nfsdev structure for
8572 * it.
8573 */
8574 NFSD_DEBUG(4, "curmdsdev path=%s\n", curdspathp);
8575 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8576 UIO_SYSSPACE, curdspathp);
8577 error = namei(&nd);
8578 NFSD_DEBUG(4, "ds lookup=%d\n", error);
8579 if (error != 0) {
8580 vput(vp);
8581 return (error);
8582 }
8583 if (nd.ni_vp->v_type != VDIR) {
8584 vput(nd.ni_vp);
8585 vput(vp);
8586 NFSD_DEBUG(4, "curdspath not dir\n");
8587 return (ENOTDIR);
8588 }
8589 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8590 vput(nd.ni_vp);
8591 vput(vp);
8592 NFSD_DEBUG(4, "curdspath not an NFS mount\n");
8593 return (ENXIO);
8594 }
8595 curnmp = VFSTONFS(nd.ni_vp->v_mount);
8596
8597 /* Search the nfsdev list for a match. */
8598 NFSDDSLOCK();
8599 *fdsp = nfsv4_findmirror(curnmp);
8600 NFSDDSUNLOCK();
8601 if (*fdsp == NULL)
8602 curnmp = NULL;
8603 if (curnmp == NULL) {
8604 vput(nd.ni_vp);
8605 vput(vp);
8606 NFSD_DEBUG(4, "mdscopymr: no current ds\n");
8607 return (ENXIO);
8608 }
/* Keep the current DS directory vnode until after nfsrv_dsgetsockmnt(). */
8609 curvp = nd.ni_vp;
8610 }
8611
8612 if (dspathp != NULL) {
8613 /* Look up the nfsdev path and find the nfsdev structure. */
8614 NFSD_DEBUG(4, "mdsdev path=%s\n", dspathp);
8615 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
8616 UIO_SYSSPACE, dspathp);
8617 error = namei(&nd);
8618 NFSD_DEBUG(4, "ds lookup=%d\n", error);
8619 if (error != 0) {
8620 vput(vp);
8621 if (curvp != NULL)
8622 vput(curvp);
8623 return (error);
8624 }
/*
 * The new DS path must be a directory and must not resolve to the same
 * vnode as the current DS path (EPERM in that case). The comparison of
 * nd.ni_vp with curvp after the vput() calls only compares pointers.
 */
8625 if (nd.ni_vp->v_type != VDIR || nd.ni_vp == curvp) {
8626 vput(nd.ni_vp);
8627 vput(vp);
8628 if (curvp != NULL)
8629 vput(curvp);
8630 NFSD_DEBUG(4, "dspath not dir\n");
8631 if (nd.ni_vp == curvp)
8632 return (EPERM);
8633 return (ENOTDIR);
8634 }
8635 if (strcmp(nd.ni_vp->v_mount->mnt_vfc->vfc_name, "nfs") != 0) {
8636 vput(nd.ni_vp);
8637 vput(vp);
8638 if (curvp != NULL)
8639 vput(curvp);
8640 NFSD_DEBUG(4, "dspath not an NFS mount\n");
8641 return (ENXIO);
8642 }
8643 nmp = VFSTONFS(nd.ni_vp->v_mount);
8644
8645 /*
8646 * Search the nfsdevice list for a match. If curnmp == NULL,
8647 * this is a recovery and there must be a mirror.
8648 */
8649 NFSDDSLOCK();
8650 if (curnmp == NULL)
8651 *dsp = nfsrv_findmirroredds(nmp);
8652 else
8653 *dsp = nfsv4_findmirror(nmp);
8654 NFSDDSUNLOCK();
8655 if (*dsp == NULL) {
8656 vput(nd.ni_vp);
8657 vput(vp);
8658 if (curvp != NULL)
8659 vput(curvp);
8660 NFSD_DEBUG(4, "mdscopymr: no ds\n");
8661 return (ENXIO);
8662 }
8663 } else {
/* No new DS path: nd.ni_vp == NULL marks this case for the code below. */
8664 nd.ni_vp = NULL;
8665 nmp = NULL;
8666 }
8667
8668 /*
8669 * Get a vp for an available DS data file using the extended
8670 * attribute on the MDS file.
8671 * If there is a valid entry for the new DS in the extended attribute
8672 * on the MDS file (as checked via the nmp argument),
8673 * nfsrv_dsgetsockmnt() returns EEXIST, so no copying will occur.
8674 */
8675 error = nfsrv_dsgetsockmnt(vp, 0, buf, buflenp, &mirrorcnt, p,
8676 NULL, NULL, NULL, fname, nvpp, &nmp, curnmp, &ippos, &dsdir);
8677 if (curvp != NULL)
8678 vput(curvp);
8679 if (nd.ni_vp == NULL) {
/*
 * dspathp was NULL, so *dsp is looked up from the nmp that
 * nfsrv_dsgetsockmnt() filled in; without one the call fails with ENXIO.
 */
8680 if (error == 0 && nmp != NULL) {
8681 /* Search the nfsdev list for a match. */
8682 NFSDDSLOCK();
8683 *dsp = nfsrv_findmirroredds(nmp);
8684 NFSDDSUNLOCK();
8685 }
8686 if (error == 0 && (nmp == NULL || *dsp == NULL)) {
8687 if (nvpp != NULL && *nvpp != NULL) {
8688 vput(*nvpp);
8689 *nvpp = NULL;
8690 }
8691 error = ENXIO;
8692 }
8693 } else
8694 vput(nd.ni_vp);
8695
8696 /*
8697 * When dspathp != NULL and curdspathp == NULL, this is a recovery
8698 * and is only allowed if there is a 0.0.0.0 IP address entry.
8699 * When curdspathp != NULL, the ippos will be set to that entry.
8700 */
8701 if (error == 0 && dspathp != NULL && ippos == -1) {
8702 if (nvpp != NULL && *nvpp != NULL) {
8703 vput(*nvpp);
8704 *nvpp = NULL;
8705 }
8706 error = ENXIO;
8707 }
8708 if (error == 0) {
8709 *vpp = vp;
8710
8711 pf = (struct pnfsdsfile *)buf;
8712 if (ippos == -1) {
8713 /* If no zeroip pnfsdsfile, add one. */
/*
 * NOTE(review): this writes one entry past the current *buflenp
 * bytes of buf, so it assumes the caller's buffer has room for an
 * extra struct pnfsdsfile; confirm against the callers.
 */
8714 ippos = *buflenp / sizeof(*pf);
8715 *buflenp += sizeof(*pf);
8716 pf += ippos;
8717 pf->dsf_dir = dsdir;
8718 strlcpy(pf->dsf_filename, fname,
8719 sizeof(pf->dsf_filename));
8720 } else
8721 pf += ippos;
8722 *pfp = pf;
8723 } else
8724 vput(vp);
8725 return (error);
8726 }
8727
8728 /*
8729 * Search for a matching pnfsd mirror device structure, base on the nmp arg.
8730 * Return one if found, NULL otherwise.
8731 */
8732 static struct nfsdevice *
nfsrv_findmirroredds(struct nfsmount * nmp)8733 nfsrv_findmirroredds(struct nfsmount *nmp)
8734 {
8735 struct nfsdevice *ds, *fndds;
8736 int fndmirror;
8737
8738 mtx_assert(NFSDDSMUTEXPTR, MA_OWNED);
8739 /*
8740 * Search the DS server list for a match with nmp.
8741 * Remove the DS entry if found and there is a mirror.
8742 */
8743 fndds = NULL;
8744 fndmirror = 0;
8745 if (nfsrv_devidcnt == 0)
8746 return (fndds);
8747 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8748 if (ds->nfsdev_nmp == nmp) {
8749 NFSD_DEBUG(4, "nfsrv_findmirroredds: fnd main ds\n");
8750 fndds = ds;
8751 break;
8752 }
8753 }
8754 if (fndds == NULL)
8755 return (fndds);
8756 if (fndds->nfsdev_mdsisset == 0 && nfsrv_faildscnt > 0)
8757 fndmirror = 1;
8758 else if (fndds->nfsdev_mdsisset != 0) {
8759 /* For the fsid is set case, search for a mirror. */
8760 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8761 if (ds != fndds && ds->nfsdev_nmp != NULL &&
8762 ds->nfsdev_mdsisset != 0 &&
8763 fsidcmp(&ds->nfsdev_mdsfsid,
8764 &fndds->nfsdev_mdsfsid) == 0) {
8765 fndmirror = 1;
8766 break;
8767 }
8768 }
8769 }
8770 if (fndmirror == 0) {
8771 NFSD_DEBUG(4, "nfsrv_findmirroredds: no mirror for DS\n");
8772 return (NULL);
8773 }
8774 return (fndds);
8775 }
8776
8777 /*
8778 * Mark the appropriate devid and all associated layout as "out of space".
8779 */
8780 void
nfsrv_marknospc(char * devid,bool setit)8781 nfsrv_marknospc(char *devid, bool setit)
8782 {
8783 struct nfsdevice *ds;
8784 struct nfslayout *lyp;
8785 struct nfslayouthash *lhyp;
8786 int i;
8787
8788 NFSDDSLOCK();
8789 TAILQ_FOREACH(ds, &nfsrv_devidhead, nfsdev_list) {
8790 if (NFSBCMP(ds->nfsdev_deviceid, devid, NFSX_V4DEVICEID) == 0) {
8791 NFSD_DEBUG(1, "nfsrv_marknospc: devid %d\n", setit);
8792 ds->nfsdev_nospc = setit;
8793 }
8794 }
8795 NFSDDSUNLOCK();
8796
8797 for (i = 0; i < nfsrv_layouthashsize; i++) {
8798 lhyp = &nfslayouthash[i];
8799 NFSLOCKLAYOUT(lhyp);
8800 TAILQ_FOREACH(lyp, &lhyp->list, lay_list) {
8801 if (NFSBCMP(lyp->lay_deviceid, devid,
8802 NFSX_V4DEVICEID) == 0) {
8803 NFSD_DEBUG(1, "nfsrv_marknospc: layout %d\n",
8804 setit);
8805 if (setit)
8806 lyp->lay_flags |= NFSLAY_NOSPC;
8807 else
8808 lyp->lay_flags &= ~NFSLAY_NOSPC;
8809 }
8810 }
8811 NFSUNLOCKLAYOUT(lhyp);
8812 }
8813 }
8814
8815 /*
8816 * Check to see if SP4_MACH_CRED is in use and, if it is, check that the
8817 * correct machine credential is being used.
8818 */
8819 static int
nfsrv_checkmachcred(int op,struct nfsrv_descript * nd,struct nfsclient * clp)8820 nfsrv_checkmachcred(int op, struct nfsrv_descript *nd, struct nfsclient *clp)
8821 {
8822
8823 if ((clp->lc_flags & LCL_MACHCRED) == 0 ||
8824 !NFSISSET_OPBIT(&clp->lc_mustops, op))
8825 return (0);
8826 KASSERT((nd->nd_flag & ND_NFSV41) != 0,
8827 ("nfsrv_checkmachcred: MachCred for NFSv4.0"));
8828 if ((nd->nd_flag & (ND_GSSINTEGRITY | ND_GSSPRIVACY)) != 0 &&
8829 nd->nd_princlen == clp->lc_namelen &&
8830 !NFSBCMP(nd->nd_principal, clp->lc_name, nd->nd_princlen))
8831 return (0);
8832 return (NFSERR_AUTHERR | AUTH_TOOWEAK);
8833 }
8834
8835 /*
8836 * Issue a delegation and, optionally set rflagsp for why not.
8837 */
8838 static void
nfsrv_issuedelegation(struct vnode * vp,struct nfsclient * clp,struct nfsrv_descript * nd,int delegate,int writedeleg,int readonly,u_quad_t filerev,uint64_t rdonly,struct nfsstate ** new_delegp,struct nfsstate * new_stp,struct nfslockfile * lfp,uint32_t * rflagsp,nfsv4stateid_t * delegstateidp)8839 nfsrv_issuedelegation(struct vnode *vp, struct nfsclient *clp,
8840 struct nfsrv_descript *nd, int delegate, int writedeleg, int readonly,
8841 u_quad_t filerev, uint64_t rdonly, struct nfsstate **new_delegp,
8842 struct nfsstate *new_stp, struct nfslockfile *lfp, uint32_t *rflagsp,
8843 nfsv4stateid_t *delegstateidp)
8844 {
8845 struct nfsstate *up_deleg, *new_deleg;
8846
8847 new_deleg = *new_delegp;
8848 up_deleg = LIST_FIRST(&lfp->lf_deleg);
8849 if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0)
8850 *rflagsp |= NFSV4OPEN_WDNOTWANTED;
8851 else if (nfsrv_issuedelegs == 0)
8852 *rflagsp |= NFSV4OPEN_WDSUPPFTYPE;
8853 else if (NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt))
8854 *rflagsp |= NFSV4OPEN_WDRESOURCE;
8855 else if (delegate == 0 || !NFSVNO_DELEGOK(vp) ||
8856 (writedeleg == 0 && (readonly == 0 ||
8857 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0)) ||
8858 (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) !=
8859 LCL_CALLBACKSON) {
8860 /* Is this a downgrade attempt? */
8861 if (up_deleg != NULL && up_deleg->ls_clp == clp &&
8862 (up_deleg->ls_flags & NFSLCK_DELEGWRITE) != 0 &&
8863 (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0)
8864 *rflagsp |= NFSV4OPEN_WDNOTSUPPDOWNGRADE;
8865 else
8866 *rflagsp |= NFSV4OPEN_WDCONTENTION;
8867 } else if (up_deleg != NULL &&
8868 (up_deleg->ls_flags & NFSLCK_DELEGREAD) != 0 &&
8869 (new_stp->ls_flags & NFSLCK_WANTWDELEG) != 0) {
8870 /* This is an atomic upgrade. */
8871 up_deleg->ls_stateid.seqid++;
8872 delegstateidp->seqid = up_deleg->ls_stateid.seqid;
8873 delegstateidp->other[0] = up_deleg->ls_stateid.other[0];
8874 delegstateidp->other[1] = up_deleg->ls_stateid.other[1];
8875 delegstateidp->other[2] = up_deleg->ls_stateid.other[2];
8876 up_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8877 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8878 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8879 nfsrv_writedelegcnt++;
8880 } else {
8881 new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1;
8882 new_deleg->ls_stateid.other[0] = delegstateidp->other[0]
8883 = clp->lc_clientid.lval[0];
8884 new_deleg->ls_stateid.other[1] = delegstateidp->other[1]
8885 = clp->lc_clientid.lval[1];
8886 new_deleg->ls_stateid.other[2] = delegstateidp->other[2]
8887 = nfsrv_nextstateindex(clp);
8888 if (writedeleg && !rdonly &&
8889 (nfsrv_writedelegifpos || !readonly) &&
8890 (new_stp->ls_flags & (NFSLCK_WANTRDELEG |
8891 NFSLCK_WANTWDELEG)) != NFSLCK_WANTRDELEG) {
8892 new_deleg->ls_flags = (NFSLCK_DELEGWRITE |
8893 NFSLCK_READACCESS | NFSLCK_WRITEACCESS);
8894 *rflagsp |= NFSV4OPEN_WRITEDELEGATE;
8895 nfsrv_writedelegcnt++;
8896 } else {
8897 new_deleg->ls_flags = (NFSLCK_DELEGREAD |
8898 NFSLCK_READACCESS);
8899 *rflagsp |= NFSV4OPEN_READDELEGATE;
8900 }
8901 new_deleg->ls_uid = new_stp->ls_uid;
8902 new_deleg->ls_lfp = lfp;
8903 new_deleg->ls_clp = clp;
8904 new_deleg->ls_filerev = filerev;
8905 new_deleg->ls_compref = nd->nd_compref;
8906 new_deleg->ls_lastrecall = 0;
8907 LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file);
8908 LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid),
8909 new_deleg, ls_hash);
8910 LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list);
8911 *new_delegp = NULL;
8912 NFSD_VNET(nfsstatsv1_p)->srvdelegates++;
8913 nfsrv_openpluslock++;
8914 nfsrv_delegatecnt++;
8915 }
8916 }
8917