1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33 * Copyright 2019 Nexenta Systems, Inc.
34 * Copyright 2019 Nexenta by DDN, Inc.
35 * Copyright 2021 Racktop Systems, Inc.
36 */
37
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65
66 #include <fs/fs_reparse.h>
67
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90
/*
 * Tunables.
 *
 * The compile-time defaults come first; the variables below are seeded
 * from them.
 */
#define	RFS4_MAXLOCK_TRIES	4	/* Try to get the lock this many times */
#define	RFS4_LOCK_DELAY		10	/* Milliseconds */

static int	rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
static clock_t	rfs4_lock_delay = RFS4_LOCK_DELAY;

extern struct svc_ops	rdma_svc_ops;
extern int		nfs_loaned_buffers;
/* End of Tunables */
98
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100
/*
 * Bump the change sequence kept in a stateid (sp->bits.chgseq) so that
 * the stateid visibly changes.  The expression evaluates to the new,
 * already incremented, value.
 */
#define	next_stateid(sp) \
	(++((sp)->bits.chgseq))
105
/*
 * Minimum sizes used by READDIR processing.
 *
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of the smallest possible dirent.
 * It is used to return NFS4ERR_TOOSMALL when a client specifies a
 * maxcount that cannot hold even the smallest XDR-encoded dirent:
 *
 *	cookie				 8 bytes
 *	name_len			 4 bytes
 *	smallest (padded) name		 4 bytes
 *	bitmap4_len			12 bytes  (we always encode len=2 bm4)
 *	attrlist4_len			 4 bytes
 *	next boolean			 4 bytes
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of a READDIR op reply carrying
 * the smallest possible entry4 (assumes no attrs requested):
 *
 *	nfsstat4			 4 bytes
 *	verifier4			 8 bytes
 *	entry4list bool			 4 bytes
 *	entry4				36 bytes
 *	eof bool			 4 bytes
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of the buffer the server hands
 * to VOP_READDIR.  DIRENT64_RECLEN() gives the dirent size needed for a
 * given name length and MAXNAMELEN is the longest filename Solaris
 * allows.  The first two terms leave room for the "." and ".." entries,
 * a minor tweak to try to guarantee that the buffer can hold ".", ".."
 * and the largest possible solaris dirent64.
 */
#define	RFS4_MINLEN_ENTRY4	(8 + 4 + 4 + 12 + 4 + 4)
#define	RFS4_MINLEN_RDDIR4	\
	(4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define	RFS4_MINLEN_RDDIR_BUF	\
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
140
/*
 * Estimate how much of the client's dircount one dirent64 consumes.
 *
 * Padding to 4 bytes would match what XDR does, but the dirents UFS
 * gives us are already padded to 8, so we simply take what we are
 * given; dircount is only a hint anyway.  The solaris kernel is
 * currently ASCII only, so there is no point in calling the UTF8
 * functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
 */
#define	DIRENT64_TO_DIRCOUNT(dp)	\
	(DIRENT64_NAMELEN((dp)->d_reclen) + 3 * BYTES_PER_XDR_UNIT)
156
157
158 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
159
160 u_longlong_t nfs4_srv_caller_id;
161 uint_t nfs4_srv_vkey = 0;
162
163 void rfs4_init_compound_state(struct compound_state *);
164
165 static void nullfree(caddr_t);
166 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 struct compound_state *);
172 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 struct compound_state *);
174 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 struct compound_state *);
176 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 struct svc_req *, struct compound_state *);
178 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 struct svc_req *, struct compound_state *);
180 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 struct compound_state *);
182 static void rfs4_op_getattr_free(nfs_resop4 *);
183 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_getfh_free(nfs_resop4 *);
186 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 struct compound_state *);
188 static void rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 struct compound_state *);
194 static void lock_denied_free(nfs_resop4 *);
195 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 struct compound_state *);
197 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 struct compound_state *);
199 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 struct compound_state *);
201 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 struct compound_state *);
203 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
204 struct svc_req *req, struct compound_state *cs);
205 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 struct compound_state *);
207 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 struct compound_state *);
209 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
210 struct svc_req *, struct compound_state *);
211 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
212 struct svc_req *, struct compound_state *);
213 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 struct compound_state *);
215 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 struct compound_state *);
217 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 struct compound_state *);
219 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 struct compound_state *);
221 static void rfs4_op_read_free(nfs_resop4 *);
222 static void rfs4_op_readdir_free(nfs_resop4 *resop);
223 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 struct compound_state *);
225 static void rfs4_op_readlink_free(nfs_resop4 *);
226 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
227 struct svc_req *, struct compound_state *);
228 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 struct compound_state *);
230 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 struct compound_state *);
232 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 struct compound_state *);
234 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 struct compound_state *);
236 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 struct compound_state *);
238 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 struct compound_state *);
240 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 struct compound_state *);
242 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 struct compound_state *);
244 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
245 struct svc_req *, struct compound_state *);
246 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
247 struct svc_req *req, struct compound_state *);
248 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
249 struct compound_state *);
250 static void rfs4_op_secinfo_free(nfs_resop4 *);
251
252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
253 struct svc_req *req, struct compound_state *cs);
254 void rfs4x_exchange_id_free(nfs_resop4 *);
255
256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
257 struct svc_req *req, struct compound_state *cs);
258
259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
260 struct svc_req *req, compound_state_t *cs);
261
262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
263 struct svc_req *req, struct compound_state *cs);
264
265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
266 struct svc_req *req, compound_state_t *cs);
267
268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
269 struct svc_req *req, compound_state_t *cs);
270
271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
272 struct svc_req *req, compound_state_t *cs);
273
274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
275 struct svc_req *req, compound_state_t *cs);
276
277 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
278 struct svc_req *);
279 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
280 void rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
281
282 /*
283 * translation table for attrs
284 */
285 struct nfs4_ntov_table {
286 union nfs4_attr_u *na;
287 uint8_t amap[NFS4_MAXNUM_ATTRS];
288 int attrcnt;
289 bool_t vfsstat;
290 };
291
292 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
293 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
294 struct nfs4_svgetit_arg *sargp);
295
296 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
297 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
298 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
299
300 static void hanfsv4_failover(nfs4_srv_t *);
301
302 fem_t *deleg_rdops;
303 fem_t *deleg_wrops;
304
/*
 * NFS4 op dispatch table entry: one per operation number.
 */
struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
314
315 static struct rfsv4disp rfsv4disptab[] = {
316 /*
317 * NFS VERSION 4
318 */
319
320 /* RFS_NULL = 0 */
321 {rfs4_op_illegal, nullfree, 0},
322
323 /* UNUSED = 1 */
324 {rfs4_op_illegal, nullfree, 0},
325
326 /* UNUSED = 2 */
327 {rfs4_op_illegal, nullfree, 0},
328
329 /* OP_ACCESS = 3 */
330 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
331
332 /* OP_CLOSE = 4 */
333 {rfs4_op_close, nullfree, 0},
334
335 /* OP_COMMIT = 5 */
336 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
337
338 /* OP_CREATE = 6 */
339 {rfs4_op_create, nullfree, 0},
340
341 /* OP_DELEGPURGE = 7 */
342 {rfs4_op_delegpurge, nullfree, 0},
343
344 /* OP_DELEGRETURN = 8 */
345 {rfs4_op_delegreturn, nullfree, 0},
346
347 /* OP_GETATTR = 9 */
348 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
349
350 /* OP_GETFH = 10 */
351 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
352
353 /* OP_LINK = 11 */
354 {rfs4_op_link, nullfree, 0},
355
356 /* OP_LOCK = 12 */
357 {rfs4_op_lock, lock_denied_free, 0},
358
359 /* OP_LOCKT = 13 */
360 {rfs4_op_lockt, lock_denied_free, 0},
361
362 /* OP_LOCKU = 14 */
363 {rfs4_op_locku, nullfree, 0},
364
365 /* OP_LOOKUP = 15 */
366 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
367
368 /* OP_LOOKUPP = 16 */
369 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
370
371 /* OP_NVERIFY = 17 */
372 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
373
374 /* OP_OPEN = 18 */
375 {rfs4_op_open, rfs4_free_reply, 0},
376
377 /* OP_OPENATTR = 19 */
378 {rfs4_op_openattr, nullfree, 0},
379
380 /* OP_OPEN_CONFIRM = 20 */
381 {rfs4_op_open_confirm, nullfree, 0},
382
383 /* OP_OPEN_DOWNGRADE = 21 */
384 {rfs4_op_open_downgrade, nullfree, 0},
385
386 /* OP_OPEN_PUTFH = 22 */
387 {rfs4_op_putfh, nullfree, RPC_ALL},
388
389 /* OP_PUTPUBFH = 23 */
390 {rfs4_op_putpubfh, nullfree, RPC_ALL},
391
392 /* OP_PUTROOTFH = 24 */
393 {rfs4_op_putrootfh, nullfree, RPC_ALL},
394
395 /* OP_READ = 25 */
396 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
397
398 /* OP_READDIR = 26 */
399 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
400
401 /* OP_READLINK = 27 */
402 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
403
404 /* OP_REMOVE = 28 */
405 {rfs4_op_remove, nullfree, 0},
406
407 /* OP_RENAME = 29 */
408 {rfs4_op_rename, nullfree, 0},
409
410 /* OP_RENEW = 30 */
411 {rfs4_op_renew, nullfree, 0},
412
413 /* OP_RESTOREFH = 31 */
414 {rfs4_op_restorefh, nullfree, RPC_ALL},
415
416 /* OP_SAVEFH = 32 */
417 {rfs4_op_savefh, nullfree, RPC_ALL},
418
419 /* OP_SECINFO = 33 */
420 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
421
422 /* OP_SETATTR = 34 */
423 {rfs4_op_setattr, nullfree, 0},
424
425 /* OP_SETCLIENTID = 35 */
426 {rfs4_op_setclientid, nullfree, 0},
427
428 /* OP_SETCLIENTID_CONFIRM = 36 */
429 {rfs4_op_setclientid_confirm, nullfree, 0},
430
431 /* OP_VERIFY = 37 */
432 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
433
434 /* OP_WRITE = 38 */
435 {rfs4_op_write, nullfree, 0},
436
437 /* OP_RELEASE_LOCKOWNER = 39 */
438 {rfs4_op_release_lockowner, nullfree, 0},
439
440 /*
441 * NFSv4.1 operations
442 */
443
444 /* OP_BACKCHANNEL_CTL = 40 */
445 {rfs4_op_notsup, nullfree, 0},
446
447 /* OP_BIND_CONN_TO_SESSION = 41 */
448 {rfs4x_op_bind_conn_to_session, nullfree, 0},
449
450 /* OP_EXCHANGE_ID = 42 */
451 {rfs4x_op_exchange_id, rfs4x_exchange_id_free, 0},
452
453 /* OP_CREATE_SESSION = 43 */
454 {rfs4x_op_create_session, nullfree, 0},
455
456 /* OP_DESTROY_SESSION = 44 */
457 {rfs4x_op_destroy_session, nullfree, 0},
458
459 /* OP_FREE_STATEID = 45 */
460 {rfs4_op_notsup, nullfree, 0},
461
462 /* OP_GET_DIR_DELEGATION = 46 */
463 {rfs4_op_notsup, nullfree, 0},
464
465 /* OP_GETDEVICEINFO = 47 */
466 {rfs4_op_notsup, nullfree, 0},
467
468 /* OP_GETDEVICELIST = 48 */
469 {rfs4_op_notsup, nullfree, 0},
470
471 /* OP_LAYOUTCOMMIT = 49 */
472 {rfs4_op_notsup, nullfree, 0},
473
474 /* OP_LAYOUTGET = 50 */
475 {rfs4_op_notsup, nullfree, 0},
476
477 /* OP_LAYOUTRETURN = 51 */
478 {rfs4_op_notsup, nullfree, 0},
479
480 /* OP_SECINFO_NO_NAME = 52 */
481 {rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
482
483 /* OP_SEQUENCE = 53 */
484 {rfs4x_op_sequence, nullfree, 0},
485
486 /* OP_SET_SSV = 54 */
487 {rfs4_op_notsup, nullfree, 0},
488
489 /* OP_TEST_STATEID = 55 */
490 {rfs4_op_notsup, nullfree, 0},
491
492 /* OP_WANT_DELEGATION = 56 */
493 {rfs4_op_notsup, nullfree, 0},
494
495 /* OP_DESTROY_CLIENTID = 57 */
496 {rfs4x_op_destroy_clientid, nullfree, 0},
497
498 /* OP_RECLAIM_COMPLETE = 58 */
499 {rfs4x_op_reclaim_complete, nullfree, 0},
500 };
501
502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
503
504 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
505
#ifdef DEBUG

/* Debug knobs */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab, with
 * one extra trailing entry for illegal ops.
 */
static char *rfs4_op_string[] = {
	/* 0 - 2 */
	"rfs4_op_null", "rfs4_op_1 unused", "rfs4_op_2 unused",
	/* 3 - 6 */
	"rfs4_op_access", "rfs4_op_close", "rfs4_op_commit", "rfs4_op_create",
	/* 7 - 10 */
	"rfs4_op_delegpurge", "rfs4_op_delegreturn", "rfs4_op_getattr",
	"rfs4_op_getfh",
	/* 11 - 14 */
	"rfs4_op_link", "rfs4_op_lock", "rfs4_op_lockt", "rfs4_op_locku",
	/* 15 - 17 */
	"rfs4_op_lookup", "rfs4_op_lookupp", "rfs4_op_nverify",
	/* 18 - 21 */
	"rfs4_op_open", "rfs4_op_openattr", "rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	/* 22 - 24 */
	"rfs4_op_putfh", "rfs4_op_putpubfh", "rfs4_op_putrootfh",
	/* 25 - 27 */
	"rfs4_op_read", "rfs4_op_readdir", "rfs4_op_readlink",
	/* 28 - 30 */
	"rfs4_op_remove", "rfs4_op_rename", "rfs4_op_renew",
	/* 31 - 33 */
	"rfs4_op_restorefh", "rfs4_op_savefh", "rfs4_op_secinfo",
	/* 34 - 36 */
	"rfs4_op_setattr", "rfs4_op_setclientid", "rfs4_op_setclient_confirm",
	/* 37 - 39 */
	"rfs4_op_verify", "rfs4_op_write", "rfs4_op_release_lockowner",

	/* NFSv4.1 */
	/* 40 - 42 */
	"backchannel_ctl", "bind_conn_to_session", "exchange_id",
	/* 43 - 45 */
	"create_session", "destroy_session", "free_stateid",
	/* 46 - 48 */
	"get_dir_delegation", "getdeviceinfo", "getdevicelist",
	/* 49 - 51 */
	"layoutcommit", "layoutget", "layoutreturn",
	/* 52 - 54 */
	"secinfo_no_name", "sequence", "set_ssv",
	/* 55 - 58 */
	"test_stateid", "want_delegation", "destroy_clientid",
	"reclaim_complete",

	/* illegal op */
	"rfs4_op_illegal"
};

#endif
577
578 void rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
579
580 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
581
582 extern void rfs4_free_fs_locations4(fs_locations4 *);
583
/*
 * Step to the dirent64 that follows dp in a buffer: advance by dp's own
 * d_reclen bytes.  Any earlier definition of nextdp is dropped first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	\
	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
588
589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
590 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
591 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
592 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
593 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
594 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
595 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
596 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
597 NULL, NULL
598 };
599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
600 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
601 VOPNAME_READ, { .femop_read = deleg_wr_read },
602 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
603 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
604 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
605 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
606 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
607 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
608 NULL, NULL
609 };
610
611 nfs4_srv_t *
nfs4_get_srv(void)612 nfs4_get_srv(void)
613 {
614 nfs_globals_t *ng = nfs_srv_getzg();
615 nfs4_srv_t *srv = ng->nfs4_srv;
616 ASSERT(srv != NULL);
617 return (srv);
618 }
619
620 void
rfs4_srv_zone_init(nfs_globals_t * ng)621 rfs4_srv_zone_init(nfs_globals_t *ng)
622 {
623 nfs4_srv_t *nsrv4;
624 timespec32_t verf;
625
626 nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
627
628 /*
629 * The following algorithm attempts to find a unique verifier
630 * to be used as the write verifier returned from the server
631 * to the client. It is important that this verifier change
632 * whenever the server reboots. Of secondary importance, it
633 * is important for the verifier to be unique between two
634 * different servers.
635 *
636 * Thus, an attempt is made to use the system hostid and the
637 * current time in seconds when the nfssrv kernel module is
638 * loaded. It is assumed that an NFS server will not be able
639 * to boot and then to reboot in less than a second. If the
640 * hostid has not been set, then the current high resolution
641 * time is used. This will ensure different verifiers each
642 * time the server reboots and minimize the chances that two
643 * different servers will have the same verifier.
644 * XXX - this is broken on LP64 kernels.
645 */
646 verf.tv_sec = (time_t)zone_get_hostid(NULL);
647 if (verf.tv_sec != 0) {
648 verf.tv_nsec = gethrestime_sec();
649 } else {
650 timespec_t tverf;
651
652 gethrestime(&tverf);
653 verf.tv_sec = (time_t)tverf.tv_sec;
654 verf.tv_nsec = tverf.tv_nsec;
655 }
656 nsrv4->write4verf = *(uint64_t *)&verf;
657
658 /* Used to manage create/destroy of server state */
659 nsrv4->nfs4_server_state = NULL;
660 nsrv4->nfs4_cur_servinst = NULL;
661 nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
662 mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
663 mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
664 mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
665 rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
666
667 ng->nfs4_srv = nsrv4;
668 }
669
670 void
rfs4_srv_zone_fini(nfs_globals_t * ng)671 rfs4_srv_zone_fini(nfs_globals_t *ng)
672 {
673 nfs4_srv_t *nsrv4 = ng->nfs4_srv;
674
675 ng->nfs4_srv = NULL;
676
677 mutex_destroy(&nsrv4->deleg_lock);
678 mutex_destroy(&nsrv4->state_lock);
679 mutex_destroy(&nsrv4->servinst_lock);
680 rw_destroy(&nsrv4->deleg_policy_lock);
681
682 kmem_free(nsrv4, sizeof (*nsrv4));
683 }
684
685 void
rfs4_srvrinit(void)686 rfs4_srvrinit(void)
687 {
688 extern void rfs4_attr_init();
689
690 rfs4_attr_init();
691
692 if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
693 rfs4_disable_delegation();
694 } else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
695 &deleg_wrops) != 0) {
696 rfs4_disable_delegation();
697 fem_free(deleg_rdops);
698 }
699
700 nfs4_srv_caller_id = fs_new_caller_id();
701 lockt_sysid = lm_alloc_sysidt();
702 vsd_create(&nfs4_srv_vkey, NULL);
703 rfs4_state_g_init();
704 }
705
706 void
rfs4_srvrfini(void)707 rfs4_srvrfini(void)
708 {
709 if (lockt_sysid != LM_NOSYSID) {
710 lm_free_sysidt(lockt_sysid);
711 lockt_sysid = LM_NOSYSID;
712 }
713
714 rfs4_state_g_fini();
715
716 fem_free(deleg_rdops);
717 fem_free(deleg_wrops);
718 }
719
720 void
rfs4_do_server_start(int server_upordown,int srv_delegation,nfs4_minor_t nfs4_minor_max,int cluster_booted)721 rfs4_do_server_start(int server_upordown, int srv_delegation,
722 nfs4_minor_t nfs4_minor_max, int cluster_booted)
723 {
724 nfs4_srv_t *nsrv4 = nfs4_get_srv();
725
726 /* Is this a warm start? */
727 if (server_upordown == NFS_SERVER_QUIESCED) {
728 cmn_err(CE_NOTE, "nfs4_srv: "
729 "server was previously quiesced; "
730 "existing NFSv4 state will be re-used");
731
732 /*
733 * HA-NFSv4: this is also the signal
734 * that a Resource Group failover has
735 * occurred.
736 */
737 if (cluster_booted)
738 hanfsv4_failover(nsrv4);
739 } else {
740 /* Cold start */
741 nsrv4->rfs4_start_time = 0;
742 rfs4_state_zone_init(nsrv4);
743 nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
744 nfs4_drc_hash);
745
746 /*
747 * The nfsd service was started with the -s option
748 * we need to pull in any state from the paths indicated.
749 */
750 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
751 /* read in the stable storage state from these paths */
752 rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
753 rfs4_dss_newpaths);
754 }
755 }
756
757 nsrv4->nfs4_minor_max = nfs4_minor_max;
758
759 /* Check if delegation is to be enabled */
760 if (srv_delegation != FALSE)
761 rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
762 }
763
764 void
rfs4_init_compound_state(struct compound_state * cs)765 rfs4_init_compound_state(struct compound_state *cs)
766 {
767 bzero(cs, sizeof (*cs));
768 cs->cont = TRUE;
769 cs->access = CS_ACCESS_DENIED;
770 cs->deleg = FALSE;
771 cs->mandlock = FALSE;
772 cs->fh.nfs_fh4_val = cs->fhbuf;
773 }
774
775 /* Do cleanup of the compound_state */
776 void
rfs4_fini_compound_state(struct compound_state * cs)777 rfs4_fini_compound_state(struct compound_state *cs)
778 {
779 if (cs->vp) {
780 VN_RELE(cs->vp);
781 }
782 if (cs->saved_vp) {
783 VN_RELE(cs->saved_vp);
784 }
785 if (cs->cr) {
786 crfree(cs->cr);
787 }
788 if (cs->saved_fh.nfs_fh4_val) {
789 kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
790 }
791 if (cs->sp) {
792 rfs4x_session_rele(cs->sp);
793 }
794 }
795
796 void
rfs4_grace_start(rfs4_servinst_t * sip)797 rfs4_grace_start(rfs4_servinst_t *sip)
798 {
799 rw_enter(&sip->rwlock, RW_WRITER);
800 sip->start_time = nfs_sys_uptime();
801 sip->grace_period = rfs4_grace_period;
802 rw_exit(&sip->rwlock);
803 }
804
805 /*
806 * returns true if the instance's grace period has never been started
807 */
808 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)809 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
810 {
811 time_t start_time;
812
813 rw_enter(&sip->rwlock, RW_READER);
814 start_time = sip->start_time;
815 rw_exit(&sip->rwlock);
816
817 return (start_time == 0);
818 }
819
820 /*
821 * Indicates if server instance is within the
822 * grace period.
823 */
824 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)825 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
826 {
827 time_t grace_expiry;
828
829 /* All clients called reclaim-complete */
830 if (sip->nreclaim == 0 || sip->grace_period == 0)
831 return (0);
832
833 rw_enter(&sip->rwlock, RW_READER);
834 grace_expiry = sip->start_time + sip->grace_period;
835 rw_exit(&sip->rwlock);
836
837 if (nfs_sys_uptime() < grace_expiry)
838 return (1);
839
840 /* Once grace period ends, optimize next calls */
841 sip->grace_period = 0;
842 return (0);
843 }
844
845 int
rfs4_clnt_in_grace(rfs4_client_t * cp)846 rfs4_clnt_in_grace(rfs4_client_t *cp)
847 {
848 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849
850 return (rfs4_servinst_in_grace(cp->rc_server_instance));
851 }
852
853 /*
854 * reset all currently active grace periods
855 */
856 void
rfs4_grace_reset_all(nfs4_srv_t * nsrv4)857 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
858 {
859 rfs4_servinst_t *sip;
860
861 mutex_enter(&nsrv4->servinst_lock);
862 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
863 if (rfs4_servinst_in_grace(sip))
864 rfs4_grace_start(sip);
865 mutex_exit(&nsrv4->servinst_lock);
866 }
867
868 /*
869 * start any new instances' grace periods
870 */
871 void
rfs4_grace_start_new(nfs4_srv_t * nsrv4)872 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
873 {
874 rfs4_servinst_t *sip;
875
876 mutex_enter(&nsrv4->servinst_lock);
877 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
878 if (rfs4_servinst_grace_new(sip))
879 rfs4_grace_start(sip);
880 mutex_exit(&nsrv4->servinst_lock);
881 }
882
883 static rfs4_dss_path_t *
rfs4_dss_newpath(nfs4_srv_t * nsrv4,rfs4_servinst_t * sip,char * path,unsigned index)884 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
885 char *path, unsigned index)
886 {
887 size_t len;
888 rfs4_dss_path_t *dss_path;
889
890 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
891
892 /*
893 * Take a copy of the string, since the original may be overwritten.
894 * Sadly, no strdup() in the kernel.
895 */
896 /* allow for NUL */
897 len = strlen(path) + 1;
898 dss_path->path = kmem_alloc(len, KM_SLEEP);
899 (void) strlcpy(dss_path->path, path, len);
900
901 /* associate with servinst */
902 dss_path->sip = sip;
903 dss_path->index = index;
904
905 /*
906 * Add to list of served paths.
907 * No locking required, as we're only ever called at startup.
908 */
909 if (nsrv4->dss_pathlist == NULL) {
910 /* this is the first dss_path_t */
911
912 /* needed for insque/remque */
913 dss_path->next = dss_path->prev = dss_path;
914
915 nsrv4->dss_pathlist = dss_path;
916 } else {
917 insque(dss_path, nsrv4->dss_pathlist);
918 }
919
920 return (dss_path);
921 }
922
923 /*
924 * Create a new server instance, and make it the currently active instance.
925 * Note that starting the grace period too early will reduce the clients'
926 * recovery window.
927 */
928 void
rfs4_servinst_create(nfs4_srv_t * nsrv4,int start_grace,int dss_npaths,char ** dss_paths)929 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
930 int dss_npaths, char **dss_paths)
931 {
932 unsigned i;
933 rfs4_servinst_t *sip;
934 rfs4_oldstate_t *oldstate;
935
936 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
937 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
938
939 sip->nreclaim = 0;
940 sip->start_time = (time_t)0;
941 sip->grace_period = (time_t)0;
942 sip->next = NULL;
943 sip->prev = NULL;
944
945 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
946 /*
947 * This initial dummy entry is required to setup for insque/remque.
948 * It must be skipped over whenever the list is traversed.
949 */
950 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
951 /* insque/remque require initial list entry to be self-terminated */
952 oldstate->next = oldstate;
953 oldstate->prev = oldstate;
954 sip->oldstate = oldstate;
955
956
957 sip->dss_npaths = dss_npaths;
958 sip->dss_paths = kmem_alloc(dss_npaths *
959 sizeof (rfs4_dss_path_t *), KM_SLEEP);
960
961 for (i = 0; i < dss_npaths; i++) {
962 sip->dss_paths[i] =
963 rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
964 }
965
966 mutex_enter(&nsrv4->servinst_lock);
967 if (nsrv4->nfs4_cur_servinst != NULL) {
968 /* add to linked list */
969 sip->prev = nsrv4->nfs4_cur_servinst;
970 nsrv4->nfs4_cur_servinst->next = sip;
971 }
972 if (start_grace)
973 rfs4_grace_start(sip);
974 /* make the new instance "current" */
975 nsrv4->nfs4_cur_servinst = sip;
976
977 mutex_exit(&nsrv4->servinst_lock);
978 }
979
980 /*
981 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
982 * all instances directly.
983 */
984 void
rfs4_servinst_destroy_all(nfs4_srv_t * nsrv4)985 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
986 {
987 rfs4_servinst_t *sip, *prev, *current;
988 #ifdef DEBUG
989 int n = 0;
990 #endif
991
992 mutex_enter(&nsrv4->servinst_lock);
993 ASSERT(nsrv4->nfs4_cur_servinst != NULL);
994 current = nsrv4->nfs4_cur_servinst;
995 nsrv4->nfs4_cur_servinst = NULL;
996 for (sip = current; sip != NULL; sip = prev) {
997 prev = sip->prev;
998 rw_destroy(&sip->rwlock);
999 if (sip->oldstate)
1000 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1001 if (sip->dss_paths) {
1002 int i = sip->dss_npaths;
1003
1004 while (i > 0) {
1005 i--;
1006 if (sip->dss_paths[i] != NULL) {
1007 char *path = sip->dss_paths[i]->path;
1008
1009 if (path != NULL) {
1010 kmem_free(path,
1011 strlen(path) + 1);
1012 }
1013 kmem_free(sip->dss_paths[i],
1014 sizeof (rfs4_dss_path_t));
1015 }
1016 }
1017 kmem_free(sip->dss_paths,
1018 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1019 }
1020 kmem_free(sip, sizeof (rfs4_servinst_t));
1021 #ifdef DEBUG
1022 n++;
1023 #endif
1024 }
1025 mutex_exit(&nsrv4->servinst_lock);
1026 }
1027
/*
 * Assign the current server instance to a client_t.
 * Should be called with cp->rc_dbe held.
 *
 *	nsrv4	NFSv4 server state; supplies servinst_lock
 *	cp	client whose rc_server_instance field is updated
 *	sip	server instance to record in the client
 *
 * The store into cp->rc_server_instance is done under servinst_lock.
 */
void
rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
    rfs4_servinst_t *sip)
{
	/* The caller must hold a reference on the client's database entry. */
	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);

	/*
	 * The lock ensures that if the current instance is in the process
	 * of changing, we will see the new one.
	 */
	mutex_enter(&nsrv4->servinst_lock);
	cp->rc_server_instance = sip;
	mutex_exit(&nsrv4->servinst_lock);
}
1046
1047 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)1048 rfs4_servinst(rfs4_client_t *cp)
1049 {
1050 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1051
1052 return (cp->rc_server_instance);
1053 }
1054
/*
 * No-op "free" routine: the argument is ignored and nothing is released.
 * NOTE(review): presumably installed as the result-free callback for
 * operations whose results own no allocated memory -- confirm against
 * the dispatch table.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
1060
1061 /*
1062 * This is a fall-through for invalid or not implemented (yet) ops
1063 */
1064 /* ARGSUSED */
1065 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1066 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1067 struct compound_state *cs)
1068 {
1069 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1070 }
1071
1072 /*
1073 * Check if the security flavor, nfsnum, is in the flavor_list.
1074 */
1075 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)1076 in_flavor_list(int nfsnum, int *flavor_list, int count)
1077 {
1078 int i;
1079
1080 for (i = 0; i < count; i++) {
1081 if (nfsnum == flavor_list[i])
1082 return (TRUE);
1083 }
1084 return (FALSE);
1085 }
1086
/*
 * Used by rfs4_op_secinfo to get the security information from the
 * export structure associated with the component.
 *
 *	cs	compound state; cs->vp is the directory searched, cs->exi
 *		its export, cs->cr the credential used for the lookup
 *	nm	NUL-terminated component name to look up in cs->vp
 *	resp	result; on NFS4_OK, SECINFO4resok_len/SECINFO4resok_val
 *		describe a kmem_alloc'ed array of secinfo4 entries
 *
 * Returns NFS4_OK on success, otherwise an nfsstat4 error (errno values
 * are translated with puterrno4()).  resp is only written on success.
 *
 * The vnode obtained from the lookup is released on every return path
 * taken after the lookup has succeeded.  The caller must hold
 * ne->exported_lock (asserted before the export's secinfo is read).
 */
/* ARGSUSED */
nfsstat4
do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
{
	int error, different_export = 0;
	vnode_t *dvp, *vp;
	struct exportinfo *exi;
	fid_t fid;
	uint_t count, i;
	secinfo4 *resok_val;
	struct secinfo *secp;
	seconfig_t *si;
	bool_t did_traverse = FALSE;
	/* "walk" only receives output from nfs_vptoexi(); it is not read. */
	int dotdot, walk;
	nfs_export_t *ne = nfs_get_export();

	dvp = cs->vp;
	exi = cs->exi;
	ASSERT(exi != NULL);
	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');

	/*
	 * If dotdotting, then need to check whether it's above the
	 * root of a filesystem, or above an export point.
	 */
	if (dotdot) {
		vnode_t *zone_rootvp = ne->exi_root->exi_vp;

		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
		/*
		 * If dotdotting at the root of a filesystem, then
		 * need to traverse back to the mounted-on filesystem
		 * and do the dotdot lookup there.
		 */
		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {

			/*
			 * If at the system root, then can
			 * go up no further.
			 */
			if (VN_CMP(dvp, zone_rootvp))
				return (puterrno4(ENOENT));

			/*
			 * Traverse back to the mounted-on filesystem
			 */
			dvp = untraverse(dvp, zone_rootvp);

			/*
			 * Set the different_export flag so we remember
			 * to pick up a new exportinfo entry for
			 * this new filesystem.
			 */
			different_export = 1;
		} else {

			/*
			 * If dotdotting above an export point then set
			 * the different_export to get new export info.
			 */
			different_export = nfs_exported(exi, dvp);
		}
	}

	/*
	 * Get the vnode for the component "nm".
	 */
	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
	    NULL, NULL, NULL);
	if (error)
		return (puterrno4(error));

	/*
	 * If the vnode is in a pseudo filesystem, or if the security flavor
	 * used in the request is valid but not an explicitly shared flavor,
	 * or the access bit indicates that this is a limited access,
	 * check whether this vnode is visible.
	 */
	if (!different_export &&
	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
	    cs->access & CS_ACCESS_LIMITED)) {
		/* nfs_visible() may also set different_export. */
		if (! nfs_visible(exi, vp, &different_export)) {
			VN_RELE(vp);
			return (puterrno4(ENOENT));
		}
	}

	/*
	 * If it's a mountpoint, then traverse it.
	 */
	if (vn_ismntpt(vp)) {
		if ((error = traverse(&vp)) != 0) {
			VN_RELE(vp);
			return (puterrno4(error));
		}
		/* remember that we had to traverse mountpoint */
		did_traverse = TRUE;
		different_export = 1;
	} else if (vp->v_vfsp != dvp->v_vfsp) {
		/*
		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
		 * then vp is probably an LOFS object.  We don't need the
		 * realvp, we just need to know that we might have crossed
		 * a server fs boundary and need to call checkexport4.
		 * (LOFS lookup hides server fs mountpoints, and actually calls
		 * traverse)
		 */
		different_export = 1;
	}

	/*
	 * Get the export information for it.
	 */
	if (different_export) {

		bzero(&fid, sizeof (fid));
		fid.fid_len = MAXFIDSZ;
		error = vop_fid_pseudo(vp, &fid);
		if (error) {
			VN_RELE(vp);
			return (puterrno4(error));
		}

		/* We'll need to reassign "exi". */
		if (dotdot)
			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
		else
			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);

		if (exi == NULL) {
			if (did_traverse == TRUE) {
				/*
				 * If this vnode is a mounted-on vnode,
				 * but the mounted-on file system is not
				 * exported, send back the secinfo for
				 * the exported node that the mounted-on
				 * vnode lives in.
				 */
				exi = cs->exi;
			} else {
				VN_RELE(vp);
				return (puterrno4(EACCES));
			}
		}
	}
	ASSERT(exi != NULL);


	/*
	 * Create the secinfo result based on the security information
	 * from the exportinfo structure (exi).
	 *
	 * Return all flavors for a pseudo node.
	 * For a real export node, return the flavor that the client
	 * has access with.
	 */
	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
	if (PSEUDO(exi)) {
		count = exi->exi_export.ex_seccnt; /* total sec count */
		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
		secp = exi->exi_export.ex_secinfo;

		for (i = 0; i < count; i++) {
			si = &secp[i].s_secinfo;
			resok_val[i].flavor = si->sc_rpcnum;
			/* Only RPCSEC_GSS entries carry qop/service/oid. */
			if (resok_val[i].flavor == RPCSEC_GSS) {
				rpcsec_gss_info *info;

				info = &resok_val[i].flavor_info;
				info->qop = si->sc_qop;
				info->service = (rpc_gss_svc_t)si->sc_service;

				/* get oid opaque data */
				info->oid.sec_oid4_len =
				    si->sc_gss_mech_type->length;
				info->oid.sec_oid4_val = kmem_alloc(
				    si->sc_gss_mech_type->length, KM_SLEEP);
				bcopy(
				    si->sc_gss_mech_type->elements,
				    info->oid.sec_oid4_val,
				    info->oid.sec_oid4_len);
			}
		}
		resp->SECINFO4resok_len = count;
		resp->SECINFO4resok_val = resok_val;
	} else {
		/* ret_cnt: flavors granted; k: next result slot to fill. */
		int ret_cnt = 0, k = 0;
		int *flavor_list;

		count = exi->exi_export.ex_seccnt; /* total sec count */
		secp = exi->exi_export.ex_secinfo;

		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
		/* find out which flavors to return */
		for (i = 0; i < count; i ++) {
			int access, flavor, perm;

			flavor = secp[i].s_secinfo.sc_nfsnum;
			perm = secp[i].s_flags;

			access = nfsauth4_secinfo_access(exi, cs->req,
			    flavor, perm, cs->basecr);

			if (! (access & NFSAUTH_DENIED) &&
			    ! (access & NFSAUTH_WRONGSEC)) {
				flavor_list[ret_cnt] = flavor;
				ret_cnt++;
			}
		}

		/* Create the returning SECINFO value */
		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);

		for (i = 0; i < count; i++) {
			/*
			 * If the flavor is in the flavor list,
			 * fill in resok_val.
			 */
			si = &secp[i].s_secinfo;
			if (in_flavor_list(si->sc_nfsnum,
			    flavor_list, ret_cnt)) {
				resok_val[k].flavor = si->sc_rpcnum;
				if (resok_val[k].flavor == RPCSEC_GSS) {
					rpcsec_gss_info *info;

					info = &resok_val[k].flavor_info;
					info->qop = si->sc_qop;
					info->service = (rpc_gss_svc_t)
					    si->sc_service;

					/* get oid opaque data */
					info->oid.sec_oid4_len =
					    si->sc_gss_mech_type->length;
					info->oid.sec_oid4_val = kmem_alloc(
					    si->sc_gss_mech_type->length,
					    KM_SLEEP);
					bcopy(si->sc_gss_mech_type->elements,
					    info->oid.sec_oid4_val,
					    info->oid.sec_oid4_len);
				}
				k++;
			}
			/* Stop once every allocated result slot is filled. */
			if (k >= ret_cnt)
				break;
		}
		resp->SECINFO4resok_len = ret_cnt;
		resp->SECINFO4resok_val = resok_val;
		kmem_free(flavor_list, count * sizeof (int));
	}

	VN_RELE(vp);
	return (NFS4_OK);
}
1344
1345 /*
1346 * SECINFO (Operation 33): Obtain required security information on
1347 * the component name in the format of (security-mechanism-oid, qop, service)
1348 * triplets.
1349 */
1350 /* ARGSUSED */
1351 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1352 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1353 struct compound_state *cs)
1354 {
1355 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1356 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1357 utf8string *utfnm = &args->name;
1358 uint_t len;
1359 char *nm;
1360 struct sockaddr *ca;
1361 char *name = NULL;
1362 nfsstat4 status = NFS4_OK;
1363
1364 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1365 SECINFO4args *, args);
1366
1367 /*
1368 * Current file handle (cfh) should have been set before getting
1369 * into this function. If not, return error.
1370 */
1371 if (cs->vp == NULL) {
1372 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1373 goto out;
1374 }
1375
1376 if (cs->vp->v_type != VDIR) {
1377 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1378 goto out;
1379 }
1380
1381 /*
1382 * Verify the component name. If failed, error out, but
1383 * do not error out if the component name is a "..".
1384 * SECINFO will return its parents secinfo data for SECINFO "..".
1385 */
1386 status = utf8_dir_verify(utfnm);
1387 if (status != NFS4_OK) {
1388 if (utfnm->utf8string_len != 2 ||
1389 utfnm->utf8string_val[0] != '.' ||
1390 utfnm->utf8string_val[1] != '.') {
1391 *cs->statusp = resp->status = status;
1392 goto out;
1393 }
1394 }
1395
1396 nm = utf8_to_str(utfnm, &len, NULL);
1397 if (nm == NULL) {
1398 *cs->statusp = resp->status = NFS4ERR_INVAL;
1399 goto out;
1400 }
1401
1402 if (len > MAXNAMELEN) {
1403 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1404 kmem_free(nm, len);
1405 goto out;
1406 }
1407
1408 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1409 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1410 MAXPATHLEN + 1);
1411
1412 if (name == NULL) {
1413 *cs->statusp = resp->status = NFS4ERR_INVAL;
1414 kmem_free(nm, len);
1415 goto out;
1416 }
1417
1418 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1419
1420 if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1421 /*
1422 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1423 *
1424 * 2.6.3.1.1.8
1425 * SECINFO and SECINFO_NO_NAME consume the current
1426 * filehandle (note that this is a change from NFSv4.0).
1427 *
1428 * 18.29.3
1429 * On success, the current filehandle is consumed (see
1430 * Section 2.6.3.1.1.8), and if the next operation after
1431 * SECINFO tries to use the current filehandle, that
1432 * operation will fail with the status
1433 * NFS4ERR_NOFILEHANDLE.
1434 */
1435 VN_RELE(cs->vp);
1436 cs->vp = NULL;
1437 }
1438
1439 if (name != nm)
1440 kmem_free(name, MAXPATHLEN + 1);
1441 kmem_free(nm, len);
1442
1443 out:
1444 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1445 SECINFO4res *, resp);
1446 }
1447
1448 /*
1449 * Free SECINFO result.
1450 */
1451 /* ARGSUSED */
1452 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1453 rfs4_op_secinfo_free(nfs_resop4 *resop)
1454 {
1455 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1456 int count, i;
1457 secinfo4 *resok_val;
1458
1459 /* If this is not an Ok result, nothing to free. */
1460 if (resp->status != NFS4_OK) {
1461 return;
1462 }
1463
1464 count = resp->SECINFO4resok_len;
1465 resok_val = resp->SECINFO4resok_val;
1466
1467 for (i = 0; i < count; i++) {
1468 if (resok_val[i].flavor == RPCSEC_GSS) {
1469 rpcsec_gss_info *info;
1470
1471 info = &resok_val[i].flavor_info;
1472 kmem_free(info->oid.sec_oid4_val,
1473 info->oid.sec_oid4_len);
1474 }
1475 }
1476 kmem_free(resok_val, count * sizeof (secinfo4));
1477 resp->SECINFO4resok_len = 0;
1478 resp->SECINFO4resok_val = NULL;
1479 }
1480
/*
 * ACCESS: report which of the access rights requested in args->access
 * the caller's credential (cs->cr) has on the current filehandle.
 *
 * For each requested right that applies to the vnode type, VOP_ACCESS()
 * is consulted; a right is granted in resp->access only if that check
 * succeeds and, where tested, the file is not a mandatory-lock file and
 * the label check (on labeled systems) passes.  resp->supported records
 * the rights that were evaluated.  Write-type rights are skipped for
 * regular files and directories on a read-only export.
 *
 * The status is stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
	int error;
	vnode_t *vp;
	struct vattr va;
	int checkwriteperm;
	cred_t *cr = cs->cr;
	/*
	 * The label variables below are only assigned when
	 * is_system_labeled() is true, and only read under that condition.
	 */
	bslabel_t *clabel, *slabel;
	ts_label_t *tslabel;
	boolean_t admin_low_client;

	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
	    ACCESS4args *, args);

#if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
#endif
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	ASSERT(cr != NULL);

	vp = cs->vp;

	/*
	 * If the file system is exported read only, it is not appropriate
	 * to check write permissions for regular files and directories.
	 * Special files are interpreted by the client, so the underlying
	 * permissions are sent back to the client for interpretation.
	 */
	if (rdonly4(req, cs) &&
	    (vp->v_type == VREG || vp->v_type == VDIR))
		checkwriteperm = 0;
	else
		checkwriteperm = 1;

	/*
	 * XXX
	 * We need the mode so that we can correctly determine access
	 * permissions relative to a mandatory lock file. Access to
	 * mandatory lock files is denied on the server, so it might
	 * as well be reflected to the server during the open.
	 */
	va.va_mask = AT_MODE;
	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}
	resp->access = 0;
	resp->supported = 0;

	if (is_system_labeled()) {
		ASSERT(req->rq_label != NULL);
		clabel = req->rq_label;
		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
		    "got client label from request(1)",
		    struct svc_req *, req);
		/*
		 * A client at the admin_low label skips the per-file label
		 * comparison; any other client needs the file's label.
		 */
		if (!blequal(&l_admin_low->tsl_label, clabel)) {
			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
				*cs->statusp = resp->status = puterrno4(EACCES);
				goto out;
			}
			slabel = label2bslabel(tslabel);
			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
			    char *, "got server label(1) for vp(2)",
			    bslabel_t *, slabel, vnode_t *, vp);

			admin_low_client = B_FALSE;
		} else
			admin_low_client = B_TRUE;
	}

	/* READ: client label must dominate the file label. */
	if (args->access & ACCESS4_READ) {
		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_READ;
		resp->supported |= ACCESS4_READ;
	}
	/* LOOKUP: directories only, checked as execute permission. */
	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
		if (!error && (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_LOOKUP;
		resp->supported |= ACCESS4_LOOKUP;
	}
	/* MODIFY/EXTEND: write permission; labels must be equal. */
	if (checkwriteperm &&
	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    blequal(clabel, slabel)))
			resp->access |=
			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
		/*
		 * NOTE(review): unlike the other branches, "supported" is
		 * derived here from the granted bits (resp->access) rather
		 * than from the requested bits, so a denied MODIFY/EXTEND
		 * is also reported as unsupported -- confirm this is
		 * intended.
		 */
		resp->supported |=
		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
	}

	/* DELETE: directories only, checked as write permission. */
	if (checkwriteperm &&
	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
		if (!error && (!is_system_labeled() || admin_low_client ||
		    blequal(clabel, slabel)))
			resp->access |= ACCESS4_DELETE;
		resp->supported |= ACCESS4_DELETE;
	}
	/* EXECUTE: non-directories only. */
	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
		if (!error && !MANDLOCK(vp, va.va_mode) &&
		    (!is_system_labeled() || admin_low_client ||
		    bldominates(clabel, slabel)))
			resp->access |= ACCESS4_EXECUTE;
		resp->supported |= ACCESS4_EXECUTE;
	}

	/* Drop the file label obtained from nfs_getflabel() above. */
	if (is_system_labeled() && !admin_low_client)
		label_rele(tslabel);

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
	    ACCESS4res *, resp);
}
1616
1617 /* ARGSUSED */
1618 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1619 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1620 struct compound_state *cs)
1621 {
1622 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1623 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1624 int error;
1625 vnode_t *vp = cs->vp;
1626 cred_t *cr = cs->cr;
1627 vattr_t va;
1628 nfs4_srv_t *nsrv4;
1629
1630 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1631 COMMIT4args *, args);
1632
1633 if (vp == NULL) {
1634 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1635 goto out;
1636 }
1637 if (cs->access == CS_ACCESS_DENIED) {
1638 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1639 goto out;
1640 }
1641
1642 if (args->offset + args->count < args->offset) {
1643 *cs->statusp = resp->status = NFS4ERR_INVAL;
1644 goto out;
1645 }
1646
1647 va.va_mask = AT_UID;
1648 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1649
1650 /*
1651 * If we can't get the attributes, then we can't do the
1652 * right access checking. So, we'll fail the request.
1653 */
1654 if (error) {
1655 *cs->statusp = resp->status = puterrno4(error);
1656 goto out;
1657 }
1658 if (rdonly4(req, cs)) {
1659 *cs->statusp = resp->status = NFS4ERR_ROFS;
1660 goto out;
1661 }
1662
1663 if (vp->v_type != VREG) {
1664 if (vp->v_type == VDIR)
1665 resp->status = NFS4ERR_ISDIR;
1666 else
1667 resp->status = NFS4ERR_INVAL;
1668 *cs->statusp = resp->status;
1669 goto out;
1670 }
1671
1672 if (crgetuid(cr) != va.va_uid &&
1673 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1674 *cs->statusp = resp->status = puterrno4(error);
1675 goto out;
1676 }
1677
1678 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1679
1680 if (error) {
1681 *cs->statusp = resp->status = puterrno4(error);
1682 goto out;
1683 }
1684
1685 nsrv4 = nfs4_get_srv();
1686 *cs->statusp = resp->status = NFS4_OK;
1687 resp->writeverf = nsrv4->write4verf;
1688 out:
1689 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1690 COMMIT4res *, resp);
1691 }
1692
1693 /*
1694 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1695 * was completed. It does the nfsv4 create for special files.
1696 */
1697 /* ARGSUSED */
1698 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1699 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1700 struct compound_state *cs, vattr_t *vap, char *nm)
1701 {
1702 int error;
1703 cred_t *cr = cs->cr;
1704 vnode_t *dvp = cs->vp;
1705 vnode_t *vp = NULL;
1706 int mode;
1707 enum vcexcl excl;
1708
1709 switch (args->type) {
1710 case NF4CHR:
1711 case NF4BLK:
1712 if (secpolicy_sys_devices(cr) != 0) {
1713 *cs->statusp = resp->status = NFS4ERR_PERM;
1714 return (NULL);
1715 }
1716 if (args->type == NF4CHR)
1717 vap->va_type = VCHR;
1718 else
1719 vap->va_type = VBLK;
1720 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1721 args->ftype4_u.devdata.specdata2);
1722 vap->va_mask |= AT_RDEV;
1723 break;
1724 case NF4SOCK:
1725 vap->va_type = VSOCK;
1726 break;
1727 case NF4FIFO:
1728 vap->va_type = VFIFO;
1729 break;
1730 default:
1731 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1732 return (NULL);
1733 }
1734
1735 /*
1736 * Must specify the mode.
1737 */
1738 if (!(vap->va_mask & AT_MODE)) {
1739 *cs->statusp = resp->status = NFS4ERR_INVAL;
1740 return (NULL);
1741 }
1742
1743 excl = EXCL;
1744
1745 mode = 0;
1746
1747 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1748 if (error) {
1749 *cs->statusp = resp->status = puterrno4(error);
1750 return (NULL);
1751 }
1752 return (vp);
1753 }
1754
/*
 * nfsv4 create is used to create non-regular files.  For regular files,
 * use nfsv4 open.
 *
 * Outline:
 *  1. Validate the request: current filehandle present, directory not a
 *     mount point, object type one of LNK/BLK/CHR/SOCK/FIFO/DIR, access
 *     not denied, current vnode is a directory, name verifies, export
 *     not read-only.
 *  2. Convert the object name (utf8_to_fn + nfscmd_convname) and turn
 *     createattrs into a vattr via do_rfs4_set_attrs().
 *  3. Record the directory's "before" change value, apply default
 *     uid/gid/mode, and create the object (VOP_MKDIR, VOP_SYMLINK +
 *     VOP_LOOKUP, or do_rfs4_op_mknod()).
 *  4. Sync the directory, record the "after" change value, compute
 *     cinfo.atomic from the va_seq samples, build the new filehandle in
 *     cs->fh, sync the new object and make it the current vnode.
 *
 * The status is stored in both resp->status and *cs->statusp.  Every
 * exit taken after the name buffers / attribute table were set up frees
 * them before jumping to "out".
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * bva: directory attrs before the create; iva/iva2: va_seq samples
	 * taken right after the create; ava: directory attrs at the end.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	vnode_t *realvp;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;
	struct sockaddr *ca;
	char *name = NULL;
	char *lname = NULL;

	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
	    CREATE4args *, args);

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		goto out;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}
	status = utf8_dir_verify(&args->objname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the name for this client.  Throughout this function the
	 * converted buffer is freed separately only when name != nm.
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	resp->attrset = 0;

	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);

	/* Translate createattrs into the vattr reachable via sarg.vap. */
	status = do_rfs4_set_attrs(&resp->attrset,
	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);

	/* A create that supplies no vattr-backed attributes is invalid. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		if (name != nm)
			kmem_free(name, MAXPATHLEN + 1);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set the default initial values for attributes when the parent
	 * directory does not have the VSUID/VSGID bit set and they have
	 * not been specified in createattrs.
	 */
	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/*
	 * "error" is zero on entry to the switch (the VOP_GETATTR above
	 * succeeded); a case that fails leaves it set and breaks out so
	 * that the common code after the switch reports it.
	 */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
		    &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		lname = nfscmd_convname(ca, cs->exi, lnm,
		    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);

		if (lname == NULL) {
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/* The link-text buffers are not needed after the call. */
		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
		if (lname != lnm)
			kmem_free(lname, MAXPATHLEN + 1);
		kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		/* VOP_SYMLINK returns no vnode here; look the link up. */
		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
		    NULL, NULL, NULL);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
		    iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);

		/* On failure do_rfs4_op_mknod() has already set the status. */
		if (vp == NULL) {
			if (name != nm)
				kmem_free(name, MAXPATHLEN + 1);
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			goto out;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
			iva.va_seq = 0;

		break;
	}
	if (name != nm)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr, NULL);

	/*
	 * NOTE(review): on the success path resp->status has not been
	 * assigned by this function at this point; the test relies on its
	 * incoming value being NFS4_OK (presumably a zeroed result --
	 * confirm against the caller).
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		goto out;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle for the new object in cs->fh. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/*
	 * Force modified metadata out to stable storage.
	 *
	 * if a underlying vp exists, pass it to VOP_FSYNC
	 */
	if (VOP_REALVP(vp, &realvp, NULL) == 0)
		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
	else
		(void) VOP_FSYNC(vp, syncval, cr, NULL);

	/* Only makefh4() can have failed since the previous status check. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		goto out;
	}
	/* The new object replaces the directory as the current vnode. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
	    CREATE4res *, resp);
}
2138
2139 /*ARGSUSED*/
2140 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2141 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2142 struct compound_state *cs)
2143 {
2144 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2145 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2146
2147 rfs4_op_inval(argop, resop, req, cs);
2148
2149 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2150 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2151 }
2152
2153 /*ARGSUSED*/
2154 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2155 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156 struct compound_state *cs)
2157 {
2158 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2159 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2160 rfs4_deleg_state_t *dsp;
2161 nfsstat4 status;
2162
2163 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2164 DELEGRETURN4args *, args);
2165
2166 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2167 resp->status = *cs->statusp = status;
2168 if (status != NFS4_OK)
2169 goto out;
2170
2171 /* Ensure specified filehandle matches */
2172 if (cs->vp != dsp->rds_finfo->rf_vp) {
2173 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2174 } else
2175 rfs4_return_deleg(dsp, FALSE);
2176
2177 rfs4_update_lease(dsp->rds_client);
2178
2179 rfs4_deleg_state_rele(dsp);
2180 out:
2181 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2182 DELEGRETURN4res *, resp);
2183 }
2184
2185 /*
2186 * Check to see if a given "flavor" is an explicitly shared flavor.
2187 * The assumption of this routine is the "flavor" is already a valid
2188 * flavor in the secinfo list of "exi".
2189 *
2190 * e.g.
2191 * # share -o sec=flavor1 /export
2192 * # share -o sec=flavor2 /export/home
2193 *
2194 * flavor2 is not an explicitly shared flavor for /export,
2195 * however it is in the secinfo list for /export thru the
2196 * server namespace setup.
2197 */
2198 int
is_exported_sec(int flavor,struct exportinfo * exi)2199 is_exported_sec(int flavor, struct exportinfo *exi)
2200 {
2201 int i;
2202 struct secinfo *sp;
2203
2204 sp = exi->exi_export.ex_secinfo;
2205 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2206 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2207 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2208 return (SEC_REF_EXPORTED(&sp[i]));
2209 }
2210 }
2211
2212 /* Should not reach this point based on the assumption */
2213 return (0);
2214 }
2215
2216 /*
2217 * Check if the security flavor used in the request matches what is
2218 * required at the export point or at the root pseudo node (exi_root).
2219 *
2220 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2221 *
2222 */
2223 static int
secinfo_match_or_authnone(struct compound_state * cs)2224 secinfo_match_or_authnone(struct compound_state *cs)
2225 {
2226 int i;
2227 struct secinfo *sp;
2228
2229 /*
2230 * Check cs->nfsflavor (from the request) against
2231 * the current export data in cs->exi.
2232 */
2233 sp = cs->exi->exi_export.ex_secinfo;
2234 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2235 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2236 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2237 return (1);
2238 }
2239
2240 return (0);
2241 }
2242
2243 /*
2244 * Check the access authority for the client and return the correct error.
2245 */
2246 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)2247 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2248 {
2249 int authres;
2250
2251 /*
2252 * First, check if the security flavor used in the request
2253 * are among the flavors set in the server namespace.
2254 */
2255 if (!secinfo_match_or_authnone(cs)) {
2256 *cs->statusp = NFS4ERR_WRONGSEC;
2257 return (*cs->statusp);
2258 }
2259
2260 authres = checkauth4(cs, req);
2261
2262 if (authres > 0) {
2263 *cs->statusp = NFS4_OK;
2264 if (! (cs->access & CS_ACCESS_LIMITED))
2265 cs->access = CS_ACCESS_OK;
2266 } else if (authres == 0) {
2267 *cs->statusp = NFS4ERR_ACCESS;
2268 } else if (authres == -2) {
2269 *cs->statusp = NFS4ERR_WRONGSEC;
2270 } else {
2271 *cs->statusp = NFS4ERR_DELAY;
2272 }
2273 return (*cs->statusp);
2274 }
2275
2276 /*
2277 * bitmap4_to_attrmask is called by getattr and readdir.
2278 * It sets up the vattr mask and determines whether vfsstat call is needed
2279 * based on the input bitmap.
2280 * Returns nfsv4 status.
2281 */
2282 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2283 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2284 {
2285 int i;
2286 uint_t va_mask;
2287 struct statvfs64 *sbp = sargp->sbp;
2288
2289 sargp->sbp = NULL;
2290 sargp->flag = 0;
2291 sargp->rdattr_error = NFS4_OK;
2292 sargp->mntdfid_set = FALSE;
2293 if (sargp->cs->vp)
2294 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2295 FH4_ATTRDIR | FH4_NAMEDATTR);
2296 else
2297 sargp->xattr = 0;
2298
2299 /*
2300 * Set rdattr_error_req to true if return error per
2301 * failed entry rather than fail the readdir.
2302 */
2303 if (breq & FATTR4_RDATTR_ERROR_MASK)
2304 sargp->rdattr_error_req = 1;
2305 else
2306 sargp->rdattr_error_req = 0;
2307
2308 /*
2309 * generate the va_mask
2310 * Handle the easy cases first
2311 */
2312 switch (breq) {
2313 case NFS4_NTOV_ATTR_MASK:
2314 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2315 return (NFS4_OK);
2316
2317 case NFS4_FS_ATTR_MASK:
2318 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2319 sargp->sbp = sbp;
2320 return (NFS4_OK);
2321
2322 case NFS4_NTOV_ATTR_CACHE_MASK:
2323 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2324 return (NFS4_OK);
2325
2326 case FATTR4_LEASE_TIME_MASK:
2327 sargp->vap->va_mask = 0;
2328 return (NFS4_OK);
2329
2330 default:
2331 va_mask = 0;
2332 for (i = 0; i < nfs4_ntov_map_size; i++) {
2333 if ((breq & nfs4_ntov_map[i].fbit) &&
2334 nfs4_ntov_map[i].vbit)
2335 va_mask |= nfs4_ntov_map[i].vbit;
2336 }
2337
2338 /*
2339 * Check is vfsstat is needed
2340 */
2341 if (breq & NFS4_FS_ATTR_MASK)
2342 sargp->sbp = sbp;
2343
2344 sargp->vap->va_mask = va_mask;
2345 return (NFS4_OK);
2346 }
2347 /* NOTREACHED */
2348 }
2349
2350 /*
2351 * bitmap4_get_sysattrs is called by getattr and readdir.
2352 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2353 * Returns nfsv4 status.
2354 */
2355 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2356 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2357 {
2358 int error;
2359 struct compound_state *cs = sargp->cs;
2360 vnode_t *vp = cs->vp;
2361
2362 if (sargp->sbp != NULL) {
2363 error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2364 if (error != 0) {
2365 sargp->sbp = NULL; /* to identify error */
2366 return (puterrno4(error));
2367 }
2368 }
2369
2370 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2371 }
2372
2373 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2374 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2375 {
2376 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2377 KM_SLEEP);
2378 ntovp->attrcnt = 0;
2379 ntovp->vfsstat = FALSE;
2380 }
2381
2382 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2383 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2384 struct nfs4_svgetit_arg *sargp)
2385 {
2386 int i;
2387 union nfs4_attr_u *na;
2388 uint8_t *amap;
2389
2390 /*
2391 * XXX Should do the same checks for whether the bit is set
2392 */
2393 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2394 i < ntovp->attrcnt; i++, na++, amap++) {
2395 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2396 NFS4ATTR_FREEIT, sargp, na);
2397 }
2398 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2399 /*
2400 * xdr_free for getattr will be done later
2401 */
2402 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2403 i < ntovp->attrcnt; i++, na++, amap++) {
2404 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2405 }
2406 }
2407 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2408 }
2409
2410 /*
2411 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2412 */
2413 static nfsstat4
do_rfs4_op_getattr(bitmap4 breq,fattr4 * fattrp,struct nfs4_svgetit_arg * sargp)2414 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2415 struct nfs4_svgetit_arg *sargp)
2416 {
2417 int error = 0;
2418 int i, k;
2419 struct nfs4_ntov_table ntov;
2420 XDR xdr;
2421 ulong_t xdr_size;
2422 char *xdr_attrs;
2423 nfsstat4 status = NFS4_OK;
2424 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2425 union nfs4_attr_u *na;
2426 uint8_t *amap;
2427
2428 sargp->op = NFS4ATTR_GETIT;
2429 sargp->flag = 0;
2430
2431 fattrp->attrmask = 0;
2432 /* if no bits requested, then return empty fattr4 */
2433 if (breq == 0) {
2434 fattrp->attrlist4_len = 0;
2435 fattrp->attrlist4 = NULL;
2436 return (NFS4_OK);
2437 }
2438
2439 /*
2440 * return NFS4ERR_INVAL when client requests write-only attrs
2441 */
2442 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2443 return (NFS4ERR_INVAL);
2444
2445 nfs4_ntov_table_init(&ntov);
2446 na = ntov.na;
2447 amap = ntov.amap;
2448
2449 /*
2450 * Now loop to get or verify the attrs
2451 */
2452 for (i = 0; i < nfs4_ntov_map_size; i++) {
2453 if (breq & nfs4_ntov_map[i].fbit) {
2454 if ((*nfs4_ntov_map[i].sv_getit)(
2455 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2456
2457 error = (*nfs4_ntov_map[i].sv_getit)(
2458 NFS4ATTR_GETIT, sargp, na);
2459
2460 /*
2461 * Possible error values:
2462 * >0 if sv_getit failed to
2463 * get the attr; 0 if succeeded;
2464 * <0 if rdattr_error and the
2465 * attribute cannot be returned.
2466 */
2467 if (error && !(sargp->rdattr_error_req))
2468 goto done;
2469 /*
2470 * If error then just for entry
2471 */
2472 if (error == 0) {
2473 fattrp->attrmask |=
2474 nfs4_ntov_map[i].fbit;
2475 *amap++ =
2476 (uint8_t)nfs4_ntov_map[i].nval;
2477 na++;
2478 (ntov.attrcnt)++;
2479 } else if ((error > 0) &&
2480 (sargp->rdattr_error == NFS4_OK)) {
2481 sargp->rdattr_error = puterrno4(error);
2482 }
2483 error = 0;
2484 }
2485 }
2486 }
2487
2488 /*
2489 * If rdattr_error was set after the return value for it was assigned,
2490 * update it.
2491 */
2492 if (prev_rdattr_error != sargp->rdattr_error) {
2493 na = ntov.na;
2494 amap = ntov.amap;
2495 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2496 k = *amap;
2497 if (k < FATTR4_RDATTR_ERROR) {
2498 continue;
2499 }
2500 if ((k == FATTR4_RDATTR_ERROR) &&
2501 ((*nfs4_ntov_map[k].sv_getit)(
2502 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2503
2504 (void) (*nfs4_ntov_map[k].sv_getit)(
2505 NFS4ATTR_GETIT, sargp, na);
2506 }
2507 break;
2508 }
2509 }
2510
2511 xdr_size = 0;
2512 na = ntov.na;
2513 amap = ntov.amap;
2514 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2515 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2516 }
2517
2518 fattrp->attrlist4_len = xdr_size;
2519 if (xdr_size) {
2520 /* freed by rfs4_op_getattr_free() */
2521 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2522
2523 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2524
2525 na = ntov.na;
2526 amap = ntov.amap;
2527 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2528 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2529 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2530 int, *amap);
2531 status = NFS4ERR_SERVERFAULT;
2532 break;
2533 }
2534 }
2535 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2536 } else {
2537 fattrp->attrlist4 = NULL;
2538 }
2539 done:
2540
2541 nfs4_ntov_table_free(&ntov, sargp);
2542
2543 if (error != 0)
2544 status = puterrno4(error);
2545
2546 return (status);
2547 }
2548
2549 /* ARGSUSED */
2550 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2551 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2552 struct compound_state *cs)
2553 {
2554 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2555 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2556 struct nfs4_svgetit_arg sarg;
2557 struct statvfs64 sb;
2558 nfsstat4 status;
2559
2560 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2561 GETATTR4args *, args);
2562
2563 if (cs->vp == NULL) {
2564 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2565 goto out;
2566 }
2567
2568 if (cs->access == CS_ACCESS_DENIED) {
2569 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2570 goto out;
2571 }
2572
2573 sarg.sbp = &sb;
2574 sarg.cs = cs;
2575 sarg.is_referral = B_FALSE;
2576
2577 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2578 if (status == NFS4_OK) {
2579
2580 status = bitmap4_get_sysattrs(&sarg);
2581 if (status == NFS4_OK) {
2582
2583 /* Is this a referral? */
2584 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2585 /* Older V4 Solaris client sees a link */
2586 if (client_is_downrev(req))
2587 sarg.vap->va_type = VLNK;
2588 else
2589 sarg.is_referral = B_TRUE;
2590 }
2591
2592 status = do_rfs4_op_getattr(args->attr_request,
2593 &resp->obj_attributes, &sarg);
2594 }
2595 }
2596 *cs->statusp = resp->status = status;
2597 out:
2598 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2599 GETATTR4res *, resp);
2600 }
2601
2602 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2603 rfs4_op_getattr_free(nfs_resop4 *resop)
2604 {
2605 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2606
2607 nfs4_fattr4_free(&resp->obj_attributes);
2608 }
2609
2610 /* ARGSUSED */
2611 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2612 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2613 struct compound_state *cs)
2614 {
2615 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2616
2617 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2618
2619 if (cs->vp == NULL) {
2620 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2621 goto out;
2622 }
2623 if (cs->access == CS_ACCESS_DENIED) {
2624 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2625 goto out;
2626 }
2627
2628 /* check for reparse point at the share point */
2629 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2630 /* it's all bad */
2631 cs->exi->exi_moved = 1;
2632 *cs->statusp = resp->status = NFS4ERR_MOVED;
2633 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2634 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2635 return;
2636 }
2637
2638 /* check for reparse point at vp */
2639 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2640 /* it's not all bad */
2641 *cs->statusp = resp->status = NFS4ERR_MOVED;
2642 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2643 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2644 return;
2645 }
2646
2647 resp->object.nfs_fh4_val =
2648 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2649 nfs_fh4_copy(&cs->fh, &resp->object);
2650 *cs->statusp = resp->status = NFS4_OK;
2651 out:
2652 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2653 GETFH4res *, resp);
2654 }
2655
2656 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2657 rfs4_op_getfh_free(nfs_resop4 *resop)
2658 {
2659 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2660
2661 if (resp->status == NFS4_OK &&
2662 resp->object.nfs_fh4_val != NULL) {
2663 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2664 resp->object.nfs_fh4_val = NULL;
2665 resp->object.nfs_fh4_len = 0;
2666 }
2667 }
2668
2669 /*
2670 * illegal: args: void
2671 * res : status (NFS4ERR_OP_ILLEGAL)
2672 */
2673 /* ARGSUSED */
2674 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2675 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2676 struct svc_req *req, struct compound_state *cs)
2677 {
2678 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2679
2680 resop->resop = OP_ILLEGAL;
2681 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2682 }
2683
2684 /* ARGSUSED */
2685 static void
rfs4_op_notsup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2686 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2687 struct compound_state *cs)
2688 {
2689 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2690 }
2691
2692 /*
2693 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2694 * res: status. If success - CURRENT_FH unchanged, return change_info
2695 */
2696 /* ARGSUSED */
2697 static void
rfs4_op_link(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2698 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2699 struct compound_state *cs)
2700 {
2701 LINK4args *args = &argop->nfs_argop4_u.oplink;
2702 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2703 int error;
2704 vnode_t *vp;
2705 vnode_t *dvp;
2706 struct vattr bdva, idva, adva;
2707 char *nm;
2708 uint_t len;
2709 struct sockaddr *ca;
2710 char *name = NULL;
2711 nfsstat4 status;
2712
2713 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2714 LINK4args *, args);
2715
2716 /* SAVED_FH: source object */
2717 vp = cs->saved_vp;
2718 if (vp == NULL) {
2719 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2720 goto out;
2721 }
2722
2723 /* CURRENT_FH: target directory */
2724 dvp = cs->vp;
2725 if (dvp == NULL) {
2726 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2727 goto out;
2728 }
2729
2730 /*
2731 * If there is a non-shared filesystem mounted on this vnode,
2732 * do not allow to link any file in this directory.
2733 */
2734 if (vn_ismntpt(dvp)) {
2735 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2736 goto out;
2737 }
2738
2739 if (cs->access == CS_ACCESS_DENIED) {
2740 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2741 goto out;
2742 }
2743
2744 /* Check source object's type validity */
2745 if (vp->v_type == VDIR) {
2746 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2747 goto out;
2748 }
2749
2750 /* Check target directory's type */
2751 if (dvp->v_type != VDIR) {
2752 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2753 goto out;
2754 }
2755
2756 if (cs->saved_exi != cs->exi) {
2757 *cs->statusp = resp->status = NFS4ERR_XDEV;
2758 goto out;
2759 }
2760
2761 status = utf8_dir_verify(&args->newname);
2762 if (status != NFS4_OK) {
2763 *cs->statusp = resp->status = status;
2764 goto out;
2765 }
2766
2767 nm = utf8_to_fn(&args->newname, &len, NULL);
2768 if (nm == NULL) {
2769 *cs->statusp = resp->status = NFS4ERR_INVAL;
2770 goto out;
2771 }
2772
2773 if (len > MAXNAMELEN) {
2774 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2775 kmem_free(nm, len);
2776 goto out;
2777 }
2778
2779 if (rdonly4(req, cs)) {
2780 *cs->statusp = resp->status = NFS4ERR_ROFS;
2781 kmem_free(nm, len);
2782 goto out;
2783 }
2784
2785 /* Get "before" change value */
2786 bdva.va_mask = AT_CTIME|AT_SEQ;
2787 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2788 if (error) {
2789 *cs->statusp = resp->status = puterrno4(error);
2790 kmem_free(nm, len);
2791 goto out;
2792 }
2793
2794 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2795 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2796 MAXPATHLEN + 1);
2797
2798 if (name == NULL) {
2799 *cs->statusp = resp->status = NFS4ERR_INVAL;
2800 kmem_free(nm, len);
2801 goto out;
2802 }
2803
2804 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2805
2806 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2807
2808 if (nm != name)
2809 kmem_free(name, MAXPATHLEN + 1);
2810 kmem_free(nm, len);
2811
2812 /*
2813 * Get the initial "after" sequence number, if it fails, set to zero
2814 */
2815 idva.va_mask = AT_SEQ;
2816 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2817 idva.va_seq = 0;
2818
2819 /*
2820 * Force modified data and metadata out to stable storage.
2821 */
2822 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2823 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2824
2825 if (error) {
2826 *cs->statusp = resp->status = puterrno4(error);
2827 goto out;
2828 }
2829
2830 /*
2831 * Get "after" change value, if it fails, simply return the
2832 * before value.
2833 */
2834 adva.va_mask = AT_CTIME|AT_SEQ;
2835 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2836 adva.va_ctime = bdva.va_ctime;
2837 adva.va_seq = 0;
2838 }
2839
2840 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2841
2842 /*
2843 * The cinfo.atomic = TRUE only if we have
2844 * non-zero va_seq's, and it has incremented by exactly one
2845 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2846 */
2847 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2848 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2849 resp->cinfo.atomic = TRUE;
2850 else
2851 resp->cinfo.atomic = FALSE;
2852
2853 *cs->statusp = resp->status = NFS4_OK;
2854 out:
2855 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2856 LINK4res *, resp);
2857 }
2858
2859 /*
2860 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2861 */
2862
2863 /* ARGSUSED */
2864 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2865 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2866 {
2867 int error;
2868 int different_export = 0;
2869 vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2870 struct exportinfo *exi = NULL, *pre_exi = NULL;
2871 nfsstat4 stat;
2872 fid_t fid;
2873 int attrdir, dotdot, walk;
2874 bool_t is_newvp = FALSE;
2875
2876 if (cs->vp->v_flag & V_XATTRDIR) {
2877 attrdir = 1;
2878 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2879 } else {
2880 attrdir = 0;
2881 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2882 }
2883
2884 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2885
2886 /*
2887 * If dotdotting, then need to check whether it's
2888 * above the root of a filesystem, or above an
2889 * export point.
2890 */
2891 if (dotdot) {
2892 vnode_t *zone_rootvp;
2893
2894 ASSERT(cs->exi != NULL);
2895 zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2896 /*
2897 * If dotdotting at the root of a filesystem, then
2898 * need to traverse back to the mounted-on filesystem
2899 * and do the dotdot lookup there.
2900 */
2901 if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2902
2903 /*
2904 * If at the system root, then can
2905 * go up no further.
2906 */
2907 if (VN_CMP(cs->vp, zone_rootvp))
2908 return (puterrno4(ENOENT));
2909
2910 /*
2911 * Traverse back to the mounted-on filesystem
2912 */
2913 cs->vp = untraverse(cs->vp, zone_rootvp);
2914
2915 /*
2916 * Set the different_export flag so we remember
2917 * to pick up a new exportinfo entry for
2918 * this new filesystem.
2919 */
2920 different_export = 1;
2921 } else {
2922
2923 /*
2924 * If dotdotting above an export point then set
2925 * the different_export to get new export info.
2926 */
2927 different_export = nfs_exported(cs->exi, cs->vp);
2928 }
2929 }
2930
2931 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2932 NULL, NULL, NULL);
2933 if (error)
2934 return (puterrno4(error));
2935
2936 /*
2937 * If the vnode is in a pseudo filesystem, check whether it is visible.
2938 *
2939 * XXX if the vnode is a symlink and it is not visible in
2940 * a pseudo filesystem, return ENOENT (not following symlink).
2941 * V4 client can not mount such symlink. This is a regression
2942 * from V2/V3.
2943 *
2944 * In the same exported filesystem, if the security flavor used
2945 * is not an explicitly shared flavor, limit the view to the visible
2946 * list entries only. This is not a WRONGSEC case because it's already
2947 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2948 */
2949 if (!different_export &&
2950 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2951 cs->access & CS_ACCESS_LIMITED)) {
2952 if (! nfs_visible(cs->exi, vp, &different_export)) {
2953 VN_RELE(vp);
2954 return (puterrno4(ENOENT));
2955 }
2956 }
2957
2958 /*
2959 * If it's a mountpoint, then traverse it.
2960 */
2961 if (vn_ismntpt(vp)) {
2962 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2963 pre_tvp = vp; /* save pre-traversed vnode */
2964
2965 /*
2966 * hold pre_tvp to counteract rele by traverse. We will
2967 * need pre_tvp below if checkexport4 fails
2968 */
2969 VN_HOLD(pre_tvp);
2970 if ((error = traverse(&vp)) != 0) {
2971 VN_RELE(vp);
2972 VN_RELE(pre_tvp);
2973 return (puterrno4(error));
2974 }
2975 different_export = 1;
2976 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2977 /*
2978 * The vfsp comparison is to handle the case where
2979 * a LOFS mount is shared. lo_lookup traverses mount points,
2980 * and NFS is unaware of local fs transistions because
2981 * v_vfsmountedhere isn't set. For this special LOFS case,
2982 * the dir and the obj returned by lookup will have different
2983 * vfs ptrs.
2984 */
2985 different_export = 1;
2986 }
2987
2988 if (different_export) {
2989
2990 bzero(&fid, sizeof (fid));
2991 fid.fid_len = MAXFIDSZ;
2992 error = vop_fid_pseudo(vp, &fid);
2993 if (error) {
2994 VN_RELE(vp);
2995 if (pre_tvp)
2996 VN_RELE(pre_tvp);
2997 return (puterrno4(error));
2998 }
2999
3000 if (dotdot)
3001 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3002 else
3003 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3004
3005 if (exi == NULL) {
3006 if (pre_tvp) {
3007 /*
3008 * If this vnode is a mounted-on vnode,
3009 * but the mounted-on file system is not
3010 * exported, send back the filehandle for
3011 * the mounted-on vnode, not the root of
3012 * the mounted-on file system.
3013 */
3014 VN_RELE(vp);
3015 vp = pre_tvp;
3016 exi = pre_exi;
3017 } else {
3018 VN_RELE(vp);
3019 return (puterrno4(EACCES));
3020 }
3021 } else if (pre_tvp) {
3022 /* we're done with pre_tvp now. release extra hold */
3023 VN_RELE(pre_tvp);
3024 }
3025
3026 cs->exi = exi;
3027
3028 /*
3029 * Now we do a checkauth4. The reason is that
3030 * this client/user may not have access to the new
3031 * exported file system, and if they do,
3032 * the client/user may be mapped to a different uid.
3033 *
3034 * We start with a new cr, because the checkauth4 done
3035 * in the PUT*FH operation over wrote the cred's uid,
3036 * gid, etc, and we want the real thing before calling
3037 * checkauth4()
3038 */
3039 crfree(cs->cr);
3040 cs->cr = crdup(cs->basecr);
3041
3042 oldvp = cs->vp;
3043 cs->vp = vp;
3044 is_newvp = TRUE;
3045
3046 stat = call_checkauth4(cs, req);
3047 if (stat != NFS4_OK) {
3048 VN_RELE(cs->vp);
3049 cs->vp = oldvp;
3050 return (stat);
3051 }
3052 }
3053
3054 /*
3055 * After various NFS checks, do a label check on the path
3056 * component. The label on this path should either be the
3057 * global zone's label or a zone's label. We are only
3058 * interested in the zone's label because exported files
3059 * in global zone is accessible (though read-only) to
3060 * clients. The exportability/visibility check is already
3061 * done before reaching this code.
3062 */
3063 if (is_system_labeled()) {
3064 bslabel_t *clabel;
3065
3066 ASSERT(req->rq_label != NULL);
3067 clabel = req->rq_label;
3068 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3069 "got client label from request(1)", struct svc_req *, req);
3070
3071 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3072 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3073 cs->exi)) {
3074 error = EACCES;
3075 goto err_out;
3076 }
3077 } else {
3078 /*
3079 * We grant access to admin_low label clients
3080 * only if the client is trusted, i.e. also
3081 * running Solaris Trusted Extension.
3082 */
3083 struct sockaddr *ca;
3084 int addr_type;
3085 void *ipaddr;
3086 tsol_tpc_t *tp;
3087
3088 ca = (struct sockaddr *)svc_getrpccaller(
3089 req->rq_xprt)->buf;
3090 if (ca->sa_family == AF_INET) {
3091 addr_type = IPV4_VERSION;
3092 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3093 } else if (ca->sa_family == AF_INET6) {
3094 addr_type = IPV6_VERSION;
3095 ipaddr = &((struct sockaddr_in6 *)
3096 ca)->sin6_addr;
3097 }
3098 tp = find_tpc(ipaddr, addr_type, B_FALSE);
3099 if (tp == NULL || tp->tpc_tp.tp_doi !=
3100 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3101 SUN_CIPSO) {
3102 if (tp != NULL)
3103 TPC_RELE(tp);
3104 error = EACCES;
3105 goto err_out;
3106 }
3107 TPC_RELE(tp);
3108 }
3109 }
3110
3111 error = makefh4(&cs->fh, vp, cs->exi);
3112
3113 err_out:
3114 if (error) {
3115 if (is_newvp) {
3116 VN_RELE(cs->vp);
3117 cs->vp = oldvp;
3118 } else
3119 VN_RELE(vp);
3120 return (puterrno4(error));
3121 }
3122
3123 if (!is_newvp) {
3124 if (cs->vp)
3125 VN_RELE(cs->vp);
3126 cs->vp = vp;
3127 } else if (oldvp)
3128 VN_RELE(oldvp);
3129
3130 /*
3131 * if did lookup on attrdir and didn't lookup .., set named
3132 * attr fh flag
3133 */
3134 if (attrdir && ! dotdot)
3135 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3136
3137 /* Assume false for now, open proc will set this */
3138 cs->mandlock = FALSE;
3139
3140 return (NFS4_OK);
3141 }
3142
3143 /* ARGSUSED */
3144 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3145 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3146 struct compound_state *cs)
3147 {
3148 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3149 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3150 char *nm;
3151 uint_t len;
3152 struct sockaddr *ca;
3153 char *name = NULL;
3154 nfsstat4 status;
3155
3156 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3157 LOOKUP4args *, args);
3158
3159 if (cs->vp == NULL) {
3160 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 goto out;
3162 }
3163
3164 if (cs->vp->v_type == VLNK) {
3165 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
3166 goto out;
3167 }
3168
3169 if (cs->vp->v_type != VDIR) {
3170 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3171 goto out;
3172 }
3173
3174 status = utf8_dir_verify(&args->objname);
3175 if (status != NFS4_OK) {
3176 *cs->statusp = resp->status = status;
3177 goto out;
3178 }
3179
3180 nm = utf8_to_str(&args->objname, &len, NULL);
3181 if (nm == NULL) {
3182 *cs->statusp = resp->status = NFS4ERR_INVAL;
3183 goto out;
3184 }
3185
3186 if (len > MAXNAMELEN) {
3187 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3188 kmem_free(nm, len);
3189 goto out;
3190 }
3191
3192 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3193 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3194 MAXPATHLEN + 1);
3195
3196 if (name == NULL) {
3197 *cs->statusp = resp->status = NFS4ERR_INVAL;
3198 kmem_free(nm, len);
3199 goto out;
3200 }
3201
3202 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3203
3204 if (name != nm)
3205 kmem_free(name, MAXPATHLEN + 1);
3206 kmem_free(nm, len);
3207
3208 out:
3209 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3210 LOOKUP4res *, resp);
3211 }
3212
3213 /* ARGSUSED */
3214 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3215 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3216 struct compound_state *cs)
3217 {
3218 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3219
3220 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3221
3222 if (cs->vp == NULL) {
3223 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3224 goto out;
3225 }
3226
3227 if (cs->vp->v_type == VLNK) {
3228 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
3229 goto out;
3230 }
3231
3232 if (cs->vp->v_type != VDIR) {
3233 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
3234 goto out;
3235 }
3236
3237 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3238
3239 /*
3240 * From NFSV4 Specification, LOOKUPP should not check for
3241 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3242 */
3243 if (resp->status == NFS4ERR_WRONGSEC) {
3244 *cs->statusp = resp->status = NFS4_OK;
3245 }
3246
3247 out:
3248 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3249 LOOKUPP4res *, resp);
3250 }
3251
3252
3253 /*ARGSUSED2*/
3254 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3255 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3256 struct compound_state *cs)
3257 {
3258 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
3259 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
3260 vnode_t *avp = NULL;
3261 int lookup_flags = LOOKUP_XATTR, error;
3262 int exp_ro = 0;
3263
3264 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3265 OPENATTR4args *, args);
3266
3267 if (cs->vp == NULL) {
3268 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3269 goto out;
3270 }
3271
3272 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3273 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3274 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3275 goto out;
3276 }
3277
3278 /*
3279 * If file system supports passing ACE mask to VOP_ACCESS then
3280 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3281 */
3282
3283 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3284 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3285 V_ACE_MASK, cs->cr, NULL);
3286 else
3287 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3288 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3289 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3290
3291 if (error) {
3292 *cs->statusp = resp->status = puterrno4(EACCES);
3293 goto out;
3294 }
3295
3296 /*
3297 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3298 * the file system is exported read-only -- regardless of
3299 * createdir flag. Otherwise the attrdir would be created
3300 * (assuming server fs isn't mounted readonly locally). If
3301 * VOP_LOOKUP returns ENOENT in this case, the error will
3302 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3303 * because specfs has no VOP_LOOKUP op, so the macro would
3304 * return ENOSYS. EINVAL is returned by all (current)
3305 * Solaris file system implementations when any of their
3306 * restrictions are violated (xattr(dir) can't have xattrdir).
3307 * Returning NOTSUPP is more appropriate in this case
3308 * because the object will never be able to have an attrdir.
3309 */
3310 if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3311 lookup_flags |= CREATE_XATTR_DIR;
3312
3313 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3314 NULL, NULL, NULL);
3315
3316 if (error) {
3317 if (error == ENOENT && args->createdir && exp_ro)
3318 *cs->statusp = resp->status = puterrno4(EROFS);
3319 else if (error == EINVAL || error == ENOSYS)
3320 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3321 else
3322 *cs->statusp = resp->status = puterrno4(error);
3323 goto out;
3324 }
3325
3326 ASSERT(avp->v_flag & V_XATTRDIR);
3327
3328 error = makefh4(&cs->fh, avp, cs->exi);
3329
3330 if (error) {
3331 VN_RELE(avp);
3332 *cs->statusp = resp->status = puterrno4(error);
3333 goto out;
3334 }
3335
3336 VN_RELE(cs->vp);
3337 cs->vp = avp;
3338
3339 /*
3340 * There is no requirement for an attrdir fh flag
3341 * because the attrdir has a vnode flag to distinguish
3342 * it from regular (non-xattr) directories. The
3343 * FH4_ATTRDIR flag is set for future sanity checks.
3344 */
3345 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3346 *cs->statusp = resp->status = NFS4_OK;
3347
3348 out:
3349 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3350 OPENATTR4res *, resp);
3351 }
3352
3353 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3354 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3355 caller_context_t *ct)
3356 {
3357 int error;
3358 int i;
3359 clock_t delaytime;
3360
3361 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3362
3363 /*
3364 * Don't block on mandatory locks. If this routine returns
3365 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3366 */
3367 uio->uio_fmode = FNONBLOCK;
3368
3369 for (i = 0; i < rfs4_maxlock_tries; i++) {
3370
3371
3372 if (direction == FREAD) {
3373 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3374 error = VOP_READ(vp, uio, ioflag, cred, ct);
3375 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3376 } else {
3377 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3378 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3379 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3380 }
3381
3382 if (error != EAGAIN)
3383 break;
3384
3385 if (i < rfs4_maxlock_tries - 1) {
3386 delay(delaytime);
3387 delaytime *= 2;
3388 }
3389 }
3390
3391 return (error);
3392 }
3393
/*
 * read: args: CURRENT_FH; stateid, offset, count (and an optional RDMA
 *	write chunk list in args->wlist).
 *	res: status.  On success resp->data_len holds the number of bytes
 *	read, resp->eof says whether the read ended exactly at the file
 *	size, and the data is referenced through resp->mblk/resp->data_val
 *	(or through the RDMA chunk list when args->wlist was supplied).
 *
 * The status is always stored in both resp->status and *cs->statusp.
 * Three data paths exist:
 *  - RDMA: the data is read into the chunk obtained from args->wlist;
 *  - loaned (zero-copy) buffers: used when nfs_loaned_buffers is set,
 *    RDMA is not in use and VOP_REQZCBUF succeeds;
 *  - otherwise an mblk chain allocated by rfs_read_alloc().
 */
/* ARGSUSED */
static void
rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	READ4args *args = &argop->nfs_argop4_u.opread;
	READ4res *resp = &resop->nfs_resop4_u.opread;
	int error;
	int verror;
	vnode_t *vp;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;
	mblk_t *mp = NULL;
	int alloc_err = 0;	/* never modified; only checked by an ASSERT */
	int rdma_used = 0;
	int loaned_buffers;
	caller_context_t ct;
	struct uio *uiop;

	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
	    READ4args, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Validate the stateid; this call also receives &ct to fill in. */
	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	/*
	 * A write chunk list means the reply data goes out via RDMA; the
	 * list must be large enough for the requested count.
	 */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (verror) {
		*cs->statusp = resp->status = puterrno4(verror);
		goto out;
	}

	/* Only regular files can be read. */
	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/*
	 * The file owner is always allowed.  Anyone else needs either
	 * read or execute permission; the error reported is the one from
	 * the execute check, which is the last one made.
	 */
	if (crgetuid(cs->cr) != va.va_uid &&
	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * A read starting at or beyond the current file size succeeds
	 * with no data and eof set.
	 */
	offset = args->offset;
	if (offset >= va.va_size) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = TRUE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		*cs->statusp = resp->status = NFS4_OK;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/* A zero-length read succeeds with no data and eof clear. */
	if (args->count == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = FALSE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/*
	 * Do not allocate memory more than maximum allowed
	 * transfer size
	 */
	if (args->count > rfs4_tsize(req))
		args->count = rfs4_tsize(req);

	/*
	 * Zero-copy attempt: ask the file system for loaned buffers.  On
	 * success jump straight to the read with uiop pointing at the
	 * xuio; on failure release the xuio and fall back to the copying
	 * path below.
	 */
	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;

		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list. If we
	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
	 */
	if (rdma_used) {
		mp = NULL;
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * It will be freed after the reply has been sent.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

doio_read:
	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);

	/*
	 * Re-fetch the file size after the read so that eof can be
	 * computed below; a failure here (verror) only suppresses eof.
	 */
	va.va_mask = AT_SIZE;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	if (error) {
		if (mp)
			freemsg(mp);
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	*cs->statusp = resp->status = NFS4_OK;

	/* Bytes actually read = requested count minus what is left over. */
	ASSERT(uiop->uio_resid >= 0);
	resp->data_len = args->count - uiop->uio_resid;
	if (mp) {
		resp->data_val = (char *)mp->b_datap->db_base;
		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
	} else {
		/* RDMA path: the data sits in the chunk described by iov. */
		resp->data_val = (caddr_t)iov.iov_base;
	}

	resp->mblk = mp;

	/* eof only when the read ended exactly at the current file size. */
	if (!verror && offset + resp->data_len == va.va_size)
		resp->eof = TRUE;
	else
		resp->eof = FALSE;

	if (rdma_used) {
		if (!rdma_setup_read_data4(args, resp)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
		}
	} else {
		resp->wlist = NULL;
	}

out:
	if (in_crit)
		nbl_end_crit(vp);

	/* iovp is only non-NULL when rfs_read_alloc() filled it in. */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
	    READ4res *, resp);
}
3639
3640 static void
rfs4_op_read_free(nfs_resop4 * resop)3641 rfs4_op_read_free(nfs_resop4 *resop)
3642 {
3643 READ4res *resp = &resop->nfs_resop4_u.opread;
3644
3645 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3646 freemsg(resp->mblk);
3647 resp->mblk = NULL;
3648 resp->data_val = NULL;
3649 resp->data_len = 0;
3650 }
3651 }
3652
3653 static void
rfs4_op_readdir_free(nfs_resop4 * resop)3654 rfs4_op_readdir_free(nfs_resop4 * resop)
3655 {
3656 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3657
3658 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3659 freeb(resp->mblk);
3660 resp->mblk = NULL;
3661 resp->data_len = 0;
3662 }
3663 }
3664
3665
3666 /* ARGSUSED */
3667 static void
rfs4_op_putpubfh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3668 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3669 struct compound_state *cs)
3670 {
3671 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3672 int error;
3673 vnode_t *vp;
3674 struct exportinfo *exi, *sav_exi;
3675 nfs_fh4_fmt_t *fh_fmtp;
3676 nfs_export_t *ne = nfs_get_export();
3677
3678 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3679
3680 if (cs->vp) {
3681 VN_RELE(cs->vp);
3682 cs->vp = NULL;
3683 }
3684
3685 if (cs->cr)
3686 crfree(cs->cr);
3687
3688 cs->cr = crdup(cs->basecr);
3689
3690 vp = ne->exi_public->exi_vp;
3691 if (vp == NULL) {
3692 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3693 goto out;
3694 }
3695
3696 error = makefh4(&cs->fh, vp, ne->exi_public);
3697 if (error != 0) {
3698 *cs->statusp = resp->status = puterrno4(error);
3699 goto out;
3700 }
3701 sav_exi = cs->exi;
3702 if (ne->exi_public == ne->exi_root) {
3703 /*
3704 * No filesystem is actually shared public, so we default
3705 * to exi_root. In this case, we must check whether root
3706 * is exported.
3707 */
3708 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3709
3710 /*
3711 * if root filesystem is exported, the exportinfo struct that we
3712 * should use is what checkexport4 returns, because root_exi is
3713 * actually a mostly empty struct.
3714 */
3715 exi = checkexport4(&fh_fmtp->fh4_fsid,
3716 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3717 cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3718 } else {
3719 /*
3720 * it's a properly shared filesystem
3721 */
3722 cs->exi = ne->exi_public;
3723 }
3724
3725 if (is_system_labeled()) {
3726 bslabel_t *clabel;
3727
3728 ASSERT(req->rq_label != NULL);
3729 clabel = req->rq_label;
3730 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3731 "got client label from request(1)",
3732 struct svc_req *, req);
3733 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3734 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3735 cs->exi)) {
3736 *cs->statusp = resp->status =
3737 NFS4ERR_SERVERFAULT;
3738 goto out;
3739 }
3740 }
3741 }
3742
3743 VN_HOLD(vp);
3744 cs->vp = vp;
3745
3746 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3747 VN_RELE(cs->vp);
3748 cs->vp = NULL;
3749 cs->exi = sav_exi;
3750 goto out;
3751 }
3752
3753 *cs->statusp = resp->status = NFS4_OK;
3754 out:
3755 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3756 PUTPUBFH4res *, resp);
3757 }
3758
3759 /*
3760 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3761 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3762 * or joe have restrictive search permissions, then we shouldn't let
3763 * the client get a file handle. This is easy to enforce. However, we
3764 * don't know what security flavor should be used until we resolve the
3765 * path name. Another complication is uid mapping. If root is
3766 * the user, then it will be mapped to the anonymous user by default,
3767 * but we won't know that till we've resolved the path name. And we won't
3768 * know what the anonymous user is.
3769 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3771 * the target object matches that of the request, and if root was the
3772 * caller, check for the root= and anon= options, and if necessary,
3773 * repeat the lookup using the right cred_t. But that's not done yet.
3774 */
3775 /* ARGSUSED */
3776 static void
rfs4_op_putfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3777 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3778 struct compound_state *cs)
3779 {
3780 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3781 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3782 nfs_fh4_fmt_t *fh_fmtp;
3783
3784 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3785 PUTFH4args *, args);
3786
3787 if (cs->vp) {
3788 VN_RELE(cs->vp);
3789 cs->vp = NULL;
3790 }
3791
3792 if (cs->cr) {
3793 crfree(cs->cr);
3794 cs->cr = NULL;
3795 }
3796
3797
3798 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3799 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3800 goto out;
3801 }
3802
3803 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3804 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3805 NULL);
3806
3807 if (cs->exi == NULL) {
3808 *cs->statusp = resp->status = NFS4ERR_STALE;
3809 goto out;
3810 }
3811
3812 cs->cr = crdup(cs->basecr);
3813
3814 ASSERT(cs->cr != NULL);
3815
3816 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3817 *cs->statusp = resp->status;
3818 goto out;
3819 }
3820
3821 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3822 VN_RELE(cs->vp);
3823 cs->vp = NULL;
3824 goto out;
3825 }
3826
3827 nfs_fh4_copy(&args->object, &cs->fh);
3828 *cs->statusp = resp->status = NFS4_OK;
3829 cs->deleg = FALSE;
3830
3831 out:
3832 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3833 PUTFH4res *, resp);
3834 }
3835
3836 /* ARGSUSED */
3837 static void
rfs4_op_putrootfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3838 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3839 struct compound_state *cs)
3840 {
3841 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3842 int error;
3843 fid_t fid;
3844 struct exportinfo *exi, *sav_exi;
3845
3846 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3847
3848 if (cs->vp) {
3849 VN_RELE(cs->vp);
3850 cs->vp = NULL;
3851 }
3852
3853 if (cs->cr)
3854 crfree(cs->cr);
3855
3856 cs->cr = crdup(cs->basecr);
3857
3858 /*
3859 * Using rootdir, the system root vnode,
3860 * get its fid.
3861 */
3862 bzero(&fid, sizeof (fid));
3863 fid.fid_len = MAXFIDSZ;
3864 error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3865 if (error != 0) {
3866 *cs->statusp = resp->status = puterrno4(error);
3867 goto out;
3868 }
3869
3870 /*
3871 * Then use the root fsid & fid it to find out if it's exported
3872 *
3873 * If the server root isn't exported directly, then
3874 * it should at least be a pseudo export based on
3875 * one or more exports further down in the server's
3876 * file tree.
3877 */
3878 exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3879 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3880 NFS4_DEBUG(rfs4_debug,
3881 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3882 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3883 goto out;
3884 }
3885
3886 /*
3887 * Now make a filehandle based on the root
3888 * export and root vnode.
3889 */
3890 error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3891 if (error != 0) {
3892 *cs->statusp = resp->status = puterrno4(error);
3893 goto out;
3894 }
3895
3896 sav_exi = cs->exi;
3897 cs->exi = exi;
3898
3899 VN_HOLD(ZONE_ROOTVP());
3900 cs->vp = ZONE_ROOTVP();
3901
3902 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3903 VN_RELE(cs->vp);
3904 cs->vp = NULL;
3905 cs->exi = sav_exi;
3906 goto out;
3907 }
3908
3909 *cs->statusp = resp->status = NFS4_OK;
3910 cs->deleg = FALSE;
3911 out:
3912 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3913 PUTROOTFH4res *, resp);
3914 }
3915
3916 /*
3917 * readlink: args: CURRENT_FH.
3918 * res: status. If success - CURRENT_FH unchanged, return linktext.
3919 */
3920
3921 /* ARGSUSED */
3922 static void
rfs4_op_readlink(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3923 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3924 struct compound_state *cs)
3925 {
3926 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3927 int error;
3928 vnode_t *vp;
3929 struct iovec iov;
3930 struct vattr va;
3931 struct uio uio;
3932 char *data;
3933 struct sockaddr *ca;
3934 char *name = NULL;
3935 int is_referral;
3936
3937 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3938
3939 /* CURRENT_FH: directory */
3940 vp = cs->vp;
3941 if (vp == NULL) {
3942 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3943 goto out;
3944 }
3945
3946 if (cs->access == CS_ACCESS_DENIED) {
3947 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3948 goto out;
3949 }
3950
3951 /* Is it a referral? */
3952 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3953
3954 is_referral = 1;
3955
3956 } else {
3957
3958 is_referral = 0;
3959
3960 if (vp->v_type == VDIR) {
3961 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3962 goto out;
3963 }
3964
3965 if (vp->v_type != VLNK) {
3966 *cs->statusp = resp->status = NFS4ERR_INVAL;
3967 goto out;
3968 }
3969
3970 }
3971
3972 va.va_mask = AT_MODE;
3973 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3974 if (error) {
3975 *cs->statusp = resp->status = puterrno4(error);
3976 goto out;
3977 }
3978
3979 if (MANDLOCK(vp, va.va_mode)) {
3980 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3981 goto out;
3982 }
3983
3984 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3985
3986 if (is_referral) {
3987 char *s;
3988 size_t strsz;
3989 kstat_named_t *stat =
3990 cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3991
3992 /* Get an artificial symlink based on a referral */
3993 s = build_symlink(vp, cs->cr, &strsz);
3994 stat[NFS_REFERLINKS].value.ui64++;
3995 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3996 vnode_t *, vp, char *, s);
3997 if (s == NULL)
3998 error = EINVAL;
3999 else {
4000 error = 0;
4001 (void) strlcpy(data, s, MAXPATHLEN + 1);
4002 kmem_free(s, strsz);
4003 }
4004
4005 } else {
4006
4007 iov.iov_base = data;
4008 iov.iov_len = MAXPATHLEN;
4009 uio.uio_iov = &iov;
4010 uio.uio_iovcnt = 1;
4011 uio.uio_segflg = UIO_SYSSPACE;
4012 uio.uio_extflg = UIO_COPY_CACHED;
4013 uio.uio_loffset = 0;
4014 uio.uio_resid = MAXPATHLEN;
4015
4016 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4017
4018 if (!error)
4019 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
4020 }
4021
4022 if (error) {
4023 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4024 *cs->statusp = resp->status = puterrno4(error);
4025 goto out;
4026 }
4027
4028 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4029 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4030 MAXPATHLEN + 1);
4031
4032 if (name == NULL) {
4033 /*
4034 * Even though the conversion failed, we return
4035 * something. We just don't translate it.
4036 */
4037 name = data;
4038 }
4039
4040 /*
4041 * treat link name as data
4042 */
4043 (void) str_to_utf8(name, (utf8string *)&resp->link);
4044
4045 if (name != data)
4046 kmem_free(name, MAXPATHLEN + 1);
4047 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4048 *cs->statusp = resp->status = NFS4_OK;
4049
4050 out:
4051 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4052 READLINK4res *, resp);
4053 }
4054
4055 static void
rfs4_op_readlink_free(nfs_resop4 * resop)4056 rfs4_op_readlink_free(nfs_resop4 *resop)
4057 {
4058 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4059 utf8string *symlink = (utf8string *)&resp->link;
4060
4061 if (symlink->utf8string_val) {
4062 UTF8STRING_FREE(*symlink)
4063 }
4064 }
4065
4066 /*
4067 * release_lockowner:
4068 * Release any state associated with the supplied
4069 * lockowner. Note if any lo_state is holding locks we will not
4070 * rele that lo_state and thus the lockowner will not be destroyed.
4071 * A client using lock after the lock owner stateid has been released
4072 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4073 * to reissue the lock with new_lock_owner set to TRUE.
4074 * args: lock_owner
4075 * res: status
4076 */
4077 /* ARGSUSED */
4078 static void
rfs4_op_release_lockowner(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4079 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4080 struct svc_req *req, struct compound_state *cs)
4081 {
4082 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4083 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4084 rfs4_lockowner_t *lo;
4085 rfs4_openowner_t *oo;
4086 rfs4_state_t *sp;
4087 rfs4_lo_state_t *lsp;
4088 rfs4_client_t *cp;
4089 bool_t create = FALSE;
4090 locklist_t *llist;
4091 sysid_t sysid;
4092
4093 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4094 cs, RELEASE_LOCKOWNER4args *, ap);
4095
4096 /* Make sure there is a clientid around for this request */
4097 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4098
4099 if (cp == NULL) {
4100 *cs->statusp = resp->status =
4101 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4102 goto out;
4103 }
4104 rfs4_client_rele(cp);
4105
4106 lo = rfs4_findlockowner(&ap->lock_owner, &create);
4107 if (lo == NULL) {
4108 *cs->statusp = resp->status = NFS4_OK;
4109 goto out;
4110 }
4111 ASSERT(lo->rl_client != NULL);
4112
4113 /*
4114 * Check for EXPIRED client. If so will reap state with in a lease
4115 * period or on next set_clientid_confirm step
4116 */
4117 if (rfs4_lease_expired(lo->rl_client)) {
4118 rfs4_lockowner_rele(lo);
4119 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4120 goto out;
4121 }
4122
4123 /*
4124 * If no sysid has been assigned, then no locks exist; just return.
4125 */
4126 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4127 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4128 rfs4_lockowner_rele(lo);
4129 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4130 goto out;
4131 }
4132
4133 sysid = lo->rl_client->rc_sysidt;
4134 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4135
4136 /*
4137 * Mark the lockowner invalid.
4138 */
4139 rfs4_dbe_hide(lo->rl_dbe);
4140
4141 /*
4142 * sysid-pid pair should now not be used since the lockowner is
4143 * invalid. If the client were to instantiate the lockowner again
4144 * it would be assigned a new pid. Thus we can get the list of
4145 * current locks.
4146 */
4147
4148 llist = flk_get_active_locks(sysid, lo->rl_pid);
4149 /* If we are still holding locks fail */
4150 if (llist != NULL) {
4151
4152 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4153
4154 flk_free_locklist(llist);
4155 /*
4156 * We need to unhide the lockowner so the client can
4157 * try it again. The bad thing here is if the client
4158 * has a logic error that took it here in the first place
4159 * they probably have lost accounting of the locks that it
4160 * is holding. So we may have dangling state until the
4161 * open owner state is reaped via close. One scenario
4162 * that could possibly occur is that the client has
4163 * sent the unlock request(s) in separate threads
4164 * and has not waited for the replies before sending the
4165 * RELEASE_LOCKOWNER request. Presumably, it would expect
4166 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4167 * reissuing the request.
4168 */
4169 rfs4_dbe_unhide(lo->rl_dbe);
4170 rfs4_lockowner_rele(lo);
4171 goto out;
4172 }
4173
4174 /*
4175 * For the corresponding client we need to check each open
4176 * owner for any opens that have lockowner state associated
4177 * with this lockowner.
4178 */
4179
4180 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4181 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4182 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4183
4184 rfs4_dbe_lock(oo->ro_dbe);
4185 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4186 sp = list_next(&oo->ro_statelist, sp)) {
4187
4188 rfs4_dbe_lock(sp->rs_dbe);
4189 for (lsp = list_head(&sp->rs_lostatelist);
4190 lsp != NULL;
4191 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4192 if (lsp->rls_locker == lo) {
4193 rfs4_dbe_lock(lsp->rls_dbe);
4194 rfs4_dbe_invalidate(lsp->rls_dbe);
4195 rfs4_dbe_unlock(lsp->rls_dbe);
4196 }
4197 }
4198 rfs4_dbe_unlock(sp->rs_dbe);
4199 }
4200 rfs4_dbe_unlock(oo->ro_dbe);
4201 }
4202 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4203
4204 rfs4_lockowner_rele(lo);
4205
4206 *cs->statusp = resp->status = NFS4_OK;
4207
4208 out:
4209 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4210 cs, RELEASE_LOCKOWNER4res *, resp);
4211 }
4212
4213 /*
4214 * short utility function to lookup a file and recall the delegation
4215 */
4216 static rfs4_file_t *
rfs4_lookup_and_findfile(vnode_t * dvp,char * nm,vnode_t ** vpp,int * lkup_error,cred_t * cr)4217 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4218 int *lkup_error, cred_t *cr)
4219 {
4220 vnode_t *vp;
4221 rfs4_file_t *fp = NULL;
4222 bool_t fcreate = FALSE;
4223 int error;
4224
4225 if (vpp)
4226 *vpp = NULL;
4227
4228 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4229 NULL)) == 0) {
4230 if (vp->v_type == VREG)
4231 fp = rfs4_findfile(vp, NULL, &fcreate);
4232 if (vpp)
4233 *vpp = vp;
4234 else
4235 VN_RELE(vp);
4236 }
4237
4238 if (lkup_error)
4239 *lkup_error = error;
4240
4241 return (fp);
4242 }
4243
4244 /*
4245 * remove: args: CURRENT_FH: directory; name.
4246 * res: status. If success - CURRENT_FH unchanged, return change_info
4247 * for directory.
4248 */
4249 /* ARGSUSED */
4250 static void
rfs4_op_remove(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4251 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4252 struct compound_state *cs)
4253 {
4254 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4255 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4256 int error;
4257 vnode_t *dvp, *vp;
4258 struct vattr bdva, idva, adva;
4259 char *nm;
4260 uint_t len;
4261 rfs4_file_t *fp;
4262 int in_crit = 0;
4263 bslabel_t *clabel;
4264 struct sockaddr *ca;
4265 char *name = NULL;
4266 nfsstat4 status;
4267
4268 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4269 REMOVE4args *, args);
4270
4271 /* CURRENT_FH: directory */
4272 dvp = cs->vp;
4273 if (dvp == NULL) {
4274 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4275 goto out;
4276 }
4277
4278 if (cs->access == CS_ACCESS_DENIED) {
4279 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4280 goto out;
4281 }
4282
4283 /*
4284 * If there is an unshared filesystem mounted on this vnode,
4285 * Do not allow to remove anything in this directory.
4286 */
4287 if (vn_ismntpt(dvp)) {
4288 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4289 goto out;
4290 }
4291
4292 if (dvp->v_type != VDIR) {
4293 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4294 goto out;
4295 }
4296
4297 status = utf8_dir_verify(&args->target);
4298 if (status != NFS4_OK) {
4299 *cs->statusp = resp->status = status;
4300 goto out;
4301 }
4302
4303 /*
4304 * Lookup the file so that we can check if it's a directory
4305 */
4306 nm = utf8_to_fn(&args->target, &len, NULL);
4307 if (nm == NULL) {
4308 *cs->statusp = resp->status = NFS4ERR_INVAL;
4309 goto out;
4310 }
4311
4312 if (len > MAXNAMELEN) {
4313 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4314 kmem_free(nm, len);
4315 goto out;
4316 }
4317
4318 if (rdonly4(req, cs)) {
4319 *cs->statusp = resp->status = NFS4ERR_ROFS;
4320 kmem_free(nm, len);
4321 goto out;
4322 }
4323
4324 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4325 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4326 MAXPATHLEN + 1);
4327
4328 if (name == NULL) {
4329 *cs->statusp = resp->status = NFS4ERR_INVAL;
4330 kmem_free(nm, len);
4331 goto out;
4332 }
4333
4334 /*
4335 * Lookup the file to determine type and while we are see if
4336 * there is a file struct around and check for delegation.
4337 * We don't need to acquire va_seq before this lookup, if
4338 * it causes an update, cinfo.before will not match, which will
4339 * trigger a cache flush even if atomic is TRUE.
4340 */
4341 fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4342 if (fp != NULL) {
4343 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4344 NULL)) {
4345 VN_RELE(vp);
4346 rfs4_file_rele(fp);
4347 *cs->statusp = resp->status = NFS4ERR_DELAY;
4348 if (nm != name)
4349 kmem_free(name, MAXPATHLEN + 1);
4350 kmem_free(nm, len);
4351 goto out;
4352 }
4353 }
4354
4355 /* Didn't find anything to remove */
4356 if (vp == NULL) {
4357 *cs->statusp = resp->status = error;
4358 if (nm != name)
4359 kmem_free(name, MAXPATHLEN + 1);
4360 kmem_free(nm, len);
4361 goto out;
4362 }
4363
4364 if (nbl_need_check(vp)) {
4365 nbl_start_crit(vp, RW_READER);
4366 in_crit = 1;
4367 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4368 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4369 if (nm != name)
4370 kmem_free(name, MAXPATHLEN + 1);
4371 kmem_free(nm, len);
4372 nbl_end_crit(vp);
4373 VN_RELE(vp);
4374 if (fp) {
4375 rfs4_clear_dont_grant(fp);
4376 rfs4_file_rele(fp);
4377 }
4378 goto out;
4379 }
4380 }
4381
4382 /* check label before allowing removal */
4383 if (is_system_labeled()) {
4384 ASSERT(req->rq_label != NULL);
4385 clabel = req->rq_label;
4386 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4387 "got client label from request(1)",
4388 struct svc_req *, req);
4389 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4390 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4391 cs->exi)) {
4392 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4393 if (name != nm)
4394 kmem_free(name, MAXPATHLEN + 1);
4395 kmem_free(nm, len);
4396 if (in_crit)
4397 nbl_end_crit(vp);
4398 VN_RELE(vp);
4399 if (fp) {
4400 rfs4_clear_dont_grant(fp);
4401 rfs4_file_rele(fp);
4402 }
4403 goto out;
4404 }
4405 }
4406 }
4407
4408 /* Get dir "before" change value */
4409 bdva.va_mask = AT_CTIME|AT_SEQ;
4410 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4411 if (error) {
4412 *cs->statusp = resp->status = puterrno4(error);
4413 if (nm != name)
4414 kmem_free(name, MAXPATHLEN + 1);
4415 kmem_free(nm, len);
4416 if (in_crit)
4417 nbl_end_crit(vp);
4418 VN_RELE(vp);
4419 if (fp) {
4420 rfs4_clear_dont_grant(fp);
4421 rfs4_file_rele(fp);
4422 }
4423 goto out;
4424 }
4425 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4426
4427 /* Actually do the REMOVE operation */
4428 if (vp->v_type == VDIR) {
4429 /*
4430 * Can't remove a directory that has a mounted-on filesystem.
4431 */
4432 if (vn_ismntpt(vp)) {
4433 error = EACCES;
4434 } else {
4435 /*
4436 * System V defines rmdir to return EEXIST,
4437 * not ENOTEMPTY, if the directory is not
4438 * empty. A System V NFS server needs to map
4439 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4440 * transmit over the wire.
4441 */
4442 if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4443 NULL, 0)) == EEXIST)
4444 error = ENOTEMPTY;
4445 }
4446 } else {
4447 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4448 fp != NULL) {
4449 struct vattr va;
4450 vnode_t *tvp;
4451
4452 rfs4_dbe_lock(fp->rf_dbe);
4453 tvp = fp->rf_vp;
4454 if (tvp)
4455 VN_HOLD(tvp);
4456 rfs4_dbe_unlock(fp->rf_dbe);
4457
4458 if (tvp) {
4459 /*
4460 * This is va_seq safe because we are not
4461 * manipulating dvp.
4462 */
4463 va.va_mask = AT_NLINK;
4464 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4465 va.va_nlink == 0) {
4466 /* Remove state on file remove */
4467 if (in_crit) {
4468 nbl_end_crit(vp);
4469 in_crit = 0;
4470 }
4471 rfs4_close_all_state(fp);
4472 }
4473 VN_RELE(tvp);
4474 }
4475 }
4476 }
4477
4478 if (in_crit)
4479 nbl_end_crit(vp);
4480 VN_RELE(vp);
4481
4482 if (fp) {
4483 rfs4_clear_dont_grant(fp);
4484 rfs4_file_rele(fp);
4485 }
4486 if (nm != name)
4487 kmem_free(name, MAXPATHLEN + 1);
4488 kmem_free(nm, len);
4489
4490 if (error) {
4491 *cs->statusp = resp->status = puterrno4(error);
4492 goto out;
4493 }
4494
4495 /*
4496 * Get the initial "after" sequence number, if it fails, set to zero
4497 */
4498 idva.va_mask = AT_SEQ;
4499 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4500 idva.va_seq = 0;
4501
4502 /*
4503 * Force modified data and metadata out to stable storage.
4504 */
4505 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4506
4507 /*
4508 * Get "after" change value, if it fails, simply return the
4509 * before value.
4510 */
4511 adva.va_mask = AT_CTIME|AT_SEQ;
4512 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4513 adva.va_ctime = bdva.va_ctime;
4514 adva.va_seq = 0;
4515 }
4516
4517 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4518
4519 /*
4520 * The cinfo.atomic = TRUE only if we have
4521 * non-zero va_seq's, and it has incremented by exactly one
4522 * during the VOP_REMOVE/RMDIR and it didn't change during
4523 * the VOP_FSYNC.
4524 */
4525 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4526 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4527 resp->cinfo.atomic = TRUE;
4528 else
4529 resp->cinfo.atomic = FALSE;
4530
4531 *cs->statusp = resp->status = NFS4_OK;
4532
4533 out:
4534 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4535 REMOVE4res *, resp);
4536 }
4537
4538 /*
4539 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4540 * oldname and newname.
4541 * res: status. If success - CURRENT_FH unchanged, return change_info
4542 * for both from and target directories.
4543 */
4544 /* ARGSUSED */
4545 static void
rfs4_op_rename(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4546 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4547 struct compound_state *cs)
4548 {
4549 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4550 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4551 int error;
4552 vnode_t *odvp;
4553 vnode_t *ndvp;
4554 vnode_t *srcvp, *targvp, *tvp;
4555 struct vattr obdva, oidva, oadva;
4556 struct vattr nbdva, nidva, nadva;
4557 char *onm, *nnm;
4558 uint_t olen, nlen;
4559 rfs4_file_t *fp, *sfp;
4560 int in_crit_src, in_crit_targ;
4561 int fp_rele_grant_hold, sfp_rele_grant_hold;
4562 int unlinked;
4563 bslabel_t *clabel;
4564 struct sockaddr *ca;
4565 char *converted_onm = NULL;
4566 char *converted_nnm = NULL;
4567 nfsstat4 status;
4568
4569 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4570 RENAME4args *, args);
4571
4572 fp = sfp = NULL;
4573 srcvp = targvp = tvp = NULL;
4574 in_crit_src = in_crit_targ = 0;
4575 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4576 unlinked = 0;
4577
4578 /* CURRENT_FH: target directory */
4579 ndvp = cs->vp;
4580 if (ndvp == NULL) {
4581 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4582 goto out;
4583 }
4584
4585 /* SAVED_FH: from directory */
4586 odvp = cs->saved_vp;
4587 if (odvp == NULL) {
4588 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4589 goto out;
4590 }
4591
4592 if (cs->access == CS_ACCESS_DENIED) {
4593 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4594 goto out;
4595 }
4596
4597 /*
4598 * If there is an unshared filesystem mounted on this vnode,
4599 * do not allow to rename objects in this directory.
4600 */
4601 if (vn_ismntpt(odvp)) {
4602 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4603 goto out;
4604 }
4605
4606 /*
4607 * If there is an unshared filesystem mounted on this vnode,
4608 * do not allow to rename to this directory.
4609 */
4610 if (vn_ismntpt(ndvp)) {
4611 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4612 goto out;
4613 }
4614
4615 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4616 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4617 goto out;
4618 }
4619
4620 if (cs->saved_exi != cs->exi) {
4621 *cs->statusp = resp->status = NFS4ERR_XDEV;
4622 goto out;
4623 }
4624
4625 status = utf8_dir_verify(&args->oldname);
4626 if (status != NFS4_OK) {
4627 *cs->statusp = resp->status = status;
4628 goto out;
4629 }
4630
4631 status = utf8_dir_verify(&args->newname);
4632 if (status != NFS4_OK) {
4633 *cs->statusp = resp->status = status;
4634 goto out;
4635 }
4636
4637 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4638 if (onm == NULL) {
4639 *cs->statusp = resp->status = NFS4ERR_INVAL;
4640 goto out;
4641 }
4642 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4643 nlen = MAXPATHLEN + 1;
4644 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4645 nlen);
4646
4647 if (converted_onm == NULL) {
4648 *cs->statusp = resp->status = NFS4ERR_INVAL;
4649 kmem_free(onm, olen);
4650 goto out;
4651 }
4652
4653 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4654 if (nnm == NULL) {
4655 *cs->statusp = resp->status = NFS4ERR_INVAL;
4656 if (onm != converted_onm)
4657 kmem_free(converted_onm, MAXPATHLEN + 1);
4658 kmem_free(onm, olen);
4659 goto out;
4660 }
4661 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4662 MAXPATHLEN + 1);
4663
4664 if (converted_nnm == NULL) {
4665 *cs->statusp = resp->status = NFS4ERR_INVAL;
4666 kmem_free(nnm, nlen);
4667 nnm = NULL;
4668 if (onm != converted_onm)
4669 kmem_free(converted_onm, MAXPATHLEN + 1);
4670 kmem_free(onm, olen);
4671 goto out;
4672 }
4673
4674
4675 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4676 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4677 kmem_free(onm, olen);
4678 kmem_free(nnm, nlen);
4679 goto out;
4680 }
4681
4682
4683 if (rdonly4(req, cs)) {
4684 *cs->statusp = resp->status = NFS4ERR_ROFS;
4685 if (onm != converted_onm)
4686 kmem_free(converted_onm, MAXPATHLEN + 1);
4687 kmem_free(onm, olen);
4688 if (nnm != converted_nnm)
4689 kmem_free(converted_nnm, MAXPATHLEN + 1);
4690 kmem_free(nnm, nlen);
4691 goto out;
4692 }
4693
4694 /* check label of the target dir */
4695 if (is_system_labeled()) {
4696 ASSERT(req->rq_label != NULL);
4697 clabel = req->rq_label;
4698 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4699 "got client label from request(1)",
4700 struct svc_req *, req);
4701 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4702 if (!do_rfs_label_check(clabel, ndvp,
4703 EQUALITY_CHECK, cs->exi)) {
4704 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4705 goto err_out;
4706 }
4707 }
4708 }
4709
4710 /*
4711 * Is the source a file and have a delegation?
4712 * We don't need to acquire va_seq before these lookups, if
4713 * it causes an update, cinfo.before will not match, which will
4714 * trigger a cache flush even if atomic is TRUE.
4715 */
4716 sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4717 &error, cs->cr);
4718 if (sfp != NULL) {
4719 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4720 NULL)) {
4721 *cs->statusp = resp->status = NFS4ERR_DELAY;
4722 goto err_out;
4723 }
4724 }
4725
4726 if (srcvp == NULL) {
4727 *cs->statusp = resp->status = puterrno4(error);
4728 if (onm != converted_onm)
4729 kmem_free(converted_onm, MAXPATHLEN + 1);
4730 kmem_free(onm, olen);
4731 if (nnm != converted_nnm)
4732 kmem_free(converted_nnm, MAXPATHLEN + 1);
4733 kmem_free(nnm, nlen);
4734 goto out;
4735 }
4736
4737 sfp_rele_grant_hold = 1;
4738
4739 /* Does the destination exist and a file and have a delegation? */
4740 fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4741 cs->cr);
4742 if (fp != NULL) {
4743 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4744 NULL)) {
4745 *cs->statusp = resp->status = NFS4ERR_DELAY;
4746 goto err_out;
4747 }
4748 }
4749 fp_rele_grant_hold = 1;
4750
4751 /* Check for NBMAND lock on both source and target */
4752 if (nbl_need_check(srcvp)) {
4753 nbl_start_crit(srcvp, RW_READER);
4754 in_crit_src = 1;
4755 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4756 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4757 goto err_out;
4758 }
4759 }
4760
4761 if (targvp && nbl_need_check(targvp)) {
4762 nbl_start_crit(targvp, RW_READER);
4763 in_crit_targ = 1;
4764 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4765 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4766 goto err_out;
4767 }
4768 }
4769
4770 /* Get source "before" change value */
4771 obdva.va_mask = AT_CTIME|AT_SEQ;
4772 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4773 if (!error) {
4774 nbdva.va_mask = AT_CTIME|AT_SEQ;
4775 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4776 }
4777 if (error) {
4778 *cs->statusp = resp->status = puterrno4(error);
4779 goto err_out;
4780 }
4781
4782 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4783 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4784
4785 error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4786 NULL, 0);
4787
4788 /*
4789 * If target existed and was unlinked by VOP_RENAME, state will need
4790 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4791 * any necessary nbl_end_crit on srcvp and tgtvp.
4792 */
4793 if (error == 0 && fp != NULL) {
4794 rfs4_dbe_lock(fp->rf_dbe);
4795 tvp = fp->rf_vp;
4796 if (tvp)
4797 VN_HOLD(tvp);
4798 rfs4_dbe_unlock(fp->rf_dbe);
4799
4800 if (tvp) {
4801 struct vattr va;
4802 va.va_mask = AT_NLINK;
4803
4804 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4805 va.va_nlink == 0) {
4806 unlinked = 1;
4807
4808 /* DEBUG data */
4809 if ((srcvp == targvp) || (tvp != targvp)) {
4810 cmn_err(CE_WARN, "rfs4_op_rename: "
4811 "srcvp %p, targvp: %p, tvp: %p",
4812 (void *)srcvp, (void *)targvp,
4813 (void *)tvp);
4814 }
4815 } else {
4816 VN_RELE(tvp);
4817 }
4818 }
4819 }
4820 if (error == 0)
4821 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4822
4823 if (in_crit_src)
4824 nbl_end_crit(srcvp);
4825 if (srcvp)
4826 VN_RELE(srcvp);
4827 if (in_crit_targ)
4828 nbl_end_crit(targvp);
4829 if (targvp)
4830 VN_RELE(targvp);
4831
4832 if (unlinked) {
4833 ASSERT(fp != NULL);
4834 ASSERT(tvp != NULL);
4835
4836 /* DEBUG data */
4837 if (RW_READ_HELD(&tvp->v_nbllock)) {
4838 cmn_err(CE_WARN, "rfs4_op_rename: "
4839 "RW_READ_HELD(%p)", (void *)tvp);
4840 }
4841
4842 /* The file is gone and so should the state */
4843 rfs4_close_all_state(fp);
4844 VN_RELE(tvp);
4845 }
4846
4847 if (sfp) {
4848 rfs4_clear_dont_grant(sfp);
4849 rfs4_file_rele(sfp);
4850 }
4851 if (fp) {
4852 rfs4_clear_dont_grant(fp);
4853 rfs4_file_rele(fp);
4854 }
4855
4856 if (converted_onm != onm)
4857 kmem_free(converted_onm, MAXPATHLEN + 1);
4858 kmem_free(onm, olen);
4859 if (converted_nnm != nnm)
4860 kmem_free(converted_nnm, MAXPATHLEN + 1);
4861 kmem_free(nnm, nlen);
4862
4863 /*
4864 * Get the initial "after" sequence number, if it fails, set to zero
4865 */
4866 oidva.va_mask = AT_SEQ;
4867 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4868 oidva.va_seq = 0;
4869
4870 nidva.va_mask = AT_SEQ;
4871 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4872 nidva.va_seq = 0;
4873
4874 /*
4875 * Force modified data and metadata out to stable storage.
4876 */
4877 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4878 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4879
4880 if (error) {
4881 *cs->statusp = resp->status = puterrno4(error);
4882 goto out;
4883 }
4884
4885 /*
4886 * Get "after" change values, if it fails, simply return the
4887 * before value.
4888 */
4889 oadva.va_mask = AT_CTIME|AT_SEQ;
4890 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4891 oadva.va_ctime = obdva.va_ctime;
4892 oadva.va_seq = 0;
4893 }
4894
4895 nadva.va_mask = AT_CTIME|AT_SEQ;
4896 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4897 nadva.va_ctime = nbdva.va_ctime;
4898 nadva.va_seq = 0;
4899 }
4900
4901 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4902 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4903
4904 /*
4905 * The cinfo.atomic = TRUE only if we have
4906 * non-zero va_seq's, and it has incremented by exactly one
4907 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4908 */
4909 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4910 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4911 resp->source_cinfo.atomic = TRUE;
4912 else
4913 resp->source_cinfo.atomic = FALSE;
4914
4915 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4916 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4917 resp->target_cinfo.atomic = TRUE;
4918 else
4919 resp->target_cinfo.atomic = FALSE;
4920
4921 #ifdef VOLATILE_FH_TEST
4922 {
4923 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4924
4925 /*
4926 * Add the renamed file handle to the volatile rename list
4927 */
4928 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4929 /* file handles may expire on rename */
4930 vnode_t *vp;
4931
4932 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4933 /*
4934 * Already know that nnm will be a valid string
4935 */
4936 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4937 NULL, NULL, NULL);
4938 kmem_free(nnm, nlen);
4939 if (!error) {
4940 add_volrnm_fh(cs->exi, vp);
4941 VN_RELE(vp);
4942 }
4943 }
4944 }
4945 #endif /* VOLATILE_FH_TEST */
4946
4947 *cs->statusp = resp->status = NFS4_OK;
4948 out:
4949 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4950 RENAME4res *, resp);
4951 return;
4952
4953 err_out:
4954 if (onm != converted_onm)
4955 kmem_free(converted_onm, MAXPATHLEN + 1);
4956 if (onm != NULL)
4957 kmem_free(onm, olen);
4958 if (nnm != converted_nnm)
4959 kmem_free(converted_nnm, MAXPATHLEN + 1);
4960 if (nnm != NULL)
4961 kmem_free(nnm, nlen);
4962
4963 if (in_crit_src) nbl_end_crit(srcvp);
4964 if (in_crit_targ) nbl_end_crit(targvp);
4965 if (targvp) VN_RELE(targvp);
4966 if (srcvp) VN_RELE(srcvp);
4967 if (sfp) {
4968 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4969 rfs4_file_rele(sfp);
4970 }
4971 if (fp) {
4972 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4973 rfs4_file_rele(fp);
4974 }
4975
4976 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4977 RENAME4res *, resp);
4978 }
4979
4980 /* ARGSUSED */
4981 static void
rfs4_op_renew(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4982 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4983 struct compound_state *cs)
4984 {
4985 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4986 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4987 rfs4_client_t *cp;
4988
4989 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4990 RENEW4args *, args);
4991
4992 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4993 *cs->statusp = resp->status =
4994 rfs4_check_clientid(&args->clientid, 0);
4995 goto out;
4996 }
4997
4998 if (rfs4_lease_expired(cp)) {
4999 rfs4_client_rele(cp);
5000 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
5001 goto out;
5002 }
5003
5004 rfs4_update_lease(cp);
5005
5006 mutex_enter(cp->rc_cbinfo.cb_lock);
5007 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5008 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5009 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5010 } else {
5011 *cs->statusp = resp->status = NFS4_OK;
5012 }
5013 mutex_exit(cp->rc_cbinfo.cb_lock);
5014
5015 rfs4_client_rele(cp);
5016
5017 out:
5018 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5019 RENEW4res *, resp);
5020 }
5021
5022 /* ARGSUSED */
5023 static void
rfs4_op_restorefh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5024 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5025 struct compound_state *cs)
5026 {
5027 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5028
5029 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5030
5031 /* No need to check cs->access - we are not accessing any object */
5032 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5033 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5034 goto out;
5035 }
5036 if (cs->vp != NULL) {
5037 VN_RELE(cs->vp);
5038 }
5039 cs->vp = cs->saved_vp;
5040 cs->saved_vp = NULL;
5041 cs->exi = cs->saved_exi;
5042 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5043 *cs->statusp = resp->status = NFS4_OK;
5044 cs->deleg = FALSE;
5045
5046 out:
5047 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5048 RESTOREFH4res *, resp);
5049 }
5050
5051 /* ARGSUSED */
5052 static void
rfs4_op_savefh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5053 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5054 struct compound_state *cs)
5055 {
5056 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5057
5058 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5059
5060 /* No need to check cs->access - we are not accessing any object */
5061 if (cs->vp == NULL) {
5062 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5063 goto out;
5064 }
5065 if (cs->saved_vp != NULL) {
5066 VN_RELE(cs->saved_vp);
5067 }
5068 cs->saved_vp = cs->vp;
5069 VN_HOLD(cs->saved_vp);
5070 cs->saved_exi = cs->exi;
5071 /*
5072 * since SAVEFH is fairly rare, don't alloc space for its fh
5073 * unless necessary.
5074 */
5075 if (cs->saved_fh.nfs_fh4_val == NULL) {
5076 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5077 }
5078 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5079 *cs->statusp = resp->status = NFS4_OK;
5080
5081 out:
5082 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5083 SAVEFH4res *, resp);
5084 }
5085
5086 /*
5087 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5088 * return the bitmap of attrs that were set successfully. It is also
5089 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5090 * always be called only after rfs4_do_set_attrs().
5091 *
5092 * Verify that the attributes are same as the expected ones. sargp->vap
5093 * and sargp->sbp contain the input attributes as translated from fattr4.
5094 *
5095 * This function verifies only the attrs that correspond to a vattr or
5096 * vfsstat struct. That is because of the extra step needed to get the
5097 * corresponding system structs. Other attributes have already been set or
5098 * verified by do_rfs4_set_attrs.
5099 *
5100 * Return 0 if all attrs match, -1 if some don't, error if error processing.
5101 */
5102 static int
rfs4_verify_attr(struct nfs4_svgetit_arg * sargp,bitmap4 * resp,struct nfs4_ntov_table * ntovp)5103 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5104 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5105 {
5106 int error, ret_error = 0;
5107 int i, k;
5108 uint_t sva_mask = sargp->vap->va_mask;
5109 uint_t vbit;
5110 union nfs4_attr_u *na;
5111 uint8_t *amap;
5112 bool_t getsb = ntovp->vfsstat;
5113
5114 if (sva_mask != 0) {
5115 /*
5116 * Okay to overwrite sargp->vap because we verify based
5117 * on the incoming values.
5118 */
5119 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5120 sargp->cs->cr, NULL);
5121 if (ret_error) {
5122 if (resp == NULL)
5123 return (ret_error);
5124 /*
5125 * Must return bitmap of successful attrs
5126 */
5127 sva_mask = 0; /* to prevent checking vap later */
5128 } else {
5129 /*
5130 * Some file systems clobber va_mask. it is probably
5131 * wrong of them to do so, nonethless we practice
5132 * defensive coding.
5133 * See bug id 4276830.
5134 */
5135 sargp->vap->va_mask = sva_mask;
5136 }
5137 }
5138
5139 if (getsb) {
5140 /*
5141 * Now get the superblock and loop on the bitmap, as there is
5142 * no simple way of translating from superblock to bitmap4.
5143 */
5144 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5145 if (ret_error) {
5146 if (resp == NULL)
5147 goto errout;
5148 getsb = FALSE;
5149 }
5150 }
5151
5152 /*
5153 * Now loop and verify each attribute which getattr returned
5154 * whether it's the same as the input.
5155 */
5156 if (resp == NULL && !getsb && (sva_mask == 0))
5157 goto errout;
5158
5159 na = ntovp->na;
5160 amap = ntovp->amap;
5161 k = 0;
5162 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5163 k = *amap;
5164 ASSERT(nfs4_ntov_map[k].nval == k);
5165 vbit = nfs4_ntov_map[k].vbit;
5166
5167 /*
5168 * If vattr attribute but VOP_GETATTR failed, or it's
5169 * superblock attribute but VFS_STATVFS failed, skip
5170 */
5171 if (vbit) {
5172 if ((vbit & sva_mask) == 0)
5173 continue;
5174 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5175 continue;
5176 }
5177 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5178 if (resp != NULL) {
5179 if (error)
5180 ret_error = -1; /* not all match */
5181 else /* update response bitmap */
5182 *resp |= nfs4_ntov_map[k].fbit;
5183 continue;
5184 }
5185 if (error) {
5186 ret_error = -1; /* not all match */
5187 break;
5188 }
5189 }
5190 errout:
5191 return (ret_error);
5192 }
5193
5194 /*
5195 * Decode the attribute to be set/verified. If the attr requires a sys op
5196 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5197 * call the sv_getit function for it, because the sys op hasn't yet been done.
5198 * Return 0 for success, error code if failed.
5199 *
5200 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5201 */
5202 static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd,struct nfs4_svgetit_arg * sargp,int k,XDR * xdrp,bitmap4 * resp_bval,union nfs4_attr_u * nap)5203 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5204 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5205 {
5206 int error = 0;
5207 bool_t set_later;
5208
5209 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5210
5211 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5212 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5213 /*
5214 * don't verify yet if a vattr or sb dependent attr,
5215 * because we don't have their sys values yet.
5216 * Will be done later.
5217 */
5218 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5219 /*
5220 * ACLs are a special case, since setting the MODE
5221 * conflicts with setting the ACL. We delay setting
5222 * the ACL until all other attributes have been set.
5223 * The ACL gets set in do_rfs4_op_setattr().
5224 */
5225 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5226 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5227 sargp, nap);
5228 if (error) {
5229 xdr_free(nfs4_ntov_map[k].xfunc,
5230 (caddr_t)nap);
5231 }
5232 }
5233 }
5234 } else {
5235 #ifdef DEBUG
5236 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5237 "decoding attribute %d\n", k);
5238 #endif
5239 error = EINVAL;
5240 }
5241 if (!error && resp_bval && !set_later) {
5242 *resp_bval |= nfs4_ntov_map[k].fbit;
5243 }
5244
5245 return (error);
5246 }
5247
5248 /*
5249 * Set vattr based on incoming fattr4 attrs - used by setattr.
5250 * Set response mask. Ignore any values that are not writable vattr attrs.
5251 */
5252 static nfsstat4
do_rfs4_set_attrs(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,struct nfs4_svgetit_arg * sargp,struct nfs4_ntov_table * ntovp,nfs4_attr_cmd_t cmd)5253 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5254 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5255 nfs4_attr_cmd_t cmd)
5256 {
5257 int error = 0;
5258 int i;
5259 char *attrs = fattrp->attrlist4;
5260 uint32_t attrslen = fattrp->attrlist4_len;
5261 XDR xdr;
5262 nfsstat4 status = NFS4_OK;
5263 vnode_t *vp = cs->vp;
5264 union nfs4_attr_u *na;
5265 uint8_t *amap;
5266
5267 #ifndef lint
5268 /*
5269 * Make sure that maximum attribute number can be expressed as an
5270 * 8 bit quantity.
5271 */
5272 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5273 #endif
5274
5275 if (vp == NULL) {
5276 if (resp)
5277 *resp = 0;
5278 return (NFS4ERR_NOFILEHANDLE);
5279 }
5280 if (cs->access == CS_ACCESS_DENIED) {
5281 if (resp)
5282 *resp = 0;
5283 return (NFS4ERR_ACCESS);
5284 }
5285
5286 sargp->op = cmd;
5287 sargp->cs = cs;
5288 sargp->flag = 0; /* may be set later */
5289 sargp->vap->va_mask = 0;
5290 sargp->rdattr_error = NFS4_OK;
5291 sargp->rdattr_error_req = FALSE;
5292 /* sargp->sbp is set by the caller */
5293
5294 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5295
5296 na = ntovp->na;
5297 amap = ntovp->amap;
5298
5299 /*
5300 * The following loop iterates on the nfs4_ntov_map checking
5301 * if the fbit is set in the requested bitmap.
5302 * If set then we process the arguments using the
5303 * rfs4_fattr4 conversion functions to populate the setattr
5304 * vattr and va_mask. Any settable attrs that are not using vattr
5305 * will be set in this loop.
5306 */
5307 for (i = 0; i < nfs4_ntov_map_size; i++) {
5308 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5309 continue;
5310 }
5311 /*
5312 * If setattr, must be a writable attr.
5313 * If verify/nverify, must be a readable attr.
5314 */
5315 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5316 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5317 /*
5318 * Client tries to set/verify an
5319 * unsupported attribute, tries to set
5320 * a read only attr or verify a write
5321 * only one - error!
5322 */
5323 break;
5324 }
5325 /*
5326 * Decode the attribute to set/verify
5327 */
5328 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5329 &xdr, resp ? resp : NULL, na);
5330 if (error)
5331 break;
5332 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5333 na++;
5334 (ntovp->attrcnt)++;
5335 if (nfs4_ntov_map[i].vfsstat)
5336 ntovp->vfsstat = TRUE;
5337 }
5338
5339 if (error != 0)
5340 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5341 puterrno4(error));
5342 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5343 return (status);
5344 }
5345
5346 static nfsstat4
do_rfs4_op_setattr(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,stateid4 * stateid)5347 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5348 stateid4 *stateid)
5349 {
5350 int error = 0;
5351 struct nfs4_svgetit_arg sarg;
5352 bool_t trunc;
5353
5354 nfsstat4 status = NFS4_OK;
5355 cred_t *cr = cs->cr;
5356 vnode_t *vp = cs->vp;
5357 struct nfs4_ntov_table ntov;
5358 struct statvfs64 sb;
5359 struct vattr bva;
5360 struct flock64 bf;
5361 int in_crit = 0;
5362 uint_t saved_mask = 0;
5363 caller_context_t ct;
5364
5365 *resp = 0;
5366 sarg.sbp = &sb;
5367 sarg.is_referral = B_FALSE;
5368 nfs4_ntov_table_init(&ntov);
5369 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5370 NFS4ATTR_SETIT);
5371 if (status != NFS4_OK) {
5372 /*
5373 * failed set attrs
5374 */
5375 goto done;
5376 }
5377
5378 if ((sarg.vap->va_mask == 0) &&
5379 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5380 /*
5381 * no further work to be done
5382 */
5383 goto done;
5384 }
5385
5386 /*
5387 * If we got a request to set the ACL and the MODE, only
5388 * allow changing VSUID, VSGID, and VSVTX. Attempting
5389 * to change any other bits, along with setting an ACL,
5390 * gives NFS4ERR_INVAL.
5391 */
5392 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5393 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5394 vattr_t va;
5395
5396 va.va_mask = AT_MODE;
5397 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5398 if (error) {
5399 status = puterrno4(error);
5400 goto done;
5401 }
5402 if ((sarg.vap->va_mode ^ va.va_mode) &
5403 ~(VSUID | VSGID | VSVTX)) {
5404 status = NFS4ERR_INVAL;
5405 goto done;
5406 }
5407 }
5408
5409 /* Check stateid only if size has been set */
5410 if (sarg.vap->va_mask & AT_SIZE) {
5411 trunc = (sarg.vap->va_size == 0);
5412 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5413 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5414 if (status != NFS4_OK)
5415 goto done;
5416 } else {
5417 ct.cc_sysid = 0;
5418 ct.cc_pid = 0;
5419 ct.cc_caller_id = nfs4_srv_caller_id;
5420 ct.cc_flags = CC_DONTBLOCK;
5421 }
5422
5423 /* XXX start of possible race with delegations */
5424
5425 /*
5426 * We need to specially handle size changes because it is
5427 * possible for the client to create a file with read-only
5428 * modes, but with the file opened for writing. If the client
5429 * then tries to set the file size, e.g. ftruncate(3C),
5430 * fcntl(F_FREESP), the normal access checking done in
5431 * VOP_SETATTR would prevent the client from doing it even though
5432 * it should be allowed to do so. To get around this, we do the
5433 * access checking for ourselves and use VOP_SPACE which doesn't
5434 * do the access checking.
5435 * Also the client should not be allowed to change the file
5436 * size if there is a conflicting non-blocking mandatory lock in
5437 * the region of the change.
5438 */
5439 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5440 u_offset_t offset;
5441 ssize_t length;
5442
5443 /*
5444 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5445 * before returning, sarg.vap->va_mask is used to
5446 * generate the setattr reply bitmap. We also clear
5447 * AT_SIZE below before calling VOP_SPACE. For both
5448 * of these cases, the va_mask needs to be saved here
5449 * and restored after calling VOP_SETATTR.
5450 */
5451 saved_mask = sarg.vap->va_mask;
5452
5453 /*
5454 * Check any possible conflict due to NBMAND locks.
5455 * Get into critical region before VOP_GETATTR, so the
5456 * size attribute is valid when checking conflicts.
5457 */
5458 if (nbl_need_check(vp)) {
5459 nbl_start_crit(vp, RW_READER);
5460 in_crit = 1;
5461 }
5462
5463 bva.va_mask = AT_UID|AT_SIZE;
5464 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5465 if (error != 0) {
5466 status = puterrno4(error);
5467 goto done;
5468 }
5469
5470 if (in_crit) {
5471 if (sarg.vap->va_size < bva.va_size) {
5472 offset = sarg.vap->va_size;
5473 length = bva.va_size - sarg.vap->va_size;
5474 } else {
5475 offset = bva.va_size;
5476 length = sarg.vap->va_size - bva.va_size;
5477 }
5478 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5479 &ct)) {
5480 status = NFS4ERR_LOCKED;
5481 goto done;
5482 }
5483 }
5484
5485 if (crgetuid(cr) == bva.va_uid) {
5486 sarg.vap->va_mask &= ~AT_SIZE;
5487 bf.l_type = F_WRLCK;
5488 bf.l_whence = 0;
5489 bf.l_start = (off64_t)sarg.vap->va_size;
5490 bf.l_len = 0;
5491 bf.l_sysid = 0;
5492 bf.l_pid = 0;
5493 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5494 (offset_t)sarg.vap->va_size, cr, &ct);
5495 }
5496 }
5497
5498 if (!error && sarg.vap->va_mask != 0)
5499 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5500
5501 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5502 if (saved_mask & AT_SIZE)
5503 sarg.vap->va_mask |= AT_SIZE;
5504
5505 /*
5506 * If an ACL was being set, it has been delayed until now,
5507 * in order to set the mode (via the VOP_SETATTR() above) first.
5508 */
5509 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5510 int i;
5511
5512 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5513 if (ntov.amap[i] == FATTR4_ACL)
5514 break;
5515 if (i < NFS4_MAXNUM_ATTRS) {
5516 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5517 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5518 if (error == 0) {
5519 *resp |= FATTR4_ACL_MASK;
5520 } else if (error == ENOTSUP) {
5521 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5522 status = NFS4ERR_ATTRNOTSUPP;
5523 goto done;
5524 }
5525 } else {
5526 NFS4_DEBUG(rfs4_debug,
5527 (CE_NOTE, "do_rfs4_op_setattr: "
5528 "unable to find ACL in fattr4"));
5529 error = EINVAL;
5530 }
5531 }
5532
5533 if (error) {
5534 /* check if a monitor detected a delegation conflict */
5535 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5536 status = NFS4ERR_DELAY;
5537 else
5538 status = puterrno4(error);
5539
5540 /*
5541 * Set the response bitmap when setattr failed.
5542 * If VOP_SETATTR partially succeeded, test by doing a
5543 * VOP_GETATTR on the object and comparing the data
5544 * to the setattr arguments.
5545 */
5546 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5547 } else {
5548 /*
5549 * Force modified metadata out to stable storage.
5550 */
5551 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5552 /*
5553 * Set response bitmap
5554 */
5555 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5556 }
5557
5558 /* Return early and already have a NFSv4 error */
5559 done:
5560 /*
5561 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5562 * conversion sets both readable and writeable NFS4 attrs
5563 * for AT_MTIME and AT_ATIME. The line below masks out
5564 * unrequested attrs from the setattr result bitmap. This
5565 * is placed after the done: label to catch the ATTRNOTSUP
5566 * case.
5567 */
5568 *resp &= fattrp->attrmask;
5569
5570 if (in_crit)
5571 nbl_end_crit(vp);
5572
5573 nfs4_ntov_table_free(&ntov, &sarg);
5574
5575 return (status);
5576 }
5577
5578 /* ARGSUSED */
5579 static void
rfs4_op_setattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5580 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5581 struct compound_state *cs)
5582 {
5583 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5584 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5585 bslabel_t *clabel;
5586
5587 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5588 SETATTR4args *, args);
5589
5590 if (cs->vp == NULL) {
5591 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5592 goto out;
5593 }
5594
5595 /*
5596 * If there is an unshared filesystem mounted on this vnode,
5597 * do not allow to setattr on this vnode.
5598 */
5599 if (vn_ismntpt(cs->vp)) {
5600 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5601 goto out;
5602 }
5603
5604 resp->attrsset = 0;
5605
5606 if (rdonly4(req, cs)) {
5607 *cs->statusp = resp->status = NFS4ERR_ROFS;
5608 goto out;
5609 }
5610
5611 /* check label before setting attributes */
5612 if (is_system_labeled()) {
5613 ASSERT(req->rq_label != NULL);
5614 clabel = req->rq_label;
5615 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5616 "got client label from request(1)",
5617 struct svc_req *, req);
5618 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5619 if (!do_rfs_label_check(clabel, cs->vp,
5620 EQUALITY_CHECK, cs->exi)) {
5621 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5622 goto out;
5623 }
5624 }
5625 }
5626
5627 *cs->statusp = resp->status =
5628 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5629 &args->stateid);
5630
5631 out:
5632 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5633 SETATTR4res *, resp);
5634 }
5635
5636 /* ARGSUSED */
5637 static void
rfs4_op_verify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5638 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5639 struct compound_state *cs)
5640 {
5641 /*
5642 * verify and nverify are exactly the same, except that nverify
5643 * succeeds when some argument changed, and verify succeeds when
5644 * when none changed.
5645 */
5646
5647 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5648 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5649
5650 int error;
5651 struct nfs4_svgetit_arg sarg;
5652 struct statvfs64 sb;
5653 struct nfs4_ntov_table ntov;
5654
5655 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5656 VERIFY4args *, args);
5657
5658 if (cs->vp == NULL) {
5659 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5660 goto out;
5661 }
5662
5663 sarg.sbp = &sb;
5664 sarg.is_referral = B_FALSE;
5665 nfs4_ntov_table_init(&ntov);
5666 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5667 &sarg, &ntov, NFS4ATTR_VERIT);
5668 if (resp->status != NFS4_OK) {
5669 /*
5670 * do_rfs4_set_attrs will try to verify systemwide attrs,
5671 * so could return -1 for "no match".
5672 */
5673 if (resp->status == -1)
5674 resp->status = NFS4ERR_NOT_SAME;
5675 goto done;
5676 }
5677 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5678 switch (error) {
5679 case 0:
5680 resp->status = NFS4_OK;
5681 break;
5682 case -1:
5683 resp->status = NFS4ERR_NOT_SAME;
5684 break;
5685 default:
5686 resp->status = puterrno4(error);
5687 break;
5688 }
5689 done:
5690 *cs->statusp = resp->status;
5691 nfs4_ntov_table_free(&ntov, &sarg);
5692 out:
5693 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5694 VERIFY4res *, resp);
5695 }
5696
5697 /* ARGSUSED */
5698 static void
rfs4_op_nverify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5699 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5700 struct compound_state *cs)
5701 {
5702 /*
5703 * verify and nverify are exactly the same, except that nverify
5704 * succeeds when some argument changed, and verify succeeds when
5705 * when none changed.
5706 */
5707
5708 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5709 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5710
5711 int error;
5712 struct nfs4_svgetit_arg sarg;
5713 struct statvfs64 sb;
5714 struct nfs4_ntov_table ntov;
5715
5716 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5717 NVERIFY4args *, args);
5718
5719 if (cs->vp == NULL) {
5720 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5721 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5722 NVERIFY4res *, resp);
5723 return;
5724 }
5725 sarg.sbp = &sb;
5726 sarg.is_referral = B_FALSE;
5727 nfs4_ntov_table_init(&ntov);
5728 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5729 &sarg, &ntov, NFS4ATTR_VERIT);
5730 if (resp->status != NFS4_OK) {
5731 /*
5732 * do_rfs4_set_attrs will try to verify systemwide attrs,
5733 * so could return -1 for "no match".
5734 */
5735 if (resp->status == -1)
5736 resp->status = NFS4_OK;
5737 goto done;
5738 }
5739 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5740 switch (error) {
5741 case 0:
5742 resp->status = NFS4ERR_SAME;
5743 break;
5744 case -1:
5745 resp->status = NFS4_OK;
5746 break;
5747 default:
5748 resp->status = puterrno4(error);
5749 break;
5750 }
5751 done:
5752 *cs->statusp = resp->status;
5753 nfs4_ntov_table_free(&ntov, &sarg);
5754
5755 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5756 NVERIFY4res *, resp);
5757 }
5758
/*
 * XXX - This should live in an NFS header file.
 *
 * Size of the on-stack iovec array used by rfs4_op_write().  An mblk
 * chain that needs more entries than this gets a kmem_alloc()ed array
 * instead.
 */
#define	MAX_IOVECS	12
5763
/*
 * NFSv4 WRITE operation handler.
 *
 * Checks, in order: current filehandle present, compound access not
 * denied, stateid valid for FWRITE, no conflicting non-blocking
 * mandatory lock on the target range, attributes readable, export not
 * read-only, vnode is a regular file, caller is the owner or passes
 * VOP_ACCESS(VWRITE), and the file is not subject to mandatory locking.
 *
 * The data is then described by an iovec array built from whichever of
 * args->mblk, args->rlist or args->data_val is present, and written
 * with do_io() while the thread temporarily runs with the caller's
 * cred.  On success the result carries the byte count, the commit
 * level and the server write verifier.
 *
 * Every exit goes through "out" so that the nbmand critical region,
 * if entered, is always left.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
	int error;
	vnode_t *vp;
	struct vattr bva;
	u_offset_t rlimit;
	struct uio uio;
	struct iovec iov[MAX_IOVECS];	/* used when the chain is short */
	struct iovec *iovp;		/* iov, or a kmem_alloc()ed array */
	int iovcnt;
	int ioflag;
	cred_t *savecred, *cr;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;		/* set once nbl_start_crit() ran */
	caller_context_t ct;
	nfs4_srv_t *nsrv4;

	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
	    WRITE4args *, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	cr = cs->cr;

	/* The stateid check also fills in the caller context (ct). */
	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_WRITE,
		    args->offset, args->data_len, 0, &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	bva.va_mask = AT_MODE | AT_UID;
	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/*
	 * VOP_ACCESS is only consulted when the caller is not the file's
	 * owner; the owner is allowed through regardless of mode bits.
	 */
	if (crgetuid(cr) != bva.va_uid &&
	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, bva.va_mode)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	nsrv4 = nfs4_get_srv();
	/* A zero-length write succeeds without touching the file. */
	if (args->data_len == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->count = 0;
		resp->committed = args->stable;
		resp->writeverf = nsrv4->write4verf;
		goto out;
	}

	if (args->mblk != NULL) {
		mblk_t *m;
		uint_t bytes, round_len;

		/*
		 * Count how many mblks are needed to cover the payload
		 * rounded up to an XDR unit; that is the iovec count.
		 */
		iovcnt = 0;
		bytes = 0;
		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
		for (m = args->mblk;
		    m != NULL && bytes < round_len;
		    m = m->b_cont) {
			iovcnt++;
			bytes += MBLKL(m);
		}
#ifdef DEBUG
		/* should have ended on an mblk boundary */
		if (bytes != round_len) {
			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
			    bytes, round_len, args->data_len);
			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
			    (void *)args->mblk, (void *)m);
			ASSERT(bytes == round_len);
		}
#endif
		/* Use the stack array when it is large enough. */
		if (iovcnt <= MAX_IOVECS) {
			iovp = iov;
		} else {
			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
		}
		mblk_to_iov(args->mblk, iovcnt, iovp);
	} else if (args->rlist != NULL) {
		/* Data described by args->rlist: one contiguous buffer. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
		iovp->iov_len = args->data_len;
	} else {
		/* Data already sits in one flat buffer. */
		iovcnt = 1;
		iovp = iov;
		iovp->iov_base = args->data_val;
		iovp->iov_len = args->data_len;
	}

	uio.uio_iov = iovp;
	uio.uio_iovcnt = iovcnt;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_DEFAULT;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->data_len;
	uio.uio_llimit = curproc->p_fsz_ctl;
	/* Trim the request so it does not extend past uio_llimit. */
	rlimit = uio.uio_llimit - args->offset;
	if (rlimit < (u_offset_t)uio.uio_resid)
		uio.uio_resid = (int)rlimit;

	/* Translate the requested stability level into an I/O flag. */
	if (args->stable == UNSTABLE4)
		ioflag = 0;
	else if (args->stable == FILE_SYNC4)
		ioflag = FSYNC;
	else if (args->stable == DATA_SYNC4)
		ioflag = FDSYNC;
	else {
		/* Unknown stability level: undo the allocation and fail. */
		if (iovp != iov)
			kmem_free(iovp, sizeof (*iovp) * iovcnt);
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	/*
	 * We're changing creds because VM may fault and we need
	 * the cred of the current thread to be used if quota
	 * checking is enabled.
	 */
	savecred = curthread->t_cred;
	curthread->t_cred = cr;
	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
	curthread->t_cred = savecred;

	if (iovp != iov)
		kmem_free(iovp, sizeof (*iovp) * iovcnt);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	*cs->statusp = resp->status = NFS4_OK;
	resp->count = args->data_len - uio.uio_resid;

	/*
	 * Any synchronous request (FILE_SYNC4 or DATA_SYNC4) is reported
	 * back as FILE_SYNC4.
	 */
	if (ioflag == 0)
		resp->committed = UNSTABLE4;
	else
		resp->committed = FILE_SYNC4;

	resp->writeverf = nsrv4->write4verf;

out:
	if (in_crit)
		nbl_end_crit(vp);

	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
	    WRITE4res *, resp);
}
5966
5967 static inline int
rfs4_opnum_in_range(const compound_state_t * cs,int opnum)5968 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
5969 {
5970 if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
5971 return (0);
5972 else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
5973 return (0);
5974 else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
5975 return (0);
5976 else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
5977 return (0);
5978 return (1);
5979 }
5980
/*
 * Execute an NFSv4 COMPOUND request.
 *
 *	args	decoded COMPOUND arguments (tag, minor version, op array)
 *	resp	result structure filled in here; the tag copy and the
 *		result array are kmem allocations that
 *		rfs4_compound_free() releases
 *	cs	per-compound state handed to every op handler
 *	req	the RPC request carrying the compound
 *	rv	optional; set to 0 on entry and to 1 only when the
 *		request's credentials could not be obtained, in which
 *		case svcerr_badcred() has already been sent
 *
 * Ops are dispatched in order through rfsv4disptab until one of them
 * leaves a status other than NFS4_OK (or is illegal); the result array
 * is then shrunk to the ops that actually ran.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
    struct svc_req *req, int *rv)
{
	uint_t i;
	cred_t *cr;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = args->tag.utf8string_len;
	if (args->tag.utf8string_len != 0) {
		resp->tag.utf8string_val =
		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
		    resp->tag.utf8string_len);
	} else {
		resp->tag.utf8string_val = NULL;
	}

	cs->statusp = &resp->status;
	cs->req = req;
	cs->minorversion = args->minorversion;
	resp->array = NULL;
	resp->array_len = 0;

	/* An empty compound is trivially successful. */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	cr = svc_xprt_cred(req->rq_xprt);
	ASSERT(cr != NULL);

	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
		/* Fire both probes so start/done stay paired. */
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    cs, COMPOUND4args *, args);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}

	/* One zeroed result slot per requested op. */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs->op_len = args->array_len;
	cs->basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	/* Run the ops in order until one asks us to stop (cs->cont). */
	for (i = 0; i < args->array_len && cs->cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;
		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		cs->op_pos = i;
		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			stat[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs->statusp));
			/* The first failing op ends the compound. */
			if (*cs->statusp != NFS4_OK)
				cs->cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			stat[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, cs);
			cs->cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs->cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			/* Keep array_len in step with the new allocation. */
			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
	    COMPOUND4res *, resp);

	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
6138
6139 /*
6140 * XXX because of what appears to be duplicate calls to rfs4_compound_free
6141 * XXX zero out the tag and array values. Need to investigate why the
6142 * XXX calls occur, but at least prevent the panic for now.
6143 */
6144 void
rfs4_compound_free(COMPOUND4res * resp)6145 rfs4_compound_free(COMPOUND4res *resp)
6146 {
6147 uint_t i;
6148
6149 if (resp->tag.utf8string_val) {
6150 UTF8STRING_FREE(resp->tag)
6151 }
6152
6153 for (i = 0; i < resp->array_len; i++) {
6154 nfs_resop4 *resop;
6155 uint_t op;
6156
6157 resop = &resp->array[i];
6158 op = (uint_t)resop->resop;
6159 if (op < rfsv4disp_cnt) {
6160 (*rfsv4disptab[op].dis_resfree)(resop);
6161 }
6162 }
6163 if (resp->array != NULL) {
6164 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6165 }
6166 }
6167
6168 /*
6169 * Process the value of the compound request rpc flags, as a bit-AND
6170 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6171 */
6172 void
rfs4_compound_flagproc(COMPOUND4args * args,int * flagp)6173 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6174 {
6175 int i;
6176 int flag = RPC_ALL;
6177
6178 for (i = 0; flag && i < args->array_len; i++) {
6179 uint_t op;
6180
6181 op = (uint_t)args->array[i].argop;
6182
6183 if (op < rfsv4disp_cnt)
6184 flag &= rfsv4disptab[op].dis_flags;
6185 else
6186 flag = 0;
6187 }
6188 *flagp = flag;
6189 }
6190
6191 nfsstat4
rfs4_client_sysid(rfs4_client_t * cp,sysid_t * sp)6192 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6193 {
6194 nfsstat4 e;
6195
6196 rfs4_dbe_lock(cp->rc_dbe);
6197
6198 if (cp->rc_sysidt != LM_NOSYSID) {
6199 *sp = cp->rc_sysidt;
6200 e = NFS4_OK;
6201
6202 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6203 *sp = cp->rc_sysidt;
6204 e = NFS4_OK;
6205
6206 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6207 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6208 } else
6209 e = NFS4ERR_DELAY;
6210
6211 rfs4_dbe_unlock(cp->rc_dbe);
6212 return (e);
6213 }
6214
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper that logs one record-lock request through cmn_err.
 *
 *	str		prefix identifying the caller
 *	operation	F_GETLK, F_SETLK or F_SETLK_NBMAND; anything else
 *			is printed as F_UNKNOWN
 *	flk		the lock description; l_whence must be 0, and an
 *			l_len of 0 is printed as all ones
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	const char *opname;
	const char *typename;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		typename = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		typename = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		typename = "F_WRLCK";
	else
		typename = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, typename, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6251
/*
 * Credential check hook.  This implementation looks at none of its
 * arguments and unconditionally returns TRUE.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6258
6259 /*
6260 * Look up the pathname using the vp in cs as the directory vnode.
6261 * cs->vp will be the vnode for the file on success
6262 */
6263
6264 static nfsstat4
rfs4_lookup(component4 * component,struct svc_req * req,struct compound_state * cs)6265 rfs4_lookup(component4 *component, struct svc_req *req,
6266 struct compound_state *cs)
6267 {
6268 char *nm;
6269 uint32_t len;
6270 nfsstat4 status;
6271 struct sockaddr *ca;
6272 char *name;
6273
6274 if (cs->vp == NULL) {
6275 return (NFS4ERR_NOFILEHANDLE);
6276 }
6277 if (cs->vp->v_type != VDIR) {
6278 return (NFS4ERR_NOTDIR);
6279 }
6280
6281 status = utf8_dir_verify(component);
6282 if (status != NFS4_OK)
6283 return (status);
6284
6285 nm = utf8_to_fn(component, &len, NULL);
6286 if (nm == NULL) {
6287 return (NFS4ERR_INVAL);
6288 }
6289
6290 if (len > MAXNAMELEN) {
6291 kmem_free(nm, len);
6292 return (NFS4ERR_NAMETOOLONG);
6293 }
6294
6295 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6296 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6297 MAXPATHLEN + 1);
6298
6299 if (name == NULL) {
6300 kmem_free(nm, len);
6301 return (NFS4ERR_INVAL);
6302 }
6303
6304 status = do_rfs4_op_lookup(name, req, cs);
6305
6306 if (name != nm)
6307 kmem_free(name, MAXPATHLEN + 1);
6308
6309 kmem_free(nm, len);
6310
6311 return (status);
6312 }
6313
6314 static nfsstat4
rfs4_lookupfile(component4 * component,struct svc_req * req,struct compound_state * cs,uint32_t access,change_info4 * cinfo)6315 rfs4_lookupfile(component4 *component, struct svc_req *req,
6316 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6317 {
6318 nfsstat4 status;
6319 vnode_t *dvp = cs->vp;
6320 vattr_t bva, ava, fva;
6321 int error;
6322
6323 /* Get "before" change value */
6324 bva.va_mask = AT_CTIME|AT_SEQ;
6325 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6326 if (error)
6327 return (puterrno4(error));
6328
6329 /* rfs4_lookup may VN_RELE directory */
6330 VN_HOLD(dvp);
6331
6332 status = rfs4_lookup(component, req, cs);
6333 if (status != NFS4_OK) {
6334 VN_RELE(dvp);
6335 return (status);
6336 }
6337
6338 /*
6339 * Get "after" change value, if it fails, simply return the
6340 * before value.
6341 */
6342 ava.va_mask = AT_CTIME|AT_SEQ;
6343 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6344 ava.va_ctime = bva.va_ctime;
6345 ava.va_seq = 0;
6346 }
6347 VN_RELE(dvp);
6348
6349 /*
6350 * Validate the file is a file
6351 */
6352 fva.va_mask = AT_TYPE|AT_MODE;
6353 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6354 if (error)
6355 return (puterrno4(error));
6356
6357 if (fva.va_type != VREG) {
6358 if (fva.va_type == VDIR)
6359 return (NFS4ERR_ISDIR);
6360 if (fva.va_type == VLNK)
6361 return (NFS4ERR_SYMLINK);
6362 return (NFS4ERR_INVAL);
6363 }
6364
6365 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6366 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6367
6368 /*
6369 * It is undefined if VOP_LOOKUP will change va_seq, so
6370 * cinfo.atomic = TRUE only if we have
6371 * non-zero va_seq's, and they have not changed.
6372 */
6373 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6374 cinfo->atomic = TRUE;
6375 else
6376 cinfo->atomic = FALSE;
6377
6378 /* Check for mandatory locking */
6379 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6380 return (check_open_access(access, cs, req));
6381 }
6382
6383 static nfsstat4
create_vnode(vnode_t * dvp,char * nm,vattr_t * vap,createmode4 mode,cred_t * cr,vnode_t ** vpp,bool_t * created)6384 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6385 cred_t *cr, vnode_t **vpp, bool_t *created)
6386 {
6387 int error;
6388 nfsstat4 status = NFS4_OK;
6389 vattr_t va;
6390
6391 tryagain:
6392
6393 /*
6394 * The file open mode used is VWRITE. If the client needs
6395 * some other semantic, then it should do the access checking
6396 * itself. It would have been nice to have the file open mode
6397 * passed as part of the arguments.
6398 */
6399
6400 *created = TRUE;
6401 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6402
6403 if (error) {
6404 *created = FALSE;
6405
6406 /*
6407 * If we got something other than file already exists
6408 * then just return this error. Otherwise, we got
6409 * EEXIST. If we were doing a GUARDED create, then
6410 * just return this error. Otherwise, we need to
6411 * make sure that this wasn't a duplicate of an
6412 * exclusive create request.
6413 *
6414 * The assumption is made that a non-exclusive create
6415 * request will never return EEXIST.
6416 */
6417
6418 if (error != EEXIST || mode == GUARDED4) {
6419 status = puterrno4(error);
6420 return (status);
6421 }
6422 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6423 NULL, NULL, NULL);
6424
6425 if (error) {
6426 /*
6427 * We couldn't find the file that we thought that
6428 * we just created. So, we'll just try creating
6429 * it again.
6430 */
6431 if (error == ENOENT)
6432 goto tryagain;
6433
6434 status = puterrno4(error);
6435 return (status);
6436 }
6437
6438 if (mode == UNCHECKED4) {
6439 /* existing object must be regular file */
6440 if ((*vpp)->v_type != VREG) {
6441 if ((*vpp)->v_type == VDIR)
6442 status = NFS4ERR_ISDIR;
6443 else if ((*vpp)->v_type == VLNK)
6444 status = NFS4ERR_SYMLINK;
6445 else
6446 status = NFS4ERR_INVAL;
6447 VN_RELE(*vpp);
6448 return (status);
6449 }
6450
6451 return (NFS4_OK);
6452 }
6453
6454 /* Check for duplicate request */
6455 va.va_mask = AT_MTIME;
6456 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6457 if (!error) {
6458 /* We found the file */
6459 const timestruc_t *mtime = &vap->va_mtime;
6460
6461 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6462 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6463 /* but its not our creation */
6464 VN_RELE(*vpp);
6465 return (NFS4ERR_EXIST);
6466 }
6467 *created = TRUE; /* retrans of create == created */
6468 return (NFS4_OK);
6469 }
6470 VN_RELE(*vpp);
6471 return (NFS4ERR_EXIST);
6472 }
6473
6474 return (NFS4_OK);
6475 }
6476
6477 static nfsstat4
check_open_access(uint32_t access,struct compound_state * cs,struct svc_req * req)6478 check_open_access(uint32_t access, struct compound_state *cs,
6479 struct svc_req *req)
6480 {
6481 int error;
6482 vnode_t *vp;
6483 bool_t readonly;
6484 cred_t *cr = cs->cr;
6485
6486 /* For now we don't allow mandatory locking as per V2/V3 */
6487 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6488 return (NFS4ERR_ACCESS);
6489 }
6490
6491 vp = cs->vp;
6492 ASSERT(cr != NULL && vp->v_type == VREG);
6493
6494 /*
6495 * If the file system is exported read only and we are trying
6496 * to open for write, then return NFS4ERR_ROFS
6497 */
6498
6499 readonly = rdonly4(req, cs);
6500
6501 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6502 return (NFS4ERR_ROFS);
6503
6504 if (access & OPEN4_SHARE_ACCESS_READ) {
6505 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6506 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6507 return (NFS4ERR_ACCESS);
6508 }
6509 }
6510
6511 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6512 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6513 if (error)
6514 return (NFS4ERR_ACCESS);
6515 }
6516
6517 return (NFS4_OK);
6518 }
6519
6520 static void
rfs4_verifier_to_mtime(verifier4 v,timestruc_t * mtime)6521 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6522 {
6523 timespec32_t *time = (timespec32_t *)&v;
6524
6525 /*
6526 * Ensure no time overflows. Assumes underlying
6527 * filesystem supports at least 32 bits.
6528 * Truncate nsec to usec resolution to allow valid
6529 * compares even if the underlying filesystem truncates.
6530 */
6531 mtime->tv_sec = time->tv_sec % TIME32_MAX;
6532 mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6533 }
6534
6535 static nfsstat4
rfs4_createfile(OPEN4args * args,struct svc_req * req,struct compound_state * cs,change_info4 * cinfo,bitmap4 * attrset,clientid4 clientid)6536 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6537 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6538 {
6539 struct nfs4_svgetit_arg sarg;
6540 struct nfs4_ntov_table ntov;
6541
6542 bool_t ntov_table_init = FALSE;
6543 struct statvfs64 sb;
6544 nfsstat4 status;
6545 vnode_t *vp;
6546 vattr_t bva, ava, iva, cva, *vap;
6547 vnode_t *dvp;
6548 char *nm = NULL;
6549 uint_t buflen;
6550 bool_t created;
6551 bool_t setsize = FALSE;
6552 len_t reqsize;
6553 int error;
6554 bool_t trunc;
6555 caller_context_t ct;
6556 component4 *component;
6557 bslabel_t *clabel;
6558 struct sockaddr *ca;
6559 char *name = NULL;
6560 fattr4 *fattr = NULL;
6561
6562 ASSERT(*attrset == 0);
6563
6564 sarg.sbp = &sb;
6565 sarg.is_referral = B_FALSE;
6566
6567 dvp = cs->vp;
6568
6569 /* Check if the file system is read only */
6570 if (rdonly4(req, cs))
6571 return (NFS4ERR_ROFS);
6572
6573 /* check the label of including directory */
6574 if (is_system_labeled()) {
6575 ASSERT(req->rq_label != NULL);
6576 clabel = req->rq_label;
6577 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6578 "got client label from request(1)",
6579 struct svc_req *, req);
6580 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6581 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6582 cs->exi)) {
6583 return (NFS4ERR_ACCESS);
6584 }
6585 }
6586 }
6587
6588 if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6589 dvp->v_flag & V_XATTRDIR) {
6590 /* prohibit EXCL create of named attributes */
6591 return (NFS4ERR_INVAL);
6592 }
6593
6594 /*
6595 * Get the last component of path name in nm. cs will reference
6596 * the including directory on success.
6597 */
6598 component = &args->claim.open_claim4_u.file;
6599 status = utf8_dir_verify(component);
6600 if (status != NFS4_OK)
6601 return (status);
6602
6603 nm = utf8_to_fn(component, &buflen, NULL);
6604
6605 if (nm == NULL)
6606 return (NFS4ERR_RESOURCE);
6607
6608 if (buflen > MAXNAMELEN) {
6609 kmem_free(nm, buflen);
6610 return (NFS4ERR_NAMETOOLONG);
6611 }
6612
6613 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6614 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6615 if (error) {
6616 kmem_free(nm, buflen);
6617 return (puterrno4(error));
6618 }
6619
6620 if (bva.va_type != VDIR) {
6621 kmem_free(nm, buflen);
6622 return (NFS4ERR_NOTDIR);
6623 }
6624
6625 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6626
6627 switch (args->mode) {
6628 case GUARDED4:
6629 /*FALLTHROUGH*/
6630 case UNCHECKED4:
6631 case EXCLUSIVE4_1:
6632 nfs4_ntov_table_init(&ntov);
6633 ntov_table_init = TRUE;
6634
6635 if (args->mode == EXCLUSIVE4_1)
6636 fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6637 else
6638 fattr = &args->createhow4_u.createattrs;
6639
6640 status = do_rfs4_set_attrs(attrset,
6641 fattr,
6642 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6643
6644 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6645 sarg.vap->va_type != VREG) {
6646 if (sarg.vap->va_type == VDIR)
6647 status = NFS4ERR_ISDIR;
6648 else if (sarg.vap->va_type == VLNK)
6649 status = NFS4ERR_SYMLINK;
6650 else
6651 status = NFS4ERR_INVAL;
6652 }
6653
6654 if (status != NFS4_OK) {
6655 kmem_free(nm, buflen);
6656 nfs4_ntov_table_free(&ntov, &sarg);
6657 *attrset = 0;
6658 return (status);
6659 }
6660
6661 vap = sarg.vap;
6662 vap->va_type = VREG;
6663 vap->va_mask |= AT_TYPE;
6664
6665 if ((vap->va_mask & AT_MODE) == 0) {
6666 vap->va_mask |= AT_MODE;
6667 vap->va_mode = (mode_t)0600;
6668 }
6669
6670 if (vap->va_mask & AT_SIZE) {
6671
6672 /* Disallow create with a non-zero size */
6673
6674 if ((reqsize = sarg.vap->va_size) != 0) {
6675 kmem_free(nm, buflen);
6676 nfs4_ntov_table_free(&ntov, &sarg);
6677 *attrset = 0;
6678 return (NFS4ERR_INVAL);
6679 }
6680 setsize = TRUE;
6681 }
6682 if (args->mode == EXCLUSIVE4_1) {
6683 rfs4_verifier_to_mtime(
6684 args->createhow4_u.ch_createboth.cva_verf,
6685 &vap->va_mtime);
6686 /* attrset will be set later */
6687 fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6688 vap->va_mask |= AT_MTIME;
6689 }
6690 break;
6691
6692 case EXCLUSIVE4:
6693 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6694 cva.va_type = VREG;
6695 cva.va_mode = (mode_t)0;
6696
6697 rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6698 &cva.va_mtime);
6699
6700 vap = &cva;
6701
6702 /*
6703 * For EXCL create, attrset is set to the server attr
6704 * used to cache the client's verifier.
6705 */
6706 *attrset = FATTR4_TIME_MODIFY_MASK;
6707 break;
6708 }
6709
6710 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6711 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6712 MAXPATHLEN + 1);
6713
6714 if (name == NULL) {
6715 kmem_free(nm, buflen);
6716 return (NFS4ERR_SERVERFAULT);
6717 }
6718
6719 status = create_vnode(dvp, name, vap, args->mode,
6720 cs->cr, &vp, &created);
6721 if (nm != name)
6722 kmem_free(name, MAXPATHLEN + 1);
6723 kmem_free(nm, buflen);
6724
6725 if (status != NFS4_OK) {
6726 if (ntov_table_init)
6727 nfs4_ntov_table_free(&ntov, &sarg);
6728 *attrset = 0;
6729 return (status);
6730 }
6731
6732 trunc = (setsize && !created);
6733
6734 if (args->mode != EXCLUSIVE4) {
6735 bitmap4 createmask = fattr->attrmask;
6736
6737 /*
6738 * True verification that object was created with correct
6739 * attrs is impossible. The attrs could have been changed
6740 * immediately after object creation. If attributes did
6741 * not verify, the only recourse for the server is to
6742 * destroy the object. Maybe if some attrs (like gid)
6743 * are set incorrectly, the object should be destroyed;
6744 * however, seems bad as a default policy. Do we really
6745 * want to destroy an object over one of the times not
6746 * verifying correctly? For these reasons, the server
6747 * currently sets bits in attrset for createattrs
6748 * that were set; however, no verification is done.
6749 *
6750 * vmask_to_nmask accounts for vattr bits set on create
6751 * [do_rfs4_set_attrs() only sets resp bits for
6752 * non-vattr/vfs bits.]
6753 * Mask off any bits we set by default so as not to return
6754 * more attrset bits than were requested in createattrs
6755 */
6756 if (created) {
6757 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6758 *attrset &= createmask;
6759 } else {
6760 /*
6761 * We did not create the vnode (we tried but it
6762 * already existed). In this case, the only createattr
6763 * that the spec allows the server to set is size,
6764 * and even then, it can only be set if it is 0.
6765 */
6766 *attrset = 0;
6767 if (trunc)
6768 *attrset = FATTR4_SIZE_MASK;
6769 }
6770 }
6771 if (ntov_table_init)
6772 nfs4_ntov_table_free(&ntov, &sarg);
6773
6774 /*
6775 * Get the initial "after" sequence number, if it fails,
6776 * set to zero, time to before.
6777 */
6778 iva.va_mask = AT_CTIME|AT_SEQ;
6779 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6780 iva.va_seq = 0;
6781 iva.va_ctime = bva.va_ctime;
6782 }
6783
6784 /*
6785 * create_vnode attempts to create the file exclusive,
6786 * if it already exists the VOP_CREATE will fail and
6787 * may not increase va_seq. It is atomic if
6788 * we haven't changed the directory, but if it has changed
6789 * we don't know what changed it.
6790 */
6791 if (!created) {
6792 if (bva.va_seq && iva.va_seq &&
6793 bva.va_seq == iva.va_seq)
6794 cinfo->atomic = TRUE;
6795 else
6796 cinfo->atomic = FALSE;
6797 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6798 } else {
6799 /*
6800 * The entry was created, we need to sync the
6801 * directory metadata.
6802 */
6803 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6804
6805 /*
6806 * Get "after" change value, if it fails, simply return the
6807 * before value.
6808 */
6809 ava.va_mask = AT_CTIME|AT_SEQ;
6810 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6811 ava.va_ctime = bva.va_ctime;
6812 ava.va_seq = 0;
6813 }
6814
6815 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6816
6817 /*
6818 * The cinfo->atomic = TRUE only if we have
6819 * non-zero va_seq's, and it has incremented by exactly one
6820 * during the create_vnode and it didn't
6821 * change during the VOP_FSYNC.
6822 */
6823 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6824 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6825 cinfo->atomic = TRUE;
6826 else
6827 cinfo->atomic = FALSE;
6828 }
6829
6830 /* Check for mandatory locking and that the size gets set. */
6831 cva.va_mask = AT_MODE;
6832 if (setsize)
6833 cva.va_mask |= AT_SIZE;
6834
6835 /* Assume the worst */
6836 cs->mandlock = TRUE;
6837
6838 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6839 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6840
6841 /*
6842 * Truncate the file if necessary; this would be
6843 * the case for create over an existing file.
6844 */
6845
6846 if (trunc) {
6847 int in_crit = 0;
6848 rfs4_file_t *fp;
6849 nfs4_srv_t *nsrv4;
6850 bool_t create = FALSE;
6851
6852 /*
6853 * We are writing over an existing file.
6854 * Check to see if we need to recall a delegation.
6855 */
6856 nsrv4 = nfs4_get_srv();
6857 rfs4_hold_deleg_policy(nsrv4);
6858 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6859 if (rfs4_check_delegated_byfp(FWRITE, fp,
6860 (reqsize == 0), FALSE, FALSE, &clientid)) {
6861 rfs4_file_rele(fp);
6862 rfs4_rele_deleg_policy(nsrv4);
6863 VN_RELE(vp);
6864 *attrset = 0;
6865 return (NFS4ERR_DELAY);
6866 }
6867 rfs4_file_rele(fp);
6868 }
6869 rfs4_rele_deleg_policy(nsrv4);
6870
6871 if (nbl_need_check(vp)) {
6872 in_crit = 1;
6873
6874 ASSERT(reqsize == 0);
6875
6876 nbl_start_crit(vp, RW_READER);
6877 if (nbl_conflict(vp, NBL_WRITE, 0,
6878 cva.va_size, 0, NULL)) {
6879 in_crit = 0;
6880 nbl_end_crit(vp);
6881 VN_RELE(vp);
6882 *attrset = 0;
6883 return (NFS4ERR_ACCESS);
6884 }
6885 }
6886 ct.cc_sysid = 0;
6887 ct.cc_pid = 0;
6888 ct.cc_caller_id = nfs4_srv_caller_id;
6889 ct.cc_flags = CC_DONTBLOCK;
6890
6891 cva.va_mask = AT_SIZE;
6892 cva.va_size = reqsize;
6893 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6894 if (in_crit)
6895 nbl_end_crit(vp);
6896 }
6897 }
6898
6899 error = makefh4(&cs->fh, vp, cs->exi);
6900
6901 /*
6902 * Force modified data and metadata out to stable storage.
6903 */
6904 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6905
6906 if (error) {
6907 VN_RELE(vp);
6908 *attrset = 0;
6909 return (puterrno4(error));
6910 }
6911
6912 /* if parent dir is attrdir, set namedattr fh flag */
6913 if (dvp->v_flag & V_XATTRDIR)
6914 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6915
6916 if (cs->vp)
6917 VN_RELE(cs->vp);
6918
6919 cs->vp = vp;
6920
6921 /*
6922 * if we did not create the file, we will need to check
6923 * the access bits on the file
6924 */
6925
6926 if (!created) {
6927 if (setsize)
6928 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6929 status = check_open_access(args->share_access, cs, req);
6930 if (status != NFS4_OK)
6931 *attrset = 0;
6932 }
6933 return (status);
6934 }
6935
/*
 * rfs4_do_open: common back end of the OPEN claim handlers.  The target
 * file must already be referenced by cs->vp / cs->fh.
 *
 * cs		compound state; cs->vp is opened and cs->deleg is set on
 *		success.
 * req		unused.
 * oo		open owner performing the OPEN.
 * deleg	delegation request type handed to rfs4_grant_delegation().
 * access	share access bits requested by this OPEN.
 * deny		share deny bits requested by this OPEN.
 * resp		out: resp->status always; resp->stateid and, if a delegation
 *		state is returned, resp->delegation on success.
 * deleg_cur	non-zero to skip VOP_OPEN() and only perform the open upgrade
 *		(see the CLAIM_DELEGATE_CUR comment below).
 *
 * Every failure path after the state lookup drops the file and state
 * references and, when the state was newly created for this call
 * (screate still TRUE), "closes" that partially formed state.
 *
 * NOTE(review): fp->rf_file_rwlock is explicitly released here only on the
 * sp == NULL path; presumably the state lookup/creation path drops it in
 * the other cases -- confirm against rfs4_findstate_by_owner_file().
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
    rfs4_openowner_t *oo, delegreq_t deleg,
    uint32_t access, uint32_t deny,
    OPEN4res *resp, int deleg_cur)
{
	/* XXX Currently not using req */
	rfs4_state_t *sp;
	rfs4_file_t *fp;
	/* In/out create flags for the state and file lookups below. */
	bool_t screate = TRUE;
	bool_t fcreate = TRUE;
	uint32_t open_a, share_a;
	uint32_t open_d, share_d;
	rfs4_deleg_state_t *dsp;
	sysid_t sysid;
	nfsstat4 status;
	caller_context_t ct;
	int fflags = 0;
	int recall = 0;
	int err;
	int first_open;

	/* get the file struct and hold a lock on it during initial open */
	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
	if (fp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
		return;
	}

	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
	if (sp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
		/* No need to keep any reference */
		rw_exit(&fp->rf_file_rwlock);
		rfs4_file_rele(fp);
		return;
	}

	/* try to get the sysid before continuing */
	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
		resp->status = status;
		rfs4_file_rele(fp);
		/* Not a fully formed open; "close" it */
		if (screate == TRUE)
			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
		rfs4_state_rele(sp);
		return;
	}

	/* Calculate the fflags for this OPEN. */
	if (access & OPEN4_SHARE_ACCESS_READ)
		fflags |= FREAD;
	if (access & OPEN4_SHARE_ACCESS_WRITE)
		fflags |= FWRITE;

	/* Lock order used throughout: state (sp) first, then file (fp). */
	rfs4_dbe_lock(sp->rs_dbe);

	/*
	 * Calculate the new deny and access mode that this open is adding to
	 * the file for this open owner;
	 */
	open_d = (deny & ~sp->rs_open_deny);
	open_a = (access & ~sp->rs_open_access);

	/*
	 * Calculate the new share access and share deny modes that this open
	 * is adding to the file for this open owner;
	 */
	share_a = (access & ~sp->rs_share_access);
	share_d = (deny & ~sp->rs_share_deny);

	/* No access bits recorded yet means this state has not been opened. */
	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;

	/*
	 * Check to see the client has already sent an open for this
	 * open owner on this file with the same share/deny modes.
	 * If so, we don't need to check for a conflict and we don't
	 * need to add another shrlock.  If not, then we need to
	 * check for conflicts in deny and access before checking for
	 * conflicts in delegation.  We don't want to recall a
	 * delegation based on an open that will eventually fail based
	 * on shares modes.
	 */

	if (share_a || share_d) {
		if ((err = rfs4_share(sp, access, deny)) != 0) {
			rfs4_dbe_unlock(sp->rs_dbe);
			resp->status = err;

			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			return;
		}
	}

	rfs4_dbe_lock(fp->rf_dbe);

	/*
	 * Check to see if this file is delegated and if so, if a
	 * recall needs to be done.
	 */
	if (rfs4_check_recall(sp, access)) {
		/* Both locks are dropped across the recall and the delay. */
		rfs4_dbe_unlock(fp->rf_dbe);
		rfs4_dbe_unlock(sp->rs_dbe);
		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
		delay(NFS4_DELEGATION_CONFLICT_DELAY);
		rfs4_dbe_lock(sp->rs_dbe);

		/* if state closed while lock was dropped */
		if (sp->rs_closed) {
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_OLD_STATEID;
			return;
		}

		rfs4_dbe_lock(fp->rf_dbe);
		/* Let's see if the delegation was returned */
		if (rfs4_check_recall(sp, access)) {
			/* Still conflicting: undo the share, ask for a retry. */
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);
			rfs4_update_lease(sp->rs_owner->ro_client);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			resp->status = NFS4ERR_DELAY;
			return;
		}
	}
	/*
	 * the share check passed and any delegation conflict has been
	 * taken care of, now call vop_open.
	 * if this is the first open then call vop_open with fflags.
	 * if not, call vn_open_upgrade with just the upgrade flags.
	 *
	 * if the file has been opened already, it will have the current
	 * access mode in the state struct.  if it has no share access, then
	 * this is a new open.
	 *
	 * However, if this is open with CLAIM_DELEGATE_CUR, then don't
	 * call VOP_OPEN(), just do the open upgrade.
	 */
	if (first_open && !deleg_cur) {
		ct.cc_sysid = sysid;
		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
		if (err) {
			rfs4_dbe_unlock(fp->rf_dbe);
			if (share_a || share_d)
				(void) rfs4_unshare(sp);
			rfs4_dbe_unlock(sp->rs_dbe);
			rfs4_file_rele(fp);

			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
			rfs4_state_rele(sp);
			/* check if a monitor detected a delegation conflict */
			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
				resp->status = NFS4ERR_DELAY;
			else
				resp->status = NFS4ERR_SERVERFAULT;
			return;
		}
	} else { /* open upgrade */
		/*
		 * calculate the fflags for the new mode that is being added
		 * by this upgrade.
		 */
		fflags = 0;
		if (open_a & OPEN4_SHARE_ACCESS_READ)
			fflags |= FREAD;
		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
			fflags |= FWRITE;
		vn_open_upgrade(cs->vp, fflags);
	}
	/* Record the (possibly widened) modes on the open state ... */
	sp->rs_open_access |= access;
	sp->rs_open_deny |= deny;

	/* ... and account only the newly added bits on the file. */
	if (open_d & OPEN4_SHARE_DENY_READ)
		fp->rf_deny_read++;
	if (open_d & OPEN4_SHARE_DENY_WRITE)
		fp->rf_deny_write++;
	fp->rf_share_deny |= deny;

	if (open_a & OPEN4_SHARE_ACCESS_READ)
		fp->rf_access_read++;
	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
		fp->rf_access_write++;
	fp->rf_share_access |= access;

	/*
	 * Check for delegation here. if the deleg argument is not
	 * DELEG_ANY, then this is a reclaim from a client and
	 * we must honor the delegation requested.  If necessary we can
	 * set the recall flag.
	 */

	dsp = rfs4_grant_delegation(deleg, sp, &recall);

	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);

	/* Advance the stateid and hand the new value back to the client. */
	next_stateid(&sp->rs_stateid);

	resp->stateid = sp->rs_stateid.stateid;

	rfs4_dbe_unlock(fp->rf_dbe);
	rfs4_dbe_unlock(sp->rs_dbe);

	if (dsp) {
		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
		rfs4_deleg_state_rele(dsp);
	}

	rfs4_file_rele(fp);
	rfs4_state_rele(sp);

	resp->status = NFS4_OK;
}
7174
7175 /*ARGSUSED*/
7176 static void
rfs4_do_openfh(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7177 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7178 rfs4_openowner_t *oo, OPEN4res *resp)
7179 {
7180 /* cs->vp and cs->fh have been updated by putfh. */
7181 rfs4_do_open(cs, req, oo, DELEG_ANY,
7182 (args->share_access & 0xff), args->share_deny, resp, 0);
7183 }
7184
7185 /*ARGSUSED*/
7186 static void
rfs4_do_opennull(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7187 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7188 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7189 {
7190 change_info4 *cinfo = &resp->cinfo;
7191 bitmap4 *attrset = &resp->attrset;
7192
7193 if (args->opentype == OPEN4_NOCREATE)
7194 resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7195 req, cs, args->share_access, cinfo);
7196 else {
7197 /* inhibit delegation grants during exclusive create */
7198
7199 if (args->mode == EXCLUSIVE4)
7200 rfs4_disable_delegation();
7201
7202 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7203 oo->ro_client->rc_clientid);
7204 }
7205
7206 if (resp->status == NFS4_OK) {
7207
7208 /* cs->vp cs->fh now reference the desired file */
7209
7210 rfs4_do_open(cs, req, oo,
7211 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7212 args->share_access, args->share_deny, resp, 0);
7213
7214 /*
7215 * If rfs4_createfile set attrset, we must
7216 * clear this attrset before the response is copied.
7217 */
7218 if (resp->status != NFS4_OK && resp->attrset) {
7219 resp->attrset = 0;
7220 }
7221 }
7222 else
7223 *cs->statusp = resp->status;
7224
7225 if (args->mode == EXCLUSIVE4)
7226 rfs4_enable_delegation();
7227 }
7228
7229 /*ARGSUSED*/
7230 static void
rfs4_do_openprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7231 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7232 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7233 {
7234 change_info4 *cinfo = &resp->cinfo;
7235 vattr_t va;
7236 vtype_t v_type = cs->vp->v_type;
7237 int error = 0;
7238
7239 /* Verify that we have a regular file */
7240 if (v_type != VREG) {
7241 if (v_type == VDIR)
7242 resp->status = NFS4ERR_ISDIR;
7243 else if (v_type == VLNK)
7244 resp->status = NFS4ERR_SYMLINK;
7245 else
7246 resp->status = NFS4ERR_INVAL;
7247 return;
7248 }
7249
7250 va.va_mask = AT_MODE|AT_UID;
7251 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7252 if (error) {
7253 resp->status = puterrno4(error);
7254 return;
7255 }
7256
7257 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7258
7259 /*
7260 * Check if we have access to the file, Note the the file
7261 * could have originally been open UNCHECKED or GUARDED
7262 * with mode bits that will now fail, but there is nothing
7263 * we can really do about that except in the case that the
7264 * owner of the file is the one requesting the open.
7265 */
7266 if (crgetuid(cs->cr) != va.va_uid) {
7267 resp->status = check_open_access(args->share_access, cs, req);
7268 if (resp->status != NFS4_OK) {
7269 return;
7270 }
7271 }
7272
7273 /*
7274 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7275 */
7276 cinfo->before = 0;
7277 cinfo->after = 0;
7278 cinfo->atomic = FALSE;
7279
7280 rfs4_do_open(cs, req, oo,
7281 NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7282 args->share_access, args->share_deny, resp, 0);
7283 }
7284
7285 static void
rfs4_do_opendelcur(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7286 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7287 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7288 {
7289 int error;
7290 nfsstat4 status;
7291 stateid4 stateid =
7292 args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7293 rfs4_deleg_state_t *dsp;
7294
7295 /*
7296 * Find the state info from the stateid and confirm that the
7297 * file is delegated. If the state openowner is the same as
7298 * the supplied openowner we're done. If not, get the file
7299 * info from the found state info. Use that file info to
7300 * create the state for this lock owner. Note solaris doen't
7301 * really need the pathname to find the file. We may want to
7302 * lookup the pathname and make sure that the vp exist and
7303 * matches the vp in the file structure. However it is
7304 * possible that the pathname nolonger exists (local process
7305 * unlinks the file), so this may not be that useful.
7306 */
7307
7308 status = rfs4_get_deleg_state(&stateid, &dsp);
7309 if (status != NFS4_OK) {
7310 resp->status = status;
7311 return;
7312 }
7313
7314 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7315
7316 /*
7317 * New lock owner, create state. Since this was probably called
7318 * in response to a CB_RECALL we set deleg to DELEG_NONE
7319 */
7320
7321 ASSERT(cs->vp != NULL);
7322 VN_RELE(cs->vp);
7323 VN_HOLD(dsp->rds_finfo->rf_vp);
7324 cs->vp = dsp->rds_finfo->rf_vp;
7325
7326 error = makefh4(&cs->fh, cs->vp, cs->exi);
7327 if (error != 0) {
7328 rfs4_deleg_state_rele(dsp);
7329 *cs->statusp = resp->status = puterrno4(error);
7330 return;
7331 }
7332
7333 /* Mark progress for delegation returns */
7334 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7335 rfs4_deleg_state_rele(dsp);
7336 rfs4_do_open(cs, req, oo, DELEG_NONE,
7337 args->share_access, args->share_deny, resp, 1);
7338 }
7339
/*
 * rfs4_do_opendelprev: OPEN that re-claims a delegation by file name
 * (args->claim.open_claim4_u.file_delegate_prev).
 *
 * On success resp->stateid carries the advanced open stateid and
 * resp->delegation describes the existing delegation (recall is always
 * FALSE and the permissions ACE is a zeroed placeholder).  On failure
 * resp->status is set and all references taken here are released.
 *
 * NOTE(review): fp->rf_file_rwlock is explicitly released only on the
 * sp == NULL path; presumably the state lookup drops it otherwise --
 * confirm against rfs4_findstate_by_owner_file().
 */
/*ARGSUSED*/
static void
rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
    OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
{
	/*
	 * Lookup the pathname, it must already exist since this file
	 * was delegated.
	 *
	 * Find the file and state info for this vp and open owner pair.
	 * check that they are in fact delegated.
	 * check that the state access and deny modes are the same.
	 *
	 * Return the delegation possibly setting the recall flag.
	 */
	rfs4_file_t *fp;
	rfs4_state_t *sp;
	/* FALSE is passed in for all three lookups below (file, state, deleg). */
	bool_t create = FALSE;
	bool_t dcreate = FALSE;
	rfs4_deleg_state_t *dsp;
	nfsace4 *ace;

	/* Note we ignore oflags */
	resp->status = rfs4_lookupfile(
	    &args->claim.open_claim4_u.file_delegate_prev,
	    req, cs, args->share_access, &resp->cinfo);

	if (resp->status != NFS4_OK) {
		return;
	}

	/* get the file struct and hold a lock on it during initial open */
	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
	if (fp == NULL) {
		resp->status = NFS4ERR_RESOURCE;
		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
		return;
	}

	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
	if (sp == NULL) {
		resp->status = NFS4ERR_SERVERFAULT;
		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
		rw_exit(&fp->rf_file_rwlock);
		rfs4_file_rele(fp);
		return;
	}

	/*
	 * The existing state must match the request exactly and the file
	 * must actually be delegated; anything else is treated as a
	 * server fault.
	 */
	rfs4_dbe_lock(sp->rs_dbe);
	rfs4_dbe_lock(fp->rf_dbe);
	if (args->share_access != sp->rs_share_access ||
	    args->share_deny != sp->rs_share_deny ||
	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
		rfs4_dbe_unlock(fp->rf_dbe);
		rfs4_dbe_unlock(sp->rs_dbe);
		rfs4_file_rele(fp);
		rfs4_state_rele(sp);
		resp->status = NFS4ERR_SERVERFAULT;
		return;
	}
	rfs4_dbe_unlock(fp->rf_dbe);
	rfs4_dbe_unlock(sp->rs_dbe);

	/* Look up the delegation state for this open state. */
	dsp = rfs4_finddeleg(sp, &dcreate);
	if (dsp == NULL) {
		rfs4_state_rele(sp);
		rfs4_file_rele(fp);
		resp->status = NFS4ERR_SERVERFAULT;
		return;
	}

	/* Advance the open stateid and return the new value. */
	next_stateid(&sp->rs_stateid);

	resp->stateid = sp->rs_stateid.stateid;

	resp->delegation.delegation_type = dsp->rds_dtype;

	/* Any delegation type other than READ is reported as a write one. */
	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
		open_read_delegation4 *rv =
		    &resp->delegation.open_delegation4_u.read;

		rv->stateid = dsp->rds_delegid.stateid;
		rv->recall = FALSE; /* no policy in place to set to TRUE */
		ace = &rv->permissions;
	} else {
		open_write_delegation4 *rv =
		    &resp->delegation.open_delegation4_u.write;

		rv->stateid = dsp->rds_delegid.stateid;
		rv->recall = FALSE; /* no policy in place to set to TRUE */
		ace = &rv->permissions;
		rv->space_limit.limitby = NFS_LIMIT_SIZE;
		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
	}

	/* XXX For now */
	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
	ace->flag = 0;
	ace->access_mask = 0;
	ace->who.utf8string_len = 0;
	ace->who.utf8string_val = 0;

	rfs4_deleg_state_rele(dsp);
	rfs4_state_rele(sp);
	rfs4_file_rele(fp);
}
7448
/*
 * Verdict of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* 0: request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* 2: anything else */
} rfs4_chkseq_t;
7454
7455 /*
7456 * Generic function for sequence number checks.
7457 */
7458 static rfs4_chkseq_t
rfs4_check_seqid(seqid4 seqid,nfs_resop4 * lastop,seqid4 rqst_seq,nfs_resop4 * resop,bool_t copyres)7459 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7460 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7461 {
7462 /* Same sequence ids and matching operations? */
7463 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7464 if (copyres == TRUE) {
7465 rfs4_free_reply(resop);
7466 rfs4_copy_reply(resop, lastop);
7467 }
7468 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7469 "Replayed SEQID %d\n", seqid));
7470 return (NFS4_CHKSEQ_REPLAY);
7471 }
7472
7473 /* If the incoming sequence is not the next expected then it is bad */
7474 if (rqst_seq != seqid + 1) {
7475 if (rqst_seq == seqid) {
7476 NFS4_DEBUG(rfs4_debug,
7477 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7478 "but last op was %d current op is %d\n",
7479 lastop->resop, resop->resop));
7480 return (NFS4_CHKSEQ_BAD);
7481 }
7482 NFS4_DEBUG(rfs4_debug,
7483 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7484 rqst_seq, seqid));
7485 return (NFS4_CHKSEQ_BAD);
7486 }
7487
7488 /* Everything okay -- next expected */
7489 return (NFS4_CHKSEQ_OKAY);
7490 }
7491
7492
7493 static rfs4_chkseq_t
rfs4_check_open_seqid(seqid4 seqid,rfs4_openowner_t * op,nfs_resop4 * resop,const compound_state_t * cs)7494 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7495 const compound_state_t *cs)
7496 {
7497 rfs4_chkseq_t rc;
7498
7499 if (rfs4_has_session(cs))
7500 return (NFS4_CHKSEQ_OKAY);
7501
7502 rfs4_dbe_lock(op->ro_dbe);
7503 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7504 TRUE);
7505 rfs4_dbe_unlock(op->ro_dbe);
7506
7507 if (rc == NFS4_CHKSEQ_OKAY)
7508 rfs4_update_lease(op->ro_client);
7509
7510 return (rc);
7511 }
7512
7513 static rfs4_chkseq_t
rfs4_check_olo_seqid(seqid4 olo_seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7514 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7515 {
7516 rfs4_chkseq_t rc;
7517
7518 rfs4_dbe_lock(op->ro_dbe);
7519 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7520 olo_seqid, resop, FALSE);
7521 rfs4_dbe_unlock(op->ro_dbe);
7522
7523 return (rc);
7524 }
7525
7526 static rfs4_chkseq_t
rfs4_check_lock_seqid(seqid4 seqid,rfs4_lo_state_t * lsp,nfs_resop4 * resop)7527 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7528 {
7529 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7530
7531 rfs4_dbe_lock(lsp->rls_dbe);
7532 if (!lsp->rls_skip_seqid_check)
7533 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7534 resop, TRUE);
7535 rfs4_dbe_unlock(lsp->rls_dbe);
7536
7537 return (rc);
7538 }
7539
7540 static void
rfs4_op_open(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7541 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7542 struct svc_req *req, struct compound_state *cs)
7543 {
7544 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7545 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7546 open_owner4 *owner = &args->owner;
7547 open_claim_type4 claim = args->claim.claim;
7548 rfs4_client_t *cp;
7549 rfs4_openowner_t *oo;
7550 bool_t create;
7551 bool_t replay = FALSE;
7552 int can_reclaim;
7553
7554 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7555 OPEN4args *, args);
7556
7557 if (cs->vp == NULL) {
7558 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7559 goto end;
7560 }
7561
7562 /* rfc5661 section 18.16.3 */
7563 if (rfs4_has_session(cs))
7564 owner->clientid = cs->client->rc_clientid;
7565
7566 /*
7567 * Need to check clientid and lease expiration first based on
7568 * error ordering and incrementing sequence id.
7569 */
7570 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7571 if (cp == NULL) {
7572 *cs->statusp = resp->status =
7573 rfs4_check_clientid(&owner->clientid, 0);
7574 goto end;
7575 }
7576
7577 if (rfs4_lease_expired(cp)) {
7578 rfs4_client_close(cp);
7579 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7580 goto end;
7581 }
7582 can_reclaim = cp->rc_can_reclaim;
7583
7584 /*
7585 * Find the open_owner for use from this point forward. Take
7586 * care in updating the sequence id based on the type of error
7587 * being returned.
7588 */
7589 retry:
7590 create = TRUE;
7591 oo = rfs4_findopenowner(owner, &create, args->seqid);
7592 if (oo == NULL) {
7593 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
7594 rfs4_client_rele(cp);
7595 goto end;
7596 }
7597
7598 /*
7599 * OPEN_CONFIRM must not be implemented in v4.1
7600 */
7601 if (rfs4_has_session(cs)) {
7602 oo->ro_need_confirm = FALSE;
7603 }
7604
7605 /* Hold off access to the sequence space while the open is done */
7606 /* Workaround to avoid deadlock */
7607 if (!rfs4_has_session(cs))
7608 rfs4_sw_enter(&oo->ro_sw);
7609
7610 /*
7611 * If the open_owner existed before at the server, then check
7612 * the sequence id.
7613 */
7614 if (!create && !oo->ro_postpone_confirm) {
7615 switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
7616 case NFS4_CHKSEQ_BAD:
7617 ASSERT(!rfs4_has_session(cs));
7618 if ((args->seqid > oo->ro_open_seqid) &&
7619 oo->ro_need_confirm) {
7620 rfs4_free_opens(oo, TRUE, FALSE);
7621 rfs4_sw_exit(&oo->ro_sw);
7622 rfs4_openowner_rele(oo);
7623 goto retry;
7624 }
7625 resp->status = NFS4ERR_BAD_SEQID;
7626 goto out;
7627 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7628 replay = TRUE;
7629 goto out;
7630 default:
7631 break;
7632 }
7633
7634 /*
7635 * Sequence was ok and open owner exists
7636 * check to see if we have yet to see an
7637 * open_confirm.
7638 */
7639 if (oo->ro_need_confirm) {
7640 rfs4_free_opens(oo, TRUE, FALSE);
7641 ASSERT(!rfs4_has_session(cs));
7642 rfs4_sw_exit(&oo->ro_sw);
7643 rfs4_openowner_rele(oo);
7644 goto retry;
7645 }
7646 }
7647 /* Grace only applies to regular-type OPENs */
7648 if (rfs4_clnt_in_grace(cp) &&
7649 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
7650 claim == CLAIM_FH)) {
7651 *cs->statusp = resp->status = NFS4ERR_GRACE;
7652 goto out;
7653 }
7654
7655 /*
7656 * If previous state at the server existed then can_reclaim
7657 * will be set. If not reply NFS4ERR_NO_GRACE to the
7658 * client.
7659 */
7660 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7661 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7662 goto out;
7663 }
7664
7665
7666 /*
7667 * Reject the open if the client has missed the grace period
7668 */
7669 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7670 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7671 goto out;
7672 }
7673
7674 /* Couple of up-front bookkeeping items */
7675 if (oo->ro_need_confirm) {
7676 /*
7677 * If this is a reclaim OPEN then we should not ask
7678 * for a confirmation of the open_owner per the
7679 * protocol specification.
7680 */
7681 if (claim == CLAIM_PREVIOUS)
7682 oo->ro_need_confirm = FALSE;
7683 else
7684 resp->rflags |= OPEN4_RESULT_CONFIRM;
7685 }
7686 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7687
7688 /*
7689 * If there is an unshared filesystem mounted on this vnode,
7690 * do not allow to open/create in this directory.
7691 */
7692 if (vn_ismntpt(cs->vp)) {
7693 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7694 goto out;
7695 }
7696
7697 /*
7698 * access must READ, WRITE, or BOTH. No access is invalid.
7699 * deny can be READ, WRITE, BOTH, or NONE.
7700 * bits not defined for access/deny are invalid.
7701 */
7702 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7703 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7704 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7705 *cs->statusp = resp->status = NFS4ERR_INVAL;
7706 goto out;
7707 }
7708
7709
7710 /*
7711 * make sure attrset is zero before response is built.
7712 */
7713 resp->attrset = 0;
7714
7715 switch (claim) {
7716 case CLAIM_NULL:
7717 rfs4_do_opennull(cs, req, args, oo, resp);
7718 break;
7719 case CLAIM_PREVIOUS:
7720 rfs4_do_openprev(cs, req, args, oo, resp);
7721 break;
7722 case CLAIM_DELEGATE_CUR:
7723 rfs4_do_opendelcur(cs, req, args, oo, resp);
7724 break;
7725 case CLAIM_DELEGATE_PREV:
7726 rfs4_do_opendelprev(cs, req, args, oo, resp);
7727 break;
7728 case CLAIM_FH:
7729 rfs4_do_openfh(cs, req, args, oo, resp);
7730 break;
7731 default:
7732 resp->status = NFS4ERR_INVAL;
7733 break;
7734 }
7735
7736 out:
7737 rfs4_client_rele(cp);
7738
7739 /* Catch sequence id handling here to make it a little easier */
7740 switch (resp->status) {
7741 case NFS4ERR_BADXDR:
7742 case NFS4ERR_BAD_SEQID:
7743 case NFS4ERR_BAD_STATEID:
7744 case NFS4ERR_NOFILEHANDLE:
7745 case NFS4ERR_RESOURCE:
7746 case NFS4ERR_STALE_CLIENTID:
7747 case NFS4ERR_STALE_STATEID:
7748 /*
7749 * The protocol states that if any of these errors are
7750 * being returned, the sequence id should not be
7751 * incremented. Any other return requires an
7752 * increment.
7753 */
7754 break;
7755 default:
7756 /* Always update the lease in this case */
7757 rfs4_update_lease(oo->ro_client);
7758
7759 /* Regular response - copy the result */
7760 if (!replay)
7761 rfs4_update_open_resp(oo, resop, &cs->fh);
7762
7763 /*
7764 * REPLAY case: Only if the previous response was OK
7765 * do we copy the filehandle. If not OK, no
7766 * filehandle to copy.
7767 */
7768 if (replay == TRUE &&
7769 resp->status == NFS4_OK &&
7770 oo->ro_reply_fh.nfs_fh4_val) {
7771 /*
7772 * If this is a replay, we must restore the
7773 * current filehandle/vp to that of what was
7774 * returned originally. Try our best to do
7775 * it.
7776 */
7777 nfs_fh4_fmt_t *fh_fmtp =
7778 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7779
7780 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7781 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7782
7783 if (cs->exi == NULL) {
7784 resp->status = NFS4ERR_STALE;
7785 goto finish;
7786 }
7787
7788 VN_RELE(cs->vp);
7789
7790 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7791 &resp->status);
7792
7793 if (cs->vp == NULL)
7794 goto finish;
7795
7796 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7797 }
7798
7799 /*
7800 * If this was a replay, no need to update the
7801 * sequence id. If the open_owner was not created on
7802 * this pass, then update. The first use of an
7803 * open_owner will not bump the sequence id.
7804 */
7805 if (replay == FALSE && !create)
7806 rfs4_update_open_sequence(oo);
7807 /*
7808 * If the client is receiving an error and the
7809 * open_owner needs to be confirmed, there is no way
7810 * to notify the client of this fact ignoring the fact
7811 * that the server has no method of returning a
7812 * stateid to confirm. Therefore, the server needs to
7813 * mark this open_owner in a way as to avoid the
7814 * sequence id checking the next time the client uses
7815 * this open_owner.
7816 */
7817 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7818 oo->ro_postpone_confirm = TRUE;
7819 /*
7820 * If OK response then clear the postpone flag and
7821 * reset the sequence id to keep in sync with the
7822 * client.
7823 */
7824 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7825 oo->ro_postpone_confirm = FALSE;
7826 oo->ro_open_seqid = args->seqid;
7827 }
7828 break;
7829 }
7830
7831 finish:
7832 *cs->statusp = resp->status;
7833
7834 if (!rfs4_has_session(cs))
7835 rfs4_sw_exit(&oo->ro_sw);
7836 rfs4_openowner_rele(oo);
7837
7838 end:
7839 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7840 OPEN4res *, resp);
7841 }
7842
7843 /*ARGSUSED*/
7844 void
rfs4_op_open_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7845 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7846 struct svc_req *req, struct compound_state *cs)
7847 {
7848 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7849 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7850 rfs4_state_t *sp;
7851 nfsstat4 status;
7852
7853 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7854 OPEN_CONFIRM4args *, args);
7855
7856 ASSERT(!rfs4_has_session(cs));
7857
7858 if (cs->vp == NULL) {
7859 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7860 goto out;
7861 }
7862
7863 if (cs->vp->v_type != VREG) {
7864 *cs->statusp = resp->status =
7865 cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7866 return;
7867 }
7868
7869 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7870 if (status != NFS4_OK) {
7871 *cs->statusp = resp->status = status;
7872 goto out;
7873 }
7874
7875 /* Ensure specified filehandle matches */
7876 if (cs->vp != sp->rs_finfo->rf_vp) {
7877 rfs4_state_rele(sp);
7878 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7879 goto out;
7880 }
7881
7882 /* hold off other access to open_owner while we tinker */
7883 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7884
7885 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7886 case NFS4_CHECK_STATEID_OKAY:
7887 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7888 resop, cs) != 0) {
7889 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7890 break;
7891 }
7892 /*
7893 * If it is the appropriate stateid and determined to
7894 * be "OKAY" then this means that the stateid does not
7895 * need to be confirmed and the client is in error for
7896 * sending an OPEN_CONFIRM.
7897 */
7898 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7899 break;
7900 case NFS4_CHECK_STATEID_OLD:
7901 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7902 break;
7903 case NFS4_CHECK_STATEID_BAD:
7904 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7905 break;
7906 case NFS4_CHECK_STATEID_EXPIRED:
7907 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7908 break;
7909 case NFS4_CHECK_STATEID_CLOSED:
7910 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7911 break;
7912 case NFS4_CHECK_STATEID_REPLAY:
7913 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7914 resop, cs)) {
7915 case NFS4_CHKSEQ_OKAY:
7916 /*
7917 * This is replayed stateid; if seqid matches
7918 * next expected, then client is using wrong seqid.
7919 */
7920 /* fall through */
7921 case NFS4_CHKSEQ_BAD:
7922 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7923 break;
7924 case NFS4_CHKSEQ_REPLAY:
7925 /*
7926 * Note this case is the duplicate case so
7927 * resp->status is already set.
7928 */
7929 *cs->statusp = resp->status;
7930 rfs4_update_lease(sp->rs_owner->ro_client);
7931 break;
7932 }
7933 break;
7934 case NFS4_CHECK_STATEID_UNCONFIRMED:
7935 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7936 resop, cs) != NFS4_CHKSEQ_OKAY) {
7937 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7938 break;
7939 }
7940 *cs->statusp = resp->status = NFS4_OK;
7941
7942 next_stateid(&sp->rs_stateid);
7943 resp->open_stateid = sp->rs_stateid.stateid;
7944 sp->rs_owner->ro_need_confirm = FALSE;
7945 rfs4_update_lease(sp->rs_owner->ro_client);
7946 rfs4_update_open_sequence(sp->rs_owner);
7947 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7948 break;
7949 default:
7950 ASSERT(FALSE);
7951 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7952 break;
7953 }
7954 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7955 rfs4_state_rele(sp);
7956
7957 out:
7958 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7959 OPEN_CONFIRM4res *, resp);
7960 }
7961
7962 /*ARGSUSED*/
7963 void
rfs4_op_open_downgrade(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7964 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7965 struct svc_req *req, struct compound_state *cs)
7966 {
7967 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7968 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7969 uint32_t access = args->share_access;
7970 uint32_t deny = args->share_deny;
7971 nfsstat4 status;
7972 rfs4_state_t *sp;
7973 rfs4_file_t *fp;
7974 int fflags = 0;
7975
7976 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7977 OPEN_DOWNGRADE4args *, args);
7978
7979 if (cs->vp == NULL) {
7980 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7981 goto out;
7982 }
7983
7984 if (cs->vp->v_type != VREG) {
7985 *cs->statusp = resp->status = NFS4ERR_INVAL;
7986 return;
7987 }
7988
7989 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7990 if (status != NFS4_OK) {
7991 *cs->statusp = resp->status = status;
7992 goto out;
7993 }
7994
7995 /* Ensure specified filehandle matches */
7996 if (cs->vp != sp->rs_finfo->rf_vp) {
7997 rfs4_state_rele(sp);
7998 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7999 goto out;
8000 }
8001
8002 /* hold off other access to open_owner while we tinker */
8003 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8004
8005 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8006 case NFS4_CHECK_STATEID_OKAY:
8007 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8008 resop, cs) != NFS4_CHKSEQ_OKAY) {
8009 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8010 goto end;
8011 }
8012 break;
8013 case NFS4_CHECK_STATEID_OLD:
8014 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8015 goto end;
8016 case NFS4_CHECK_STATEID_BAD:
8017 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8018 goto end;
8019 case NFS4_CHECK_STATEID_EXPIRED:
8020 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8021 goto end;
8022 case NFS4_CHECK_STATEID_CLOSED:
8023 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8024 goto end;
8025 case NFS4_CHECK_STATEID_UNCONFIRMED:
8026 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8027 goto end;
8028 case NFS4_CHECK_STATEID_REPLAY:
8029 ASSERT(!rfs4_has_session(cs));
8030
8031 /* Check the sequence id for the open owner */
8032 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8033 resop, cs)) {
8034 case NFS4_CHKSEQ_OKAY:
8035 /*
8036 * This is replayed stateid; if seqid matches
8037 * next expected, then client is using wrong seqid.
8038 */
8039 /* fall through */
8040 case NFS4_CHKSEQ_BAD:
8041 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8042 goto end;
8043 case NFS4_CHKSEQ_REPLAY:
8044 /*
8045 * Note this case is the duplicate case so
8046 * resp->status is already set.
8047 */
8048 *cs->statusp = resp->status;
8049 rfs4_update_lease(sp->rs_owner->ro_client);
8050 goto end;
8051 }
8052 break;
8053 default:
8054 ASSERT(FALSE);
8055 break;
8056 }
8057
8058 rfs4_dbe_lock(sp->rs_dbe);
8059 /*
8060 * Check that the new access modes and deny modes are valid.
8061 * Check that no invalid bits are set.
8062 */
8063 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8064 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8065 *cs->statusp = resp->status = NFS4ERR_INVAL;
8066 rfs4_update_open_sequence(sp->rs_owner);
8067 rfs4_dbe_unlock(sp->rs_dbe);
8068 goto end;
8069 }
8070
8071 /*
8072 * The new modes must be a subset of the current modes and
8073 * the access must specify at least one mode. To test that
8074 * the new mode is a subset of the current modes we bitwise
8075 * AND them together and check that the result equals the new
8076 * mode. For example:
8077 * New mode, access == R and current mode, sp->rs_open_access == RW
8078 * access & sp->rs_open_access == R == access, so the new access mode
8079 * is valid. Consider access == RW, sp->rs_open_access = R
8080 * access & sp->rs_open_access == R != access, so the new access mode
8081 * is invalid.
8082 */
8083 if ((access & sp->rs_open_access) != access ||
8084 (deny & sp->rs_open_deny) != deny ||
8085 (access &
8086 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8087 *cs->statusp = resp->status = NFS4ERR_INVAL;
8088 rfs4_update_open_sequence(sp->rs_owner);
8089 rfs4_dbe_unlock(sp->rs_dbe);
8090 goto end;
8091 }
8092
8093 /*
8094 * Release any share locks associated with this stateID.
8095 * Strictly speaking, this violates the spec because the
8096 * spec effectively requires that open downgrade be atomic.
8097 * At present, fs_shrlock does not have this capability.
8098 */
8099 (void) rfs4_unshare(sp);
8100
8101 status = rfs4_share(sp, access, deny);
8102 if (status != NFS4_OK) {
8103 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8104 rfs4_update_open_sequence(sp->rs_owner);
8105 rfs4_dbe_unlock(sp->rs_dbe);
8106 goto end;
8107 }
8108
8109 fp = sp->rs_finfo;
8110 rfs4_dbe_lock(fp->rf_dbe);
8111
8112 /*
8113 * If the current mode has deny read and the new mode
8114 * does not, decrement the number of deny read mode bits
8115 * and if it goes to zero turn off the deny read bit
8116 * on the file.
8117 */
8118 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8119 (deny & OPEN4_SHARE_DENY_READ) == 0) {
8120 fp->rf_deny_read--;
8121 if (fp->rf_deny_read == 0)
8122 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8123 }
8124
8125 /*
8126 * If the current mode has deny write and the new mode
8127 * does not, decrement the number of deny write mode bits
8128 * and if it goes to zero turn off the deny write bit
8129 * on the file.
8130 */
8131 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8132 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8133 fp->rf_deny_write--;
8134 if (fp->rf_deny_write == 0)
8135 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8136 }
8137
8138 /*
8139 * If the current mode has access read and the new mode
8140 * does not, decrement the number of access read mode bits
8141 * and if it goes to zero turn off the access read bit
8142 * on the file. set fflags to FREAD for the call to
8143 * vn_open_downgrade().
8144 */
8145 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8146 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8147 fp->rf_access_read--;
8148 if (fp->rf_access_read == 0)
8149 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8150 fflags |= FREAD;
8151 }
8152
8153 /*
8154 * If the current mode has access write and the new mode
8155 * does not, decrement the number of access write mode bits
8156 * and if it goes to zero turn off the access write bit
8157 * on the file. set fflags to FWRITE for the call to
8158 * vn_open_downgrade().
8159 */
8160 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8161 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8162 fp->rf_access_write--;
8163 if (fp->rf_access_write == 0)
8164 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8165 fflags |= FWRITE;
8166 }
8167
8168 /* Check that the file is still accessible */
8169 ASSERT(fp->rf_share_access);
8170
8171 rfs4_dbe_unlock(fp->rf_dbe);
8172
8173 /* now set the new open access and deny modes */
8174 sp->rs_open_access = access;
8175 sp->rs_open_deny = deny;
8176
8177 /*
8178 * we successfully downgraded the share lock, now we need to downgrade
8179 * the open. it is possible that the downgrade was only for a deny
8180 * mode and we have nothing else to do.
8181 */
8182 if ((fflags & (FREAD|FWRITE)) != 0)
8183 vn_open_downgrade(cs->vp, fflags);
8184
8185 /* Update the stateid */
8186 next_stateid(&sp->rs_stateid);
8187 resp->open_stateid = sp->rs_stateid.stateid;
8188
8189 rfs4_dbe_unlock(sp->rs_dbe);
8190
8191 *cs->statusp = resp->status = NFS4_OK;
8192 /* Update the lease */
8193 rfs4_update_lease(sp->rs_owner->ro_client);
8194 /* And the sequence */
8195 rfs4_update_open_sequence(sp->rs_owner);
8196 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8197
8198 end:
8199 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8200 rfs4_state_rele(sp);
8201 out:
8202 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8203 OPEN_DOWNGRADE4res *, resp);
8204 }
8205
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1; the buffer itself need not be NUL-terminated.
 *
 * Returns a pointer to the first match, or NULL when s2 does not occur
 * entirely within those n bytes.  An empty s2 matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cursor;
	size_t remaining;

	/*
	 * Slide a needle_len-wide window over the buffer, stopping once
	 * fewer than needle_len bytes are left to compare.
	 */
	for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
8221
8222 /*
8223 * The logic behind this function is detailed in the NFSv4 RFC in the
8224 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
8225 * that section for explicit guidance to server behavior for
8226 * SETCLIENTID.
8227 */
8228 void
rfs4_op_setclientid(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8229 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8230 struct svc_req *req, struct compound_state *cs)
8231 {
8232 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8233 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8234 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8235 rfs4_clntip_t *ci;
8236 bool_t create;
8237 char *addr, *netid;
8238 int len;
8239
8240 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8241 SETCLIENTID4args *, args);
8242 retry:
8243 newcp = cp_confirmed = cp_unconfirmed = NULL;
8244
8245 /*
8246 * Save the caller's IP address
8247 */
8248 args->client.cl_addr =
8249 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8250
8251 /*
8252 * Record if it is a Solaris client that cannot handle referrals.
8253 */
8254 if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8255 !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8256 /* Add a "yes, it's downrev" record */
8257 create = TRUE;
8258 ci = rfs4_find_clntip(args->client.cl_addr, &create);
8259 ASSERT(ci != NULL);
8260 rfs4_dbe_rele(ci->ri_dbe);
8261 } else {
8262 /* Remove any previous record */
8263 rfs4_invalidate_clntip(args->client.cl_addr);
8264 }
8265
8266 /*
8267 * In search of an EXISTING client matching the incoming
8268 * request to establish a new client identifier at the server
8269 */
8270 create = TRUE;
8271 cp = rfs4_findclient(&args->client, &create, NULL);
8272
8273 /* Should never happen */
8274 ASSERT(cp != NULL);
8275
8276 if (cp == NULL) {
8277 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8278 goto out;
8279 }
8280
8281 /*
8282 * Easiest case. Client identifier is newly created and is
8283 * unconfirmed. Also note that for this case, no other
8284 * entries exist for the client identifier. Nothing else to
8285 * check. Just setup the response and respond.
8286 */
8287 if (create) {
8288 *cs->statusp = res->status = NFS4_OK;
8289 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8290 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8291 cp->rc_confirm_verf;
8292 /* Setup callback information; CB_NULL confirmation later */
8293 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8294
8295 rfs4_client_rele(cp);
8296 goto out;
8297 }
8298
8299 /*
8300 * An existing, confirmed client may exist but it may not have
8301 * been active for at least one lease period. If so, then
8302 * "close" the client and create a new client identifier
8303 */
8304 if (rfs4_lease_expired(cp)) {
8305 rfs4_client_close(cp);
8306 goto retry;
8307 }
8308
8309 if (cp->rc_need_confirm == TRUE)
8310 cp_unconfirmed = cp;
8311 else
8312 cp_confirmed = cp;
8313
8314 cp = NULL;
8315
8316 /*
8317 * We have a confirmed client, now check for an
8318 * unconfimred entry
8319 */
8320 if (cp_confirmed) {
8321 /* If creds don't match then client identifier is inuse */
8322 if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8323 rfs4_cbinfo_t *cbp;
8324 /*
8325 * Some one else has established this client
8326 * id. Try and say * who they are. We will use
8327 * the call back address supplied by * the
8328 * first client.
8329 */
8330 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8331
8332 addr = netid = NULL;
8333
8334 cbp = &cp_confirmed->rc_cbinfo;
8335 if (cbp->cb_callback.cb_location.r_addr &&
8336 cbp->cb_callback.cb_location.r_netid) {
8337 cb_client4 *cbcp = &cbp->cb_callback;
8338
8339 len = strlen(cbcp->cb_location.r_addr)+1;
8340 addr = kmem_alloc(len, KM_SLEEP);
8341 bcopy(cbcp->cb_location.r_addr, addr, len);
8342 len = strlen(cbcp->cb_location.r_netid)+1;
8343 netid = kmem_alloc(len, KM_SLEEP);
8344 bcopy(cbcp->cb_location.r_netid, netid, len);
8345 }
8346
8347 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8348 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8349
8350 rfs4_client_rele(cp_confirmed);
8351 }
8352
8353 /*
8354 * Confirmed, creds match, and verifier matches; must
8355 * be an update of the callback info
8356 */
8357 if (cp_confirmed->rc_nfs_client.verifier ==
8358 args->client.verifier) {
8359 /* Setup callback information */
8360 rfs4_client_setcb(cp_confirmed, &args->callback,
8361 args->callback_ident);
8362
8363 /* everything okay -- move ahead */
8364 *cs->statusp = res->status = NFS4_OK;
8365 res->SETCLIENTID4res_u.resok4.clientid =
8366 cp_confirmed->rc_clientid;
8367
8368 /* update the confirm_verifier and return it */
8369 rfs4_client_scv_next(cp_confirmed);
8370 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8371 cp_confirmed->rc_confirm_verf;
8372
8373 rfs4_client_rele(cp_confirmed);
8374 goto out;
8375 }
8376
8377 /*
8378 * Creds match but the verifier doesn't. Must search
8379 * for an unconfirmed client that would be replaced by
8380 * this request.
8381 */
8382 create = FALSE;
8383 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8384 cp_confirmed);
8385 }
8386
8387 /*
8388 * At this point, we have taken care of the brand new client
8389 * struct, INUSE case, update of an existing, and confirmed
8390 * client struct.
8391 */
8392
8393 /*
8394 * check to see if things have changed while we originally
8395 * picked up the client struct. If they have, then return and
8396 * retry the processing of this SETCLIENTID request.
8397 */
8398 if (cp_unconfirmed) {
8399 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8400 if (!cp_unconfirmed->rc_need_confirm) {
8401 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8402 rfs4_client_rele(cp_unconfirmed);
8403 if (cp_confirmed)
8404 rfs4_client_rele(cp_confirmed);
8405 goto retry;
8406 }
8407 /* do away with the old unconfirmed one */
8408 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8409 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8410 rfs4_client_rele(cp_unconfirmed);
8411 cp_unconfirmed = NULL;
8412 }
8413
8414 /*
8415 * This search will temporarily hide the confirmed client
8416 * struct while a new client struct is created as the
8417 * unconfirmed one.
8418 */
8419 create = TRUE;
8420 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8421
8422 ASSERT(newcp != NULL);
8423
8424 if (newcp == NULL) {
8425 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8426 rfs4_client_rele(cp_confirmed);
8427 goto out;
8428 }
8429
8430 /*
8431 * If one was not created, then a similar request must be in
8432 * process so release and start over with this one
8433 */
8434 if (create != TRUE) {
8435 rfs4_client_rele(newcp);
8436 if (cp_confirmed)
8437 rfs4_client_rele(cp_confirmed);
8438 goto retry;
8439 }
8440
8441 *cs->statusp = res->status = NFS4_OK;
8442 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8443 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8444 newcp->rc_confirm_verf;
8445 /* Setup callback information; CB_NULL confirmation later */
8446 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8447
8448 newcp->rc_cp_confirmed = cp_confirmed;
8449
8450 rfs4_client_rele(newcp);
8451
8452 out:
8453 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8454 SETCLIENTID4res *, res);
8455 }
8456
8457 /*ARGSUSED*/
8458 void
rfs4_op_setclientid_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8459 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8460 struct svc_req *req, struct compound_state *cs)
8461 {
8462 SETCLIENTID_CONFIRM4args *args =
8463 &argop->nfs_argop4_u.opsetclientid_confirm;
8464 SETCLIENTID_CONFIRM4res *res =
8465 &resop->nfs_resop4_u.opsetclientid_confirm;
8466 rfs4_client_t *cp, *cptoclose = NULL;
8467 nfs4_srv_t *nsrv4;
8468
8469 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8470 struct compound_state *, cs,
8471 SETCLIENTID_CONFIRM4args *, args);
8472
8473 nsrv4 = nfs4_get_srv();
8474 *cs->statusp = res->status = NFS4_OK;
8475
8476 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8477
8478 if (cp == NULL) {
8479 *cs->statusp = res->status =
8480 rfs4_check_clientid(&args->clientid, 1);
8481 goto out;
8482 }
8483
8484 if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8485 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8486 rfs4_client_rele(cp);
8487 goto out;
8488 }
8489
8490 /* If the verifier doesn't match, the record doesn't match */
8491 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8492 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8493 rfs4_client_rele(cp);
8494 goto out;
8495 }
8496
8497 rfs4_dbe_lock(cp->rc_dbe);
8498 cp->rc_need_confirm = FALSE;
8499 if (cp->rc_cp_confirmed) {
8500 cptoclose = cp->rc_cp_confirmed;
8501 cptoclose->rc_ss_remove = 1;
8502 cp->rc_cp_confirmed = NULL;
8503 }
8504
8505 /*
8506 * Update the client's associated server instance, if it's changed
8507 * since the client was created.
8508 */
8509 if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8510 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8511
8512 /*
8513 * Record clientid in stable storage.
8514 * Must be done after server instance has been assigned.
8515 */
8516 rfs4_ss_clid(nsrv4, cp);
8517
8518 rfs4_dbe_unlock(cp->rc_dbe);
8519
8520 if (cptoclose)
8521 /* don't need to rele, client_close does it */
8522 rfs4_client_close(cptoclose);
8523
8524 /* If needed, initiate CB_NULL call for callback path */
8525 rfs4_deleg_cb_check(cp);
8526 rfs4_update_lease(cp);
8527
8528 /*
8529 * Check to see if client can perform reclaims
8530 */
8531 rfs4_ss_chkclid(nsrv4, cp);
8532
8533 rfs4_client_rele(cp);
8534
8535 out:
8536 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8537 struct compound_state *, cs,
8538 SETCLIENTID_CONFIRM4 *, res);
8539 }
8540
8541
8542 /*ARGSUSED*/
8543 void
rfs4_op_close(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8544 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8545 struct svc_req *req, struct compound_state *cs)
8546 {
8547 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8548 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8549 rfs4_state_t *sp;
8550 nfsstat4 status;
8551
8552 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8553 CLOSE4args *, args);
8554
8555 if (cs->vp == NULL) {
8556 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8557 goto out;
8558 }
8559
8560 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8561 if (status != NFS4_OK) {
8562 *cs->statusp = resp->status = status;
8563 goto out;
8564 }
8565
8566 /* Ensure specified filehandle matches */
8567 if (cs->vp != sp->rs_finfo->rf_vp) {
8568 rfs4_state_rele(sp);
8569 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8570 goto out;
8571 }
8572
8573 /* hold off other access to open_owner while we tinker */
8574 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8575
8576 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8577 case NFS4_CHECK_STATEID_OKAY:
8578 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8579 resop, cs) != NFS4_CHKSEQ_OKAY) {
8580 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8581 goto end;
8582 }
8583 break;
8584 case NFS4_CHECK_STATEID_OLD:
8585 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8586 goto end;
8587 case NFS4_CHECK_STATEID_BAD:
8588 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8589 goto end;
8590 case NFS4_CHECK_STATEID_EXPIRED:
8591 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8592 goto end;
8593 case NFS4_CHECK_STATEID_CLOSED:
8594 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8595 goto end;
8596 case NFS4_CHECK_STATEID_UNCONFIRMED:
8597 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8598 goto end;
8599 case NFS4_CHECK_STATEID_REPLAY:
8600 ASSERT(!rfs4_has_session(cs));
8601
8602 /* Check the sequence id for the open owner */
8603 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8604 resop, cs)) {
8605 case NFS4_CHKSEQ_OKAY:
8606 /*
8607 * This is replayed stateid; if seqid matches
8608 * next expected, then client is using wrong seqid.
8609 */
8610 /* FALL THROUGH */
8611 case NFS4_CHKSEQ_BAD:
8612 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8613 goto end;
8614 case NFS4_CHKSEQ_REPLAY:
8615 /*
8616 * Note this case is the duplicate case so
8617 * resp->status is already set.
8618 */
8619 *cs->statusp = resp->status;
8620 rfs4_update_lease(sp->rs_owner->ro_client);
8621 goto end;
8622 }
8623 break;
8624 default:
8625 ASSERT(FALSE);
8626 break;
8627 }
8628
8629 rfs4_dbe_lock(sp->rs_dbe);
8630
8631 /* Update the stateid. */
8632 next_stateid(&sp->rs_stateid);
8633 resp->open_stateid = sp->rs_stateid.stateid;
8634
8635 rfs4_dbe_unlock(sp->rs_dbe);
8636
8637 rfs4_update_lease(sp->rs_owner->ro_client);
8638 rfs4_update_open_sequence(sp->rs_owner);
8639 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8640
8641 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8642
8643 *cs->statusp = resp->status = status;
8644
8645 end:
8646 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8647 rfs4_state_rele(sp);
8648 out:
8649 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8650 CLOSE4res *, resp);
8651 }
8652
8653 /*
8654 * Manage the counts on the file struct and close all file locks
8655 */
8656 /*ARGSUSED*/
8657 void
rfs4_release_share_lock_state(rfs4_state_t * sp,cred_t * cr,bool_t close_of_client)8658 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8659 bool_t close_of_client)
8660 {
8661 rfs4_file_t *fp = sp->rs_finfo;
8662 rfs4_lo_state_t *lsp;
8663 int fflags = 0;
8664
8665 /*
8666 * If this call is part of the larger closing down of client
8667 * state then it is just easier to release all locks
8668 * associated with this client instead of going through each
8669 * individual file and cleaning locks there.
8670 */
8671 if (close_of_client) {
8672 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8673 !list_is_empty(&sp->rs_lostatelist) &&
8674 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8675 /* Is the PxFS kernel module loaded? */
8676 if (lm_remove_file_locks != NULL) {
8677 int new_sysid;
8678
8679 /* Encode the cluster nodeid in new sysid */
8680 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8681 lm_set_nlmid_flk(&new_sysid);
8682
8683 /*
8684 * This PxFS routine removes file locks for a
8685 * client over all nodes of a cluster.
8686 */
8687 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8688 "lm_remove_file_locks(sysid=0x%x)\n",
8689 new_sysid));
8690 (*lm_remove_file_locks)(new_sysid);
8691 } else {
8692 struct flock64 flk;
8693
8694 /* Release all locks for this client */
8695 flk.l_type = F_UNLKSYS;
8696 flk.l_whence = 0;
8697 flk.l_start = 0;
8698 flk.l_len = 0;
8699 flk.l_sysid =
8700 sp->rs_owner->ro_client->rc_sysidt;
8701 flk.l_pid = 0;
8702 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8703 &flk, F_REMOTELOCK | FREAD | FWRITE,
8704 (u_offset_t)0, NULL, CRED(), NULL);
8705 }
8706
8707 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8708 }
8709 }
8710
8711 /*
8712 * Release all locks on this file by this lock owner or at
8713 * least mark the locks as having been released
8714 */
8715 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8716 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8717 lsp->rls_locks_cleaned = TRUE;
8718
8719 /* Was this already taken care of above? */
8720 if (!close_of_client &&
8721 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8722 (void) cleanlocks(sp->rs_finfo->rf_vp,
8723 lsp->rls_locker->rl_pid,
8724 lsp->rls_locker->rl_client->rc_sysidt);
8725 }
8726
8727 /*
8728 * Release any shrlocks associated with this open state ID.
8729 * This must be done before the rfs4_state gets marked closed.
8730 */
8731 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8732 (void) rfs4_unshare(sp);
8733
8734 if (sp->rs_open_access) {
8735 rfs4_dbe_lock(fp->rf_dbe);
8736
8737 /*
8738 * Decrement the count for each access and deny bit that this
8739 * state has contributed to the file.
8740 * If the file counts go to zero
8741 * clear the appropriate bit in the appropriate mask.
8742 */
8743 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8744 fp->rf_access_read--;
8745 fflags |= FREAD;
8746 if (fp->rf_access_read == 0)
8747 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8748 }
8749 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8750 fp->rf_access_write--;
8751 fflags |= FWRITE;
8752 if (fp->rf_access_write == 0)
8753 fp->rf_share_access &=
8754 ~OPEN4_SHARE_ACCESS_WRITE;
8755 }
8756 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8757 fp->rf_deny_read--;
8758 if (fp->rf_deny_read == 0)
8759 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8760 }
8761 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8762 fp->rf_deny_write--;
8763 if (fp->rf_deny_write == 0)
8764 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8765 }
8766
8767 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8768
8769 rfs4_dbe_unlock(fp->rf_dbe);
8770
8771 sp->rs_open_access = 0;
8772 sp->rs_open_deny = 0;
8773 }
8774 }
8775
8776 /*
8777 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8778 */
8779 static nfsstat4
lock_denied(LOCK4denied * dp,struct flock64 * flk)8780 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8781 {
8782 rfs4_lockowner_t *lo;
8783 rfs4_client_t *cp;
8784 uint32_t len;
8785
8786 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8787 if (lo != NULL) {
8788 cp = lo->rl_client;
8789 if (rfs4_lease_expired(cp)) {
8790 rfs4_lockowner_rele(lo);
8791 rfs4_dbe_hold(cp->rc_dbe);
8792 rfs4_client_close(cp);
8793 return (NFS4ERR_EXPIRED);
8794 }
8795 dp->owner.clientid = lo->rl_owner.clientid;
8796 len = lo->rl_owner.owner_len;
8797 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8798 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8799 dp->owner.owner_len = len;
8800 rfs4_lockowner_rele(lo);
8801 goto finish;
8802 }
8803
8804 /*
8805 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8806 * of the client id contain the boot time for a NFS4 lock. So we
8807 * fabricate and identity by setting clientid to the sysid, and
8808 * the lock owner to the pid.
8809 */
8810 dp->owner.clientid = flk->l_sysid;
8811 len = sizeof (pid_t);
8812 dp->owner.owner_len = len;
8813 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8814 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8815 finish:
8816 dp->offset = flk->l_start;
8817 dp->length = flk->l_len;
8818
8819 if (flk->l_type == F_RDLCK)
8820 dp->locktype = READ_LT;
8821 else if (flk->l_type == F_WRLCK)
8822 dp->locktype = WRITE_LT;
8823 else
8824 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8825
8826 return (NFS4_OK);
8827 }
8828
8829 /*
8830 * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8831 * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8832 * case the lock is denied by the NFSv4.0 server. NFSv4.0 clients are prepared
8833 * for that (obviously); they are sending the LOCK requests with some delays
8834 * between the attempts. See nfs4frlock() and nfs4_block_and_wait() for the
8835 * locking and delay implementation at the client side.
8836 *
8837 * To make the life of the clients easier, the NFSv4.0 server tries to do some
8838 * fast retries on its own (the for loop below) in a hope the lock will be
8839 * available soon. And if not, the client won't need to resend the LOCK
8840 * requests so fast to check the lock availability. This basically saves some
8841 * network traffic and tries to make sure the client gets the lock ASAP.
8842 */
8843 static int
setlock(vnode_t * vp,struct flock64 * flock,int flag,cred_t * cred)8844 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8845 {
8846 int error;
8847 struct flock64 flk;
8848 int i;
8849 clock_t delaytime;
8850 int cmd;
8851 int spin_cnt = 0;
8852
8853 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8854 retry:
8855 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8856
8857 for (i = 0; i < rfs4_maxlock_tries; i++) {
8858 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8859 error = VOP_FRLOCK(vp, cmd,
8860 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8861
8862 if (error != EAGAIN && error != EACCES)
8863 break;
8864
8865 if (i < rfs4_maxlock_tries - 1) {
8866 delay(delaytime);
8867 delaytime *= 2;
8868 }
8869 }
8870
8871 if (error == EAGAIN || error == EACCES) {
8872 /* Get the owner of the lock */
8873 flk = *flock;
8874 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8875 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8876 NULL) == 0) {
8877 /*
8878 * There's a race inherent in the current VOP_FRLOCK
8879 * design where:
8880 * a: "other guy" takes a lock that conflicts with a
8881 * lock we want
8882 * b: we attempt to take our lock (non-blocking) and
8883 * the attempt fails.
8884 * c: "other guy" releases the conflicting lock
8885 * d: we ask what lock conflicts with the lock we want,
8886 * getting F_UNLCK (no lock blocks us)
8887 *
8888 * If we retry the non-blocking lock attempt in this
8889 * case (restart at step 'b') there's some possibility
8890 * that many such attempts might fail. However a test
8891 * designed to actually provoke this race shows that
8892 * the vast majority of cases require no retry, and
8893 * only a few took as many as three retries. Here's
8894 * the test outcome:
8895 *
8896 * number of retries how many times we needed
8897 * that many retries
8898 * 0 79461
8899 * 1 862
8900 * 2 49
8901 * 3 5
8902 *
8903 * Given those empirical results, we arbitrarily limit
8904 * the retry count to ten.
8905 *
8906 * If we actually make to ten retries and give up,
8907 * nothing catastrophic happens, but we're unable to
8908 * return the information about the conflicting lock to
8909 * the NFS client. That's an acceptable trade off vs.
8910 * letting this retry loop run forever.
8911 */
8912 if (flk.l_type == F_UNLCK) {
8913 if (spin_cnt++ < 10) {
8914 /* No longer locked, retry */
8915 goto retry;
8916 }
8917 } else {
8918 *flock = flk;
8919 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8920 F_GETLK, &flk);
8921 }
8922 }
8923 }
8924
8925 return (error);
8926 }
8927
8928 /*ARGSUSED*/
8929 static nfsstat4
rfs4_do_lock(rfs4_lo_state_t * lsp,nfs_lock_type4 locktype,offset4 offset,length4 length,cred_t * cred,nfs_resop4 * resop)8930 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8931 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8932 {
8933 nfsstat4 status;
8934 rfs4_lockowner_t *lo = lsp->rls_locker;
8935 rfs4_state_t *sp = lsp->rls_state;
8936 struct flock64 flock;
8937 int16_t ltype;
8938 int flag;
8939 int error;
8940 sysid_t sysid;
8941 LOCK4res *lres;
8942 vnode_t *vp;
8943
8944 if (rfs4_lease_expired(lo->rl_client)) {
8945 return (NFS4ERR_EXPIRED);
8946 }
8947
8948 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8949 return (status);
8950
8951 /* Check for zero length. To lock to end of file use all ones for V4 */
8952 if (length == 0)
8953 return (NFS4ERR_INVAL);
8954 else if (length == (length4)(~0))
8955 length = 0; /* Posix to end of file */
8956
8957 retry:
8958 rfs4_dbe_lock(sp->rs_dbe);
8959 if (sp->rs_closed == TRUE) {
8960 rfs4_dbe_unlock(sp->rs_dbe);
8961 return (NFS4ERR_OLD_STATEID);
8962 }
8963
8964 if (resop->resop != OP_LOCKU) {
8965 switch (locktype) {
8966 case READ_LT:
8967 case READW_LT:
8968 if ((sp->rs_share_access
8969 & OPEN4_SHARE_ACCESS_READ) == 0) {
8970 rfs4_dbe_unlock(sp->rs_dbe);
8971
8972 return (NFS4ERR_OPENMODE);
8973 }
8974 ltype = F_RDLCK;
8975 break;
8976 case WRITE_LT:
8977 case WRITEW_LT:
8978 if ((sp->rs_share_access
8979 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8980 rfs4_dbe_unlock(sp->rs_dbe);
8981
8982 return (NFS4ERR_OPENMODE);
8983 }
8984 ltype = F_WRLCK;
8985 break;
8986 }
8987 } else
8988 ltype = F_UNLCK;
8989
8990 flock.l_type = ltype;
8991 flock.l_whence = 0; /* SEEK_SET */
8992 flock.l_start = offset;
8993 flock.l_len = length;
8994 flock.l_sysid = sysid;
8995 flock.l_pid = lsp->rls_locker->rl_pid;
8996
8997 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8998 if (flock.l_len < 0 || flock.l_start < 0) {
8999 rfs4_dbe_unlock(sp->rs_dbe);
9000 return (NFS4ERR_INVAL);
9001 }
9002
9003 /*
9004 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9005 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9006 */
9007 flag = (int)sp->rs_share_access | F_REMOTELOCK;
9008
9009 vp = sp->rs_finfo->rf_vp;
9010 VN_HOLD(vp);
9011
9012 /*
9013 * We need to unlock sp before we call the underlying filesystem to
9014 * acquire the file lock.
9015 */
9016 rfs4_dbe_unlock(sp->rs_dbe);
9017
9018 error = setlock(vp, &flock, flag, cred);
9019
9020 /*
9021 * Make sure the file is still open. In a case the file was closed in
9022 * the meantime, clean the lock we acquired using the setlock() call
9023 * above, and return the appropriate error.
9024 */
9025 rfs4_dbe_lock(sp->rs_dbe);
9026 if (sp->rs_closed == TRUE) {
9027 cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9028 rfs4_dbe_unlock(sp->rs_dbe);
9029
9030 VN_RELE(vp);
9031
9032 return (NFS4ERR_OLD_STATEID);
9033 }
9034 rfs4_dbe_unlock(sp->rs_dbe);
9035
9036 VN_RELE(vp);
9037
9038 if (error == 0) {
9039 rfs4_dbe_lock(lsp->rls_dbe);
9040 next_stateid(&lsp->rls_lockid);
9041 rfs4_dbe_unlock(lsp->rls_dbe);
9042 }
9043
9044 /*
9045 * N.B. We map error values to nfsv4 errors. This is differrent
9046 * than puterrno4 routine.
9047 */
9048 switch (error) {
9049 case 0:
9050 status = NFS4_OK;
9051 break;
9052 case EAGAIN:
9053 case EACCES: /* Old value */
9054 /* Can only get here if op is OP_LOCK */
9055 ASSERT(resop->resop == OP_LOCK);
9056 lres = &resop->nfs_resop4_u.oplock;
9057 status = NFS4ERR_DENIED;
9058 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9059 == NFS4ERR_EXPIRED)
9060 goto retry;
9061 break;
9062 case ENOLCK:
9063 status = NFS4ERR_DELAY;
9064 break;
9065 case EOVERFLOW:
9066 status = NFS4ERR_INVAL;
9067 break;
9068 case EINVAL:
9069 status = NFS4ERR_NOTSUPP;
9070 break;
9071 default:
9072 status = NFS4ERR_SERVERFAULT;
9073 break;
9074 }
9075
9076 return (status);
9077 }
9078
/*
 * rfs4_op_lock: handler for the LOCK operation.
 *
 * The result is stored in both resp->status and *cs->statusp.  There are
 * two ways in, selected by args->locker.new_lock_owner:
 *
 *  - new lock owner: the request carries an open stateid.  The open state
 *    is looked up and validated, the open owner is serialized (ro_sw),
 *    the open seqid is checked, and the lockowner and its lock-owner
 *    state are looked up (or created).  A duplicate request is answered
 *    from the reply saved in the lock-owner state.
 *
 *  - existing lock owner: the request carries a lock stateid.  The
 *    lock-owner state is looked up, serialized (rls_sw), and its
 *    stateid and lock seqid are checked, including the replay cases.
 *
 * Both paths then share the object type check, the grace period checks
 * and the call to rfs4_do_lock().
 *
 * Exit labels:
 *	out - record the final status and save the reply for replay
 *	      detection, then fall into end
 *	end - drop serialization and references taken above
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* only set on the new lock owner path */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;	/* TRUE for a replayed new-owner LOCK */
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
		    cs, LOCK4res *, resp);
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "Get state failed in lock %d", status));
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		/*
		 * From here on sp is held and ro_sw is entered, so every
		 * failure leaves through "end" to undo both.
		 */
		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the open seqid is not checked. */
			if (rfs4_has_session(cs))
				break;
			/* FALLTHROUGH */
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_olo_seqid(olo->open_seqid,
			    sp->rs_owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->rs_owner);
			}

			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (!rfs4_has_session(cs) && !dup_lock && !create) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->rs_owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->rls_seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				    "lsp->seqid=%d old->seqid=%d",
				    lsp->rls_seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, &lsp->rls_reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->rls_dbe);

		/* Make sure to update the lock sequence id */
		lsp->rls_seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->rls_skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the lock seqid is not checked. */
			if (rfs4_has_session(cs))
				break;

			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/*
				 * True replay: the status already present
				 * in resp is returned as-is.
				 */
				rfs4_update_lease(lsp->rls_locker->rl_client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* During grace only reclaims are accepted ... */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* ... and only from clients that are allowed to reclaim. */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* A reclaim outside of the grace period is refused. */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	/* The first-request exemption from seqid checking ends here. */
	lsp->rls_skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);
}
9489
9490 /* free function for LOCK/LOCKT */
9491 static void
lock_denied_free(nfs_resop4 * resop)9492 lock_denied_free(nfs_resop4 *resop)
9493 {
9494 LOCK4denied *dp = NULL;
9495
9496 switch (resop->resop) {
9497 case OP_LOCK:
9498 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9499 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9500 break;
9501 case OP_LOCKT:
9502 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9503 dp = &resop->nfs_resop4_u.oplockt.denied;
9504 break;
9505 default:
9506 break;
9507 }
9508
9509 if (dp)
9510 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9511 }
9512
9513 /*ARGSUSED*/
9514 void
rfs4_op_locku(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9515 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9516 struct svc_req *req, struct compound_state *cs)
9517 {
9518 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9519 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9520 nfsstat4 status;
9521 stateid4 *stateid = &args->lock_stateid;
9522 rfs4_lo_state_t *lsp;
9523
9524 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9525 LOCKU4args *, args);
9526
9527 if (cs->vp == NULL) {
9528 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9529 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9530 LOCKU4res *, resp);
9531 return;
9532 }
9533
9534 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9535 *cs->statusp = resp->status = status;
9536 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9537 LOCKU4res *, resp);
9538 return;
9539 }
9540
9541 /* Ensure specified filehandle matches */
9542 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9543 rfs4_lo_state_rele(lsp, TRUE);
9544 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9545 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9546 LOCKU4res *, resp);
9547 return;
9548 }
9549
9550 /* hold off other access to lsp while we tinker */
9551 rfs4_sw_enter(&lsp->rls_sw);
9552
9553 switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9554 case NFS4_CHECK_STATEID_OKAY:
9555 if (rfs4_has_session(cs))
9556 break;
9557
9558 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9559 != NFS4_CHKSEQ_OKAY) {
9560 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9561 goto end;
9562 }
9563 break;
9564 case NFS4_CHECK_STATEID_OLD:
9565 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9566 goto end;
9567 case NFS4_CHECK_STATEID_BAD:
9568 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9569 goto end;
9570 case NFS4_CHECK_STATEID_EXPIRED:
9571 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9572 goto end;
9573 case NFS4_CHECK_STATEID_CLOSED:
9574 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9575 goto end;
9576 case NFS4_CHECK_STATEID_REPLAY:
9577 ASSERT(!rfs4_has_session(cs));
9578
9579 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9580 case NFS4_CHKSEQ_OKAY:
9581 /*
9582 * This is a replayed stateid; if
9583 * seqid matches the next expected,
9584 * then client is using wrong seqid.
9585 */
9586 case NFS4_CHKSEQ_BAD:
9587 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9588 goto end;
9589 case NFS4_CHKSEQ_REPLAY:
9590 rfs4_update_lease(lsp->rls_locker->rl_client);
9591 *cs->statusp = status = resp->status;
9592 goto end;
9593 }
9594 break;
9595 default:
9596 ASSERT(FALSE);
9597 break;
9598 }
9599
9600 rfs4_update_lock_sequence(lsp);
9601 rfs4_update_lease(lsp->rls_locker->rl_client);
9602
9603 /*
9604 * NFS4 only allows locking on regular files, so
9605 * verify type of object.
9606 */
9607 if (cs->vp->v_type != VREG) {
9608 if (cs->vp->v_type == VDIR)
9609 status = NFS4ERR_ISDIR;
9610 else
9611 status = NFS4ERR_INVAL;
9612 goto out;
9613 }
9614
9615 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9616 status = NFS4ERR_GRACE;
9617 goto out;
9618 }
9619
9620 status = rfs4_do_lock(lsp, args->locktype,
9621 args->offset, args->length, cs->cr, resop);
9622
9623 out:
9624 *cs->statusp = resp->status = status;
9625
9626 if (status == NFS4_OK)
9627 resp->lock_stateid = lsp->rls_lockid.stateid;
9628
9629 rfs4_update_lock_resp(lsp, resop);
9630
9631 end:
9632 rfs4_sw_exit(&lsp->rls_sw);
9633 rfs4_lo_state_rele(lsp, TRUE);
9634
9635 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9636 LOCKU4res *, resp);
9637 }
9638
9639 /*
9640 * LOCKT is a best effort routine, the client can not be guaranteed that
9641 * the status return is still in effect by the time the reply is received.
9642 * They are numerous race conditions in this routine, but we are not required
9643 * and can not be accurate.
9644 */
9645 /*ARGSUSED*/
9646 void
rfs4_op_lockt(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9647 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9648 struct svc_req *req, struct compound_state *cs)
9649 {
9650 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9651 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9652 rfs4_lockowner_t *lo;
9653 rfs4_client_t *cp;
9654 bool_t create = FALSE;
9655 struct flock64 flk;
9656 int error;
9657 int flag = FREAD | FWRITE;
9658 int ltype;
9659 length4 posix_length;
9660 sysid_t sysid;
9661 pid_t pid;
9662
9663 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9664 LOCKT4args *, args);
9665
9666 if (cs->vp == NULL) {
9667 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9668 goto out;
9669 }
9670
9671 /*
9672 * NFS4 only allows locking on regular files, so
9673 * verify type of object.
9674 */
9675 if (cs->vp->v_type != VREG) {
9676 if (cs->vp->v_type == VDIR)
9677 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9678 else
9679 *cs->statusp = resp->status = NFS4ERR_INVAL;
9680 goto out;
9681 }
9682
9683 /*
9684 * Check out the clientid to ensure the server knows about it
9685 * so that we correctly inform the client of a server reboot.
9686 */
9687 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9688 == NULL) {
9689 *cs->statusp = resp->status =
9690 rfs4_check_clientid(&args->owner.clientid, 0);
9691 goto out;
9692 }
9693 if (rfs4_lease_expired(cp)) {
9694 rfs4_client_close(cp);
9695 /*
9696 * Protocol doesn't allow returning NFS4ERR_STALE as
9697 * other operations do on this check so STALE_CLIENTID
9698 * is returned instead
9699 */
9700 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9701 goto out;
9702 }
9703
9704 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9705 *cs->statusp = resp->status = NFS4ERR_GRACE;
9706 rfs4_client_rele(cp);
9707 goto out;
9708 }
9709 rfs4_client_rele(cp);
9710
9711 resp->status = NFS4_OK;
9712
9713 switch (args->locktype) {
9714 case READ_LT:
9715 case READW_LT:
9716 ltype = F_RDLCK;
9717 break;
9718 case WRITE_LT:
9719 case WRITEW_LT:
9720 ltype = F_WRLCK;
9721 break;
9722 }
9723
9724 posix_length = args->length;
9725 /* Check for zero length. To lock to end of file use all ones for V4 */
9726 if (posix_length == 0) {
9727 *cs->statusp = resp->status = NFS4ERR_INVAL;
9728 goto out;
9729 } else if (posix_length == (length4)(~0)) {
9730 posix_length = 0; /* Posix to end of file */
9731 }
9732
9733 /* Find or create a lockowner */
9734 lo = rfs4_findlockowner(&args->owner, &create);
9735
9736 if (lo) {
9737 pid = lo->rl_pid;
9738 if ((resp->status =
9739 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9740 goto err;
9741 } else {
9742 pid = 0;
9743 sysid = lockt_sysid;
9744 }
9745 retry:
9746 flk.l_type = ltype;
9747 flk.l_whence = 0; /* SEEK_SET */
9748 flk.l_start = args->offset;
9749 flk.l_len = posix_length;
9750 flk.l_sysid = sysid;
9751 flk.l_pid = pid;
9752 flag |= F_REMOTELOCK;
9753
9754 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9755
9756 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9757 if (flk.l_len < 0 || flk.l_start < 0) {
9758 resp->status = NFS4ERR_INVAL;
9759 goto err;
9760 }
9761 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9762 NULL, cs->cr, NULL);
9763
9764 /*
9765 * N.B. We map error values to nfsv4 errors. This is different
9766 * from the puterrno4 routine.
9767 */
9768 switch (error) {
9769 case 0:
9770 if (flk.l_type == F_UNLCK)
9771 resp->status = NFS4_OK;
9772 else {
9773 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9774 goto retry;
9775 resp->status = NFS4ERR_DENIED;
9776 }
9777 break;
9778 case EOVERFLOW:
9779 resp->status = NFS4ERR_INVAL;
9780 break;
9781 case EINVAL:
9782 resp->status = NFS4ERR_NOTSUPP;
9783 break;
9784 default:
9785 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9786 error);
9787 resp->status = NFS4ERR_SERVERFAULT;
9788 break;
9789 }
9790
9791 err:
9792 if (lo)
9793 rfs4_lockowner_rele(lo);
9794 *cs->statusp = resp->status;
9795 out:
9796 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9797 LOCKT4res *, resp);
9798 }
9799
9800 int
rfs4_share(rfs4_state_t * sp,uint32_t access,uint32_t deny)9801 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9802 {
9803 int err;
9804 int cmd;
9805 vnode_t *vp;
9806 struct shrlock shr;
9807 struct shr_locowner shr_loco;
9808 int fflags = 0;
9809
9810 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9811 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9812
9813 if (sp->rs_closed)
9814 return (NFS4ERR_OLD_STATEID);
9815
9816 vp = sp->rs_finfo->rf_vp;
9817 ASSERT(vp);
9818
9819 shr.s_access = shr.s_deny = 0;
9820
9821 if (access & OPEN4_SHARE_ACCESS_READ) {
9822 fflags |= FREAD;
9823 shr.s_access |= F_RDACC;
9824 }
9825 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9826 fflags |= FWRITE;
9827 shr.s_access |= F_WRACC;
9828 }
9829 ASSERT(shr.s_access);
9830
9831 if (deny & OPEN4_SHARE_DENY_READ)
9832 shr.s_deny |= F_RDDNY;
9833 if (deny & OPEN4_SHARE_DENY_WRITE)
9834 shr.s_deny |= F_WRDNY;
9835
9836 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9837 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9838 shr_loco.sl_pid = shr.s_pid;
9839 shr_loco.sl_id = shr.s_sysid;
9840 shr.s_owner = (caddr_t)&shr_loco;
9841 shr.s_own_len = sizeof (shr_loco);
9842
9843 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9844
9845 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9846 if (err != 0) {
9847 if (err == EAGAIN)
9848 err = NFS4ERR_SHARE_DENIED;
9849 else
9850 err = puterrno4(err);
9851 return (err);
9852 }
9853
9854 sp->rs_share_access |= access;
9855 sp->rs_share_deny |= deny;
9856
9857 return (0);
9858 }
9859
9860 int
rfs4_unshare(rfs4_state_t * sp)9861 rfs4_unshare(rfs4_state_t *sp)
9862 {
9863 int err;
9864 struct shrlock shr;
9865 struct shr_locowner shr_loco;
9866
9867 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9868
9869 if (sp->rs_closed || sp->rs_share_access == 0)
9870 return (0);
9871
9872 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9873 ASSERT(sp->rs_finfo->rf_vp);
9874
9875 shr.s_access = shr.s_deny = 0;
9876 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9877 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9878 shr_loco.sl_pid = shr.s_pid;
9879 shr_loco.sl_id = shr.s_sysid;
9880 shr.s_owner = (caddr_t)&shr_loco;
9881 shr.s_own_len = sizeof (shr_loco);
9882
9883 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9884 NULL);
9885 if (err != 0) {
9886 err = puterrno4(err);
9887 return (err);
9888 }
9889
9890 sp->rs_share_access = 0;
9891 sp->rs_share_deny = 0;
9892
9893 return (0);
9894
9895 }
9896
9897 static int
rdma_setup_read_data4(READ4args * args,READ4res * rok)9898 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9899 {
9900 struct clist *wcl;
9901 count4 count = rok->data_len;
9902 int wlist_len;
9903
9904 wcl = args->wlist;
9905 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9906 return (FALSE);
9907 }
9908 wcl = args->wlist;
9909 rok->wlist_len = wlist_len;
9910 rok->wlist = wcl;
9911 return (TRUE);
9912 }
9913
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() reports B_FALSE for every vnode without looking
 * at its reparse data.
 */
int rfs4_no_referrals = 0;
9916
9917 /*
9918 * Find an NFS record in reparse point data.
9919 * Returns 0 for success and <0 or an errno value on failure.
9920 */
9921 int
vn_find_nfs_record(vnode_t * vp,nvlist_t ** nvlp,char ** svcp,char ** datap)9922 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9923 {
9924 int err;
9925 char *stype, *val;
9926 nvlist_t *nvl;
9927 nvpair_t *curr;
9928
9929 if ((nvl = reparse_init()) == NULL)
9930 return (-1);
9931
9932 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9933 reparse_free(nvl);
9934 return (err);
9935 }
9936
9937 curr = NULL;
9938 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9939 if ((stype = nvpair_name(curr)) == NULL) {
9940 reparse_free(nvl);
9941 return (-2);
9942 }
9943 if (strncasecmp(stype, "NFS", 3) == 0)
9944 break;
9945 }
9946
9947 if ((curr == NULL) ||
9948 (nvpair_value_string(curr, &val))) {
9949 reparse_free(nvl);
9950 return (-3);
9951 }
9952 *nvlp = nvl;
9953 *svcp = stype;
9954 *datap = val;
9955 return (0);
9956 }
9957
9958 int
vn_is_nfs_reparse(vnode_t * vp,cred_t * cr)9959 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9960 {
9961 nvlist_t *nvl;
9962 char *s, *d;
9963
9964 if (rfs4_no_referrals != 0)
9965 return (B_FALSE);
9966
9967 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9968 return (B_FALSE);
9969
9970 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9971 return (B_FALSE);
9972
9973 reparse_free(nvl);
9974
9975 return (B_TRUE);
9976 }
9977
9978 /*
9979 * There is a user-level copy of this routine in ref_subr.c.
9980 * Changes should be kept in sync.
9981 */
9982 static int
nfs4_create_components(char * path,component4 * comp4)9983 nfs4_create_components(char *path, component4 *comp4)
9984 {
9985 int slen, plen, ncomp;
9986 char *ori_path, *nxtc, buf[MAXNAMELEN];
9987
9988 if (path == NULL)
9989 return (0);
9990
9991 plen = strlen(path) + 1; /* include the terminator */
9992 ori_path = path;
9993 ncomp = 0;
9994
9995 /* count number of components in the path */
9996 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9997 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9998 if ((slen = nxtc - path) == 0) {
9999 path = nxtc + 1;
10000 continue;
10001 }
10002
10003 if (comp4 != NULL) {
10004 bcopy(path, buf, slen);
10005 buf[slen] = '\0';
10006 (void) str_to_utf8(buf, &comp4[ncomp]);
10007 }
10008
10009 ncomp++; /* 1 valid component */
10010 path = nxtc + 1;
10011 }
10012 if (*nxtc == '\0' || *nxtc == '\n')
10013 break;
10014 }
10015
10016 return (ncomp);
10017 }
10018
10019 /*
10020 * There is a user-level copy of this routine in ref_subr.c.
10021 * Changes should be kept in sync.
10022 */
10023 static int
make_pathname4(char * path,pathname4 * pathname)10024 make_pathname4(char *path, pathname4 *pathname)
10025 {
10026 int ncomp;
10027 component4 *comp4;
10028
10029 if (pathname == NULL)
10030 return (0);
10031
10032 if (path == NULL) {
10033 pathname->pathname4_val = NULL;
10034 pathname->pathname4_len = 0;
10035 return (0);
10036 }
10037
10038 /* count number of components to alloc buffer */
10039 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10040 pathname->pathname4_val = NULL;
10041 pathname->pathname4_len = 0;
10042 return (0);
10043 }
10044 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10045
10046 /* copy components into allocated buffer */
10047 ncomp = nfs4_create_components(path, comp4);
10048
10049 pathname->pathname4_val = comp4;
10050 pathname->pathname4_len = ncomp;
10051
10052 return (ncomp);
10053 }
10054
10055 #define xdr_fs_locations4 xdr_fattr4_fs_locations
10056
10057 fs_locations4 *
fetch_referral(vnode_t * vp,cred_t * cr)10058 fetch_referral(vnode_t *vp, cred_t *cr)
10059 {
10060 nvlist_t *nvl;
10061 char *stype, *sdata;
10062 fs_locations4 *result;
10063 char buf[1024];
10064 size_t bufsize;
10065 XDR xdr;
10066 int err;
10067
10068 /*
10069 * Check attrs to ensure it's a reparse point
10070 */
10071 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10072 return (NULL);
10073
10074 /*
10075 * Look for an NFS record and get the type and data
10076 */
10077 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10078 return (NULL);
10079
10080 /*
10081 * With the type and data, upcall to get the referral
10082 */
10083 bufsize = sizeof (buf);
10084 bzero(buf, sizeof (buf));
10085 err = reparse_kderef((const char *)stype, (const char *)sdata,
10086 buf, &bufsize);
10087 reparse_free(nvl);
10088
10089 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10090 char *, stype, char *, sdata, char *, buf, int, err);
10091 if (err) {
10092 cmn_err(CE_NOTE,
10093 "reparsed daemon not running: unable to get referral (%d)",
10094 err);
10095 return (NULL);
10096 }
10097
10098 /*
10099 * We get an XDR'ed record back from the kderef call
10100 */
10101 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10102 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10103 err = xdr_fs_locations4(&xdr, result);
10104 XDR_DESTROY(&xdr);
10105 if (err != TRUE) {
10106 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10107 int, err);
10108 return (NULL);
10109 }
10110
10111 /*
10112 * Look at path to recover fs_root, ignoring the leading '/'
10113 */
10114 (void) make_pathname4(vp->v_path, &result->fs_root);
10115
10116 return (result);
10117 }
10118
10119 char *
build_symlink(vnode_t * vp,cred_t * cr,size_t * strsz)10120 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10121 {
10122 fs_locations4 *fsl;
10123 fs_location4 *fs;
10124 char *server, *path, *symbuf;
10125 static char *prefix = "/net/";
10126 int i, size, npaths;
10127 uint_t len;
10128
10129 /* Get the referral */
10130 if ((fsl = fetch_referral(vp, cr)) == NULL)
10131 return (NULL);
10132
10133 /* Deal with only the first location and first server */
10134 fs = &fsl->locations_val[0];
10135 server = utf8_to_str(&fs->server_val[0], &len, NULL);
10136 if (server == NULL) {
10137 rfs4_free_fs_locations4(fsl);
10138 kmem_free(fsl, sizeof (fs_locations4));
10139 return (NULL);
10140 }
10141
10142 /* Figure out size for "/net/" + host + /path/path/path + NULL */
10143 size = strlen(prefix) + len;
10144 for (i = 0; i < fs->rootpath.pathname4_len; i++)
10145 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10146
10147 /* Allocate the symlink buffer and fill it */
10148 symbuf = kmem_zalloc(size, KM_SLEEP);
10149 (void) strcat(symbuf, prefix);
10150 (void) strcat(symbuf, server);
10151 kmem_free(server, len);
10152
10153 npaths = 0;
10154 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10155 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10156 if (path == NULL)
10157 continue;
10158 (void) strcat(symbuf, "/");
10159 (void) strcat(symbuf, path);
10160 npaths++;
10161 kmem_free(path, len);
10162 }
10163
10164 rfs4_free_fs_locations4(fsl);
10165 kmem_free(fsl, sizeof (fs_locations4));
10166
10167 if (strsz != NULL)
10168 *strsz = size;
10169 return (symbuf);
10170 }
10171
10172 /*
10173 * Check to see if we have a downrev Solaris client, so that we
10174 * can send it a symlink instead of a referral.
10175 */
10176 int
client_is_downrev(struct svc_req * req)10177 client_is_downrev(struct svc_req *req)
10178 {
10179 struct sockaddr *ca;
10180 rfs4_clntip_t *ci;
10181 bool_t create = FALSE;
10182 int is_downrev;
10183
10184 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10185 ASSERT(ca);
10186 ci = rfs4_find_clntip(ca, &create);
10187 if (ci == NULL)
10188 return (0);
10189 is_downrev = ci->ri_no_referrals;
10190 rfs4_dbe_rele(ci->ri_dbe);
10191 return (is_downrev);
10192 }
10193
10194 /*
10195 * Do the main work of handling HA-NFSv4 Resource Group failover on
10196 * Sun Cluster.
10197 * We need to detect whether any RG admin paths have been added or removed,
10198 * and adjust resources accordingly.
10199 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10200 * order to scale, the list and array of paths need to be held in more
10201 * suitable data structures.
10202 */
10203 static void
hanfsv4_failover(nfs4_srv_t * nsrv4)10204 hanfsv4_failover(nfs4_srv_t *nsrv4)
10205 {
10206 int i, start_grace, numadded_paths = 0;
10207 char **added_paths = NULL;
10208 rfs4_dss_path_t *dss_path;
10209
10210 /*
10211 * Note: currently, dss_pathlist cannot be NULL, since
10212 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10213 * make the latter dynamically specified too, the following will
10214 * need to be adjusted.
10215 */
10216
10217 /*
10218 * First, look for removed paths: RGs that have been failed-over
10219 * away from this node.
10220 * Walk the "currently-serving" dss_pathlist and, for each
10221 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10222 * from nfsd. If not, that RG path has been removed.
10223 *
10224 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10225 * any duplicates.
10226 */
10227 dss_path = nsrv4->dss_pathlist;
10228 do {
10229 int found = 0;
10230 char *path = dss_path->path;
10231
10232 /* used only for non-HA so may not be removed */
10233 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10234 dss_path = dss_path->next;
10235 continue;
10236 }
10237
10238 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10239 int cmpret;
10240 char *newpath = rfs4_dss_newpaths[i];
10241
10242 /*
10243 * Since nfsd has sorted rfs4_dss_newpaths for us,
10244 * once the return from strcmp is negative we know
10245 * we've passed the point where "path" should be,
10246 * and can stop searching: "path" has been removed.
10247 */
10248 cmpret = strcmp(path, newpath);
10249 if (cmpret < 0)
10250 break;
10251 if (cmpret == 0) {
10252 found = 1;
10253 break;
10254 }
10255 }
10256
10257 if (found == 0) {
10258 unsigned index = dss_path->index;
10259 rfs4_servinst_t *sip = dss_path->sip;
10260 rfs4_dss_path_t *path_next = dss_path->next;
10261
10262 /*
10263 * This path has been removed.
10264 * We must clear out the servinst reference to
10265 * it, since it's now owned by another
10266 * node: we should not attempt to touch it.
10267 */
10268 ASSERT(dss_path == sip->dss_paths[index]);
10269 sip->dss_paths[index] = NULL;
10270
10271 /* remove from "currently-serving" list, and destroy */
10272 remque(dss_path);
10273 /* allow for NUL */
10274 kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10275 kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10276
10277 dss_path = path_next;
10278 } else {
10279 /* path was found; not removed */
10280 dss_path = dss_path->next;
10281 }
10282 } while (dss_path != nsrv4->dss_pathlist);
10283
10284 /*
10285 * Now, look for added paths: RGs that have been failed-over
10286 * to this node.
10287 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10288 * for each path, check if it is on the "currently-serving"
10289 * dss_pathlist. If not, that RG path has been added.
10290 *
10291 * Note: we don't do duplicate detection here; nfsd does that for us.
10292 *
10293 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10294 * an upper bound for the size needed for added_paths[numadded_paths].
10295 */
10296
10297 /* probably more space than we need, but guaranteed to be enough */
10298 if (rfs4_dss_numnewpaths > 0) {
10299 size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10300 added_paths = kmem_zalloc(sz, KM_SLEEP);
10301 }
10302
10303 /* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10304 for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10305 int found = 0;
10306 char *newpath = rfs4_dss_newpaths[i];
10307
10308 dss_path = nsrv4->dss_pathlist;
10309 do {
10310 char *path = dss_path->path;
10311
10312 /* used only for non-HA */
10313 if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10314 dss_path = dss_path->next;
10315 continue;
10316 }
10317
10318 if (strncmp(path, newpath, strlen(path)) == 0) {
10319 found = 1;
10320 break;
10321 }
10322
10323 dss_path = dss_path->next;
10324 } while (dss_path != nsrv4->dss_pathlist);
10325
10326 if (found == 0) {
10327 added_paths[numadded_paths] = newpath;
10328 numadded_paths++;
10329 }
10330 }
10331
10332 /* did we find any added paths? */
10333 if (numadded_paths > 0) {
10334
10335 /* create a new server instance, and start its grace period */
10336 start_grace = 1;
10337 /* CSTYLED */
10338 rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10339
10340 /* read in the stable storage state from these paths */
10341 rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10342
10343 /*
10344 * Multiple failovers during a grace period will cause
10345 * clients of the same resource group to be partitioned
10346 * into different server instances, with different
10347 * grace periods. Since clients of the same resource
10348 * group must be subject to the same grace period,
10349 * we need to reset all currently active grace periods.
10350 */
10351 rfs4_grace_reset_all(nsrv4);
10352 }
10353
10354 if (rfs4_dss_numnewpaths > 0)
10355 kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10356 }
10357