1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or https://opensource.org/licenses/CDDL-1.0.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23  * Copyright (c) 2023, Datto Inc. All rights reserved.
24  */
25 
26 
27 #include <sys/zfs_znode.h>
28 #include <sys/zfs_vfsops.h>
29 #include <sys/zfs_vnops.h>
30 #include <sys/zfs_ctldir.h>
31 #include <sys/zpl.h>
32 
33 
34 static struct inode *
35 zpl_inode_alloc(struct super_block *sb)
36 {
37 	struct inode *ip;
38 
39 	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
40 	inode_set_iversion(ip, 1);
41 
42 	return (ip);
43 }
44 
45 static void
46 zpl_inode_destroy(struct inode *ip)
47 {
48 	ASSERT(atomic_read(&ip->i_count) == 0);
49 	zfs_inode_destroy(ip);
50 }
51 
/*
 * Called from __mark_inode_dirty() to reflect that something in the
 * inode has changed.  We use it to ensure the znode system attributes
 * are always strictly up to date with respect to the inode.
 */
57 #ifdef HAVE_DIRTY_INODE_WITH_FLAGS
58 static void
59 zpl_dirty_inode(struct inode *ip, int flags)
60 {
61 	fstrans_cookie_t cookie;
62 
63 	cookie = spl_fstrans_mark();
64 	zfs_dirty_inode(ip, flags);
65 	spl_fstrans_unmark(cookie);
66 }
67 #else
68 static void
69 zpl_dirty_inode(struct inode *ip)
70 {
71 	fstrans_cookie_t cookie;
72 
73 	cookie = spl_fstrans_mark();
74 	zfs_dirty_inode(ip, 0);
75 	spl_fstrans_unmark(cookie);
76 }
77 #endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
78 
/*
 * When ->drop_inode() is called its return value indicates if the
 * inode should be evicted from the inode cache.  If the inode is
 * unhashed and has no links the default policy is to evict it
 * immediately.
 *
 * The ->evict_inode() callback must minimally truncate the inode pages,
 * and call clear_inode().  For 2.6.35 and later kernels this will
 * simply update the inode state, with the sync occurring before the
 * truncate in evict().  For earlier kernels clear_inode() maps to
 * end_writeback() which is responsible for completing all outstanding
 * write back.  In either case, once this is done it is safe to clean up
 * any remaining inode specific data via zfs_inactive().
 */
94 static void
95 zpl_evict_inode(struct inode *ip)
96 {
97 	fstrans_cookie_t cookie;
98 
99 	cookie = spl_fstrans_mark();
100 	truncate_setsize(ip, 0);
101 	clear_inode(ip);
102 	zfs_inactive(ip);
103 	spl_fstrans_unmark(cookie);
104 }
105 
106 static void
107 zpl_put_super(struct super_block *sb)
108 {
109 	fstrans_cookie_t cookie;
110 	int error;
111 
112 	cookie = spl_fstrans_mark();
113 	error = -zfs_umount(sb);
114 	spl_fstrans_unmark(cookie);
115 	ASSERT3S(error, <=, 0);
116 }
117 
118 static int
119 zpl_sync_fs(struct super_block *sb, int wait)
120 {
121 	fstrans_cookie_t cookie;
122 	cred_t *cr = CRED();
123 	int error;
124 
125 	crhold(cr);
126 	cookie = spl_fstrans_mark();
127 	error = -zfs_sync(sb, wait, cr);
128 	spl_fstrans_unmark(cookie);
129 	crfree(cr);
130 	ASSERT3S(error, <=, 0);
131 
132 	return (error);
133 }
134 
135 static int
136 zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
137 {
138 	fstrans_cookie_t cookie;
139 	int error;
140 
141 	cookie = spl_fstrans_mark();
142 	error = -zfs_statvfs(dentry->d_inode, statp);
143 	spl_fstrans_unmark(cookie);
144 	ASSERT3S(error, <=, 0);
145 
146 	/*
147 	 * If required by a 32-bit system call, dynamically scale the
148 	 * block size up to 16MiB and decrease the block counts.  This
149 	 * allows for a maximum size of 64EiB to be reported.  The file
150 	 * counts must be artificially capped at 2^32-1.
151 	 */
152 	if (unlikely(zpl_is_32bit_api())) {
153 		while (statp->f_blocks > UINT32_MAX &&
154 		    statp->f_bsize < SPA_MAXBLOCKSIZE) {
155 			statp->f_frsize <<= 1;
156 			statp->f_bsize <<= 1;
157 
158 			statp->f_blocks >>= 1;
159 			statp->f_bfree >>= 1;
160 			statp->f_bavail >>= 1;
161 		}
162 
163 		uint64_t usedobjs = statp->f_files - statp->f_ffree;
164 		statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
165 		statp->f_files = statp->f_ffree + usedobjs;
166 	}
167 
168 	return (error);
169 }
170 
171 static int
172 zpl_remount_fs(struct super_block *sb, int *flags, char *data)
173 {
174 	zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
175 	fstrans_cookie_t cookie;
176 	int error;
177 
178 	cookie = spl_fstrans_mark();
179 	error = -zfs_remount(sb, flags, &zm);
180 	spl_fstrans_unmark(cookie);
181 	ASSERT3S(error, <=, 0);
182 
183 	return (error);
184 }
185 
186 static int
187 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
188 {
189 	int error;
190 	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
191 		return (error);
192 
193 	char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
194 	dmu_objset_name(zfsvfs->z_os, fsname);
195 
196 	for (int i = 0; fsname[i] != 0; i++) {
197 		/*
198 		 * Spaces in the dataset name must be converted to their
199 		 * octal escape sequence for getmntent(3) to correctly
200 		 * parse then fsname portion of /proc/self/mounts.
201 		 */
202 		if (fsname[i] == ' ') {
203 			seq_puts(seq, "\\040");
204 		} else {
205 			seq_putc(seq, fsname[i]);
206 		}
207 	}
208 
209 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
210 
211 	zpl_exit(zfsvfs, FTAG);
212 
213 	return (0);
214 }
215 
216 static int
217 zpl_show_devname(struct seq_file *seq, struct dentry *root)
218 {
219 	return (__zpl_show_devname(seq, root->d_sb->s_fs_info));
220 }
221 
222 static int
223 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
224 {
225 	seq_printf(seq, ",%s",
226 	    zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
227 
228 #ifdef CONFIG_FS_POSIX_ACL
229 	switch (zfsvfs->z_acl_type) {
230 	case ZFS_ACLTYPE_POSIX:
231 		seq_puts(seq, ",posixacl");
232 		break;
233 	default:
234 		seq_puts(seq, ",noacl");
235 		break;
236 	}
237 #endif /* CONFIG_FS_POSIX_ACL */
238 
239 	switch (zfsvfs->z_case) {
240 	case ZFS_CASE_SENSITIVE:
241 		seq_puts(seq, ",casesensitive");
242 		break;
243 	case ZFS_CASE_INSENSITIVE:
244 		seq_puts(seq, ",caseinsensitive");
245 		break;
246 	default:
247 		seq_puts(seq, ",casemixed");
248 		break;
249 	}
250 
251 	return (0);
252 }
253 
254 static int
255 zpl_show_options(struct seq_file *seq, struct dentry *root)
256 {
257 	return (__zpl_show_options(seq, root->d_sb->s_fs_info));
258 }
259 
260 static int
261 zpl_fill_super(struct super_block *sb, void *data, int silent)
262 {
263 	zfs_mnt_t *zm = (zfs_mnt_t *)data;
264 	fstrans_cookie_t cookie;
265 	int error;
266 
267 	cookie = spl_fstrans_mark();
268 	error = -zfs_domount(sb, zm, silent);
269 	spl_fstrans_unmark(cookie);
270 	ASSERT3S(error, <=, 0);
271 
272 	return (error);
273 }
274 
275 static int
276 zpl_test_super(struct super_block *s, void *data)
277 {
278 	zfsvfs_t *zfsvfs = s->s_fs_info;
279 	objset_t *os = data;
280 	int match;
281 
282 	/*
283 	 * If the os doesn't match the z_os in the super_block, assume it is
284 	 * not a match. Matching would imply a multimount of a dataset. It is
285 	 * possible that during a multimount, there is a simultaneous operation
286 	 * that changes the z_os, e.g., rollback, where the match will be
287 	 * missed, but in that case the user will get an EBUSY.
288 	 */
289 	if (zfsvfs == NULL || os != zfsvfs->z_os)
290 		return (0);
291 
292 	/*
293 	 * If they do match, recheck with the lock held to prevent mounting the
294 	 * wrong dataset since z_os can be stale when the teardown lock is held.
295 	 */
296 	if (zpl_enter(zfsvfs, FTAG) != 0)
297 		return (0);
298 	match = (os == zfsvfs->z_os);
299 	zpl_exit(zfsvfs, FTAG);
300 
301 	return (match);
302 }
303 
304 static struct super_block *
305 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
306 {
307 	struct super_block *s;
308 	objset_t *os;
309 	int err;
310 
311 	err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
312 	if (err)
313 		return (ERR_PTR(-err));
314 
315 	/*
316 	 * The dsl pool lock must be released prior to calling sget().
317 	 * It is possible sget() may block on the lock in grab_super()
318 	 * while deactivate_super() holds that same lock and waits for
319 	 * a txg sync.  If the dsl_pool lock is held over sget()
320 	 * this can prevent the pool sync and cause a deadlock.
321 	 */
322 	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
323 	dsl_pool_rele(dmu_objset_pool(os), FTAG);
324 
325 	s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
326 
327 	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
328 	dsl_dataset_rele(dmu_objset_ds(os), FTAG);
329 
330 	if (IS_ERR(s))
331 		return (ERR_CAST(s));
332 
333 	if (s->s_root == NULL) {
334 		err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
335 		if (err) {
336 			deactivate_locked_super(s);
337 			return (ERR_PTR(err));
338 		}
339 		s->s_flags |= SB_ACTIVE;
340 	} else if ((flags ^ s->s_flags) & SB_RDONLY) {
341 		deactivate_locked_super(s);
342 		return (ERR_PTR(-EBUSY));
343 	}
344 
345 	return (s);
346 }
347 
348 static struct dentry *
349 zpl_mount(struct file_system_type *fs_type, int flags,
350     const char *osname, void *data)
351 {
352 	zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
353 
354 	struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
355 	if (IS_ERR(sb))
356 		return (ERR_CAST(sb));
357 
358 	return (dget(sb->s_root));
359 }
360 
/*
 * ->kill_sb() hook: call zfs_preumount() on the super block and then hand
 * it to kill_anon_super().
 */
static void
zpl_kill_sb(struct super_block *sb)
{
	/* The ZFS side goes first, before the generic teardown. */
	zfs_preumount(sb);

	kill_anon_super(sb);
}
367 
368 void
369 zpl_prune_sb(int64_t nr_to_scan, void *arg)
370 {
371 	struct super_block *sb = (struct super_block *)arg;
372 	int objects = 0;
373 
374 	(void) -zfs_prune(sb, nr_to_scan, &objects);
375 }
376 
377 const struct super_operations zpl_super_operations = {
378 	.alloc_inode		= zpl_inode_alloc,
379 	.destroy_inode		= zpl_inode_destroy,
380 	.dirty_inode		= zpl_dirty_inode,
381 	.write_inode		= NULL,
382 	.evict_inode		= zpl_evict_inode,
383 	.put_super		= zpl_put_super,
384 	.sync_fs		= zpl_sync_fs,
385 	.statfs			= zpl_statfs,
386 	.remount_fs		= zpl_remount_fs,
387 	.show_devname		= zpl_show_devname,
388 	.show_options		= zpl_show_options,
389 	.show_stats		= NULL,
390 };
391 
392 struct file_system_type zpl_fs_type = {
393 	.owner			= THIS_MODULE,
394 	.name			= ZFS_DRIVER,
395 #if defined(HAVE_IDMAP_MNT_API)
396 	.fs_flags		= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
397 #else
398 	.fs_flags		= FS_USERNS_MOUNT,
399 #endif
400 	.mount			= zpl_mount,
401 	.kill_sb		= zpl_kill_sb,
402 };
403