1 /*
2 * Copyright (c) 2021 Klara Systems, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/kmem.h>
30 #include <linux/file.h>
31 #include <linux/magic.h>
32 #include <sys/zone.h>
33 #include <sys/string.h>
34
35 #if defined(CONFIG_USER_NS)
36 #include <linux/statfs.h>
37 #include <linux/proc_ns.h>
38 #endif
39
40 #include <sys/mutex.h>
41
42 static kmutex_t zone_datasets_lock;
43 static struct list_head zone_datasets;
44
45 typedef struct zone_datasets {
46 struct list_head zds_list; /* zone_datasets linkage */
47 struct user_namespace *zds_userns; /* namespace reference */
48 struct list_head zds_datasets; /* datasets for the namespace */
49 } zone_datasets_t;
50
51 typedef struct zone_dataset {
52 struct list_head zd_list; /* zone_dataset linkage */
53 size_t zd_dsnamelen; /* length of name */
54 char zd_dsname[]; /* name of the member dataset */
55 } zone_dataset_t;
56
57 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
58 /*
59 * Returns:
60 * - 0 on success
61 * - EBADF if it cannot open the provided file descriptor
62 * - ENOTTY if the file itself is a not a user namespace file. We want to
63 * intercept this error in the ZFS layer. We cannot just return one of the
64 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
65 * and the SPL layers.
66 */
67 static int
user_ns_get(int fd,struct user_namespace ** userns)68 user_ns_get(int fd, struct user_namespace **userns)
69 {
70 struct kstatfs st;
71 struct file *nsfile;
72 struct ns_common *ns;
73 int error;
74
75 if ((nsfile = fget(fd)) == NULL)
76 return (EBADF);
77 if (vfs_statfs(&nsfile->f_path, &st) != 0) {
78 error = ENOTTY;
79 goto done;
80 }
81 if (st.f_type != NSFS_MAGIC) {
82 error = ENOTTY;
83 goto done;
84 }
85 ns = get_proc_ns(file_inode(nsfile));
86 if (ns->ops->type != CLONE_NEWUSER) {
87 error = ENOTTY;
88 goto done;
89 }
90 *userns = container_of(ns, struct user_namespace, ns);
91
92 error = 0;
93 done:
94 fput(nsfile);
95
96 return (error);
97 }
98 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
99
100 static unsigned int
user_ns_zoneid(struct user_namespace * user_ns)101 user_ns_zoneid(struct user_namespace *user_ns)
102 {
103 unsigned int r;
104
105 #if defined(HAVE_USER_NS_COMMON_INUM)
106 r = user_ns->ns.inum;
107 #else
108 r = user_ns->proc_inum;
109 #endif
110
111 return (r);
112 }
113
114 static struct zone_datasets *
zone_datasets_lookup(unsigned int nsinum)115 zone_datasets_lookup(unsigned int nsinum)
116 {
117 zone_datasets_t *zds;
118
119 list_for_each_entry(zds, &zone_datasets, zds_list) {
120 if (user_ns_zoneid(zds->zds_userns) == nsinum)
121 return (zds);
122 }
123 return (NULL);
124 }
125
126 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
127 static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t * zds,const char * dataset,size_t dsnamelen)128 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
129 {
130 zone_dataset_t *zd;
131
132 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
133 if (zd->zd_dsnamelen != dsnamelen)
134 continue;
135 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
136 return (zd);
137 }
138
139 return (NULL);
140 }
141
142 static int
zone_dataset_cred_check(cred_t * cred)143 zone_dataset_cred_check(cred_t *cred)
144 {
145
146 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
147 return (EPERM);
148
149 return (0);
150 }
151 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
152
/*
 * Validate a dataset name and compute its effective length.
 *
 * A name that is empty or starts with '/' is rejected with ENOENT and
 * *dsnamelen is left untouched.  Otherwise 0 is returned and *dsnamelen is
 * set to the length of the name, not counting a single trailing '/' if one
 * was supplied.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* len >= 1 here, so indexing len - 1 is safe. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
167
168 int
zone_dataset_attach(cred_t * cred,const char * dataset,int userns_fd)169 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
170 {
171 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
172 struct user_namespace *userns;
173 zone_datasets_t *zds;
174 zone_dataset_t *zd;
175 int error;
176 size_t dsnamelen;
177
178 if ((error = zone_dataset_cred_check(cred)) != 0)
179 return (error);
180 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
181 return (error);
182 if ((error = user_ns_get(userns_fd, &userns)) != 0)
183 return (error);
184
185 mutex_enter(&zone_datasets_lock);
186 zds = zone_datasets_lookup(user_ns_zoneid(userns));
187 if (zds == NULL) {
188 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
189 INIT_LIST_HEAD(&zds->zds_list);
190 INIT_LIST_HEAD(&zds->zds_datasets);
191 zds->zds_userns = userns;
192 /*
193 * Lock the namespace by incresing its refcount to prevent
194 * the namespace ID from being reused.
195 */
196 get_user_ns(userns);
197 list_add_tail(&zds->zds_list, &zone_datasets);
198 } else {
199 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
200 if (zd != NULL) {
201 mutex_exit(&zone_datasets_lock);
202 return (EEXIST);
203 }
204 }
205
206 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
207 zd->zd_dsnamelen = dsnamelen;
208 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
209 INIT_LIST_HEAD(&zd->zd_list);
210 list_add_tail(&zd->zd_list, &zds->zds_datasets);
211
212 mutex_exit(&zone_datasets_lock);
213 return (0);
214 #else
215 return (ENXIO);
216 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
217 }
218 EXPORT_SYMBOL(zone_dataset_attach);
219
220 int
zone_dataset_detach(cred_t * cred,const char * dataset,int userns_fd)221 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
222 {
223 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
224 struct user_namespace *userns;
225 zone_datasets_t *zds;
226 zone_dataset_t *zd;
227 int error;
228 size_t dsnamelen;
229
230 if ((error = zone_dataset_cred_check(cred)) != 0)
231 return (error);
232 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
233 return (error);
234 if ((error = user_ns_get(userns_fd, &userns)) != 0)
235 return (error);
236
237 mutex_enter(&zone_datasets_lock);
238 zds = zone_datasets_lookup(user_ns_zoneid(userns));
239 if (zds != NULL)
240 zd = zone_dataset_lookup(zds, dataset, dsnamelen);
241 if (zds == NULL || zd == NULL) {
242 mutex_exit(&zone_datasets_lock);
243 return (ENOENT);
244 }
245
246 list_del(&zd->zd_list);
247 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
248
249 /* Prune the namespace entry if it has no more delegations. */
250 if (list_empty(&zds->zds_datasets)) {
251 /*
252 * Decrease the refcount now that the namespace is no longer
253 * used. It is no longer necessary to prevent the namespace ID
254 * from being reused.
255 */
256 put_user_ns(userns);
257 list_del(&zds->zds_list);
258 kmem_free(zds, sizeof (*zds));
259 }
260
261 mutex_exit(&zone_datasets_lock);
262 return (0);
263 #else
264 return (ENXIO);
265 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
266 }
267 EXPORT_SYMBOL(zone_dataset_detach);
268
269 /*
270 * A dataset is visible if:
271 * - It is a parent of a namespace entry.
272 * - It is one of the namespace entries.
273 * - It is a child of a namespace entry.
274 *
275 * A dataset is writable if:
276 * - It is one of the namespace entries.
277 * - It is a child of a namespace entry.
278 *
279 * The parent datasets of namespace entries are visible and
280 * read-only to provide a path back to the root of the pool.
281 */
282 int
zone_dataset_visible(const char * dataset,int * write)283 zone_dataset_visible(const char *dataset, int *write)
284 {
285 zone_datasets_t *zds;
286 zone_dataset_t *zd;
287 size_t dsnamelen, zd_len;
288 int visible;
289
290 /* Default to read-only, in case visible is returned. */
291 if (write != NULL)
292 *write = 0;
293 if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
294 return (0);
295 if (INGLOBALZONE(curproc)) {
296 if (write != NULL)
297 *write = 1;
298 return (1);
299 }
300
301 mutex_enter(&zone_datasets_lock);
302 zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
303 if (zds == NULL) {
304 mutex_exit(&zone_datasets_lock);
305 return (0);
306 }
307
308 visible = 0;
309 list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
310 zd_len = strlen(zd->zd_dsname);
311 if (zd_len > dsnamelen) {
312 /*
313 * The name of the namespace entry is longer than that
314 * of the dataset, so it could be that the dataset is a
315 * parent of the namespace entry.
316 */
317 visible = memcmp(zd->zd_dsname, dataset,
318 dsnamelen) == 0 &&
319 zd->zd_dsname[dsnamelen] == '/';
320 if (visible)
321 break;
322 } else if (zd_len == dsnamelen) {
323 /*
324 * The name of the namespace entry is as long as that
325 * of the dataset, so perhaps the dataset itself is the
326 * namespace entry.
327 */
328 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
329 if (visible) {
330 if (write != NULL)
331 *write = 1;
332 break;
333 }
334 } else {
335 /*
336 * The name of the namespace entry is shorter than that
337 * of the dataset, so perhaps the dataset is a child of
338 * the namespace entry.
339 */
340 visible = memcmp(zd->zd_dsname, dataset,
341 zd_len) == 0 && dataset[zd_len] == '/';
342 if (visible) {
343 if (write != NULL)
344 *write = 1;
345 break;
346 }
347 }
348 }
349
350 mutex_exit(&zone_datasets_lock);
351 return (visible);
352 }
353 EXPORT_SYMBOL(zone_dataset_visible);
354
/*
 * Return the zone id of the initial user namespace (init_user_ns), or 0
 * when the kernel is built without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
367
368 unsigned int
crgetzoneid(const cred_t * cr)369 crgetzoneid(const cred_t *cr)
370 {
371 unsigned int r = 0;
372
373 #if defined(CONFIG_USER_NS)
374 r = user_ns_zoneid(cr->user_ns);
375 #endif
376
377 return (r);
378 }
379 EXPORT_SYMBOL(crgetzoneid);
380
381 boolean_t
inglobalzone(proc_t * proc)382 inglobalzone(proc_t *proc)
383 {
384 #if defined(CONFIG_USER_NS)
385 return (proc->cred->user_ns == &init_user_ns);
386 #else
387 return (B_TRUE);
388 #endif
389 }
390 EXPORT_SYMBOL(inglobalzone);
391
392 int
spl_zone_init(void)393 spl_zone_init(void)
394 {
395 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
396 INIT_LIST_HEAD(&zone_datasets);
397 return (0);
398 }
399
400 void
spl_zone_fini(void)401 spl_zone_fini(void)
402 {
403 zone_datasets_t *zds;
404 zone_dataset_t *zd;
405
406 /*
407 * It would be better to assert an empty zone_datasets, but since
408 * there's no automatic mechanism for cleaning them up if the user
409 * namespace is destroyed, just do it here, since spl is about to go
410 * out of context.
411 */
412 while (!list_empty(&zone_datasets)) {
413 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
414 while (!list_empty(&zds->zds_datasets)) {
415 zd = list_entry(zds->zds_datasets.next,
416 zone_dataset_t, zd_list);
417 list_del(&zd->zd_list);
418 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
419 }
420 put_user_ns(zds->zds_userns);
421 list_del(&zds->zds_list);
422 kmem_free(zds, sizeof (*zds));
423 }
424 mutex_destroy(&zone_datasets_lock);
425 }
426