xref: /dragonfly/sys/kern/vfs_quota.c (revision 8accc937)
1 /*
2  * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/sysctl.h>
34 #include <sys/mount.h>
35 #include <sys/systm.h>
36 #include <sys/nlookup.h>
37 #include <sys/vnode.h>
38 #include <sys/stat.h>
39 #include <sys/vfs_quota.h>
40 #include <sys/spinlock.h>
41 #include <sys/spinlock2.h>
42 #include <inttypes.h>
43 
44 #include <sys/sysproto.h>
45 #include <libprop/proplib.h>
46 #include <libprop/prop_dictionary.h>
47 
48 /* in-memory accounting, red-black tree based */
49 /* FIXME: code duplication caused by uid_t / gid_t differences */
50 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
51 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
52 
53 static int
54 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b);
55 static int
56 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b);
57 
58 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
59 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
60 
61 struct ac_unode* unode_insert(struct mount*, uid_t);
62 struct ac_gnode* gnode_insert(struct mount*, gid_t);
63 
64 static int
65 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b)
66 {
67 	if (a->left_bits < b->left_bits)
68 		return(-1);
69 	else if (a->left_bits > b->left_bits)
70 		return(1);
71 	return(0);
72 }
73 
74 static int
75 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b)
76 {
77 	if (a->left_bits < b->left_bits)
78 		return(-1);
79 	else if (a->left_bits > b->left_bits)
80 		return(1);
81 	return(0);
82 }
83 
84 struct ac_unode*
85 unode_insert(struct mount *mp, uid_t uid)
86 {
87 	struct ac_unode *unp, *res;
88 
89 	unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK);
90 
91 	unp->left_bits = (uid >> ACCT_CHUNK_BITS);
92 	res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp);
93 	KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL"));
94 
95 	return unp;
96 }
97 
98 struct ac_gnode*
99 gnode_insert(struct mount *mp, gid_t gid)
100 {
101 	struct ac_gnode *gnp, *res;
102 
103 	gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK);
104 
105 	gnp->left_bits = (gid >> ACCT_CHUNK_BITS);
106 	res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp);
107 	KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL"));
108 
109 	return gnp;
110 }
111 
112 int vfs_quota_enabled = 0;
113 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled);
114 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD,
115                  &vfs_quota_enabled, 0, "Enable VFS quota");
116 
117 /* initializes per mount-point data structures */
118 void
119 vq_init(struct mount *mp)
120 {
121 
122 	if (!vfs_quota_enabled)
123 		return;
124 
125 	/* initialize the rb trees */
126 	RB_INIT(&mp->mnt_acct.ac_uroot);
127 	RB_INIT(&mp->mnt_acct.ac_groot);
128 	spin_init(&mp->mnt_acct.ac_spin);
129 
130 	mp->mnt_acct.ac_bytes = 0;
131 
132 	/* enable data collection */
133 	mp->mnt_op->vfs_account = vfs_stdaccount;
134 	/* mark this filesystem quota enabled */
135 	mp->mnt_flag |= MNT_QUOTA;
136 	if (bootverbose)
137 		kprintf("vfs accounting enabled for %s\n",
138 		    mp->mnt_stat.f_mntonname);
139 }
140 
141 
/*
 * Counterpart of vq_init() for a mount point.  Currently a no-op: the
 * nodes of the accounting trees are not released here.
 */
void
vq_done(struct mount *mp)
{
	/* TODO: remove the rb trees here */
}
147 
148 void
149 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta)
150 {
151 	struct ac_unode ufind, *unp;
152 	struct ac_gnode gfind, *gnp;
153 
154 	/* find or create address of chunk */
155 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
156 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
157 
158 	spin_lock(&mp->mnt_acct.ac_spin);
159 
160 	mp->mnt_acct.ac_bytes += delta;
161 
162 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
163 		unp = unode_insert(mp, uid);
164 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
165 		gnp = gnode_insert(mp, gid);
166 
167 	/* update existing chunk */
168 	unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta;
169 	gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta;
170 
171 	spin_unlock(&mp->mnt_acct.ac_spin);
172 }
173 
174 static void
175 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out)
176 {
177 	struct ac_unode *unp;
178 	struct ac_gnode *gnp;
179 	int i;
180 	prop_dictionary_t item;
181 
182 	item = prop_dictionary_create();
183 	(void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes);
184 	(void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit);
185 	prop_array_add_and_rel(dict_out, item);
186 
187 	RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
188 		for (i=0; i<ACCT_CHUNK_NIDS; i++) {
189 			if (unp->uid_chunk[i].space != 0) {
190 				item = prop_dictionary_create();
191 				(void) prop_dictionary_set_uint32(item, "uid",
192 					(unp->left_bits << ACCT_CHUNK_BITS) + i);
193 				(void) prop_dictionary_set_uint64(item, "space used",
194 					unp->uid_chunk[i].space);
195 				(void) prop_dictionary_set_uint64(item, "limit",
196 					unp->uid_chunk[i].limit);
197 				prop_array_add_and_rel(dict_out, item);
198 			}
199 		}
200 	}
201 
202 	RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
203 		for (i=0; i<ACCT_CHUNK_NIDS; i++) {
204 			if (gnp->gid_chunk[i].space != 0) {
205 				item = prop_dictionary_create();
206 				(void) prop_dictionary_set_uint32(item, "gid",
207 					(gnp->left_bits << ACCT_CHUNK_BITS) + i);
208 				(void) prop_dictionary_set_uint64(item, "space used",
209 					gnp->gid_chunk[i].space);
210 				(void) prop_dictionary_set_uint64(item, "limit",
211 					gnp->gid_chunk[i].limit);
212 				prop_array_add_and_rel(dict_out, item);
213 			}
214 		}
215 	}
216 }
217 
218 static int
219 cmd_set_usage_all(struct mount *mp, prop_array_t args)
220 {
221 	struct ac_unode ufind, *unp;
222 	struct ac_gnode gfind, *gnp;
223 	prop_dictionary_t item;
224 	prop_object_iterator_t iter;
225 	uint32_t id;
226 	uint64_t space;
227 
228 	spin_lock(&mp->mnt_acct.ac_spin);
229 	/* 0. zero all statistics */
230 	/* we don't bother to free up memory, most of it would probably be
231 	 * re-allocated immediately anyway. just bzeroing the existing nodes
232 	 * is fine */
233 	mp->mnt_acct.ac_bytes = 0;
234 	RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
235 		bzero(&unp->uid_chunk, sizeof(unp->uid_chunk));
236 	}
237 	RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
238 		bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk));
239 	}
240 
241 	/* args contains an array of dict */
242 	iter = prop_array_iterator(args);
243 	if (iter == NULL) {
244 		kprintf("cmd_set_usage_all(): failed to create iterator\n");
245 		spin_unlock(&mp->mnt_acct.ac_spin);
246 		return 1;
247 	}
248 	while ((item = prop_object_iterator_next(iter)) != NULL) {
249 		prop_dictionary_get_uint64(item, "space used", &space);
250 		if (prop_dictionary_get_uint32(item, "uid", &id)) {
251 			ufind.left_bits = (id >> ACCT_CHUNK_BITS);
252 			unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind);
253 			if (unp == NULL)
254 				unp = unode_insert(mp, id);
255 			unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
256 		} else if (prop_dictionary_get_uint32(item, "gid", &id)) {
257 			gfind.left_bits = (id >> ACCT_CHUNK_BITS);
258 			gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind);
259 			if (gnp == NULL)
260 				gnp = gnode_insert(mp, id);
261 			gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
262 		} else {
263 			mp->mnt_acct.ac_bytes = space;
264 		}
265 	}
266 	prop_object_iterator_release(iter);
267 
268 	spin_unlock(&mp->mnt_acct.ac_spin);
269 	return 0;
270 }
271 
272 static int
273 cmd_set_limit(struct mount *mp, prop_dictionary_t args)
274 {
275 	uint64_t limit;
276 
277 	prop_dictionary_get_uint64(args, "limit", &limit);
278 
279 	spin_lock(&mp->mnt_acct.ac_spin);
280 	mp->mnt_acct.ac_limit = limit;
281 	spin_unlock(&mp->mnt_acct.ac_spin);
282 
283 	return 0;
284 }
285 
286 static int
287 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args)
288 {
289 	uint64_t limit;
290 	uid_t uid;
291 	struct ac_unode ufind, *unp;
292 
293 	prop_dictionary_get_uint32(args, "uid", &uid);
294 	prop_dictionary_get_uint64(args, "limit", &limit);
295 
296 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
297 
298 	spin_lock(&mp->mnt_acct.ac_spin);
299 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
300 		unp = unode_insert(mp, uid);
301 	unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit;
302 	spin_unlock(&mp->mnt_acct.ac_spin);
303 
304 	return 0;
305 }
306 
307 static int
308 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args)
309 {
310 	uint64_t limit;
311 	gid_t gid;
312 	struct ac_gnode gfind, *gnp;
313 
314 	prop_dictionary_get_uint32(args, "gid", &gid);
315 	prop_dictionary_get_uint64(args, "limit", &limit);
316 
317 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
318 
319 	spin_lock(&mp->mnt_acct.ac_spin);
320 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
321 		gnp = gnode_insert(mp, gid);
322 	gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit;
323 	spin_unlock(&mp->mnt_acct.ac_spin);
324 
325 	return 0;
326 }
327 
328 int
329 sys_vquotactl(struct vquotactl_args *vqa)
330 /* const char *path, struct plistref *pref */
331 {
332 	const char *path;
333 	struct plistref pref;
334 	prop_dictionary_t dict;
335 	prop_object_t args;
336 	char *cmd;
337 
338 	prop_array_t pa_out;
339 
340 	struct nlookupdata nd;
341 	struct mount *mp;
342 	int error;
343 
344 	if (!vfs_quota_enabled)
345 		return EOPNOTSUPP;
346 	path = vqa->path;
347 	error = copyin(vqa->pref, &pref, sizeof(pref));
348 	error = prop_dictionary_copyin(&pref, &dict);
349 	if (error != 0)
350 		return(error);
351 
352 	/* we have a path, get its mount point */
353 	error = nlookup_init(&nd, path, UIO_USERSPACE, 0);
354 	if (error != 0)
355 		return (error);
356 	error = nlookup(&nd);
357 	if (error != 0)
358 		return (error);
359 	mp = nd.nl_nch.mount;
360 	nlookup_done(&nd);
361 
362 	/* get the command */
363 	if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) {
364 		kprintf("sys_vquotactl(): couldn't get command\n");
365 		return EINVAL;
366 	}
367 	args = prop_dictionary_get(dict, "arguments");
368 	if (args == NULL) {
369 		kprintf("couldn't get arguments\n");
370 		return EINVAL;
371 	}
372 
373 	pa_out = prop_array_create();
374 	if (pa_out == NULL)
375 		return ENOMEM;
376 
377 	if (strcmp(cmd, "get usage all") == 0) {
378 		cmd_get_usage_all(mp, pa_out);
379 		goto done;
380 	}
381 	if (strcmp(cmd, "set usage all") == 0) {
382 		error = cmd_set_usage_all(mp, args);
383 		goto done;
384 	}
385 	if (strcmp(cmd, "set limit") == 0) {
386 		error = cmd_set_limit(mp, args);
387 		goto done;
388 	}
389 	if (strcmp(cmd, "set limit uid") == 0) {
390 		error = cmd_set_limit_uid(mp, args);
391 		goto done;
392 	}
393 	if (strcmp(cmd, "set limit gid") == 0) {
394 		error = cmd_set_limit_gid(mp, args);
395 		goto done;
396 	}
397 	return EINVAL;
398 
399 done:
400 	/* kernel to userland */
401 	dict = prop_dictionary_create();
402 	error = prop_dictionary_set(dict, "returned data", pa_out);
403 
404 	error = prop_dictionary_copyout(&pref, dict);
405 	error = copyout(&pref, vqa->pref, sizeof(pref));
406 
407 	return error;
408 }
409 
410 /*
411  * Returns a valid mount point for accounting purposes
412  * We cannot simply use vp->v_mount if the vnode belongs
413  * to a PFS mount point
414  */
415 struct mount*
416 vq_vptomp(struct vnode *vp)
417 {
418 	/* XXX: vp->v_pfsmp may point to a freed structure
419 	* we use mountlist_exists() to check if it is valid
420 	* before using it */
421 	if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) {
422 		/* This is a PFS, use a copy of the real mp */
423 		return vp->v_pfsmp;
424 	} else {
425 		/* Not a PFS or a PFS beeing unmounted */
426 		return vp->v_mount;
427 	}
428 }
429 
430 int
431 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta)
432 {
433 	int rv = 1;
434 	struct ac_unode ufind, *unp;
435 	struct ac_gnode gfind, *gnp;
436 	uint64_t space, limit;
437 
438 	spin_lock(&mp->mnt_acct.ac_spin);
439 
440 	if (mp->mnt_acct.ac_limit == 0)
441 		goto check_uid;
442 	if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) {
443 		rv = 0;
444 		goto done;
445 	}
446 
447 check_uid:
448 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
449 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) {
450 		space = 0;
451 		limit = 0;
452 	} else {
453 		space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space;
454 		limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit;
455 	}
456 	if (limit == 0)
457 		goto check_gid;
458 	if ((space + delta) > limit) {
459 		rv = 0;
460 		goto done;
461 	}
462 
463 check_gid:
464 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
465 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) {
466 		space = 0;
467 		limit = 0;
468 	} else {
469 		space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space;
470 		limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit;
471 	}
472 	if (limit == 0)
473 		goto done;
474 	if ((space + delta) > limit)
475 		rv = 0;
476 
477 done:
478 	spin_unlock(&mp->mnt_acct.ac_spin);
479 	return rv;
480 }
481