xref: /dragonfly/sys/kern/vfs_quota.c (revision 0ca59c34)
1 /*
2  * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in
13  *    the documentation and/or other materials provided with the
14  *    distribution.
15  * 3. Neither the name of The DragonFly Project nor the names of its
16  *    contributors may be used to endorse or promote products derived
17  *    from this software without specific, prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
27  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
29  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/sysctl.h>
34 #include <sys/mount.h>
35 #include <sys/systm.h>
36 #include <sys/nlookup.h>
37 #include <sys/vnode.h>
38 #include <sys/stat.h>
39 #include <sys/vfs_quota.h>
40 #include <sys/spinlock.h>
41 #include <sys/spinlock2.h>
42 
43 #include <sys/sysproto.h>
44 #include <libprop/proplib.h>
45 #include <libprop/prop_dictionary.h>
46 
47 /* in-memory accounting, red-black tree based */
48 /* FIXME: code duplication caused by uid_t / gid_t differences */
49 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
50 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
51 
52 static int
53 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b);
54 static int
55 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b);
56 
57 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp);
58 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp);
59 
60 struct ac_unode* unode_insert(struct mount*, uid_t);
61 struct ac_gnode* gnode_insert(struct mount*, gid_t);
62 
63 static int
64 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b)
65 {
66 	if (a->left_bits < b->left_bits)
67 		return(-1);
68 	else if (a->left_bits > b->left_bits)
69 		return(1);
70 	return(0);
71 }
72 
73 static int
74 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b)
75 {
76 	if (a->left_bits < b->left_bits)
77 		return(-1);
78 	else if (a->left_bits > b->left_bits)
79 		return(1);
80 	return(0);
81 }
82 
83 struct ac_unode*
84 unode_insert(struct mount *mp, uid_t uid)
85 {
86 	struct ac_unode *unp, *res;
87 
88 	unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK);
89 
90 	unp->left_bits = (uid >> ACCT_CHUNK_BITS);
91 	res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp);
92 	KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL"));
93 
94 	return unp;
95 }
96 
97 struct ac_gnode*
98 gnode_insert(struct mount *mp, gid_t gid)
99 {
100 	struct ac_gnode *gnp, *res;
101 
102 	gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK);
103 
104 	gnp->left_bits = (gid >> ACCT_CHUNK_BITS);
105 	res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp);
106 	KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL"));
107 
108 	return gnp;
109 }
110 
111 int vfs_quota_enabled = 0;
112 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled);
113 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD,
114                  &vfs_quota_enabled, 0, "Enable VFS quota");
115 
116 /* initializes per mount-point data structures */
117 void
118 vq_init(struct mount *mp)
119 {
120 
121 	if (!vfs_quota_enabled)
122 		return;
123 
124 	/* initialize the rb trees */
125 	RB_INIT(&mp->mnt_acct.ac_uroot);
126 	RB_INIT(&mp->mnt_acct.ac_groot);
127 	spin_init(&mp->mnt_acct.ac_spin, "vqinit");
128 
129 	mp->mnt_acct.ac_bytes = 0;
130 
131 	/* enable data collection */
132 	mp->mnt_op->vfs_account = vfs_stdaccount;
133 	/* mark this filesystem quota enabled */
134 	mp->mnt_flag |= MNT_QUOTA;
135 	if (bootverbose)
136 		kprintf("vfs accounting enabled for %s\n",
137 		    mp->mnt_stat.f_mntonname);
138 }
139 
140 
141 void
142 vq_done(struct mount *mp)
143 {
144 	/* TODO: remove the rb trees here */
145 }
146 
147 void
148 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta)
149 {
150 	struct ac_unode ufind, *unp;
151 	struct ac_gnode gfind, *gnp;
152 
153 	/* find or create address of chunk */
154 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
155 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
156 
157 	spin_lock(&mp->mnt_acct.ac_spin);
158 
159 	mp->mnt_acct.ac_bytes += delta;
160 
161 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
162 		unp = unode_insert(mp, uid);
163 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
164 		gnp = gnode_insert(mp, gid);
165 
166 	/* update existing chunk */
167 	unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta;
168 	gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta;
169 
170 	spin_unlock(&mp->mnt_acct.ac_spin);
171 }
172 
173 static void
174 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out)
175 {
176 	struct ac_unode *unp;
177 	struct ac_gnode *gnp;
178 	int i;
179 	prop_dictionary_t item;
180 
181 	item = prop_dictionary_create();
182 	(void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes);
183 	(void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit);
184 	prop_array_add_and_rel(dict_out, item);
185 
186 	RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
187 		for (i=0; i<ACCT_CHUNK_NIDS; i++) {
188 			if (unp->uid_chunk[i].space != 0) {
189 				item = prop_dictionary_create();
190 				(void) prop_dictionary_set_uint32(item, "uid",
191 					(unp->left_bits << ACCT_CHUNK_BITS) + i);
192 				(void) prop_dictionary_set_uint64(item, "space used",
193 					unp->uid_chunk[i].space);
194 				(void) prop_dictionary_set_uint64(item, "limit",
195 					unp->uid_chunk[i].limit);
196 				prop_array_add_and_rel(dict_out, item);
197 			}
198 		}
199 	}
200 
201 	RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
202 		for (i=0; i<ACCT_CHUNK_NIDS; i++) {
203 			if (gnp->gid_chunk[i].space != 0) {
204 				item = prop_dictionary_create();
205 				(void) prop_dictionary_set_uint32(item, "gid",
206 					(gnp->left_bits << ACCT_CHUNK_BITS) + i);
207 				(void) prop_dictionary_set_uint64(item, "space used",
208 					gnp->gid_chunk[i].space);
209 				(void) prop_dictionary_set_uint64(item, "limit",
210 					gnp->gid_chunk[i].limit);
211 				prop_array_add_and_rel(dict_out, item);
212 			}
213 		}
214 	}
215 }
216 
217 static int
218 cmd_set_usage_all(struct mount *mp, prop_array_t args)
219 {
220 	struct ac_unode ufind, *unp;
221 	struct ac_gnode gfind, *gnp;
222 	prop_dictionary_t item;
223 	prop_object_iterator_t iter;
224 	uint32_t id;
225 	uint64_t space;
226 
227 	spin_lock(&mp->mnt_acct.ac_spin);
228 	/* 0. zero all statistics */
229 	/* we don't bother to free up memory, most of it would probably be
230 	 * re-allocated immediately anyway. just bzeroing the existing nodes
231 	 * is fine */
232 	mp->mnt_acct.ac_bytes = 0;
233 	RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) {
234 		bzero(&unp->uid_chunk, sizeof(unp->uid_chunk));
235 	}
236 	RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) {
237 		bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk));
238 	}
239 
240 	/* args contains an array of dict */
241 	iter = prop_array_iterator(args);
242 	if (iter == NULL) {
243 		kprintf("cmd_set_usage_all(): failed to create iterator\n");
244 		spin_unlock(&mp->mnt_acct.ac_spin);
245 		return 1;
246 	}
247 	while ((item = prop_object_iterator_next(iter)) != NULL) {
248 		prop_dictionary_get_uint64(item, "space used", &space);
249 		if (prop_dictionary_get_uint32(item, "uid", &id)) {
250 			ufind.left_bits = (id >> ACCT_CHUNK_BITS);
251 			unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind);
252 			if (unp == NULL)
253 				unp = unode_insert(mp, id);
254 			unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
255 		} else if (prop_dictionary_get_uint32(item, "gid", &id)) {
256 			gfind.left_bits = (id >> ACCT_CHUNK_BITS);
257 			gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind);
258 			if (gnp == NULL)
259 				gnp = gnode_insert(mp, id);
260 			gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space;
261 		} else {
262 			mp->mnt_acct.ac_bytes = space;
263 		}
264 	}
265 	prop_object_iterator_release(iter);
266 
267 	spin_unlock(&mp->mnt_acct.ac_spin);
268 	return 0;
269 }
270 
271 static int
272 cmd_set_limit(struct mount *mp, prop_dictionary_t args)
273 {
274 	uint64_t limit;
275 
276 	prop_dictionary_get_uint64(args, "limit", &limit);
277 
278 	spin_lock(&mp->mnt_acct.ac_spin);
279 	mp->mnt_acct.ac_limit = limit;
280 	spin_unlock(&mp->mnt_acct.ac_spin);
281 
282 	return 0;
283 }
284 
285 static int
286 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args)
287 {
288 	uint64_t limit;
289 	uid_t uid;
290 	struct ac_unode ufind, *unp;
291 
292 	prop_dictionary_get_uint32(args, "uid", &uid);
293 	prop_dictionary_get_uint64(args, "limit", &limit);
294 
295 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
296 
297 	spin_lock(&mp->mnt_acct.ac_spin);
298 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL)
299 		unp = unode_insert(mp, uid);
300 	unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit;
301 	spin_unlock(&mp->mnt_acct.ac_spin);
302 
303 	return 0;
304 }
305 
306 static int
307 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args)
308 {
309 	uint64_t limit;
310 	gid_t gid;
311 	struct ac_gnode gfind, *gnp;
312 
313 	prop_dictionary_get_uint32(args, "gid", &gid);
314 	prop_dictionary_get_uint64(args, "limit", &limit);
315 
316 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
317 
318 	spin_lock(&mp->mnt_acct.ac_spin);
319 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL)
320 		gnp = gnode_insert(mp, gid);
321 	gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit;
322 	spin_unlock(&mp->mnt_acct.ac_spin);
323 
324 	return 0;
325 }
326 
327 int
328 sys_vquotactl(struct vquotactl_args *vqa)
329 /* const char *path, struct plistref *pref */
330 {
331 	const char *path;
332 	struct plistref pref;
333 	prop_dictionary_t dict;
334 	prop_object_t args;
335 	char *cmd;
336 
337 	prop_array_t pa_out;
338 
339 	struct nlookupdata nd;
340 	struct mount *mp;
341 	int error;
342 
343 	if (!vfs_quota_enabled)
344 		return EOPNOTSUPP;
345 	path = vqa->path;
346 	error = copyin(vqa->pref, &pref, sizeof(pref));
347 	error = prop_dictionary_copyin(&pref, &dict);
348 	if (error != 0)
349 		return(error);
350 
351 	/* we have a path, get its mount point */
352 	error = nlookup_init(&nd, path, UIO_USERSPACE, 0);
353 	if (error != 0)
354 		return (error);
355 	error = nlookup(&nd);
356 	if (error != 0)
357 		return (error);
358 	mp = nd.nl_nch.mount;
359 	nlookup_done(&nd);
360 
361 	/* get the command */
362 	if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) {
363 		kprintf("sys_vquotactl(): couldn't get command\n");
364 		return EINVAL;
365 	}
366 	args = prop_dictionary_get(dict, "arguments");
367 	if (args == NULL) {
368 		kprintf("couldn't get arguments\n");
369 		return EINVAL;
370 	}
371 
372 	pa_out = prop_array_create();
373 	if (pa_out == NULL)
374 		return ENOMEM;
375 
376 	if (strcmp(cmd, "get usage all") == 0) {
377 		cmd_get_usage_all(mp, pa_out);
378 		goto done;
379 	}
380 	if (strcmp(cmd, "set usage all") == 0) {
381 		error = cmd_set_usage_all(mp, args);
382 		goto done;
383 	}
384 	if (strcmp(cmd, "set limit") == 0) {
385 		error = cmd_set_limit(mp, args);
386 		goto done;
387 	}
388 	if (strcmp(cmd, "set limit uid") == 0) {
389 		error = cmd_set_limit_uid(mp, args);
390 		goto done;
391 	}
392 	if (strcmp(cmd, "set limit gid") == 0) {
393 		error = cmd_set_limit_gid(mp, args);
394 		goto done;
395 	}
396 	return EINVAL;
397 
398 done:
399 	/* kernel to userland */
400 	dict = prop_dictionary_create();
401 	error = prop_dictionary_set(dict, "returned data", pa_out);
402 
403 	error = prop_dictionary_copyout(&pref, dict);
404 	error = copyout(&pref, vqa->pref, sizeof(pref));
405 
406 	return error;
407 }
408 
409 /*
410  * Returns a valid mount point for accounting purposes
411  * We cannot simply use vp->v_mount if the vnode belongs
412  * to a PFS mount point
413  */
414 struct mount*
415 vq_vptomp(struct vnode *vp)
416 {
417 	/* XXX: vp->v_pfsmp may point to a freed structure
418 	* we use mountlist_exists() to check if it is valid
419 	* before using it */
420 	if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) {
421 		/* This is a PFS, use a copy of the real mp */
422 		return vp->v_pfsmp;
423 	} else {
424 		/* Not a PFS or a PFS beeing unmounted */
425 		return vp->v_mount;
426 	}
427 }
428 
429 int
430 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta)
431 {
432 	int rv = 1;
433 	struct ac_unode ufind, *unp;
434 	struct ac_gnode gfind, *gnp;
435 	uint64_t space, limit;
436 
437 	spin_lock(&mp->mnt_acct.ac_spin);
438 
439 	if (mp->mnt_acct.ac_limit == 0)
440 		goto check_uid;
441 	if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) {
442 		rv = 0;
443 		goto done;
444 	}
445 
446 check_uid:
447 	ufind.left_bits = (uid >> ACCT_CHUNK_BITS);
448 	if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) {
449 		space = 0;
450 		limit = 0;
451 	} else {
452 		space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space;
453 		limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit;
454 	}
455 	if (limit == 0)
456 		goto check_gid;
457 	if ((space + delta) > limit) {
458 		rv = 0;
459 		goto done;
460 	}
461 
462 check_gid:
463 	gfind.left_bits = (gid >> ACCT_CHUNK_BITS);
464 	if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) {
465 		space = 0;
466 		limit = 0;
467 	} else {
468 		space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space;
469 		limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit;
470 	}
471 	if (limit == 0)
472 		goto done;
473 	if ((space + delta) > limit)
474 		rv = 0;
475 
476 done:
477 	spin_unlock(&mp->mnt_acct.ac_spin);
478 	return rv;
479 }
480