1 /* 2 * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 3. Neither the name of The DragonFly Project nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific, prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/sysctl.h> 34 #include <sys/mount.h> 35 #include <sys/systm.h> 36 #include <sys/nlookup.h> 37 #include <sys/vnode.h> 38 #include <sys/stat.h> 39 #include <sys/vfs_quota.h> 40 #include <sys/spinlock.h> 41 #include <sys/spinlock2.h> 42 43 #include <sys/sysproto.h> 44 #include <libprop/proplib.h> 45 #include <libprop/prop_dictionary.h> 46 47 /* in-memory accounting, red-black tree based */ 48 /* FIXME: code duplication caused by uid_t / gid_t differences */ 49 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 50 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 51 52 static int 53 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b); 54 static int 55 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b); 56 57 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 58 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 59 60 struct ac_unode* unode_insert(struct mount*, uid_t); 61 struct ac_gnode* gnode_insert(struct mount*, gid_t); 62 63 static int 64 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b) 65 { 66 if (a->left_bits < b->left_bits) 67 return(-1); 68 else if (a->left_bits > b->left_bits) 69 return(1); 70 return(0); 71 } 72 73 static int 74 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b) 75 { 76 if (a->left_bits < b->left_bits) 77 return(-1); 78 else if (a->left_bits > b->left_bits) 79 return(1); 80 return(0); 81 } 82 83 struct ac_unode* 84 unode_insert(struct mount *mp, uid_t uid) 85 { 86 struct ac_unode *unp, *res; 87 88 unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK); 89 90 unp->left_bits = (uid >> ACCT_CHUNK_BITS); 91 res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp); 92 KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL")); 93 94 return unp; 95 } 96 97 struct ac_gnode* 98 gnode_insert(struct mount *mp, gid_t gid) 99 { 100 struct ac_gnode *gnp, *res; 101 102 gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK); 103 104 gnp->left_bits = (gid >> ACCT_CHUNK_BITS); 105 res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp); 106 KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL")); 107 108 return gnp; 109 } 110 111 int vfs_quota_enabled = 0; 112 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled); 113 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD, 114 &vfs_quota_enabled, 0, "Enable VFS quota"); 115 116 /* initializes per mount-point data structures */ 117 void 118 vq_init(struct mount *mp) 119 { 120 121 if (!vfs_quota_enabled) 122 return; 123 124 /* initialize the rb trees */ 125 RB_INIT(&mp->mnt_acct.ac_uroot); 126 RB_INIT(&mp->mnt_acct.ac_groot); 127 spin_init(&mp->mnt_acct.ac_spin, "vqinit"); 128 129 mp->mnt_acct.ac_bytes = 0; 130 131 /* enable data collection */ 132 mp->mnt_op->vfs_account = vfs_stdaccount; 133 /* mark this filesystem quota enabled */ 134 mp->mnt_flag |= MNT_QUOTA; 135 if (bootverbose) 136 kprintf("vfs accounting enabled for %s\n", 137 mp->mnt_stat.f_mntonname); 138 } 139 140 141 void 142 vq_done(struct mount *mp) 143 { 144 /* TODO: remove the rb trees here */ 145 } 146 147 void 148 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta) 149 { 150 struct ac_unode ufind, *unp; 151 struct ac_gnode gfind, *gnp; 152 153 /* find or create address of chunk */ 154 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 155 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 156 157 spin_lock(&mp->mnt_acct.ac_spin); 158 159 mp->mnt_acct.ac_bytes += delta; 160 161 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) 162 unp = unode_insert(mp, uid); 163 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) 164 gnp = gnode_insert(mp, gid); 165 166 /* update existing chunk */ 167 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta; 168 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta; 169 170 spin_unlock(&mp->mnt_acct.ac_spin); 171 } 172 173 static void 174 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out) 175 { 176 struct ac_unode *unp; 177 struct ac_gnode *gnp; 178 int i; 179 prop_dictionary_t item; 180 181 item = prop_dictionary_create(); 182 (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes); 183 (void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit); 184 prop_array_add_and_rel(dict_out, item); 185 186 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 187 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 188 if (unp->uid_chunk[i].space != 0) { 189 item = prop_dictionary_create(); 190 (void) prop_dictionary_set_uint32(item, "uid", 191 (unp->left_bits << ACCT_CHUNK_BITS) + i); 192 (void) prop_dictionary_set_uint64(item, "space used", 193 unp->uid_chunk[i].space); 194 (void) prop_dictionary_set_uint64(item, "limit", 195 unp->uid_chunk[i].limit); 196 prop_array_add_and_rel(dict_out, item); 197 } 198 } 199 } 200 201 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 202 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 203 if (gnp->gid_chunk[i].space != 0) { 204 item = prop_dictionary_create(); 205 (void) prop_dictionary_set_uint32(item, "gid", 206 (gnp->left_bits << ACCT_CHUNK_BITS) + i); 207 (void) prop_dictionary_set_uint64(item, "space used", 208 gnp->gid_chunk[i].space); 209 (void) prop_dictionary_set_uint64(item, "limit", 210 gnp->gid_chunk[i].limit); 211 prop_array_add_and_rel(dict_out, item); 212 } 213 } 214 } 215 } 216 217 static int 218 cmd_set_usage_all(struct mount *mp, prop_array_t args) 219 { 220 struct ac_unode ufind, *unp; 221 struct ac_gnode gfind, *gnp; 222 prop_dictionary_t item; 223 prop_object_iterator_t iter; 224 uint32_t id; 225 uint64_t space; 226 227 spin_lock(&mp->mnt_acct.ac_spin); 228 /* 0. zero all statistics */ 229 /* we don't bother to free up memory, most of it would probably be 230 * re-allocated immediately anyway. just bzeroing the existing nodes 231 * is fine */ 232 mp->mnt_acct.ac_bytes = 0; 233 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 234 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk)); 235 } 236 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 237 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk)); 238 } 239 240 /* args contains an array of dict */ 241 iter = prop_array_iterator(args); 242 if (iter == NULL) { 243 kprintf("cmd_set_usage_all(): failed to create iterator\n"); 244 spin_unlock(&mp->mnt_acct.ac_spin); 245 return 1; 246 } 247 while ((item = prop_object_iterator_next(iter)) != NULL) { 248 prop_dictionary_get_uint64(item, "space used", &space); 249 if (prop_dictionary_get_uint32(item, "uid", &id)) { 250 ufind.left_bits = (id >> ACCT_CHUNK_BITS); 251 unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind); 252 if (unp == NULL) 253 unp = unode_insert(mp, id); 254 unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space; 255 } else if (prop_dictionary_get_uint32(item, "gid", &id)) { 256 gfind.left_bits = (id >> ACCT_CHUNK_BITS); 257 gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind); 258 if (gnp == NULL) 259 gnp = gnode_insert(mp, id); 260 gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space; 261 } else { 262 mp->mnt_acct.ac_bytes = space; 263 } 264 } 265 prop_object_iterator_release(iter); 266 267 spin_unlock(&mp->mnt_acct.ac_spin); 268 return 0; 269 } 270 271 static int 272 cmd_set_limit(struct mount *mp, prop_dictionary_t args) 273 { 274 uint64_t limit; 275 276 prop_dictionary_get_uint64(args, "limit", &limit); 277 278 spin_lock(&mp->mnt_acct.ac_spin); 279 mp->mnt_acct.ac_limit = limit; 280 spin_unlock(&mp->mnt_acct.ac_spin); 281 282 return 0; 283 } 284 285 static int 286 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args) 287 { 288 uint64_t limit; 289 uid_t uid; 290 struct ac_unode ufind, *unp; 291 292 prop_dictionary_get_uint32(args, "uid", &uid); 293 prop_dictionary_get_uint64(args, "limit", &limit); 294 295 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 296 297 spin_lock(&mp->mnt_acct.ac_spin); 298 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) 299 unp = unode_insert(mp, uid); 300 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit; 301 spin_unlock(&mp->mnt_acct.ac_spin); 302 303 return 0; 304 } 305 306 static int 307 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args) 308 { 309 uint64_t limit; 310 gid_t gid; 311 struct ac_gnode gfind, *gnp; 312 313 prop_dictionary_get_uint32(args, "gid", &gid); 314 prop_dictionary_get_uint64(args, "limit", &limit); 315 316 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 317 318 spin_lock(&mp->mnt_acct.ac_spin); 319 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) 320 gnp = gnode_insert(mp, gid); 321 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit; 322 spin_unlock(&mp->mnt_acct.ac_spin); 323 324 return 0; 325 } 326 327 int 328 sys_vquotactl(struct vquotactl_args *vqa) 329 /* const char *path, struct plistref *pref */ 330 { 331 struct nchandle nch; 332 const char *path; 333 struct plistref pref; 334 prop_dictionary_t dict; 335 prop_object_t args; 336 char *cmd; 337 prop_array_t pa_out; 338 struct nlookupdata nd; 339 int error; 340 341 if (!vfs_quota_enabled) 342 return EOPNOTSUPP; 343 path = vqa->path; 344 error = copyin(vqa->pref, &pref, sizeof(pref)); 345 error = prop_dictionary_copyin(&pref, &dict); 346 if (error) 347 return(error); 348 349 /* we have a path, get its mount point */ 350 error = nlookup_init(&nd, path, UIO_USERSPACE, 0); 351 if (error) 352 return (error); 353 error = nlookup(&nd); 354 if (error) 355 return (error); 356 nch = nd.nl_nch; 357 cache_zero(&nd.nl_nch); 358 nlookup_done(&nd); 359 360 /* get the command */ 361 if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) { 362 kprintf("sys_vquotactl(): couldn't get command\n"); 363 cache_put(&nch); 364 return EINVAL; 365 } 366 args = prop_dictionary_get(dict, "arguments"); 367 if (args == NULL) { 368 kprintf("couldn't get arguments\n"); 369 cache_put(&nch); 370 return EINVAL; 371 } 372 373 pa_out = prop_array_create(); 374 if (pa_out == NULL) { 375 cache_put(&nch); 376 return ENOMEM; 377 } 378 379 if (strcmp(cmd, "get usage all") == 0) { 380 cmd_get_usage_all(nch.mount, pa_out); 381 goto done; 382 } 383 if (strcmp(cmd, "set usage all") == 0) { 384 error = cmd_set_usage_all(nch.mount, args); 385 goto done; 386 } 387 if (strcmp(cmd, "set limit") == 0) { 388 error = cmd_set_limit(nch.mount, args); 389 goto done; 390 } 391 if (strcmp(cmd, "set limit uid") == 0) { 392 error = cmd_set_limit_uid(nch.mount, args); 393 goto done; 394 } 395 if (strcmp(cmd, "set limit gid") == 0) { 396 error = cmd_set_limit_gid(nch.mount, args); 397 goto done; 398 } 399 cache_put(&nch); 400 return EINVAL; 401 402 done: 403 /* kernel to userland */ 404 dict = prop_dictionary_create(); 405 error = prop_dictionary_set(dict, "returned data", pa_out); 406 407 error = prop_dictionary_copyout(&pref, dict); 408 error = copyout(&pref, vqa->pref, sizeof(pref)); 409 cache_put(&nch); 410 411 return error; 412 } 413 414 /* 415 * Returns a valid mount point for accounting purposes 416 * We cannot simply use vp->v_mount if the vnode belongs 417 * to a PFS mount point 418 */ 419 struct mount* 420 vq_vptomp(struct vnode *vp) 421 { 422 /* XXX: vp->v_pfsmp may point to a freed structure 423 * we use mountlist_exists() to check if it is valid 424 * before using it */ 425 if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) { 426 /* This is a PFS, use a copy of the real mp */ 427 return vp->v_pfsmp; 428 } else { 429 /* Not a PFS or a PFS beeing unmounted */ 430 return vp->v_mount; 431 } 432 } 433 434 int 435 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta) 436 { 437 int rv = 1; 438 struct ac_unode ufind, *unp; 439 struct ac_gnode gfind, *gnp; 440 uint64_t space, limit; 441 442 spin_lock(&mp->mnt_acct.ac_spin); 443 444 if (mp->mnt_acct.ac_limit == 0) 445 goto check_uid; 446 if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) { 447 rv = 0; 448 goto done; 449 } 450 451 check_uid: 452 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 453 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) { 454 space = 0; 455 limit = 0; 456 } else { 457 space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space; 458 limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit; 459 } 460 if (limit == 0) 461 goto check_gid; 462 if ((space + delta) > limit) { 463 rv = 0; 464 goto done; 465 } 466 467 check_gid: 468 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 469 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) { 470 space = 0; 471 limit = 0; 472 } else { 473 space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space; 474 limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit; 475 } 476 if (limit == 0) 477 goto done; 478 if ((space + delta) > limit) 479 rv = 0; 480 481 done: 482 spin_unlock(&mp->mnt_acct.ac_spin); 483 return rv; 484 } 485