1 /* 2 * Copyright (c) 2011,2012 François Tigeot <ftigeot@wolpond.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 3. Neither the name of The DragonFly Project nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific, prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/sysctl.h> 34 #include <sys/mount.h> 35 #include <sys/systm.h> 36 #include <sys/nlookup.h> 37 #include <sys/vnode.h> 38 #include <sys/stat.h> 39 #include <sys/vfs_quota.h> 40 #include <sys/spinlock.h> 41 #include <sys/spinlock2.h> 42 #include <inttypes.h> 43 44 #include <sys/sysproto.h> 45 #include <libprop/proplib.h> 46 #include <libprop/prop_dictionary.h> 47 48 /* in-memory accounting, red-black tree based */ 49 /* FIXME: code duplication caused by uid_t / gid_t differences */ 50 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 51 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 52 53 static int 54 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b); 55 static int 56 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b); 57 58 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 59 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 60 61 struct ac_unode* unode_insert(struct mount*, uid_t); 62 struct ac_gnode* gnode_insert(struct mount*, gid_t); 63 64 static int 65 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b) 66 { 67 if (a->left_bits < b->left_bits) 68 return(-1); 69 else if (a->left_bits > b->left_bits) 70 return(1); 71 return(0); 72 } 73 74 static int 75 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b) 76 { 77 if (a->left_bits < b->left_bits) 78 return(-1); 79 else if (a->left_bits > b->left_bits) 80 return(1); 81 return(0); 82 } 83 84 struct ac_unode* 85 unode_insert(struct mount *mp, uid_t uid) 86 { 87 struct ac_unode *unp, *res; 88 89 unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK); 90 91 unp->left_bits = (uid >> ACCT_CHUNK_BITS); 92 res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp); 93 KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL")); 94 95 return unp; 96 } 97 98 struct ac_gnode* 99 gnode_insert(struct mount *mp, gid_t gid) 100 { 101 struct ac_gnode *gnp, *res; 102 103 gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK); 104 105 gnp->left_bits = (gid >> ACCT_CHUNK_BITS); 106 res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp); 107 KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL")); 108 109 return gnp; 110 } 111 112 int vfs_quota_enabled = 0; 113 TUNABLE_INT("vfs.quota_enabled", &vfs_quota_enabled); 114 SYSCTL_INT(_vfs, OID_AUTO, quota_enabled, CTLFLAG_RD, 115 &vfs_quota_enabled, 0, "Enable VFS quota"); 116 117 /* initializes per mount-point data structures */ 118 void 119 vq_init(struct mount *mp) 120 { 121 122 if (!vfs_quota_enabled) 123 return; 124 125 /* initialize the rb trees */ 126 RB_INIT(&mp->mnt_acct.ac_uroot); 127 RB_INIT(&mp->mnt_acct.ac_groot); 128 spin_init(&mp->mnt_acct.ac_spin); 129 130 mp->mnt_acct.ac_bytes = 0; 131 132 /* enable data collection */ 133 mp->mnt_op->vfs_account = vfs_stdaccount; 134 /* mark this filesystem quota enabled */ 135 mp->mnt_flag |= MNT_QUOTA; 136 if (bootverbose) 137 kprintf("vfs accounting enabled for %s\n", 138 mp->mnt_stat.f_mntonname); 139 } 140 141 142 void 143 vq_done(struct mount *mp) 144 { 145 /* TODO: remove the rb trees here */ 146 } 147 148 void 149 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta) 150 { 151 struct ac_unode ufind, *unp; 152 struct ac_gnode gfind, *gnp; 153 154 /* find or create address of chunk */ 155 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 156 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 157 158 spin_lock(&mp->mnt_acct.ac_spin); 159 160 mp->mnt_acct.ac_bytes += delta; 161 162 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) 163 unp = unode_insert(mp, uid); 164 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) 165 gnp = gnode_insert(mp, gid); 166 167 /* update existing chunk */ 168 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space += delta; 169 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space += delta; 170 171 spin_unlock(&mp->mnt_acct.ac_spin); 172 } 173 174 static void 175 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out) 176 { 177 struct ac_unode *unp; 178 struct ac_gnode *gnp; 179 int i; 180 prop_dictionary_t item; 181 182 item = prop_dictionary_create(); 183 (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes); 184 (void) prop_dictionary_set_uint64(item, "limit", mp->mnt_acct.ac_limit); 185 prop_array_add_and_rel(dict_out, item); 186 187 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 188 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 189 if (unp->uid_chunk[i].space != 0) { 190 item = prop_dictionary_create(); 191 (void) prop_dictionary_set_uint32(item, "uid", 192 (unp->left_bits << ACCT_CHUNK_BITS) + i); 193 (void) prop_dictionary_set_uint64(item, "space used", 194 unp->uid_chunk[i].space); 195 (void) prop_dictionary_set_uint64(item, "limit", 196 unp->uid_chunk[i].limit); 197 prop_array_add_and_rel(dict_out, item); 198 } 199 } 200 } 201 202 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 203 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 204 if (gnp->gid_chunk[i].space != 0) { 205 item = prop_dictionary_create(); 206 (void) prop_dictionary_set_uint32(item, "gid", 207 (gnp->left_bits << ACCT_CHUNK_BITS) + i); 208 (void) prop_dictionary_set_uint64(item, "space used", 209 gnp->gid_chunk[i].space); 210 (void) prop_dictionary_set_uint64(item, "limit", 211 gnp->gid_chunk[i].limit); 212 prop_array_add_and_rel(dict_out, item); 213 } 214 } 215 } 216 } 217 218 static int 219 cmd_set_usage_all(struct mount *mp, prop_array_t args) 220 { 221 struct ac_unode ufind, *unp; 222 struct ac_gnode gfind, *gnp; 223 prop_dictionary_t item; 224 prop_object_iterator_t iter; 225 uint32_t id; 226 uint64_t space; 227 228 spin_lock(&mp->mnt_acct.ac_spin); 229 /* 0. zero all statistics */ 230 /* we don't bother to free up memory, most of it would probably be 231 * re-allocated immediately anyway. just bzeroing the existing nodes 232 * is fine */ 233 mp->mnt_acct.ac_bytes = 0; 234 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 235 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk)); 236 } 237 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 238 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk)); 239 } 240 241 /* args contains an array of dict */ 242 iter = prop_array_iterator(args); 243 if (iter == NULL) { 244 kprintf("cmd_set_usage_all(): failed to create iterator\n"); 245 spin_unlock(&mp->mnt_acct.ac_spin); 246 return 1; 247 } 248 while ((item = prop_object_iterator_next(iter)) != NULL) { 249 prop_dictionary_get_uint64(item, "space used", &space); 250 if (prop_dictionary_get_uint32(item, "uid", &id)) { 251 ufind.left_bits = (id >> ACCT_CHUNK_BITS); 252 unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind); 253 if (unp == NULL) 254 unp = unode_insert(mp, id); 255 unp->uid_chunk[(id & ACCT_CHUNK_MASK)].space = space; 256 } else if (prop_dictionary_get_uint32(item, "gid", &id)) { 257 gfind.left_bits = (id >> ACCT_CHUNK_BITS); 258 gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind); 259 if (gnp == NULL) 260 gnp = gnode_insert(mp, id); 261 gnp->gid_chunk[(id & ACCT_CHUNK_MASK)].space = space; 262 } else { 263 mp->mnt_acct.ac_bytes = space; 264 } 265 } 266 prop_object_iterator_release(iter); 267 268 spin_unlock(&mp->mnt_acct.ac_spin); 269 return 0; 270 } 271 272 static int 273 cmd_set_limit(struct mount *mp, prop_dictionary_t args) 274 { 275 uint64_t limit; 276 277 prop_dictionary_get_uint64(args, "limit", &limit); 278 279 spin_lock(&mp->mnt_acct.ac_spin); 280 mp->mnt_acct.ac_limit = limit; 281 spin_unlock(&mp->mnt_acct.ac_spin); 282 283 return 0; 284 } 285 286 static int 287 cmd_set_limit_uid(struct mount *mp, prop_dictionary_t args) 288 { 289 uint64_t limit; 290 uid_t uid; 291 struct ac_unode ufind, *unp; 292 293 prop_dictionary_get_uint32(args, "uid", &uid); 294 prop_dictionary_get_uint64(args, "limit", &limit); 295 296 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 297 298 spin_lock(&mp->mnt_acct.ac_spin); 299 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) 300 unp = unode_insert(mp, uid); 301 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit = limit; 302 spin_unlock(&mp->mnt_acct.ac_spin); 303 304 return 0; 305 } 306 307 static int 308 cmd_set_limit_gid(struct mount *mp, prop_dictionary_t args) 309 { 310 uint64_t limit; 311 gid_t gid; 312 struct ac_gnode gfind, *gnp; 313 314 prop_dictionary_get_uint32(args, "gid", &gid); 315 prop_dictionary_get_uint64(args, "limit", &limit); 316 317 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 318 319 spin_lock(&mp->mnt_acct.ac_spin); 320 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) 321 gnp = gnode_insert(mp, gid); 322 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit = limit; 323 spin_unlock(&mp->mnt_acct.ac_spin); 324 325 return 0; 326 } 327 328 int 329 sys_vquotactl(struct vquotactl_args *vqa) 330 /* const char *path, struct plistref *pref */ 331 { 332 const char *path; 333 struct plistref pref; 334 prop_dictionary_t dict; 335 prop_object_t args; 336 char *cmd; 337 338 prop_array_t pa_out; 339 340 struct nlookupdata nd; 341 struct mount *mp; 342 int error; 343 344 if (!vfs_quota_enabled) 345 return EOPNOTSUPP; 346 path = vqa->path; 347 error = copyin(vqa->pref, &pref, sizeof(pref)); 348 error = prop_dictionary_copyin(&pref, &dict); 349 if (error != 0) 350 return(error); 351 352 /* we have a path, get its mount point */ 353 error = nlookup_init(&nd, path, UIO_USERSPACE, 0); 354 if (error != 0) 355 return (error); 356 error = nlookup(&nd); 357 if (error != 0) 358 return (error); 359 mp = nd.nl_nch.mount; 360 nlookup_done(&nd); 361 362 /* get the command */ 363 if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) { 364 kprintf("sys_vquotactl(): couldn't get command\n"); 365 return EINVAL; 366 } 367 args = prop_dictionary_get(dict, "arguments"); 368 if (args == NULL) { 369 kprintf("couldn't get arguments\n"); 370 return EINVAL; 371 } 372 373 pa_out = prop_array_create(); 374 if (pa_out == NULL) 375 return ENOMEM; 376 377 if (strcmp(cmd, "get usage all") == 0) { 378 cmd_get_usage_all(mp, pa_out); 379 goto done; 380 } 381 if (strcmp(cmd, "set usage all") == 0) { 382 error = cmd_set_usage_all(mp, args); 383 goto done; 384 } 385 if (strcmp(cmd, "set limit") == 0) { 386 error = cmd_set_limit(mp, args); 387 goto done; 388 } 389 if (strcmp(cmd, "set limit uid") == 0) { 390 error = cmd_set_limit_uid(mp, args); 391 goto done; 392 } 393 if (strcmp(cmd, "set limit gid") == 0) { 394 error = cmd_set_limit_gid(mp, args); 395 goto done; 396 } 397 return EINVAL; 398 399 done: 400 /* kernel to userland */ 401 dict = prop_dictionary_create(); 402 error = prop_dictionary_set(dict, "returned data", pa_out); 403 404 error = prop_dictionary_copyout(&pref, dict); 405 error = copyout(&pref, vqa->pref, sizeof(pref)); 406 407 return error; 408 } 409 410 /* 411 * Returns a valid mount point for accounting purposes 412 * We cannot simply use vp->v_mount if the vnode belongs 413 * to a PFS mount point 414 */ 415 struct mount* 416 vq_vptomp(struct vnode *vp) 417 { 418 /* XXX: vp->v_pfsmp may point to a freed structure 419 * we use mountlist_exists() to check if it is valid 420 * before using it */ 421 if ((vp->v_pfsmp != NULL) && (mountlist_exists(vp->v_pfsmp))) { 422 /* This is a PFS, use a copy of the real mp */ 423 return vp->v_pfsmp; 424 } else { 425 /* Not a PFS or a PFS beeing unmounted */ 426 return vp->v_mount; 427 } 428 } 429 430 int 431 vq_write_ok(struct mount *mp, uid_t uid, gid_t gid, uint64_t delta) 432 { 433 int rv = 1; 434 struct ac_unode ufind, *unp; 435 struct ac_gnode gfind, *gnp; 436 uint64_t space, limit; 437 438 spin_lock(&mp->mnt_acct.ac_spin); 439 440 if (mp->mnt_acct.ac_limit == 0) 441 goto check_uid; 442 if ((mp->mnt_acct.ac_bytes + delta) > mp->mnt_acct.ac_limit) { 443 rv = 0; 444 goto done; 445 } 446 447 check_uid: 448 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 449 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) { 450 space = 0; 451 limit = 0; 452 } else { 453 space = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].space; 454 limit = unp->uid_chunk[(uid & ACCT_CHUNK_MASK)].limit; 455 } 456 if (limit == 0) 457 goto check_gid; 458 if ((space + delta) > limit) { 459 rv = 0; 460 goto done; 461 } 462 463 check_gid: 464 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 465 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) { 466 space = 0; 467 limit = 0; 468 } else { 469 space = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].space; 470 limit = gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)].limit; 471 } 472 if (limit == 0) 473 goto done; 474 if ((space + delta) > limit) 475 rv = 0; 476 477 done: 478 spin_unlock(&mp->mnt_acct.ac_spin); 479 return rv; 480 } 481