1 /* 2 * Copyright (c) 2011 François Tigeot <ftigeot@wolpond.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 3. Neither the name of The DragonFly Project nor the names of its 16 * contributors may be used to endorse or promote products derived 17 * from this software without specific, prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 27 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 29 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/sysctl.h> 34 #include <sys/mount.h> 35 #include <sys/systm.h> 36 #include <sys/nlookup.h> 37 #include <sys/vnode.h> 38 #include <sys/stat.h> 39 #include <sys/vfs_quota.h> 40 #include <sys/spinlock.h> 41 #include <sys/spinlock2.h> 42 #include <inttypes.h> 43 44 #include <sys/sysproto.h> 45 #include <libprop/proplib.h> 46 #include <libprop/prop_dictionary.h> 47 48 /* in-memory accounting, red-black tree based */ 49 /* FIXME: code duplication caused by uid_t / gid_t differences */ 50 RB_PROTOTYPE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 51 RB_PROTOTYPE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 52 53 static int 54 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b); 55 static int 56 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b); 57 58 RB_GENERATE(ac_utree, ac_unode, rb_entry, rb_ac_unode_cmp); 59 RB_GENERATE(ac_gtree, ac_gnode, rb_entry, rb_ac_gnode_cmp); 60 61 struct ac_unode* unode_insert(struct mount*, uid_t); 62 struct ac_gnode* gnode_insert(struct mount*, gid_t); 63 64 static int 65 rb_ac_unode_cmp(struct ac_unode *a, struct ac_unode *b) 66 { 67 if (a->left_bits < b->left_bits) 68 return(-1); 69 else if (a->left_bits > b->left_bits) 70 return(1); 71 return(0); 72 } 73 74 static int 75 rb_ac_gnode_cmp(struct ac_gnode *a, struct ac_gnode *b) 76 { 77 if (a->left_bits < b->left_bits) 78 return(-1); 79 else if (a->left_bits > b->left_bits) 80 return(1); 81 return(0); 82 } 83 84 struct ac_unode* 85 unode_insert(struct mount *mp, uid_t uid) 86 { 87 struct ac_unode *unp, *res; 88 89 unp = kmalloc(sizeof(struct ac_unode), M_MOUNT, M_ZERO | M_WAITOK); 90 91 unp->left_bits = (uid >> ACCT_CHUNK_BITS); 92 res = RB_INSERT(ac_utree, &mp->mnt_acct.ac_uroot, unp); 93 KASSERT(res == NULL, ("unode_insert(): RB_INSERT didn't return NULL\n")); 94 95 return unp; 96 } 97 98 struct ac_gnode* 99 gnode_insert(struct mount *mp, gid_t gid) 100 { 101 struct ac_gnode *gnp, *res; 102 103 gnp = kmalloc(sizeof(struct ac_gnode), M_MOUNT, M_ZERO | M_WAITOK); 104 105 gnp->left_bits = (gid >> ACCT_CHUNK_BITS); 106 res = RB_INSERT(ac_gtree, &mp->mnt_acct.ac_groot, gnp); 107 KASSERT(res == NULL, ("gnode_insert(): RB_INSERT didn't return NULL\n")); 108 109 return gnp; 110 } 111 112 static int vfs_accounting_enabled = 0; /* global vfs accounting enable */ 113 TUNABLE_INT("vfs.accounting_enabled", &vfs_accounting_enabled); 114 SYSCTL_INT(_vfs, OID_AUTO, accounting_enabled, CTLFLAG_RD, 115 &vfs_accounting_enabled, 0, "Enable VFS accounting"); 116 117 /* initializes global accounting data */ 118 void 119 vq_init(struct mount *mp) 120 { 121 122 if (!vfs_accounting_enabled) 123 return; 124 125 /* initialize the rb trees */ 126 RB_INIT(&mp->mnt_acct.ac_uroot); 127 RB_INIT(&mp->mnt_acct.ac_groot); 128 spin_init(&mp->mnt_acct.ac_spin); 129 130 mp->mnt_acct.ac_bytes = 0; 131 132 /* enable data collection */ 133 mp->mnt_op->vfs_account = vfs_stdaccount; 134 /* mark this filesystem as having accounting enabled */ 135 mp->mnt_flag |= MNT_ACCOUNTING; 136 if (bootverbose) 137 kprintf("vfs accounting enabled for %s\n", 138 mp->mnt_stat.f_mntonname); 139 } 140 141 142 void 143 vq_done(struct mount *mp) 144 { 145 /* TODO: remove the rb trees here */ 146 } 147 148 void 149 vfs_stdaccount(struct mount *mp, uid_t uid, gid_t gid, int64_t delta) 150 { 151 struct ac_unode ufind, *unp; 152 struct ac_gnode gfind, *gnp; 153 154 /* find or create address of chunk */ 155 ufind.left_bits = (uid >> ACCT_CHUNK_BITS); 156 gfind.left_bits = (gid >> ACCT_CHUNK_BITS); 157 158 spin_lock(&mp->mnt_acct.ac_spin); 159 160 mp->mnt_acct.ac_bytes += delta; 161 162 if ((unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind)) == NULL) 163 unp = unode_insert(mp, uid); 164 if ((gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind)) == NULL) 165 gnp = gnode_insert(mp, gid); 166 167 /* update existing chunk */ 168 unp->uid_chunk[(uid & ACCT_CHUNK_MASK)] += delta; 169 gnp->gid_chunk[(gid & ACCT_CHUNK_MASK)] += delta; 170 171 spin_unlock(&mp->mnt_acct.ac_spin); 172 } 173 174 static void 175 cmd_get_usage_all(struct mount *mp, prop_array_t dict_out) 176 { 177 struct ac_unode *unp; 178 struct ac_gnode *gnp; 179 int i; 180 prop_dictionary_t item; 181 182 item = prop_dictionary_create(); 183 (void) prop_dictionary_set_uint64(item, "space used", mp->mnt_acct.ac_bytes); 184 prop_array_add_and_rel(dict_out, item); 185 186 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 187 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 188 if (unp->uid_chunk[i] != 0) { 189 item = prop_dictionary_create(); 190 (void) prop_dictionary_set_uint32(item, "uid", 191 (unp->left_bits << ACCT_CHUNK_BITS) + i); 192 (void) prop_dictionary_set_uint64(item, "space used", 193 unp->uid_chunk[i]); 194 prop_array_add_and_rel(dict_out, item); 195 } 196 } 197 } 198 199 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 200 for (i=0; i<ACCT_CHUNK_NIDS; i++) { 201 if (gnp->gid_chunk[i] != 0) { 202 item = prop_dictionary_create(); 203 (void) prop_dictionary_set_uint32(item, "gid", 204 (gnp->left_bits << ACCT_CHUNK_BITS) + i); 205 (void) prop_dictionary_set_uint64(item, "space used", 206 gnp->gid_chunk[i]); 207 prop_array_add_and_rel(dict_out, item); 208 } 209 } 210 } 211 } 212 213 static int 214 cmd_set_usage_all(struct mount *mp, prop_array_t args) 215 { 216 struct ac_unode ufind, *unp; 217 struct ac_gnode gfind, *gnp; 218 prop_dictionary_t item; 219 prop_object_iterator_t iter; 220 uint32_t id; 221 uint64_t space; 222 223 spin_lock(&mp->mnt_acct.ac_spin); 224 /* 0. zero all statistics */ 225 /* we don't bother to free up memory, most of it would probably be 226 * re-allocated immediately anyway. just bzeroing the existing nodes 227 * is fine */ 228 mp->mnt_acct.ac_bytes = 0; 229 RB_FOREACH(unp, ac_utree, &mp->mnt_acct.ac_uroot) { 230 bzero(&unp->uid_chunk, sizeof(unp->uid_chunk)); 231 } 232 RB_FOREACH(gnp, ac_gtree, &mp->mnt_acct.ac_groot) { 233 bzero(&gnp->gid_chunk, sizeof(gnp->gid_chunk)); 234 } 235 236 /* args contains an array of dict */ 237 iter = prop_array_iterator(args); 238 if (iter == NULL) { 239 kprintf("cmd_set_usage_all(): failed to create iterator\n"); 240 spin_unlock(&mp->mnt_acct.ac_spin); 241 return 1; 242 } 243 while ((item = prop_object_iterator_next(iter)) != NULL) { 244 prop_dictionary_get_uint64(item, "space used", &space); 245 if (prop_dictionary_get_uint32(item, "uid", &id)) { 246 ufind.left_bits = (id >> ACCT_CHUNK_BITS); 247 unp = RB_FIND(ac_utree, &mp->mnt_acct.ac_uroot, &ufind); 248 if (unp == NULL) 249 unp = unode_insert(mp, id); 250 unp->uid_chunk[(id & ACCT_CHUNK_MASK)] = space; 251 } else if (prop_dictionary_get_uint32(item, "gid", &id)) { 252 gfind.left_bits = (id >> ACCT_CHUNK_BITS); 253 gnp = RB_FIND(ac_gtree, &mp->mnt_acct.ac_groot, &gfind); 254 if (gnp == NULL) 255 gnp = gnode_insert(mp, id); 256 gnp->gid_chunk[(id & ACCT_CHUNK_MASK)] = space; 257 } else { 258 mp->mnt_acct.ac_bytes = space; 259 } 260 } 261 prop_object_iterator_release(iter); 262 263 spin_unlock(&mp->mnt_acct.ac_spin); 264 return 0; 265 } 266 267 int 268 sys_vquotactl(struct vquotactl_args *vqa) 269 /* const char *path, struct plistref *pref */ 270 { 271 const char *path; 272 struct plistref pref; 273 prop_dictionary_t dict; 274 prop_object_t args; 275 char *cmd; 276 277 prop_array_t pa_out; 278 279 struct nlookupdata nd; 280 struct mount *mp; 281 int error; 282 283 path = vqa->path; 284 error = copyin(vqa->pref, &pref, sizeof(pref)); 285 error = prop_dictionary_copyin(&pref, &dict); 286 if (error != 0) 287 return(error); 288 289 /* we have a path, get its mount point */ 290 error = nlookup_init(&nd, path, UIO_USERSPACE, 0); 291 if (error != 0) 292 return (error); 293 error = nlookup(&nd); 294 if (error != 0) 295 return (error); 296 mp = nd.nl_nch.mount; 297 nlookup_done(&nd); 298 299 /* get the command */ 300 if (prop_dictionary_get_cstring(dict, "command", &cmd) == 0) { 301 kprintf("sys_vquotactl(): couldn't get command\n"); 302 return EINVAL; 303 } 304 args = prop_dictionary_get(dict, "arguments"); 305 if (args == NULL) { 306 kprintf("couldn't get arguments\n"); 307 return EINVAL; 308 } 309 310 pa_out = prop_array_create(); 311 if (pa_out == NULL) 312 return ENOMEM; 313 314 if (strcmp(cmd, "get usage all") == 0) { 315 cmd_get_usage_all(mp, pa_out); 316 goto done; 317 } 318 if (strcmp(cmd, "set usage all") == 0) { 319 error = cmd_set_usage_all(mp, args); 320 goto done; 321 } 322 return EINVAL; 323 324 done: 325 /* kernel to userland */ 326 dict = prop_dictionary_create(); 327 error = prop_dictionary_set(dict, "returned data", pa_out); 328 329 error = prop_dictionary_copyout(&pref, dict); 330 error = copyout(&pref, vqa->pref, sizeof(pref)); 331 332 return error; 333 } 334