1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright (c) 2016 by Delphix. All rights reserved. 25 */ 26 27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * University Copyright- Copyright (c) 1982, 1986, 1988 32 * The Regents of the University of California 33 * All Rights Reserved 34 * 35 * University Acknowledgment- Portions of this document are derived from 36 * software developed by the University of California, Berkeley, and its 37 * contributors. 38 */ 39 40 41 #pragma ident "%Z%%M% %I% %E% SMI" 42 43 /* 44 * Routines used in checking limits on file system usage. 45 */ 46 47 #include <sys/types.h> 48 #include <sys/t_lock.h> 49 #include <sys/param.h> 50 #include <sys/time.h> 51 #include <sys/systm.h> 52 #include <sys/kmem.h> 53 #include <sys/signal.h> 54 #include <sys/cred.h> 55 #include <sys/proc.h> 56 #include <sys/user.h> 57 #include <sys/proc.h> 58 #include <sys/vfs.h> 59 #include <sys/vnode.h> 60 #include <sys/buf.h> 61 #include <sys/uio.h> 62 #include <sys/fs/ufs_inode.h> 63 #include <sys/fs/ufs_fs.h> 64 #include <sys/fs/ufs_quota.h> 65 #include <sys/errno.h> 66 #include <sys/cmn_err.h> 67 #include <sys/session.h> 68 #include <sys/debug.h> 69 70 /* 71 * Find the dquot structure that should 72 * be used in checking i/o on inode ip. 73 */ 74 struct dquot * 75 getinoquota(struct inode *ip) 76 { 77 struct dquot *dqp, *xdqp; 78 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 79 80 ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 81 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 82 /* 83 * Check for quotas enabled. 84 */ 85 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 86 return (NULL); 87 } 88 89 /* 90 * Check for someone doing I/O to quota file. 91 */ 92 if (ip == ufsvfsp->vfs_qinod) { 93 return (NULL); 94 } 95 96 /* 97 * Check for a legal inode, e.g. not a shadow inode, 98 * not a extended attribute directory inode and a valid mode. 99 */ 100 ASSERT((ip->i_mode & IFMT) != IFSHAD); 101 ASSERT((ip->i_mode & IFMT) != IFATTRDIR); 102 ASSERT(ip->i_mode); 103 104 if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) { 105 return (NULL); 106 } 107 dqp = xdqp; 108 mutex_enter(&dqp->dq_lock); 109 ASSERT(ip->i_uid == dqp->dq_uid); 110 111 if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 && 112 dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) { 113 dqput(dqp); 114 mutex_exit(&dqp->dq_lock); 115 dqp = NULL; 116 } else { 117 mutex_exit(&dqp->dq_lock); 118 } 119 return (dqp); 120 } 121 122 /* 123 * Update disk usage, and take corrective action. 124 */ 125 int 126 chkdq(struct inode *ip, long change, int force, struct cred *cr, 127 char **uerrp, size_t *lenp) 128 { 129 struct dquot *dqp; 130 uint64_t ncurblocks; 131 struct ufsvfs *ufsvfsp = ip->i_ufsvfs; 132 int error = 0; 133 long abs_change; 134 char *msg1 = 135 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 136 char *msg2 = 137 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n"; 138 char *msg3 = 139 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n"; 140 char *msg4 = 141 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n"; 142 char *errmsg = NULL; 143 time_t now; 144 145 /* 146 * Shadow inodes do not need to hold the vfs_dqrwlock lock. 147 */ 148 ASSERT((ip->i_mode & IFMT) == IFSHAD || 149 RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)); 150 ASSERT(RW_WRITE_HELD(&ip->i_contents)); 151 152 if (change == 0) 153 return (0); 154 dqp = ip->i_dquot; 155 156 /* 157 * Make sure the quota info record matches the owner. 158 */ 159 ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid); 160 161 #ifdef DEBUG 162 /* 163 * Shadow inodes and extended attribute directories 164 * should not have quota info records. 165 */ 166 if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) { 167 ASSERT(dqp == NULL); 168 } 169 /* 170 * Paranoia for verifying that quotas are okay. 171 */ 172 else { 173 struct dquot *expect_dq; 174 int mismatch_ok = 0; 175 176 /* Get current quota information */ 177 expect_dq = getinoquota(ip); 178 /* 179 * We got NULL back from getinoquota(), but there is 180 * no error code return from that interface and some 181 * errors are "ok" because we may be testing via error 182 * injection. If this is not the quota inode then we 183 * use getdiskquota() to see if there is an error and 184 * if the error is ok. 185 */ 186 if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) { 187 int error; 188 struct dquot *xdqp; 189 190 error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, 191 &xdqp); 192 switch (error) { 193 /* 194 * Either the error was transient or the quota 195 * info record has no limits which gets optimized 196 * out by getinoquota(). 197 */ 198 case 0: 199 if (xdqp->dq_fhardlimit == 0 && 200 xdqp->dq_fsoftlimit == 0 && 201 xdqp->dq_bhardlimit == 0 && 202 xdqp->dq_bsoftlimit == 0) { 203 mutex_enter(&xdqp->dq_lock); 204 dqput(xdqp); 205 mutex_exit(&xdqp->dq_lock); 206 } else { 207 expect_dq = xdqp; 208 } 209 break; 210 211 case ESRCH: /* quotas are not enabled */ 212 case EINVAL: /* error flag set on cached record */ 213 case EUSERS: /* quota table is full */ 214 case EIO: /* I/O error */ 215 mismatch_ok = 1; 216 break; 217 } 218 } 219 220 /* 221 * Make sure dqp and the current quota info agree. 222 * The first part of the #ifndef is the quick way to 223 * do the check and should be part of the standard 224 * DEBUG code. The #else part is useful if you are 225 * actually chasing an inconsistency and don't want 226 * to have to look at stack frames to figure which 227 * variable has what value. 228 */ 229 #ifndef CHASE_QUOTA 230 ASSERT(mismatch_ok || dqp == expect_dq); 231 #else /* CHASE_QUOTA */ 232 if (expect_dq == NULL) { 233 /* 234 * If you hit this ASSERT() you know that quota 235 * subsystem does not expect quota info for this 236 * inode, but the inode has it. 237 */ 238 ASSERT(mismatch_ok || dqp == NULL); 239 } else { 240 /* 241 * If you hit this ASSERT() you know that quota 242 * subsystem expects quota info for this inode, 243 * but the inode does not have it. 244 */ 245 ASSERT(dqp); 246 /* 247 * If you hit this ASSERT() you know that quota 248 * subsystem expects quota info for this inode 249 * and the inode has quota info, but the two 250 * quota info pointers are not the same. 251 */ 252 ASSERT(dqp == expect_dq); 253 } 254 #endif /* !CHASE_QUOTA */ 255 /* 256 * Release for getinoquota() above or getdiskquota() 257 * call when error is transient. 258 */ 259 if (expect_dq) { 260 mutex_enter(&expect_dq->dq_lock); 261 dqput(expect_dq); 262 mutex_exit(&expect_dq->dq_lock); 263 } 264 } 265 #endif /* DEBUG */ 266 267 /* 268 * Shadow inodes and extended attribute directories 269 * do not have quota info records. 270 */ 271 if (dqp == NULL) 272 return (0); 273 /* 274 * Quotas are not enabled on this file system so there is nothing 275 * more to do. 276 */ 277 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 278 return (0); 279 } 280 mutex_enter(&dqp->dq_lock); 281 if (change < 0) { 282 dqp->dq_flags |= DQ_MOD; 283 abs_change = -change; /* abs_change must be positive */ 284 if (dqp->dq_curblocks < abs_change) 285 dqp->dq_curblocks = 0; 286 else 287 dqp->dq_curblocks += change; 288 if (dqp->dq_curblocks < dqp->dq_bsoftlimit) 289 dqp->dq_btimelimit = 0; 290 dqp->dq_flags &= ~DQ_BLKS; 291 TRANS_QUOTA(dqp); 292 mutex_exit(&dqp->dq_lock); 293 return (0); 294 } 295 296 /* 297 * Adding 'change' to dq_curblocks could cause an overflow. 298 * So store the result in a 64-bit variable and check for 299 * overflow below. 300 */ 301 ncurblocks = (uint64_t)dqp->dq_curblocks + change; 302 303 /* 304 * Allocation. Check hard and soft limits. 305 * Skip checks for uid 0 owned files. 306 * This check used to require both euid and ip->i_uid 307 * to be 0; but there are no quotas for uid 0 so 308 * it really doesn't matter who is writing to the 309 * root owned file. And even root cannot write 310 * past a user's quota limit. 311 */ 312 if (ip->i_uid == 0) 313 goto out; 314 315 /* 316 * Disallow allocation if it would bring the current usage over 317 * the hard limit or if the user is over their soft limit and their 318 * time has run out. 319 */ 320 if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit && 321 !force) { 322 /* If the user was not informed yet and the caller */ 323 /* is the owner of the file */ 324 if ((dqp->dq_flags & DQ_BLKS) == 0 && 325 ip->i_uid == crgetruid(cr)) { 326 errmsg = msg1; 327 dqp->dq_flags |= DQ_BLKS; 328 } 329 error = EDQUOT; 330 goto out; 331 } 332 if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) { 333 now = gethrestime_sec(); 334 if (dqp->dq_curblocks < dqp->dq_bsoftlimit || 335 dqp->dq_btimelimit == 0) { 336 dqp->dq_flags |= DQ_MOD; 337 dqp->dq_btimelimit = now + 338 ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data) 339 ->vfs_btimelimit; 340 if (ip->i_uid == crgetruid(cr)) { 341 errmsg = msg2; 342 } 343 } else if (now > dqp->dq_btimelimit && !force) { 344 /* If the user was not informed yet and the */ 345 /* caller is the owner of the file */ 346 if ((dqp->dq_flags & DQ_BLKS) == 0 && 347 ip->i_uid == crgetruid(cr)) { 348 errmsg = msg3; 349 dqp->dq_flags |= DQ_BLKS; 350 } 351 error = EDQUOT; 352 } 353 } 354 out: 355 if (error == 0) { 356 dqp->dq_flags |= DQ_MOD; 357 /* 358 * ncurblocks can be bigger than the maximum 359 * number that can be represented in 32-bits. 360 * When copying ncurblocks to dq_curblocks 361 * (an unsigned 32-bit quantity), make sure there 362 * is no overflow. The only way this can happen 363 * is if "force" is set. Otherwise, this allocation 364 * would have exceeded the hard limit check above 365 * (since the hard limit is a 32-bit quantity). 366 */ 367 if (ncurblocks > 0xffffffffLL) { 368 dqp->dq_curblocks = 0xffffffff; 369 errmsg = msg4; 370 } else { 371 dqp->dq_curblocks = ncurblocks; 372 } 373 } 374 375 if (dqp->dq_flags & DQ_MOD) 376 TRANS_QUOTA(dqp); 377 378 mutex_exit(&dqp->dq_lock); 379 /* 380 * Check for any error messages to be sent 381 */ 382 if (errmsg != NULL) { 383 /* 384 * Send message to the error log. 385 */ 386 if (uerrp != NULL) { 387 /* 388 * Set up message caller should send to user; 389 * gets copied to the message buffer as a side- 390 * effect of the caller's uprintf(). 391 */ 392 *lenp = strlen(errmsg) + 20 + 20 + 393 strlen(ip->i_fs->fs_fsmnt) + 1; 394 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 395 if (*uerrp != NULL) { 396 /* errmsg+1 => skip leading ! */ 397 (void) sprintf(*uerrp, errmsg+1, 398 (int)ttoproc(curthread)->p_pid, 399 (int)ip->i_uid, (int)ip->i_number, 400 ip->i_fs->fs_fsmnt); 401 } 402 } else { 403 /* 404 * Caller doesn't care, so just copy to the 405 * message buffer. 406 */ 407 cmn_err(CE_NOTE, errmsg, 408 (int)ttoproc(curthread)->p_pid, 409 (int)ip->i_uid, (int)ip->i_number, 410 ip->i_fs->fs_fsmnt); 411 } 412 } 413 return (error); 414 } 415 416 /* 417 * Check the inode limit, applying corrective action. 418 */ 419 int 420 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid, 421 int force, struct cred *cr, char **uerrp, size_t *lenp) 422 { 423 struct dquot *dqp, *xdqp; 424 unsigned int ncurfiles; 425 char *errmsg = NULL; 426 char *err1 = 427 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n"; 428 char *err2 = 429 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n"; 430 char *err3 = 431 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n"; 432 int error = 0; 433 time_t now; 434 435 ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock)); 436 /* 437 * Change must be either a single increment or decrement. 438 * If change is an increment, then ip must be NULL. 439 */ 440 ASSERT(change == 1 || change == -1); 441 ASSERT(change != 1 || ip == NULL); 442 443 /* 444 * Quotas are not enabled so bail out now. 445 */ 446 if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) { 447 return (0); 448 } 449 450 /* 451 * Free a specific inode. 452 */ 453 if (change == -1 && ip) { 454 dqp = ip->i_dquot; 455 /* 456 * Shadow inodes and extended attribute directories 457 * do not have quota info records. 458 */ 459 if (dqp == NULL) 460 return (0); 461 mutex_enter(&dqp->dq_lock); 462 if (dqp->dq_curfiles) { 463 dqp->dq_curfiles--; 464 dqp->dq_flags |= DQ_MOD; 465 } 466 if (dqp->dq_curfiles < dqp->dq_fsoftlimit) { 467 dqp->dq_ftimelimit = 0; 468 dqp->dq_flags |= DQ_MOD; 469 } 470 dqp->dq_flags &= ~DQ_FILES; 471 if (dqp->dq_flags & DQ_MOD) 472 TRANS_QUOTA(dqp); 473 mutex_exit(&dqp->dq_lock); 474 return (0); 475 } 476 477 /* 478 * Allocation or deallocation without a specific inode. 479 * Get dquot for for uid, fs. 480 */ 481 if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) { 482 return (0); 483 } 484 dqp = xdqp; 485 mutex_enter(&dqp->dq_lock); 486 if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) { 487 dqput(dqp); 488 mutex_exit(&dqp->dq_lock); 489 return (0); 490 } 491 492 /* 493 * Skip checks for uid 0 owned files. 494 * This check used to require both euid and uid 495 * to be 0; but there are no quotas for uid 0 so 496 * it really doesn't matter who is writing to the 497 * root owned file. And even root can not write 498 * past the user's quota limit. 499 */ 500 if (uid == 0) 501 goto out; 502 503 /* 504 * Theoretically, this could overflow, but in practice, it 505 * won't. Multi-terabyte file systems are required to have an 506 * nbpi value of at least 1MB. In order to overflow this 507 * field, there would have to be 2^32 inodes in the file. 508 * That would imply a file system of 2^32 * 1MB, which is 509 * 2^(32 + 20), which is 4096 terabytes, which is not 510 * contemplated for ufs any time soon. 511 */ 512 ncurfiles = dqp->dq_curfiles + change; 513 514 /* 515 * Dissallow allocation if it would bring the current usage over 516 * the hard limit or if the user is over their soft limit and their 517 * time has run out. 518 */ 519 if (change == 1 && ncurfiles >= dqp->dq_fhardlimit && 520 dqp->dq_fhardlimit && !force) { 521 /* If the user was not informed yet and the caller */ 522 /* is the owner of the file */ 523 if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) { 524 errmsg = err1; 525 dqp->dq_flags |= DQ_FILES; 526 } 527 error = EDQUOT; 528 } else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit && 529 dqp->dq_fsoftlimit) { 530 now = gethrestime_sec(); 531 if (ncurfiles == dqp->dq_fsoftlimit || 532 dqp->dq_ftimelimit == 0) { 533 dqp->dq_flags |= DQ_MOD; 534 dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit; 535 /* If the caller owns the file */ 536 if (uid == crgetruid(cr)) 537 errmsg = err2; 538 } else if (now > dqp->dq_ftimelimit && !force) { 539 /* If the user was not informed yet and the */ 540 /* caller is the owner of the file */ 541 if ((dqp->dq_flags & DQ_FILES) == 0 && 542 uid == crgetruid(cr)) { 543 errmsg = err3; 544 dqp->dq_flags |= DQ_FILES; 545 } 546 error = EDQUOT; 547 } 548 } 549 out: 550 if (error == 0) { 551 dqp->dq_flags |= DQ_MOD; 552 dqp->dq_curfiles += change; 553 } 554 if (dqp->dq_flags & DQ_MOD) 555 TRANS_QUOTA(dqp); 556 dqput(dqp); 557 mutex_exit(&dqp->dq_lock); 558 /* 559 * Check for any error messages to be sent 560 */ 561 if (errmsg != NULL) { 562 /* 563 * Send message to the error log. 564 */ 565 if (uerrp != NULL) { 566 /* 567 * Set up message caller should send to user; 568 * gets copied to the message buffer as a side- 569 * effect of the caller's uprintf(). 570 */ 571 *lenp = strlen(errmsg) + 20 + 20 + 572 strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1; 573 *uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP); 574 if (*uerrp != NULL) { 575 /* errmsg+1 => skip leading ! */ 576 (void) sprintf(*uerrp, errmsg+1, 577 (int)ttoproc(curthread)->p_pid, 578 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 579 } 580 } else { 581 /* 582 * Caller doesn't care, so just copy to the 583 * message buffer. 584 */ 585 cmn_err(CE_NOTE, errmsg, 586 (int)ttoproc(curthread)->p_pid, 587 (int)uid, ufsvfsp->vfs_fs->fs_fsmnt); 588 } 589 } 590 return (error); 591 } 592 593 /* 594 * Release a dquot. 595 */ 596 void 597 dqrele(struct dquot *dqp) 598 { 599 /* 600 * Shadow inodes and extended attribute directories 601 * do not have quota info records. 602 */ 603 if (dqp != NULL) { 604 mutex_enter(&dqp->dq_lock); 605 if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD) 606 dqupdate(dqp); 607 dqput(dqp); 608 mutex_exit(&dqp->dq_lock); 609 } 610 } 611