xref: /illumos-gate/usr/src/uts/common/fs/ufs/quota_ufs.c (revision 32991bed)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  */
26 
27 /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * Routines used in checking limits on file system usage.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/t_lock.h>
49 #include <sys/param.h>
50 #include <sys/time.h>
51 #include <sys/systm.h>
52 #include <sys/kmem.h>
53 #include <sys/signal.h>
54 #include <sys/cred.h>
55 #include <sys/proc.h>
56 #include <sys/user.h>
57 #include <sys/proc.h>
58 #include <sys/vfs.h>
59 #include <sys/vnode.h>
60 #include <sys/buf.h>
61 #include <sys/uio.h>
62 #include <sys/fs/ufs_inode.h>
63 #include <sys/fs/ufs_fs.h>
64 #include <sys/fs/ufs_quota.h>
65 #include <sys/errno.h>
66 #include <sys/cmn_err.h>
67 #include <sys/session.h>
68 #include <sys/debug.h>
69 
70 /*
71  * Find the dquot structure that should
72  * be used in checking i/o on inode ip.
73  */
74 struct dquot *
75 getinoquota(struct inode *ip)
76 {
77 	struct dquot *dqp, *xdqp;
78 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
79 
80 	ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
81 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
82 	/*
83 	 * Check for quotas enabled.
84 	 */
85 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
86 		return (NULL);
87 	}
88 
89 	/*
90 	 * Check for someone doing I/O to quota file.
91 	 */
92 	if (ip == ufsvfsp->vfs_qinod) {
93 		return (NULL);
94 	}
95 
96 	/*
97 	 * Check for a legal inode, e.g. not a shadow inode,
98 	 * not a extended attribute directory inode and a valid mode.
99 	 */
100 	ASSERT((ip->i_mode & IFMT) != IFSHAD);
101 	ASSERT((ip->i_mode & IFMT) != IFATTRDIR);
102 	ASSERT(ip->i_mode);
103 
104 	if (getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0, &xdqp)) {
105 		return (NULL);
106 	}
107 	dqp = xdqp;
108 	mutex_enter(&dqp->dq_lock);
109 	ASSERT(ip->i_uid == dqp->dq_uid);
110 
111 	if (dqp->dq_fhardlimit == 0 && dqp->dq_fsoftlimit == 0 &&
112 	    dqp->dq_bhardlimit == 0 && dqp->dq_bsoftlimit == 0) {
113 		dqput(dqp);
114 		mutex_exit(&dqp->dq_lock);
115 		dqp = NULL;
116 	} else {
117 		mutex_exit(&dqp->dq_lock);
118 	}
119 	return (dqp);
120 }
121 
122 /*
123  * Update disk usage, and take corrective action.
124  */
125 int
126 chkdq(struct inode *ip, long change, int force, struct cred *cr,
127 	char **uerrp, size_t *lenp)
128 {
129 	struct dquot *dqp;
130 	uint64_t ncurblocks;
131 	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;
132 	int error = 0;
133 	long abs_change;
134 	char *msg1 =
135 "!quota_ufs: over hard disk limit (pid %d, uid %d, inum %d, fs %s)\n";
136 	char *msg2 =
137 "!quota_ufs: Warning: over disk limit (pid %d, uid %d, inum %d, fs %s)\n";
138 	char *msg3 =
139 "!quota_ufs: over disk and time limit (pid %d, uid %d, inum %d, fs %s)\n";
140 	char *msg4 =
141 "!quota_ufs: Warning: quota overflow (pid %d, uid %d, inum %d, fs %s)\n";
142 	char *errmsg = NULL;
143 	time_t now;
144 
145 	/*
146 	 * Shadow inodes do not need to hold the vfs_dqrwlock lock.
147 	 */
148 	ASSERT((ip->i_mode & IFMT) == IFSHAD ||
149 	    RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock));
150 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
151 
152 	if (change == 0)
153 		return (0);
154 	dqp = ip->i_dquot;
155 
156 	/*
157 	 * Make sure the quota info record matches the owner.
158 	 */
159 	ASSERT(dqp == NULL || ip->i_uid == dqp->dq_uid);
160 
161 #ifdef DEBUG
162 	/*
163 	 * Shadow inodes and extended attribute directories
164 	 * should not have quota info records.
165 	 */
166 	if ((ip->i_mode & IFMT) == IFSHAD || (ip->i_mode & IFMT) == IFATTRDIR) {
167 		ASSERT(dqp == NULL);
168 	}
169 	/*
170 	 * Paranoia for verifying that quotas are okay.
171 	 */
172 	else {
173 		struct dquot *expect_dq;
174 		int mismatch_ok = 0;
175 
176 		/* Get current quota information */
177 		expect_dq = getinoquota(ip);
178 		/*
179 		 * We got NULL back from getinoquota(), but there is
180 		 * no error code return from that interface and some
181 		 * errors are "ok" because we may be testing via error
182 		 * injection.  If this is not the quota inode then we
183 		 * use getdiskquota() to see if there is an error and
184 		 * if the error is ok.
185 		 */
186 		if (expect_dq == NULL && ip != ufsvfsp->vfs_qinod) {
187 			int error;
188 			struct dquot *xdqp;
189 
190 			error = getdiskquota((uid_t)ip->i_uid, ufsvfsp, 0,
191 			    &xdqp);
192 			switch (error) {
193 			/*
194 			 * Either the error was transient or the quota
195 			 * info record has no limits which gets optimized
196 			 * out by getinoquota().
197 			 */
198 			case 0:
199 				if (xdqp->dq_fhardlimit == 0 &&
200 				    xdqp->dq_fsoftlimit == 0 &&
201 				    xdqp->dq_bhardlimit == 0 &&
202 				    xdqp->dq_bsoftlimit == 0) {
203 					mutex_enter(&xdqp->dq_lock);
204 					dqput(xdqp);
205 					mutex_exit(&xdqp->dq_lock);
206 				} else {
207 					expect_dq = xdqp;
208 				}
209 				break;
210 
211 			case ESRCH:	/* quotas are not enabled */
212 			case EINVAL:	/* error flag set on cached record */
213 			case EUSERS:	/* quota table is full */
214 			case EIO:	/* I/O error */
215 				mismatch_ok = 1;
216 				break;
217 			}
218 		}
219 
220 		/*
221 		 * Make sure dqp and the current quota info agree.
222 		 * The first part of the #ifndef is the quick way to
223 		 * do the check and should be part of the standard
224 		 * DEBUG code. The #else part is useful if you are
225 		 * actually chasing an inconsistency and don't want
226 		 * to have to look at stack frames to figure which
227 		 * variable has what value.
228 		 */
229 #ifndef CHASE_QUOTA
230 		ASSERT(mismatch_ok || dqp == expect_dq);
231 #else /* CHASE_QUOTA */
232 		if (expect_dq == NULL) {
233 			/*
234 			 * If you hit this ASSERT() you know that quota
235 			 * subsystem does not expect quota info for this
236 			 * inode, but the inode has it.
237 			 */
238 			ASSERT(mismatch_ok || dqp == NULL);
239 		} else {
240 			/*
241 			 * If you hit this ASSERT() you know that quota
242 			 * subsystem expects quota info for this inode,
243 			 * but the inode does not have it.
244 			 */
245 			ASSERT(dqp);
246 			/*
247 			 * If you hit this ASSERT() you know that quota
248 			 * subsystem expects quota info for this inode
249 			 * and the inode has quota info, but the two
250 			 * quota info pointers are not the same.
251 			 */
252 			ASSERT(dqp == expect_dq);
253 		}
254 #endif /* !CHASE_QUOTA */
255 		/*
256 		 * Release for getinoquota() above or getdiskquota()
257 		 * call when error is transient.
258 		 */
259 		if (expect_dq) {
260 			mutex_enter(&expect_dq->dq_lock);
261 			dqput(expect_dq);
262 			mutex_exit(&expect_dq->dq_lock);
263 		}
264 	}
265 #endif /* DEBUG */
266 
267 	/*
268 	 * Shadow inodes and extended attribute directories
269 	 * do not have quota info records.
270 	 */
271 	if (dqp == NULL)
272 		return (0);
273 	/*
274 	 * Quotas are not enabled on this file system so there is nothing
275 	 * more to do.
276 	 */
277 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
278 		return (0);
279 	}
280 	mutex_enter(&dqp->dq_lock);
281 	if (change < 0) {
282 		dqp->dq_flags |= DQ_MOD;
283 		abs_change = -change;	/* abs_change must be positive */
284 		if (dqp->dq_curblocks < abs_change)
285 			dqp->dq_curblocks = 0;
286 		else
287 			dqp->dq_curblocks += change;
288 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit)
289 			dqp->dq_btimelimit = 0;
290 		dqp->dq_flags &= ~DQ_BLKS;
291 		TRANS_QUOTA(dqp);
292 		mutex_exit(&dqp->dq_lock);
293 		return (0);
294 	}
295 
296 	/*
297 	 * Adding 'change' to dq_curblocks could cause an overflow.
298 	 * So store the result in a 64-bit variable and check for
299 	 * overflow below.
300 	 */
301 	ncurblocks = (uint64_t)dqp->dq_curblocks + change;
302 
303 	/*
304 	 * Allocation. Check hard and soft limits.
305 	 * Skip checks for uid 0 owned files.
306 	 * This check used to require both euid and ip->i_uid
307 	 * to be 0; but there are no quotas for uid 0 so
308 	 * it really doesn't matter who is writing to the
309 	 * root owned file.  And even root cannot write
310 	 * past a user's quota limit.
311 	 */
312 	if (ip->i_uid == 0)
313 		goto out;
314 
315 	/*
316 	 * Disallow allocation if it would bring the current usage over
317 	 * the hard limit or if the user is over their soft limit and their
318 	 * time has run out.
319 	 */
320 	if (dqp->dq_bhardlimit && ncurblocks >= (uint64_t)dqp->dq_bhardlimit &&
321 	    !force) {
322 		/* If the user was not informed yet and the caller	*/
323 		/* is the owner of the file				*/
324 		if ((dqp->dq_flags & DQ_BLKS) == 0 &&
325 		    ip->i_uid == crgetruid(cr)) {
326 			errmsg = msg1;
327 			dqp->dq_flags |= DQ_BLKS;
328 		}
329 		error = EDQUOT;
330 		goto out;
331 	}
332 	if (dqp->dq_bsoftlimit && ncurblocks >= (uint64_t)dqp->dq_bsoftlimit) {
333 		now = gethrestime_sec();
334 		if (dqp->dq_curblocks < dqp->dq_bsoftlimit ||
335 		    dqp->dq_btimelimit == 0) {
336 			dqp->dq_flags |= DQ_MOD;
337 			dqp->dq_btimelimit = now +
338 			    ((struct ufsvfs *)ITOV(ip)->v_vfsp->vfs_data)
339 			    ->vfs_btimelimit;
340 			if (ip->i_uid == crgetruid(cr)) {
341 				errmsg = msg2;
342 			}
343 		} else if (now > dqp->dq_btimelimit && !force) {
344 			/* If the user was not informed yet and the	*/
345 			/* caller is the owner of the file		*/
346 			if ((dqp->dq_flags & DQ_BLKS) == 0 &&
347 			    ip->i_uid == crgetruid(cr)) {
348 				errmsg = msg3;
349 				dqp->dq_flags |= DQ_BLKS;
350 			}
351 			error = EDQUOT;
352 		}
353 	}
354 out:
355 	if (error == 0) {
356 		dqp->dq_flags |= DQ_MOD;
357 		/*
358 		 * ncurblocks can be bigger than the maximum
359 		 * number that can be represented in 32-bits.
360 		 * When copying ncurblocks to dq_curblocks
361 		 * (an unsigned 32-bit quantity), make sure there
362 		 * is no overflow.  The only way this can happen
363 		 * is if "force" is set.  Otherwise, this allocation
364 		 * would have exceeded the hard limit check above
365 		 * (since the hard limit is a 32-bit quantity).
366 		 */
367 		if (ncurblocks > 0xffffffffLL) {
368 			dqp->dq_curblocks = 0xffffffff;
369 			errmsg = msg4;
370 		} else {
371 			dqp->dq_curblocks = ncurblocks;
372 		}
373 	}
374 
375 	if (dqp->dq_flags & DQ_MOD)
376 		TRANS_QUOTA(dqp);
377 
378 	mutex_exit(&dqp->dq_lock);
379 	/*
380 	 * Check for any error messages to be sent
381 	 */
382 	if (errmsg != NULL) {
383 		/*
384 		 * Send message to the error log.
385 		 */
386 		if (uerrp != NULL) {
387 			/*
388 			 * Set up message caller should send to user;
389 			 * gets copied to the message buffer as a side-
390 			 * effect of the caller's uprintf().
391 			 */
392 			*lenp = strlen(errmsg) + 20 + 20 +
393 			    strlen(ip->i_fs->fs_fsmnt) + 1;
394 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
395 			if (*uerrp != NULL) {
396 				/* errmsg+1 => skip leading ! */
397 				(void) sprintf(*uerrp, errmsg+1,
398 				    (int)ttoproc(curthread)->p_pid,
399 				    (int)ip->i_uid, (int)ip->i_number,
400 				    ip->i_fs->fs_fsmnt);
401 			}
402 		} else {
403 			/*
404 			 * Caller doesn't care, so just copy to the
405 			 * message buffer.
406 			 */
407 			cmn_err(CE_NOTE, errmsg,
408 			    (int)ttoproc(curthread)->p_pid,
409 			    (int)ip->i_uid, (int)ip->i_number,
410 			    ip->i_fs->fs_fsmnt);
411 		}
412 	}
413 	return (error);
414 }
415 
416 /*
417  * Check the inode limit, applying corrective action.
418  */
419 int
420 chkiq(struct ufsvfs *ufsvfsp, int change, struct inode *ip, uid_t uid,
421 	int force, struct cred *cr, char **uerrp, size_t *lenp)
422 {
423 	struct dquot *dqp, *xdqp;
424 	unsigned int ncurfiles;
425 	char *errmsg = NULL;
426 	char *err1 =
427 "!quota_ufs: over file hard limit (pid %d, uid %d, fs %s)\n";
428 	char *err2 =
429 "!quota_ufs: Warning: too many files (pid %d, uid %d, fs %s)\n";
430 	char *err3 =
431 "!quota_ufs: over file and time limit (pid %d, uid %d, fs %s)\n";
432 	int error = 0;
433 	time_t now;
434 
435 	ASSERT(RW_READ_HELD(&ufsvfsp->vfs_dqrwlock));
436 	/*
437 	 * Change must be either a single increment or decrement.
438 	 * If change is an increment, then ip must be NULL.
439 	 */
440 	ASSERT(change == 1 || change == -1);
441 	ASSERT(change != 1 || ip == NULL);
442 
443 	/*
444 	 * Quotas are not enabled so bail out now.
445 	 */
446 	if ((ufsvfsp->vfs_qflags & MQ_ENABLED) == 0) {
447 		return (0);
448 	}
449 
450 	/*
451 	 * Free a specific inode.
452 	 */
453 	if (change == -1 && ip) {
454 		dqp = ip->i_dquot;
455 		/*
456 		 * Shadow inodes and extended attribute directories
457 		 * do not have quota info records.
458 		 */
459 		if (dqp == NULL)
460 			return (0);
461 		mutex_enter(&dqp->dq_lock);
462 		if (dqp->dq_curfiles) {
463 			dqp->dq_curfiles--;
464 			dqp->dq_flags |= DQ_MOD;
465 		}
466 		if (dqp->dq_curfiles < dqp->dq_fsoftlimit) {
467 			dqp->dq_ftimelimit = 0;
468 			dqp->dq_flags |= DQ_MOD;
469 		}
470 		dqp->dq_flags &= ~DQ_FILES;
471 		if (dqp->dq_flags & DQ_MOD)
472 			TRANS_QUOTA(dqp);
473 		mutex_exit(&dqp->dq_lock);
474 		return (0);
475 	}
476 
477 	/*
478 	 * Allocation or deallocation without a specific inode.
479 	 * Get dquot for for uid, fs.
480 	 */
481 	if (getdiskquota(uid, ufsvfsp, 0, &xdqp)) {
482 		return (0);
483 	}
484 	dqp = xdqp;
485 	mutex_enter(&dqp->dq_lock);
486 	if (dqp->dq_fsoftlimit == 0 && dqp->dq_fhardlimit == 0) {
487 		dqput(dqp);
488 		mutex_exit(&dqp->dq_lock);
489 		return (0);
490 	}
491 
492 	/*
493 	 * Skip checks for uid 0 owned files.
494 	 * This check used to require both euid and uid
495 	 * to be 0; but there are no quotas for uid 0 so
496 	 * it really doesn't matter who is writing to the
497 	 * root owned file.  And even root can not write
498 	 * past the user's quota limit.
499 	 */
500 	if (uid == 0)
501 		goto out;
502 
503 	/*
504 	 * Theoretically, this could overflow, but in practice, it
505 	 * won't.  Multi-terabyte file systems are required to have an
506 	 * nbpi value of at least 1MB.  In order to overflow this
507 	 * field, there would have to be 2^32 inodes in the file.
508 	 * That would imply a file system of 2^32 * 1MB, which is
509 	 * 2^(32 + 20), which is 4096 terabytes, which is not
510 	 * contemplated for ufs any time soon.
511 	 */
512 	ncurfiles = dqp->dq_curfiles + change;
513 
514 	/*
515 	 * Dissallow allocation if it would bring the current usage over
516 	 * the hard limit or if the user is over their soft limit and their
517 	 * time has run out.
518 	 */
519 	if (change == 1 && ncurfiles >= dqp->dq_fhardlimit &&
520 	    dqp->dq_fhardlimit && !force) {
521 		/* If the user was not informed yet and the caller	*/
522 		/* is the owner of the file 				*/
523 		if ((dqp->dq_flags & DQ_FILES) == 0 && uid == crgetruid(cr)) {
524 			errmsg = err1;
525 			dqp->dq_flags |= DQ_FILES;
526 		}
527 		error = EDQUOT;
528 	} else if (change == 1 && ncurfiles >= dqp->dq_fsoftlimit &&
529 	    dqp->dq_fsoftlimit) {
530 		now = gethrestime_sec();
531 		if (ncurfiles == dqp->dq_fsoftlimit ||
532 		    dqp->dq_ftimelimit == 0) {
533 			dqp->dq_flags |= DQ_MOD;
534 			dqp->dq_ftimelimit = now + ufsvfsp->vfs_ftimelimit;
535 			/* If the caller owns the file */
536 			if (uid == crgetruid(cr))
537 				errmsg = err2;
538 		} else if (now > dqp->dq_ftimelimit && !force) {
539 			/* If the user was not informed yet and the	*/
540 			/* caller is the owner of the file 		*/
541 			if ((dqp->dq_flags & DQ_FILES) == 0 &&
542 			    uid == crgetruid(cr)) {
543 				errmsg = err3;
544 				dqp->dq_flags |= DQ_FILES;
545 			}
546 			error = EDQUOT;
547 		}
548 	}
549 out:
550 	if (error == 0) {
551 		dqp->dq_flags |= DQ_MOD;
552 		dqp->dq_curfiles += change;
553 	}
554 	if (dqp->dq_flags & DQ_MOD)
555 		TRANS_QUOTA(dqp);
556 	dqput(dqp);
557 	mutex_exit(&dqp->dq_lock);
558 	/*
559 	 * Check for any error messages to be sent
560 	 */
561 	if (errmsg != NULL) {
562 		/*
563 		 * Send message to the error log.
564 		 */
565 		if (uerrp != NULL) {
566 			/*
567 			 * Set up message caller should send to user;
568 			 * gets copied to the message buffer as a side-
569 			 * effect of the caller's uprintf().
570 			 */
571 			*lenp = strlen(errmsg) + 20 + 20 +
572 			    strlen(ufsvfsp->vfs_fs->fs_fsmnt) + 1;
573 			*uerrp = (char *)kmem_alloc(*lenp, KM_NOSLEEP);
574 			if (*uerrp != NULL) {
575 				/* errmsg+1 => skip leading ! */
576 				(void) sprintf(*uerrp, errmsg+1,
577 				    (int)ttoproc(curthread)->p_pid,
578 				    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
579 			}
580 		} else {
581 			/*
582 			 * Caller doesn't care, so just copy to the
583 			 * message buffer.
584 			 */
585 			cmn_err(CE_NOTE, errmsg,
586 			    (int)ttoproc(curthread)->p_pid,
587 			    (int)uid, ufsvfsp->vfs_fs->fs_fsmnt);
588 		}
589 	}
590 	return (error);
591 }
592 
593 /*
594  * Release a dquot.
595  */
596 void
597 dqrele(struct dquot *dqp)
598 {
599 	/*
600 	 * Shadow inodes and extended attribute directories
601 	 * do not have quota info records.
602 	 */
603 	if (dqp != NULL) {
604 		mutex_enter(&dqp->dq_lock);
605 		if (dqp->dq_cnt == 1 && dqp->dq_flags & DQ_MOD)
606 			dqupdate(dqp);
607 		dqput(dqp);
608 		mutex_exit(&dqp->dq_lock);
609 	}
610 }
611