1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/t_lock.h>
31 #include <sys/systm.h>
32 #include <sys/time.h>
33 #include <sys/sysmacros.h>
34 #include <sys/proc.h>
35 #include <sys/disp.h>
36 #include <sys/user.h>
37 #include <sys/time.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/stat.h>
41 #include <sys/mode.h>
42 #include <sys/errno.h>
43 #include <sys/kmem.h>
44 #include <vm/seg.h>
45 #include <vm/seg_map.h>
46 #include <vm/anon.h>
47 #include <vm/page.h>
48 #include <vm/pvn.h>
49 #include <sys/fs/tmp.h>
50 #include <sys/fs/tmpnode.h>
51 #include <sys/debug.h>
52 #include <sys/cmn_err.h>
53 #include <sys/swap.h>
54 #include <sys/vtrace.h>
55 
56 /*
57  * Reserve swap space for the size of the file.
58  * Called before growing a file (i.e. ftruncate, write)
59  * Returns 0 on success.
60  */
61 int
62 tmp_resv(
63 	struct tmount *tm,
64 	struct tmpnode *tp,
65 	size_t delta,		/* size needed */
66 	int pagecreate)		/* call anon_resv if set */
67 {
68 	pgcnt_t pages = btopr(delta);
69 	zone_t *zone;
70 
71 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
72 	ASSERT(tp->tn_type == VREG);
73 	/*
74 	 * pagecreate is set only if we actually need to call anon_resv
75 	 * to reserve an additional page of anonymous memory.
76 	 * Since anon_resv always reserves a page at a time,
77 	 * it should only get called when we know we're growing the
78 	 * file into a new page or filling a hole.
79 	 *
80 	 * Deny if trying to reserve more than tmpfs can allocate
81 	 */
82 	zone = tm->tm_vfsp->vfs_zone;
83 	if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
84 	    (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
85 	    (anon_resv_zone(delta, zone) == 0))) {
86 		return (1);
87 	}
88 
89 	/*
90 	 * update statistics
91 	 */
92 	if (pagecreate) {
93 		mutex_enter(&tm->tm_contents);
94 		tm->tm_anonmem += pages;
95 		mutex_exit(&tm->tm_contents);
96 
97 		TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu",
98 		    tp, delta);
99 	}
100 
101 	return (0);
102 }
103 
104 /*
105  * tmp_unresv - called when truncating a file
106  * Only called if we're freeing at least pagesize bytes
107  * because anon_unresv does a btopr(delta)
108  */
109 static void
110 tmp_unresv(
111 	struct tmount *tm,
112 	struct tmpnode *tp,
113 	size_t delta)
114 {
115 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
116 	ASSERT(tp->tn_type == VREG);
117 
118 	anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
119 
120 	mutex_enter(&tm->tm_contents);
121 	tm->tm_anonmem -= btopr(delta);
122 	mutex_exit(&tm->tm_contents);
123 
124 	TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta);
125 }
126 
127 #define	TMP_INIT_SZ	128
128 
129 /*
130  * Grow the anon pointer array to cover 'newsize' bytes plus slack.
131  */
132 void
133 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize)
134 {
135 	pgcnt_t np = btopr(newsize);
136 
137 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
138 	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
139 	ASSERT(tp->tn_type == VREG);
140 
141 	if (tp->tn_asize >= np)
142 		return;
143 
144 	if (newsize > MAXOFF_T)
145 		np = btopr(MAXOFF_T);
146 
147 	if (tp->tn_anon == NULL) {
148 		tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP);
149 		tp->tn_asize = tp->tn_anon->size;
150 		return;
151 	}
152 
153 	tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize,
154 	    np - tp->tn_asize, ANON_SLEEP);
155 	ASSERT(tp->tn_asize >= np);
156 }
157 
158 /*
159  * Initialize a tmpnode and add it to file list under mount point.
160  */
161 void
162 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred)
163 {
164 	struct vnode *vp;
165 	timestruc_t now;
166 
167 	ASSERT(vap != NULL);
168 
169 	rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL);
170 	mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL);
171 	t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
172 	t->tn_mask = 0;
173 	t->tn_type = vap->va_type;
174 	t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3);
175 	t->tn_nlink = 1;
176 	t->tn_size = 0;
177 
178 	if (cred == NULL) {
179 		t->tn_uid = vap->va_uid;
180 		t->tn_gid = vap->va_gid;
181 	} else {
182 		t->tn_uid = crgetuid(cred);
183 		t->tn_gid = crgetgid(cred);
184 	}
185 
186 	t->tn_fsid = tm->tm_dev;
187 	t->tn_rdev = vap->va_rdev;
188 	t->tn_blksize = PAGESIZE;
189 	t->tn_nblocks = 0;
190 	gethrestime(&now);
191 	t->tn_atime = now;
192 	t->tn_mtime = now;
193 	t->tn_ctime = now;
194 	t->tn_seq = 0;
195 	t->tn_dir = NULL;
196 
197 	t->tn_vnode = vn_alloc(KM_SLEEP);
198 	vp = TNTOV(t);
199 	vn_setops(vp, tmp_vnodeops);
200 	vp->v_vfsp = tm->tm_vfsp;
201 	vp->v_type = vap->va_type;
202 	vp->v_rdev = vap->va_rdev;
203 	vp->v_data = (caddr_t)t;
204 	mutex_enter(&tm->tm_contents);
205 	/*
206 	 * Increment the pseudo generation number for this tmpnode.
207 	 * Since tmpnodes are allocated and freed, there really is no
208 	 * particular generation number for a new tmpnode.  Just fake it
209 	 * by using a counter in each file system.
210 	 */
211 	t->tn_gen = tm->tm_gen++;
212 
213 	/*
214 	 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs
215 	 * Root directory is handled specially in tmp_mount.
216 	 */
217 	if (tm->tm_rootnode != (struct tmpnode *)NULL) {
218 		t->tn_forw = NULL;
219 		t->tn_back = tm->tm_rootnode->tn_back;
220 		t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t;
221 	}
222 	mutex_exit(&tm->tm_contents);
223 	vn_exists(vp);
224 }
225 
226 /*
227  * tmpnode_trunc - set length of tmpnode and deal with resources
228  */
229 int
230 tmpnode_trunc(
231 	struct tmount *tm,
232 	struct tmpnode *tp,
233 	ulong_t newsize)
234 {
235 	size_t oldsize = tp->tn_size;
236 	size_t delta;
237 	struct vnode *vp = TNTOV(tp);
238 	timestruc_t now;
239 	int error = 0;
240 
241 	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
242 	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
243 
244 	if (newsize == oldsize) {
245 		/* Required by POSIX */
246 		goto stamp_out;
247 	}
248 
249 	switch (tp->tn_type) {
250 	case VREG:
251 		/* Growing the file */
252 		if (newsize > oldsize) {
253 			delta = P2ROUNDUP(newsize, PAGESIZE) -
254 			    P2ROUNDUP(oldsize, PAGESIZE);
255 			/*
256 			 * Grow the size of the anon array to the new size
257 			 * Reserve the space for the growth here.
258 			 * We do it this way for now because this is how
259 			 * tmpfs used to do it, and this way the reserved
260 			 * space is alway equal to the file size.
261 			 * Alternatively, we could wait to reserve space 'til
262 			 * someone tries to store into one of the newly
263 			 * trunc'ed up pages. This would give us behavior
264 			 * identical to ufs; i.e., you could fail a
265 			 * fault on storing into a holey region of a file
266 			 * if there is no space in the filesystem to fill
267 			 * the hole at that time.
268 			 */
269 			/*
270 			 * tmp_resv calls anon_resv only if we're extending
271 			 * the file into a new page
272 			 */
273 			if (tmp_resv(tm, tp, delta,
274 			    (btopr(newsize) != btopr(oldsize)))) {
275 				error = ENOSPC;
276 				goto out;
277 			}
278 			tmpnode_growmap(tp, newsize);
279 			tp->tn_size = newsize;
280 			break;
281 		}
282 
283 		/* Free anon pages if shrinking file over page boundary. */
284 		if (btopr(newsize) != btopr(oldsize)) {
285 			pgcnt_t freed;
286 			delta = P2ROUNDUP(oldsize, PAGESIZE) -
287 			    P2ROUNDUP(newsize, PAGESIZE);
288 			freed = anon_pages(tp->tn_anon, btopr(newsize),
289 			    btopr(delta));
290 			tp->tn_nblocks -= freed;
291 			anon_free(tp->tn_anon, btopr(newsize), delta);
292 			tmp_unresv(tm, tp, delta);
293 		}
294 
295 		/*
296 		 * Update the file size now to reflect the pages we just
297 		 * blew away as we're about to drop the
298 		 * contents lock to zero the partial page (which could
299 		 * re-enter tmpfs via getpage and try to reacquire the lock)
300 		 * Once we drop the lock, faulters can fill in holes in
301 		 * the file and if we haven't updated the size they
302 		 * may fill in holes that are beyond EOF, which will then
303 		 * never get cleared.
304 		 */
305 		tp->tn_size = newsize;
306 
307 		/* Zero new size of file to page boundary. */
308 		if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) {
309 			size_t zlen;
310 
311 			zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET);
312 			rw_exit(&tp->tn_contents);
313 			pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen);
314 			rw_enter(&tp->tn_contents, RW_WRITER);
315 		}
316 
317 		if (newsize == 0) {
318 			/* Delete anon array for tmpnode */
319 			ASSERT(tp->tn_nblocks == 0);
320 			ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL);
321 			ASSERT(!vn_has_cached_data(vp));
322 
323 			anon_release(tp->tn_anon, tp->tn_asize);
324 			tp->tn_anon = NULL;
325 			tp->tn_asize = 0;
326 		}
327 		break;
328 	case VLNK:
329 		/*
330 		 * Don't do anything here
331 		 * tmp_inactive frees the memory
332 		 */
333 		if (newsize != 0)
334 			error = EINVAL;
335 		goto out;
336 	case VDIR:
337 		/*
338 		 * Remove all the directory entries under this directory.
339 		 */
340 		if (newsize != 0) {
341 			error = EINVAL;
342 			goto out;
343 		}
344 		tdirtrunc(tp);
345 		ASSERT(tp->tn_nlink == 0);
346 		break;
347 	default:
348 		goto out;
349 	}
350 
351 stamp_out:
352 	gethrestime(&now);
353 	tp->tn_mtime = now;
354 	tp->tn_ctime = now;
355 out:
356 	/*
357 	 * tmpnode_trunc() cannot fail when newsize == 0.
358 	 */
359 	ASSERT(error == 0 || newsize != 0);
360 	return (error);
361 }
362