1 /*-
2 * modified for EXT2FS support in Lites 1.1
3 *
4 * Aug 1995, Godmar Back (gback@cs.utah.edu)
5 * University of Utah, Department of Computer Science
6 */
7 /*-
8 * SPDX-License-Identifier: BSD-3-Clause
9 *
10 * Copyright (c) 1982, 1986, 1989, 1993
11 * The Regents of the University of California. All rights reserved.
12 * (c) UNIX System Laboratories, Inc.
13 * All or some portions of this file are derived from material licensed
14 * to the University of California by American Telephone and Telegraph
15 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
16 * the permission of UNIX System Laboratories, Inc.
17 *
18 * Redistribution and use in source and binary forms, with or without
19 * modification, are permitted provided that the following conditions
20 * are met:
21 * 1. Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * 2. Redistributions in binary form must reproduce the above copyright
24 * notice, this list of conditions and the following disclaimer in the
25 * documentation and/or other materials provided with the distribution.
26 * 3. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94
43 * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95
44 * $FreeBSD$
45 */
46
47 #include "opt_suiddir.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/kernel.h>
52 #include <sys/fcntl.h>
53 #include <sys/filio.h>
54 #include <sys/limits.h>
55 #include <sys/stat.h>
56 #include <sys/bio.h>
57 #include <sys/buf2.h>
58 #include <sys/endian.h>
59 #include <sys/caps.h>
60 #include <sys/mount.h>
61 #include <sys/unistd.h>
62 #include <sys/time.h>
63 #include <sys/vnode.h>
64 #include <sys/namei.h>
65 #include <sys/lockf.h>
66 #include <sys/event.h>
67 #include <sys/conf.h>
68 #include <sys/file.h>
69 #include <sys/vmmeter.h>
70 #include <sys/vfsops.h>
71 #include <sys/malloc.h>
72 #include <sys/uio.h>
73 #include <sys/jail.h>
74
75 #include <vm/vm.h>
76 #include <vm/vm_param.h>
77 #include <vm/vm_extern.h>
78 #include <vm/vm_object.h>
79 #include <vm/vm_page2.h>
80 #include <vm/vm_pager.h>
81 #include <vm/vnode_pager.h>
82
83 #include <vfs/ufs/dir.h>
84 #include <vfs/fifofs/fifo.h>
85
86 #include <vfs/ext2fs/fs.h>
87 #include <vfs/ext2fs/inode.h>
88 #include <vfs/ext2fs/ext2fs.h>
89 #include <vfs/ext2fs/ext2_extern.h>
90 #include <vfs/ext2fs/ext2_dinode.h>
91 #include <vfs/ext2fs/ext2_dir.h>
92 #include <vfs/ext2fs/ext2_mount.h>
93 #include <vfs/ext2fs/ext2_extents.h>
94
SDT_PROVIDER_DECLARE(ext2fs);
/*
 * ext2fs trace probe, fired from this file through
 * SDT_PROBE2(ext2fs, , vnops, trace, ...):
 *   arg0: verbosity ("int").  Higher numbers give more verbose messages.
 *   arg1: textual message ("char*").
 */
SDT_PROBE_DEFINE2(ext2fs, , vnops, trace, "int", "char*");
102
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions appear.
 */
static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *);

static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *);
static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *,
    struct thread *);
108
/*
 * A virgin directory (no blushing please).
 *
 * Each template holds the "." entry (record length 12) followed by the
 * ".." entry (record length DIRBLKSIZ - 12); the record lengths are stored
 * little-endian via htole16().  The inode numbers are left as 0 here and
 * are filled in by the user of the template.
 *
 * mastertemplate tags both entries with EXT2_FT_DIR, omastertemplate with
 * EXT2_FT_UNKNOWN; ext2_mkdir() picks mastertemplate when the filesystem
 * has the EXT2F_INCOMPAT_FTYPE feature and omastertemplate otherwise.
 *
 * Note that the type and namlen fields are reversed relative to ext2.
 * Also, we don't use `struct odirtemplate', since it would just cause
 * endianness problems.
 */
static struct dirtemplate mastertemplate = {
	0, htole16(12), 1, EXT2_FT_DIR, ".",
	0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_DIR, ".."
};
static struct dirtemplate omastertemplate = {
	0, htole16(12), 1, EXT2_FT_UNKNOWN, ".",
	0, htole16(DIRBLKSIZ - 12), 2, EXT2_FT_UNKNOWN, ".."
};
123
124 void
ext2_itimes(struct vnode * vp)125 ext2_itimes(struct vnode *vp)
126 {
127 struct inode *ip;
128 struct timespec ts;
129
130 ip = VTOI(vp);
131 if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
132 return;
133 if ((vp->v_type == VBLK || vp->v_type == VCHR))
134 ip->i_flag |= IN_LAZYMOD;
135 else
136 ip->i_flag |= IN_MODIFIED;
137 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
138 vfs_timestamp(&ts);
139 if (ip->i_flag & IN_ACCESS) {
140 ip->i_atime = ts.tv_sec;
141 ip->i_atimensec = ts.tv_nsec;
142 }
143 if (ip->i_flag & IN_UPDATE) {
144 ip->i_mtime = ts.tv_sec;
145 ip->i_mtimensec = ts.tv_nsec;
146 ip->i_modrev++;
147 }
148 if (ip->i_flag & IN_CHANGE) {
149 ip->i_ctime = ts.tv_sec;
150 ip->i_ctimensec = ts.tv_nsec;
151 }
152 }
153 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
154 }
155
156 /*
157 * Create a regular file
158 */
159 static int
ext2_create(struct vop_old_create_args * ap)160 ext2_create(struct vop_old_create_args *ap)
161 {
162 int error;
163
164 error =
165 ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
166 ap->a_dvp, ap->a_vpp, ap->a_cnp);
167 if (error != 0)
168 return (error);
169 return (0);
170 }
171
172 static int
ext2_open(struct vop_open_args * ap)173 ext2_open(struct vop_open_args *ap)
174 {
175
176 if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR)
177 return (EOPNOTSUPP);
178
179 /*
180 * Files marked append-only must be opened for appending.
181 */
182 if ((VTOI(ap->a_vp)->i_flags & APPEND) &&
183 (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
184 return (EPERM);
185
186 return (vop_stdopen(ap));
187 }
188
189 /*
190 * Close called.
191 *
192 * Update the times on the inode.
193 */
194 static int
ext2_close(struct vop_close_args * ap)195 ext2_close(struct vop_close_args *ap)
196 {
197 struct vnode *vp = ap->a_vp;
198
199 if (VREFCNT(vp) > 1)
200 ext2_itimes(vp);
201 return (vop_stdclose(ap));
202 }
203
204 static int
ext2_access(struct vop_access_args * ap)205 ext2_access(struct vop_access_args *ap)
206 {
207 struct vnode *vp = ap->a_vp;
208 struct inode *ip = VTOI(vp);
209 int error;
210
211 if (vp->v_type == VBLK || vp->v_type == VCHR)
212 return (EOPNOTSUPP);
213
214 error = vop_helper_access(ap, ip->i_uid, ip->i_gid, ip->i_mode,
215 ip->i_flags);
216 return (error);
217 }
218
219 static int
ext2_getattr(struct vop_getattr_args * ap)220 ext2_getattr(struct vop_getattr_args *ap)
221 {
222 struct vnode *vp = ap->a_vp;
223 struct inode *ip = VTOI(vp);
224 struct vattr *vap = ap->a_vap;
225
226 ext2_itimes(vp);
227 /*
228 * Copy from inode table
229 */
230 vap->va_fsid = devid_from_dev(ip->i_dev);
231 vap->va_fileid = ip->i_number;
232 vap->va_mode = ip->i_mode & ~IFMT;
233 vap->va_nlink = ip->i_nlink;
234 vap->va_uid = ip->i_uid;
235 vap->va_gid = ip->i_gid;
236 vap->va_size = ip->i_size;
237 vap->va_atime.tv_sec = ip->i_atime;
238 vap->va_atime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_atimensec : 0;
239 vap->va_mtime.tv_sec = ip->i_mtime;
240 vap->va_mtime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_mtimensec : 0;
241 vap->va_ctime.tv_sec = ip->i_ctime;
242 vap->va_ctime.tv_nsec = E2DI_HAS_XTIME(ip) ? ip->i_ctimensec : 0;
243 vap->va_flags = ip->i_flags;
244 vap->va_gen = ip->i_gen;
245 vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
246 vap->va_bytes = dbtob((u_quad_t)ip->i_blocks);
247 vap->va_type = IFTOVT(ip->i_mode);
248 vap->va_filerev = ip->i_modrev;
249 return (0);
250 }
251
252 /*
253 * Set attribute vnode op. called from several syscalls
254 */
255 static int
ext2_setattr(struct vop_setattr_args * ap)256 ext2_setattr(struct vop_setattr_args *ap)
257 {
258 struct vattr *vap = ap->a_vap;
259 struct vnode *vp = ap->a_vp;
260 struct inode *ip = VTOI(vp);
261 struct ucred *cred = ap->a_cred;
262 struct thread *td = curthread;
263 int error;
264
265 /*
266 * Check for unsettable attributes.
267 */
268 if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
269 (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
270 (vap->va_blocksize != VNOVAL) || (vap->va_rmajor != VNOVAL) ||
271 ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
272 return (EINVAL);
273 }
274 if (vap->va_flags != VNOVAL) {
275 /* Disallow flags not supported by ext2fs. */
276 if (vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP))
277 return (EOPNOTSUPP);
278
279 if (vp->v_mount->mnt_flag & MNT_RDONLY)
280 return (EROFS);
281 if (cred->cr_uid != ip->i_uid &&
282 (error = caps_priv_check(cred, SYSCAP_NOVFS_SETATTR)))
283 {
284 return (error);
285 }
286
287 /*
288 * Note that a root chflags becomes a user chflags when
289 * we are jailed, unless the jail vfs_chflags sysctl
290 * is set.
291 */
292 if (cred->cr_uid == 0 &&
293 (!jailed(cred) || PRISON_CAP_ISSET(cred->cr_prison->pr_caps,
294 PRISON_CAP_VFS_CHFLAGS))) {
295 if ((ip->i_flags
296 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) &&
297 securelevel > 0)
298 return (EPERM);
299 ip->i_flags = vap->va_flags;
300 } else {
301 if (ip->i_flags
302 & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
303 (vap->va_flags & UF_SETTABLE) != vap->va_flags)
304 return (EPERM);
305 ip->i_flags &= SF_SETTABLE;
306 ip->i_flags |= (vap->va_flags & UF_SETTABLE);
307 }
308 ip->i_flag |= IN_CHANGE;
309 if (vap->va_flags & (IMMUTABLE | APPEND))
310 return (0);
311 }
312 if (ip->i_flags & (IMMUTABLE | APPEND))
313 return (EPERM);
314 /*
315 * Go through the fields and update iff not VNOVAL.
316 */
317 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
318 if (vp->v_mount->mnt_flag & MNT_RDONLY)
319 return (EROFS);
320 if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred,
321 td)) != 0)
322 return (error);
323 }
324 if (vap->va_size != VNOVAL) {
325 /*
326 * Disallow write attempts on read-only file systems;
327 * unless the file is a socket, fifo, or a block or
328 * character device resident on the file system.
329 */
330 switch (vp->v_type) {
331 case VDIR:
332 return (EISDIR);
333 case VLNK:
334 case VREG:
335 if (vp->v_mount->mnt_flag & MNT_RDONLY)
336 return (EROFS);
337 break;
338 default:
339 break;
340 }
341 if ((error = ext2_truncate(vp, vap->va_size, 0, cred)) != 0)
342 return (error);
343 }
344 if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
345 if (vp->v_mount->mnt_flag & MNT_RDONLY)
346 return (EROFS);
347 if (cred->cr_uid != ip->i_uid &&
348 (error = caps_priv_check(cred, SYSCAP_NOVFS_SETATTR)) &&
349 ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
350 (error = VOP_EACCESS(vp, VWRITE, cred))))
351 {
352 return (error);
353 }
354 ip->i_flag |= IN_CHANGE | IN_MODIFIED;
355 if (vap->va_atime.tv_sec != VNOVAL) {
356 ip->i_flag &= ~IN_ACCESS;
357 ip->i_atime = vap->va_atime.tv_sec;
358 ip->i_atimensec = vap->va_atime.tv_nsec;
359 }
360 if (vap->va_mtime.tv_sec != VNOVAL) {
361 ip->i_flag &= ~IN_UPDATE;
362 ip->i_mtime = vap->va_mtime.tv_sec;
363 ip->i_mtimensec = vap->va_mtime.tv_nsec;
364 }
365 error = ext2_update(vp, 0);
366 if (error)
367 return (error);
368 }
369 error = 0;
370 if (vap->va_mode != (mode_t)VNOVAL) {
371 if (vp->v_mount->mnt_flag & MNT_RDONLY)
372 return (EROFS);
373 error = ext2_chmod(vp, (int)vap->va_mode, cred, td);
374 }
375 return (error);
376 }
377
378 /*
379 * Change the mode on a file.
380 * Inode must be locked before calling.
381 */
382 static int
ext2_chmod(struct vnode * vp,int mode,struct ucred * cred,struct thread * td)383 ext2_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
384 {
385 struct inode *ip = VTOI(vp);
386 int error;
387
388 if (cred->cr_uid != ip->i_uid) {
389 error = caps_priv_check(cred, SYSCAP_NOVFS_CHMOD);
390 if (error)
391 return (error);
392 }
393 if (cred->cr_uid) {
394 if (vp->v_type != VDIR && (mode & S_ISTXT))
395 return (EFTYPE);
396 if (!groupmember(ip->i_gid, cred) && (mode & ISGID))
397 return (EPERM);
398 }
399 ip->i_mode &= ~ALLPERMS;
400 ip->i_mode |= (mode & ALLPERMS);
401 ip->i_flag |= IN_CHANGE;
402 return (0);
403 }
404
405 /*
406 * Perform chown operation on inode ip;
407 * inode must be locked prior to call.
408 */
409 static int
ext2_chown(struct vnode * vp,uid_t uid,gid_t gid,struct ucred * cred,struct thread * td)410 ext2_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
411 struct thread *td)
412 {
413 struct inode *ip = VTOI(vp);
414 uid_t ouid;
415 gid_t ogid;
416 int error = 0;
417
418 if (uid == (uid_t)VNOVAL)
419 uid = ip->i_uid;
420 if (gid == (gid_t)VNOVAL)
421 gid = ip->i_gid;
422 /*
423 * If we don't own the file, are trying to change the owner
424 * of the file, or are not a member of the target group,
425 * the caller must be superuser or the call fails.
426 */
427 if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid ||
428 (gid != ip->i_gid && !(cred->cr_gid == gid ||
429 groupmember(gid, cred)))) &&
430 (error = caps_priv_check(cred, SYSCAP_NOVFS_CHOWN)))
431 {
432 return (error);
433 }
434
435 ogid = ip->i_gid;
436 ouid = ip->i_uid;
437 ip->i_gid = gid;
438 ip->i_uid = uid;
439 ip->i_flag |= IN_CHANGE;
440 if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
441 if (caps_priv_check(cred, SYSCAP_NOVFS_RETAINSUGID) != 0)
442 ip->i_mode &= ~(ISUID | ISGID);
443 }
444 return (0);
445 }
446
/*
 * Argument block handed by ext2_fsync() to its per-buffer scan callback
 * ext2_fsync_bp().
 */
struct ext2_fsync_bp_info {
	struct vnode *vp;	/* vnode being synced */
	int waitfor;		/* copy of the fsync a_waitfor argument */
};
451
452 static int
ext2_fsync_bp(struct buf * bp,void * data)453 ext2_fsync_bp(struct buf *bp, void *data)
454 {
455 struct ext2_fsync_bp_info *info = data;
456
457 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
458 return (0);
459 if ((bp->b_flags & B_DELWRI) == 0)
460 panic("ext2_fsync: not dirty");
461 bremfree(bp);
462
463 /*
464 * Wait for I/O associated with indirect blocks to complete,
465 * since there is no way to quickly wait for them below.
466 */
467 if (bp->b_vp == info->vp || (info->waitfor & MNT_NOWAIT))
468 bawrite(bp);
469 else
470 bwrite(bp);
471 return (1);
472 }
473
474 /*
475 * Synch an open file.
476 */
477 /* ARGSUSED */
478 static int
ext2_fsync(struct vop_fsync_args * ap)479 ext2_fsync(struct vop_fsync_args *ap)
480 {
481 struct ext2_fsync_bp_info info;
482 struct vnode *vp = ap->a_vp;
483 int count;
484
485 /*
486 * XXX why is all this fs specific?
487 */
488
489 /*
490 * Flush all dirty buffers associated with a vnode.
491 */
492 lwkt_gettoken(&vp->v_token);
493 info.vp = vp;
494 loop:
495 info.waitfor = ap->a_waitfor;
496 count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL, ext2_fsync_bp,
497 &info);
498 if (count)
499 goto loop;
500
501 if (ap->a_waitfor == MNT_WAIT) {
502 bio_track_wait(&vp->v_track_write, 0, 0);
503 #ifdef DIAGNOSTIC
504 if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
505 vprint("ext2_fsync: dirty", vp);
506 goto loop;
507 }
508 #endif
509 }
510 lwkt_reltoken(&vp->v_token);
511
512 return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT));
513 }
514
515 /*
516 * Mknod vnode call
517 */
518 /* ARGSUSED */
519 static int
ext2_mknod(struct vop_old_mknod_args * ap)520 ext2_mknod(struct vop_old_mknod_args *ap)
521 {
522 struct vattr *vap = ap->a_vap;
523 struct vnode **vpp = ap->a_vpp;
524 struct inode *ip;
525 ino_t ino;
526 int error;
527
528 if (vap->va_rmajor != VNOVAL &&
529 makeudev(vap->va_rmajor, vap->va_rminor) == NOUDEV) {
530 return (EINVAL);
531 }
532
533 error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
534 ap->a_dvp, vpp, ap->a_cnp);
535 if (error)
536 return (error);
537 ip = VTOI(*vpp);
538 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
539 if (vap->va_rmajor != VNOVAL) {
540 /*
541 * Want to be able to use this to make badblock
542 * inodes, so don't truncate the dev number.
543 */
544 ip->i_rdev = makeudev(vap->va_rmajor, vap->va_rminor);
545 }
546 /*
547 * Remove inode, then reload it through VFS_VGET so it is
548 * checked to see if it is an alias of an existing entry in
549 * the inode cache. XXX I don't believe this is necessary now.
550 */
551 (*vpp)->v_type = VNON;
552 ino = ip->i_number; /* Save this before vgone() invalidates ip. */
553 vgone_vxlocked(*vpp);
554 vput(*vpp);
555 error = VFS_VGET(ap->a_dvp->v_mount, NULL, ino, vpp);
556 if (error) {
557 *vpp = NULL;
558 return (error);
559 }
560 return (0);
561 }
562
563 static int
ext2_remove(struct vop_old_remove_args * ap)564 ext2_remove(struct vop_old_remove_args *ap)
565 {
566 struct inode *ip;
567 struct vnode *vp = ap->a_vp;
568 struct vnode *dvp = ap->a_dvp;
569 int error;
570
571 ip = VTOI(vp);
572 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
573 (VTOI(dvp)->i_flags & APPEND)) {
574 error = EPERM;
575 goto out;
576 }
577 error = ext2_dirremove(dvp, ap->a_cnp);
578 if (error == 0) {
579 ip->i_nlink--;
580 ip->i_flag |= IN_CHANGE;
581 }
582 out:
583 return (error);
584 }
585
586 /*
587 * link vnode call
588 */
589 static int
ext2_link(struct vop_old_link_args * ap)590 ext2_link(struct vop_old_link_args *ap)
591 {
592 struct vnode *vp = ap->a_vp;
593 struct vnode *tdvp = ap->a_tdvp;
594 struct componentname *cnp = ap->a_cnp;
595 struct inode *ip;
596 int error;
597
598 if (tdvp->v_mount != vp->v_mount) {
599 error = EXDEV;
600 goto out2;
601 }
602 if (tdvp != vp) {
603 error = vn_lock(vp, LK_EXCLUSIVE | LK_FAILRECLAIM);
604 if (error)
605 goto out2;
606 }
607 ip = VTOI(vp);
608 if ((nlink_t)ip->i_nlink >= EXT4_LINK_MAX) {
609 error = EMLINK;
610 goto out;
611 }
612 if (ip->i_flags & (IMMUTABLE | APPEND)) {
613 error = EPERM;
614 goto out;
615 }
616 ip->i_nlink++;
617 ip->i_flag |= IN_CHANGE;
618 error = ext2_update(vp, !DOINGASYNC(vp));
619 if (!error)
620 error = ext2_direnter(ip, tdvp, cnp);
621 if (error) {
622 ip->i_nlink--;
623 ip->i_flag |= IN_CHANGE;
624 }
625 out:
626 if (tdvp != vp)
627 vn_unlock(vp);
628 out2:
629 return (error);
630 }
631
632 static int
ext2_inc_nlink(struct inode * ip)633 ext2_inc_nlink(struct inode *ip)
634 {
635
636 ip->i_nlink++;
637
638 if (S_ISDIR(ip->i_mode) &&
639 EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK) &&
640 ip->i_nlink > 1) {
641 if (ip->i_nlink >= EXT4_LINK_MAX || ip->i_nlink == 2)
642 ip->i_nlink = 1;
643 } else if (ip->i_nlink > EXT4_LINK_MAX) {
644 ip->i_nlink--;
645 return (EMLINK);
646 }
647
648 return (0);
649 }
650
651 static void
ext2_dec_nlink(struct inode * ip)652 ext2_dec_nlink(struct inode *ip)
653 {
654
655 if (!S_ISDIR(ip->i_mode) || ip->i_nlink > 2)
656 ip->i_nlink--;
657 }
658
659 /*
660 * Rename system call.
661 * rename("foo", "bar");
662 * is essentially
663 * unlink("bar");
664 * link("foo", "bar");
665 * unlink("foo");
666 * but ``atomically''. Can't do full commit without saving state in the
667 * inode on disk which isn't feasible at this time. Best we can do is
668 * always guarantee the target exists.
669 *
670 * Basic algorithm is:
671 *
672 * 1) Bump link count on source while we're linking it to the
673 * target. This also ensure the inode won't be deleted out
674 * from underneath us while we work (it may be truncated by
675 * a concurrent `trunc' or `open' for creation).
676 * 2) Link source to destination. If destination already exists,
677 * delete it first.
678 * 3) Unlink source reference to inode if still around. If a
679 * directory was moved and the parent of the destination
680 * is different from the source, patch the ".." entry in the
681 * directory.
682 */
683 static int
ext2_rename(struct vop_old_rename_args * ap)684 ext2_rename(struct vop_old_rename_args *ap)
685 {
686 struct vnode *tvp = ap->a_tvp;
687 struct vnode *tdvp = ap->a_tdvp;
688 struct vnode *fvp = ap->a_fvp;
689 struct vnode *fdvp = ap->a_fdvp;
690 struct componentname *tcnp = ap->a_tcnp;
691 struct componentname *fcnp = ap->a_fcnp;
692 struct inode *ip, *xp, *dp;
693 struct dirtemplate *dirbuf;
694 int doingdirectory = 0, oldparent = 0, newparent = 0;
695 int error = 0;
696 u_char namlen;
697
698 /*
699 * Check for cross-device rename.
700 */
701 if ((fvp->v_mount != tdvp->v_mount) ||
702 (tvp && (fvp->v_mount != tvp->v_mount))) {
703 error = EXDEV;
704 abortit:
705 if (tdvp == tvp)
706 vrele(tdvp);
707 else
708 vput(tdvp);
709 if (tvp)
710 vput(tvp);
711 vrele(fdvp);
712 vrele(fvp);
713 return (error);
714 }
715
716 if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
717 (VTOI(tdvp)->i_flags & APPEND))) {
718 error = EPERM;
719 goto abortit;
720 }
721
722 /*
723 * Renaming a file to itself has no effect. The upper layers should
724 * not call us in that case. Temporarily just warn if they do.
725 */
726 if (fvp == tvp) {
727 SDT_PROBE2(ext2fs, , vnops, trace, 1,
728 "rename: fvp == tvp (can't happen)");
729 error = 0;
730 goto abortit;
731 }
732
733 if ((error = vn_lock(fvp, LK_EXCLUSIVE | LK_FAILRECLAIM)) != 0)
734 goto abortit;
735 dp = VTOI(fdvp);
736 ip = VTOI(fvp);
737 if (ip->i_nlink >= EXT4_LINK_MAX &&
738 !EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) {
739 vn_unlock(fvp);
740 error = EMLINK;
741 goto abortit;
742 }
743 if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
744 || (dp->i_flags & APPEND)) {
745 vn_unlock(fvp);
746 error = EPERM;
747 goto abortit;
748 }
749 if ((ip->i_mode & IFMT) == IFDIR) {
750 /*
751 * Avoid ".", "..", and aliases of "." for obvious reasons.
752 */
753 if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
754 dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & CNP_ISDOTDOT ||
755 (ip->i_flag & IN_RENAME)) {
756 vn_unlock(fvp);
757 error = EINVAL;
758 goto abortit;
759 }
760 ip->i_flag |= IN_RENAME;
761 oldparent = dp->i_number;
762 doingdirectory++;
763 }
764 //vrele(fdvp); XXX
765
766 /*
767 * When the target exists, both the directory
768 * and target vnodes are returned locked.
769 */
770 dp = VTOI(tdvp);
771 xp = NULL;
772 if (tvp)
773 xp = VTOI(tvp);
774
775 /*
776 * 1) Bump link count while we're moving stuff
777 * around. If we crash somewhere before
778 * completing our work, the link count
779 * may be wrong, but correctable.
780 */
781 ext2_inc_nlink(ip);
782 ip->i_flag |= IN_CHANGE;
783 if ((error = ext2_update(fvp, !DOINGASYNC(fvp))) != 0) {
784 vn_unlock(fvp);
785 goto bad;
786 }
787
788 /*
789 * If ".." must be changed (ie the directory gets a new
790 * parent) then the source directory must not be in the
791 * directory hierarchy above the target, as this would
792 * orphan everything below the source directory. Also
793 * the user must have write permission in the source so
794 * as to be able to change "..". We must repeat the call
795 * to namei, as the parent directory is unlocked by the
796 * call to checkpath().
797 */
798 error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred);
799 vn_unlock(fvp);
800
801 /*
802 * tvp (if not NULL) and tdvp are locked. fvp and fdvp are not.
803 * dp and xp are set according to tdvp and tvp.
804 */
805 if (oldparent != dp->i_number)
806 newparent = dp->i_number;
807 if (doingdirectory && newparent) {
808 if (error) /* write access check above */
809 goto bad;
810
811 /*
812 * Prepare for relookup, get rid of xp
813 */
814 if (xp != NULL) {
815 vput(tvp);
816 xp = NULL;
817 }
818
819 /*
820 * checkpath vput()'s tdvp (VTOI(dp)) on return no matter what,
821 * get an extra ref so we wind up with just an unlocked, ref'd
822 * tdvp. The 'out' target skips xp and tdvp cleanups. Our
823 * tdvp is now unlocked so we have to clean it up ourselves.
824 */
825 vref(tdvp);
826 error = ext2_checkpath(ip, dp, tcnp->cn_cred);
827 tcnp->cn_flags |= CNP_PDIRUNLOCK;
828 if (error) {
829 vrele(tdvp);
830 goto out;
831 }
832 /*
833 * relookup no longer messes with the ref count. An unlocked
834 * tdvp must be passed and if no error occurs a locked tdvp
835 * will be returned. We have to use the out target again.
836 */
837 error = relookup(tdvp, &tvp, tcnp);
838 if (error) {
839 if (tcnp->cn_flags & CNP_PDIRUNLOCK)
840 vrele(tdvp);
841 else
842 vput(tdvp);
843 goto out;
844 }
845
846 /*
847 * tdvp is locked at this point. in the RENAME case tvp may
848 * be NULL without an error, assign xp accordingly. The
849 * 'bad' target can be used again after this.
850 */
851 dp = VTOI(tdvp);
852 if (tvp)
853 xp = VTOI(tvp);
854 }
855
856 /*
857 * 2) If target doesn't exist, link the target
858 * to the source and unlink the source.
859 * Otherwise, rewrite the target directory
860 * entry to reference the source inode and
861 * expunge the original entry's existence.
862 */
863 if (xp == NULL) {
864 if (dp->i_devvp != ip->i_devvp)
865 panic("ext2_rename: EXDEV");
866 /*
867 * Account for ".." in new directory.
868 * When source and destination have the same
869 * parent we don't fool with the link count.
870 */
871 if (doingdirectory && newparent) {
872 if ((nlink_t)dp->i_nlink >= LINK_MAX) {
873 error = EMLINK;
874 goto bad;
875 }
876 error = ext2_inc_nlink(dp);
877 if (error)
878 goto bad;
879
880 dp->i_flag |= IN_CHANGE;
881 error = ext2_update(tdvp, !DOINGASYNC(tdvp));
882 if (error)
883 goto bad;
884 }
885 error = ext2_direnter(ip, tdvp, tcnp);
886 if (error) {
887 if (doingdirectory && newparent) {
888 ext2_dec_nlink(dp);
889 dp->i_flag |= IN_CHANGE;
890 (void)ext2_update(tdvp, 1);
891 }
892 goto bad;
893 }
894 vput(tdvp);
895 } else {
896 if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp)
897 panic("ext2_rename: EXDEV");
898 /*
899 * Short circuit rename(foo, foo).
900 */
901 if (xp->i_number == ip->i_number)
902 panic("ext2_rename: same file");
903 /*
904 * If the parent directory is "sticky", then the user must
905 * own the parent directory, or the destination of the rename,
906 * otherwise the destination may not be changed (except by
907 * root). This implements append-only directories.
908 */
909 if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 &&
910 tcnp->cn_cred->cr_uid != dp->i_uid &&
911 xp->i_uid != tcnp->cn_cred->cr_uid) {
912 error = EPERM;
913 goto bad;
914 }
915 /*
916 * Target must be empty if a directory and have no links
917 * to it. Also, ensure source and target are compatible
918 * (both directories, or both not directories).
919 */
920 if ((xp->i_mode & IFMT) == IFDIR) {
921 if (!ext2_dirempty(xp, dp->i_number, tcnp->cn_cred)) {
922 error = ENOTEMPTY;
923 goto bad;
924 }
925 if (!doingdirectory) {
926 error = ENOTDIR;
927 goto bad;
928 }
929 } else if (doingdirectory) {
930 error = EISDIR;
931 goto bad;
932 }
933 error = ext2_dirrewrite(dp, ip, tcnp);
934 if (error)
935 goto bad;
936 /*
937 * If the target directory is in the same
938 * directory as the source directory,
939 * decrement the link count on the parent
940 * of the target directory.
941 */
942 if (doingdirectory && !newparent) {
943 ext2_dec_nlink(dp);
944 dp->i_flag |= IN_CHANGE;
945 }
946 vput(tdvp);
947 /*
948 * Adjust the link count of the target to
949 * reflect the dirrewrite above. If this is
950 * a directory it is empty and there are
951 * no links to it, so we can squash the inode and
952 * any space associated with it. We disallowed
953 * renaming over top of a directory with links to
954 * it above, as the remaining link would point to
955 * a directory without "." or ".." entries.
956 */
957 ext2_dec_nlink(xp);
958 if (doingdirectory) {
959 if (xp->i_nlink > 2)
960 panic("ext2_rename: linked directory");
961 error = ext2_truncate(tvp, (off_t)0, IO_SYNC,
962 tcnp->cn_cred);
963 xp->i_nlink = 0;
964 }
965 xp->i_flag |= IN_CHANGE;
966 vput(tvp);
967 xp = NULL;
968 }
969
970 /*
971 * 3) Unlink the source.
972 */
973 fcnp->cn_flags &= ~CNP_MODMASK;
974 fcnp->cn_flags |= CNP_LOCKPARENT;
975 //vref(fdvp); XXX
976 error = relookup(fdvp, &fvp, fcnp);
977 if (error) {
978 /*
979 * From name has disappeared.
980 */
981 if (doingdirectory)
982 panic("ext2_rename: lost dir entry");
983 /* ip->i_flag only sets IN_RENAME if doingdirectory */
984 vrele(ap->a_fvp);
985 if (fcnp->cn_flags & CNP_PDIRUNLOCK)
986 vrele(fdvp);
987 else
988 vput(fdvp);
989 return (0);
990 }
991 KKASSERT((fcnp->cn_flags & CNP_PDIRUNLOCK) == 0);
992
993 /*
994 * This case shouldn't occur
995 */
996 if (fvp == NULL) {
997 /*
998 * From name has disappeared.
999 */
1000 if (doingdirectory)
1001 panic("ext2_rename: lost dir entry");
1002 /* ip->i_flag only sets IN_RENAME if doingdirectory */
1003 vrele(ap->a_fvp);
1004 vput(fvp);
1005 vput(fdvp);
1006 return (0);
1007 }
1008
1009 /*
1010 * fvp and fdvp are both ref'd and locked.
1011 */
1012 xp = VTOI(fvp);
1013 dp = VTOI(fdvp);
1014
1015 /*
1016 * Ensure that the directory entry still exists and has not
1017 * changed while the new name has been entered. If the source is
1018 * a file then the entry may have been unlinked or renamed. In
1019 * either case there is no further work to be done. If the source
1020 * is a directory then it cannot have been rmdir'ed; its link
1021 * count of three would cause a rmdir to fail with ENOTEMPTY.
1022 * The IN_RENAME flag ensures that it cannot be moved by another
1023 * rename.
1024 */
1025 if (xp != ip) {
1026 /*
1027 * From name resolves to a different inode. IN_RENAME is
1028 * not sufficient protection against timing window races
1029 * so we can't panic here.
1030 */
1031 } else {
1032 /*
1033 * If the source is a directory with a
1034 * new parent, the link count of the old
1035 * parent directory must be decremented
1036 * and ".." set to point to the new parent.
1037 */
1038 if (doingdirectory && newparent) {
1039 ext2_dec_nlink(dp);
1040 dp->i_flag |= IN_CHANGE;
1041 dirbuf = malloc(dp->i_e2fs->e2fs_bsize, M_TEMP, M_WAITOK | M_ZERO);
1042 error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf,
1043 sizeof (struct dirtemplate), (off_t)0,
1044 UIO_SYSSPACE, IO_NODELOCKED,
1045 tcnp->cn_cred, NULL);
1046 if (error == 0) {
1047 /* Like ufs little-endian: */
1048 namlen = dirbuf->dotdot_type;
1049 if (namlen != 2 ||
1050 dirbuf->dotdot_name[0] != '.' ||
1051 dirbuf->dotdot_name[1] != '.') {
1052 ext2_dirbad(xp, (doff_t)12,
1053 "rename: mangled dir");
1054 } else {
1055 dirbuf->dotdot_ino = htole32(newparent);
1056 /*
1057 * dirblock 0 could be htree root,
1058 * try both csum update functions.
1059 */
1060 ext2_dirent_csum_set(ip,
1061 (struct ext2fs_direct_2 *)dirbuf);
1062 ext2_dx_csum_set(ip,
1063 (struct ext2fs_direct_2 *)dirbuf);
1064 vn_rdwr(UIO_WRITE, fvp,
1065 (caddr_t)&dirbuf,
1066 sizeof (struct dirtemplate),
1067 (off_t)0, UIO_SYSSPACE,
1068 IO_NODELOCKED | IO_SYNC,
1069 tcnp->cn_cred, NULL);
1070 }
1071 }
1072 free(dirbuf, M_TEMP);
1073 }
1074 error = ext2_dirremove(fdvp, fcnp);
1075 if (!error) {
1076 ext2_dec_nlink(xp);
1077 xp->i_flag |= IN_CHANGE;
1078 }
1079 xp->i_flag &= ~IN_RENAME;
1080 }
1081 if (dp)
1082 vput(fdvp);
1083 if (xp)
1084 vput(fvp);
1085 vrele(ap->a_fvp);
1086 return (error);
1087
1088 bad:
1089 if (xp)
1090 vput(ITOV(xp));
1091 vput(ITOV(dp));
1092 out:
1093 if (doingdirectory)
1094 ip->i_flag &= ~IN_RENAME;
1095 if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
1096 ext2_dec_nlink(ip);
1097 ip->i_flag |= IN_CHANGE;
1098 ip->i_flag &= ~IN_RENAME;
1099 vput(fvp);
1100 } else
1101 vrele(fvp);
1102 return (error);
1103 }
1104
1105 /*
1106 * Mkdir system call
1107 */
1108 static int
ext2_mkdir(struct vop_old_mkdir_args * ap)1109 ext2_mkdir(struct vop_old_mkdir_args *ap)
1110 {
1111 struct m_ext2fs *fs;
1112 struct vnode *dvp = ap->a_dvp;
1113 struct vattr *vap = ap->a_vap;
1114 struct componentname *cnp = ap->a_cnp;
1115 struct inode *ip, *dp;
1116 struct vnode *tvp;
1117 struct dirtemplate dirtemplate, *dtp;
1118 char *buf = NULL;
1119 int error, dmode;
1120
1121 dp = VTOI(dvp);
1122 if ((nlink_t)dp->i_nlink >= EXT4_LINK_MAX &&
1123 !EXT2_HAS_RO_COMPAT_FEATURE(dp->i_e2fs, EXT2F_ROCOMPAT_DIR_NLINK)) {
1124 error = EMLINK;
1125 goto out;
1126 }
1127 dmode = vap->va_mode & 0777;
1128 dmode |= IFDIR;
1129 /*
1130 * Must simulate part of ext2_makeinode here to acquire the inode,
1131 * but not have it entered in the parent directory. The entry is
1132 * made later after writing "." and ".." entries.
1133 */
1134 error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp);
1135 if (error)
1136 goto out;
1137 ip = VTOI(tvp);
1138 fs = ip->i_e2fs;
1139 ip->i_gid = dp->i_gid;
1140 #ifdef SUIDDIR
1141 {
1142 /*
1143 * if we are hacking owners here, (only do this where told to)
1144 * and we are not giving it TOO root, (would subvert quotas)
1145 * then go ahead and give it to the other user.
1146 * The new directory also inherits the SUID bit.
1147 * If user's UID and dir UID are the same,
1148 * 'give it away' so that the SUID is still forced on.
1149 */
1150 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1151 (dp->i_mode & ISUID) && dp->i_uid) {
1152 dmode |= ISUID;
1153 ip->i_uid = dp->i_uid;
1154 } else {
1155 ip->i_uid = cnp->cn_cred->cr_uid;
1156 }
1157 }
1158 #else
1159 ip->i_uid = cnp->cn_cred->cr_uid;
1160 #endif
1161 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1162 ip->i_mode = dmode;
1163 tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */
1164 ip->i_nlink = 2;
1165 if (cnp->cn_flags & CNP_ISWHITEOUT)
1166 ip->i_flags |= UF_OPAQUE;
1167 error = ext2_update(tvp, 1);
1168
1169 /*
1170 * The vnode must have a VM object in order to issue buffer cache
1171 * ops on it.
1172 */
1173 vinitvmio(tvp, 0, PAGE_SIZE, -1);
1174
1175 /*
1176 * Bump link count in parent directory
1177 * to reflect work done below. Should
1178 * be done before reference is created
1179 * so reparation is possible if we crash.
1180 */
1181 ext2_inc_nlink(dp);
1182 dp->i_flag |= IN_CHANGE;
1183 error = ext2_update(dvp, !DOINGASYNC(dvp));
1184 if (error)
1185 goto bad;
1186
1187 /* Initialize directory with "." and ".." from static template. */
1188 if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs,
1189 EXT2F_INCOMPAT_FTYPE))
1190 dtp = &mastertemplate;
1191 else
1192 dtp = &omastertemplate;
1193 dirtemplate = *dtp;
1194 dirtemplate.dot_ino = htole32(ip->i_number);
1195 dirtemplate.dotdot_ino = htole32(dp->i_number);
1196 /*
1197 * note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE so let's
1198 * just redefine it - for this function only
1199 */
1200 #undef DIRBLKSIZ
1201 #define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize
1202 dirtemplate.dotdot_reclen = htole16(DIRBLKSIZ - 12);
1203 buf = malloc(DIRBLKSIZ, M_TEMP, M_WAITOK | M_ZERO);
1204 if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) {
1205 dirtemplate.dotdot_reclen =
1206 htole16(le16toh(dirtemplate.dotdot_reclen) -
1207 sizeof(struct ext2fs_direct_tail));
1208 ext2_init_dirent_tail(EXT2_DIRENT_TAIL(buf, DIRBLKSIZ));
1209 }
1210 memcpy(buf, &dirtemplate, sizeof(dirtemplate));
1211 ext2_dirent_csum_set(ip, (struct ext2fs_direct_2 *)buf);
1212 error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)buf,
1213 DIRBLKSIZ, (off_t)0, UIO_SYSSPACE,
1214 IO_NODELOCKED | IO_SYNC, cnp->cn_cred, NULL);
1215 if (error) {
1216 ext2_dec_nlink(dp);
1217 dp->i_flag |= IN_CHANGE;
1218 goto bad;
1219 }
1220 if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize)
1221 /* XXX should grow with balloc() */
1222 panic("ext2_mkdir: blksize");
1223 else {
1224 ip->i_size = DIRBLKSIZ;
1225 ip->i_flag |= IN_CHANGE;
1226 }
1227
1228 /* Directory set up, now install its entry in the parent directory. */
1229 error = ext2_direnter(ip, dvp, cnp);
1230 if (error) {
1231 ext2_dec_nlink(dp);
1232 dp->i_flag |= IN_CHANGE;
1233 }
1234 bad:
1235 /*
1236 * No need to do an explicit VOP_TRUNCATE here, vrele will do this
1237 * for us because we set the link count to 0.
1238 */
1239 if (error) {
1240 ip->i_nlink = 0;
1241 ip->i_flag |= IN_CHANGE;
1242 vput(tvp);
1243 } else
1244 *ap->a_vpp = tvp;
1245 out:
1246 free(buf, M_TEMP);
1247 return (error);
1248 #undef DIRBLKSIZ
1249 #define DIRBLKSIZ DEV_BSIZE
1250 }
1251
1252 /*
1253 * Rmdir system call.
1254 */
1255 static int
ext2_rmdir(struct vop_old_rmdir_args * ap)1256 ext2_rmdir(struct vop_old_rmdir_args *ap)
1257 {
1258 struct vnode *vp = ap->a_vp;
1259 struct vnode *dvp = ap->a_dvp;
1260 struct componentname *cnp = ap->a_cnp;
1261 struct inode *ip, *dp;
1262 int error;
1263
1264 ip = VTOI(vp);
1265 dp = VTOI(dvp);
1266
1267 /*
1268 * Verify the directory is empty (and valid).
1269 * (Rmdir ".." won't be valid since
1270 * ".." will contain a reference to
1271 * the current directory and thus be
1272 * non-empty.)
1273 */
1274 if (!ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) {
1275 error = ENOTEMPTY;
1276 goto out;
1277 }
1278 if ((dp->i_flags & APPEND)
1279 || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1280 error = EPERM;
1281 goto out;
1282 }
1283 /*
1284 * Delete reference to directory before purging
1285 * inode. If we crash in between, the directory
1286 * will be reattached to lost+found,
1287 */
1288 error = ext2_dirremove(dvp, cnp);
1289 if (error)
1290 goto out;
1291 ext2_dec_nlink(dp);
1292 dp->i_flag |= IN_CHANGE;
1293 vn_unlock(dvp);
1294 /*
1295 * Truncate inode. The only stuff left
1296 * in the directory is "." and "..".
1297 */
1298 ip->i_nlink = 0;
1299 error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred);
1300 vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
1301 out:
1302 return (error);
1303 }
1304
1305 /*
1306 * symlink -- make a symbolic link
1307 */
1308 static int
ext2_symlink(struct vop_old_symlink_args * ap)1309 ext2_symlink(struct vop_old_symlink_args *ap)
1310 {
1311 struct vnode *vp, **vpp = ap->a_vpp;
1312 struct inode *ip;
1313 int len, error;
1314
1315 error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
1316 vpp, ap->a_cnp);
1317 if (error)
1318 return (error);
1319 vp = *vpp;
1320 len = strlen(ap->a_target);
1321 if (len < vp->v_mount->mnt_maxsymlinklen) {
1322 ip = VTOI(vp);
1323 bcopy(ap->a_target, (char *)ip->i_shortlink, len);
1324 ip->i_size = len;
1325 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1326 } else {
1327 /*
1328 * Make sure we have a VM object in order to use
1329 * the buffer cache.
1330 */
1331 if (vp->v_object == NULL)
1332 vinitvmio(vp, 0, PAGE_SIZE, -1);
1333
1334 error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
1335 UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL);
1336 }
1337 if (error)
1338 vput(vp);
1339 return (error);
1340 }
1341
1342 /*
1343 * Return target name of a symbolic link
1344 */
1345 static int
ext2_readlink(struct vop_readlink_args * ap)1346 ext2_readlink(struct vop_readlink_args *ap)
1347 {
1348 struct vnode *vp = ap->a_vp;
1349 struct inode *ip = VTOI(vp);
1350 int isize;
1351
1352 isize = ip->i_size;
1353 if (isize < vp->v_mount->mnt_maxsymlinklen) {
1354 uiomove((char *)ip->i_shortlink, isize, ap->a_uio);
1355 return (0);
1356 }
1357 return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1358 }
1359
1360 /*
1361 * Calculate the logical to physical mapping if not done already,
1362 * then call the device strategy routine.
1363 *
1364 * In order to be able to swap to a file, the ext2_bmaparray() operation may not
1365 * deadlock on memory. See ext2_bmap() for details.
1366 */
1367 static int
ext2_strategy(struct vop_strategy_args * ap)1368 ext2_strategy(struct vop_strategy_args *ap)
1369 {
1370 struct bio *bio = ap->a_bio;
1371 struct bio *nbio;
1372 struct buf *bp = bio->bio_buf;
1373 struct vnode *vp = ap->a_vp;
1374 struct inode *ip;
1375 int error;
1376
1377 ip = VTOI(vp);
1378 if (vp->v_type == VBLK || vp->v_type == VCHR)
1379 panic("ext2_strategy: spec");
1380 nbio = push_bio(bio);
1381 if (nbio->bio_offset == NOOFFSET) {
1382 error = VOP_BMAP(vp, bio->bio_offset, &nbio->bio_offset, NULL,
1383 NULL, bp->b_cmd);
1384 if (error) {
1385 bp->b_error = error;
1386 bp->b_flags |= B_ERROR;
1387 /* I/O was never started on nbio, must biodone(bio) */
1388 biodone(bio);
1389 return (error);
1390 }
1391 if (nbio->bio_offset == NOOFFSET)
1392 vfs_bio_clrbuf(bp);
1393 }
1394 if (nbio->bio_offset == NOOFFSET) {
1395 /* I/O was never started on nbio, must biodone(bio) */
1396 biodone(bio);
1397 return (0);
1398 }
1399 vn_strategy(ip->i_devvp, nbio);
1400 return (0);
1401 }
1402
1403 /*
1404 * Print out the contents of an inode.
1405 */
1406 static int
ext2_print(struct vop_print_args * ap)1407 ext2_print(struct vop_print_args *ap)
1408 {
1409 struct vnode *vp = ap->a_vp;
1410 struct inode *ip = VTOI(vp);
1411
1412 printf("tag VT_EXT2FS, ino %lu, on dev %s (%d, %d)",
1413 (u_long)ip->i_number, devtoname(ip->i_dev), major(ip->i_dev),
1414 minor(ip->i_dev));
1415 if (vp->v_type == VFIFO)
1416 fifo_printinfo(vp);
1417 lockmgr_printinfo(&vp->v_lock);
1418 printf("\n");
1419 return (0);
1420 }
1421
1422 /*
1423 * Read wrapper for fifos.
1424 */
1425 static
1426 int
ext2fifo_read(struct vop_read_args * ap)1427 ext2fifo_read(struct vop_read_args *ap)
1428 {
1429 int error, resid;
1430 struct inode *ip;
1431 struct uio *uio;
1432
1433 uio = ap->a_uio;
1434 resid = uio->uio_resid;
1435 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1436 ip = VTOI(ap->a_vp);
1437 if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL &&
1438 (uio->uio_resid != resid || (error == 0 && resid != 0)))
1439 VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
1440 return (error);
1441 }
1442
1443 /*
1444 * Write wrapper for fifos.
1445 */
1446 static
1447 int
ext2fifo_write(struct vop_write_args * ap)1448 ext2fifo_write(struct vop_write_args *ap)
1449 {
1450 int error, resid;
1451 struct inode *ip;
1452 struct uio *uio;
1453
1454 uio = ap->a_uio;
1455 resid = uio->uio_resid;
1456 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1457 ip = VTOI(ap->a_vp);
1458 if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0)))
1459 VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
1460 return (error);
1461 }
1462
1463 /*
1464 * Close wrapper for fifos.
1465 *
1466 * Update the times on the inode then do device close.
1467 */
1468 static int
ext2fifo_close(struct vop_close_args * ap)1469 ext2fifo_close(struct vop_close_args *ap)
1470 {
1471 struct vnode *vp = ap->a_vp;
1472
1473 if (VREFCNT(vp) > 1)
1474 ext2_itimes(vp);
1475 return (VOCALL(&fifo_vnode_vops, &ap->a_head));
1476 }
1477
1478 static void
filt_ext2detach(struct knote * kn)1479 filt_ext2detach(struct knote *kn)
1480 {
1481 struct vnode *vp = (struct vnode *)kn->kn_hook;
1482
1483 lwkt_gettoken(&vp->v_token);
1484 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1485 lwkt_reltoken(&vp->v_token);
1486 }
1487
1488 /*ARGSUSED*/
1489 static int
filt_ext2read(struct knote * kn,long hint)1490 filt_ext2read(struct knote *kn, long hint)
1491 {
1492 struct vnode *vp = (struct vnode *)kn->kn_hook;
1493 struct inode *ip = VTOI(vp);
1494 off_t off;
1495
1496 /*
1497 * filesystem is gone, so set the EOF flag and schedule
1498 * the knote for deletion.
1499 */
1500 if (hint == NOTE_REVOKE) {
1501 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1502 return (1);
1503 }
1504 off = ip->i_size - kn->kn_fp->f_offset;
1505 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1506 if (kn->kn_sfflags & NOTE_OLDAPI)
1507 return (1);
1508 return (kn->kn_data != 0);
1509 }
1510
1511 /*ARGSUSED*/
1512 static int
filt_ext2write(struct knote * kn,long hint)1513 filt_ext2write(struct knote *kn, long hint)
1514 {
1515 /*
1516 * filesystem is gone, so set the EOF flag and schedule
1517 * the knote for deletion.
1518 */
1519 if (hint == NOTE_REVOKE)
1520 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1521
1522 kn->kn_data = 0;
1523 return (1);
1524 }
1525
1526 static int
filt_ext2vnode(struct knote * kn,long hint)1527 filt_ext2vnode(struct knote *kn, long hint)
1528 {
1529 if (kn->kn_sfflags & hint)
1530 kn->kn_fflags |= hint;
1531 if (hint == NOTE_REVOKE) {
1532 kn->kn_flags |= (EV_EOF | EV_NODATA);
1533 return (1);
1534 }
1535 return (kn->kn_fflags != 0);
1536 }
1537
1538 static struct filterops ext2read_filtops =
1539 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2read };
1540 static struct filterops ext2write_filtops =
1541 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2write };
1542 static struct filterops ext2vnode_filtops =
1543 { FILTEROP_ISFD | FILTEROP_MPSAFE, NULL, filt_ext2detach, filt_ext2vnode };
1544
1545 static int
ext2_kqfilter(struct vop_kqfilter_args * ap)1546 ext2_kqfilter(struct vop_kqfilter_args *ap)
1547 {
1548 struct vnode *vp = ap->a_vp;
1549 struct knote *kn = ap->a_kn;
1550
1551 switch (kn->kn_filter) {
1552 case EVFILT_READ:
1553 kn->kn_fop = &ext2read_filtops;
1554 break;
1555 case EVFILT_WRITE:
1556 kn->kn_fop = &ext2write_filtops;
1557 break;
1558 case EVFILT_VNODE:
1559 kn->kn_fop = &ext2vnode_filtops;
1560 break;
1561 default:
1562 return (EOPNOTSUPP);
1563 }
1564
1565 kn->kn_hook = (caddr_t)vp;
1566
1567 /* XXX: kq token actually protects the list */
1568 lwkt_gettoken(&vp->v_token);
1569 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1570 lwkt_reltoken(&vp->v_token);
1571
1572 return (0);
1573 }
1574
1575 /*
1576 * Kqfilter wrapper for fifos.
1577 *
1578 * Fall through to ext2 kqfilter routines if needed
1579 */
1580 static int
ext2fifo_kqfilter(struct vop_kqfilter_args * ap)1581 ext2fifo_kqfilter(struct vop_kqfilter_args *ap)
1582 {
1583 int error;
1584
1585 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
1586 if (error)
1587 error = ext2_kqfilter(ap);
1588 return (error);
1589 }
1590
1591 /*
1592 * Return POSIX pathconf information applicable to ext2 filesystems.
1593 */
1594 static int
ext2_pathconf(struct vop_pathconf_args * ap)1595 ext2_pathconf(struct vop_pathconf_args *ap)
1596 {
1597 int error = 0;
1598
1599 switch (ap->a_name) {
1600 case _PC_LINK_MAX:
1601 if (EXT2_HAS_RO_COMPAT_FEATURE(VTOI(ap->a_vp)->i_e2fs,
1602 EXT2F_ROCOMPAT_DIR_NLINK))
1603 *ap->a_retval = INT_MAX;
1604 else
1605 *ap->a_retval = EXT4_LINK_MAX;
1606 break;
1607 case _PC_NAME_MAX:
1608 *ap->a_retval = NAME_MAX;
1609 break;
1610 case _PC_PATH_MAX:
1611 *ap->a_retval = PATH_MAX;
1612 break;
1613 case _PC_PIPE_BUF:
1614 if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
1615 *ap->a_retval = PIPE_BUF;
1616 else
1617 error = EINVAL;
1618 break;
1619 case _PC_CHOWN_RESTRICTED:
1620 *ap->a_retval = 1;
1621 break;
1622 case _PC_NO_TRUNC:
1623 *ap->a_retval = 1;
1624 break;
1625 case _PC_MIN_HOLE_SIZE:
1626 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1627 break;
1628 case _PC_PRIO_IO:
1629 *ap->a_retval = 0;
1630 break;
1631 case _PC_SYNC_IO:
1632 *ap->a_retval = 0;
1633 break;
1634 case _PC_ALLOC_SIZE_MIN:
1635 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
1636 break;
1637 case _PC_FILESIZEBITS:
1638 *ap->a_retval = 64;
1639 break;
1640 case _PC_REC_INCR_XFER_SIZE:
1641 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1642 break;
1643 case _PC_REC_MAX_XFER_SIZE:
1644 *ap->a_retval = -1; /* means ``unlimited'' */
1645 break;
1646 case _PC_REC_MIN_XFER_SIZE:
1647 *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
1648 break;
1649 case _PC_REC_XFER_ALIGN:
1650 *ap->a_retval = PAGE_SIZE;
1651 break;
1652 case _PC_SYMLINK_MAX:
1653 *ap->a_retval = MAXPATHLEN;
1654 break;
1655
1656 default:
1657 error = vop_stdpathconf(ap);
1658 break;
1659 }
1660 return (error);
1661 }
1662
1663 /*
1664 * Initialize the vnode associated with a new inode, handle aliased vnodes.
1665 */
1666 int
ext2_vinit(struct mount * mntp,struct vnode ** vpp)1667 ext2_vinit(struct mount *mntp, struct vnode **vpp)
1668 {
1669 struct inode *ip;
1670 struct vnode *vp;
1671
1672 vp = *vpp;
1673 ip = VTOI(vp);
1674
1675 switch (vp->v_type = IFTOVT(ip->i_mode)) {
1676 case VCHR:
1677 case VBLK:
1678 vp->v_ops = &mntp->mnt_vn_spec_ops;
1679 addaliasu(vp, umajor(ip->i_rdev), uminor(ip->i_rdev));
1680 break;
1681 case VFIFO:
1682 vp->v_ops = &mntp->mnt_vn_fifo_ops;
1683 break;
1684 case VDIR:
1685 case VREG:
1686 vinitvmio(vp, ip->i_size, PAGE_SIZE, -1); /* XXX */
1687 break;
1688 case VLNK:
1689 if ((ip->i_size >= vp->v_mount->mnt_maxsymlinklen) &&
1690 ip->i_blocks != 0) {
1691 vinitvmio(vp, ip->i_size, PAGE_SIZE, -1);
1692 }
1693 break;
1694 default:
1695 break;
1696 }
1697
1698 /*
1699 * Only unallocated inodes should be of type VNON.
1700 */
1701 if (ip->i_mode != 0 && vp->v_type == VNON)
1702 return (EINVAL);
1703
1704 if (ip->i_number == EXT2_ROOTINO)
1705 vp->v_flag |= VROOT;
1706 /*
1707 * Initialize modrev times.
1708 */
1709 ip->i_modrev = init_va_filerev();
1710 *vpp = vp;
1711 return (0);
1712 }
1713
1714 /*
1715 * Allocate a new inode.
1716 */
1717 static int
ext2_makeinode(int mode,struct vnode * dvp,struct vnode ** vpp,struct componentname * cnp)1718 ext2_makeinode(int mode, struct vnode *dvp, struct vnode **vpp,
1719 struct componentname *cnp)
1720 {
1721 struct inode *ip, *pdir;
1722 struct vnode *tvp;
1723 int error;
1724
1725 pdir = VTOI(dvp);
1726 *vpp = NULL;
1727 if ((mode & IFMT) == 0)
1728 mode |= IFREG;
1729
1730 error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp);
1731 if (error) {
1732 return (error);
1733 }
1734 ip = VTOI(tvp);
1735 ip->i_gid = pdir->i_gid;
1736 #ifdef SUIDDIR
1737 {
1738 /*
1739 * if we are
1740 * not the owner of the directory,
1741 * and we are hacking owners here, (only do this where told to)
1742 * and we are not giving it TOO root, (would subvert quotas)
1743 * then go ahead and give it to the other user.
1744 * Note that this drops off the execute bits for security.
1745 */
1746 if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
1747 (pdir->i_mode & ISUID) &&
1748 (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
1749 ip->i_uid = pdir->i_uid;
1750 mode &= ~07111;
1751 } else {
1752 ip->i_uid = cnp->cn_cred->cr_uid;
1753 }
1754 }
1755 #else
1756 ip->i_uid = cnp->cn_cred->cr_uid;
1757 #endif
1758 ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1759 ip->i_mode = mode;
1760 tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */
1761 ip->i_nlink = 1;
1762 if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) {
1763 if (caps_priv_check(cnp->cn_cred, SYSCAP_NOVFS_RETAINSUGID))
1764 ip->i_mode &= ~ISGID;
1765 }
1766
1767 if (cnp->cn_flags & CNP_ISWHITEOUT)
1768 ip->i_flags |= UF_OPAQUE;
1769
1770 /*
1771 * Regular files and directories need VM objects. Softlinks do
1772 * not (not immediately anyway).
1773 */
1774 if (tvp->v_type == VREG || tvp->v_type == VDIR)
1775 vinitvmio(tvp, 0, PAGE_SIZE, -1);
1776
1777 /*
1778 * Make sure inode goes to disk before directory entry.
1779 */
1780 error = ext2_update(tvp, !DOINGASYNC(tvp));
1781 if (error)
1782 goto bad;
1783
1784 error = ext2_direnter(ip, dvp, cnp);
1785 if (error)
1786 goto bad;
1787
1788 *vpp = tvp;
1789 return (0);
1790
1791 bad:
1792 /*
1793 * Write error occurred trying to update the inode
1794 * or the directory so must deallocate the inode.
1795 */
1796 ip->i_nlink = 0;
1797 ip->i_flag |= IN_CHANGE;
1798 vput(tvp);
1799 return (error);
1800 }
1801
1802 /*
1803 * Vnode op for reading.
1804 */
1805 static int
ext2_read(struct vop_read_args * ap)1806 ext2_read(struct vop_read_args *ap)
1807 {
1808 struct vnode *vp;
1809 struct inode *ip;
1810 struct uio *uio;
1811 struct m_ext2fs *fs;
1812 struct buf *bp;
1813 daddr_t lbn;
1814 off_t nextlbn;
1815 off_t nextloffset;
1816 off_t bytesinfile;
1817 long size, xfersize, blkoffset;
1818 int error, orig_resid, seqcount;
1819 int ioflag;
1820
1821 vp = ap->a_vp;
1822 uio = ap->a_uio;
1823 ioflag = ap->a_ioflag;
1824
1825 seqcount = ap->a_ioflag >> IO_SEQSHIFT;
1826 ip = VTOI(vp);
1827
1828 #ifdef INVARIANTS
1829 if (uio->uio_rw != UIO_READ)
1830 panic("%s: mode", "ext2_read");
1831
1832 if (vp->v_type == VLNK) {
1833 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
1834 panic("%s: short symlink", "ext2_read");
1835 } else if (vp->v_type != VREG && vp->v_type != VDIR)
1836 panic("%s: type %d", "ext2_read", vp->v_type);
1837 #endif
1838 orig_resid = uio->uio_resid;
1839 KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0"));
1840 if (orig_resid == 0)
1841 return (0);
1842 KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0"));
1843 fs = ip->i_e2fs;
1844 if (uio->uio_offset < ip->i_size &&
1845 uio->uio_offset >= fs->e2fs_maxfilesize)
1846 return (EOVERFLOW);
1847
1848 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
1849 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
1850 break;
1851 lbn = lblkno(fs, uio->uio_offset);
1852 nextlbn = lbn + 1;
1853 nextloffset = lblktodoff(fs, nextlbn);
1854 size = blksize(fs, ip, lbn);
1855 blkoffset = blkoff(fs, uio->uio_offset);
1856
1857 xfersize = fs->e2fs_fsize - blkoffset;
1858 if (uio->uio_resid < xfersize)
1859 xfersize = uio->uio_resid;
1860 if (bytesinfile < xfersize)
1861 xfersize = bytesinfile;
1862
1863 if (nextloffset >= ip->i_size)
1864 error = bread(vp, lblktodoff(fs, lbn), size, &bp);
1865 else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
1866 error = cluster_read(vp, (off_t)ip->i_size,
1867 lblktodoff(fs, lbn), size, uio->uio_resid,
1868 (ap->a_ioflag >> IO_SEQSHIFT) * MAXBSIZE, &bp);
1869 } else if (seqcount > 1) {
1870 u_int nextsize = blksize(fs, ip, nextlbn);
1871
1872 error = breadn(vp, lblktodoff(fs, lbn), size,
1873 &nextloffset, &nextsize, 1, &bp);
1874 } else
1875 error = bread(vp, lblktodoff(fs, lbn), size, &bp);
1876 if (error) {
1877 brelse(bp);
1878 bp = NULL;
1879 break;
1880 }
1881
1882 /*
1883 * We should only get non-zero b_resid when an I/O error
1884 * has occurred, which should cause us to break above.
1885 * However, if the short read did not cause an error,
1886 * then we want to ensure that we do not uiomove bad
1887 * or uninitialized data.
1888 */
1889 size -= bp->b_resid;
1890 if (size < xfersize) {
1891 if (size == 0)
1892 break;
1893 xfersize = size;
1894 }
1895 error = uiomove((char *)bp->b_data + blkoffset,
1896 (int)xfersize, uio);
1897 if (error)
1898 break;
1899 bqrelse(bp);
1900 }
1901
1902 /*
1903 * This can only happen in the case of an error because the loop
1904 * above resets bp to NULL on each iteration and on normal
1905 * completion has not set a new value into it. so it must have come
1906 * from a 'break' statement
1907 */
1908 if (bp != NULL)
1909 bqrelse(bp);
1910
1911 if ((error == 0 || uio->uio_resid != orig_resid) &&
1912 (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
1913 ip->i_flag |= IN_ACCESS;
1914 return (error);
1915 }
1916
1917 /*
1918 * Vnode op for writing.
1919 */
1920 static int
ext2_write(struct vop_write_args * ap)1921 ext2_write(struct vop_write_args *ap)
1922 {
1923 struct vnode *vp;
1924 struct uio *uio;
1925 struct inode *ip;
1926 struct m_ext2fs *fs;
1927 struct buf *bp;
1928 struct thread *td;
1929 daddr_t lbn;
1930 off_t osize;
1931 int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize;
1932
1933 ioflag = ap->a_ioflag;
1934 uio = ap->a_uio;
1935 vp = ap->a_vp;
1936
1937 seqcount = ioflag >> IO_SEQSHIFT;
1938 ip = VTOI(vp);
1939
1940 #ifdef INVARIANTS
1941 if (uio->uio_rw != UIO_WRITE)
1942 panic("%s: mode", "ext2_write");
1943 #endif
1944
1945 switch (vp->v_type) {
1946 case VREG:
1947 if (ioflag & IO_APPEND)
1948 uio->uio_offset = ip->i_size;
1949 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
1950 return (EPERM);
1951 /* FALLTHROUGH */
1952 case VLNK:
1953 break;
1954 case VDIR:
1955 /* XXX differs from ffs -- this is called from ext2_mkdir(). */
1956 if ((ioflag & IO_SYNC) == 0)
1957 panic("ext2_write: nonsync dir write");
1958 break;
1959 default:
1960 panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp,
1961 vp->v_type, (intmax_t)uio->uio_offset,
1962 (intmax_t)uio->uio_resid);
1963 }
1964
1965 KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0"));
1966 KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0"));
1967 fs = ip->i_e2fs;
1968 if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize)
1969 return (EFBIG);
1970 /*
1971 * Maybe this should be above the vnode op call, but so long as
1972 * file servers have no limits, I don't think it matters.
1973 */
1974 td = uio->uio_td;
1975 if (vp->v_type == VREG && td && td->td_proc &&
1976 uio->uio_offset + uio->uio_resid >
1977 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
1978 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
1979 return (EFBIG);
1980 }
1981
1982 resid = uio->uio_resid;
1983 osize = ip->i_size;
1984 if (seqcount > BA_SEQMAX)
1985 flags = BA_SEQMAX << BA_SEQSHIFT;
1986 else
1987 flags = seqcount << BA_SEQSHIFT;
1988 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1989 flags |= IO_SYNC;
1990
1991 for (error = 0; uio->uio_resid > 0;) {
1992 lbn = lblkno(fs, uio->uio_offset);
1993 blkoffset = blkoff(fs, uio->uio_offset);
1994 xfersize = fs->e2fs_fsize - blkoffset;
1995 if (uio->uio_resid < xfersize)
1996 xfersize = uio->uio_resid;
1997 if (uio->uio_offset + xfersize > ip->i_size)
1998 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
1999
2000 /*
2001 * We must perform a read-before-write if the transfer size
2002 * does not cover the entire buffer.
2003 */
2004 if (fs->e2fs_bsize > xfersize)
2005 flags |= BA_CLRBUF;
2006 else
2007 flags &= ~BA_CLRBUF;
2008 error = ext2_balloc(ip, lbn, blkoffset + xfersize,
2009 ap->a_cred, &bp, flags);
2010 if (error != 0)
2011 break;
2012
2013 if ((ioflag & (IO_SYNC | IO_INVAL)) == (IO_SYNC | IO_INVAL))
2014 bp->b_flags |= B_NOCACHE;
2015 if (uio->uio_offset + xfersize > ip->i_size)
2016 ip->i_size = uio->uio_offset + xfersize;
2017 size = blksize(fs, ip, lbn) - bp->b_resid;
2018 if (size < xfersize)
2019 xfersize = size;
2020
2021 error =
2022 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
2023 if ((ioflag & IO_VMIO) &&
2024 LIST_FIRST(&bp->b_dep) == NULL) /* in ext2fs? */
2025 bp->b_flags |= B_RELBUF;
2026 /*
2027 * If the buffer is not already filled and we encounter an
2028 * error while trying to fill it, we have to clear out any
2029 * garbage data from the pages instantiated for the buffer.
2030 * If we do not, a failed uiomove() during a write can leave
2031 * the prior contents of the pages exposed to a userland mmap.
2032 *
2033 * Note that we need only clear buffers with a transfer size
2034 * equal to the block size because buffers with a shorter
2035 * transfer size were cleared above by the call to ext2_balloc()
2036 * with the BA_CLRBUF flag set.
2037 *
2038 * If the source region for uiomove identically mmaps the
2039 * buffer, uiomove() performed the NOP copy, and the buffer
2040 * content remains valid because the page fault handler
2041 * validated the pages.
2042 */
2043 if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
2044 fs->e2fs_bsize == xfersize)
2045 vfs_bio_clrbuf(bp);
2046
2047 /*
2048 * If IO_SYNC each buffer is written synchronously. Otherwise
2049 * if we have a severe page deficiency write the buffer
2050 * asynchronously. Otherwise try to cluster, and if that
2051 * doesn't do it then either do an async write (if O_DIRECT),
2052 * or a delayed write (if not).
2053 */
2054 if (ioflag & IO_SYNC) {
2055 (void)bwrite(bp);
2056 } else if (vm_paging_severe() ||
2057 buf_dirty_count_severe() ||
2058 (ioflag & IO_ASYNC))
2059 {
2060 bp->b_flags |= B_CLUSTEROK;
2061 bawrite(bp);
2062 } else if (xfersize + blkoffset == fs->e2fs_fsize) {
2063 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
2064 bp->b_flags |= B_CLUSTEROK;
2065 cluster_write(bp, (off_t)ip->i_size,
2066 vp->v_mount->mnt_stat.f_iosize, seqcount);
2067 } else {
2068 bawrite(bp);
2069 }
2070 } else if (ioflag & IO_DIRECT) {
2071 bp->b_flags |= B_CLUSTEROK;
2072 bawrite(bp);
2073 } else {
2074 bp->b_flags |= B_CLUSTEROK;
2075 bdwrite(bp);
2076 }
2077 if (error || xfersize == 0)
2078 break;
2079 }
2080 /*
2081 * If we successfully wrote any data, and we are not the superuser
2082 * we clear the setuid and setgid bits as a precaution against
2083 * tampering.
2084 */
2085 if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
2086 ap->a_cred) {
2087 if (caps_priv_check(ap->a_cred, SYSCAP_NOVFS_RETAINSUGID))
2088 ip->i_mode &= ~(ISUID | ISGID);
2089 }
2090 if (error) {
2091 if (ioflag & IO_UNIT) {
2092 (void)ext2_truncate(vp, osize, ioflag & IO_SYNC,
2093 ap->a_cred);
2094 uio->uio_offset -= resid - uio->uio_resid;
2095 uio->uio_resid = resid;
2096 }
2097 }
2098 if (uio->uio_resid != resid) {
2099 ip->i_flag |= IN_CHANGE | IN_UPDATE;
2100 if (ioflag & IO_SYNC)
2101 error = ext2_update(vp, 1);
2102 }
2103 return (error);
2104 }
2105
2106 /* Global vfs data structures for ext2. */
2107 struct vop_ops ext2_vnodeops = {
2108 .vop_default = vop_defaultop,
2109 .vop_access = ext2_access,
2110 .vop_bmap = ext2_bmap,
2111 .vop_old_lookup = ext2_lookup,
2112 .vop_close = ext2_close,
2113 .vop_old_create = ext2_create,
2114 .vop_fsync = ext2_fsync,
2115 .vop_getpages = vop_stdgetpages,
2116 .vop_putpages = vop_stdputpages,
2117 .vop_getattr = ext2_getattr,
2118 .vop_inactive = ext2_inactive,
2119 .vop_old_link = ext2_link,
2120 .vop_old_lookup = ext2_lookup,
2121 .vop_old_mkdir = ext2_mkdir,
2122 .vop_old_mknod = ext2_mknod,
2123 .vop_open = ext2_open,
2124 .vop_pathconf = ext2_pathconf,
2125 .vop_print = ext2_print,
2126 .vop_read = ext2_read,
2127 .vop_readdir = ext2_readdir,
2128 .vop_readlink = ext2_readlink,
2129 .vop_reallocblks = ext2_reallocblks,
2130 .vop_reclaim = ext2_reclaim,
2131 .vop_old_remove = ext2_remove,
2132 .vop_old_rename = ext2_rename,
2133 .vop_old_rmdir = ext2_rmdir,
2134 .vop_setattr = ext2_setattr,
2135 .vop_strategy = ext2_strategy,
2136 .vop_old_symlink = ext2_symlink,
2137 .vop_write = ext2_write,
2138 };
2139
2140 struct vop_ops ext2_specops = {
2141 .vop_default = vop_defaultop,
2142 .vop_access = ext2_access,
2143 .vop_close = ext2_close,
2144 .vop_fsync = ext2_fsync,
2145 .vop_getattr = ext2_getattr,
2146 .vop_inactive = ext2_inactive,
2147 .vop_pathconf = ext2_pathconf,
2148 .vop_print = ext2_print,
2149 .vop_read = vop_stdnoread,
2150 .vop_reclaim = ext2_reclaim,
2151 .vop_setattr = ext2_setattr,
2152 .vop_write = vop_stdnowrite
2153 };
2154
2155 struct vop_ops ext2_fifoops = {
2156 .vop_default = fifo_vnoperate,
2157 .vop_access = ext2_access,
2158 .vop_close = ext2fifo_close,
2159 .vop_fsync = ext2_fsync,
2160 .vop_getattr = ext2_getattr,
2161 .vop_inactive = ext2_inactive,
2162 .vop_kqfilter = ext2fifo_kqfilter,
2163 .vop_pathconf = ext2_pathconf,
2164 .vop_print = ext2_print,
2165 .vop_read = ext2fifo_read,
2166 .vop_reclaim = ext2_reclaim,
2167 .vop_setattr = ext2_setattr,
2168 .vop_write = ext2fifo_write
2169 };
2170
2171 VNODEOP_SET(ext2_vnodeops);
2172 VNODEOP_SET(ext2_specops);
2173 VNODEOP_SET(ext2_fifoops);
2174