1 /* $NetBSD: efs_subr.c,v 1.14 2021/12/10 20:36:04 andvar Exp $ */
2
3 /*
4 * Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include <sys/cdefs.h>
20 __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.14 2021/12/10 20:36:04 andvar Exp $");
21
22 #include <sys/param.h>
23 #include <sys/kauth.h>
24 #include <sys/lwp.h>
25 #include <sys/proc.h>
26 #include <sys/buf.h>
27 #include <sys/mount.h>
28 #include <sys/vnode.h>
29 #include <sys/namei.h>
30 #include <sys/stat.h>
31 #include <sys/malloc.h>
32
33 #include <miscfs/genfs/genfs_node.h>
34
35 #include <fs/efs/efs.h>
36 #include <fs/efs/efs_sb.h>
37 #include <fs/efs/efs_dir.h>
38 #include <fs/efs/efs_genfs.h>
39 #include <fs/efs/efs_mount.h>
40 #include <fs/efs/efs_extent.h>
41 #include <fs/efs/efs_dinode.h>
42 #include <fs/efs/efs_inode.h>
43 #include <fs/efs/efs_subr.h>
44
45 struct pool efs_inode_pool;
46
47 /*
48 * Calculate a checksum for the provided superblock in __host byte order__.
49 *
50 * At some point SGI changed the checksum algorithm slightly, which can be
51 * enabled with the 'new' flag.
52 *
53 * Presumably this change occurred on or before 24 Oct 1988 (around IRIX 3.1),
54 * so we're pretty unlikely to ever actually see an old checksum. Further, it
55 * means that EFS_NEWMAGIC filesystems (IRIX >= 3.3) must match the new
56 * checksum whereas EFS_MAGIC filesystems could potentially use either
57 * algorithm.
58 *
59 * See comp.sys.sgi <1991Aug9.050838.16876@odin.corp.sgi.com>
60 */
61 int32_t
efs_sb_checksum(struct efs_sb * esb,int new)62 efs_sb_checksum(struct efs_sb *esb, int new)
63 {
64 int i;
65 int32_t cksum;
66 uint8_t *sbarray = (uint8_t *)esb;
67
68 KASSERT((EFS_SB_CHECKSUM_SIZE % 2) == 0);
69
70 for (i = cksum = 0; i < EFS_SB_CHECKSUM_SIZE; i += 2) {
71 uint16_t v;
72 memcpy(&v, &sbarray[i], sizeof(v));
73 cksum ^= be16toh(v);
74 cksum = (cksum << 1) | (new && cksum < 0);
75 }
76
77 return (cksum);
78 }
79
80 /*
81 * Determine if the superblock is valid.
82 *
83 * Returns 0 if valid, else invalid. If invalid, 'why' is set to an
84 * explanation.
85 */
86 int
efs_sb_validate(struct efs_sb * esb,const char ** why)87 efs_sb_validate(struct efs_sb *esb, const char **why)
88 {
89 uint32_t ocksum, ncksum;
90
91 *why = NULL;
92
93 if (be32toh(esb->sb_magic) != EFS_SB_MAGIC &&
94 be32toh(esb->sb_magic) != EFS_SB_NEWMAGIC) {
95 *why = "sb_magic invalid";
96 return (1);
97 }
98
99 ocksum = htobe32(efs_sb_checksum(esb, 0));
100 ncksum = htobe32(efs_sb_checksum(esb, 1));
101 if (esb->sb_checksum != ocksum && esb->sb_checksum != ncksum) {
102 *why = "sb_checksum invalid";
103 return (1);
104 }
105
106 if (be32toh(esb->sb_size) > EFS_SIZE_MAX) {
107 *why = "sb_size > EFS_SIZE_MAX";
108 return (1);
109 }
110
111 if (be32toh(esb->sb_firstcg) <= EFS_BB_BITMAP) {
112 *why = "sb_firstcg <= EFS_BB_BITMAP";
113 return (1);
114 }
115
116 /* XXX - add better sb consistency checks here */
117 if (esb->sb_cgfsize == 0 ||
118 esb->sb_cgisize == 0 ||
119 esb->sb_ncg == 0 ||
120 esb->sb_bmsize == 0) {
121 *why = "something bad happened";
122 return (1);
123 }
124
125 return (0);
126 }
127
128 /*
129 * Determine the basic block offset and inode index within that block, given
130 * the inode 'ino' and filesystem parameters _in host byte order_. The inode
131 * will live at byte address 'bboff' * EFS_BB_SIZE + 'index' * EFS_DINODE_SIZE.
132 */
133 void
efs_locate_inode(ino_t ino,struct efs_sb * sbp,uint32_t * bboff,int * index)134 efs_locate_inode(ino_t ino, struct efs_sb *sbp, uint32_t *bboff, int *index)
135 {
136 uint32_t cgfsize, firstcg;
137 uint16_t cgisize;
138
139 cgisize = be16toh(sbp->sb_cgisize);
140 cgfsize = be32toh(sbp->sb_cgfsize);
141 firstcg = be32toh(sbp->sb_firstcg);
142
143 *bboff = firstcg + ((ino / (cgisize * EFS_DINODES_PER_BB)) * cgfsize) +
144 ((ino % (cgisize * EFS_DINODES_PER_BB)) / EFS_DINODES_PER_BB);
145 *index = ino & (EFS_DINODES_PER_BB - 1);
146 }
147
148 /*
149 * Read in an inode from disk.
150 *
151 * We actually take in four inodes at a time. Hopefully these will stick
152 * around in the buffer cache and get used without going to disk.
153 *
154 * Returns 0 on success.
155 */
156 int
efs_read_inode(struct efs_mount * emp,ino_t ino,struct lwp * l,struct efs_dinode * di)157 efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
158 struct efs_dinode *di)
159 {
160 struct efs_sb *sbp;
161 struct buf *bp;
162 int index, err;
163 uint32_t bboff;
164
165 sbp = &emp->em_sb;
166 efs_locate_inode(ino, sbp, &bboff, &index);
167
168 err = efs_bread(emp, bboff, l, &bp);
169 if (err) {
170 return (err);
171 }
172 memcpy(di, ((struct efs_dinode *)bp->b_data) + index, sizeof(*di));
173 brelse(bp, 0);
174
175 return (0);
176 }
177
178 /*
179 * Perform a read from our device handling the potential DEV_BSIZE
180 * messiness (although as of 19.2.2006, all ports appear to use 512) as
181 * we as EFS block sizing.
182 *
183 * bboff: basic block offset
184 *
185 * Returns 0 on success.
186 */
187 int
efs_bread(struct efs_mount * emp,uint32_t bboff,struct lwp * l,struct buf ** bp)188 efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp)
189 {
190 KASSERT(bboff < EFS_SIZE_MAX);
191
192 return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
193 EFS_BB_SIZE, 0, bp));
194 }
195
196 /*
197 * Synchronise the in-core, host ordered and typed inode fields with their
198 * corresponding on-disk, EFS ordered and typed copies.
199 *
200 * This is the inverse of efs_dinode_sync_inode(), and should be called when
201 * an inode is loaded from disk.
202 */
203 void
efs_sync_dinode_to_inode(struct efs_inode * ei)204 efs_sync_dinode_to_inode(struct efs_inode *ei)
205 {
206
207 ei->ei_mode = be16toh(ei->ei_di.di_mode); /*same as nbsd*/
208 ei->ei_nlink = be16toh(ei->ei_di.di_nlink);
209 ei->ei_uid = be16toh(ei->ei_di.di_uid);
210 ei->ei_gid = be16toh(ei->ei_di.di_gid);
211 ei->ei_size = be32toh(ei->ei_di.di_size);
212 ei->ei_atime = be32toh(ei->ei_di.di_atime);
213 ei->ei_mtime = be32toh(ei->ei_di.di_mtime);
214 ei->ei_ctime = be32toh(ei->ei_di.di_ctime);
215 ei->ei_gen = be32toh(ei->ei_di.di_gen);
216 ei->ei_numextents = be16toh(ei->ei_di.di_numextents);
217 ei->ei_version = ei->ei_di.di_version;
218 }
219
/*
 * Would copy the in-core, host ordered and typed inode fields back into
 * their on-disk, EFS ordered and typed counterparts.
 *
 * This is the inverse of efs_sync_dinode_to_inode(), and would be called
 * before an inode is flushed to disk.  It is not implemented: any call
 * panics.
 */
void
efs_sync_inode_to_dinode(struct efs_inode *ei)
{

	/* Nothing is ever written back, so reaching here is a bug. */
	panic("readonly -- no need to call me");
}
233
#ifdef DIAGNOSTIC
/*
 * Compare every host-order field cached in the in-core inode against the
 * value decoded from its embedded on-disk copy.
 *
 * Returns the number of fields that differ, i.e. 0 if they all match.
 */
static int
efs_is_inode_synced(struct efs_inode *ei)
{
	const struct efs_dinode *di = &ei->ei_di;
	int mismatches = 0;

	if (ei->ei_mode != be16toh(di->di_mode))
		mismatches++;
	if (ei->ei_nlink != be16toh(di->di_nlink))
		mismatches++;
	if (ei->ei_uid != be16toh(di->di_uid))
		mismatches++;
	if (ei->ei_gid != be16toh(di->di_gid))
		mismatches++;
	if (ei->ei_size != be32toh(di->di_size))
		mismatches++;
	if (ei->ei_atime != be32toh(di->di_atime))
		mismatches++;
	if (ei->ei_mtime != be32toh(di->di_mtime))
		mismatches++;
	if (ei->ei_ctime != be32toh(di->di_ctime))
		mismatches++;
	if (ei->ei_gen != be32toh(di->di_gen))
		mismatches++;
	if (ei->ei_numextents != be16toh(di->di_numextents))
		mismatches++;
	if (ei->ei_version != di->di_version)
		mismatches++;

	return (mismatches);
}
#endif
262
263 /*
264 * Given an efs_dirblk structure and a componentname to search for, return the
265 * corresponding inode if it is found.
266 *
267 * Returns 0 on success.
268 */
269 static int
efs_dirblk_lookup(struct efs_dirblk * dir,struct componentname * cn,ino_t * inode)270 efs_dirblk_lookup(struct efs_dirblk *dir, struct componentname *cn,
271 ino_t *inode)
272 {
273 struct efs_dirent *de;
274 int i, slot __diagused, offset;
275
276 KASSERT(cn->cn_namelen <= EFS_DIRENT_NAMELEN_MAX);
277
278 slot = offset = 0;
279
280 for (i = 0; i < dir->db_slots; i++) {
281 offset = EFS_DIRENT_OFF_EXPND(dir->db_space[i]);
282
283 if (offset == EFS_DIRBLK_SLOT_FREE)
284 continue;
285
286 de = (struct efs_dirent *)((char *)dir + offset);
287 if (de->de_namelen == cn->cn_namelen &&
288 (strncmp(cn->cn_nameptr, de->de_name, cn->cn_namelen) == 0)){
289 slot = i;
290 break;
291 }
292 }
293 if (i == dir->db_slots)
294 return (ENOENT);
295
296 KASSERT(slot < offset && offset < EFS_DIRBLK_SPACE_SIZE);
297 de = (struct efs_dirent *)((char *)dir + offset);
298 *inode = be32toh(de->de_inumber);
299
300 return (0);
301 }
302
303 /*
304 * Given an extent descriptor that represents a directory, look up
305 * componentname within its efs_dirblk's. If it is found, return the
306 * corresponding inode in 'ino'.
307 *
308 * Returns 0 on success.
309 */
310 static int
efs_extent_lookup(struct efs_mount * emp,struct efs_extent * ex,struct componentname * cn,ino_t * ino)311 efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
312 struct componentname *cn, ino_t *ino)
313 {
314 struct efs_dirblk *db;
315 struct buf *bp;
316 int i, err;
317
318 /*
319 * Read in each of the dirblks until we find our entry.
320 * If we don't, return ENOENT.
321 */
322 for (i = 0; i < ex->ex_length; i++) {
323 err = efs_bread(emp, ex->ex_bn + i, NULL, &bp);
324 if (err) {
325 printf("efs: warning: invalid extent descriptor\n");
326 return (err);
327 }
328
329 db = (struct efs_dirblk *)bp->b_data;
330 if (efs_dirblk_lookup(db, cn, ino) == 0) {
331 brelse(bp, 0);
332 return (0);
333 }
334 brelse(bp, 0);
335 }
336
337 return (ENOENT);
338 }
339
340 /*
341 * Given the provided in-core inode, look up the pathname requested. If
342 * we find it, 'ino' reflects its corresponding on-disk inode number.
343 *
344 * Returns 0 on success.
345 */
346 int
efs_inode_lookup(struct efs_mount * emp,struct efs_inode * ei,struct componentname * cn,ino_t * ino)347 efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
348 struct componentname *cn, ino_t *ino)
349 {
350 struct efs_extent ex;
351 struct efs_extent_iterator exi;
352 int ret;
353
354 KASSERT(VOP_ISLOCKED(ei->ei_vp));
355 #ifdef DIAGNOSTIC
356 KASSERT(efs_is_inode_synced(ei) == 0);
357 #endif
358 KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);
359
360 efs_extent_iterator_init(&exi, ei, 0);
361 while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
362 if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
363 return (0);
364 }
365 }
366
367 return ((ret == -1) ? ENOENT : ret);
368 }
369
370 /*
371 * Convert on-disk extent structure to in-core format.
372 */
373 void
efs_dextent_to_extent(struct efs_dextent * dex,struct efs_extent * ex)374 efs_dextent_to_extent(struct efs_dextent *dex, struct efs_extent *ex)
375 {
376
377 KASSERT(dex != NULL && ex != NULL);
378
379 ex->ex_magic = dex->ex_bytes[0];
380 ex->ex_bn = be32toh(dex->ex_words[0]) & 0x00ffffff;
381 ex->ex_length = dex->ex_bytes[4];
382 ex->ex_offset = be32toh(dex->ex_words[1]) & 0x00ffffff;
383 }
384
385 /*
386 * Convert in-core extent format to on-disk structure.
387 */
388 void
efs_extent_to_dextent(struct efs_extent * ex,struct efs_dextent * dex)389 efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
390 {
391
392 KASSERT(ex != NULL && dex != NULL);
393 KASSERT(ex->ex_magic == EFS_EXTENT_MAGIC);
394 KASSERT((ex->ex_bn & ~EFS_EXTENT_BN_MASK) == 0);
395 KASSERT((ex->ex_offset & ~EFS_EXTENT_OFFSET_MASK) == 0);
396
397 dex->ex_words[0] = htobe32(ex->ex_bn);
398 dex->ex_bytes[0] = ex->ex_magic;
399 dex->ex_words[1] = htobe32(ex->ex_offset);
400 dex->ex_bytes[4] = ex->ex_length;
401 }
402
403 /*
404 * Initialise an extent iterator.
405 *
406 * If start_hint is non-0, attempt to set up the iterator beginning with the
407 * extent descriptor in which the start_hint'th byte exists. Callers must not
408 * expect success (this is simply an optimisation), so we reserve the right
409 * to start from the beginning.
410 */
411 void
efs_extent_iterator_init(struct efs_extent_iterator * exi,struct efs_inode * eip,off_t start_hint)412 efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip,
413 off_t start_hint)
414 {
415 struct efs_extent ex, ex2;
416 struct buf *bp;
417 struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount);
418 off_t offset, length, next;
419 int i, err, numextents, numinextents;
420 int hi, lo, mid;
421 int indir;
422
423 exi->exi_eip = eip;
424 exi->exi_next = 0;
425 exi->exi_dnext = 0;
426 exi->exi_innext = 0;
427
428 if (start_hint == 0)
429 return;
430
431 /* force iterator to end if hint is too big */
432 if (start_hint >= eip->ei_size) {
433 exi->exi_next = eip->ei_numextents;
434 return;
435 }
436
437 /*
438 * Use start_hint to jump to the right extent descriptor. We'll
439 * iterate over the 12 indirect extents because it's cheap, then
440 * bring the appropriate vector into core and binary search it.
441 */
442
443 /*
444 * Handle the small file case separately first...
445 */
446 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
447 for (i = 0; i < eip->ei_numextents; i++) {
448 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
449
450 offset = ex.ex_offset * EFS_BB_SIZE;
451 length = ex.ex_length * EFS_BB_SIZE;
452
453 if (start_hint >= offset &&
454 start_hint < (offset + length)) {
455 exi->exi_next = exi->exi_dnext = i;
456 return;
457 }
458 }
459
460 /* shouldn't get here, no? */
461 EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n"));
462 return;
463 }
464
465 /*
466 * Now do the large files with indirect extents...
467 *
468 * The first indirect extent's ex_offset field contains the
469 * number of indirect extents used.
470 */
471 efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex);
472
473 numinextents = ex.ex_offset;
474 if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) {
475 EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n"));
476 return;
477 }
478
479 next = 0;
480 indir = -1;
481 numextents = 0;
482 for (i = 0; i < numinextents; i++) {
483 efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
484
485 err = efs_bread(emp, ex.ex_bn, NULL, &bp);
486 if (err) {
487 return;
488 }
489
490 efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2);
491 brelse(bp, 0);
492
493 offset = ex2.ex_offset * EFS_BB_SIZE;
494
495 if (offset > start_hint) {
496 indir = MAX(0, i - 1);
497 break;
498 }
499
500 /* number of extents prior to this indirect vector of extents */
501 next += numextents;
502
503 /* number of extents within this indirect vector of extents */
504 numextents = ex.ex_length * EFS_EXTENTS_PER_BB;
505 numextents = MIN(numextents, eip->ei_numextents - next);
506 }
507
508 /*
509 * We hit the end, so assume it's in the last extent.
510 */
511 if (indir == -1)
512 indir = numinextents - 1;
513
514 /*
515 * Binary search to find our desired direct extent.
516 */
517 lo = 0;
518 mid = 0;
519 hi = numextents - 1;
520 efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex);
521 while (lo <= hi) {
522 int bboff;
523 int index;
524
525 mid = (lo + hi) / 2;
526
527 bboff = mid / EFS_EXTENTS_PER_BB;
528 index = mid % EFS_EXTENTS_PER_BB;
529
530 err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp);
531 if (err) {
532 EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n"));
533 return;
534 }
535
536 efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index,
537 &ex2);
538 brelse(bp, 0);
539
540 offset = ex2.ex_offset * EFS_BB_SIZE;
541 length = ex2.ex_length * EFS_BB_SIZE;
542
543 if (start_hint >= offset && start_hint < (offset + length))
544 break;
545
546 if (start_hint < offset)
547 hi = mid - 1;
548 else
549 lo = mid + 1;
550 }
551
552 /*
553 * This is bad. Either the hint is bogus (which shouldn't
554 * happen) or the extent list must be screwed up. We
555 * have to abort.
556 */
557 if (lo > hi) {
558 EFS_DPRINTF(("efs_extent_iterator_init: bsearch "
559 "failed to find extent\n"));
560 return;
561 }
562
563 exi->exi_next = next + mid;
564 exi->exi_dnext = indir;
565 exi->exi_innext = mid;
566 }
567
568 /*
569 * Return the next EFS extent.
570 *
571 * Returns 0 if another extent was iterated, -1 if we've exhausted all
572 * extents, or an error number. If 'exi' is non-NULL, the next extent is
573 * written to it (should it exist).
574 */
575 int
efs_extent_iterator_next(struct efs_extent_iterator * exi,struct efs_extent * exp)576 efs_extent_iterator_next(struct efs_extent_iterator *exi,
577 struct efs_extent *exp)
578 {
579 struct efs_extent ex;
580 struct efs_dextent *dexp;
581 struct efs_inode *eip = exi->exi_eip;
582 struct buf *bp;
583 int err, bboff, index;
584
585 if (exi->exi_next++ >= eip->ei_numextents)
586 return (-1);
587
588 /* direct or indirect extents? */
589 if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
590 if (exp != NULL) {
591 dexp = &eip->ei_di.di_extents[exi->exi_dnext++];
592 efs_dextent_to_extent(dexp, exp);
593 }
594 } else {
595 efs_dextent_to_extent(
596 &eip->ei_di.di_extents[exi->exi_dnext], &ex);
597
598 bboff = exi->exi_innext / EFS_EXTENTS_PER_BB;
599 index = exi->exi_innext % EFS_EXTENTS_PER_BB;
600
601 err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
602 ex.ex_bn + bboff, NULL, &bp);
603 if (err) {
604 EFS_DPRINTF(("efs_extent_iterator_next: "
605 "efs_bread failed: %d\n", err));
606 return (err);
607 }
608
609 if (exp != NULL) {
610 dexp = (struct efs_dextent *)bp->b_data + index;
611 efs_dextent_to_extent(dexp, exp);
612 }
613 brelse(bp, 0);
614
615 bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB;
616 if (bboff >= ex.ex_length) {
617 exi->exi_innext = 0;
618 exi->exi_dnext++;
619 }
620 }
621
622 return (0);
623 }
624