xref: /dragonfly/stand/lib/hammer1.c (revision 655933d6)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Simon Schubert <corecode@fs.ei.tum.de>
6  * and Matthew Dillon <dillon@backplane.com>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 /*
37  * This file is being used by boot2 and libstand (loader).
38  * Compile with -DTESTING to obtain a binary.
39  */
40 
41 
42 #if !defined(BOOT2) && !defined(TESTING)
43 #define	LIBSTAND	1
44 #endif
45 
46 #ifndef DEBUG
47 #define	DEBUG		0
48 #endif
49 
50 #ifdef BOOT2
51 #include "boot2.h"
52 #else
53 #include <sys/param.h>
54 #include <stddef.h>
55 #include <stdint.h>
56 #endif
57 
58 #ifdef TESTING
59 #include <sys/fcntl.h>
60 #include <sys/stat.h>
61 #include <unistd.h>
62 #include <err.h>
63 #include <stdio.h>
64 #include <stdlib.h>
65 #include <string.h>
66 #include <errno.h>
67 #include <dirent.h>
68 #endif
69 
70 #ifdef LIBSTAND
71 #include "stand.h"
72 #endif
73 
74 #include <vfs/hammer/hammer_disk.h>
75 
76 #ifndef BOOT2
77 struct blockentry {
78 	hammer_off_t	off;
79 	int		use;
80 	char		*data;
81 };
82 
83 #ifdef TESTING
84 #define NUMCACHE	16
85 #else
86 #define	NUMCACHE	6
87 #endif
88 
89 struct hfs {
90 #ifdef TESTING
91 	int		fd;
92 #else	// libstand
93 	struct open_file *f;
94 #endif
95 	hammer_off_t	root;
96 	int64_t		buf_beg;
97 	int64_t		last_dir_ino;
98 	u_int8_t	last_dir_cap_flags;
99 	int		lru;
100 	struct blockentry cache[NUMCACHE];
101 };
102 
103 static void *
104 hread(struct hfs *hfs, hammer_off_t off)
105 {
106 	hammer_off_t boff = off & ~HAMMER_BUFMASK64;
107 
108 	boff &= HAMMER_OFF_LONG_MASK;
109 
110 	if (HAMMER_ZONE_DECODE(off) != HAMMER_ZONE_RAW_VOLUME_INDEX)
111 		boff += hfs->buf_beg;
112 
113 	struct blockentry *be = NULL;
114 	for (int i = 0; i < NUMCACHE; i++) {
115 		if (be == NULL || be->use > hfs->cache[i].use)
116 			be = &hfs->cache[i];
117 		if (hfs->cache[i].off == boff) {
118 			be = &hfs->cache[i];
119 			break;
120 		}
121 	}
122 	if (be->off != boff) {
123 		// Didn't find any match
124 		be->off = boff;
125 #ifdef TESTING
126 		ssize_t res = pread(hfs->fd, be->data, HAMMER_BUFSIZE,
127 				    boff & HAMMER_OFF_SHORT_MASK);
128 		if (res != HAMMER_BUFSIZE)
129 			err(1, "short read on off %llx", boff);
130 #else	// libstand
131 		size_t rlen;
132 		int rv = hfs->f->f_dev->dv_strategy(hfs->f->f_devdata, F_READ,
133 			boff >> DEV_BSHIFT, HAMMER_BUFSIZE,
134 			be->data, &rlen);
135 		if (rv || rlen != HAMMER_BUFSIZE)
136 			return (NULL);
137 #endif
138 	}
139 
140 	be->use = ++hfs->lru;
141 	return &be->data[off & HAMMER_BUFMASK];
142 }
143 
144 #else	/* BOOT2 */
145 
146 struct hammer_dmadat {
147 	struct boot2_dmadat boot2;
148 	char		buf[HAMMER_BUFSIZE];
149 };
150 
151 #define fsdmadat	((struct hammer_dmadat *)boot2_dmadat)
152 
153 struct hfs {
154 	hammer_off_t	root;
155 	int64_t		last_dir_ino;
156 	u_int8_t	last_dir_cap_flags;
157 	int64_t		buf_beg;
158 };
159 
160 static void *
161 hread(struct hfs *hfs, hammer_off_t off)
162 {
163 	char *buf = fsdmadat->buf;
164 
165 	hammer_off_t boff = off & ~HAMMER_BUFMASK64;
166 	boff &= HAMMER_OFF_LONG_MASK;
167 	if (HAMMER_ZONE_DECODE(off) != HAMMER_ZONE_RAW_VOLUME_INDEX)
168 		boff += hfs->buf_beg;
169 	boff &= HAMMER_OFF_SHORT_MASK;
170 	boff >>= DEV_BSHIFT;
171 	if (dskread(buf, boff, HAMMER_BUFSIZE >> DEV_BSHIFT))
172 		return (NULL);
173 	return (&buf[off & HAMMER_BUFMASK]);
174 }
175 
176 static void
177 bzero(void *buf, size_t size)
178 {
179 	for (size_t i = 0; i < size; i++)
180 		((char *)buf)[i] = 0;
181 }
182 
183 static void
184 bcopy(void *src, void *dst, size_t size)
185 {
186 	memcpy(dst, src, size);
187 }
188 
189 static size_t
190 strlen(const char *s)
191 {
192 	size_t l = 0;
193 	for (; *s != 0; s++)
194 		l++;
195 	return (l);
196 }
197 
198 static int
199 memcmp(const void *a, const void *b, size_t len)
200 {
201 	for (size_t p = 0; p < len; p++) {
202 		int r = ((const char *)a)[p] - ((const char *)b)[p];
203 		if (r != 0)
204 			return (r);
205 	}
206 
207 	return (0);
208 }
209 
210 #endif
211 
212 /*
213  * (from hammer_btree.c)
214  *
215  * Compare two B-Tree elements, return -N, 0, or +N (e.g. similar to strcmp).
216  *
217  * Note that for this particular function a return value of -1, 0, or +1
218  * can denote a match if create_tid is otherwise discounted.  A create_tid
219  * of zero is considered to be 'infinity' in comparisons.
220  *
221  * See also hammer_rec_rb_compare() and hammer_rec_cmp() in hammer_object.c.
222  */
223 static int
224 hammer_btree_cmp(hammer_base_elm_t key1, hammer_base_elm_t key2)
225 {
226 	if (key1->localization < key2->localization)
227 		return(-5);
228 	if (key1->localization > key2->localization)
229 		return(5);
230 
231 	if (key1->obj_id < key2->obj_id)
232 		return(-4);
233 	if (key1->obj_id > key2->obj_id)
234 		return(4);
235 
236 	if (key1->rec_type < key2->rec_type)
237 		return(-3);
238 	if (key1->rec_type > key2->rec_type)
239 		return(3);
240 
241 	if (key1->key < key2->key)
242 		return(-2);
243 	if (key1->key > key2->key)
244 		return(2);
245 
246 	/*
247 	 * A create_tid of zero indicates a record which is undeletable
248 	 * and must be considered to have a value of positive infinity.
249 	 */
250 	if (key1->create_tid == 0) {
251 		if (key2->create_tid == 0)
252 			return(0);
253 		return(1);
254 	}
255 	if (key2->create_tid == 0)
256 		return(-1);
257 	if (key1->create_tid < key2->create_tid)
258 		return(-1);
259 	if (key1->create_tid > key2->create_tid)
260 		return(1);
261 	return(0);
262 }
263 
264 /*
265  * Heuristical search for the first element whos comparison is <= 1.  May
266  * return an index whos compare result is > 1 but may only return an index
267  * whos compare result is <= 1 if it is the first element with that result.
268  */
269 static int
270 hammer_btree_search_node(hammer_base_elm_t elm, hammer_node_ondisk_t node)
271 {
272 	int b;
273 	int s;
274 	int i;
275 	int r;
276 
277 	/*
278 	 * Don't bother if the node does not have very many elements
279 	 */
280 	b = 0;
281 	s = node->count;
282 	while (s - b > 4) {
283 		i = b + (s - b) / 2;
284 		r = hammer_btree_cmp(elm, &node->elms[i].leaf.base);
285 		if (r <= 1) {
286 			s = i;
287 		} else {
288 			b = i;
289 		}
290 	}
291 	return(b);
292 }
293 
294 #if 0
295 /*
296  * (from hammer_subs.c)
297  *
298  * Return a namekey hash.   The 64 bit namekey hash consists of a 32 bit
299  * crc in the MSB and 0 in the LSB.  The caller will use the low bits to
300  * generate a unique key and will scan all entries with the same upper
301  * 32 bits when issuing a lookup.
302  *
303  * We strip bit 63 in order to provide a positive key, this way a seek
304  * offset of 0 will represent the base of the directory.
305  *
306  * This function can never return 0.  We use the MSB-0 space to synthesize
307  * artificial directory entries such as "." and "..".
308  */
309 static int64_t
310 hammer_directory_namekey(const void *name, int len)
311 {
312 	int64_t key;
313 
314 	key = (int64_t)(crc32(name, len) & 0x7FFFFFFF) << 32;
315 	if (key == 0)
316 		key |= 0x100000000LL;
317 	return(key);
318 }
319 #else
320 static int64_t
321 hammer_directory_namekey(const void *name __unused, int len __unused)
322 {
323 	return (0);
324 }
325 #endif
326 
327 
328 #ifndef BOOT2
329 /*
330  * Misc
331  */
332 static u_int32_t
333 hammer_to_unix_xid(hammer_uuid_t *uuid)
334 {
335 	return(*(u_int32_t *)&uuid->node[2]);
336 }
337 
338 static int
339 hammer_get_dtype(u_int8_t obj_type)
340 {
341 	switch(obj_type) {
342 	case HAMMER_OBJTYPE_DIRECTORY:
343 		return(DT_DIR);
344 	case HAMMER_OBJTYPE_REGFILE:
345 		return(DT_REG);
346 	case HAMMER_OBJTYPE_DBFILE:
347 		return(DT_DBF);
348 	case HAMMER_OBJTYPE_FIFO:
349 		return(DT_FIFO);
350 	case HAMMER_OBJTYPE_SOCKET:
351 		return(DT_SOCK);
352 	case HAMMER_OBJTYPE_CDEV:
353 		return(DT_CHR);
354 	case HAMMER_OBJTYPE_BDEV:
355 		return(DT_BLK);
356 	case HAMMER_OBJTYPE_SOFTLINK:
357 		return(DT_LNK);
358 	default:
359 		return(DT_UNKNOWN);
360 	}
361 	/* not reached */
362 }
363 
364 static int
365 hammer_get_mode(u_int8_t obj_type)
366 {
367 	switch(obj_type) {
368 	case HAMMER_OBJTYPE_DIRECTORY:
369 		return(S_IFDIR);
370 	case HAMMER_OBJTYPE_REGFILE:
371 		return(S_IFREG);
372 	case HAMMER_OBJTYPE_DBFILE:
373 		return(S_IFDB);
374 	case HAMMER_OBJTYPE_FIFO:
375 		return(S_IFIFO);
376 	case HAMMER_OBJTYPE_SOCKET:
377 		return(S_IFSOCK);
378 	case HAMMER_OBJTYPE_CDEV:
379 		return(S_IFCHR);
380 	case HAMMER_OBJTYPE_BDEV:
381 		return(S_IFBLK);
382 	case HAMMER_OBJTYPE_SOFTLINK:
383 		return(S_IFLNK);
384 	default:
385 		return(0);
386 	}
387 	/* not reached */
388 }
389 
390 #if DEBUG > 1
391 static void
392 hprintb(hammer_base_elm_t e)
393 {
394 	printf("%d/", e->localization);
395 	if (e->obj_id >> 32 != 0)
396 		printf("%lx%08lx",
397 		       (long)(e->obj_id >> 32),
398 		       (long)(e->obj_id & 0xffffffff));
399 	else
400 		printf("%lx", (long)e->obj_id);
401 	printf("/%d/", e->rec_type);
402 	if (e->key >> 32 != 0)
403 		printf("%lx%08lx",
404 		       (long)(e->key >> 32),
405 		       (long)(e->key & 0xffffffff));
406 	else
407 		printf("%lx", (long)e->key);
408 #ifdef TESTING
409 	printf("/%llx/%llx", e->create_tid, e->delete_tid);
410 #endif
411 }
412 #endif /* DEBUG > 1 */
413 #endif /* !BOOT2 */
414 
415 static hammer_btree_leaf_elm_t
416 hfind(struct hfs *hfs, hammer_base_elm_t key, hammer_base_elm_t end)
417 {
418 #if DEBUG > 1
419 	printf("searching for ");
420 	hprintb(key);
421 	printf(" end ");
422 	hprintb(end);
423 	printf("\n");
424 #endif
425 
426 	int n;
427 	int r;
428 	struct hammer_base_elm search = *key;
429 	struct hammer_base_elm backtrack;
430 	hammer_off_t nodeoff = hfs->root;
431 	hammer_node_ondisk_t node;
432 	hammer_btree_elm_t e = NULL;
433 	int internal;
434 
435 loop:
436 	node = hread(hfs, nodeoff);
437 	if (node == NULL)
438 		return (NULL);
439 	internal = node->type == HAMMER_BTREE_TYPE_INTERNAL;
440 
441 #if DEBUG > 3
442 	for (int i = 0; i < node->count; i++) {
443 		printf("E: ");
444 		hprintb(&node->elms[i].base);
445 		printf("\n");
446 	}
447 	if (internal) {
448 		printf("B: ");
449 		hprintb(&node->elms[node->count].base);
450 		printf("\n");
451 	}
452 #endif
453 
454 	n = hammer_btree_search_node(&search, node);
455 
456 	// In internal nodes, we cover the right boundary as well.
457 	// If we hit it, we'll backtrack.
458 	for (; n < node->count + internal; n++) {
459 		e = &node->elms[n];
460 		r = hammer_btree_cmp(&search, &e->base);
461 
462 		if (r < 0)
463 			break;
464 	}
465 
466 	// unless we stopped right on the left side, we need to back off a bit
467 	if (n > 0)
468 		e = &node->elms[--n];
469 
470 #if DEBUG > 2
471 	printf("  found: ");
472 	hprintb(&e->base);
473 	printf("\n");
474 #endif
475 
476 	if (internal) {
477 		// If we hit the right boundary, backtrack to
478 		// the next higher level.
479 		if (n == node->count)
480 			goto backtrack;
481 		nodeoff = e->internal.subtree_offset;
482 		backtrack = (e+1)->base;
483 		goto loop;
484 	}
485 
486 	r = hammer_btree_cmp(key, &e->base);
487 	// If we're more off than the createtid, take the next elem
488 	if (r > 1) {
489 		e++;
490 		n++;
491 	}
492 
493 	// Skip deleted elements
494 	while (n < node->count && e->base.delete_tid != 0) {
495 		e++;
496 		n++;
497 	}
498 
499 	// In the unfortunate event when there is no next
500 	// element in this node, we repeat the search with
501 	// a key beyond the right boundary
502 	if (n == node->count) {
503 backtrack:
504 		search = backtrack;
505 		nodeoff = hfs->root;
506 
507 #if DEBUG > 2
508 		printf("hit right boundary (%d), resetting search to ",
509 		       node->count);
510 		hprintb(&search);
511 		printf("\n");
512 #endif
513 		goto loop;
514 	}
515 
516 #if DEBUG > 1
517 	printf("  result: ");
518 	hprintb(&e->base);
519 	printf("\n");
520 #endif
521 
522 	if (end != NULL)
523 		if (hammer_btree_cmp(end, &e->base) < -1)
524 			goto fail;
525 
526 	return (&e->leaf);
527 
528 fail:
529 #if DEBUG > 1
530 	printf("  fail.\n");
531 #endif
532 	return (NULL);
533 }
534 
535 /*
536  * Returns the directory entry localization field based on the directory
537  * inode's capabilities.
538  */
539 static u_int32_t
540 hdirlocalization(struct hfs *hfs, ino_t ino)
541 {
542 	struct hammer_base_elm key;
543 
544 	if (ino != hfs->last_dir_ino) {
545 		bzero(&key, sizeof(key));
546 		key.obj_id = ino;
547 		key.localization = HAMMER_LOCALIZE_INODE;
548 		key.rec_type = HAMMER_RECTYPE_INODE;
549 		hammer_btree_leaf_elm_t e;
550 		hammer_data_ondisk_t ed;
551 
552 		e = hfind(hfs, &key, &key);
553 		if (e) {
554 			ed = hread(hfs, e->data_offset);
555 			if (ed) {
556 				hfs->last_dir_ino = ino;
557 				hfs->last_dir_cap_flags = ed->inode.cap_flags;
558 			} else {
559 				printf("hdirlocal: no inode data for %llx\n",
560 					(long long)ino);
561 			}
562 		} else {
563 			printf("hdirlocal: no inode entry for %llx\n",
564 				(long long)ino);
565 		}
566 	}
567 	if (hfs->last_dir_cap_flags & HAMMER_INODE_CAP_DIR_LOCAL_INO)
568 		return(HAMMER_LOCALIZE_INODE);
569 	else
570 		return(HAMMER_LOCALIZE_MISC);
571 }
572 
573 #ifndef BOOT2
574 static int
575 hreaddir(struct hfs *hfs, ino_t ino, int64_t *off, struct dirent *de)
576 {
577 	struct hammer_base_elm key, end;
578 
579 #if DEBUG > 2
580 	printf("%s(%llx, %lld)\n", __func__, (long long)ino, *off);
581 #endif
582 
583 	bzero(&key, sizeof(key));
584 	key.obj_id = ino;
585 	key.localization = hdirlocalization(hfs, ino);
586 	key.rec_type = HAMMER_RECTYPE_DIRENTRY;
587 	key.key = *off;
588 
589 	end = key;
590 	end.key = HAMMER_MAX_KEY;
591 
592 	hammer_btree_leaf_elm_t e;
593 
594 	e = hfind(hfs, &key, &end);
595 	if (e == NULL) {
596 		errno = ENOENT;
597 		return (-1);
598 	}
599 
600 	*off = e->base.key + 1;		// remember next pos
601 
602 	de->d_namlen = e->data_len - HAMMER_ENTRY_NAME_OFF;
603 	de->d_type = hammer_get_dtype(e->base.obj_type);
604 	hammer_data_ondisk_t ed = hread(hfs, e->data_offset);
605 	if (ed == NULL)
606 		return (-1);
607 	de->d_ino = ed->entry.obj_id;
608 	bcopy(ed->entry.name, de->d_name, de->d_namlen);
609 	de->d_name[de->d_namlen] = 0;
610 
611 	return (0);
612 }
613 #endif
614 
615 static ino_t
616 hresolve(struct hfs *hfs, ino_t dirino, const char *name)
617 {
618 	struct hammer_base_elm key, end;
619 	size_t namel = strlen(name);
620 
621 #if DEBUG > 2
622 	printf("%s(%llx, %s)\n", __func__, (long long)dirino, name);
623 #endif
624 
625 	bzero(&key, sizeof(key));
626 	key.obj_id = dirino;
627 	key.localization = hdirlocalization(hfs, dirino);
628 	key.key = hammer_directory_namekey(name, namel);
629 	key.rec_type = HAMMER_RECTYPE_DIRENTRY;
630 	end = key;
631 	end.key = HAMMER_MAX_KEY;
632 
633 	hammer_btree_leaf_elm_t e;
634 	while ((e = hfind(hfs, &key, &end)) != NULL) {
635 		key.key = e->base.key + 1;
636 
637 		size_t elen = e->data_len - HAMMER_ENTRY_NAME_OFF;
638 		hammer_data_ondisk_t ed = hread(hfs, e->data_offset);
639 		if (ed == NULL)
640 			return (-1);
641 #ifdef BOOT2
642 		if (ls) {
643 			for (int i = 0; i < elen; i++)
644 				putchar(ed->entry.name[i]);
645 			putchar(' ');
646 			ls = 2;
647 			continue;
648 		}
649 #endif
650 		if (elen == namel && memcmp(ed->entry.name, name, MIN(elen, namel)) == 0)
651 			return (ed->entry.obj_id);
652 	}
653 
654 #ifdef BOOT2
655 	if (ls == 2)
656 		printf("\n");
657 #endif
658 
659 	return -1;
660 }
661 
662 static ino_t
663 hlookup(struct hfs *hfs, const char *path)
664 {
665 #if DEBUG > 2
666 	printf("%s(%s)\n", __func__, path);
667 #endif
668 
669 #ifdef BOOT2
670 	ls = 0;
671 #endif
672 	ino_t ino = 1;
673 	do {
674 		char name[MAXPATHLEN + 1];
675 		while (*path == '/')
676 			path++;
677 		if (*path == 0)
678 			break;
679 		for (char *n = name; *path != 0 && *path != '/'; path++, n++) {
680 			n[0] = *path;
681 			n[1] = 0;
682 		}
683 
684 #ifdef BOOT2
685 		// A single ? means "list"
686 		if (name[0] == '?' && name[1] == 0)
687 			ls = 1;
688 #endif
689 
690 		ino = hresolve(hfs, ino, name);
691 	} while (ino != (ino_t)-1 && *path != 0);
692 
693 	return (ino);
694 }
695 
696 
697 #ifndef BOOT2
698 static int
699 hstat(struct hfs *hfs, ino_t ino, struct stat* st)
700 {
701 	struct hammer_base_elm key;
702 
703 #if DEBUG > 2
704 	printf("%s(%llx)\n", __func__, (long long)ino);
705 #endif
706 
707 	bzero(&key, sizeof(key));
708 	key.obj_id = ino;
709 	key.localization = HAMMER_LOCALIZE_INODE;
710 	key.rec_type = HAMMER_RECTYPE_INODE;
711 
712 	hammer_btree_leaf_elm_t e = hfind(hfs, &key, &key);
713 	if (e == NULL) {
714 #ifndef BOOT2
715 		errno = ENOENT;
716 #endif
717 		return -1;
718 	}
719 
720 	hammer_data_ondisk_t ed = hread(hfs, e->data_offset);
721 	if (ed == NULL)
722 		return (-1);
723 
724 	st->st_mode = ed->inode.mode | hammer_get_mode(ed->inode.obj_type);
725 	st->st_uid = hammer_to_unix_xid(&ed->inode.uid);
726 	st->st_gid = hammer_to_unix_xid(&ed->inode.gid);
727 	st->st_size = ed->inode.size;
728 
729 	return (0);
730 }
731 #endif
732 
733 static ssize_t
734 hreadf(struct hfs *hfs, ino_t ino, int64_t off, int64_t len, char *buf)
735 {
736 	int64_t startoff = off;
737 	struct hammer_base_elm key, end;
738 
739 	bzero(&key, sizeof(key));
740 	key.obj_id = ino;
741 	key.localization = HAMMER_LOCALIZE_MISC;
742 	key.rec_type = HAMMER_RECTYPE_DATA;
743 	end = key;
744 	end.key = HAMMER_MAX_KEY;
745 
746 	while (len > 0) {
747 		key.key = off + 1;
748 		hammer_btree_leaf_elm_t e = hfind(hfs, &key, &end);
749 		int64_t dlen;
750 
751 		if (e == NULL || off > e->base.key) {
752 			bzero(buf, len);
753 			off += len;
754 			len = 0;
755 			break;
756 		}
757 
758 		int64_t doff = e->base.key - e->data_len;
759 		if (off < doff) {
760 			// sparse file, beginning
761 			dlen = doff - off;
762 			dlen = MIN(dlen, len);
763 			bzero(buf, dlen);
764 		} else {
765 			int64_t boff = off - doff;
766 			hammer_off_t roff = e->data_offset;
767 
768 			dlen = e->data_len;
769 			dlen -= boff;
770 			dlen = MIN(dlen, len);
771 
772 			while (boff >= HAMMER_BUFSIZE) {
773 				boff -= HAMMER_BUFSIZE;
774 				roff += HAMMER_BUFSIZE;
775 			}
776 
777 			/*
778 			 * boff - relative offset in disk buffer (not aligned)
779 			 * roff - base offset of disk buffer     (not aligned)
780 			 * dlen - amount of data we think we can copy
781 			 *
782 			 * hread only reads 16K aligned buffers, check for
783 			 * a length overflow and truncate dlen appropriately.
784 			 */
785 			if ((roff & ~HAMMER_BUFMASK64) != ((roff + boff + dlen - 1) & ~HAMMER_BUFMASK64))
786 				dlen = HAMMER_BUFSIZE - ((boff + roff) & HAMMER_BUFMASK);
787 			char *data = hread(hfs, roff);
788 			if (data == NULL)
789 				return (-1);
790 			bcopy(data + boff, buf, dlen);
791 		}
792 
793 		buf += dlen;
794 		off += dlen;
795 		len -= dlen;
796 	}
797 
798 	return (off - startoff);
799 }
800 
801 #ifdef BOOT2
802 struct hfs hfs;
803 
804 static int
805 boot2_hammer_init(void)
806 {
807 	hammer_volume_ondisk_t volhead;
808 
809 	volhead = hread(&hfs, HAMMER_ZONE_ENCODE(1, 0));
810 	if (volhead == NULL)
811 		return (-1);
812 	if (volhead->vol_signature != HAMMER_FSBUF_VOLUME)
813 		return (-1);
814 	hfs.root = volhead->vol0_btree_root;
815 	hfs.buf_beg = volhead->vol_buf_beg;
816 	return (0);
817 }
818 
819 static boot2_ino_t
820 boot2_hammer_lookup(const char *path)
821 {
822 	ino_t ino = hlookup(&hfs, path);
823 
824 	if (ino == -1)
825 		ino = 0;
826 
827 	fs_off = 0;
828 
829 	return (ino);
830 }
831 
832 static ssize_t
833 boot2_hammer_read(boot2_ino_t ino, void *buf, size_t len)
834 {
835 	ssize_t rlen = hreadf(&hfs, ino, fs_off, len, buf);
836 	if (rlen != -1)
837 		fs_off += rlen;
838 	return (rlen);
839 }
840 
841 const struct boot2_fsapi boot2_hammer_api = {
842 	.fsinit = boot2_hammer_init,
843 	.fslookup = boot2_hammer_lookup,
844 	.fsread = boot2_hammer_read
845 };
846 
847 #endif
848 
849 #ifndef BOOT2
850 static int
851 hinit(struct hfs *hfs)
852 {
853 #if DEBUG
854 	printf("hinit\n");
855 #endif
856 	for (int i = 0; i < NUMCACHE; i++) {
857 		hfs->cache[i].data = malloc(HAMMER_BUFSIZE);
858 		hfs->cache[i].off = -1;	// invalid
859 		hfs->cache[i].use = 0;
860 
861 #if DEBUG
862 		if (hfs->cache[i].data == NULL)
863 			printf("malloc failed\n");
864 #endif
865 	}
866 	hfs->lru = 0;
867 	hfs->last_dir_ino = -1;
868 
869 	hammer_volume_ondisk_t volhead = hread(hfs, HAMMER_ZONE_ENCODE(1, 0));
870 
871 #ifdef TESTING
872 	if (volhead) {
873 		printf("signature: %svalid\n",
874 		       volhead->vol_signature != HAMMER_FSBUF_VOLUME ?
875 				"in" :
876 				"");
877 		printf("name: %s\n", volhead->vol_label);
878 	}
879 #endif
880 
881 	if (volhead == NULL || volhead->vol_signature != HAMMER_FSBUF_VOLUME) {
882 		for (int i = 0; i < NUMCACHE; i++) {
883 			free(hfs->cache[i].data);
884 			hfs->cache[i].data = NULL;
885 		}
886 		errno = ENODEV;
887 		return (-1);
888 	}
889 
890 	hfs->root = volhead->vol0_btree_root;
891 	hfs->buf_beg = volhead->vol_buf_beg;
892 
893 	return (0);
894 }
895 
896 static void
897 hclose(struct hfs *hfs)
898 {
899 #if DEBUG
900 	printf("hclose\n");
901 #endif
902 	for (int i = 0; i < NUMCACHE; i++) {
903 		if (hfs->cache[i].data) {
904 			free(hfs->cache[i].data);
905 			hfs->cache[i].data = NULL;
906 		}
907 	}
908 }
909 #endif
910 
911 #ifdef LIBSTAND
912 struct hfile {
913 	struct hfs	hfs;
914 	ino_t		ino;
915 	int64_t		fsize;
916 };
917 
918 static int
919 hammer_open(const char *path, struct open_file *f)
920 {
921 	struct hfile *hf = malloc(sizeof(*hf));
922 
923 	bzero(hf, sizeof(*hf));
924 	f->f_fsdata = hf;
925 	hf->hfs.f = f;
926 	f->f_offset = 0;
927 
928 	int rv = hinit(&hf->hfs);
929 	if (rv) {
930 		f->f_fsdata = NULL;
931 		free(hf);
932 		return (rv);
933 	}
934 
935 #if DEBUG
936 	printf("hammer_open %s %p\n", path, f);
937 #endif
938 
939 	hf->ino = hlookup(&hf->hfs, path);
940 	if (hf->ino == -1)
941 		goto fail;
942 
943 	struct stat st;
944 	if (hstat(&hf->hfs, hf->ino, &st) == -1)
945 		goto fail;
946 	hf->fsize = st.st_size;
947 
948 #if DEBUG
949 	printf("	%ld\n", (long)hf->fsize);
950 #endif
951 
952 	return (0);
953 
954 fail:
955 #if DEBUG
956 	printf("hammer_open fail\n");
957 #endif
958 	f->f_fsdata = NULL;
959 	hclose(&hf->hfs);
960 	free(hf);
961 	return (ENOENT);
962 }
963 
964 static int
965 hammer_close(struct open_file *f)
966 {
967 	struct hfile *hf = f->f_fsdata;
968 
969 	f->f_fsdata = NULL;
970 	if (hf) {
971 	    hclose(&hf->hfs);
972 	    free(hf);
973 	}
974 	return (0);
975 }
976 
977 static int
978 hammer_read(struct open_file *f, void *buf, size_t len, size_t *resid)
979 {
980 	struct hfile *hf = f->f_fsdata;
981 
982 #if DEBUG
983 	printf("hammer_read %p %ld %ld\n", f, f->f_offset, len);
984 #endif
985 
986 	if (f->f_offset >= hf->fsize)
987 		return (EINVAL);
988 
989 	size_t maxlen = len;
990 	if (f->f_offset + len > hf->fsize)
991 		maxlen = hf->fsize - f->f_offset;
992 
993 	ssize_t rlen = hreadf(&hf->hfs, hf->ino, f->f_offset, maxlen, buf);
994 	if (rlen == -1)
995 		return (EINVAL);
996 
997 	f->f_offset += rlen;
998 
999 	*resid = len - rlen;
1000 	return (0);
1001 }
1002 
1003 static off_t
1004 hammer_seek(struct open_file *f, off_t offset, int whence)
1005 {
1006 	struct hfile *hf = f->f_fsdata;
1007 
1008 	switch (whence) {
1009 	case SEEK_SET:
1010 		f->f_offset = offset;
1011 		break;
1012 	case SEEK_CUR:
1013 		f->f_offset += offset;
1014 		break;
1015 	case SEEK_END:
1016 		f->f_offset = hf->fsize - offset;
1017 		break;
1018 	default:
1019 		return (-1);
1020 	}
1021 	return (f->f_offset);
1022 }
1023 
1024 static int
1025 hammer_stat(struct open_file *f, struct stat *st)
1026 {
1027 	struct hfile *hf = f->f_fsdata;
1028 
1029 	return (hstat(&hf->hfs, hf->ino, st));
1030 }
1031 
1032 static int
1033 hammer_readdir(struct open_file *f, struct dirent *d)
1034 {
1035 	struct hfile *hf = f->f_fsdata;
1036 
1037 	int64_t off = f->f_offset;
1038 	int rv = hreaddir(&hf->hfs, hf->ino, &off, d);
1039 	f->f_offset = off;
1040 	return (rv);
1041 }
1042 
1043 // libstand
1044 struct fs_ops hammer1_fsops = {
1045 	"hammer",
1046 	hammer_open,
1047 	hammer_close,
1048 	hammer_read,
1049 	null_write,
1050 	hammer_seek,
1051 	hammer_stat,
1052 	hammer_readdir
1053 };
1054 #endif	// LIBSTAND
1055 
1056 #ifdef TESTING
1057 int
1058 main(int argc, char **argv)
1059 {
1060 	if (argc < 2) {
1061 		fprintf(stderr, "usage: hammerread <dev>\n");
1062 		return (1);
1063 	}
1064 
1065 	struct hfs hfs;
1066 	hfs.fd = open(argv[1], O_RDONLY);
1067 	if (hfs.fd == -1)
1068 		err(1, "unable to open %s", argv[1]);
1069 
1070 	if (hinit(&hfs) == -1)
1071 		err(1, "invalid hammerfs");
1072 
1073 	for (int i = 2; i < argc; i++) {
1074 		ino_t ino = hlookup(&hfs, argv[i]);
1075 		if (ino == (ino_t)-1) {
1076 			warn("hlookup %s", argv[i]);
1077 			continue;
1078 		}
1079 
1080 		struct stat st;
1081 		if (hstat(&hfs, ino, &st)) {
1082 			warn("hstat %s", argv[i]);
1083 			continue;
1084 		}
1085 
1086 		printf("%s %d/%d %o %lld\n",
1087 		       argv[i],
1088 		       st.st_uid, st.st_gid,
1089 		       st.st_mode, st.st_size);
1090 
1091 		if (S_ISDIR(st.st_mode)) {
1092 			int64_t off = 0;
1093 			struct dirent de;
1094 			while (hreaddir(&hfs, ino, &off, &de) == 0) {
1095 				printf("%s %d %llx\n",
1096 				       de.d_name, de.d_type, de.d_ino);
1097 			}
1098 		} else if (S_ISREG(st.st_mode)) {
1099 			char *buf = malloc(100000);
1100 			int64_t off = 0;
1101 			while (off < st.st_size) {
1102 				int64_t len = MIN(100000, st.st_size - off);
1103 				int64_t rl = hreadf(&hfs, ino, off, len, buf);
1104 				fwrite(buf, rl, 1, stdout);
1105 				off += rl;
1106 			}
1107 			free(buf);
1108 		}
1109 	}
1110 
1111 	return 0;
1112 }
1113 #endif
1114