xref: /dragonfly/sbin/jscan/jfile.c (revision abf903a5)
1 /*
2  * Copyright (c) 2004,2005 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sbin/jscan/jfile.c,v 1.10 2005/09/07 07:20:23 dillon Exp $
35  */
36 
37 #include "jscan.h"
38 #include <dirent.h>
39 
/*
 * Forward declarations for the file-local helpers defined at the bottom
 * of this file.
 */
static void jalign(struct jfile *jf, enum jdirection direction);
static int jreadbuf(struct jfile *jf, enum jdirection direction, void *buf,
		    int bytes);
43 
44 /*
45  * Open a file descriptor for journal record access.
46  *
47  * NOTE: only seekable descriptors are supported for backwards scans.
48  */
49 struct jfile *
50 jopen_fd(int fd)
51 {
52     struct jfile *jf;
53 
54     jf = malloc(sizeof(struct jfile));
55     bzero(jf, sizeof(struct jfile));
56     jf->jf_fd = fd;
57     jf->jf_write_fd = -1;
58     jf->jf_open_flags = O_RDONLY;
59     jf->jf_pos = 0;
60     return(jf);
61 }
62 
63 /*
64  * Open a prefix set.  <prefix>.nnnnnnnnn files or a <prefix>.transid file
65  * must exist to succeed.  No file descriptor is actually opened but
66  * the sequence number is initialized to the beginning or end of the set.
67  */
68 struct jfile *
69 jopen_prefix(const char *prefix, int rw)
70 {
71     struct jfile *jf;
72     struct jdata *jd;
73     unsigned int seq_beg = -1;
74     unsigned int seq_end = -1;
75     unsigned int seq;
76     struct stat st;
77     const char *dirname;
78     struct dirent *den;
79     DIR *dir;
80     char *basename;
81     char *data;
82     char *ptr;
83     int hastransid;
84     int baselen;
85     int fd;
86 
87     dirname = data = strdup(prefix);
88     if ((basename = strrchr(dirname, '/')) != NULL) {
89 	*basename++ = 0;
90     } else {
91 	basename = data;
92 	dirname = "./";
93     }
94     baselen = strlen(basename);
95     if ((dir = opendir(dirname)) != NULL) {
96 	while ((den = readdir(dir)) != NULL) {
97 	    if (strncmp(den->d_name, basename, baselen) == 0 &&
98 		den->d_name[baselen] == '.'
99 	    ) {
100 		seq = strtoul(den->d_name + baselen + 1, &ptr, 16);
101 		if (*ptr == 0 && seq != UINT_MAX) {
102 		    if (seq_beg == (unsigned int)-1 || seq_beg > seq)
103 			seq_beg = seq;
104 		    if (seq_end == (unsigned int)-1 || seq_end < seq)
105 			seq_end = seq;
106 		}
107 	    }
108 	}
109 	closedir(dir);
110     }
111     free(data);
112 
113     hastransid = 0;
114     asprintf(&data, "%s.transid", prefix);
115     if (stat(data, &st) == 0)
116 	hastransid = 1;
117     free(data);
118 
119     if (seq_beg != (unsigned int)-1 || hastransid) {
120 	if (seq_beg == (unsigned int)-1) {
121 	    seq_beg = 0;
122 	    seq_end = 0;
123 	    if (rw) {
124 		asprintf(&data, "%s.%08x", prefix, 0);
125 		if ((fd = open(data, O_RDWR|O_CREAT, 0666)) >= 0)
126 		    close(fd);
127 		free(data);
128 	    }
129 	}
130 	jf = malloc(sizeof(struct jfile));
131 	bzero(jf, sizeof(struct jfile));
132 	jf->jf_fd = -1;
133 	jf->jf_write_fd = -1;
134 	jf->jf_prefix = strdup(prefix);
135 	jf->jf_seq = seq_beg;
136 	jf->jf_seq_beg = seq_beg;
137 	jf->jf_seq_end = seq_end;
138 	jf->jf_open_flags = rw ? (O_RDWR|O_CREAT) : O_RDONLY;
139 	if (verbose_opt)
140 	    fprintf(stderr, "Open prefix set %08x-%08x\n", seq_beg, seq_end);
141 	if ((jd = jread(jf, NULL, JD_BACKWARDS)) != NULL) {
142 	    jf->jf_last_transid = jd->jd_transid;
143 	    jfree(jf, jd);
144 	}
145     } else {
146 	jf = NULL;
147     }
148     return(jf);
149 }
150 
151 /*
152  * Get a prefix set ready for append.
153  */
154 int
155 jrecord_init(const char *prefix)
156 {
157     struct jfile *jf;
158     struct stat st;
159     char *data;
160     int hasseqspace;
161     int fd;
162 
163     /*
164      * Determine whether we already have a prefix set or whether we need
165      * to create one.
166      */
167     jf = jopen_prefix(prefix, 0);
168     hasseqspace = 0;
169     if (jf) {
170 	if (jf->jf_seq_beg != (unsigned int)-1)
171 	    hasseqspace = 1;
172 	jclose(jf);
173     }
174     asprintf(&data, "%s.transid", prefix);
175 
176     /*
177      * If the sequence exists the transid file must ALREADY exist for us
178      * to be able to safely 'append' to the space.  Locked-down sequence
179      * spaces do not have a transid file.
180      */
181     if (hasseqspace) {
182 	fd = open(data, O_RDWR, 0666);
183     } else {
184 	fd = open(data, O_RDWR|O_CREAT, 0666);
185     }
186     free(data);
187     if (fd < 0)
188 	return(-1);
189     if (fstat(fd, &st) == 0 && st.st_size == 0)
190 	write(fd, "0000000000000000\n", 17);	/* starting transid in hex */
191     close(fd);
192     return(0);
193 }
194 
195 /*
196  * Close a previously opened journal, clean up any side allocations.
197  */
198 void
199 jclose(struct jfile *jf)
200 {
201     if (jf->jf_fd >= 0) {
202 	close(jf->jf_fd);
203 	jf->jf_fd = -1;
204     }
205     if (jf->jf_write_fd >= 0) {
206 	close(jf->jf_write_fd);
207 	jf->jf_write_fd = -1;
208     }
209     free(jf);
210 }
211 
212 /*
213  * Locate the next (or previous) raw record given a jfile, current record,
214  * and direction.  If the current record is NULL then the first or last
215  * record for the current sequence number is returned.
216  *
217  * PAD RECORD SPECIAL CASE.  Pad records can be 16 bytes long, which means
218  * that that rawrecend overlaps the transid field of the rawrecbeg.  Because
219  * the transid is garbage, we must skip and cannot return pad records.
220  */
221 struct jdata *
222 jread(struct jfile *jf, struct jdata *jd, enum jdirection direction)
223 {
224     struct journal_rawrecbeg head;
225     struct journal_rawrecbeg *headp;
226     struct journal_rawrecend tail;
227     struct journal_rawrecend *tailp;
228     struct stat st;
229     char *filename;
230     int allocsize;
231     int recsize;
232     int search;
233     int error;
234     int n;
235 
236     if (jd) {
237 	/*
238 	 * Handle the next/previous record case.  If running in the forwards
239 	 * direction we position the file just after jd.  If running in the
240 	 * backwards direction we position the file at the base of jd so
241 	 * the backwards read gets the previous record.
242 	 *
243 	 * In prefix mode we have to get the right descriptor open and
244 	 * position the file, since the fall through code resets to the
245 	 * beginning or end if it has to open a descriptor.
246 	 */
247 	assert(direction != JD_SEQFIRST && direction != JD_SEQLAST);
248 	if (jf->jf_prefix) {
249 	    if (jf->jf_fd >= 0 && jf->jf_seq != jd->jd_seq) {
250 		close(jf->jf_fd);
251 		jf->jf_fd = -1;
252 	    }
253 	    jf->jf_seq = jd->jd_seq;
254 	    if (jf->jf_fd < 0) {
255 		asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq);
256 		jf->jf_fd = open(filename, O_RDONLY);
257 		if (verbose_opt > 1)
258 		    fprintf(stderr, "Open %s fd %d\n", filename, jf->jf_fd);
259 		free(filename);
260 	    }
261 	}
262 	if ((jmodes & JMODEF_INPUT_PIPE) == 0) {
263 	    if (direction == JD_FORWARDS) {
264 		jf->jf_pos = jd->jd_pos + jd->jd_size;
265 		lseek(jf->jf_fd, jf->jf_pos, 0);
266 	    } else {
267 		jf->jf_pos = jd->jd_pos;
268 		/* lseek(jf->jf_fd, jf->jf_pos, 0); not needed */
269 	    }
270 	} else {
271 	    assert(direction == JD_FORWARDS && jf->jf_prefix == NULL);
272 	    assert(jf->jf_pos == jd->jd_pos + jd->jd_size);
273 	}
274 	jfree(jf, jd);
275     } else {
276 	/*
277 	 * Handle the first/last record case.  In the prefix case we only
278 	 * need to set jf_seq and close the file handle and fall through.
279 	 * The SEQ modes maintain the current jf_seq (kinda a hack).
280 	 */
281 	if (jf->jf_prefix) {
282 	    if (jf->jf_fd >= 0) {
283 		close(jf->jf_fd);
284 		jf->jf_fd = -1;
285 	    }
286 	    switch(direction) {
287 	    case JD_FORWARDS:
288 		jf->jf_seq = jf->jf_seq_beg;
289 		break;
290 	    case JD_BACKWARDS:
291 		jf->jf_seq = jf->jf_seq_end;
292 		break;
293 	    case JD_SEQFIRST:
294 		direction = JD_FORWARDS;
295 		break;
296 	    case JD_SEQLAST:
297 		direction = JD_BACKWARDS;
298 		break;
299 	    }
300 	} else if ((jmodes & JMODEF_INPUT_PIPE) == 0) {
301 	    switch(direction) {
302 	    case JD_SEQFIRST:
303 		direction = JD_FORWARDS;
304 		/* fall through */
305 	    case JD_FORWARDS:
306 		jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_SET);
307 		break;
308 	    case JD_SEQLAST:
309 		direction = JD_BACKWARDS;
310 		/* fall through */
311 	    case JD_BACKWARDS:
312 		jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
313 		break;
314 	    }
315 	} else {
316 	    if (direction == JD_SEQFIRST)
317 		direction = JD_FORWARDS;
318 	    assert(jf->jf_pos == 0 && direction == JD_FORWARDS);
319 	}
320     }
321 
322 top:
323     /*
324      * If we are doing a prefix scan and the descriptor is not open,
325      * open the file based on jf_seq and position it to the beginning
326      * or end based on the direction.  This is how we iterate through
327      * the prefix set.
328      */
329     if (jf->jf_fd < 0) {
330 	asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq);
331 	jf->jf_fd = open(filename, O_RDONLY);
332 	if (verbose_opt > 1)
333 	    fprintf(stderr, "Open %s fd %d\n", filename, jf->jf_fd);
334 	free(filename);
335 	if (direction == JD_FORWARDS)
336 	    jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_SET);
337 	else
338 	    jf->jf_pos = lseek(jf->jf_fd, 0L, SEEK_END);
339     }
340 
341     /*
342      * Get the current offset and make sure it is 16-byte aligned.  If it
343      * isn't, align it and enter search mode.
344      */
345     if (jf->jf_pos & 15) {
346 	jf_warn(jf, "realigning bad offset and entering search mode");
347 	jalign(jf, direction);
348 	search = 1;
349     } else {
350 	search = 0;
351     }
352 
353     error = 0;
354     if (direction == JD_FORWARDS) {
355 	/*
356 	 * Scan the journal forwards.  Note that the file pointer might not
357 	 * be seekable.
358 	 */
359 	while ((error = jreadbuf(jf, direction, &head, sizeof(head))) == sizeof(head)) {
360 	    if (head.begmagic != JREC_BEGMAGIC) {
361 		if (search == 0)
362 		    jf_warn(jf, "bad beginmagic, searching for new record");
363 		search = 1;
364 		jalign(jf, direction);
365 		continue;
366 	    }
367 
368 	    /*
369 	     * The actual record is 16-byte aligned.  head.recsize contains
370 	     * the unaligned record size.
371 	     */
372 	    recsize = (head.recsize + 15) & ~15;
373 	    if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
374 		if (search == 0)
375 		    jf_warn(jf, "bad recordsize: %d\n", recsize);
376 		search = 1;
377 		jalign(jf, direction);
378 		continue;
379 	    }
380 	    allocsize = offsetof(struct jdata, jd_data[recsize]);
381 	    allocsize = (allocsize + 255) & ~255;
382 	    jd = malloc(allocsize);
383 	    bzero(jd, offsetof(struct jdata, jd_data[0]));
384 	    bcopy(&head, jd->jd_data, sizeof(head));
385 	    n = jreadbuf(jf, direction, jd->jd_data + sizeof(head),
386 			 recsize - sizeof(head));
387 	    if (n != (int)(recsize - sizeof(head))) {
388 		if (search == 0)
389 		    jf_warn(jf, "Incomplete stream record\n");
390 		search = 1;
391 		jalign(jf, direction);
392 		free(jd);
393 		continue;
394 	    }
395 
396 	    tailp = (void *)(jd->jd_data + recsize - sizeof(*tailp));
397 	    if (tailp->endmagic != JREC_ENDMAGIC) {
398 		if (search == 0)
399 		    jf_warn(jf, "bad endmagic, searching for new record");
400 		search = 1;
401 		jalign(jf, direction);
402 		free(jd);
403 		continue;
404 	    }
405 
406 	    /*
407 	     * Skip pad records.
408 	     */
409 	    if (head.streamid == JREC_STREAMID_PAD) {
410 		free(jd);
411 		continue;
412 	    }
413 
414 	    /*
415 	     * note: recsize is aligned (the actual record size),
416 	     * head.recsize is unaligned (the actual payload size).
417 	     */
418 	    jd->jd_transid = head.transid;
419 	    jd->jd_alloc = allocsize;
420 	    jd->jd_size = recsize;
421 	    jd->jd_seq = jf->jf_seq;
422 	    jd->jd_pos = jf->jf_pos - recsize;
423 	    jd->jd_refs = 1;
424 	    return(jd);
425 	}
426     } else {
427 	/*
428 	 * Scan the journal backwards.  Note that jread()'s reverse-seek and
429 	 * read.  The data read will be forward ordered, however.
430 	 */
431 	while ((error = jreadbuf(jf, direction, &tail, sizeof(tail))) == sizeof(tail)) {
432 	    if (tail.endmagic != JREC_ENDMAGIC) {
433 		if (search == 0)
434 		    jf_warn(jf, "bad endmagic, searching for new record");
435 		search = 1;
436 		jalign(jf, direction);
437 		continue;
438 	    }
439 
440 	    /*
441 	     * The actual record is 16-byte aligned.  head.recsize contains
442 	     * the unaligned record size.
443 	     */
444 	    recsize = (tail.recsize + 15) & ~15;
445 	    if (recsize < JREC_MINRECSIZE || recsize > JREC_MAXRECSIZE) {
446 		if (search == 0)
447 		    jf_warn(jf, "bad recordsize: %d\n", recsize);
448 		search = 1;
449 		jalign(jf, direction);
450 		continue;
451 	    }
452 	    allocsize = offsetof(struct jdata, jd_data[recsize]);
453 	    allocsize = (allocsize + 255) & ~255;
454 	    jd = malloc(allocsize);
455 	    bzero(jd, offsetof(struct jdata, jd_data[0]));
456 	    bcopy(&tail, jd->jd_data + recsize - sizeof(tail), sizeof(tail));
457 	    n = jreadbuf(jf, direction, jd->jd_data, recsize - sizeof(tail));
458 	    if (n != (int)(recsize - sizeof(tail))) {
459 		if (search == 0)
460 		    jf_warn(jf, "Incomplete stream record\n");
461 		search = 1;
462 		jalign(jf, direction);
463 		free(jd);
464 		continue;
465 	    }
466 
467 	    headp = (void *)jd->jd_data;
468 	    if (headp->begmagic != JREC_BEGMAGIC) {
469 		if (search == 0)
470 		    jf_warn(jf, "bad begmagic, searching for new record");
471 		search = 1;
472 		jalign(jf, direction);
473 		free(jd);
474 		continue;
475 	    }
476 
477 	    /*
478 	     * Skip pad records.
479 	     */
480 	    if (head.streamid == JREC_STREAMID_PAD) {
481 		free(jd);
482 		continue;
483 	    }
484 
485 	    /*
486 	     * note: recsize is aligned (the actual record size),
487 	     * head.recsize is unaligned (the actual payload size).
488 	     */
489 	    jd->jd_transid = headp->transid;
490 	    jd->jd_alloc = allocsize;
491 	    jd->jd_size = recsize;
492 	    jd->jd_seq = jf->jf_seq;
493 	    jd->jd_pos = jf->jf_pos;
494 	    jd->jd_refs = 1;
495 	    return(jd);
496 	}
497     }
498 
499     /*
500      * If reading in prefix mode and there is no more data, close the
501      * current descriptor, adjust the sequence number, and loop.
502      *
503      * If we hit the end of the sequence space and were asked to loop,
504      * check for the next sequence number and adjust jf_seq_end.  Leave
505      * the current descriptor open so we do not loose track of its seek
506      * position, and also to catch a race where another jscan may have
507      * written more data to the current sequence number before rolling
508      * the next sequence number.
509      */
510     if (error == 0 && jf->jf_prefix) {
511 	if (direction == JD_FORWARDS) {
512 	    if (jf->jf_seq < jf->jf_seq_end) {
513 		++jf->jf_seq;
514 		if (verbose_opt)
515 		    fprintf(stderr, "jread: roll to seq %08x\n", jf->jf_seq);
516 		if (jf->jf_fd >= 0) {
517 		    close(jf->jf_fd);
518 		    jf->jf_fd = -1;
519 		}
520 		goto top;
521 	    }
522 	    if (jmodes & JMODEF_LOOP_FOREVER) {
523 		asprintf(&filename, "%s.%08x", jf->jf_prefix, jf->jf_seq + 1);
524 		if (stat(filename, &st) == 0) {
525 		    ++jf->jf_seq_end;
526 		    if (verbose_opt)
527 			fprintf(stderr, "jread: roll seq_end to %08x\n",
528 					 jf->jf_seq_end);
529 		} else {
530 		    sleep(5);
531 		}
532 		goto top;
533 	    }
534 	} else {
535 	    if (jf->jf_seq > jf->jf_seq_beg) {
536 		--jf->jf_seq;
537 		if (verbose_opt)
538 		    fprintf(stderr, "jread: roll to seq %08x\n", jf->jf_seq);
539 		if (jf->jf_fd >= 0) {
540 		    close(jf->jf_fd);
541 		    jf->jf_fd = -1;
542 		}
543 		goto top;
544 	    }
545 	}
546     }
547 
548     /*
549      * If we hit EOF and were asked to loop forever on the input, leave
550      * the current descriptor open, sleep, and loop.
551      *
552      * We have already handled the prefix case.  This feature only works
553      * when doing forward scans and the input is not a pipe.
554      */
555     if (error == 0 && jf->jf_prefix == NULL &&
556 	(jmodes & JMODEF_LOOP_FOREVER) &&
557 	!(jmodes & JMODEF_INPUT_PIPE) &&
558 	direction == JD_FORWARDS
559     ) {
560 	sleep(5);
561 	goto top;
562     }
563 
564     /*
565      * Otherwise there are no more records and we are done.
566      */
567     return(NULL);
568 }
569 
570 /*
571  * Write a record out.  If this is a prefix set and the file would
572  * exceed record_size, we rotate into a new sequence number.
573  */
574 void
575 jwrite(struct jfile *jf, struct jdata *jd)
576 {
577     struct stat st;
578     char *path;
579     int n;
580 
581     assert(jf->jf_prefix);
582 
583 again:
584     /*
585      * Open/create a new file in the prefix set
586      */
587     if (jf->jf_write_fd < 0) {
588 	asprintf(&path, "%s.%08x", jf->jf_prefix, jf->jf_seq_end);
589 	jf->jf_write_fd = open(path, O_RDWR|O_CREAT, 0666);
590 	if (jf->jf_write_fd < 0 || fstat(jf->jf_write_fd, &st) != 0) {
591 	    fprintf(stderr, "Unable to open/create %s\n", path);
592 	    exit(1);
593 	}
594 	jf->jf_write_pos = st.st_size;
595 	lseek(jf->jf_write_fd, jf->jf_write_pos, 0);
596 	free(path);
597     }
598 
599     /*
600      * Each file must contain at least one raw record, even if it exceeds
601      * the user-requested record-size.  Apart from that, we cycle to the next
602      * file when its size would exceed the user-specified
603      */
604     if (jf->jf_write_pos > 0 &&
605 	jf->jf_write_pos + jd->jd_size > prefix_file_size
606     ) {
607 	close(jf->jf_write_fd);
608 	jf->jf_write_fd = -1;
609 	++jf->jf_seq_end;
610 	goto again;
611     }
612 
613     /*
614      * Terminate if a failure occurs (for now).
615      */
616     n = write(jf->jf_write_fd, jd->jd_data, jd->jd_size);
617     if (n != jd->jd_size) {
618 	ftruncate(jf->jf_write_fd, jf->jf_write_pos);
619 	fprintf(stderr, "jwrite: failed %s\n", strerror(errno));
620 	exit(1);
621     }
622     jf->jf_write_pos += n;
623     jf->jf_last_transid = jd->jd_transid;
624 }
625 
626 /*
627  * Attempt to locate and return the record specified by the transid.  The
628  * returned record may be inexact.
629  *
630  * If scanning forwards this function guarentees that no record prior
631  * to the returned record is >= transid.
632  *
633  * If scanning backwards this function guarentees that no record after
634  * the returned record is <= transid.
635  */
636 struct jdata *
637 jseek(struct jfile *jf, int64_t transid, enum jdirection direction)
638 {
639     unsigned int seq;
640     struct jdata *jd = NULL;
641 
642     /*
643      * If the input is a pipe we can't seek.
644      */
645     if (jmodes & JMODEF_INPUT_PIPE) {
646 	assert(direction == JD_FORWARDS);
647 	return (jread(jf, NULL, direction));
648     }
649 
650     if (jf->jf_prefix) {
651 	/*
652 	 * If we have a prefix set search the sequence space backwards until
653 	 * we find the file most likely to contain the transaction id.
654 	 */
655 	if (verbose_opt > 2) {
656 	    fprintf(stderr, "jseek prefix set %s %08x-%08x\n", jf->jf_prefix,
657 		    jf->jf_seq_beg, jf->jf_seq_end);
658 	}
659 	jd = NULL;
660 	for (seq = jf->jf_seq_end; seq != jf->jf_seq_beg - 1; --seq) {
661 	    if (verbose_opt > 2)
662 		fprintf(stderr, "try seq %08x\n", seq);
663 	    jf->jf_seq = seq;
664 	    if ((jd = jread(jf, NULL, JD_SEQFIRST)) != NULL) {
665 		if (jd->jd_transid == transid)
666 		    return(jd);
667 		if (jd->jd_transid < transid) {
668 		    jfree(jf, jd);
669 		    break;
670 		}
671 		jfree(jf, jd);
672 	    }
673 	}
674 
675 	/*
676 	 * if transid is less the first file in the sequence space we
677 	 * return NULL if scanning backwards, indicating no records are
678 	 * available, or the first record in the sequence space if we
679 	 * are scanning forwards.
680 	 */
681 	if (seq == jf->jf_seq_beg - 1) {
682 	    if (direction == JD_BACKWARDS)
683 		return(NULL);
684 	    else
685 		return(jread(jf, NULL, JD_FORWARDS));
686 	}
687 	if (verbose_opt > 1)
688 	    fprintf(stderr, "jseek input prefix set to seq %08x\n", seq);
689     }
690 
691     /*
692      * Position us to the end of the current record, then scan backwards
693      * looking for the requested transid.
694      */
695     jd = jread(jf, NULL, JD_SEQLAST);
696     while (jd != NULL) {
697 	if (jd->jd_transid <= transid) {
698 	    if (jd->jd_transid < transid) {
699 		if (direction == JD_FORWARDS)
700 		    jd =jread(jf, jd, JD_FORWARDS);
701 	    }
702 	    if (verbose_opt > 1) {
703 		fprintf(stderr, "jseek returning seq %08x offset 0x%08jx\n",
704 			jd->jd_seq, (uintmax_t)jd->jd_pos);
705 	    }
706 	    return(jd);
707 	}
708 	jd = jread(jf, jd, JD_BACKWARDS);
709     }
710 
711     /*
712      * We scanned the whole file with no luck, all the transid's are
713      * greater then the requested transid.  If the intended read
714      * direction is backwards there are no records and we return NULL.
715      * If it is forwards we return the first record.
716      */
717     if (direction == JD_BACKWARDS)
718 	return(NULL);
719     else
720 	return(jread(jf, NULL, JD_FORWARDS));
721 }
722 
723 /*
724  * Data returned by jread() is persistent until released.
725  */
726 struct jdata *
727 jref(struct jdata *jd)
728 {
729     ++jd->jd_refs;
730     return(jd);
731 }
732 
733 void
734 jfree(struct jfile *jf __unused, struct jdata *jd)
735 {
736     if (--jd->jd_refs == 0)
737 	free(jd);
738 }
739 
740 /*
741  * Align us to the next 16 byte boundary.  If scanning forwards we align
742  * forwards if not already aligned.  If scanning backwards we align
743  * backwards if not already aligned.  We only have to synchronize the
744  * seek position with the file seek position for forward scans.
745  */
746 static void
747 jalign(struct jfile *jf, enum jdirection direction)
748 {
749     char dummy[16];
750     int bytes;
751 
752     if ((int)jf->jf_pos & 15) {
753 	if (direction == JD_FORWARDS) {
754 	    bytes = 16 - ((int)jf->jf_pos & 15);
755 	    jreadbuf(jf, direction, dummy, bytes);
756 	} else {
757 	    jf->jf_pos = jf->jf_pos & ~(off_t)15;
758 	}
759     }
760 }
761 
762 /*
763  * Read the next raw journal record forwards or backwards and return a
764  * pointer to it.  Note that the file pointer's actual seek position does
765  * not match jf_pos in the reverse direction case.
766  */
767 static int
768 jreadbuf(struct jfile *jf, enum jdirection direction, void *buf, int bytes)
769 {
770     int ttl = 0;
771     int n;
772 
773     if (jf->jf_fd < 0)
774 	return(0);
775 
776     if (direction == JD_FORWARDS) {
777 	while (ttl != bytes) {
778 	    n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);
779 	    if (n <= 0) {
780 		if (n < 0 && ttl == 0)
781 		    ttl = -errno;
782 		break;
783 	    }
784 	    ttl += n;
785 	    jf->jf_pos += n;
786 	}
787     } else {
788 	if (jf->jf_pos >= bytes) {
789 	    jf->jf_pos -= bytes;
790 	    lseek(jf->jf_fd, jf->jf_pos, 0);
791 	    while (ttl != bytes) {
792 		n = read(jf->jf_fd, (char *)buf + ttl, bytes - ttl);
793 		if (n <= 0) {
794 		    if (n < 0 && ttl == 0)
795 			ttl = -errno;
796 		    break;
797 		}
798 		ttl += n;
799 	    }
800 	}
801     }
802     return(ttl);
803 }
804 
805