xref: /openbsd/usr.bin/less/ch.c (revision c74702f8)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Low level character input from the input file.
14  * We use these special purpose routines which optimize moving
15  * both forward and backward from the current read pointer.
16  */
17 
18 #include <sys/stat.h>
19 
20 #include "less.h"
21 
/*
 * Device and inode of the file being viewed.  ch_get() compares them with
 * a fresh stat() of the current file's name to notice that the file was
 * replaced while following by name.
 */
extern dev_t curr_dev;
extern ino_t curr_ino;

extern int less_is_more;

/* Index of an LBUFSIZE-byte block within the input file. */
typedef off_t BLOCKNUM;

/*
 * When non-zero, ch_length() reports the length as unknown (-1) and
 * ch_get() waits and retries at end of file instead of stopping there.
 */
int ignore_eoi;
29 
/*
 * Link fields for the pool of buffers that hold the most recently used
 * blocks of the input file.
 *
 * Every buffer sits on two doubly-linked circular lists at once:
 *   - next/prev:   the per-file buffer list, ordered from most- to
 *                  least-recently used and anchored by the file state
 *                  "thisfile";
 *   - hnext/hprev: one hash chain of that same file state.
 */
struct bufnode {
	struct bufnode *next;
	struct bufnode *prev;
	struct bufnode *hnext;
	struct bufnode *hprev;
};
40 
41 #define	LBUFSIZE	8192
42 struct buf {
43 	struct bufnode node;
44 	BLOCKNUM block;
45 	unsigned int datasize;
46 	unsigned char data[LBUFSIZE];
47 };
48 #define	bufnode_buf(bn)  ((struct buf *)bn)
49 
50 /*
51  * The file state is maintained in a filestate structure.
52  * A pointer to the filestate is kept in the ifile structure.
53  */
54 #define	BUFHASH_SIZE	64
55 struct filestate {
56 	struct bufnode buflist;
57 	struct bufnode hashtbl[BUFHASH_SIZE];
58 	int file;
59 	int flags;
60 	off_t fpos;
61 	int nbufs;
62 	BLOCKNUM block;
63 	unsigned int offset;
64 	off_t fsize;
65 };
66 
/*
 * Shorthand for the fields of the current file state.
 * Each one expands to an lvalue inside *thisfile, so thisfile must be
 * non-NULL wherever they are used.
 */
#define	ch_bufhead	thisfile->buflist.next
#define	ch_buftail	thisfile->buflist.prev
#define	ch_nbufs	thisfile->nbufs
#define	ch_block	thisfile->block
#define	ch_offset	thisfile->offset
#define	ch_fpos		thisfile->fpos
#define	ch_fsize	thisfile->fsize
#define	ch_flags	thisfile->flags
#define	ch_file		thisfile->file

/* List anchors (sentinels) and the hash function for block numbers. */
#define	END_OF_CHAIN	(&thisfile->buflist)
#define	END_OF_HCHAIN(h) (&thisfile->hashtbl[h])
#define	BUFHASH(blk)	((blk) & (BUFHASH_SIZE-1))

/*
 * Macros to manipulate the list of buffers in thisfile->buflist.
 *
 * The multi-statement macros are wrapped in do { } while (0) so each one
 * behaves as a single statement, including as the body of an unbraced
 * if/else.  They evaluate their argument more than once, so pass an
 * expression without side effects and terminate the use with ';'.
 */
#define	FOR_BUFS(bn) \
	for ((bn) = ch_bufhead; (bn) != END_OF_CHAIN; (bn) = (bn)->next)

/* Unlink bn from the buffer list. */
#define	BUF_RM(bn) \
	do { \
		(bn)->next->prev = (bn)->prev; \
		(bn)->prev->next = (bn)->next; \
	} while (0)

/* Link bn in as the most recently used buffer. */
#define	BUF_INS_HEAD(bn) \
	do { \
		(bn)->next = ch_bufhead; \
		(bn)->prev = END_OF_CHAIN; \
		ch_bufhead->prev = (bn); \
		ch_bufhead = (bn); \
	} while (0)

/* Link bn in as the least recently used buffer. */
#define	BUF_INS_TAIL(bn) \
	do { \
		(bn)->next = END_OF_CHAIN; \
		(bn)->prev = ch_buftail; \
		ch_buftail->next = (bn); \
		ch_buftail = (bn); \
	} while (0)

/*
 * Macros to manipulate the list of buffers in thisfile->hashtbl[n].
 */
#define	FOR_BUFS_IN_CHAIN(h, bn) \
	for ((bn) = thisfile->hashtbl[h].hnext; \
	    (bn) != END_OF_HCHAIN(h); (bn) = (bn)->hnext)

/* Unlink bn from whichever hash chain it is on. */
#define	BUF_HASH_RM(bn) \
	do { \
		(bn)->hnext->hprev = (bn)->hprev; \
		(bn)->hprev->hnext = (bn)->hnext; \
	} while (0)

/* Link bn in at the front of hash chain h. */
#define	BUF_HASH_INS(bn, h) \
	do { \
		(bn)->hnext = thisfile->hashtbl[h].hnext; \
		(bn)->hprev = END_OF_HCHAIN(h); \
		thisfile->hashtbl[h].hnext->hprev = (bn); \
		thisfile->hashtbl[h].hnext = (bn); \
	} while (0)
119 
120 static struct filestate *thisfile;
121 static int ch_ungotchar = -1;
122 static int maxbufs = -1;
123 
124 extern int autobuf;
125 extern int secure;
126 extern int screen_trashed;
127 extern int follow_mode;
128 extern IFILE curr_ifile;
129 extern int logfile;
130 extern char *namelogfile;
131 
132 static int ch_addbuf(void);
133 
134 
135 /*
136  * Get the character pointed to by the read pointer.
137  */
138 int
ch_get(void)139 ch_get(void)
140 {
141 	struct buf *bp;
142 	struct bufnode *bn;
143 	int n;
144 	int slept;
145 	int h;
146 	off_t pos;
147 	off_t len;
148 
149 	if (thisfile == NULL)
150 		return (EOI);
151 
152 	/*
153 	 * Quick check for the common case where
154 	 * the desired char is in the head buffer.
155 	 */
156 	if (ch_bufhead != END_OF_CHAIN) {
157 		bp = bufnode_buf(ch_bufhead);
158 		if (ch_block == bp->block && ch_offset < bp->datasize)
159 			return (bp->data[ch_offset]);
160 	}
161 
162 	slept = FALSE;
163 
164 	/*
165 	 * Look for a buffer holding the desired block.
166 	 */
167 	h = BUFHASH(ch_block);
168 	FOR_BUFS_IN_CHAIN(h, bn) {
169 		bp = bufnode_buf(bn);
170 		if (bp->block == ch_block) {
171 			if (ch_offset >= bp->datasize)
172 				/*
173 				 * Need more data in this buffer.
174 				 */
175 				break;
176 			goto found;
177 		}
178 	}
179 	if (bn == END_OF_HCHAIN(h)) {
180 		/*
181 		 * Block is not in a buffer.
182 		 * Take the least recently used buffer
183 		 * and read the desired block into it.
184 		 * If the LRU buffer has data in it,
185 		 * then maybe allocate a new buffer.
186 		 */
187 		if (ch_buftail == END_OF_CHAIN ||
188 		    bufnode_buf(ch_buftail)->block != -1) {
189 			/*
190 			 * There is no empty buffer to use.
191 			 * Allocate a new buffer if:
192 			 * 1. We can't seek on this file and -b is not in
193 			 *    effect; or
194 			 * 2. We haven't allocated the max buffers for this
195 			 *    file yet.
196 			 */
197 			if ((autobuf && !(ch_flags & CH_CANSEEK)) ||
198 			    (maxbufs < 0 || ch_nbufs < maxbufs))
199 				if (ch_addbuf())
200 					/*
201 					 * Allocation failed: turn off autobuf.
202 					 */
203 					autobuf = OPT_OFF;
204 		}
205 		bn = ch_buftail;
206 		bp = bufnode_buf(bn);
207 		BUF_HASH_RM(bn); /* Remove from old hash chain. */
208 		bp->block = ch_block;
209 		bp->datasize = 0;
210 		BUF_HASH_INS(bn, h); /* Insert into new hash chain. */
211 	}
212 
213 read_more:
214 	pos = (ch_block * LBUFSIZE) + bp->datasize;
215 	if ((len = ch_length()) != -1 && pos >= len)
216 		/*
217 		 * At end of file.
218 		 */
219 		return (EOI);
220 
221 	if (pos != ch_fpos) {
222 		/*
223 		 * Not at the correct position: must seek.
224 		 * If input is a pipe, we're in trouble (can't seek on a pipe).
225 		 * Some data has been lost: just return "?".
226 		 */
227 		if (!(ch_flags & CH_CANSEEK))
228 			return ('?');
229 		if (lseek(ch_file, (off_t)pos, SEEK_SET) == (off_t)-1) {
230 			error("seek error", NULL);
231 			clear_eol();
232 			return (EOI);
233 		}
234 		ch_fpos = pos;
235 	}
236 
237 	/*
238 	 * Read the block.
239 	 * If we read less than a full block, that's ok.
240 	 * We use partial block and pick up the rest next time.
241 	 */
242 	if (ch_ungotchar != -1) {
243 		bp->data[bp->datasize] = (unsigned char)ch_ungotchar;
244 		n = 1;
245 		ch_ungotchar = -1;
246 	} else {
247 		n = iread(ch_file, &bp->data[bp->datasize],
248 		    (unsigned int)(LBUFSIZE - bp->datasize));
249 	}
250 
251 	if (n == READ_INTR)
252 		return (EOI);
253 	if (n < 0) {
254 		error("read error", NULL);
255 		clear_eol();
256 		n = 0;
257 	}
258 
259 	/*
260 	 * If we have a log file, write the new data to it.
261 	 */
262 	if (!secure && logfile >= 0 && n > 0)
263 		(void) write(logfile, (char *)&bp->data[bp->datasize], n);
264 
265 	ch_fpos += n;
266 	bp->datasize += n;
267 
268 	/*
269 	 * If we have read to end of file, set ch_fsize to indicate
270 	 * the position of the end of file.
271 	 */
272 	if (n == 0) {
273 		ch_fsize = pos;
274 		if (ignore_eoi) {
275 			/*
276 			 * We are ignoring EOF.
277 			 * Wait a while, then try again.
278 			 */
279 			if (!slept) {
280 				PARG parg;
281 				parg.p_string = wait_message();
282 				ierror("%s", &parg);
283 			}
284 			sleep(1);
285 			slept = TRUE;
286 
287 			if (follow_mode == FOLLOW_NAME) {
288 				/*
289 				 * See whether the file's i-number has changed.
290 				 * If so, force the file to be closed and
291 				 * reopened.
292 				 */
293 				struct stat st;
294 				int r = stat(get_filename(curr_ifile), &st);
295 				if (r == 0 && (st.st_ino != curr_ino ||
296 				    st.st_dev != curr_dev)) {
297 					/*
298 					 * screen_trashed=2 causes
299 					 * make_display to reopen the file.
300 					 */
301 					screen_trashed = 2;
302 					return (EOI);
303 				}
304 			}
305 		}
306 		if (any_sigs())
307 			return (EOI);
308 	}
309 
310 found:
311 	if (ch_bufhead != bn) {
312 		/*
313 		 * Move the buffer to the head of the buffer chain.
314 		 * This orders the buffer chain, most- to least-recently used.
315 		 */
316 		BUF_RM(bn);
317 		BUF_INS_HEAD(bn);
318 
319 		/*
320 		 * Move to head of hash chain too.
321 		 */
322 		BUF_HASH_RM(bn);
323 		BUF_HASH_INS(bn, h);
324 	}
325 
326 	if (ch_offset >= bp->datasize)
327 		/*
328 		 * After all that, we still don't have enough data.
329 		 * Go back and try again.
330 		 */
331 		goto read_more;
332 
333 	return (bp->data[ch_offset]);
334 }
335 
336 /*
337  * ch_ungetchar is a rather kludgy and limited way to push
338  * a single char onto an input file descriptor.
339  */
340 void
ch_ungetchar(int c)341 ch_ungetchar(int c)
342 {
343 	if (c != -1 && ch_ungotchar != -1)
344 		error("ch_ungetchar overrun", NULL);
345 	ch_ungotchar = c;
346 }
347 
348 /*
349  * Close the logfile.
350  * If we haven't read all of standard input into it, do that now.
351  */
352 void
end_logfile(void)353 end_logfile(void)
354 {
355 	static int tried = FALSE;
356 
357 	if (logfile < 0)
358 		return;
359 	if (!tried && ch_fsize == -1) {
360 		tried = TRUE;
361 		ierror("Finishing logfile", NULL);
362 		while (ch_forw_get() != EOI)
363 			if (abort_sigs())
364 				break;
365 	}
366 	close(logfile);
367 	logfile = -1;
368 	free(namelogfile);
369 	namelogfile = NULL;
370 }
371 
372 /*
373  * Start a log file AFTER less has already been running.
374  * Invoked from the - command; see toggle_option().
375  * Write all the existing buffered data to the log file.
376  */
377 void
sync_logfile(void)378 sync_logfile(void)
379 {
380 	struct buf *bp;
381 	struct bufnode *bn;
382 	int warned = FALSE;
383 	BLOCKNUM block;
384 	BLOCKNUM nblocks;
385 
386 	nblocks = (ch_fpos + LBUFSIZE - 1) / LBUFSIZE;
387 	for (block = 0; block < nblocks; block++) {
388 		int wrote = FALSE;
389 		FOR_BUFS(bn) {
390 			bp = bufnode_buf(bn);
391 			if (bp->block == block) {
392 				(void) write(logfile, (char *)bp->data,
393 				    bp->datasize);
394 				wrote = TRUE;
395 				break;
396 			}
397 		}
398 		if (!wrote && !warned) {
399 			error("Warning: log file is incomplete", NULL);
400 			warned = TRUE;
401 		}
402 	}
403 }
404 
405 /*
406  * Determine if a specific block is currently in one of the buffers.
407  */
408 static int
buffered(BLOCKNUM block)409 buffered(BLOCKNUM block)
410 {
411 	struct buf *bp;
412 	struct bufnode *bn;
413 	int h;
414 
415 	h = BUFHASH(block);
416 	FOR_BUFS_IN_CHAIN(h, bn) {
417 		bp = bufnode_buf(bn);
418 		if (bp->block == block)
419 			return (TRUE);
420 	}
421 	return (FALSE);
422 }
423 
424 /*
425  * Seek to a specified position in the file.
426  * Return 0 if successful, non-zero if can't seek there.
427  */
428 int
ch_seek(off_t pos)429 ch_seek(off_t pos)
430 {
431 	BLOCKNUM new_block;
432 	off_t len;
433 
434 	if (thisfile == NULL)
435 		return (0);
436 
437 	len = ch_length();
438 	if (pos < ch_zero() || (len != -1 && pos > len))
439 		return (1);
440 
441 	new_block = pos / LBUFSIZE;
442 	if (!(ch_flags & CH_CANSEEK) && pos != ch_fpos &&
443 	    !buffered(new_block)) {
444 		if (ch_fpos > pos)
445 			return (1);
446 		while (ch_fpos < pos) {
447 			if (ch_forw_get() == EOI)
448 				return (1);
449 			if (abort_sigs())
450 				return (1);
451 		}
452 		return (0);
453 	}
454 	/*
455 	 * Set read pointer.
456 	 */
457 	ch_block = new_block;
458 	ch_offset = pos % LBUFSIZE;
459 	return (0);
460 }
461 
462 /*
463  * Seek to the end of the file.
464  */
465 int
ch_end_seek(void)466 ch_end_seek(void)
467 {
468 	off_t len;
469 
470 	if (thisfile == NULL)
471 		return (0);
472 
473 	if (ch_flags & CH_CANSEEK)
474 		ch_fsize = filesize(ch_file);
475 
476 	len = ch_length();
477 	if (len != -1)
478 		return (ch_seek(len));
479 
480 	/*
481 	 * Do it the slow way: read till end of data.
482 	 */
483 	while (ch_forw_get() != EOI)
484 		if (abort_sigs())
485 			return (1);
486 	return (0);
487 }
488 
489 /*
490  * Seek to the beginning of the file, or as close to it as we can get.
491  * We may not be able to seek there if input is a pipe and the
492  * beginning of the pipe is no longer buffered.
493  */
494 int
ch_beg_seek(void)495 ch_beg_seek(void)
496 {
497 	struct bufnode *bn;
498 	struct bufnode *firstbn;
499 
500 	/*
501 	 * Try a plain ch_seek first.
502 	 */
503 	if (ch_seek(ch_zero()) == 0)
504 		return (0);
505 
506 	/*
507 	 * Can't get to position 0.
508 	 * Look thru the buffers for the one closest to position 0.
509 	 */
510 	firstbn = ch_bufhead;
511 	if (firstbn == END_OF_CHAIN)
512 		return (1);
513 	FOR_BUFS(bn) {
514 		if (bufnode_buf(bn)->block < bufnode_buf(firstbn)->block)
515 			firstbn = bn;
516 	}
517 	ch_block = bufnode_buf(firstbn)->block;
518 	ch_offset = 0;
519 	return (0);
520 }
521 
522 /*
523  * Return the length of the file, if known.
524  */
525 off_t
ch_length(void)526 ch_length(void)
527 {
528 	if (thisfile == NULL)
529 		return (-1);
530 	if (ignore_eoi)
531 		return (-1);
532 	if (ch_flags & CH_NODATA)
533 		return (0);
534 	return (ch_fsize);
535 }
536 
537 /*
538  * Return the current position in the file.
539  */
540 off_t
ch_tell(void)541 ch_tell(void)
542 {
543 	if (thisfile == NULL)
544 		return (-1);
545 	return ((ch_block * LBUFSIZE) + ch_offset);
546 }
547 
548 /*
549  * Get the current char and post-increment the read pointer.
550  */
551 int
ch_forw_get(void)552 ch_forw_get(void)
553 {
554 	int c;
555 
556 	if (thisfile == NULL)
557 		return (EOI);
558 	c = ch_get();
559 	if (c == EOI)
560 		return (EOI);
561 	if (ch_offset < LBUFSIZE-1) {
562 		ch_offset++;
563 	} else {
564 		ch_block ++;
565 		ch_offset = 0;
566 	}
567 	return (c);
568 }
569 
570 /*
571  * Pre-decrement the read pointer and get the new current char.
572  */
573 int
ch_back_get(void)574 ch_back_get(void)
575 {
576 	if (thisfile == NULL)
577 		return (EOI);
578 	if (ch_offset > 0) {
579 		ch_offset --;
580 	} else {
581 		if (ch_block <= 0)
582 			return (EOI);
583 		if (!(ch_flags & CH_CANSEEK) && !buffered(ch_block-1))
584 			return (EOI);
585 		ch_block--;
586 		ch_offset = LBUFSIZE-1;
587 	}
588 	return (ch_get());
589 }
590 
591 /*
592  * Set max amount of buffer space.
593  * bufspace is in units of 1024 bytes.  -1 mean no limit.
594  */
595 void
ch_setbufspace(int bufspace)596 ch_setbufspace(int bufspace)
597 {
598 	if (bufspace < 0) {
599 		maxbufs = -1;
600 	} else {
601 		maxbufs = ((bufspace * 1024) + LBUFSIZE-1) / LBUFSIZE;
602 		if (maxbufs < 1)
603 			maxbufs = 1;
604 	}
605 }
606 
607 /*
608  * Flush (discard) any saved file state, including buffer contents.
609  */
610 void
ch_flush(void)611 ch_flush(void)
612 {
613 	struct bufnode *bn;
614 
615 	if (thisfile == NULL)
616 		return;
617 
618 	if (!(ch_flags & CH_CANSEEK)) {
619 		/*
620 		 * If input is a pipe, we don't flush buffer contents,
621 		 * since the contents can't be recovered.
622 		 */
623 		ch_fsize = -1;
624 		return;
625 	}
626 
627 	/*
628 	 * Initialize all the buffers.
629 	 */
630 	FOR_BUFS(bn) {
631 		bufnode_buf(bn)->block = -1;
632 	}
633 
634 	/*
635 	 * Figure out the size of the file, if we can.
636 	 */
637 	ch_fsize = filesize(ch_file);
638 
639 	/*
640 	 * Seek to a known position: the beginning of the file.
641 	 */
642 	ch_fpos = 0;
643 	ch_block = 0; /* ch_fpos / LBUFSIZE; */
644 	ch_offset = 0; /* ch_fpos % LBUFSIZE; */
645 
646 	if (lseek(ch_file, (off_t)0, SEEK_SET) == (off_t)-1) {
647 		/*
648 		 * Warning only; even if the seek fails for some reason,
649 		 * there's a good chance we're at the beginning anyway.
650 		 * {{ I think this is bogus reasoning. }}
651 		 */
652 		error("seek error to 0", NULL);
653 	}
654 }
655 
656 /*
657  * Allocate a new buffer.
658  * The buffer is added to the tail of the buffer chain.
659  */
660 static int
ch_addbuf(void)661 ch_addbuf(void)
662 {
663 	struct buf *bp;
664 	struct bufnode *bn;
665 
666 	/*
667 	 * Allocate and initialize a new buffer and link it
668 	 * onto the tail of the buffer list.
669 	 */
670 	bp = calloc(1, sizeof (struct buf));
671 	if (bp == NULL)
672 		return (1);
673 	ch_nbufs++;
674 	bp->block = -1;
675 	bn = &bp->node;
676 
677 	BUF_INS_TAIL(bn);
678 	BUF_HASH_INS(bn, 0);
679 	return (0);
680 }
681 
682 /*
683  *
684  */
685 static void
init_hashtbl(void)686 init_hashtbl(void)
687 {
688 	int h;
689 
690 	for (h = 0; h < BUFHASH_SIZE; h++) {
691 		thisfile->hashtbl[h].hnext = END_OF_HCHAIN(h);
692 		thisfile->hashtbl[h].hprev = END_OF_HCHAIN(h);
693 	}
694 }
695 
696 /*
697  * Delete all buffers for this file.
698  */
699 static void
ch_delbufs(void)700 ch_delbufs(void)
701 {
702 	struct bufnode *bn;
703 
704 	while (ch_bufhead != END_OF_CHAIN) {
705 		bn = ch_bufhead;
706 		BUF_RM(bn);
707 		free(bufnode_buf(bn));
708 	}
709 	ch_nbufs = 0;
710 	init_hashtbl();
711 }
712 
/*
 * Is it possible to seek on a file descriptor?
 * Probes by seeking to offset 1 from the start; returns non-zero if the
 * seek succeeds.  On success the descriptor is left at offset 1.
 */
int
seekable(int f)
{
	off_t where = lseek(f, (off_t)1, SEEK_SET);

	return (where != (off_t)-1);
}
721 
722 /*
723  * Force EOF to be at the current read position.
724  * This is used after an ignore_eof read, during which the EOF may change.
725  */
726 void
ch_set_eof(void)727 ch_set_eof(void)
728 {
729 	ch_fsize = ch_fpos;
730 }
731 
732 
733 /*
734  * Initialize file state for a new file.
735  */
736 void
ch_init(int f,int flags)737 ch_init(int f, int flags)
738 {
739 	/*
740 	 * See if we already have a filestate for this file.
741 	 */
742 	thisfile = get_filestate(curr_ifile);
743 	if (thisfile == NULL) {
744 		/*
745 		 * Allocate and initialize a new filestate.
746 		 */
747 		thisfile = calloc(1, sizeof (struct filestate));
748 		thisfile->buflist.next = thisfile->buflist.prev = END_OF_CHAIN;
749 		thisfile->nbufs = 0;
750 		thisfile->flags = 0;
751 		thisfile->fpos = 0;
752 		thisfile->block = 0;
753 		thisfile->offset = 0;
754 		thisfile->file = -1;
755 		thisfile->fsize = -1;
756 		ch_flags = flags;
757 		init_hashtbl();
758 		/*
759 		 * Try to seek; set CH_CANSEEK if it works.
760 		 */
761 		if ((flags & CH_CANSEEK) && !seekable(f))
762 			ch_flags &= ~CH_CANSEEK;
763 		set_filestate(curr_ifile, (void *) thisfile);
764 	}
765 	if (thisfile->file == -1)
766 		thisfile->file = f;
767 	ch_flush();
768 }
769 
770 /*
771  * Close a filestate.
772  */
773 void
ch_close(void)774 ch_close(void)
775 {
776 	int keepstate = FALSE;
777 
778 	if (thisfile == NULL)
779 		return;
780 
781 	if (ch_flags & (CH_CANSEEK|CH_HELPFILE)) {
782 		/*
783 		 * We can seek or re-open, so we don't need to keep buffers.
784 		 */
785 		ch_delbufs();
786 	} else {
787 		keepstate = TRUE;
788 	}
789 	if (!(ch_flags & CH_KEEPOPEN)) {
790 		/*
791 		 * We don't need to keep the file descriptor open
792 		 * (because we can re-open it.)
793 		 */
794 		close(ch_file);
795 		ch_file = -1;
796 	} else {
797 		keepstate = TRUE;
798 	}
799 	if (!keepstate) {
800 		/*
801 		 * We don't even need to keep the filestate structure.
802 		 */
803 		free(thisfile);
804 		thisfile = NULL;
805 		set_filestate(curr_ifile, NULL);
806 	}
807 }
808 
809 /*
810  * Return ch_flags for the current file.
811  */
812 int
ch_getflags(void)813 ch_getflags(void)
814 {
815 	if (thisfile == NULL)
816 		return (0);
817 	return (ch_flags);
818 }
819