xref: /openbsd/usr.bin/less/line.c (revision b2f6cc19)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to manipulate the "line buffer".
14  * The line buffer holds a line of output as it is being built
15  * in preparation for output to the screen.
16  */
17 
#include <limits.h>
#include <wchar.h>
#include <wctype.h>

#include "charset.h"
#include "less.h"
23 
24 static char *linebuf = NULL;	/* Buffer which holds the current output line */
25 static char *attr = NULL;	/* Extension of linebuf to hold attributes */
26 int size_linebuf = 0;		/* Size of line buffer (and attr buffer) */
27 
28 static int cshift;		/* Current left-shift of output line buffer */
29 int hshift;			/* Desired left-shift of output line buffer */
30 int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
31 int ntabstops = 1;		/* Number of tabstops */
32 int tabdefault = 8;		/* Default repeated tabstops */
33 off_t highest_hilite;		/* Pos of last hilite in file found so far */
34 
35 static int curr;		/* Total number of bytes in linebuf */
36 static int column;		/* Display columns needed to show linebuf */
37 static int overstrike;		/* Next char should overstrike previous char */
38 static int is_null_line;	/* There is no current line */
39 static int lmargin;		/* Index in linebuf of start of content */
40 static char pendc;
41 static off_t pendpos;
42 static char *end_ansi_chars;
43 static char *mid_ansi_chars;
44 
45 static int attr_swidth(int);
46 static int attr_ewidth(int);
47 static int do_append(LWCHAR, char *, off_t);
48 
49 extern int bs_mode;
50 extern int linenums;
51 extern int ctldisp;
52 extern int twiddle;
53 extern int binattr;
54 extern int status_col;
55 extern int auto_wrap, ignaw;
56 extern int bo_s_width, bo_e_width;
57 extern int ul_s_width, ul_e_width;
58 extern int bl_s_width, bl_e_width;
59 extern int so_s_width, so_e_width;
60 extern int sc_width, sc_height;
61 extern int utf_mode;
62 extern off_t start_attnpos;
63 extern off_t end_attnpos;
64 
65 static char mbc_buf[MAX_UTF_CHAR_LEN];
66 static int mbc_buf_index = 0;
67 static off_t mbc_pos;
68 
69 /*
70  * Initialize from environment variables.
71  */
72 void
init_line(void)73 init_line(void)
74 {
75 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
76 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
77 		end_ansi_chars = "m";
78 
79 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
80 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
81 		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
82 
83 	linebuf = ecalloc(LINEBUF_SIZE, sizeof (char));
84 	attr = ecalloc(LINEBUF_SIZE, sizeof (char));
85 	size_linebuf = LINEBUF_SIZE;
86 }
87 
88 /*
89  * Expand the line buffer.
90  */
91 static int
expand_linebuf(void)92 expand_linebuf(void)
93 {
94 	/* Double the size of the line buffer. */
95 	int new_size = size_linebuf * 2;
96 
97 	/* Just realloc to expand the buffer, if we can. */
98 	char *new_buf = recallocarray(linebuf, size_linebuf, new_size, 1);
99 	if (new_buf != NULL) {
100 		char *new_attr = recallocarray(attr, size_linebuf, new_size, 1);
101 		linebuf = new_buf;
102 		if (new_attr != NULL) {
103 			attr = new_attr;
104 			size_linebuf = new_size;
105 			return (0);
106 		}
107 	}
108 	return (1);
109 }
110 
111 /*
112  * Is a character ASCII?
113  */
114 static int
is_ascii_char(LWCHAR ch)115 is_ascii_char(LWCHAR ch)
116 {
117 	return (ch <= 0x7F);
118 }
119 
120 /*
121  * Rewind the line buffer.
122  */
123 void
prewind(void)124 prewind(void)
125 {
126 	curr = 0;
127 	column = 0;
128 	cshift = 0;
129 	overstrike = 0;
130 	is_null_line = 0;
131 	pendc = '\0';
132 	lmargin = 0;
133 	if (status_col)
134 		lmargin += 1;
135 }
136 
137 /*
138  * Insert the line number (of the given position) into the line buffer.
139  */
140 void
plinenum(off_t pos)141 plinenum(off_t pos)
142 {
143 	off_t linenum = 0;
144 	int i;
145 
146 	if (linenums == OPT_ONPLUS) {
147 		/*
148 		 * Get the line number and put it in the current line.
149 		 * {{ Note: since find_linenum calls forw_raw_line,
150 		 *    it may seek in the input file, requiring the caller
151 		 *    of plinenum to re-seek if necessary. }}
152 		 * {{ Since forw_raw_line modifies linebuf, we must
153 		 *    do this first, before storing anything in linebuf. }}
154 		 */
155 		linenum = find_linenum(pos);
156 	}
157 
158 	/*
159 	 * Display a status column if the -J option is set.
160 	 */
161 	if (status_col) {
162 		linebuf[curr] = ' ';
163 		if (start_attnpos != -1 &&
164 		    pos >= start_attnpos && pos < end_attnpos)
165 			attr[curr] = AT_NORMAL|AT_HILITE;
166 		else
167 			attr[curr] = AT_NORMAL;
168 		curr++;
169 		column++;
170 	}
171 	/*
172 	 * Display the line number at the start of each line
173 	 * if the -N option is set.
174 	 */
175 	if (linenums == OPT_ONPLUS) {
176 		char buf[23];
177 		int n;
178 
179 		postoa(linenum, buf, sizeof(buf));
180 		n = strlen(buf);
181 		if (n < MIN_LINENUM_WIDTH)
182 			n = MIN_LINENUM_WIDTH;
183 		snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
184 		n++;	/* One space after the line number. */
185 		for (i = 0; i < n; i++)
186 			attr[curr+i] = AT_NORMAL;
187 		curr += n;
188 		column += n;
189 		lmargin += n;
190 	}
191 
192 	/*
193 	 * Append enough spaces to bring us to the lmargin.
194 	 */
195 	while (column < lmargin) {
196 		linebuf[curr] = ' ';
197 		attr[curr++] = AT_NORMAL;
198 		column++;
199 	}
200 }
201 
202 /*
203  * Shift the input line left.
204  * Starting at lmargin, some bytes are discarded from the linebuf,
205  * until the number of display columns needed to show these bytes
206  * would exceed the argument.
207  */
208 static void
pshift(int shift)209 pshift(int shift)
210 {
211 	int shifted = 0;  /* Number of display columns already discarded. */
212 	int from;         /* Index in linebuf of the current character. */
213 	int to;           /* Index in linebuf to move this character to. */
214 	int len;          /* Number of bytes in this character. */
215 	int width = 0;    /* Display columns needed for this character. */
216 	int prev_attr;    /* Attributes of the preceding character. */
217 	int next_attr;    /* Attributes of the following character. */
218 	unsigned char c;  /* First byte of current character. */
219 
220 	if (shift > column - lmargin)
221 		shift = column - lmargin;
222 	if (shift > curr - lmargin)
223 		shift = curr - lmargin;
224 
225 	to = from = lmargin;
226 	/*
227 	 * We keep on going when shifted == shift
228 	 * to get all combining chars.
229 	 */
230 	while (shifted <= shift && from < curr) {
231 		c = linebuf[from];
232 		if (ctldisp == OPT_ONPLUS && c == ESC) {
233 			/* Keep cumulative effect.  */
234 			linebuf[to] = c;
235 			attr[to++] = attr[from++];
236 			while (from < curr && linebuf[from]) {
237 				linebuf[to] = linebuf[from];
238 				attr[to++] = attr[from];
239 				if (!is_ansi_middle(linebuf[from++]))
240 					break;
241 			}
242 			continue;
243 		}
244 		if (utf_mode && !isascii(c)) {
245 			wchar_t ch;
246 			/*
247 			 * Before this point, UTF-8 validity was already
248 			 * checked, but for additional safety, treat
249 			 * invalid bytes as single-width characters
250 			 * if they ever make it here.  Similarly, treat
251 			 * non-printable characters as width 1.
252 			 */
253 			len = mbtowc(&ch, linebuf + from, curr - from);
254 			if (len == -1)
255 				len = width = 1;
256 			else if ((width = wcwidth(ch)) == -1)
257 				width = 1;
258 		} else {
259 			len = 1;
260 			if (c == '\b')
261 				/* XXX - Incorrect if several '\b' in a row.  */
262 				width = width > 0 ? -width : -1;
263 			else
264 				width = iscntrl(c) ? 0 : 1;
265 		}
266 
267 		if (width == 2 && shift - shifted == 1) {
268 			/*
269 			 * Move the first half of a double-width character
270 			 * off screen.  Print a space instead of the second
271 			 * half.  This should never happen when called
272 			 * by pshift_all().
273 			 */
274 			attr[to] = attr[from];
275 			linebuf[to++] = ' ';
276 			from += len;
277 			shifted++;
278 			break;
279 		}
280 
281 		/* Adjust width for magic cookies. */
282 		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
283 		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
284 		if (!is_at_equiv(attr[from], prev_attr) &&
285 		    !is_at_equiv(attr[from], next_attr)) {
286 			width += attr_swidth(attr[from]);
287 			if (from + len < curr)
288 				width += attr_ewidth(attr[from]);
289 			if (is_at_equiv(prev_attr, next_attr)) {
290 				width += attr_ewidth(prev_attr);
291 				if (from + len < curr)
292 					width += attr_swidth(next_attr);
293 			}
294 		}
295 
296 		if (shift - shifted < width)
297 			break;
298 		from += len;
299 		shifted += width;
300 		if (shifted < 0)
301 			shifted = 0;
302 	}
303 	while (from < curr) {
304 		linebuf[to] = linebuf[from];
305 		attr[to++] = attr[from++];
306 	}
307 	curr = to;
308 	column -= shifted;
309 	cshift += shifted;
310 }
311 
312 /*
313  *
314  */
315 void
pshift_all(void)316 pshift_all(void)
317 {
318 	pshift(column);
319 }
320 
321 /*
322  * Return the printing width of the start (enter) sequence
323  * for a given character attribute.
324  */
325 static int
attr_swidth(int a)326 attr_swidth(int a)
327 {
328 	int w = 0;
329 
330 	a = apply_at_specials(a);
331 
332 	if (a & AT_UNDERLINE)
333 		w += ul_s_width;
334 	if (a & AT_BOLD)
335 		w += bo_s_width;
336 	if (a & AT_BLINK)
337 		w += bl_s_width;
338 	if (a & AT_STANDOUT)
339 		w += so_s_width;
340 
341 	return (w);
342 }
343 
344 /*
345  * Return the printing width of the end (exit) sequence
346  * for a given character attribute.
347  */
348 static int
attr_ewidth(int a)349 attr_ewidth(int a)
350 {
351 	int w = 0;
352 
353 	a = apply_at_specials(a);
354 
355 	if (a & AT_UNDERLINE)
356 		w += ul_e_width;
357 	if (a & AT_BOLD)
358 		w += bo_e_width;
359 	if (a & AT_BLINK)
360 		w += bl_e_width;
361 	if (a & AT_STANDOUT)
362 		w += so_e_width;
363 
364 	return (w);
365 }
366 
367 /*
368  * Return the printing width of a given character and attribute,
369  * if the character were added to the current position in the line buffer.
370  * Adding a character with a given attribute may cause an enter or exit
371  * attribute sequence to be inserted, so this must be taken into account.
372  */
373 static int
pwidth(wchar_t ch,int a,wchar_t prev_ch)374 pwidth(wchar_t ch, int a, wchar_t prev_ch)
375 {
376 	int w;
377 
378 	/*
379 	 * In case of a backspace, back up by the width of the previous
380 	 * character.  If that is non-printable (for example another
381 	 * backspace) or zero width (for example a combining accent),
382 	 * the terminal may actually back up to a character even further
383 	 * back, but we no longer know how wide that may have been.
384 	 * The best guess possible at this point is that it was
385 	 * hopefully width one.
386 	 */
387 	if (ch == L'\b') {
388 		w = wcwidth(prev_ch);
389 		if (w <= 0)
390 			w = 1;
391 		return (-w);
392 	}
393 
394 	w = wcwidth(ch);
395 
396 	/*
397 	 * Non-printable characters can get here if the -r flag is in
398 	 * effect, and possibly in some other situations (XXX check that!).
399 	 * Treat them as zero width.
400 	 * That may not always match their actual behaviour,
401 	 * but there is no reasonable way to be more exact.
402 	 */
403 	if (w == -1)
404 		w = 0;
405 
406 	/*
407 	 * Combining accents take up no space.
408 	 * Some terminals, upon failure to compose them with the
409 	 * characters that precede them, will actually take up one column
410 	 * for the combining accent; there isn't much we could do short
411 	 * of testing the (complex) composition process ourselves and
412 	 * printing a binary representation when it fails.
413 	 */
414 	if (w == 0)
415 		return (0);
416 
417 	/*
418 	 * Other characters take one or two columns,
419 	 * plus the width of any attribute enter/exit sequence.
420 	 */
421 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
422 		w += attr_ewidth(attr[curr-1]);
423 	if ((apply_at_specials(a) != AT_NORMAL) &&
424 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
425 		w += attr_swidth(a);
426 	return (w);
427 }
428 
429 /*
430  * Delete to the previous base character in the line buffer.
431  * Return 1 if one is found.
432  */
433 static int
backc(void)434 backc(void)
435 {
436 	wchar_t	 ch, prev_ch;
437 	int	 len, width;
438 
439 	if ((len = mbtowc_left(&ch, linebuf + curr, curr)) <= 0)
440 		return (0);
441 	curr -= len;
442 
443 	/* This assumes that there is no '\b' in linebuf.  */
444 	while (curr >= lmargin && column > lmargin &&
445 	    !(attr[curr] & (AT_ANSI|AT_BINARY))) {
446 		if ((len = mbtowc_left(&prev_ch, linebuf + curr, curr)) <= 0)
447 			prev_ch = L'\0';
448 		width = pwidth(ch, attr[curr], prev_ch);
449 		column -= width;
450 		if (width > 0)
451 			return (1);
452 		curr -= len;
453 		if (prev_ch == L'\0')
454 			return (0);
455 		ch = prev_ch;
456 	}
457 	return (0);
458 }
459 
460 /*
461  * Is a character the end of an ANSI escape sequence?
462  */
463 static int
is_ansi_end(LWCHAR ch)464 is_ansi_end(LWCHAR ch)
465 {
466 	if (!is_ascii_char(ch))
467 		return (0);
468 	return (strchr(end_ansi_chars, (char)ch) != NULL);
469 }
470 
471 /*
472  *
473  */
474 int
is_ansi_middle(LWCHAR ch)475 is_ansi_middle(LWCHAR ch)
476 {
477 	if (!is_ascii_char(ch))
478 		return (0);
479 	if (is_ansi_end(ch))
480 		return (0);
481 	return (strchr(mid_ansi_chars, (char)ch) != NULL);
482 }
483 
484 /*
485  * Append a character and attribute to the line buffer.
486  */
487 static int
store_char(LWCHAR ch,char a,char * rep,off_t pos)488 store_char(LWCHAR ch, char a, char *rep, off_t pos)
489 {
490 	int i;
491 	int w;
492 	int replen;
493 	char cs;
494 	int matches;
495 
496 	if (is_hilited(pos, pos+1, 0, &matches)) {
497 		/*
498 		 * This character should be highlighted.
499 		 * Override the attribute passed in.
500 		 */
501 		if (a != AT_ANSI) {
502 			if (highest_hilite != -1 && pos > highest_hilite)
503 				highest_hilite = pos;
504 			a |= AT_HILITE;
505 		}
506 	}
507 
508 	w = -1;
509 	if (ctldisp == OPT_ONPLUS) {
510 		/*
511 		 * Set i to the beginning of an ANSI escape sequence
512 		 * that was begun and not yet ended, or to -1 otherwise.
513 		 */
514 		for (i = curr - 1; i >= 0; i--) {
515 			if (linebuf[i] == ESC)
516 				break;
517 			if (!is_ansi_middle(linebuf[i]))
518 				i = 0;
519 		}
520 		if (i >= 0 && !is_ansi_end(ch) && !is_ansi_middle(ch)) {
521 			/* Remove whole unrecognized sequence.  */
522 			curr = i;
523 			return (0);
524 		}
525 		if (i >= 0 || ch == ESC) {
526 			a = AT_ANSI;  /* Will force re-AT_'ing around it. */
527 			w = 0;
528 		}
529 	}
530 	if (w == -1) {
531 		wchar_t prev_ch;
532 		if (mbtowc_left(&prev_ch, linebuf + curr, curr) <= 0)
533 			prev_ch = L' ';
534 		w = pwidth(ch, a, prev_ch);
535 	}
536 
537 	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
538 		/*
539 		 * Won't fit on screen.
540 		 */
541 		return (1);
542 
543 	if (rep == NULL) {
544 		cs = (char)ch;
545 		rep = &cs;
546 		replen = 1;
547 	} else {
548 		replen = utf_len(rep[0]);
549 	}
550 	if (curr + replen >= size_linebuf-6) {
551 		/*
552 		 * Won't fit in line buffer.
553 		 * Try to expand it.
554 		 */
555 		if (expand_linebuf())
556 			return (1);
557 	}
558 
559 	while (replen-- > 0) {
560 		linebuf[curr] = *rep++;
561 		attr[curr] = a;
562 		curr++;
563 	}
564 	column += w;
565 	return (0);
566 }
567 
568 /*
569  * Append a tab to the line buffer.
570  * Store spaces to represent the tab.
571  */
572 static int
store_tab(int attr,off_t pos)573 store_tab(int attr, off_t pos)
574 {
575 	int to_tab = column + cshift - lmargin;
576 	int i;
577 
578 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
579 		to_tab = tabdefault -
580 		    ((to_tab - tabstops[ntabstops-1]) % tabdefault);
581 	else {
582 		for (i = ntabstops - 2; i >= 0; i--)
583 			if (to_tab >= tabstops[i])
584 				break;
585 		to_tab = tabstops[i+1] - to_tab;
586 	}
587 
588 	if (column + to_tab - 1 + pwidth(' ', attr, 0) +
589 	    attr_ewidth(attr) > sc_width)
590 		return (1);
591 
592 	do {
593 		if (store_char(' ', attr, " ", pos))
594 			return (1);
595 	} while (--to_tab > 0);
596 	return (0);
597 }
598 
599 static int
store_prchar(char c,off_t pos)600 store_prchar(char c, off_t pos)
601 {
602 	char *s;
603 
604 	/*
605 	 * Convert to printable representation.
606 	 */
607 	s = prchar(c);
608 
609 	/*
610 	 * Make sure we can get the entire representation
611 	 * of the character on this line.
612 	 */
613 	if (column + (int)strlen(s) - 1 +
614 	    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
615 		return (1);
616 
617 	for (; *s != 0; s++) {
618 		if (store_char(*s, AT_BINARY, NULL, pos))
619 			return (1);
620 	}
621 	return (0);
622 }
623 
624 static int
flush_mbc_buf(off_t pos)625 flush_mbc_buf(off_t pos)
626 {
627 	int i;
628 
629 	for (i = 0; i < mbc_buf_index; i++) {
630 		if (store_prchar(mbc_buf[i], pos))
631 			return (mbc_buf_index - i);
632 	}
633 	return (0);
634 }
635 
636 /*
637  * Append a character to the line buffer.
638  * Expand tabs into spaces, handle underlining, boldfacing, etc.
639  * Returns 0 if ok, 1 if couldn't fit in buffer.
640  */
641 int
pappend(char c,off_t pos)642 pappend(char c, off_t pos)
643 {
644 	mbstate_t mbs;
645 	size_t sz;
646 	wchar_t ch;
647 	int r;
648 
649 	if (pendc) {
650 		if (do_append(pendc, NULL, pendpos))
651 			/*
652 			 * Oops.  We've probably lost the char which
653 			 * was in pendc, since caller won't back up.
654 			 */
655 			return (1);
656 		pendc = '\0';
657 	}
658 
659 	if (c == '\r' && bs_mode == BS_SPECIAL) {
660 		if (mbc_buf_index > 0)  /* utf_mode must be on. */ {
661 			/* Flush incomplete (truncated) sequence. */
662 			r = flush_mbc_buf(mbc_pos);
663 			mbc_buf_index = 0;
664 			if (r)
665 				return (r + 1);
666 		}
667 
668 		/*
669 		 * Don't put the CR into the buffer until we see
670 		 * the next char.  If the next char is a newline,
671 		 * discard the CR.
672 		 */
673 		pendc = c;
674 		pendpos = pos;
675 		return (0);
676 	}
677 
678 	if (!utf_mode) {
679 		r = do_append((LWCHAR) c, NULL, pos);
680 	} else {
681 		for (;;) {
682 			if (mbc_buf_index == 0)
683 				mbc_pos = pos;
684 			mbc_buf[mbc_buf_index++] = c;
685 			memset(&mbs, 0, sizeof(mbs));
686 			sz = mbrtowc(&ch, mbc_buf, mbc_buf_index, &mbs);
687 
688 			/* Incomplete UTF-8: wait for more bytes. */
689 			if (sz == (size_t)-2)
690 				return (0);
691 
692 			/* Valid UTF-8: use the character. */
693 			if (sz != (size_t)-1) {
694 				r = do_append(ch, mbc_buf, mbc_pos) ?
695 				    mbc_buf_index : 0;
696 				break;
697 			}
698 
699 			/* Invalid start byte: encode it. */
700 			if (mbc_buf_index == 1) {
701 				r = store_prchar(c, pos);
702 				break;
703 			}
704 
705 			/*
706 			 * Invalid continuation.
707 			 * Encode the preceding bytes.
708 			 * If they fit, handle the interrupting byte.
709 			 * Otherwise, tell the caller to back up
710 			 * by the  number of bytes that do not fit,
711 			 * plus one for the new byte.
712 			 */
713 			mbc_buf_index--;
714 			if ((r = flush_mbc_buf(mbc_pos) + 1) == 1)
715 				mbc_buf_index = 0;
716 			else
717 				break;
718 		}
719 	}
720 
721 	/*
722 	 * If we need to shift the line, do it.
723 	 * But wait until we get to at least the middle of the screen,
724 	 * so shifting it doesn't affect the chars we're currently
725 	 * pappending.  (Bold & underline can get messed up otherwise.)
726 	 */
727 	if (cshift < hshift && column > sc_width / 2) {
728 		linebuf[curr] = '\0';
729 		pshift(hshift - cshift);
730 	}
731 	mbc_buf_index = 0;
732 	return (r);
733 }
734 
735 static int
do_append(LWCHAR ch,char * rep,off_t pos)736 do_append(LWCHAR ch, char *rep, off_t pos)
737 {
738 	wchar_t prev_ch;
739 	int a;
740 
741 	a = AT_NORMAL;
742 
743 	if (ch == '\b') {
744 		if (bs_mode == BS_CONTROL)
745 			goto do_control_char;
746 
747 		/*
748 		 * A better test is needed here so we don't
749 		 * backspace over part of the printed
750 		 * representation of a binary character.
751 		 */
752 		if (curr <= lmargin ||
753 		    column <= lmargin ||
754 		    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
755 			if (store_prchar('\b', pos))
756 				return (1);
757 		} else if (bs_mode == BS_NORMAL) {
758 			if (store_char(ch, AT_NORMAL, NULL, pos))
759 				return (1);
760 		} else if (bs_mode == BS_SPECIAL) {
761 			overstrike = backc();
762 		}
763 
764 		return (0);
765 	}
766 
767 	if (overstrike > 0) {
768 		/*
769 		 * Overstrike the character at the current position
770 		 * in the line buffer.  This will cause either
771 		 * underline (if a "_" is overstruck),
772 		 * bold (if an identical character is overstruck),
773 		 * or just deletion of the character in the buffer.
774 		 */
775 		overstrike = utf_mode ? -1 : 0;
776 		/* To be correct, this must be a base character.  */
777 		if (mbtowc(&prev_ch, linebuf + curr, MB_CUR_MAX) == -1) {
778 			(void)mbtowc(NULL, NULL, MB_CUR_MAX);
779 			prev_ch = L'\0';
780 		}
781 		a = attr[curr];
782 		if (ch == prev_ch) {
783 			/*
784 			 * Overstriking a char with itself means make it bold.
785 			 * But overstriking an underscore with itself is
786 			 * ambiguous.  It could mean make it bold, or
787 			 * it could mean make it underlined.
788 			 * Use the previous overstrike to resolve it.
789 			 */
790 			if (ch == '_') {
791 				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
792 					a |= (AT_BOLD|AT_UNDERLINE);
793 				else if (curr > 0 && attr[curr - 1] & AT_UNDERLINE)
794 					a |= AT_UNDERLINE;
795 				else if (curr > 0 && attr[curr - 1] & AT_BOLD)
796 					a |= AT_BOLD;
797 				else
798 					a |= AT_INDET;
799 			} else {
800 				a |= AT_BOLD;
801 			}
802 		} else if (ch == '_' && prev_ch != L'\0') {
803 			a |= AT_UNDERLINE;
804 			ch = prev_ch;
805 			rep = linebuf + curr;
806 		} else if (prev_ch == '_') {
807 			a |= AT_UNDERLINE;
808 		}
809 		/* Else we replace prev_ch, but we keep its attributes.  */
810 	} else if (overstrike < 0) {
811 		if (wcwidth(ch) == 0) {
812 			/* Continuation of the same overstrike.  */
813 			if (curr > 0)
814 				a = attr[curr - 1] & (AT_UNDERLINE | AT_BOLD);
815 			else
816 				a = AT_NORMAL;
817 		} else
818 			overstrike = 0;
819 	}
820 
821 	if (ch == '\t') {
822 		/*
823 		 * Expand a tab into spaces.
824 		 */
825 		switch (bs_mode) {
826 		case BS_CONTROL:
827 			goto do_control_char;
828 		case BS_NORMAL:
829 		case BS_SPECIAL:
830 			if (store_tab(a, pos))
831 				return (1);
832 			break;
833 		}
834 	} else if ((!utf_mode || is_ascii_char(ch)) &&
835 	    !isprint((unsigned char)ch)) {
836 do_control_char:
837 		if (ctldisp == OPT_ON ||
838 		    (ctldisp == OPT_ONPLUS && ch == ESC)) {
839 			/*
840 			 * Output as a normal character.
841 			 */
842 			if (store_char(ch, AT_NORMAL, rep, pos))
843 				return (1);
844 		} else {
845 			if (store_prchar(ch, pos))
846 				return (1);
847 		}
848 	} else if (utf_mode && ctldisp != OPT_ON && !iswprint(ch)) {
849 		char *s;
850 
851 		s = prutfchar(ch);
852 
853 		if (column + (int)strlen(s) - 1 +
854 		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
855 			return (1);
856 
857 		for (; *s != 0; s++) {
858 			if (store_char(*s, AT_BINARY, NULL, pos))
859 				return (1);
860 		}
861 	} else {
862 		if (store_char(ch, a, rep, pos))
863 			return (1);
864 	}
865 	return (0);
866 }
867 
868 /*
869  *
870  */
871 int
pflushmbc(void)872 pflushmbc(void)
873 {
874 	int r = 0;
875 
876 	if (mbc_buf_index > 0) {
877 		/* Flush incomplete (truncated) sequence.  */
878 		r = flush_mbc_buf(mbc_pos);
879 		mbc_buf_index = 0;
880 	}
881 	return (r);
882 }
883 
884 /*
885  * Terminate the line in the line buffer.
886  */
887 void
pdone(int endline,int forw)888 pdone(int endline, int forw)
889 {
890 	int i;
891 
892 	(void) pflushmbc();
893 
894 	if (pendc && (pendc != '\r' || !endline))
895 		/*
896 		 * If we had a pending character, put it in the buffer.
897 		 * But discard a pending CR if we are at end of line
898 		 * (that is, discard the CR in a CR/LF sequence).
899 		 */
900 		(void) do_append(pendc, NULL, pendpos);
901 
902 	for (i = curr - 1; i >= 0; i--) {
903 		if (attr[i] & AT_INDET) {
904 			attr[i] &= ~AT_INDET;
905 			if (i < curr - 1 && attr[i + 1] & AT_BOLD)
906 				attr[i] |= AT_BOLD;
907 			else
908 				attr[i] |= AT_UNDERLINE;
909 		}
910 	}
911 
912 	/*
913 	 * Make sure we've shifted the line, if we need to.
914 	 */
915 	if (cshift < hshift)
916 		pshift(hshift - cshift);
917 
918 	if (ctldisp == OPT_ONPLUS && is_ansi_end('m')) {
919 		/* Switch to normal attribute at end of line. */
920 		char *p = "\033[m";
921 		for (; *p != '\0'; p++) {
922 			linebuf[curr] = *p;
923 			attr[curr++] = AT_ANSI;
924 		}
925 	}
926 
927 	/*
928 	 * Add a newline if necessary,
929 	 * and append a '\0' to the end of the line.
930 	 * We output a newline if we're not at the right edge of the screen,
931 	 * or if the terminal doesn't auto wrap,
932 	 * or if this is really the end of the line AND the terminal ignores
933 	 * a newline at the right edge.
934 	 * (In the last case we don't want to output a newline if the terminal
935 	 * doesn't ignore it since that would produce an extra blank line.
936 	 * But we do want to output a newline if the terminal ignores it in case
937 	 * the next line is blank.  In that case the single newline output for
938 	 * that blank line would be ignored!)
939 	 */
940 	if (column < sc_width || !auto_wrap || (endline && ignaw) ||
941 	    ctldisp == OPT_ON) {
942 		linebuf[curr] = '\n';
943 		attr[curr] = AT_NORMAL;
944 		curr++;
945 	} else if (ignaw && column >= sc_width && forw) {
946 		/*
947 		 * Terminals with "ignaw" don't wrap until they *really* need
948 		 * to, i.e. when the character *after* the last one to fit on a
949 		 * line is output. But they are too hard to deal with when they
950 		 * get in the state where a full screen width of characters
951 		 * have been output but the cursor is sitting on the right edge
952 		 * instead of at the start of the next line.
953 		 * So we nudge them into wrapping by outputting a space
954 		 * character plus a backspace.  But do this only if moving
955 		 * forward; if we're moving backward and drawing this line at
956 		 * the top of the screen, the space would overwrite the first
957 		 * char on the next line.  We don't need to do this "nudge"
958 		 * at the top of the screen anyway.
959 		 */
960 		linebuf[curr] = ' ';
961 		attr[curr++] = AT_NORMAL;
962 		linebuf[curr] = '\b';
963 		attr[curr++] = AT_NORMAL;
964 	}
965 	linebuf[curr] = '\0';
966 	attr[curr] = AT_NORMAL;
967 }
968 
969 /*
970  *
971  */
972 void
set_status_col(char c)973 set_status_col(char c)
974 {
975 	linebuf[0] = c;
976 	attr[0] = AT_NORMAL|AT_HILITE;
977 }
978 
979 /*
980  * Get a character from the current line.
981  * Return the character as the function return value,
982  * and the character attribute in *ap.
983  */
984 int
gline(int i,int * ap)985 gline(int i, int *ap)
986 {
987 	if (is_null_line) {
988 		/*
989 		 * If there is no current line, we pretend the line is
990 		 * either "~" or "", depending on the "twiddle" flag.
991 		 */
992 		if (twiddle) {
993 			if (i == 0) {
994 				*ap = AT_BOLD;
995 				return ('~');
996 			}
997 			--i;
998 		}
999 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1000 		*ap = AT_NORMAL;
1001 		return (i ? '\0' : '\n');
1002 	}
1003 
1004 	*ap = attr[i];
1005 	return (linebuf[i] & 0xFF);
1006 }
1007 
1008 /*
1009  * Indicate that there is no current line.
1010  */
1011 void
null_line(void)1012 null_line(void)
1013 {
1014 	is_null_line = 1;
1015 	cshift = 0;
1016 }
1017 
1018 /*
1019  * Analogous to forw_line(), but deals with "raw lines":
1020  * lines which are not split for screen width.
1021  * {{ This is supposed to be more efficient than forw_line(). }}
1022  */
1023 off_t
forw_raw_line(off_t curr_pos,char ** linep,int * line_lenp)1024 forw_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1025 {
1026 	int n;
1027 	int c;
1028 	off_t new_pos;
1029 
1030 	if (curr_pos == -1 || ch_seek(curr_pos) ||
1031 	    (c = ch_forw_get()) == EOI)
1032 		return (-1);
1033 
1034 	n = 0;
1035 	for (;;) {
1036 		if (c == '\n' || c == EOI || abort_sigs()) {
1037 			new_pos = ch_tell();
1038 			break;
1039 		}
1040 		if (n >= size_linebuf-1) {
1041 			if (expand_linebuf()) {
1042 				/*
1043 				 * Overflowed the input buffer.
1044 				 * Pretend the line ended here.
1045 				 */
1046 				new_pos = ch_tell() - 1;
1047 				break;
1048 			}
1049 		}
1050 		linebuf[n++] = (char)c;
1051 		c = ch_forw_get();
1052 	}
1053 	linebuf[n] = '\0';
1054 	if (linep != NULL)
1055 		*linep = linebuf;
1056 	if (line_lenp != NULL)
1057 		*line_lenp = n;
1058 	return (new_pos);
1059 }
1060 
1061 /*
1062  * Analogous to back_line(), but deals with "raw lines".
1063  * {{ This is supposed to be more efficient than back_line(). }}
1064  */
1065 off_t
back_raw_line(off_t curr_pos,char ** linep,int * line_lenp)1066 back_raw_line(off_t curr_pos, char **linep, int *line_lenp)
1067 {
1068 	int n;
1069 	int c;
1070 	off_t new_pos;
1071 
1072 	if (curr_pos == -1 || curr_pos <= ch_zero() || ch_seek(curr_pos - 1))
1073 		return (-1);
1074 
1075 	n = size_linebuf;
1076 	linebuf[--n] = '\0';
1077 	for (;;) {
1078 		c = ch_back_get();
1079 		if (c == '\n' || abort_sigs()) {
1080 			/*
1081 			 * This is the newline ending the previous line.
1082 			 * We have hit the beginning of the line.
1083 			 */
1084 			new_pos = ch_tell() + 1;
1085 			break;
1086 		}
1087 		if (c == EOI) {
1088 			/*
1089 			 * We have hit the beginning of the file.
1090 			 * This must be the first line in the file.
1091 			 * This must, of course, be the beginning of the line.
1092 			 */
1093 			new_pos = ch_zero();
1094 			break;
1095 		}
1096 		if (n <= 0) {
1097 			int old_size_linebuf = size_linebuf;
1098 			if (expand_linebuf()) {
1099 				/*
1100 				 * Overflowed the input buffer.
1101 				 * Pretend the line ended here.
1102 				 */
1103 				new_pos = ch_tell() + 1;
1104 				break;
1105 			}
1106 			/*
1107 			 * Shift the data to the end of the new linebuf.
1108 			 */
1109 			n = size_linebuf - old_size_linebuf;
1110 			memmove(linebuf + n, linebuf, old_size_linebuf);
1111 		}
1112 		linebuf[--n] = c;
1113 	}
1114 	if (linep != NULL)
1115 		*linep = &linebuf[n];
1116 	if (line_lenp != NULL)
1117 		*line_lenp = size_linebuf - 1 - n;
1118 	return (new_pos);
1119 }
1120