xref: /openbsd/usr.bin/less/line.c (revision a6445c1d)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  *
4  * You may distribute under the terms of either the GNU General Public
5  * License or the Less License, as specified in the README file.
6  *
7  * For more information, see the README file.
8  */
9 
10 
11 /*
12  * Routines to manipulate the "line buffer".
13  * The line buffer holds a line of output as it is being built
14  * in preparation for output to the screen.
15  */
16 
#include "less.h"
#include "charset.h"

#include <err.h>
#include <limits.h>
21 
/*
 * The output line is kept in two parallel arrays of size_linebuf bytes:
 * linebuf[i] is a byte of the line and attr[i] is that byte's attribute.
 * Both are allocated in init_line() and grown together by expand_linebuf().
 */
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */
public POSITION highest_hilite;	/* Pos of last hilite in file found so far */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
static int last_overstrike = AT_NORMAL;	/* Last bold/underline attribute stored */
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static char pendc;		/* Deferred char (a CR held back by pappend), or '\0' */
static POSITION pendpos;	/* File position of pendc */
static char *end_ansi_chars;	/* Chars which end an ANSI escape sequence */
static char *mid_ansi_chars;	/* Chars which may appear inside an ANSI sequence */

/* Forward declarations of helpers defined later in this file. */
static int attr_swidth();
static int attr_ewidth();
static int do_append();

/* State and option variables defined in other files. */
extern volatile sig_atomic_t sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern POSITION start_attnpos;
extern POSITION end_attnpos;

/*
 * Bytes of the multibyte (UTF-8) sequence currently being collected
 * by pappend().
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];
static int mbc_buf_len = 0;	/* Expected length of the sequence; 0 if none pending */
static int mbc_buf_index = 0;	/* Number of bytes collected in mbc_buf so far */
static POSITION mbc_pos;	/* File position of the sequence's first byte */
70 
71 /*
72  * Initialize from environment variables.
73  */
74 	public void
75 init_line()
76 {
77 	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
78 	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
79 		end_ansi_chars = "m";
80 
81 	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
82 	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
83 		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
84 
85 	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
86 	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
87 	size_linebuf = LINEBUF_SIZE;
88 }
89 
90 /*
91  * Expand the line buffer.
92  */
93 	static int
94 expand_linebuf()
95 {
96 	/* Double the size of the line buffer. */
97 	int new_size = size_linebuf * 2;
98 
99 	/* Just realloc to expand the buffer, if we can. */
100 	char *new_buf;
101 	char *new_attr;
102 
103 	new_buf = realloc(linebuf, new_size);
104 	if (new_buf == NULL)
105 		return 1;
106 	new_attr = realloc(attr, new_size);
107 	if (new_attr == NULL) {
108 		/* realloc linebuf back to original size */
109 		linebuf = realloc(new_buf, size_linebuf);
110 		if (linebuf == NULL)
111 			err(1, NULL);
112 		return 1;
113 	}
114 	/*
115 	 * We realloc'd the buffers; they already have the old contents.
116 	 */
117 	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
118 	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
119 	linebuf = new_buf;
120 	attr = new_attr;
121 	size_linebuf = new_size;
122 	return 0;
123 }
124 
125 /*
126  * Is a character ASCII?
127  */
128 	public int
129 is_ascii_char(ch)
130 	LWCHAR ch;
131 {
132 	return (ch <= 0x7F);
133 }
134 
135 /*
136  * Rewind the line buffer.
137  */
138 	public void
139 prewind()
140 {
141 	curr = 0;
142 	column = 0;
143 	cshift = 0;
144 	overstrike = 0;
145 	last_overstrike = AT_NORMAL;
146 	mbc_buf_len = 0;
147 	is_null_line = 0;
148 	pendc = '\0';
149 	lmargin = 0;
150 	if (status_col)
151 		lmargin += 1;
152 }
153 
154 /*
155  * Insert the line number (of the given position) into the line buffer.
156  */
157 	public void
158 plinenum(pos)
159 	POSITION pos;
160 {
161 	register LINENUM linenum = 0;
162 	register int i;
163 
164 	if (linenums == OPT_ONPLUS)
165 	{
166 		/*
167 		 * Get the line number and put it in the current line.
168 		 * {{ Note: since find_linenum calls forw_raw_line,
169 		 *    it may seek in the input file, requiring the caller
170 		 *    of plinenum to re-seek if necessary. }}
171 		 * {{ Since forw_raw_line modifies linebuf, we must
172 		 *    do this first, before storing anything in linebuf. }}
173 		 */
174 		linenum = find_linenum(pos);
175 	}
176 
177 	/*
178 	 * Display a status column if the -J option is set.
179 	 */
180 	if (status_col)
181 	{
182 		linebuf[curr] = ' ';
183 		if (start_attnpos != NULL_POSITION &&
184 		    pos >= start_attnpos && pos < end_attnpos)
185 			attr[curr] = AT_NORMAL|AT_HILITE;
186 		else
187 			attr[curr] = AT_NORMAL;
188 		curr++;
189 		column++;
190 	}
191 	/*
192 	 * Display the line number at the start of each line
193 	 * if the -N option is set.
194 	 */
195 	if (linenums == OPT_ONPLUS)
196 	{
197 		char buf[INT_STRLEN_BOUND(pos) + 2];
198 		int n;
199 
200 		linenumtoa(linenum, buf, sizeof(buf));
201 		n = strlen(buf);
202 		if (n < MIN_LINENUM_WIDTH)
203 			n = MIN_LINENUM_WIDTH;
204 		snprintf(linebuf+curr, size_linebuf-curr, "%*s ", n, buf);
205 		n++;  /* One space after the line number. */
206 		for (i = 0; i < n; i++)
207 			attr[curr+i] = AT_NORMAL;
208 		curr += n;
209 		column += n;
210 		lmargin += n;
211 	}
212 
213 	/*
214 	 * Append enough spaces to bring us to the lmargin.
215 	 */
216 	while (column < lmargin)
217 	{
218 		linebuf[curr] = ' ';
219 		attr[curr++] = AT_NORMAL;
220 		column++;
221 	}
222 }
223 
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * Only the part of the buffer after the left margin (lmargin) is
 * affected.  The request is clamped to the number of columns and bytes
 * actually present after the margin.  When ctldisp is OPT_ONPLUS, ANSI
 * escape sequences found in the discarded region are kept so that their
 * cumulative effect still applies to the text that remains.
 * On return curr, column and cshift reflect the amount actually shifted.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;	/* Previous char, for combining/wide checks */
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Write index into linebuf/attr */
	int from;		/* Read index into linebuf/attr */
	int len;		/* Byte length of the char at 'from' */
	int width;		/* Column width of the char at 'from' */
	int prev_attr;
	int next_attr;

	/* Never shift by more than what is in the buffer past the margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c))
		{
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/*
			 * Copy the rest of the sequence, up to and including
			 * the first char that is not an ANSI "middle" char.
			 */
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

#if !SMALL
		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			/* Stop if the sequence runs past the end of the data. */
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing and combining chars keep width 0. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
#endif /* !SMALL */
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
#if !SMALL
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
#else
				width = -1;
#endif /* !SMALL */
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * Only one column is left to shift but the char is two
		 * columns wide: replace it with a single blank.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		/*
		 * The char's attribute differs from both of its neighbours,
		 * so its enter/exit sequences are counted in its width too.
		 */
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop when this char does not fit in what is left to shift. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace has negative width; don't let the total go below 0. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move the remainder of the line down over the discarded part. */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
348 
349 /*
350  *
351  */
352 	public void
353 pshift_all()
354 {
355 	pshift(column);
356 }
357 
358 /*
359  * Return the printing width of the start (enter) sequence
360  * for a given character attribute.
361  */
362 	static int
363 attr_swidth(a)
364 	int a;
365 {
366 	int w = 0;
367 
368 	a = apply_at_specials(a);
369 
370 	if (a & AT_UNDERLINE)
371 		w += ul_s_width;
372 	if (a & AT_BOLD)
373 		w += bo_s_width;
374 	if (a & AT_BLINK)
375 		w += bl_s_width;
376 	if (a & AT_STANDOUT)
377 		w += so_s_width;
378 
379 	return w;
380 }
381 
382 /*
383  * Return the printing width of the end (exit) sequence
384  * for a given character attribute.
385  */
386 	static int
387 attr_ewidth(a)
388 	int a;
389 {
390 	int w = 0;
391 
392 	a = apply_at_specials(a);
393 
394 	if (a & AT_UNDERLINE)
395 		w += ul_e_width;
396 	if (a & AT_BOLD)
397 		w += bo_e_width;
398 	if (a & AT_BLINK)
399 		w += bl_e_width;
400 	if (a & AT_STANDOUT)
401 		w += so_e_width;
402 
403 	return w;
404 }
405 
406 /*
407  * Return the printing width of a given character and attribute,
408  * if the character were added to the current position in the line buffer.
409  * Adding a character with a given attribute may cause an enter or exit
410  * attribute sequence to be inserted, so this must be taken into account.
411  */
412 	static int
413 pwidth(ch, a, prev_ch)
414 	LWCHAR ch;
415 	int a;
416 	LWCHAR prev_ch;
417 {
418 	int w;
419 
420 	if (ch == '\b')
421 		/*
422 		 * Backspace moves backwards one or two positions.
423 		 * XXX - Incorrect if several '\b' in a row.
424 		 */
425 #if !SMALL
426 		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
427 #else
428 		return -1;
429 #endif /* !SMALL */
430 
431 	if (!utf_mode || is_ascii_char(ch))
432 	{
433 		if (control_char((char)ch))
434 		{
435 			/*
436 			 * Control characters do unpredictable things,
437 			 * so we don't even try to guess; say it doesn't move.
438 			 * This can only happen if the -r flag is in effect.
439 			 */
440 			return (0);
441 		}
442 	}
443 #if !SMALL
444 	else
445 	{
446 		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
447 		{
448 			/*
449 			 * Composing and combining chars take up no space.
450 			 *
451 			 * Some terminals, upon failure to compose a
452 			 * composing character with the character(s) that
453 			 * precede(s) it will actually take up one column
454 			 * for the composing character; there isn't much
455 			 * we could do short of testing the (complex)
456 			 * composition process ourselves and printing
457 			 * a binary representation when it fails.
458 			 */
459 			return (0);
460 		}
461 	}
462 #endif /* !SMALL */
463 
464 	/*
465 	 * Other characters take one or two columns,
466 	 * plus the width of any attribute enter/exit sequence.
467 	 */
468 	w = 1;
469 #if !SMALL
470 	if (is_wide_char(ch))
471 		w++;
472 #endif /* !SMALL */
473 	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
474 		w += attr_ewidth(attr[curr-1]);
475 	if ((apply_at_specials(a) != AT_NORMAL) &&
476 	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
477 		w += attr_swidth(a);
478 	return (w);
479 }
480 
481 /*
482  * Delete to the previous base character in the line buffer.
483  * Return 1 if one is found.
484  */
485 	static int
486 backc()
487 {
488 	LWCHAR prev_ch;
489 	char *p = linebuf + curr;
490 	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
491 	int width;
492 
493 	/* This assumes that there is no '\b' in linebuf.  */
494 	while (   curr > lmargin
495 	       && column > lmargin
496 	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
497 	{
498 		curr = p - linebuf;
499 		prev_ch = step_char(&p, -1, linebuf + lmargin);
500 		width = pwidth(ch, attr[curr], prev_ch);
501 		column -= width;
502 		if (width > 0)
503 			return 1;
504 		ch = prev_ch;
505 	}
506 
507 	return 0;
508 }
509 
510 /*
511  * Are we currently within a recognized ANSI escape sequence?
512  */
513 	static int
514 in_ansi_esc_seq()
515 {
516 	char *p;
517 
518 	/*
519 	 * Search backwards for either an ESC (which means we ARE in a seq);
520 	 * or an end char (which means we're NOT in a seq).
521 	 */
522 	for (p = &linebuf[curr];  p > linebuf; )
523 	{
524 		LWCHAR ch = step_char(&p, -1, linebuf);
525 		if (IS_CSI_START(ch))
526 			return (1);
527 		if (!is_ansi_middle(ch))
528 			return (0);
529 	}
530 	return (0);
531 }
532 
533 /*
534  * Is a character the end of an ANSI escape sequence?
535  */
536 	public int
537 is_ansi_end(ch)
538 	LWCHAR ch;
539 {
540 	if (!is_ascii_char(ch))
541 		return (0);
542 	return (strchr(end_ansi_chars, (char) ch) != NULL);
543 }
544 
545 /*
546  *
547  */
548 	public int
549 is_ansi_middle(ch)
550 	LWCHAR ch;
551 {
552 	if (!is_ascii_char(ch))
553 		return (0);
554 	if (is_ansi_end(ch))
555 		return (0);
556 	return (strchr(mid_ansi_chars, (char) ch) != NULL);
557 }
558 
/*
 * Append a character and attribute to the line buffer.
 *
 * STORE_CHAR is a convenience wrapper for use inside functions that
 * return int: it makes the enclosing function return 1 when
 * store_char() fails.
 */
#define	STORE_CHAR(ch,a,rep,pos) \
	do { \
		if (store_char((ch),(a),(rep),(pos))) return (1); \
	} while (0)

/*
 * ch   the character to store.
 * a    the attribute to store with it.
 * rep  the bytes that represent ch in the buffer; if NULL, the single
 *      byte (char)ch is stored.
 * pos  the file position of the character, used for the search
 *      highlight lookup.
 *
 * Returns 0 if the character was stored (or if it terminated an
 * unrecognized ANSI sequence, which is then removed from the buffer),
 * 1 if it would not fit on the screen or the buffer could not be grown.
 *
 * NOTE(review): rep may point into linebuf (do_append passes
 * linebuf + curr when underlining); if expand_linebuf() ever moved the
 * buffer in that case rep would be stale -- confirm this cannot happen.
 */
	static int
store_char(ch, a, rep, pos)
	LWCHAR ch;
	int a;
	char *rep;
	POSITION pos;
{
	int w;
	int replen;
	char cs;

	/* Remember the most recent bold/underline attribute seen. */
	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
	if (w != AT_NORMAL)
		last_overstrike = w;

#if HILITE_SEARCH
	{
		int matches;
		if (is_hilited(pos, pos+1, 0, &matches))
		{
			/*
			 * This character should be highlighted.
			 * Override the attribute passed in.
			 */
			if (a != AT_ANSI)
			{
				if (highest_hilite != NULL_POSITION &&
				    pos > highest_hilite)
				    	highest_hilite = pos;
				a |= AT_HILITE;
			}
		}
	}
#endif

	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
	{
		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
			/* Remove whole unrecognized sequence.  */
			char *p = &linebuf[curr];
			LWCHAR bch;
			/* Back up to the CSI start char (or the buffer start). */
			do {
				bch = step_char(&p, -1, linebuf);
			} while (p > linebuf && !IS_CSI_START(bch));
			curr = p - linebuf;
			return 0;
		}
		/* Part of an escape sequence: takes no columns. */
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else if (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))
	{
		/* Start of an escape sequence: takes no columns. */
		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
		w = 0;
	}
	else
	{
		/* Ordinary char: its width depends on the char before it. */
		char *p = &linebuf[curr];
		LWCHAR prev_ch = step_char(&p, -1, linebuf);
		w = pwidth(ch, a, prev_ch);
	}

	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
		/*
		 * Won't fit on screen.
		 */
		return (1);

	if (rep == NULL)
	{
		/* No representation given: store ch as a single byte. */
		cs = (char) ch;
		rep = &cs;
		replen = 1;
	} else
	{
#if !SMALL
		replen = utf_len(rep[0]);
#else
		replen = 1;
#endif /* !SMALL */
	}
	/* Always keep a few spare bytes at the end of the buffer. */
	if (curr + replen >= size_linebuf-6)
	{
		/*
		 * Won't fit in line buffer.
		 * Try to expand it.
		 */
		if (expand_linebuf())
			return (1);
	}

	/* Every byte of the representation gets the same attribute. */
	while (replen-- > 0)
	{
		linebuf[curr] = *rep++;
		attr[curr] = a;
		curr++;
	}
	column += w;
	return (0);
}
667 
668 /*
669  * Append a tab to the line buffer.
670  * Store spaces to represent the tab.
671  */
672 #define	STORE_TAB(a,pos) \
673 	do { if (store_tab((a),(pos))) return (1); } while (0)
674 
675 	static int
676 store_tab(attr, pos)
677 	int attr;
678 	POSITION pos;
679 {
680 	int to_tab = column + cshift - lmargin;
681 	int i;
682 
683 	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
684 		to_tab = tabdefault -
685 		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
686 	else
687 	{
688 		for (i = ntabstops - 2;  i >= 0;  i--)
689 			if (to_tab >= tabstops[i])
690 				break;
691 		to_tab = tabstops[i+1] - to_tab;
692 	}
693 
694 	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
695 		return 1;
696 
697 	do {
698 		STORE_CHAR(' ', attr, " ", pos);
699 	} while (--to_tab > 0);
700 	return 0;
701 }
702 
703 #define STORE_PRCHAR(c, pos) \
704 	do { if (store_prchar((c), (pos))) return 1; } while (0)
705 
706 	static int
707 store_prchar(c, pos)
708 	char c;
709 	POSITION pos;
710 {
711 	char *s;
712 
713 	/*
714 	 * Convert to printable representation.
715 	 */
716 	s = prchar(c);
717 
718 	/*
719 	 * Make sure we can get the entire representation
720 	 * of the character on this line.
721 	 */
722 	if (column + (int) strlen(s) - 1 +
723             pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
724 		return 1;
725 
726 	for ( ;  *s != 0;  s++)
727 		STORE_CHAR(*s, AT_BINARY, NULL, pos);
728 
729 	return 0;
730 }
731 
732 #if !SMALL
733 	static int
734 flush_mbc_buf(pos)
735 	POSITION pos;
736 {
737 	int i;
738 
739 	for (i = 0; i < mbc_buf_index; i++)
740 		if (store_prchar(mbc_buf[i], pos))
741 			return mbc_buf_index - i;
742 
743 	return 0;
744 }
745 #endif /* !SMALL */
746 
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * c    the next byte of input.
 * pos  the file position of that byte.
 *
 * Returns 0 if ok.  A nonzero return means the character could not be
 * fit in the buffer; the value is the number of input bytes the caller
 * should back up (1 when not in UTF-8 mode, otherwise based on the
 * number of bytes of the pending multibyte sequence that were not
 * stored).
 */
	public int
pappend(c, pos)
	char c;
	POSITION pos;
{
	int r;

	/* First store any char that was deferred by a previous call. */
	if (pendc)
	{
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
#if !SMALL
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}
#endif /* !SMALL */

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		/* Single-byte mode: every byte is a character. */
		r = do_append((LWCHAR) c, NULL, pos);
	}
#if !SMALL
	else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* Not inside a multibyte sequence. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append((LWCHAR) c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting; nothing is stored yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* Another byte of the sequence being collected. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}
#endif /* !SMALL */

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
859 
/*
 * Append one complete character to the line buffer, interpreting
 * backspaces (overstriking), tabs, control characters and, in UTF-8
 * mode, binary characters according to bs_mode and ctldisp.
 *
 * ch   the character.
 * rep  the bytes that represent ch, or NULL to store (char)ch.
 * pos  the file position of the character.
 *
 * Returns 0 if ok, 1 if the character could not be stored
 * (the STORE_* macros return 1 from this function on failure).
 */
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	register int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing to back up over: show the backspace itself. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Back up; the next char overstrikes if one was removed. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character.  */
#if !SMALL
		prev_ch = get_wchar(linebuf + curr);
#else
		prev_ch = (LWCHAR)((char)(linebuf + curr)[0] & 0xFF);
#endif /* !SMALL */
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/* "X\b_": keep the old char, underlined. */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* "_\bX": store the new char, underlined. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
#if !SMALL
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
#endif /* !SMALL */
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && IS_CSI_START(ch)))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			/* Show the printable representation instead. */
			STORE_PRCHAR((char) ch, pos);
		}
	}
#if !SMALL
	else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Binary UTF-8 char: show its printable representation. */
		s = prutfchar(ch);

		/* All or nothing: it must fit on this line entirely. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	}
#endif /* !SMALL */
	else
	{
		/* An ordinary character. */
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
1000 
1001 /*
1002  *
1003  */
1004 	public int
1005 pflushmbc()
1006 {
1007 	int r = 0;
1008 
1009 #if !SMALL
1010 	if (mbc_buf_len > 0)
1011 	{
1012 		/* Flush incomplete (truncated) sequence.  */
1013 		r = flush_mbc_buf(mbc_pos);
1014 		mbc_buf_len = 0;
1015 	}
1016 #endif /* !SMALL */
1017 	return r;
1018 }
1019 
1020 /*
1021  * Terminate the line in the line buffer.
1022  */
1023 	public void
1024 pdone(endline, forw)
1025 	int endline;
1026 	int forw;
1027 {
1028 	(void) pflushmbc();
1029 
1030 	if (pendc && (pendc != '\r' || !endline))
1031 		/*
1032 		 * If we had a pending character, put it in the buffer.
1033 		 * But discard a pending CR if we are at end of line
1034 		 * (that is, discard the CR in a CR/LF sequence).
1035 		 */
1036 		(void) do_append(pendc, NULL, pendpos);
1037 
1038 	/*
1039 	 * Make sure we've shifted the line, if we need to.
1040 	 */
1041 	if (cshift < hshift)
1042 		pshift(hshift - cshift);
1043 
1044 	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
1045 	{
1046 		/* Switch to normal attribute at end of line. */
1047 		char *p = "\033[m";
1048 		for ( ;  *p != '\0';  p++)
1049 		{
1050 			linebuf[curr] = *p;
1051 			attr[curr++] = AT_ANSI;
1052 		}
1053 	}
1054 
1055 	/*
1056 	 * Add a newline if necessary,
1057 	 * and append a '\0' to the end of the line.
1058 	 * We output a newline if we're not at the right edge of the screen,
1059 	 * or if the terminal doesn't auto wrap,
1060 	 * or if this is really the end of the line AND the terminal ignores
1061 	 * a newline at the right edge.
1062 	 * (In the last case we don't want to output a newline if the terminal
1063 	 * doesn't ignore it since that would produce an extra blank line.
1064 	 * But we do want to output a newline if the terminal ignores it in case
1065 	 * the next line is blank.  In that case the single newline output for
1066 	 * that blank line would be ignored!)
1067 	 */
1068 	if (column < sc_width || !auto_wrap || (endline && ignaw) || ctldisp == OPT_ON)
1069 	{
1070 		linebuf[curr] = '\n';
1071 		attr[curr] = AT_NORMAL;
1072 		curr++;
1073 	}
1074 	else if (ignaw && column >= sc_width && forw)
1075 	{
1076 		/*
1077 		 * Terminals with "ignaw" don't wrap until they *really* need
1078 		 * to, i.e. when the character *after* the last one to fit on a
1079 		 * line is output. But they are too hard to deal with when they
1080 		 * get in the state where a full screen width of characters
1081 		 * have been output but the cursor is sitting on the right edge
1082 		 * instead of at the start of the next line.
1083 		 * So we nudge them into wrapping by outputting a space
1084 		 * character plus a backspace.  But do this only if moving
1085 		 * forward; if we're moving backward and drawing this line at
1086 		 * the top of the screen, the space would overwrite the first
1087 		 * char on the next line.  We don't need to do this "nudge"
1088 		 * at the top of the screen anyway.
1089 		 */
1090 		linebuf[curr] = ' ';
1091 		attr[curr++] = AT_NORMAL;
1092 		linebuf[curr] = '\b';
1093 		attr[curr++] = AT_NORMAL;
1094 	}
1095 	linebuf[curr] = '\0';
1096 	attr[curr] = AT_NORMAL;
1097 }
1098 
1099 /*
1100  *
1101  */
1102 	public void
1103 set_status_col(c)
1104 	char c;
1105 {
1106 	linebuf[0] = c;
1107 	attr[0] = AT_NORMAL|AT_HILITE;
1108 }
1109 
1110 /*
1111  * Get a character from the current line.
1112  * Return the character as the function return value,
1113  * and the character attribute in *ap.
1114  */
1115 	public int
1116 gline(i, ap)
1117 	register int i;
1118 	register int *ap;
1119 {
1120 	if (is_null_line)
1121 	{
1122 		/*
1123 		 * If there is no current line, we pretend the line is
1124 		 * either "~" or "", depending on the "twiddle" flag.
1125 		 */
1126 		if (twiddle)
1127 		{
1128 			if (i == 0)
1129 			{
1130 				*ap = AT_BOLD;
1131 				return '~';
1132 			}
1133 			--i;
1134 		}
1135 		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1136 		*ap = AT_NORMAL;
1137 		return i ? '\0' : '\n';
1138 	}
1139 
1140 	*ap = attr[i];
1141 	return (linebuf[i] & 0xFF);
1142 }
1143 
1144 /*
1145  * Indicate that there is no current line.
1146  */
1147 	public void
1148 null_line()
1149 {
1150 	is_null_line = 1;
1151 	cshift = 0;
1152 }
1153 
1154 /*
1155  * Analogous to forw_line(), but deals with "raw lines":
1156  * lines which are not split for screen width.
1157  * {{ This is supposed to be more efficient than forw_line(). }}
1158  */
1159 	public POSITION
1160 forw_raw_line(curr_pos, linep, line_lenp)
1161 	POSITION curr_pos;
1162 	char **linep;
1163 	int *line_lenp;
1164 {
1165 	register int n;
1166 	register int c;
1167 	POSITION new_pos;
1168 
1169 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1170 		(c = ch_forw_get()) == EOI)
1171 		return (NULL_POSITION);
1172 
1173 	n = 0;
1174 	for (;;)
1175 	{
1176 		if (c == '\n' || c == EOI || ABORT_SIGS())
1177 		{
1178 			new_pos = ch_tell();
1179 			break;
1180 		}
1181 		if (n >= size_linebuf-1)
1182 		{
1183 			if (expand_linebuf())
1184 			{
1185 				/*
1186 				 * Overflowed the input buffer.
1187 				 * Pretend the line ended here.
1188 				 */
1189 				new_pos = ch_tell() - 1;
1190 				break;
1191 			}
1192 		}
1193 		linebuf[n++] = c;
1194 		c = ch_forw_get();
1195 	}
1196 	linebuf[n] = '\0';
1197 	if (linep != NULL)
1198 		*linep = linebuf;
1199 	if (line_lenp != NULL)
1200 		*line_lenp = n;
1201 	return (new_pos);
1202 }
1203 
1204 /*
1205  * Analogous to back_line(), but deals with "raw lines".
1206  * {{ This is supposed to be more efficient than back_line(). }}
1207  */
1208 	public POSITION
1209 back_raw_line(curr_pos, linep, line_lenp)
1210 	POSITION curr_pos;
1211 	char **linep;
1212 	int *line_lenp;
1213 {
1214 	register int n;
1215 	register int c;
1216 	POSITION new_pos;
1217 
1218 	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1219 		ch_seek(curr_pos-1))
1220 		return (NULL_POSITION);
1221 
1222 	n = size_linebuf;
1223 	linebuf[--n] = '\0';
1224 	for (;;)
1225 	{
1226 		c = ch_back_get();
1227 		if (c == '\n' || ABORT_SIGS())
1228 		{
1229 			/*
1230 			 * This is the newline ending the previous line.
1231 			 * We have hit the beginning of the line.
1232 			 */
1233 			new_pos = ch_tell() + 1;
1234 			break;
1235 		}
1236 		if (c == EOI)
1237 		{
1238 			/*
1239 			 * We have hit the beginning of the file.
1240 			 * This must be the first line in the file.
1241 			 * This must, of course, be the beginning of the line.
1242 			 */
1243 			new_pos = ch_zero();
1244 			break;
1245 		}
1246 		if (n <= 0)
1247 		{
1248 			int old_size_linebuf = size_linebuf;
1249 			if (expand_linebuf())
1250 			{
1251 				/*
1252 				 * Overflowed the input buffer.
1253 				 * Pretend the line ended here.
1254 				 */
1255 				new_pos = ch_tell() + 1;
1256 				break;
1257 			}
1258 			/*
1259 			 * Shift the data to the end of the new linebuf.
1260 			 */
1261 			n = size_linebuf - old_size_linebuf;
1262 			memmove(linebuf + n, linebuf, old_size_linebuf);
1263 		}
1264 		linebuf[--n] = c;
1265 	}
1266 	if (linep != NULL)
1267 		*linep = &linebuf[n];
1268 	if (line_lenp != NULL)
1269 		*line_lenp = size_linebuf - 1 - n;
1270 	return (new_pos);
1271 }
1272