xref: /netbsd/distrib/utils/more/line.c (revision 6550d01e)
1 /*	$NetBSD: line.c,v 1.5 2003/10/13 14:34:25 agc Exp $	*/
2 
3 /*
4  * Copyright (c) 1988 Mark Nudelman
5  * Copyright (c) 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 #ifndef lint
35 #if 0
36 static char sccsid[] = "@(#)line.c	8.1 (Berkeley) 6/6/93";
37 #else
38 __RCSID("$NetBSD: line.c,v 1.5 2003/10/13 14:34:25 agc Exp $");
39 #endif
40 #endif /* not lint */
41 
/*
 * Routines to manipulate the "line buffer".
 * The line buffer holds a line of output as it is being built
 * in preparation for output to the screen.
 * We keep track of the PRINTABLE length of the line as it is being built.
 */
48 
49 #include <sys/types.h>
50 #include <ctype.h>
51 
52 #include "less.h"
53 #include "extern.h"
54 
static char linebuf[1024];	/* Storage for the output line being built;
				   the raw-line readers fill it directly */
static char *curr;		/* Next free byte in linebuf; NULL until
				   prewind() has been called */
static int column;		/* Printable length so far, accounting for
				   backspaces, etc. */
/*
 * A ridiculously complex state machine takes care of backspaces.  The
 * complexity arises from the attempt to deal with all cases, especially
 * involving long lines with underlining, boldfacing or whatever.  There
 * are still some cases which will break it.
 *
 * There are seven states:
 *	LN_NORMAL is the normal state (not in underline or boldface mode).
 *	LN_UNDERLINE means we are in underline mode.  We expect to get
 *		either a sequence like "_\bX" or "X\b_" to continue
 *		underline mode, or anything else to end underline mode.
 *	LN_UL_X means we are one character after LN_UNDERLINE
 *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
 *	LN_UL_XB means we are one character after LN_UL_X
 *		(we have gotten the backspace in "_\bX" or "X\b_";
 *		we expect one more ordinary character,
 *		which will put us back in state LN_UNDERLINE).
 *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
 *		like "X\bX\b...X\bX" to continue boldface mode, or anything
 *		else to end boldface mode.
 *	LN_BO_X means we are one character after LN_BOLDFACE
 *		(we have gotten the 'X' in "X\bX").
 *	LN_BO_XB means we are one character after LN_BO_X
 *		(we have gotten the backspace in "X\bX";
 *		we expect one more 'X' which will put us back
 *		in LN_BOLDFACE).
 *
 * LN_NORMAL must remain 0: NEW_COLUMN() tests ln_state for truth to
 * decide whether room for leaving a special mode has to be reserved.
 */
static int ln_state;		/* Currently in normal/underline/bold/etc mode? */
#define	LN_NORMAL	0	/* Not in underline, boldface or whatever mode */
#define	LN_UNDERLINE	1	/* In underline, need next char */
#define	LN_UL_X		2	/* In underline, got char, need \b */
#define	LN_UL_XB	3	/* In underline, got char & \b, need one more */
#define	LN_BOLDFACE	4	/* In boldface, need next char */
#define	LN_BO_X		5	/* In boldface, got char, need \b */
#define	LN_BO_XB	6	/* In boldface, got char & \b, need same char */
94 
char *line;			/* Pointer to the current line.  Within this
				   file it is set to the start of linebuf by
				   prewind() and forw_raw_line(), and to an
				   address inside linebuf by back_raw_line(). */
97 /*
98  * Rewind the line buffer.
99  */
100 void
101 prewind()
102 {
103 	line = curr = linebuf;
104 	ln_state = LN_NORMAL;
105 	column = 0;
106 }
107 
/*
 * NEW_COLUMN(addon): account for `addon' more printable columns.
 * If the line would then exceed sc_width -- counting ue_width extra
 * columns whenever ln_state is not LN_NORMAL -- the *enclosing function*
 * returns 1; otherwise column is advanced by addon.
 *
 * The body is wrapped in do { } while (0) and the argument is
 * parenthesized so the macro acts as one statement and accepts any
 * expression.  `addon' is evaluated twice and must have no side effects.
 */
#define	NEW_COLUMN(addon) \
	do { \
		if (column + (addon) + (ln_state ? ue_width : 0) > sc_width) \
			return (1); \
		column += (addon); \
	} while (0)

/*
 * Append a character to the line buffer.
 * A tab is stored as a single '\t' but charged for every column up to
 * the next multiple of tabstop; underlining, boldfacing, etc. are
 * recognized from backspace sequences unless bs_mode is set.
 * Returns 0 if ok, 1 if the character could not be fitted in.
 */
118 
119 int
120 pappend(c)
121 	int c;
122 {
123 	if (c == '\0') {
124 		/*
125 		 * Terminate any special modes, if necessary.
126 		 * Append a '\0' to the end of the line.
127 		 */
128 		switch (ln_state) {
129 		case LN_UL_X:
130 			curr[0] = curr[-1];
131 			curr[-1] = UE_CHAR;
132 			curr++;
133 			break;
134 		case LN_BO_X:
135 			curr[0] = curr[-1];
136 			curr[-1] = BE_CHAR;
137 			curr++;
138 			break;
139 		case LN_UL_XB:
140 		case LN_UNDERLINE:
141 			*curr++ = UE_CHAR;
142 			break;
143 		case LN_BO_XB:
144 		case LN_BOLDFACE:
145 			*curr++ = BE_CHAR;
146 			break;
147 		}
148 		ln_state = LN_NORMAL;
149 		*curr = '\0';
150 		return(0);
151 	}
152 
153 	if (curr > linebuf + sizeof(linebuf) - 12)
154 		/*
155 		 * Almost out of room in the line buffer.
156 		 * Don't take any chances.
157 		 * {{ Linebuf is supposed to be big enough that this
158 		 *    will never happen, but may need to be made
159 		 *    bigger for wide screens or lots of backspaces. }}
160 		 */
161 		return(1);
162 
163 	if (!bs_mode) {
164 		/*
165 		 * Advance the state machine.
166 		 */
167 		switch (ln_state) {
168 		case LN_NORMAL:
169 			if (curr <= linebuf + 1
170 			    || curr[-1] != (char)('H' | 0200))
171 				break;
172 			column -= 2;
173 			if (c == curr[-2])
174 				goto enter_boldface;
175 			if (c == '_' || curr[-2] == '_')
176 				goto enter_underline;
177 			curr -= 2;
178 			break;
179 
180 enter_boldface:
181 			/*
182 			 * We have "X\bX" (including the current char).
183 			 * Switch into boldface mode.
184 			 */
185 			column--;
186 			if (column + bo_width + be_width + 1 >= sc_width)
187 				/*
188 				 * Not enough room left on the screen to
189 				 * enter and exit boldface mode.
190 				 */
191 				return (1);
192 
193 			if (bo_width > 0 && curr > linebuf + 2
194 			    && curr[-3] == ' ') {
195 				/*
196 				 * Special case for magic cookie terminals:
197 				 * if the previous char was a space, replace
198 				 * it with the "enter boldface" sequence.
199 				 */
200 				curr[-3] = BO_CHAR;
201 				column += bo_width-1;
202 			} else {
203 				curr[-1] = curr[-2];
204 				curr[-2] = BO_CHAR;
205 				column += bo_width;
206 				curr++;
207 			}
208 			goto ln_bo_xb_case;
209 
210 enter_underline:
211 			/*
212 			 * We have either "_\bX" or "X\b_" (including
213 			 * the current char).  Switch into underline mode.
214 			 */
215 			column--;
216 			if (column + ul_width + ue_width + 1 >= sc_width)
217 				/*
218 				 * Not enough room left on the screen to
219 				 * enter and exit underline mode.
220 				 */
221 				return (1);
222 
223 			if (ul_width > 0 &&
224 			    curr > linebuf + 2 && curr[-3] == ' ')
225 			{
226 				/*
227 				 * Special case for magic cookie terminals:
228 				 * if the previous char was a space, replace
229 				 * it with the "enter underline" sequence.
230 				 */
231 				curr[-3] = UL_CHAR;
232 				column += ul_width-1;
233 			} else
234 			{
235 				curr[-1] = curr[-2];
236 				curr[-2] = UL_CHAR;
237 				column += ul_width;
238 				curr++;
239 			}
240 			goto ln_ul_xb_case;
241 			/*NOTREACHED*/
242 		case LN_UL_XB:
243 			/*
244 			 * Termination of a sequence "_\bX" or "X\b_".
245 			 */
246 			if (c != '_' && curr[-2] != '_' && c == curr[-2])
247 			{
248 				/*
249 				 * We seem to have run on from underlining
250 				 * into boldfacing - this is a nasty fix, but
251 				 * until this whole routine is rewritten as a
252 				 * real DFA, ...  well ...
253 				 */
254 				curr[0] = curr[-2];
255 				curr[-2] = UE_CHAR;
256 				curr[-1] = BO_CHAR;
257 				curr += 2; /* char & non-existent backspace */
258 				ln_state = LN_BO_XB;
259 				goto ln_bo_xb_case;
260 			}
261 ln_ul_xb_case:
262 			if (c == '_')
263 				c = curr[-2];
264 			curr -= 2;
265 			ln_state = LN_UNDERLINE;
266 			break;
267 		case LN_BO_XB:
268 			/*
269 			 * Termination of a sequnce "X\bX".
270 			 */
271 			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
272 			{
273 				/*
274 				 * We seem to have run on from
275 				 * boldfacing into underlining.
276 				 */
277 				curr[0] = curr[-2];
278 				curr[-2] = BE_CHAR;
279 				curr[-1] = UL_CHAR;
280 				curr += 2; /* char & non-existent backspace */
281 				ln_state = LN_UL_XB;
282 				goto ln_ul_xb_case;
283 			}
284 ln_bo_xb_case:
285 			curr -= 2;
286 			ln_state = LN_BOLDFACE;
287 			break;
288 		case LN_UNDERLINE:
289 			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
290 				/*
291 				 * We have just barely enough room to
292 				 * exit underline mode and handle a possible
293 				 * underline/boldface run on mixup.
294 				 */
295 				return (1);
296 			ln_state = LN_UL_X;
297 			break;
298 		case LN_BOLDFACE:
299 			if (c == '\b')
300 			{
301 				ln_state = LN_BO_XB;
302 				break;
303 			}
304 			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
305 				/*
306 				 * We have just barely enough room to
307 				 * exit underline mode and handle a possible
308 				 * underline/boldface run on mixup.
309 				 */
310 				return (1);
311 			ln_state = LN_BO_X;
312 			break;
313 		case LN_UL_X:
314 			if (c == '\b')
315 				ln_state = LN_UL_XB;
316 			else
317 			{
318 				/*
319 				 * Exit underline mode.
320 				 * We have to shuffle the chars a bit
321 				 * to make this work.
322 				 */
323 				curr[0] = curr[-1];
324 				curr[-1] = UE_CHAR;
325 				column += ue_width;
326 				if (ue_width > 0 && curr[0] == ' ')
327 					/*
328 					 * Another special case for magic
329 					 * cookie terminals: if the next
330 					 * char is a space, replace it
331 					 * with the "exit underline" sequence.
332 					 */
333 					column--;
334 				else
335 					curr++;
336 				ln_state = LN_NORMAL;
337 			}
338 			break;
339 		case LN_BO_X:
340 			if (c == '\b')
341 				ln_state = LN_BO_XB;
342 			else
343 			{
344 				/*
345 				 * Exit boldface mode.
346 				 * We have to shuffle the chars a bit
347 				 * to make this work.
348 				 */
349 				curr[0] = curr[-1];
350 				curr[-1] = BE_CHAR;
351 				column += be_width;
352 				if (be_width > 0 && curr[0] == ' ')
353 					/*
354 					 * Another special case for magic
355 					 * cookie terminals: if the next
356 					 * char is a space, replace it
357 					 * with the "exit boldface" sequence.
358 					 */
359 					column--;
360 				else
361 					curr++;
362 				ln_state = LN_NORMAL;
363 			}
364 			break;
365 		}
366 	}
367 
368 	if (c == '\t') {
369 		/*
370 		 * Expand a tab into spaces.
371 		 */
372 		do {
373 			NEW_COLUMN(1);
374 		} while ((column % tabstop) != 0);
375 		*curr++ = '\t';
376 		return (0);
377 	}
378 
379 	if (c == '\b') {
380 		if (ln_state == LN_NORMAL)
381 			NEW_COLUMN(2);
382 		else
383 			column--;
384 		*curr++ = ('H' | 0200);
385 		return(0);
386 	}
387 
388 	if (CONTROL_CHAR(c)) {
389 		/*
390 		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
391 		 * put_line() to prefix the char with a ^.  We don't actually
392 		 * put the ^ in the buffer because we sometimes need to move
393 		 * chars around, and such movement might separate the ^ from
394 		 * its following character.
395 		 */
396 		NEW_COLUMN(2);
397 		*curr++ = (CARAT_CHAR(c) | 0200);
398 		return(0);
399 	}
400 
401 	/*
402 	 * Ordinary character.  Just put it in the buffer.
403 	 */
404 	NEW_COLUMN(1);
405 	*curr++ = c;
406 	return (0);
407 }
408 
409 /*
410  * Analogous to forw_line(), but deals with "raw lines":
411  * lines which are not split for screen width.
412  * {{ This is supposed to be more efficient than forw_line(). }}
413  */
414 off_t
415 forw_raw_line(curr_pos)
416 	off_t curr_pos;
417 {
418 	char *p;
419 	int c;
420 	off_t new_pos;
421 
422 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
423 		(c = ch_forw_get()) == EOI)
424 		return (NULL_POSITION);
425 
426 	p = linebuf;
427 
428 	for (;;)
429 	{
430 		if (c == '\n' || c == EOI)
431 		{
432 			new_pos = ch_tell();
433 			break;
434 		}
435 		if (p >= &linebuf[sizeof(linebuf)-1])
436 		{
437 			/*
438 			 * Overflowed the input buffer.
439 			 * Pretend the line ended here.
440 			 * {{ The line buffer is supposed to be big
441 			 *    enough that this never happens. }}
442 			 */
443 			new_pos = ch_tell() - 1;
444 			break;
445 		}
446 		*p++ = c;
447 		c = ch_forw_get();
448 	}
449 	*p = '\0';
450 	line = linebuf;
451 	return (new_pos);
452 }
453 
454 /*
455  * Analogous to back_line(), but deals with "raw lines".
456  * {{ This is supposed to be more efficient than back_line(). }}
457  */
458 off_t
459 back_raw_line(curr_pos)
460 	off_t curr_pos;
461 {
462 	char *p;
463 	int c;
464 	off_t new_pos;
465 
466 	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
467 		ch_seek(curr_pos-1))
468 		return (NULL_POSITION);
469 
470 	p = &linebuf[sizeof(linebuf)];
471 	*--p = '\0';
472 
473 	for (;;)
474 	{
475 		c = ch_back_get();
476 		if (c == '\n')
477 		{
478 			/*
479 			 * This is the newline ending the previous line.
480 			 * We have hit the beginning of the line.
481 			 */
482 			new_pos = ch_tell() + 1;
483 			break;
484 		}
485 		if (c == EOI)
486 		{
487 			/*
488 			 * We have hit the beginning of the file.
489 			 * This must be the first line in the file.
490 			 * This must, of course, be the beginning of the line.
491 			 */
492 			new_pos = (off_t)0;
493 			break;
494 		}
495 		if (p <= linebuf)
496 		{
497 			/*
498 			 * Overflowed the input buffer.
499 			 * Pretend the line ended here.
500 			 */
501 			new_pos = ch_tell() + 1;
502 			break;
503 		}
504 		*--p = c;
505 	}
506 	line = p;
507 	return (new_pos);
508 }
509