xref: /original-bsd/usr.bin/more/line.c (revision 87febec0)
1 /*
2  * Copyright (c) 1988 Mark Nudleman
3  * Copyright (c) 1988 Regents of the University of California.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms are permitted
7  * provided that the above copyright notice and this paragraph are
8  * duplicated in all such forms and that any documentation,
9  * advertising materials, and other materials related to such
10  * distribution and use acknowledge that the software was developed
11  * by Mark Nudleman and the University of California, Berkeley.  The
12  * name of Mark Nudleman or the
13  * University may not be used to endorse or promote products derived
14  * from this software without specific prior written permission.
15  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17  * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18  */
19 
20 #ifndef lint
21 static char sccsid[] = "@(#)line.c	5.3 (Berkeley) 11/22/88";
22 #endif /* not lint */
23 
24 /*
25  * Routines to manipulate the "line buffer".
26  * The line buffer holds a line of output as it is being built
27  * in preparation for output to the screen.
28  * We keep track of the PRINTABLE length of the line as it is being built.
29  */
30 
31 #include <sys/types.h>
32 #include <ctype.h>
33 #include <less.h>
34 
/*
 * Private state of the line under construction.
 */
static int column;		/* Printable length so far, accounting
				   for backspaces, etc. */
static char linebuf[1024];	/* Storage for the current output line */
static char *curr;		/* Current position within linebuf */
39 /*
40  * A ridiculously complex state machine takes care of backspaces.  The
41  * complexity arises from the attempt to deal with all cases, especially
42  * involving long lines with underlining, boldfacing or whatever.  There
43  * are still some cases which will break it.
44  *
45  * There are seven states:
46  *	LN_NORMAL is the normal state (not in underline mode).
47  *	LN_UNDERLINE means we are in underline mode.  We expect to get
48  *		either a sequence like "_\bX" or "X\b_" to continue
49  *		underline mode, or anything else to end underline mode.
50  *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
51  *		like "X\bX\b...X\bX" to continue boldface mode, or anything
52  *		else to end boldface mode.
53  *	LN_UL_X means we are one character after LN_UNDERLINE
54  *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
55  *	LN_UL_XB means we are one character after LN_UL_X
56  *		(we have gotten the backspace in "_\bX" or "X\b_";
57  *		we expect one more ordinary character,
58  *		which will put us back in state LN_UNDERLINE).
59  *	LN_BO_X means we are one character after LN_BOLDFACE
60  *		(we have gotten the 'X' in "X\bX").
61  *	LN_BO_XB means we are one character after LN_BO_X
62  *		(we have gotten the backspace in "X\bX";
63  *		we expect one more 'X' which will put us back
64  *		in LN_BOLDFACE).
65  */
/* Values taken by ln_state. */
#define	LN_NORMAL	0	/* No underline or boldface mode active */
#define	LN_UNDERLINE	1	/* Underlining; waiting for the next char */
#define	LN_UL_X		2	/* Underlining; have a char, waiting for \b */
#define	LN_UL_XB	3	/* Underlining; have char and \b, need one more */
#define	LN_BOLDFACE	4	/* Boldfacing; waiting for the next char */
#define	LN_BO_X		5	/* Boldfacing; have a char, waiting for \b */
#define	LN_BO_XB	6	/* Boldfacing; have char and \b, need same char */

/* Which of the modes above the line being built is currently in. */
static int ln_state;

/*
 * The current line, as seen by the rest of the program.
 * Usually points to linebuf.
 */
char *line;
77 
/*
 * Settings and screen metrics defined elsewhere.
 */
extern int bs_mode;	/* Nonzero: pappend() skips its overstrike
			   (underline/boldface) state machine */
extern int tabstop;	/* Tab stop spacing used when a tab advances column */
extern int bo_width;	/* Columns charged when boldface is entered */
extern int be_width;	/* Columns charged when boldface is exited */
extern int ul_width;	/* Columns charged when underline is entered */
extern int ue_width;	/* Columns charged when underline is exited */
extern int sc_width;	/* Limit that column is checked against */
extern int sc_height;	/* Declared with sc_width; not referenced by the
			   routines visible in this file */
83 
84 /*
85  * Rewind the line buffer.
86  */
87 prewind()
88 {
89 	line = curr = linebuf;
90 	ln_state = LN_NORMAL;
91 	column = 0;
92 }
93 
94 /*
95  * Append a character to the line buffer.
96  * Expand tabs into spaces, handle underlining, boldfacing, etc.
97  * Returns 0 if ok, 1 if couldn't fit in buffer.
98  */
/*
 * NEW_COLUMN(addon): charge `addon' more printable columns to the line.
 *
 * If the resulting width, plus ue_width whenever ln_state is nonzero,
 * would exceed sc_width, the ENCLOSING FUNCTION returns 1.  Otherwise
 * column is advanced by addon.
 *
 * The argument is parenthesized and evaluated exactly once, and the body
 * is wrapped in do { } while (0) so that the macro behaves as a single
 * statement wherever it is used (including as the body of an if that
 * has an else).
 */
#define	NEW_COLUMN(addon) \
	do { \
		int nc_target = column + (addon); \
		if (nc_target + (ln_state ? ue_width : 0) > sc_width) \
			return(1); \
		column = nc_target; \
	} while (0)
104 
/*
 * pappend(c): add one input character to the line under construction.
 *
 *	c	The character to add.  '\0' is special: it closes any
 *		underline/boldface mode still open and terminates the line.
 *
 * Returns 0 when the character was accepted.  Returns 1 when it was not:
 * either linebuf is nearly full, or the printable width (column) would
 * exceed sc_width.
 */
105 pappend(c)
106 	int c;
107 {
108 	if (c == '\0') {
109 		/*
110 		 * Terminate any special modes, if necessary.
111 		 * Append a '\0' to the end of the line.
112 		 */
113 		switch (ln_state) {
114 		case LN_UL_X:
			/*
			 * The last char was never followed by a backspace,
			 * so the end-of-underline marker is inserted in
			 * front of it.
			 */
115 			curr[0] = curr[-1];
116 			curr[-1] = UE_CHAR;
117 			curr++;
118 			break;
119 		case LN_BO_X:
			/* Same shuffle, with the end-of-boldface marker. */
120 			curr[0] = curr[-1];
121 			curr[-1] = BE_CHAR;
122 			curr++;
123 			break;
124 		case LN_UL_XB:
125 		case LN_UNDERLINE:
126 			*curr++ = UE_CHAR;
127 			break;
128 		case LN_BO_XB:
129 		case LN_BOLDFACE:
130 			*curr++ = BE_CHAR;
131 			break;
132 		}
133 		ln_state = LN_NORMAL;
134 		*curr = '\0';
135 		return(0);
136 	}
137 
138 	if (curr > linebuf + sizeof(linebuf) - 12)
139 		/*
140 		 * Almost out of room in the line buffer.
141 		 * Don't take any chances.
142 		 * {{ Linebuf is supposed to be big enough that this
143 		 *    will never happen, but may need to be made
144 		 *    bigger for wide screens or lots of backspaces. }}
145 		 */
146 		return(1);
147 
148 	if (!bs_mode) {
149 		/*
150 		 * Advance the state machine.
151 		 */
152 		switch (ln_state) {
153 		case LN_NORMAL:
			/*
			 * Nothing to do unless the previous buffer entry is
			 * a backspace, which is stored as ('H' | 0200).
			 */
154 			if (curr <= linebuf + 1
155 			    || curr[-1] != (char)('H' | 0200))
156 				break;
			/*
			 * Take back the 2 columns that were charged for the
			 * backspace when it was appended in normal state.
			 * NOTE(review): the column charged for the overstruck
			 * char curr[-2] is not taken back, although that char
			 * is replaced by c in every path below -- confirm
			 * whether the extra column is intentional.
			 */
157 			column -= 2;
158 			if (c == curr[-2])
159 				goto enter_boldface;
160 			if (c == '_' || curr[-2] == '_')
161 				goto enter_underline;
			/*
			 * Neither boldface nor underline: discard the
			 * overstruck char and the backspace, so that c
			 * takes their place.
			 */
162 			curr -= 2;
163 			break;
164 
165 enter_boldface:
166 			/*
167 			 * We have "X\bX" (including the current char).
168 			 * Switch into boldface mode.
169 			 */
170 			if (column + bo_width + be_width + 1 >= sc_width)
171 				/*
172 				 * Not enough room left on the screen to
173 				 * enter and exit boldface mode.
174 				 */
175 				return (1);
176 
177 			if (bo_width > 0 && curr > linebuf + 2
178 			    && curr[-3] == ' ') {
179 				/*
180 				 * Special case for magic cookie terminals:
181 				 * if the previous char was a space, replace
182 				 * it with the "enter boldface" sequence.
183 				 */
184 				curr[-3] = BO_CHAR;
185 				column += bo_width-1;
186 			} else {
187 				curr[-1] = curr[-2];
188 				curr[-2] = BO_CHAR;
189 				column += bo_width;
190 				curr++;
191 			}
192 			goto ln_bo_xb_case;
193 
194 enter_underline:
195 			/*
196 			 * We have either "_\bX" or "X\b_" (including
197 			 * the current char).  Switch into underline mode.
198 			 */
199 			if (column + ul_width + ue_width + 1 >= sc_width)
200 				/*
201 				 * Not enough room left on the screen to
202 				 * enter and exit underline mode.
203 				 */
204 				return (1);
205 
206 			if (ul_width > 0 &&
207 			    curr > linebuf + 2 && curr[-3] == ' ')
208 			{
209 				/*
210 				 * Special case for magic cookie terminals:
211 				 * if the previous char was a space, replace
212 				 * it with the "enter underline" sequence.
213 				 */
214 				curr[-3] = UL_CHAR;
215 				column += ul_width-1;
216 			} else
217 			{
218 				curr[-1] = curr[-2];
219 				curr[-2] = UL_CHAR;
220 				column += ul_width;
221 				curr++;
222 			}
223 			goto ln_ul_xb_case;
224 			/*NOTREACHED*/
225 		case LN_UL_XB:
226 			/*
227 			 * Termination of a sequence "_\bX" or "X\b_".
228 			 */
229 			if (c != '_' && curr[-2] != '_' && c == curr[-2])
230 			{
231 				/*
232 				 * We seem to have run on from underlining
233 				 * into boldfacing - this is a nasty fix, but
234 				 * until this whole routine is rewritten as a
235 				 * real DFA, ...  well ...
236 				 */
237 				curr[0] = curr[-2];
238 				curr[-2] = UE_CHAR;
239 				curr[-1] = BO_CHAR;
240 				curr += 2; /* char & non-existent backspace */
241 				ln_state = LN_BO_XB;
242 				goto ln_bo_xb_case;
243 			}
244 ln_ul_xb_case:
			/*
			 * Keep the real character of the pair rather than
			 * the underscore, then drop the buffered char and
			 * backspace; the kept char is appended further down.
			 */
245 			if (c == '_')
246 				c = curr[-2];
247 			curr -= 2;
248 			ln_state = LN_UNDERLINE;
249 			break;
250 		case LN_BO_XB:
251 			/*
252 			 * Termination of a sequence "X\bX".
253 			 */
254 			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
255 			{
256 				/*
257 				 * We seem to have run on from
258 				 * boldfacing into underlining.
259 				 */
260 				curr[0] = curr[-2];
261 				curr[-2] = BE_CHAR;
262 				curr[-1] = UL_CHAR;
263 				curr += 2; /* char & non-existent backspace */
264 				ln_state = LN_UL_XB;
265 				goto ln_ul_xb_case;
266 			}
267 ln_bo_xb_case:
			/*
			 * Drop the buffered char and backspace; c is
			 * appended further down in their place.
			 */
268 			curr -= 2;
269 			ln_state = LN_BOLDFACE;
270 			break;
271 		case LN_UNDERLINE:
272 			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
273 				/*
274 				 * We have just barely enough room to
275 				 * exit underline mode and handle a possible
276 				 * underline/boldface run on mixup.
277 				 */
278 				return (1);
279 			ln_state = LN_UL_X;
280 			break;
281 		case LN_BOLDFACE:
			/*
			 * A backspace straight after a boldfaced char means
			 * yet another overstrike of that char is coming.
			 */
282 			if (c == '\b')
283 			{
284 				ln_state = LN_BO_XB;
285 				break;
286 			}
287 			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
288 				/*
289 				 * We have just barely enough room to
290 				 * exit boldface mode and handle a possible
291 				 * boldface/underline run on mixup.
292 				 */
293 				return (1);
294 			ln_state = LN_BO_X;
295 			break;
296 		case LN_UL_X:
297 			if (c == '\b')
298 				ln_state = LN_UL_XB;
299 			else
300 			{
301 				/*
302 				 * Exit underline mode.
303 				 * We have to shuffle the chars a bit
304 				 * to make this work.
305 				 */
306 				curr[0] = curr[-1];
307 				curr[-1] = UE_CHAR;
308 				column += ue_width;
309 				if (ue_width > 0 && curr[0] == ' ')
310 					/*
311 					 * Another special case for magic
312 					 * cookie terminals: if the next
313 					 * char is a space, replace it
314 					 * with the "exit underline" sequence.
315 					 */
316 					column--;
317 				else
318 					curr++;
319 				ln_state = LN_NORMAL;
320 			}
321 			break;
322 		case LN_BO_X:
323 			if (c == '\b')
324 				ln_state = LN_BO_XB;
325 			else
326 			{
327 				/*
328 				 * Exit boldface mode.
329 				 * We have to shuffle the chars a bit
330 				 * to make this work.
331 				 */
332 				curr[0] = curr[-1];
333 				curr[-1] = BE_CHAR;
334 				column += be_width;
335 				if (be_width > 0 && curr[0] == ' ')
336 					/*
337 					 * Another special case for magic
338 					 * cookie terminals: if the next
339 					 * char is a space, replace it
340 					 * with the "exit boldface" sequence.
341 					 */
342 					column--;
343 				else
344 					curr++;
345 				ln_state = LN_NORMAL;
346 			}
347 			break;
348 		}
349 	}
350 
351 	if (c == '\t') {
352 		/*
353 		 * Expand a tab: advance column to the next multiple of tabstop.
		 * Only the single '\t' is stored in the buffer.
354 		 */
355 		do {
356 			NEW_COLUMN(1);
357 		} while ((column % tabstop) != 0);
358 		*curr++ = '\t';
359 		return (0);
360 	}
361 
362 	if (c == '\b') {
		/*
		 * A backspace is stored as ('H' | 0200).  In normal state it
		 * is charged 2 columns; in any other state column is
		 * decremented instead.
		 */
363 		if (ln_state == LN_NORMAL)
364 			NEW_COLUMN(2);
365 		else
366 			column--;
367 		*curr++ = ('H' | 0200);
368 		return(0);
369 	}
370 
371 	if (CONTROL_CHAR(c)) {
372 		/*
373 		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
374 		 * put_line() to prefix the char with a ^.  We don't actually
375 		 * put the ^ in the buffer because we sometimes need to move
376 		 * chars around, and such movement might separate the ^ from
377 		 * its following character.
378 		 */
379 		NEW_COLUMN(2);
380 		*curr++ = (CARAT_CHAR(c) | 0200);
381 		return(0);
382 	}
383 
384 	/*
385 	 * Ordinary character.  Just put it in the buffer.
386 	 */
387 	NEW_COLUMN(1);
388 	*curr++ = c;
389 	return (0);
390 }
391 
392 /*
393  * Analogous to forw_line(), but deals with "raw lines":
394  * lines which are not split for screen width.
395  * {{ This is supposed to be more efficient than forw_line(). }}
396  */
397 off_t
398 forw_raw_line(curr_pos)
399 	off_t curr_pos;
400 {
401 	register char *p;
402 	register int c;
403 	off_t new_pos, ch_tell();
404 
405 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
406 		(c = ch_forw_get()) == EOI)
407 		return (NULL_POSITION);
408 
409 	p = linebuf;
410 
411 	for (;;)
412 	{
413 		if (c == '\n' || c == EOI)
414 		{
415 			new_pos = ch_tell();
416 			break;
417 		}
418 		if (p >= &linebuf[sizeof(linebuf)-1])
419 		{
420 			/*
421 			 * Overflowed the input buffer.
422 			 * Pretend the line ended here.
423 			 * {{ The line buffer is supposed to be big
424 			 *    enough that this never happens. }}
425 			 */
426 			new_pos = ch_tell() - 1;
427 			break;
428 		}
429 		*p++ = c;
430 		c = ch_forw_get();
431 	}
432 	*p = '\0';
433 	line = linebuf;
434 	return (new_pos);
435 }
436 
437 /*
438  * Analogous to back_line(), but deals with "raw lines".
439  * {{ This is supposed to be more efficient than back_line(). }}
440  */
441 off_t
442 back_raw_line(curr_pos)
443 	off_t curr_pos;
444 {
445 	register char *p;
446 	register int c;
447 	off_t new_pos, ch_tell();
448 
449 	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
450 		ch_seek(curr_pos-1))
451 		return (NULL_POSITION);
452 
453 	p = &linebuf[sizeof(linebuf)];
454 	*--p = '\0';
455 
456 	for (;;)
457 	{
458 		c = ch_back_get();
459 		if (c == '\n')
460 		{
461 			/*
462 			 * This is the newline ending the previous line.
463 			 * We have hit the beginning of the line.
464 			 */
465 			new_pos = ch_tell() + 1;
466 			break;
467 		}
468 		if (c == EOI)
469 		{
470 			/*
471 			 * We have hit the beginning of the file.
472 			 * This must be the first line in the file.
473 			 * This must, of course, be the beginning of the line.
474 			 */
475 			new_pos = (off_t)0;
476 			break;
477 		}
478 		if (p <= linebuf)
479 		{
480 			/*
481 			 * Overflowed the input buffer.
482 			 * Pretend the line ended here.
483 			 */
484 			new_pos = ch_tell() + 1;
485 			break;
486 		}
487 		*--p = c;
488 	}
489 	line = p;
490 	return (new_pos);
491 }
492