xref: /original-bsd/usr.bin/more/line.c (revision 3f73ce2f)
1 /*
2  * Copyright (c) 1988 Mark Nudleman
3  * Copyright (c) 1988 Regents of the University of California.
4  * All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)line.c	5.4 (Berkeley) 06/01/90";
11 #endif /* not lint */
12 
13 /*
14  * Routines to manipulate the "line buffer".
15  * The line buffer holds a line of output as it is being built
16  * in preparation for output to the screen.
17  * We keep track of the PRINTABLE length of the line as it is being built.
18  */
19 
20 #include <sys/types.h>
21 #include <ctype.h>
22 #include <less.h>
23 
/*
 * Storage for the line currently being assembled.  pappend() fills it
 * through curr; forw_raw_line() and back_raw_line() also store raw
 * lines directly into it.
 */
static char linebuf[1024];	/* Buffer which holds the current output line */
static char *curr;		/* Pointer into linebuf: the slot where
				   pappend() stores the next character */
static int column;		/* Printable length, accounting for
				   backspaces, etc.  pappend() compares it
				   against sc_width. */
/*
 * A ridiculously complex state machine takes care of backspaces.  The
 * complexity arises from the attempt to deal with all cases, especially
 * involving long lines with underlining, boldfacing or whatever.  There
 * are still some cases which will break it.
 *
 * There are seven states:
 *	LN_NORMAL is the normal state (not in underline or boldface mode).
 *	LN_UNDERLINE means we are in underline mode.  We expect to get
 *		either a sequence like "_\bX" or "X\b_" to continue
 *		underline mode, or anything else to end underline mode.
 *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
 *		like "X\bX\b...X\bX" to continue boldface mode, or anything
 *		else to end boldface mode.
 *	LN_UL_X means we are one character after LN_UNDERLINE
 *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
 *	LN_UL_XB means we are one character after LN_UL_X
 *		(we have gotten the backspace in "_\bX" or "X\b_";
 *		we expect one more ordinary character,
 *		which will put us back in state LN_UNDERLINE).
 *	LN_BO_X means we are one character after LN_BOLDFACE
 *		(we have gotten the 'X' in "X\bX").
 *	LN_BO_XB means we are one character after LN_BO_X
 *		(we have gotten the backspace in "X\bX";
 *		we expect one more 'X' which will put us back
 *		in LN_BOLDFACE).
 *
 * LN_NORMAL must stay 0: the NEW_COLUMN() macro tests ln_state for
 * truth to decide whether a special mode is active.
 */
static int ln_state;		/* Currently in normal/underline/bold/etc mode? */
#define	LN_NORMAL	0	/* Not in underline, boldface or whatever mode */
#define	LN_UNDERLINE	1	/* In underline, need next char */
#define	LN_UL_X		2	/* In underline, got char, need \b */
#define	LN_UL_XB	3	/* In underline, got char & \b, need one more */
#define	LN_BOLDFACE	4	/* In boldface, need next char */
#define	LN_BO_X		5	/* In boldface, got char, need \b */
#define	LN_BO_XB	6	/* In boldface, got char & \b, need same char */
63 
char *line;			/* Pointer to the current line.
				   Usually points to linebuf; back_raw_line()
				   instead points it at the start of the text
				   it stored toward the end of linebuf. */

/*
 * Settings defined elsewhere.  The notes below describe only how the
 * routines in this file use them.
 */
extern int bs_mode;		/* Nonzero: pappend() bypasses the backspace
				   (underline/boldface) state machine */
extern int tabstop;		/* A tab advances column to the next
				   multiple of this value */
extern int bo_width, be_width;	/* Columns charged for entering and
				   exiting boldface mode */
extern int ul_width, ue_width;	/* Columns charged for entering and
				   exiting underline mode */
extern int sc_width, sc_height;	/* sc_width bounds the printable length
				   of a line; sc_height is not used by
				   the routines shown here */
72 
73 /*
74  * Rewind the line buffer.
75  */
76 prewind()
77 {
78 	line = curr = linebuf;
79 	ln_state = LN_NORMAL;
80 	column = 0;
81 }
82 
83 /*
84  * Append a character to the line buffer.
85  * Expand tabs into spaces, handle underlining, boldfacing, etc.
86  * Returns 0 if ok, 1 if couldn't fit in buffer.
87  */
88 #define	NEW_COLUMN(addon) \
89 	if (column + addon + (ln_state ? ue_width : 0) > sc_width) \
90 		return(1); \
91 	else \
92 		column += addon
93 
94 pappend(c)
95 	int c;
96 {
97 	if (c == '\0') {
98 		/*
99 		 * Terminate any special modes, if necessary.
100 		 * Append a '\0' to the end of the line.
101 		 */
102 		switch (ln_state) {
103 		case LN_UL_X:
104 			curr[0] = curr[-1];
105 			curr[-1] = UE_CHAR;
106 			curr++;
107 			break;
108 		case LN_BO_X:
109 			curr[0] = curr[-1];
110 			curr[-1] = BE_CHAR;
111 			curr++;
112 			break;
113 		case LN_UL_XB:
114 		case LN_UNDERLINE:
115 			*curr++ = UE_CHAR;
116 			break;
117 		case LN_BO_XB:
118 		case LN_BOLDFACE:
119 			*curr++ = BE_CHAR;
120 			break;
121 		}
122 		ln_state = LN_NORMAL;
123 		*curr = '\0';
124 		return(0);
125 	}
126 
127 	if (curr > linebuf + sizeof(linebuf) - 12)
128 		/*
129 		 * Almost out of room in the line buffer.
130 		 * Don't take any chances.
131 		 * {{ Linebuf is supposed to be big enough that this
132 		 *    will never happen, but may need to be made
133 		 *    bigger for wide screens or lots of backspaces. }}
134 		 */
135 		return(1);
136 
137 	if (!bs_mode) {
138 		/*
139 		 * Advance the state machine.
140 		 */
141 		switch (ln_state) {
142 		case LN_NORMAL:
143 			if (curr <= linebuf + 1
144 			    || curr[-1] != (char)('H' | 0200))
145 				break;
146 			column -= 2;
147 			if (c == curr[-2])
148 				goto enter_boldface;
149 			if (c == '_' || curr[-2] == '_')
150 				goto enter_underline;
151 			curr -= 2;
152 			break;
153 
154 enter_boldface:
155 			/*
156 			 * We have "X\bX" (including the current char).
157 			 * Switch into boldface mode.
158 			 */
159 			if (column + bo_width + be_width + 1 >= sc_width)
160 				/*
161 				 * Not enough room left on the screen to
162 				 * enter and exit boldface mode.
163 				 */
164 				return (1);
165 
166 			if (bo_width > 0 && curr > linebuf + 2
167 			    && curr[-3] == ' ') {
168 				/*
169 				 * Special case for magic cookie terminals:
170 				 * if the previous char was a space, replace
171 				 * it with the "enter boldface" sequence.
172 				 */
173 				curr[-3] = BO_CHAR;
174 				column += bo_width-1;
175 			} else {
176 				curr[-1] = curr[-2];
177 				curr[-2] = BO_CHAR;
178 				column += bo_width;
179 				curr++;
180 			}
181 			goto ln_bo_xb_case;
182 
183 enter_underline:
184 			/*
185 			 * We have either "_\bX" or "X\b_" (including
186 			 * the current char).  Switch into underline mode.
187 			 */
188 			if (column + ul_width + ue_width + 1 >= sc_width)
189 				/*
190 				 * Not enough room left on the screen to
191 				 * enter and exit underline mode.
192 				 */
193 				return (1);
194 
195 			if (ul_width > 0 &&
196 			    curr > linebuf + 2 && curr[-3] == ' ')
197 			{
198 				/*
199 				 * Special case for magic cookie terminals:
200 				 * if the previous char was a space, replace
201 				 * it with the "enter underline" sequence.
202 				 */
203 				curr[-3] = UL_CHAR;
204 				column += ul_width-1;
205 			} else
206 			{
207 				curr[-1] = curr[-2];
208 				curr[-2] = UL_CHAR;
209 				column += ul_width;
210 				curr++;
211 			}
212 			goto ln_ul_xb_case;
213 			/*NOTREACHED*/
214 		case LN_UL_XB:
215 			/*
216 			 * Termination of a sequence "_\bX" or "X\b_".
217 			 */
218 			if (c != '_' && curr[-2] != '_' && c == curr[-2])
219 			{
220 				/*
221 				 * We seem to have run on from underlining
222 				 * into boldfacing - this is a nasty fix, but
223 				 * until this whole routine is rewritten as a
224 				 * real DFA, ...  well ...
225 				 */
226 				curr[0] = curr[-2];
227 				curr[-2] = UE_CHAR;
228 				curr[-1] = BO_CHAR;
229 				curr += 2; /* char & non-existent backspace */
230 				ln_state = LN_BO_XB;
231 				goto ln_bo_xb_case;
232 			}
233 ln_ul_xb_case:
234 			if (c == '_')
235 				c = curr[-2];
236 			curr -= 2;
237 			ln_state = LN_UNDERLINE;
238 			break;
239 		case LN_BO_XB:
240 			/*
241 			 * Termination of a sequnce "X\bX".
242 			 */
243 			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
244 			{
245 				/*
246 				 * We seem to have run on from
247 				 * boldfacing into underlining.
248 				 */
249 				curr[0] = curr[-2];
250 				curr[-2] = BE_CHAR;
251 				curr[-1] = UL_CHAR;
252 				curr += 2; /* char & non-existent backspace */
253 				ln_state = LN_UL_XB;
254 				goto ln_ul_xb_case;
255 			}
256 ln_bo_xb_case:
257 			curr -= 2;
258 			ln_state = LN_BOLDFACE;
259 			break;
260 		case LN_UNDERLINE:
261 			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
262 				/*
263 				 * We have just barely enough room to
264 				 * exit underline mode and handle a possible
265 				 * underline/boldface run on mixup.
266 				 */
267 				return (1);
268 			ln_state = LN_UL_X;
269 			break;
270 		case LN_BOLDFACE:
271 			if (c == '\b')
272 			{
273 				ln_state = LN_BO_XB;
274 				break;
275 			}
276 			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
277 				/*
278 				 * We have just barely enough room to
279 				 * exit underline mode and handle a possible
280 				 * underline/boldface run on mixup.
281 				 */
282 				return (1);
283 			ln_state = LN_BO_X;
284 			break;
285 		case LN_UL_X:
286 			if (c == '\b')
287 				ln_state = LN_UL_XB;
288 			else
289 			{
290 				/*
291 				 * Exit underline mode.
292 				 * We have to shuffle the chars a bit
293 				 * to make this work.
294 				 */
295 				curr[0] = curr[-1];
296 				curr[-1] = UE_CHAR;
297 				column += ue_width;
298 				if (ue_width > 0 && curr[0] == ' ')
299 					/*
300 					 * Another special case for magic
301 					 * cookie terminals: if the next
302 					 * char is a space, replace it
303 					 * with the "exit underline" sequence.
304 					 */
305 					column--;
306 				else
307 					curr++;
308 				ln_state = LN_NORMAL;
309 			}
310 			break;
311 		case LN_BO_X:
312 			if (c == '\b')
313 				ln_state = LN_BO_XB;
314 			else
315 			{
316 				/*
317 				 * Exit boldface mode.
318 				 * We have to shuffle the chars a bit
319 				 * to make this work.
320 				 */
321 				curr[0] = curr[-1];
322 				curr[-1] = BE_CHAR;
323 				column += be_width;
324 				if (be_width > 0 && curr[0] == ' ')
325 					/*
326 					 * Another special case for magic
327 					 * cookie terminals: if the next
328 					 * char is a space, replace it
329 					 * with the "exit boldface" sequence.
330 					 */
331 					column--;
332 				else
333 					curr++;
334 				ln_state = LN_NORMAL;
335 			}
336 			break;
337 		}
338 	}
339 
340 	if (c == '\t') {
341 		/*
342 		 * Expand a tab into spaces.
343 		 */
344 		do {
345 			NEW_COLUMN(1);
346 		} while ((column % tabstop) != 0);
347 		*curr++ = '\t';
348 		return (0);
349 	}
350 
351 	if (c == '\b') {
352 		if (ln_state == LN_NORMAL)
353 			NEW_COLUMN(2);
354 		else
355 			column--;
356 		*curr++ = ('H' | 0200);
357 		return(0);
358 	}
359 
360 	if (CONTROL_CHAR(c)) {
361 		/*
362 		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
363 		 * put_line() to prefix the char with a ^.  We don't actually
364 		 * put the ^ in the buffer because we sometimes need to move
365 		 * chars around, and such movement might separate the ^ from
366 		 * its following character.
367 		 */
368 		NEW_COLUMN(2);
369 		*curr++ = (CARAT_CHAR(c) | 0200);
370 		return(0);
371 	}
372 
373 	/*
374 	 * Ordinary character.  Just put it in the buffer.
375 	 */
376 	NEW_COLUMN(1);
377 	*curr++ = c;
378 	return (0);
379 }
380 
381 /*
382  * Analogous to forw_line(), but deals with "raw lines":
383  * lines which are not split for screen width.
384  * {{ This is supposed to be more efficient than forw_line(). }}
385  */
386 off_t
387 forw_raw_line(curr_pos)
388 	off_t curr_pos;
389 {
390 	register char *p;
391 	register int c;
392 	off_t new_pos, ch_tell();
393 
394 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
395 		(c = ch_forw_get()) == EOI)
396 		return (NULL_POSITION);
397 
398 	p = linebuf;
399 
400 	for (;;)
401 	{
402 		if (c == '\n' || c == EOI)
403 		{
404 			new_pos = ch_tell();
405 			break;
406 		}
407 		if (p >= &linebuf[sizeof(linebuf)-1])
408 		{
409 			/*
410 			 * Overflowed the input buffer.
411 			 * Pretend the line ended here.
412 			 * {{ The line buffer is supposed to be big
413 			 *    enough that this never happens. }}
414 			 */
415 			new_pos = ch_tell() - 1;
416 			break;
417 		}
418 		*p++ = c;
419 		c = ch_forw_get();
420 	}
421 	*p = '\0';
422 	line = linebuf;
423 	return (new_pos);
424 }
425 
426 /*
427  * Analogous to back_line(), but deals with "raw lines".
428  * {{ This is supposed to be more efficient than back_line(). }}
429  */
430 off_t
431 back_raw_line(curr_pos)
432 	off_t curr_pos;
433 {
434 	register char *p;
435 	register int c;
436 	off_t new_pos, ch_tell();
437 
438 	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
439 		ch_seek(curr_pos-1))
440 		return (NULL_POSITION);
441 
442 	p = &linebuf[sizeof(linebuf)];
443 	*--p = '\0';
444 
445 	for (;;)
446 	{
447 		c = ch_back_get();
448 		if (c == '\n')
449 		{
450 			/*
451 			 * This is the newline ending the previous line.
452 			 * We have hit the beginning of the line.
453 			 */
454 			new_pos = ch_tell() + 1;
455 			break;
456 		}
457 		if (c == EOI)
458 		{
459 			/*
460 			 * We have hit the beginning of the file.
461 			 * This must be the first line in the file.
462 			 * This must, of course, be the beginning of the line.
463 			 */
464 			new_pos = (off_t)0;
465 			break;
466 		}
467 		if (p <= linebuf)
468 		{
469 			/*
470 			 * Overflowed the input buffer.
471 			 * Pretend the line ended here.
472 			 */
473 			new_pos = ch_tell() + 1;
474 			break;
475 		}
476 		*--p = c;
477 	}
478 	line = p;
479 	return (new_pos);
480 }
481