xref: /openbsd/usr.bin/mandoc/term.c (revision 8932bfb7)
1 /*	$Id: term.c,v 1.59 2011/05/29 21:22:18 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "mandoc.h"
28 #include "out.h"
29 #include "term.h"
30 #include "main.h"
31 
/* Grow p->buf so that at least `sz' cells fit. */
static	void		 adjbuf(struct termp *p, int sz);
/* Append one undecorated character to p->buf. */
static	void		 bufferc(struct termp *p, char c);
/* Append `sz' bytes of `word' to p->buf, decorated per current font. */
static	void		 encode(struct termp *p, const char *word, size_t sz);
/* Append a single character value to p->buf, decorated per font. */
static	void		 encode1(struct termp *p, int c);
36 
37 void
38 term_free(struct termp *p)
39 {
40 
41 	if (p->buf)
42 		free(p->buf);
43 	if (p->symtab)
44 		mchars_free(p->symtab);
45 
46 	free(p);
47 }
48 
49 
50 void
51 term_begin(struct termp *p, term_margin head,
52 		term_margin foot, const void *arg)
53 {
54 
55 	p->headf = head;
56 	p->footf = foot;
57 	p->argf = arg;
58 	(*p->begin)(p);
59 }
60 
61 
62 void
63 term_end(struct termp *p)
64 {
65 
66 	(*p->end)(p);
67 }
68 
69 /*
70  * Flush a line of text.  A "line" is loosely defined as being something
71  * that should be followed by a newline, regardless of whether it's
72  * broken apart by newlines getting there.  A line can also be a
73  * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
74  * not have a trailing newline.
75  *
76  * The following flags may be specified:
77  *
78  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
79  *    offset value.  This is useful when doing columnar lists where the
80  *    prior column has right-padded.
81  *
82  *  - TERMP_NOBREAK: this is the most important and is used when making
83  *    columns.  In short: don't print a newline and instead pad to the
84  *    right margin.  Used in conjunction with TERMP_NOLPAD.
85  *
86  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
87  *    space characters of padding.  Otherwise, rather break the line.
88  *
89  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
90  *    the line is overrun, and don't pad-right if it's underrun.
91  *
92  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
93  *    overruning, instead save the position and continue at that point
94  *    when the next invocation.
95  *
96  *  In-line line breaking:
97  *
98  *  If TERMP_NOBREAK is specified and the line overruns the right
99  *  margin, it will break and pad-right to the right margin after
100  *  writing.  If maxrmargin is violated, it will break and continue
101  *  writing from the right-margin, which will lead to the above scenario
102  *  upon exit.  Otherwise, the line will break at the right margin.
103  */
104 void
105 term_flushln(struct termp *p)
106 {
107 	int		 i;     /* current input position in p->buf */
108 	size_t		 vis;   /* current visual position on output */
109 	size_t		 vbl;   /* number of blanks to prepend to output */
110 	size_t		 vend;	/* end of word visual position on output */
111 	size_t		 bp;    /* visual right border position */
112 	size_t		 dv;    /* temporary for visual pos calculations */
113 	int		 j;     /* temporary loop index for p->buf */
114 	int		 jhy;	/* last hyph before overflow w/r/t j */
115 	size_t		 maxvis; /* output position of visible boundary */
116 	size_t		 mmax; /* used in calculating bp */
117 
118 	/*
119 	 * First, establish the maximum columns of "visible" content.
120 	 * This is usually the difference between the right-margin and
121 	 * an indentation, but can be, for tagged lists or columns, a
122 	 * small set of values.
123 	 */
124 	assert  (p->rmargin >= p->offset);
125 	dv     = p->rmargin - p->offset;
126 	maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
127 	dv     = p->maxrmargin - p->offset;
128 	mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
129 
130 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
131 
132 	/*
133 	 * Indent the first line of a paragraph.
134 	 */
135 	vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset;
136 
137 	vis = vend = 0;
138 	i = 0;
139 
140 	while (i < p->col) {
141 		/*
142 		 * Handle literal tab characters: collapse all
143 		 * subsequent tabs into a single huge set of spaces.
144 		 */
145 		while (i < p->col && '\t' == p->buf[i]) {
146 			vend = (vis / p->tabwidth + 1) * p->tabwidth;
147 			vbl += vend - vis;
148 			vis = vend;
149 			i++;
150 		}
151 
152 		/*
153 		 * Count up visible word characters.  Control sequences
154 		 * (starting with the CSI) aren't counted.  A space
155 		 * generates a non-printing word, which is valid (the
156 		 * space is printed according to regular spacing rules).
157 		 */
158 
159 		for (j = i, jhy = 0; j < p->col; j++) {
160 			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
161 				break;
162 
163 			/* Back over the the last printed character. */
164 			if (8 == p->buf[j]) {
165 				assert(j);
166 				vend -= (*p->width)(p, p->buf[j - 1]);
167 				continue;
168 			}
169 
170 			/* Regular word. */
171 			/* Break at the hyphen point if we overrun. */
172 			if (vend > vis && vend < bp &&
173 					ASCII_HYPH == p->buf[j])
174 				jhy = j;
175 
176 			vend += (*p->width)(p, p->buf[j]);
177 		}
178 
179 		/*
180 		 * Find out whether we would exceed the right margin.
181 		 * If so, break to the next line.
182 		 */
183 		if (vend > bp && 0 == jhy && vis > 0) {
184 			vend -= vis;
185 			(*p->endline)(p);
186 			if (TERMP_NOBREAK & p->flags) {
187 				p->viscol = p->rmargin;
188 				(*p->advance)(p, p->rmargin);
189 				vend += p->rmargin - p->offset;
190 			} else {
191 				p->viscol = 0;
192 				vbl = p->offset;
193 			}
194 
195 			/* Remove the p->overstep width. */
196 
197 			bp += (size_t)p->overstep;
198 			p->overstep = 0;
199 		}
200 
201 		/* Write out the [remaining] word. */
202 		for ( ; i < p->col; i++) {
203 			if (vend > bp && jhy > 0 && i > jhy)
204 				break;
205 			if ('\t' == p->buf[i])
206 				break;
207 			if (' ' == p->buf[i]) {
208 				j = i;
209 				while (' ' == p->buf[i])
210 					i++;
211 				dv = (size_t)(i - j) * (*p->width)(p, ' ');
212 				vbl += dv;
213 				vend += dv;
214 				break;
215 			}
216 			if (ASCII_NBRSP == p->buf[i]) {
217 				vbl += (*p->width)(p, ' ');
218 				continue;
219 			}
220 
221 			/*
222 			 * Now we definitely know there will be
223 			 * printable characters to output,
224 			 * so write preceding white space now.
225 			 */
226 			if (vbl) {
227 				(*p->advance)(p, vbl);
228 				p->viscol += vbl;
229 				vbl = 0;
230 			}
231 
232 			if (ASCII_HYPH == p->buf[i]) {
233 				(*p->letter)(p, '-');
234 				p->viscol += (*p->width)(p, '-');
235 			} else {
236 				(*p->letter)(p, p->buf[i]);
237 				p->viscol += (*p->width)(p, p->buf[i]);
238 			}
239 		}
240 		vis = vend;
241 	}
242 
243 	/*
244 	 * If there was trailing white space, it was not printed;
245 	 * so reset the cursor position accordingly.
246 	 */
247 	vis -= vbl;
248 
249 	p->col = 0;
250 	p->overstep = 0;
251 
252 	if ( ! (TERMP_NOBREAK & p->flags)) {
253 		p->viscol = 0;
254 		(*p->endline)(p);
255 		return;
256 	}
257 
258 	if (TERMP_HANG & p->flags) {
259 		/* We need one blank after the tag. */
260 		p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
261 
262 		/*
263 		 * Behave exactly the same way as groff:
264 		 * If we have overstepped the margin, temporarily move
265 		 * it to the right and flag the rest of the line to be
266 		 * shorter.
267 		 * If we landed right at the margin, be happy.
268 		 * If we are one step before the margin, temporarily
269 		 * move it one step LEFT and flag the rest of the line
270 		 * to be longer.
271 		 */
272 		if (p->overstep >= -1) {
273 			assert((int)maxvis + p->overstep >= 0);
274 			maxvis += (size_t)p->overstep;
275 		} else
276 			p->overstep = 0;
277 
278 	} else if (TERMP_DANGLE & p->flags)
279 		return;
280 
281 	/* Right-pad. */
282 	if (maxvis > vis +
283 	    ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
284 		p->viscol += maxvis - vis;
285 		(*p->advance)(p, maxvis - vis);
286 		vis += (maxvis - vis);
287 	} else {	/* ...or newline break. */
288 		(*p->endline)(p);
289 		p->viscol = p->rmargin;
290 		(*p->advance)(p, p->rmargin);
291 	}
292 }
293 
294 
295 /*
296  * A newline only breaks an existing line; it won't assert vertical
297  * space.  All data in the output buffer is flushed prior to the newline
298  * assertion.
299  */
300 void
301 term_newln(struct termp *p)
302 {
303 
304 	p->flags |= TERMP_NOSPACE;
305 	if (0 == p->col && 0 == p->viscol) {
306 		p->flags &= ~TERMP_NOLPAD;
307 		return;
308 	}
309 	term_flushln(p);
310 	p->flags &= ~TERMP_NOLPAD;
311 }
312 
313 
314 /*
315  * Asserts a vertical space (a full, empty line-break between lines).
316  * Note that if used twice, this will cause two blank spaces and so on.
317  * All data in the output buffer is flushed prior to the newline
318  * assertion.
319  */
320 void
321 term_vspace(struct termp *p)
322 {
323 
324 	term_newln(p);
325 	p->viscol = 0;
326 	(*p->endline)(p);
327 }
328 
329 void
330 term_fontlast(struct termp *p)
331 {
332 	enum termfont	 f;
333 
334 	f = p->fontl;
335 	p->fontl = p->fontq[p->fonti];
336 	p->fontq[p->fonti] = f;
337 }
338 
339 
340 void
341 term_fontrepl(struct termp *p, enum termfont f)
342 {
343 
344 	p->fontl = p->fontq[p->fonti];
345 	p->fontq[p->fonti] = f;
346 }
347 
348 
349 void
350 term_fontpush(struct termp *p, enum termfont f)
351 {
352 
353 	assert(p->fonti + 1 < 10);
354 	p->fontl = p->fontq[p->fonti];
355 	p->fontq[++p->fonti] = f;
356 }
357 
358 
359 const void *
360 term_fontq(struct termp *p)
361 {
362 
363 	return(&p->fontq[p->fonti]);
364 }
365 
366 
367 enum termfont
368 term_fonttop(struct termp *p)
369 {
370 
371 	return(p->fontq[p->fonti]);
372 }
373 
374 
375 void
376 term_fontpopq(struct termp *p, const void *key)
377 {
378 
379 	while (p->fonti >= 0 && key != &p->fontq[p->fonti])
380 		p->fonti--;
381 	assert(p->fonti >= 0);
382 }
383 
384 
385 void
386 term_fontpop(struct termp *p)
387 {
388 
389 	assert(p->fonti);
390 	p->fonti--;
391 }
392 
393 /*
394  * Handle pwords, partial words, which may be either a single word or a
395  * phrase that cannot be broken down (such as a literal string).  This
396  * handles word styling.
397  */
398 void
399 term_word(struct termp *p, const char *word)
400 {
401 	const char	*seq, *cp;
402 	char		 c;
403 	int		 sz, uc;
404 	size_t		 ssz;
405 	enum mandoc_esc	 esc;
406 
407 	if ( ! (TERMP_NOSPACE & p->flags)) {
408 		if ( ! (TERMP_KEEP & p->flags)) {
409 			if (TERMP_PREKEEP & p->flags)
410 				p->flags |= TERMP_KEEP;
411 			bufferc(p, ' ');
412 			if (TERMP_SENTENCE & p->flags)
413 				bufferc(p, ' ');
414 		} else
415 			bufferc(p, ASCII_NBRSP);
416 	}
417 
418 	if ( ! (p->flags & TERMP_NONOSPACE))
419 		p->flags &= ~TERMP_NOSPACE;
420 	else
421 		p->flags |= TERMP_NOSPACE;
422 
423 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
424 
425 	while ('\0' != *word) {
426 		if ((ssz = strcspn(word, "\\")) > 0)
427 			encode(p, word, ssz);
428 
429 		word += (int)ssz;
430 		if ('\\' != *word)
431 			continue;
432 
433 		word++;
434 		esc = mandoc_escape(&word, &seq, &sz);
435 		if (ESCAPE_ERROR == esc)
436 			break;
437 
438 		if (TERMENC_ASCII != p->enc)
439 			switch (esc) {
440 			case (ESCAPE_UNICODE):
441 				uc = mchars_num2uc(seq + 1, sz - 1);
442 				if ('\0' == uc)
443 					break;
444 				encode1(p, uc);
445 				continue;
446 			case (ESCAPE_SPECIAL):
447 				uc = mchars_spec2cp(p->symtab, seq, sz);
448 				if (uc <= 0)
449 					break;
450 				encode1(p, uc);
451 				continue;
452 			default:
453 				break;
454 			}
455 
456 		switch (esc) {
457 		case (ESCAPE_UNICODE):
458 			encode1(p, '?');
459 			break;
460 		case (ESCAPE_NUMBERED):
461 			c = mchars_num2char(seq, sz);
462 			if ('\0' != c)
463 				encode(p, &c, 1);
464 			break;
465 		case (ESCAPE_SPECIAL):
466 			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
467 			if (NULL != cp)
468 				encode(p, cp, ssz);
469 			else if (1 == ssz)
470 				encode(p, seq, sz);
471 			break;
472 		case (ESCAPE_FONTBOLD):
473 			term_fontrepl(p, TERMFONT_BOLD);
474 			break;
475 		case (ESCAPE_FONTITALIC):
476 			term_fontrepl(p, TERMFONT_UNDER);
477 			break;
478 		case (ESCAPE_FONT):
479 			/* FALLTHROUGH */
480 		case (ESCAPE_FONTROMAN):
481 			term_fontrepl(p, TERMFONT_NONE);
482 			break;
483 		case (ESCAPE_FONTPREV):
484 			term_fontlast(p);
485 			break;
486 		case (ESCAPE_NOSPACE):
487 			if ('\0' == *word)
488 				p->flags |= TERMP_NOSPACE;
489 			break;
490 		default:
491 			break;
492 		}
493 	}
494 }
495 
496 static void
497 adjbuf(struct termp *p, int sz)
498 {
499 
500 	if (0 == p->maxcols)
501 		p->maxcols = 1024;
502 	while (sz >= p->maxcols)
503 		p->maxcols <<= 2;
504 
505 	p->buf = mandoc_realloc
506 		(p->buf, sizeof(int) * (size_t)p->maxcols);
507 }
508 
509 static void
510 bufferc(struct termp *p, char c)
511 {
512 
513 	if (p->col + 1 >= p->maxcols)
514 		adjbuf(p, p->col + 1);
515 
516 	p->buf[p->col++] = c;
517 }
518 
519 /*
520  * See encode().
521  * Do this for a single (probably unicode) value.
522  * Does not check for non-decorated glyphs.
523  */
524 static void
525 encode1(struct termp *p, int c)
526 {
527 	enum termfont	  f;
528 
529 	if (p->col + 4 >= p->maxcols)
530 		adjbuf(p, p->col + 4);
531 
532 	f = term_fonttop(p);
533 
534 	if (TERMFONT_NONE == f) {
535 		p->buf[p->col++] = c;
536 		return;
537 	} else if (TERMFONT_UNDER == f) {
538 		p->buf[p->col++] = '_';
539 	} else
540 		p->buf[p->col++] = c;
541 
542 	p->buf[p->col++] = 8;
543 	p->buf[p->col++] = c;
544 }
545 
546 static void
547 encode(struct termp *p, const char *word, size_t sz)
548 {
549 	enum termfont	  f;
550 	int		  i, len;
551 
552 	/* LINTED */
553 	len = sz;
554 
555 	/*
556 	 * Encode and buffer a string of characters.  If the current
557 	 * font mode is unset, buffer directly, else encode then buffer
558 	 * character by character.
559 	 */
560 
561 	if (TERMFONT_NONE == (f = term_fonttop(p))) {
562 		if (p->col + len >= p->maxcols)
563 			adjbuf(p, p->col + len);
564 		for (i = 0; i < len; i++)
565 			p->buf[p->col++] = word[i];
566 		return;
567 	}
568 
569 	/* Pre-buffer, assuming worst-case. */
570 
571 	if (p->col + 1 + (len * 3) >= p->maxcols)
572 		adjbuf(p, p->col + 1 + (len * 3));
573 
574 	for (i = 0; i < len; i++) {
575 		if ( ! isgraph((unsigned char)word[i])) {
576 			p->buf[p->col++] = word[i];
577 			continue;
578 		}
579 
580 		if (TERMFONT_UNDER == f)
581 			p->buf[p->col++] = '_';
582 		else
583 			p->buf[p->col++] = word[i];
584 
585 		p->buf[p->col++] = 8;
586 		p->buf[p->col++] = word[i];
587 	}
588 }
589 
590 size_t
591 term_len(const struct termp *p, size_t sz)
592 {
593 
594 	return((*p->width)(p, ' ') * sz);
595 }
596 
597 
598 size_t
599 term_strlen(const struct termp *p, const char *cp)
600 {
601 	size_t		 sz, rsz, i;
602 	int		 ssz, c;
603 	const char	*seq, *rhs;
604 	enum mandoc_esc	 esc;
605 	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
606 
607 	/*
608 	 * Account for escaped sequences within string length
609 	 * calculations.  This follows the logic in term_word() as we
610 	 * must calculate the width of produced strings.
611 	 */
612 
613 	sz = 0;
614 	while ('\0' != *cp) {
615 		rsz = strcspn(cp, rej);
616 		for (i = 0; i < rsz; i++)
617 			sz += (*p->width)(p, *cp++);
618 
619 		c = 0;
620 		switch (*cp) {
621 		case ('\\'):
622 			cp++;
623 			esc = mandoc_escape(&cp, &seq, &ssz);
624 			if (ESCAPE_ERROR == esc)
625 				return(sz);
626 
627 			if (TERMENC_ASCII != p->enc)
628 				switch (esc) {
629 				case (ESCAPE_UNICODE):
630 					c = mchars_num2uc
631 						(seq + 1, ssz - 1);
632 					if ('\0' == c)
633 						break;
634 					sz += (*p->width)(p, c);
635 					continue;
636 				case (ESCAPE_SPECIAL):
637 					c = mchars_spec2cp
638 						(p->symtab, seq, ssz);
639 					if (c <= 0)
640 						break;
641 					sz += (*p->width)(p, c);
642 					continue;
643 				default:
644 					break;
645 				}
646 
647 			rhs = NULL;
648 
649 			switch (esc) {
650 			case (ESCAPE_UNICODE):
651 				sz += (*p->width)(p, '?');
652 				break;
653 			case (ESCAPE_NUMBERED):
654 				c = mchars_num2char(seq, ssz);
655 				if ('\0' != c)
656 					sz += (*p->width)(p, c);
657 				break;
658 			case (ESCAPE_SPECIAL):
659 				rhs = mchars_spec2str
660 					(p->symtab, seq, ssz, &rsz);
661 
662 				if (ssz != 1 || rhs)
663 					break;
664 
665 				rhs = seq;
666 				rsz = ssz;
667 				break;
668 			default:
669 				break;
670 			}
671 
672 			if (NULL == rhs)
673 				break;
674 
675 			for (i = 0; i < rsz; i++)
676 				sz += (*p->width)(p, *rhs++);
677 			break;
678 		case (ASCII_NBRSP):
679 			sz += (*p->width)(p, ' ');
680 			cp++;
681 			break;
682 		case (ASCII_HYPH):
683 			sz += (*p->width)(p, '-');
684 			cp++;
685 			break;
686 		default:
687 			break;
688 		}
689 	}
690 
691 	return(sz);
692 }
693 
694 /* ARGSUSED */
695 size_t
696 term_vspan(const struct termp *p, const struct roffsu *su)
697 {
698 	double		 r;
699 
700 	switch (su->unit) {
701 	case (SCALE_CM):
702 		r = su->scale * 2;
703 		break;
704 	case (SCALE_IN):
705 		r = su->scale * 6;
706 		break;
707 	case (SCALE_PC):
708 		r = su->scale;
709 		break;
710 	case (SCALE_PT):
711 		r = su->scale / 8;
712 		break;
713 	case (SCALE_MM):
714 		r = su->scale / 1000;
715 		break;
716 	case (SCALE_VS):
717 		r = su->scale;
718 		break;
719 	default:
720 		r = su->scale - 1;
721 		break;
722 	}
723 
724 	if (r < 0.0)
725 		r = 0.0;
726 	return(/* LINTED */(size_t)
727 			r);
728 }
729 
730 size_t
731 term_hspan(const struct termp *p, const struct roffsu *su)
732 {
733 	double		 v;
734 
735 	v = ((*p->hspan)(p, su));
736 	if (v < 0.0)
737 		v = 0.0;
738 	return((size_t) /* LINTED */
739 			v);
740 }
741