xref: /dragonfly/contrib/mdocml/term.c (revision ad9f8794)
1 /*	$Id: term.c,v 1.197 2011/05/24 21:31:23 kristaps Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 
31 #include "mandoc.h"
32 #include "out.h"
33 #include "term.h"
34 #include "main.h"
35 
/* File-local helpers that grow and fill the output line buffer. */
static	void		 adjbuf(struct termp *p, int sz);
static	void		 bufferc(struct termp *p, char c);
static	void		 encode(struct termp *p, const char *word, size_t sz);
static	void		 encode1(struct termp *p, int c);
40 
41 void
42 term_free(struct termp *p)
43 {
44 
45 	if (p->buf)
46 		free(p->buf);
47 	if (p->symtab)
48 		mchars_free(p->symtab);
49 
50 	free(p);
51 }
52 
53 
54 void
55 term_begin(struct termp *p, term_margin head,
56 		term_margin foot, const void *arg)
57 {
58 
59 	p->headf = head;
60 	p->footf = foot;
61 	p->argf = arg;
62 	(*p->begin)(p);
63 }
64 
65 
66 void
67 term_end(struct termp *p)
68 {
69 
70 	(*p->end)(p);
71 }
72 
73 /*
74  * Flush a line of text.  A "line" is loosely defined as being something
75  * that should be followed by a newline, regardless of whether it's
76  * broken apart by newlines getting there.  A line can also be a
77  * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does
78  * not have a trailing newline.
79  *
80  * The following flags may be specified:
81  *
82  *  - TERMP_NOLPAD: when beginning to write the line, don't left-pad the
83  *    offset value.  This is useful when doing columnar lists where the
84  *    prior column has right-padded.
85  *
86  *  - TERMP_NOBREAK: this is the most important and is used when making
87  *    columns.  In short: don't print a newline and instead pad to the
88  *    right margin.  Used in conjunction with TERMP_NOLPAD.
89  *
90  *  - TERMP_TWOSPACE: when padding, make sure there are at least two
91  *    space characters of padding.  Otherwise, rather break the line.
92  *
93  *  - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and
94  *    the line is overrun, and don't pad-right if it's underrun.
95  *
96  *  - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when
97  *    overruning, instead save the position and continue at that point
98  *    when the next invocation.
99  *
100  *  In-line line breaking:
101  *
102  *  If TERMP_NOBREAK is specified and the line overruns the right
103  *  margin, it will break and pad-right to the right margin after
104  *  writing.  If maxrmargin is violated, it will break and continue
105  *  writing from the right-margin, which will lead to the above scenario
106  *  upon exit.  Otherwise, the line will break at the right margin.
107  */
108 void
109 term_flushln(struct termp *p)
110 {
111 	int		 i;     /* current input position in p->buf */
112 	size_t		 vis;   /* current visual position on output */
113 	size_t		 vbl;   /* number of blanks to prepend to output */
114 	size_t		 vend;	/* end of word visual position on output */
115 	size_t		 bp;    /* visual right border position */
116 	size_t		 dv;    /* temporary for visual pos calculations */
117 	int		 j;     /* temporary loop index for p->buf */
118 	int		 jhy;	/* last hyph before overflow w/r/t j */
119 	size_t		 maxvis; /* output position of visible boundary */
120 	size_t		 mmax; /* used in calculating bp */
121 
122 	/*
123 	 * First, establish the maximum columns of "visible" content.
124 	 * This is usually the difference between the right-margin and
125 	 * an indentation, but can be, for tagged lists or columns, a
126 	 * small set of values.
127 	 */
128 	assert  (p->rmargin >= p->offset);
129 	dv     = p->rmargin - p->offset;
130 	maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
131 	dv     = p->maxrmargin - p->offset;
132 	mmax   = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0;
133 
134 	bp = TERMP_NOBREAK & p->flags ? mmax : maxvis;
135 
136 	/*
137 	 * Indent the first line of a paragraph.
138 	 */
139 	vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset;
140 
141 	vis = vend = 0;
142 	i = 0;
143 
144 	while (i < p->col) {
145 		/*
146 		 * Handle literal tab characters: collapse all
147 		 * subsequent tabs into a single huge set of spaces.
148 		 */
149 		while (i < p->col && '\t' == p->buf[i]) {
150 			vend = (vis / p->tabwidth + 1) * p->tabwidth;
151 			vbl += vend - vis;
152 			vis = vend;
153 			i++;
154 		}
155 
156 		/*
157 		 * Count up visible word characters.  Control sequences
158 		 * (starting with the CSI) aren't counted.  A space
159 		 * generates a non-printing word, which is valid (the
160 		 * space is printed according to regular spacing rules).
161 		 */
162 
163 		for (j = i, jhy = 0; j < p->col; j++) {
164 			if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j])
165 				break;
166 
167 			/* Back over the the last printed character. */
168 			if (8 == p->buf[j]) {
169 				assert(j);
170 				vend -= (*p->width)(p, p->buf[j - 1]);
171 				continue;
172 			}
173 
174 			/* Regular word. */
175 			/* Break at the hyphen point if we overrun. */
176 			if (vend > vis && vend < bp &&
177 					ASCII_HYPH == p->buf[j])
178 				jhy = j;
179 
180 			vend += (*p->width)(p, p->buf[j]);
181 		}
182 
183 		/*
184 		 * Find out whether we would exceed the right margin.
185 		 * If so, break to the next line.
186 		 */
187 		if (vend > bp && 0 == jhy && vis > 0) {
188 			vend -= vis;
189 			(*p->endline)(p);
190 			if (TERMP_NOBREAK & p->flags) {
191 				p->viscol = p->rmargin;
192 				(*p->advance)(p, p->rmargin);
193 				vend += p->rmargin - p->offset;
194 			} else {
195 				p->viscol = 0;
196 				vbl = p->offset;
197 			}
198 
199 			/* Remove the p->overstep width. */
200 
201 			bp += (size_t)p->overstep;
202 			p->overstep = 0;
203 		}
204 
205 		/* Write out the [remaining] word. */
206 		for ( ; i < p->col; i++) {
207 			if (vend > bp && jhy > 0 && i > jhy)
208 				break;
209 			if ('\t' == p->buf[i])
210 				break;
211 			if (' ' == p->buf[i]) {
212 				j = i;
213 				while (' ' == p->buf[i])
214 					i++;
215 				dv = (size_t)(i - j) * (*p->width)(p, ' ');
216 				vbl += dv;
217 				vend += dv;
218 				break;
219 			}
220 			if (ASCII_NBRSP == p->buf[i]) {
221 				vbl += (*p->width)(p, ' ');
222 				continue;
223 			}
224 
225 			/*
226 			 * Now we definitely know there will be
227 			 * printable characters to output,
228 			 * so write preceding white space now.
229 			 */
230 			if (vbl) {
231 				(*p->advance)(p, vbl);
232 				p->viscol += vbl;
233 				vbl = 0;
234 			}
235 
236 			if (ASCII_HYPH == p->buf[i]) {
237 				(*p->letter)(p, '-');
238 				p->viscol += (*p->width)(p, '-');
239 			} else {
240 				(*p->letter)(p, p->buf[i]);
241 				p->viscol += (*p->width)(p, p->buf[i]);
242 			}
243 		}
244 		vis = vend;
245 	}
246 
247 	/*
248 	 * If there was trailing white space, it was not printed;
249 	 * so reset the cursor position accordingly.
250 	 */
251 	vis -= vbl;
252 
253 	p->col = 0;
254 	p->overstep = 0;
255 
256 	if ( ! (TERMP_NOBREAK & p->flags)) {
257 		p->viscol = 0;
258 		(*p->endline)(p);
259 		return;
260 	}
261 
262 	if (TERMP_HANG & p->flags) {
263 		/* We need one blank after the tag. */
264 		p->overstep = (int)(vis - maxvis + (*p->width)(p, ' '));
265 
266 		/*
267 		 * Behave exactly the same way as groff:
268 		 * If we have overstepped the margin, temporarily move
269 		 * it to the right and flag the rest of the line to be
270 		 * shorter.
271 		 * If we landed right at the margin, be happy.
272 		 * If we are one step before the margin, temporarily
273 		 * move it one step LEFT and flag the rest of the line
274 		 * to be longer.
275 		 */
276 		if (p->overstep >= -1) {
277 			assert((int)maxvis + p->overstep >= 0);
278 			maxvis += (size_t)p->overstep;
279 		} else
280 			p->overstep = 0;
281 
282 	} else if (TERMP_DANGLE & p->flags)
283 		return;
284 
285 	/* Right-pad. */
286 	if (maxvis > vis +
287 	    ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) {
288 		p->viscol += maxvis - vis;
289 		(*p->advance)(p, maxvis - vis);
290 		vis += (maxvis - vis);
291 	} else {	/* ...or newline break. */
292 		(*p->endline)(p);
293 		p->viscol = p->rmargin;
294 		(*p->advance)(p, p->rmargin);
295 	}
296 }
297 
298 
299 /*
300  * A newline only breaks an existing line; it won't assert vertical
301  * space.  All data in the output buffer is flushed prior to the newline
302  * assertion.
303  */
304 void
305 term_newln(struct termp *p)
306 {
307 
308 	p->flags |= TERMP_NOSPACE;
309 	if (0 == p->col && 0 == p->viscol) {
310 		p->flags &= ~TERMP_NOLPAD;
311 		return;
312 	}
313 	term_flushln(p);
314 	p->flags &= ~TERMP_NOLPAD;
315 }
316 
317 
318 /*
319  * Asserts a vertical space (a full, empty line-break between lines).
320  * Note that if used twice, this will cause two blank spaces and so on.
321  * All data in the output buffer is flushed prior to the newline
322  * assertion.
323  */
324 void
325 term_vspace(struct termp *p)
326 {
327 
328 	term_newln(p);
329 	p->viscol = 0;
330 	(*p->endline)(p);
331 }
332 
333 void
334 term_fontlast(struct termp *p)
335 {
336 	enum termfont	 f;
337 
338 	f = p->fontl;
339 	p->fontl = p->fontq[p->fonti];
340 	p->fontq[p->fonti] = f;
341 }
342 
343 
344 void
345 term_fontrepl(struct termp *p, enum termfont f)
346 {
347 
348 	p->fontl = p->fontq[p->fonti];
349 	p->fontq[p->fonti] = f;
350 }
351 
352 
353 void
354 term_fontpush(struct termp *p, enum termfont f)
355 {
356 
357 	assert(p->fonti + 1 < 10);
358 	p->fontl = p->fontq[p->fonti];
359 	p->fontq[++p->fonti] = f;
360 }
361 
362 
363 const void *
364 term_fontq(struct termp *p)
365 {
366 
367 	return(&p->fontq[p->fonti]);
368 }
369 
370 
371 enum termfont
372 term_fonttop(struct termp *p)
373 {
374 
375 	return(p->fontq[p->fonti]);
376 }
377 
378 
379 void
380 term_fontpopq(struct termp *p, const void *key)
381 {
382 
383 	while (p->fonti >= 0 && key != &p->fontq[p->fonti])
384 		p->fonti--;
385 	assert(p->fonti >= 0);
386 }
387 
388 
389 void
390 term_fontpop(struct termp *p)
391 {
392 
393 	assert(p->fonti);
394 	p->fonti--;
395 }
396 
397 /*
398  * Handle pwords, partial words, which may be either a single word or a
399  * phrase that cannot be broken down (such as a literal string).  This
400  * handles word styling.
401  */
402 void
403 term_word(struct termp *p, const char *word)
404 {
405 	const char	*seq, *cp;
406 	char		 c;
407 	int		 sz, uc;
408 	size_t		 ssz;
409 	enum mandoc_esc	 esc;
410 
411 	if ( ! (TERMP_NOSPACE & p->flags)) {
412 		if ( ! (TERMP_KEEP & p->flags)) {
413 			if (TERMP_PREKEEP & p->flags)
414 				p->flags |= TERMP_KEEP;
415 			bufferc(p, ' ');
416 			if (TERMP_SENTENCE & p->flags)
417 				bufferc(p, ' ');
418 		} else
419 			bufferc(p, ASCII_NBRSP);
420 	}
421 
422 	if ( ! (p->flags & TERMP_NONOSPACE))
423 		p->flags &= ~TERMP_NOSPACE;
424 	else
425 		p->flags |= TERMP_NOSPACE;
426 
427 	p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM);
428 
429 	while ('\0' != *word) {
430 		if ((ssz = strcspn(word, "\\")) > 0)
431 			encode(p, word, ssz);
432 
433 		word += (int)ssz;
434 		if ('\\' != *word)
435 			continue;
436 
437 		word++;
438 		esc = mandoc_escape(&word, &seq, &sz);
439 		if (ESCAPE_ERROR == esc)
440 			break;
441 
442 		if (TERMENC_ASCII != p->enc)
443 			switch (esc) {
444 			case (ESCAPE_UNICODE):
445 				uc = mchars_num2uc(seq + 1, sz - 1);
446 				if ('\0' == uc)
447 					break;
448 				encode1(p, uc);
449 				continue;
450 			case (ESCAPE_SPECIAL):
451 				uc = mchars_spec2cp(p->symtab, seq, sz);
452 				if (uc <= 0)
453 					break;
454 				encode1(p, uc);
455 				continue;
456 			default:
457 				break;
458 			}
459 
460 		switch (esc) {
461 		case (ESCAPE_UNICODE):
462 			encode1(p, '?');
463 			break;
464 		case (ESCAPE_NUMBERED):
465 			c = mchars_num2char(seq, sz);
466 			if ('\0' != c)
467 				encode(p, &c, 1);
468 			break;
469 		case (ESCAPE_SPECIAL):
470 			cp = mchars_spec2str(p->symtab, seq, sz, &ssz);
471 			if (NULL != cp)
472 				encode(p, cp, ssz);
473 			else if (1 == ssz)
474 				encode(p, seq, sz);
475 			break;
476 		case (ESCAPE_FONTBOLD):
477 			term_fontrepl(p, TERMFONT_BOLD);
478 			break;
479 		case (ESCAPE_FONTITALIC):
480 			term_fontrepl(p, TERMFONT_UNDER);
481 			break;
482 		case (ESCAPE_FONT):
483 			/* FALLTHROUGH */
484 		case (ESCAPE_FONTROMAN):
485 			term_fontrepl(p, TERMFONT_NONE);
486 			break;
487 		case (ESCAPE_FONTPREV):
488 			term_fontlast(p);
489 			break;
490 		case (ESCAPE_NOSPACE):
491 			if ('\0' == *word)
492 				p->flags |= TERMP_NOSPACE;
493 			break;
494 		default:
495 			break;
496 		}
497 	}
498 }
499 
500 static void
501 adjbuf(struct termp *p, int sz)
502 {
503 
504 	if (0 == p->maxcols)
505 		p->maxcols = 1024;
506 	while (sz >= p->maxcols)
507 		p->maxcols <<= 2;
508 
509 	p->buf = mandoc_realloc
510 		(p->buf, sizeof(int) * (size_t)p->maxcols);
511 }
512 
513 static void
514 bufferc(struct termp *p, char c)
515 {
516 
517 	if (p->col + 1 >= p->maxcols)
518 		adjbuf(p, p->col + 1);
519 
520 	p->buf[p->col++] = c;
521 }
522 
523 /*
524  * See encode().
525  * Do this for a single (probably unicode) value.
526  * Does not check for non-decorated glyphs.
527  */
528 static void
529 encode1(struct termp *p, int c)
530 {
531 	enum termfont	  f;
532 
533 	if (p->col + 4 >= p->maxcols)
534 		adjbuf(p, p->col + 4);
535 
536 	f = term_fonttop(p);
537 
538 	if (TERMFONT_NONE == f) {
539 		p->buf[p->col++] = c;
540 		return;
541 	} else if (TERMFONT_UNDER == f) {
542 		p->buf[p->col++] = '_';
543 	} else
544 		p->buf[p->col++] = c;
545 
546 	p->buf[p->col++] = 8;
547 	p->buf[p->col++] = c;
548 }
549 
550 static void
551 encode(struct termp *p, const char *word, size_t sz)
552 {
553 	enum termfont	  f;
554 	int		  i, len;
555 
556 	/* LINTED */
557 	len = sz;
558 
559 	/*
560 	 * Encode and buffer a string of characters.  If the current
561 	 * font mode is unset, buffer directly, else encode then buffer
562 	 * character by character.
563 	 */
564 
565 	if (TERMFONT_NONE == (f = term_fonttop(p))) {
566 		if (p->col + len >= p->maxcols)
567 			adjbuf(p, p->col + len);
568 		for (i = 0; i < len; i++)
569 			p->buf[p->col++] = word[i];
570 		return;
571 	}
572 
573 	/* Pre-buffer, assuming worst-case. */
574 
575 	if (p->col + 1 + (len * 3) >= p->maxcols)
576 		adjbuf(p, p->col + 1 + (len * 3));
577 
578 	for (i = 0; i < len; i++) {
579 		if ( ! isgraph((unsigned char)word[i])) {
580 			p->buf[p->col++] = word[i];
581 			continue;
582 		}
583 
584 		if (TERMFONT_UNDER == f)
585 			p->buf[p->col++] = '_';
586 		else
587 			p->buf[p->col++] = word[i];
588 
589 		p->buf[p->col++] = 8;
590 		p->buf[p->col++] = word[i];
591 	}
592 }
593 
594 size_t
595 term_len(const struct termp *p, size_t sz)
596 {
597 
598 	return((*p->width)(p, ' ') * sz);
599 }
600 
601 
602 size_t
603 term_strlen(const struct termp *p, const char *cp)
604 {
605 	size_t		 sz, rsz, i;
606 	int		 ssz, c;
607 	const char	*seq, *rhs;
608 	enum mandoc_esc	 esc;
609 	static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' };
610 
611 	/*
612 	 * Account for escaped sequences within string length
613 	 * calculations.  This follows the logic in term_word() as we
614 	 * must calculate the width of produced strings.
615 	 */
616 
617 	sz = 0;
618 	while ('\0' != *cp) {
619 		rsz = strcspn(cp, rej);
620 		for (i = 0; i < rsz; i++)
621 			sz += (*p->width)(p, *cp++);
622 
623 		c = 0;
624 		switch (*cp) {
625 		case ('\\'):
626 			cp++;
627 			esc = mandoc_escape(&cp, &seq, &ssz);
628 			if (ESCAPE_ERROR == esc)
629 				return(sz);
630 
631 			if (TERMENC_ASCII != p->enc)
632 				switch (esc) {
633 				case (ESCAPE_UNICODE):
634 					c = mchars_num2uc
635 						(seq + 1, ssz - 1);
636 					if ('\0' == c)
637 						break;
638 					sz += (*p->width)(p, c);
639 					continue;
640 				case (ESCAPE_SPECIAL):
641 					c = mchars_spec2cp
642 						(p->symtab, seq, ssz);
643 					if (c <= 0)
644 						break;
645 					sz += (*p->width)(p, c);
646 					continue;
647 				default:
648 					break;
649 				}
650 
651 			rhs = NULL;
652 
653 			switch (esc) {
654 			case (ESCAPE_UNICODE):
655 				sz += (*p->width)(p, '?');
656 				break;
657 			case (ESCAPE_NUMBERED):
658 				c = mchars_num2char(seq, ssz);
659 				if ('\0' != c)
660 					sz += (*p->width)(p, c);
661 				break;
662 			case (ESCAPE_SPECIAL):
663 				rhs = mchars_spec2str
664 					(p->symtab, seq, ssz, &rsz);
665 
666 				if (ssz != 1 || rhs)
667 					break;
668 
669 				rhs = seq;
670 				rsz = ssz;
671 				break;
672 			default:
673 				break;
674 			}
675 
676 			if (NULL == rhs)
677 				break;
678 
679 			for (i = 0; i < rsz; i++)
680 				sz += (*p->width)(p, *rhs++);
681 			break;
682 		case (ASCII_NBRSP):
683 			sz += (*p->width)(p, ' ');
684 			cp++;
685 			break;
686 		case (ASCII_HYPH):
687 			sz += (*p->width)(p, '-');
688 			cp++;
689 			break;
690 		default:
691 			break;
692 		}
693 	}
694 
695 	return(sz);
696 }
697 
698 /* ARGSUSED */
699 size_t
700 term_vspan(const struct termp *p, const struct roffsu *su)
701 {
702 	double		 r;
703 
704 	switch (su->unit) {
705 	case (SCALE_CM):
706 		r = su->scale * 2;
707 		break;
708 	case (SCALE_IN):
709 		r = su->scale * 6;
710 		break;
711 	case (SCALE_PC):
712 		r = su->scale;
713 		break;
714 	case (SCALE_PT):
715 		r = su->scale / 8;
716 		break;
717 	case (SCALE_MM):
718 		r = su->scale / 1000;
719 		break;
720 	case (SCALE_VS):
721 		r = su->scale;
722 		break;
723 	default:
724 		r = su->scale - 1;
725 		break;
726 	}
727 
728 	if (r < 0.0)
729 		r = 0.0;
730 	return(/* LINTED */(size_t)
731 			r);
732 }
733 
734 size_t
735 term_hspan(const struct termp *p, const struct roffsu *su)
736 {
737 	double		 v;
738 
739 	v = ((*p->hspan)(p, su));
740 	if (v < 0.0)
741 		v = 0.0;
742 	return((size_t) /* LINTED */
743 			v);
744 }
745