xref: /openbsd/usr.bin/mandoc/term.c (revision d9a51c35)
1 /* $OpenBSD: term.c,v 1.151 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3  * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include <sys/types.h>
19 
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "mandoc.h"
28 #include "mandoc_aux.h"
29 #include "out.h"
30 #include "term.h"
31 #include "main.h"
32 
/* Helpers private to this file; all of them are defined further down. */
static	void		 adjbuf(struct termp_col *c, size_t sz);
static	void		 bufferc(struct termp *p, char c);
static	size_t		 cond_width(const struct termp *p, int c, int *skip);
static	void		 encode(struct termp *p, const char *word, size_t sz);
static	void		 encode1(struct termp *p, int c);
static	void		 endline(struct termp *p);
static	void		 term_field(struct termp *p, size_t vbl, size_t nbr);
static	void		 term_fill(struct termp *p, size_t *nbr, size_t *vbr,
				size_t vtarget);
42 
43 
44 void
term_setcol(struct termp * p,size_t maxtcol)45 term_setcol(struct termp *p, size_t maxtcol)
46 {
47 	if (maxtcol > p->maxtcol) {
48 		p->tcols = mandoc_recallocarray(p->tcols,
49 		    p->maxtcol, maxtcol, sizeof(*p->tcols));
50 		p->maxtcol = maxtcol;
51 	}
52 	p->lasttcol = maxtcol - 1;
53 	p->tcol = p->tcols;
54 }
55 
56 void
term_free(struct termp * p)57 term_free(struct termp *p)
58 {
59 	term_tab_free();
60 	for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
61 		free(p->tcol->buf);
62 	free(p->tcols);
63 	free(p->fontq);
64 	free(p);
65 }
66 
67 void
term_begin(struct termp * p,term_margin head,term_margin foot,const struct roff_meta * arg)68 term_begin(struct termp *p, term_margin head,
69 		term_margin foot, const struct roff_meta *arg)
70 {
71 
72 	p->headf = head;
73 	p->footf = foot;
74 	p->argf = arg;
75 	(*p->begin)(p);
76 }
77 
78 void
term_end(struct termp * p)79 term_end(struct termp *p)
80 {
81 
82 	(*p->end)(p);
83 }
84 
85 /*
86  * Flush a chunk of text.  By default, break the output line each time
87  * the right margin is reached, and continue output on the next line
88  * at the same offset as the chunk itself.  By default, also break the
89  * output line at the end of the chunk.  There are many flags modifying
90  * this behaviour, see the comments in the body of the function.
91  */
92 void
term_flushln(struct termp * p)93 term_flushln(struct termp *p)
94 {
95 	size_t	 vbl;      /* Number of blanks to prepend to the output. */
96 	size_t	 vbr;      /* Actual visual position of the end of field. */
97 	size_t	 vfield;   /* Desired visual field width. */
98 	size_t	 vtarget;  /* Desired visual position of the right margin. */
99 	size_t	 ic;       /* Character position in the input buffer. */
100 	size_t	 nbr;      /* Number of characters to print in this field. */
101 
102 	/*
103 	 * Normally, start writing at the left margin, but with the
104 	 * NOPAD flag, start writing at the current position instead.
105 	 */
106 
107 	vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
108 	    0 : p->tcol->offset - p->viscol;
109 	if (p->minbl && vbl < p->minbl)
110 		vbl = p->minbl;
111 
112 	if ((p->flags & TERMP_MULTICOL) == 0)
113 		p->tcol->col = 0;
114 
115 	/* Loop over output lines. */
116 
117 	for (;;) {
118 		vfield = p->tcol->rmargin > p->viscol + vbl ?
119 		    p->tcol->rmargin - p->viscol - vbl : 0;
120 
121 		/*
122 		 * Normally, break the line at the the right margin
123 		 * of the field, but with the NOBREAK flag, only
124 		 * break it at the max right margin of the screen,
125 		 * and with the BRNEVER flag, never break it at all.
126 		 */
127 
128 		vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
129 		    p->maxrmargin > p->viscol + vbl ?
130 		    p->maxrmargin - p->viscol - vbl : 0;
131 
132 		/*
133 		 * Figure out how much text will fit in the field.
134 		 * If there is whitespace only, print nothing.
135 		 */
136 
137 		term_fill(p, &nbr, &vbr,
138 		    p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget);
139 		if (nbr == 0)
140 			break;
141 
142 		/*
143 		 * With the CENTER or RIGHT flag, increase the indentation
144 		 * to center the text between the left and right margins
145 		 * or to adjust it to the right margin, respectively.
146 		 */
147 
148 		if (vbr < vtarget) {
149 			if (p->flags & TERMP_CENTER)
150 				vbl += (vtarget - vbr) / 2;
151 			else if (p->flags & TERMP_RIGHT)
152 				vbl += vtarget - vbr;
153 		}
154 
155 		/* Finally, print the field content. */
156 
157 		term_field(p, vbl, nbr);
158 		if (vbr < vtarget)
159 			p->tcol->taboff += vbr;
160 		else
161 			p->tcol->taboff += vtarget;
162 		p->tcol->taboff += (*p->width)(p, ' ');
163 
164 		/*
165 		 * If there is no text left in the field, exit the loop.
166 		 * If the BRTRSP flag is set, consider trailing
167 		 * whitespace significant when deciding whether
168 		 * the field fits or not.
169 		 */
170 
171 		for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
172 			switch (p->tcol->buf[ic]) {
173 			case '\t':
174 				if (p->flags & TERMP_BRTRSP)
175 					vbr = term_tab_next(vbr);
176 				continue;
177 			case ' ':
178 				if (p->flags & TERMP_BRTRSP)
179 					vbr += (*p->width)(p, ' ');
180 				continue;
181 			case '\n':
182 			case ASCII_NBRZW:
183 			case ASCII_BREAK:
184 			case ASCII_TABREF:
185 				continue;
186 			default:
187 				break;
188 			}
189 			break;
190 		}
191 		if (ic == p->tcol->lastcol)
192 			break;
193 
194 		/*
195 		 * At the location of an automatic line break, input
196 		 * space characters are consumed by the line break.
197 		 */
198 
199 		while (p->tcol->col < p->tcol->lastcol &&
200 		    p->tcol->buf[p->tcol->col] == ' ')
201 			p->tcol->col++;
202 
203 		/*
204 		 * In multi-column mode, leave the rest of the text
205 		 * in the buffer to be handled by a subsequent
206 		 * invocation, such that the other columns of the
207 		 * table can be handled first.
208 		 * In single-column mode, simply break the line.
209 		 */
210 
211 		if (p->flags & TERMP_MULTICOL)
212 			return;
213 
214 		endline(p);
215 
216 		/*
217 		 * Normally, start the next line at the same indentation
218 		 * as this one, but with the BRIND flag, start it at the
219 		 * right margin instead.  This is used together with
220 		 * NOBREAK for the tags in various kinds of tagged lists.
221 		 */
222 
223 		vbl = p->flags & TERMP_BRIND ?
224 		    p->tcol->rmargin : p->tcol->offset;
225 	}
226 
227 	/* Reset output state in preparation for the next field. */
228 
229 	p->col = p->tcol->col = p->tcol->lastcol = 0;
230 	p->minbl = p->trailspace;
231 	p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
232 
233 	if (p->flags & TERMP_MULTICOL)
234 		return;
235 
236 	/*
237 	 * The HANG flag means that the next field
238 	 * always follows on the same line.
239 	 * The NOBREAK flag means that the next field
240 	 * follows on the same line unless the field was overrun.
241 	 * Normally, break the line at the end of each field.
242 	 */
243 
244 	if ((p->flags & TERMP_HANG) == 0 &&
245 	    ((p->flags & TERMP_NOBREAK) == 0 ||
246 	     vbr + term_len(p, p->trailspace) > vfield))
247 		endline(p);
248 }
249 
250 /*
251  * Store the number of input characters to print in this field in *nbr
252  * and their total visual width to print in *vbr.
253  * If there is only whitespace in the field, both remain zero.
254  * The desired visual width of the field is provided by vtarget.
255  * If the first word is longer, the field will be overrun.
256  */
257 static void
term_fill(struct termp * p,size_t * nbr,size_t * vbr,size_t vtarget)258 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
259 {
260 	size_t	 ic;        /* Character position in the input buffer. */
261 	size_t	 vis;       /* Visual position of the current character. */
262 	size_t	 vn;        /* Visual position of the next character. */
263 	int	 breakline; /* Break at the end of this word. */
264 	int	 graph;     /* Last character was non-blank. */
265 	int	 taboff;    /* Temporary offset for literal tabs. */
266 
267 	*nbr = *vbr = vis = 0;
268 	breakline = graph = 0;
269 	taboff = p->tcol->taboff;
270 	for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
271 		switch (p->tcol->buf[ic]) {
272 		case '\b':  /* Escape \o (overstrike) or backspace markup. */
273 			assert(ic > 0);
274 			vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
275 			continue;
276 
277 		case ' ':
278 		case ASCII_BREAK:  /* Escape \: (breakpoint). */
279 			vn = vis;
280 			if (p->tcol->buf[ic] == ' ')
281 				vn += (*p->width)(p, ' ');
282 			/* Can break at the end of a word. */
283 			if (breakline || vn > vtarget)
284 				break;
285 			if (graph) {
286 				*nbr = ic;
287 				*vbr = vis;
288 				graph = 0;
289 			}
290 			vis = vn;
291 			continue;
292 
293 		case '\n':  /* Escape \p (break at the end of the word). */
294 			breakline = 1;
295 			continue;
296 
297 		case ASCII_HYPH:  /* Breakable hyphen. */
298 			graph = 1;
299 			/*
300 			 * We are about to decide whether to break the
301 			 * line or not, so we no longer need this hyphen
302 			 * to be marked as breakable.  Put back a real
303 			 * hyphen such that we get the correct width.
304 			 */
305 			p->tcol->buf[ic] = '-';
306 			vis += (*p->width)(p, '-');
307 			if (vis > vtarget) {
308 				ic++;
309 				break;
310 			}
311 			*nbr = ic + 1;
312 			*vbr = vis;
313 			continue;
314 
315 		case ASCII_TABREF:
316 			taboff = -vis - (*p->width)(p, ' ');
317 			continue;
318 
319 		default:
320 			switch (p->tcol->buf[ic]) {
321 			case '\t':
322 				if (taboff < 0 && (size_t)-taboff > vis)
323 					vis = 0;
324 				else
325 					vis += taboff;
326 				vis = term_tab_next(vis);
327 				vis -= taboff;
328 				break;
329 			case ASCII_NBRZW:  /* Non-breakable zero-width. */
330 				break;
331 			case ASCII_NBRSP:  /* Non-breakable space. */
332 				p->tcol->buf[ic] = ' ';
333 				/* FALLTHROUGH */
334 			default:  /* Printable character. */
335 				vis += (*p->width)(p, p->tcol->buf[ic]);
336 				break;
337 			}
338 			graph = 1;
339 			if (vis > vtarget && *nbr > 0)
340 				return;
341 			continue;
342 		}
343 		break;
344 	}
345 
346 	/*
347 	 * If the last word extends to the end of the field without any
348 	 * trailing whitespace, the loop could not check yet whether it
349 	 * can remain on this line.  So do the check now.
350 	 */
351 
352 	if (graph && (vis <= vtarget || *nbr == 0)) {
353 		*nbr = ic;
354 		*vbr = vis;
355 	}
356 }
357 
358 /*
359  * Print the contents of one field
360  * with an indentation of	 vbl	  visual columns,
361  * and an input string length of nbr	  characters.
362  */
363 static void
term_field(struct termp * p,size_t vbl,size_t nbr)364 term_field(struct termp *p, size_t vbl, size_t nbr)
365 {
366 	size_t	 ic;	/* Character position in the input buffer. */
367 	size_t	 vis;	/* Visual position of the current character. */
368 	size_t	 vt;	/* Visual position including tab offset. */
369 	size_t	 dv;	/* Visual width of the current character. */
370 	int	 taboff; /* Temporary offset for literal tabs. */
371 
372 	vis = 0;
373 	taboff = p->tcol->taboff;
374 	for (ic = p->tcol->col; ic < nbr; ic++) {
375 
376 		/*
377 		 * To avoid the printing of trailing whitespace,
378 		 * do not print whitespace right away, only count it.
379 		 */
380 
381 		switch (p->tcol->buf[ic]) {
382 		case '\n':
383 		case ASCII_BREAK:
384 		case ASCII_NBRZW:
385 			continue;
386 		case ASCII_TABREF:
387 			taboff = -vis - (*p->width)(p, ' ');
388 			continue;
389 		case '\t':
390 		case ' ':
391 		case ASCII_NBRSP:
392 			if (p->tcol->buf[ic] == '\t') {
393 				if (taboff < 0 && (size_t)-taboff > vis)
394 					vt = 0;
395 				else
396 					vt = vis + taboff;
397 				dv = term_tab_next(vt) - vt;
398 			} else
399 				dv = (*p->width)(p, ' ');
400 			vbl += dv;
401 			vis += dv;
402 			continue;
403 		default:
404 			break;
405 		}
406 
407 		/*
408 		 * We found a non-blank character to print,
409 		 * so write preceding white space now.
410 		 */
411 
412 		if (vbl > 0) {
413 			(*p->advance)(p, vbl);
414 			p->viscol += vbl;
415 			vbl = 0;
416 		}
417 
418 		/* Print the character and adjust the visual position. */
419 
420 		(*p->letter)(p, p->tcol->buf[ic]);
421 		if (p->tcol->buf[ic] == '\b') {
422 			dv = (*p->width)(p, p->tcol->buf[ic - 1]);
423 			p->viscol -= dv;
424 			vis -= dv;
425 		} else {
426 			dv = (*p->width)(p, p->tcol->buf[ic]);
427 			p->viscol += dv;
428 			vis += dv;
429 		}
430 	}
431 	p->tcol->col = nbr;
432 }
433 
434 static void
endline(struct termp * p)435 endline(struct termp *p)
436 {
437 	if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
438 		p->mc = NULL;
439 		p->flags &= ~TERMP_ENDMC;
440 	}
441 	if (p->mc != NULL) {
442 		if (p->viscol && p->maxrmargin >= p->viscol)
443 			(*p->advance)(p, p->maxrmargin - p->viscol + 1);
444 		p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
445 		term_word(p, p->mc);
446 		p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
447 	}
448 	p->viscol = 0;
449 	p->minbl = 0;
450 	(*p->endline)(p);
451 }
452 
453 /*
454  * A newline only breaks an existing line; it won't assert vertical
455  * space.  All data in the output buffer is flushed prior to the newline
456  * assertion.
457  */
458 void
term_newln(struct termp * p)459 term_newln(struct termp *p)
460 {
461 	p->flags |= TERMP_NOSPACE;
462 	if (p->tcol->lastcol || p->viscol)
463 		term_flushln(p);
464 	p->tcol->taboff = 0;
465 }
466 
467 /*
468  * Asserts a vertical space (a full, empty line-break between lines).
469  * Note that if used twice, this will cause two blank spaces and so on.
470  * All data in the output buffer is flushed prior to the newline
471  * assertion.
472  */
473 void
term_vspace(struct termp * p)474 term_vspace(struct termp *p)
475 {
476 
477 	term_newln(p);
478 	p->viscol = 0;
479 	p->minbl = 0;
480 	if (0 < p->skipvsp)
481 		p->skipvsp--;
482 	else
483 		(*p->endline)(p);
484 }
485 
486 /* Swap current and previous font; for \fP and .ft P */
487 void
term_fontlast(struct termp * p)488 term_fontlast(struct termp *p)
489 {
490 	enum termfont	 f;
491 
492 	f = p->fontl;
493 	p->fontl = p->fontq[p->fonti];
494 	p->fontq[p->fonti] = f;
495 }
496 
497 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
498 void
term_fontrepl(struct termp * p,enum termfont f)499 term_fontrepl(struct termp *p, enum termfont f)
500 {
501 
502 	p->fontl = p->fontq[p->fonti];
503 	p->fontq[p->fonti] = f;
504 }
505 
506 /* Set font, save previous. */
507 void
term_fontpush(struct termp * p,enum termfont f)508 term_fontpush(struct termp *p, enum termfont f)
509 {
510 
511 	p->fontl = p->fontq[p->fonti];
512 	if (++p->fonti == p->fontsz) {
513 		p->fontsz += 8;
514 		p->fontq = mandoc_reallocarray(p->fontq,
515 		    p->fontsz, sizeof(*p->fontq));
516 	}
517 	p->fontq[p->fonti] = f;
518 }
519 
520 /* Flush to make the saved pointer current again. */
521 void
term_fontpopq(struct termp * p,int i)522 term_fontpopq(struct termp *p, int i)
523 {
524 
525 	assert(i >= 0);
526 	if (p->fonti > i)
527 		p->fonti = i;
528 }
529 
530 /* Pop one font off the stack. */
531 void
term_fontpop(struct termp * p)532 term_fontpop(struct termp *p)
533 {
534 
535 	assert(p->fonti);
536 	p->fonti--;
537 }
538 
539 /*
540  * Handle pwords, partial words, which may be either a single word or a
541  * phrase that cannot be broken down (such as a literal string).  This
542  * handles word styling.
543  */
544 void
term_word(struct termp * p,const char * word)545 term_word(struct termp *p, const char *word)
546 {
547 	struct roffsu	 su;
548 	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
549 	const char	*seq, *cp;
550 	int		 sz, uc;
551 	size_t		 csz, lsz, ssz;
552 	enum mandoc_esc	 esc;
553 
554 	if ((p->flags & TERMP_NOBUF) == 0) {
555 		if ((p->flags & TERMP_NOSPACE) == 0) {
556 			if ((p->flags & TERMP_KEEP) == 0) {
557 				bufferc(p, ' ');
558 				if (p->flags & TERMP_SENTENCE)
559 					bufferc(p, ' ');
560 			} else
561 				bufferc(p, ASCII_NBRSP);
562 		}
563 		if (p->flags & TERMP_PREKEEP)
564 			p->flags |= TERMP_KEEP;
565 		if (p->flags & TERMP_NONOSPACE)
566 			p->flags |= TERMP_NOSPACE;
567 		else
568 			p->flags &= ~TERMP_NOSPACE;
569 		p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
570 		p->skipvsp = 0;
571 	}
572 
573 	while ('\0' != *word) {
574 		if ('\\' != *word) {
575 			if (TERMP_NBRWORD & p->flags) {
576 				if (' ' == *word) {
577 					encode(p, nbrsp, 1);
578 					word++;
579 					continue;
580 				}
581 				ssz = strcspn(word, "\\ ");
582 			} else
583 				ssz = strcspn(word, "\\");
584 			encode(p, word, ssz);
585 			word += (int)ssz;
586 			continue;
587 		}
588 
589 		word++;
590 		esc = mandoc_escape(&word, &seq, &sz);
591 		switch (esc) {
592 		case ESCAPE_UNICODE:
593 			uc = mchars_num2uc(seq + 1, sz - 1);
594 			break;
595 		case ESCAPE_NUMBERED:
596 			uc = mchars_num2char(seq, sz);
597 			if (uc >= 0)
598 				break;
599 			bufferc(p, ASCII_NBRZW);
600 			continue;
601 		case ESCAPE_SPECIAL:
602 			if (p->enc == TERMENC_ASCII) {
603 				cp = mchars_spec2str(seq, sz, &ssz);
604 				if (cp != NULL)
605 					encode(p, cp, ssz);
606 				else
607 					bufferc(p, ASCII_NBRZW);
608 			} else {
609 				uc = mchars_spec2cp(seq, sz);
610 				if (uc > 0)
611 					encode1(p, uc);
612 				else
613 					bufferc(p, ASCII_NBRZW);
614 			}
615 			continue;
616 		case ESCAPE_UNDEF:
617 			uc = *seq;
618 			break;
619 		case ESCAPE_FONTBOLD:
620 		case ESCAPE_FONTCB:
621 			term_fontrepl(p, TERMFONT_BOLD);
622 			continue;
623 		case ESCAPE_FONTITALIC:
624 		case ESCAPE_FONTCI:
625 			term_fontrepl(p, TERMFONT_UNDER);
626 			continue;
627 		case ESCAPE_FONTBI:
628 			term_fontrepl(p, TERMFONT_BI);
629 			continue;
630 		case ESCAPE_FONT:
631 		case ESCAPE_FONTCR:
632 		case ESCAPE_FONTROMAN:
633 			term_fontrepl(p, TERMFONT_NONE);
634 			continue;
635 		case ESCAPE_FONTPREV:
636 			term_fontlast(p);
637 			continue;
638 		case ESCAPE_BREAK:
639 			bufferc(p, '\n');
640 			continue;
641 		case ESCAPE_NOSPACE:
642 			if (p->flags & TERMP_BACKAFTER)
643 				p->flags &= ~TERMP_BACKAFTER;
644 			else if (*word == '\0')
645 				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
646 			continue;
647 		case ESCAPE_DEVICE:
648 			if (p->type == TERMTYPE_PDF)
649 				encode(p, "pdf", 3);
650 			else if (p->type == TERMTYPE_PS)
651 				encode(p, "ps", 2);
652 			else if (p->enc == TERMENC_ASCII)
653 				encode(p, "ascii", 5);
654 			else
655 				encode(p, "utf8", 4);
656 			continue;
657 		case ESCAPE_HORIZ:
658 			if (p->flags & TERMP_BACKAFTER) {
659 				p->flags &= ~TERMP_BACKAFTER;
660 				continue;
661 			}
662 			if (*seq == '|') {
663 				seq++;
664 				uc = -p->col;
665 			} else
666 				uc = 0;
667 			if (a2roffsu(seq, &su, SCALE_EM) == NULL)
668 				continue;
669 			uc += term_hen(p, &su);
670 			if (uc >= 0) {
671 				while (uc > 0) {
672 					uc -= term_len(p, 1);
673 					if (p->flags & TERMP_BACKBEFORE)
674 						p->flags &= ~TERMP_BACKBEFORE;
675 					else
676 						bufferc(p, ASCII_NBRSP);
677 				}
678 				continue;
679 			}
680 			if (p->flags & TERMP_BACKBEFORE) {
681 				p->flags &= ~TERMP_BACKBEFORE;
682 				assert(p->col > 0);
683 				p->col--;
684 			}
685 			if (p->col >= (size_t)(-uc)) {
686 				p->col += uc;
687 			} else {
688 				uc += p->col;
689 				p->col = 0;
690 				if (p->tcol->offset > (size_t)(-uc)) {
691 					p->ti += uc;
692 					p->tcol->offset += uc;
693 				} else {
694 					p->ti -= p->tcol->offset;
695 					p->tcol->offset = 0;
696 				}
697 			}
698 			continue;
699 		case ESCAPE_HLINE:
700 			if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
701 				continue;
702 			uc = term_hen(p, &su);
703 			if (uc <= 0) {
704 				if (p->tcol->rmargin <= p->tcol->offset)
705 					continue;
706 				lsz = p->tcol->rmargin - p->tcol->offset;
707 			} else
708 				lsz = uc;
709 			if (*cp == seq[-1])
710 				uc = -1;
711 			else if (*cp == '\\') {
712 				seq = cp + 1;
713 				esc = mandoc_escape(&seq, &cp, &sz);
714 				switch (esc) {
715 				case ESCAPE_UNICODE:
716 					uc = mchars_num2uc(cp + 1, sz - 1);
717 					break;
718 				case ESCAPE_NUMBERED:
719 					uc = mchars_num2char(cp, sz);
720 					break;
721 				case ESCAPE_SPECIAL:
722 					uc = mchars_spec2cp(cp, sz);
723 					break;
724 				case ESCAPE_UNDEF:
725 					uc = *seq;
726 					break;
727 				default:
728 					uc = -1;
729 					break;
730 				}
731 			} else
732 				uc = *cp;
733 			if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
734 				uc = '_';
735 			if (p->enc == TERMENC_ASCII) {
736 				cp = ascii_uc2str(uc);
737 				csz = term_strlen(p, cp);
738 				ssz = strlen(cp);
739 			} else
740 				csz = (*p->width)(p, uc);
741 			while (lsz >= csz) {
742 				if (p->enc == TERMENC_ASCII)
743 					encode(p, cp, ssz);
744 				else
745 					encode1(p, uc);
746 				lsz -= csz;
747 			}
748 			continue;
749 		case ESCAPE_SKIPCHAR:
750 			p->flags |= TERMP_BACKAFTER;
751 			continue;
752 		case ESCAPE_OVERSTRIKE:
753 			cp = seq + sz;
754 			while (seq < cp) {
755 				if (*seq == '\\') {
756 					mandoc_escape(&seq, NULL, NULL);
757 					continue;
758 				}
759 				encode1(p, *seq++);
760 				if (seq < cp) {
761 					if (p->flags & TERMP_BACKBEFORE)
762 						p->flags |= TERMP_BACKAFTER;
763 					else
764 						p->flags |= TERMP_BACKBEFORE;
765 				}
766 			}
767 			/* Trim trailing backspace/blank pair. */
768 			if (p->tcol->lastcol > 2 &&
769 			    (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
770 			     p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
771 				p->tcol->lastcol -= 2;
772 			if (p->col > p->tcol->lastcol)
773 				p->col = p->tcol->lastcol;
774 			continue;
775 		case ESCAPE_IGNORE:
776 			bufferc(p, ASCII_NBRZW);
777 			continue;
778 		default:
779 			continue;
780 		}
781 
782 		/*
783 		 * Common handling for Unicode and numbered
784 		 * character escape sequences.
785 		 */
786 
787 		if (p->enc == TERMENC_ASCII) {
788 			cp = ascii_uc2str(uc);
789 			encode(p, cp, strlen(cp));
790 		} else {
791 			if ((uc < 0x20 && uc != 0x09) ||
792 			    (uc > 0x7E && uc < 0xA0))
793 				uc = 0xFFFD;
794 			encode1(p, uc);
795 		}
796 	}
797 	p->flags &= ~TERMP_NBRWORD;
798 }
799 
800 static void
adjbuf(struct termp_col * c,size_t sz)801 adjbuf(struct termp_col *c, size_t sz)
802 {
803 	if (c->maxcols == 0)
804 		c->maxcols = 1024;
805 	while (c->maxcols <= sz)
806 		c->maxcols <<= 2;
807 	c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
808 }
809 
810 static void
bufferc(struct termp * p,char c)811 bufferc(struct termp *p, char c)
812 {
813 	if (p->flags & TERMP_NOBUF) {
814 		(*p->letter)(p, c);
815 		return;
816 	}
817 	if (p->col + 1 >= p->tcol->maxcols)
818 		adjbuf(p->tcol, p->col + 1);
819 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
820 		p->tcol->buf[p->col] = c;
821 	if (p->tcol->lastcol < ++p->col)
822 		p->tcol->lastcol = p->col;
823 }
824 
825 void
term_tab_ref(struct termp * p)826 term_tab_ref(struct termp *p)
827 {
828 	if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
829 	    (p->flags & TERMP_NOBUF) == 0)
830 		bufferc(p, ASCII_TABREF);
831 }
832 
833 /*
834  * See encode().
835  * Do this for a single (probably unicode) value.
836  * Does not check for non-decorated glyphs.
837  */
838 static void
encode1(struct termp * p,int c)839 encode1(struct termp *p, int c)
840 {
841 	enum termfont	  f;
842 
843 	if (p->flags & TERMP_NOBUF) {
844 		(*p->letter)(p, c);
845 		return;
846 	}
847 
848 	if (p->col + 7 >= p->tcol->maxcols)
849 		adjbuf(p->tcol, p->col + 7);
850 
851 	f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
852 	    p->fontq[p->fonti] : TERMFONT_NONE;
853 
854 	if (p->flags & TERMP_BACKBEFORE) {
855 		if (p->tcol->buf[p->col - 1] == ' ' ||
856 		    p->tcol->buf[p->col - 1] == '\t')
857 			p->col--;
858 		else
859 			p->tcol->buf[p->col++] = '\b';
860 		p->flags &= ~TERMP_BACKBEFORE;
861 	}
862 	if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
863 		p->tcol->buf[p->col++] = '_';
864 		p->tcol->buf[p->col++] = '\b';
865 	}
866 	if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
867 		if (c == ASCII_HYPH)
868 			p->tcol->buf[p->col++] = '-';
869 		else
870 			p->tcol->buf[p->col++] = c;
871 		p->tcol->buf[p->col++] = '\b';
872 	}
873 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
874 		p->tcol->buf[p->col] = c;
875 	if (p->tcol->lastcol < ++p->col)
876 		p->tcol->lastcol = p->col;
877 	if (p->flags & TERMP_BACKAFTER) {
878 		p->flags |= TERMP_BACKBEFORE;
879 		p->flags &= ~TERMP_BACKAFTER;
880 	}
881 }
882 
883 static void
encode(struct termp * p,const char * word,size_t sz)884 encode(struct termp *p, const char *word, size_t sz)
885 {
886 	size_t		  i;
887 
888 	if (p->flags & TERMP_NOBUF) {
889 		for (i = 0; i < sz; i++)
890 			(*p->letter)(p, word[i]);
891 		return;
892 	}
893 
894 	if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
895 		adjbuf(p->tcol, p->col + 2 + (sz * 5));
896 
897 	for (i = 0; i < sz; i++) {
898 		if (ASCII_HYPH == word[i] ||
899 		    isgraph((unsigned char)word[i]))
900 			encode1(p, word[i]);
901 		else {
902 			if (p->tcol->lastcol <= p->col ||
903 			    (word[i] != ' ' && word[i] != ASCII_NBRSP))
904 				p->tcol->buf[p->col] = word[i];
905 			p->col++;
906 
907 			/*
908 			 * Postpone the effect of \z while handling
909 			 * an overstrike sequence from ascii_uc2str().
910 			 */
911 
912 			if (word[i] == '\b' &&
913 			    (p->flags & TERMP_BACKBEFORE)) {
914 				p->flags &= ~TERMP_BACKBEFORE;
915 				p->flags |= TERMP_BACKAFTER;
916 			}
917 		}
918 	}
919 	if (p->tcol->lastcol < p->col)
920 		p->tcol->lastcol = p->col;
921 }
922 
923 void
term_setwidth(struct termp * p,const char * wstr)924 term_setwidth(struct termp *p, const char *wstr)
925 {
926 	struct roffsu	 su;
927 	int		 iop, width;
928 
929 	iop = 0;
930 	width = 0;
931 	if (NULL != wstr) {
932 		switch (*wstr) {
933 		case '+':
934 			iop = 1;
935 			wstr++;
936 			break;
937 		case '-':
938 			iop = -1;
939 			wstr++;
940 			break;
941 		default:
942 			break;
943 		}
944 		if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
945 			width = term_hspan(p, &su);
946 		else
947 			iop = 0;
948 	}
949 	(*p->setwidth)(p, iop, width);
950 }
951 
952 size_t
term_len(const struct termp * p,size_t sz)953 term_len(const struct termp *p, size_t sz)
954 {
955 
956 	return (*p->width)(p, ' ') * sz;
957 }
958 
959 static size_t
cond_width(const struct termp * p,int c,int * skip)960 cond_width(const struct termp *p, int c, int *skip)
961 {
962 
963 	if (*skip) {
964 		(*skip) = 0;
965 		return 0;
966 	} else
967 		return (*p->width)(p, c);
968 }
969 
970 size_t
term_strlen(const struct termp * p,const char * cp)971 term_strlen(const struct termp *p, const char *cp)
972 {
973 	size_t		 sz, rsz, i;
974 	int		 ssz, skip, uc;
975 	const char	*seq, *rhs;
976 	enum mandoc_esc	 esc;
977 	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
978 		ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
979 
980 	/*
981 	 * Account for escaped sequences within string length
982 	 * calculations.  This follows the logic in term_word() as we
983 	 * must calculate the width of produced strings.
984 	 */
985 
986 	sz = 0;
987 	skip = 0;
988 	while ('\0' != *cp) {
989 		rsz = strcspn(cp, rej);
990 		for (i = 0; i < rsz; i++)
991 			sz += cond_width(p, *cp++, &skip);
992 
993 		switch (*cp) {
994 		case '\\':
995 			cp++;
996 			rhs = NULL;
997 			esc = mandoc_escape(&cp, &seq, &ssz);
998 			switch (esc) {
999 			case ESCAPE_UNICODE:
1000 				uc = mchars_num2uc(seq + 1, ssz - 1);
1001 				break;
1002 			case ESCAPE_NUMBERED:
1003 				uc = mchars_num2char(seq, ssz);
1004 				if (uc < 0)
1005 					continue;
1006 				break;
1007 			case ESCAPE_SPECIAL:
1008 				if (p->enc == TERMENC_ASCII) {
1009 					rhs = mchars_spec2str(seq, ssz, &rsz);
1010 					if (rhs != NULL)
1011 						break;
1012 				} else {
1013 					uc = mchars_spec2cp(seq, ssz);
1014 					if (uc > 0)
1015 						sz += cond_width(p, uc, &skip);
1016 				}
1017 				continue;
1018 			case ESCAPE_UNDEF:
1019 				uc = *seq;
1020 				break;
1021 			case ESCAPE_DEVICE:
1022 				if (p->type == TERMTYPE_PDF) {
1023 					rhs = "pdf";
1024 					rsz = 3;
1025 				} else if (p->type == TERMTYPE_PS) {
1026 					rhs = "ps";
1027 					rsz = 2;
1028 				} else if (p->enc == TERMENC_ASCII) {
1029 					rhs = "ascii";
1030 					rsz = 5;
1031 				} else {
1032 					rhs = "utf8";
1033 					rsz = 4;
1034 				}
1035 				break;
1036 			case ESCAPE_SKIPCHAR:
1037 				skip = 1;
1038 				continue;
1039 			case ESCAPE_OVERSTRIKE:
1040 				rsz = 0;
1041 				rhs = seq + ssz;
1042 				while (seq < rhs) {
1043 					if (*seq == '\\') {
1044 						mandoc_escape(&seq, NULL, NULL);
1045 						continue;
1046 					}
1047 					i = (*p->width)(p, *seq++);
1048 					if (rsz < i)
1049 						rsz = i;
1050 				}
1051 				sz += rsz;
1052 				continue;
1053 			default:
1054 				continue;
1055 			}
1056 
1057 			/*
1058 			 * Common handling for Unicode and numbered
1059 			 * character escape sequences.
1060 			 */
1061 
1062 			if (rhs == NULL) {
1063 				if (p->enc == TERMENC_ASCII) {
1064 					rhs = ascii_uc2str(uc);
1065 					rsz = strlen(rhs);
1066 				} else {
1067 					if ((uc < 0x20 && uc != 0x09) ||
1068 					    (uc > 0x7E && uc < 0xA0))
1069 						uc = 0xFFFD;
1070 					sz += cond_width(p, uc, &skip);
1071 					continue;
1072 				}
1073 			}
1074 
1075 			if (skip) {
1076 				skip = 0;
1077 				break;
1078 			}
1079 
1080 			/*
1081 			 * Common handling for all escape sequences
1082 			 * printing more than one character.
1083 			 */
1084 
1085 			for (i = 0; i < rsz; i++)
1086 				sz += (*p->width)(p, *rhs++);
1087 			break;
1088 		case ASCII_NBRSP:
1089 			sz += cond_width(p, ' ', &skip);
1090 			cp++;
1091 			break;
1092 		case ASCII_HYPH:
1093 			sz += cond_width(p, '-', &skip);
1094 			cp++;
1095 			break;
1096 		default:
1097 			break;
1098 		}
1099 	}
1100 
1101 	return sz;
1102 }
1103 
1104 int
term_vspan(const struct termp * p,const struct roffsu * su)1105 term_vspan(const struct termp *p, const struct roffsu *su)
1106 {
1107 	double		 r;
1108 	int		 ri;
1109 
1110 	switch (su->unit) {
1111 	case SCALE_BU:
1112 		r = su->scale / 40.0;
1113 		break;
1114 	case SCALE_CM:
1115 		r = su->scale * 6.0 / 2.54;
1116 		break;
1117 	case SCALE_FS:
1118 		r = su->scale * 65536.0 / 40.0;
1119 		break;
1120 	case SCALE_IN:
1121 		r = su->scale * 6.0;
1122 		break;
1123 	case SCALE_MM:
1124 		r = su->scale * 0.006;
1125 		break;
1126 	case SCALE_PC:
1127 		r = su->scale;
1128 		break;
1129 	case SCALE_PT:
1130 		r = su->scale / 12.0;
1131 		break;
1132 	case SCALE_EN:
1133 	case SCALE_EM:
1134 		r = su->scale * 0.6;
1135 		break;
1136 	case SCALE_VS:
1137 		r = su->scale;
1138 		break;
1139 	default:
1140 		abort();
1141 	}
1142 	ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1143 	return ri < 66 ? ri : 1;
1144 }
1145 
1146 /*
1147  * Convert a scaling width to basic units, rounding towards 0.
1148  */
1149 int
term_hspan(const struct termp * p,const struct roffsu * su)1150 term_hspan(const struct termp *p, const struct roffsu *su)
1151 {
1152 
1153 	return (*p->hspan)(p, su);
1154 }
1155 
1156 /*
1157  * Convert a scaling width to basic units, rounding to closest.
1158  */
1159 int
term_hen(const struct termp * p,const struct roffsu * su)1160 term_hen(const struct termp *p, const struct roffsu *su)
1161 {
1162 	int bu;
1163 
1164 	if ((bu = (*p->hspan)(p, su)) >= 0)
1165 		return (bu + 11) / 24;
1166 	else
1167 		return -((-bu + 11) / 24);
1168 }
1169