1 /* $OpenBSD: term.c,v 1.151 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3 * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include <sys/types.h>
19
20 #include <assert.h>
21 #include <ctype.h>
22 #include <stdint.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "mandoc.h"
28 #include "mandoc_aux.h"
29 #include "out.h"
30 #include "term.h"
31 #include "main.h"
32
/* Prototypes of the helper functions that are private to this file. */
static void	 adjbuf(struct termp_col *, size_t);
static void	 bufferc(struct termp *, char);
static size_t	 cond_width(const struct termp *, int, int *);
static void	 encode(struct termp *, const char *, size_t);
static void	 encode1(struct termp *, int);
static void	 endline(struct termp *);
static void	 term_field(struct termp *, size_t, size_t);
static void	 term_fill(struct termp *, size_t *, size_t *, size_t);
42
43
44 void
term_setcol(struct termp * p,size_t maxtcol)45 term_setcol(struct termp *p, size_t maxtcol)
46 {
47 if (maxtcol > p->maxtcol) {
48 p->tcols = mandoc_recallocarray(p->tcols,
49 p->maxtcol, maxtcol, sizeof(*p->tcols));
50 p->maxtcol = maxtcol;
51 }
52 p->lasttcol = maxtcol - 1;
53 p->tcol = p->tcols;
54 }
55
56 void
term_free(struct termp * p)57 term_free(struct termp *p)
58 {
59 term_tab_free();
60 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
61 free(p->tcol->buf);
62 free(p->tcols);
63 free(p->fontq);
64 free(p);
65 }
66
67 void
term_begin(struct termp * p,term_margin head,term_margin foot,const struct roff_meta * arg)68 term_begin(struct termp *p, term_margin head,
69 term_margin foot, const struct roff_meta *arg)
70 {
71
72 p->headf = head;
73 p->footf = foot;
74 p->argf = arg;
75 (*p->begin)(p);
76 }
77
78 void
term_end(struct termp * p)79 term_end(struct termp *p)
80 {
81
82 (*p->end)(p);
83 }
84
85 /*
86 * Flush a chunk of text. By default, break the output line each time
87 * the right margin is reached, and continue output on the next line
88 * at the same offset as the chunk itself. By default, also break the
89 * output line at the end of the chunk. There are many flags modifying
90 * this behaviour, see the comments in the body of the function.
91 */
92 void
term_flushln(struct termp * p)93 term_flushln(struct termp *p)
94 {
95 size_t vbl; /* Number of blanks to prepend to the output. */
96 size_t vbr; /* Actual visual position of the end of field. */
97 size_t vfield; /* Desired visual field width. */
98 size_t vtarget; /* Desired visual position of the right margin. */
99 size_t ic; /* Character position in the input buffer. */
100 size_t nbr; /* Number of characters to print in this field. */
101
102 /*
103 * Normally, start writing at the left margin, but with the
104 * NOPAD flag, start writing at the current position instead.
105 */
106
107 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
108 0 : p->tcol->offset - p->viscol;
109 if (p->minbl && vbl < p->minbl)
110 vbl = p->minbl;
111
112 if ((p->flags & TERMP_MULTICOL) == 0)
113 p->tcol->col = 0;
114
115 /* Loop over output lines. */
116
117 for (;;) {
118 vfield = p->tcol->rmargin > p->viscol + vbl ?
119 p->tcol->rmargin - p->viscol - vbl : 0;
120
121 /*
122 * Normally, break the line at the the right margin
123 * of the field, but with the NOBREAK flag, only
124 * break it at the max right margin of the screen,
125 * and with the BRNEVER flag, never break it at all.
126 */
127
128 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
129 p->maxrmargin > p->viscol + vbl ?
130 p->maxrmargin - p->viscol - vbl : 0;
131
132 /*
133 * Figure out how much text will fit in the field.
134 * If there is whitespace only, print nothing.
135 */
136
137 term_fill(p, &nbr, &vbr,
138 p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget);
139 if (nbr == 0)
140 break;
141
142 /*
143 * With the CENTER or RIGHT flag, increase the indentation
144 * to center the text between the left and right margins
145 * or to adjust it to the right margin, respectively.
146 */
147
148 if (vbr < vtarget) {
149 if (p->flags & TERMP_CENTER)
150 vbl += (vtarget - vbr) / 2;
151 else if (p->flags & TERMP_RIGHT)
152 vbl += vtarget - vbr;
153 }
154
155 /* Finally, print the field content. */
156
157 term_field(p, vbl, nbr);
158 if (vbr < vtarget)
159 p->tcol->taboff += vbr;
160 else
161 p->tcol->taboff += vtarget;
162 p->tcol->taboff += (*p->width)(p, ' ');
163
164 /*
165 * If there is no text left in the field, exit the loop.
166 * If the BRTRSP flag is set, consider trailing
167 * whitespace significant when deciding whether
168 * the field fits or not.
169 */
170
171 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
172 switch (p->tcol->buf[ic]) {
173 case '\t':
174 if (p->flags & TERMP_BRTRSP)
175 vbr = term_tab_next(vbr);
176 continue;
177 case ' ':
178 if (p->flags & TERMP_BRTRSP)
179 vbr += (*p->width)(p, ' ');
180 continue;
181 case '\n':
182 case ASCII_NBRZW:
183 case ASCII_BREAK:
184 case ASCII_TABREF:
185 continue;
186 default:
187 break;
188 }
189 break;
190 }
191 if (ic == p->tcol->lastcol)
192 break;
193
194 /*
195 * At the location of an automatic line break, input
196 * space characters are consumed by the line break.
197 */
198
199 while (p->tcol->col < p->tcol->lastcol &&
200 p->tcol->buf[p->tcol->col] == ' ')
201 p->tcol->col++;
202
203 /*
204 * In multi-column mode, leave the rest of the text
205 * in the buffer to be handled by a subsequent
206 * invocation, such that the other columns of the
207 * table can be handled first.
208 * In single-column mode, simply break the line.
209 */
210
211 if (p->flags & TERMP_MULTICOL)
212 return;
213
214 endline(p);
215
216 /*
217 * Normally, start the next line at the same indentation
218 * as this one, but with the BRIND flag, start it at the
219 * right margin instead. This is used together with
220 * NOBREAK for the tags in various kinds of tagged lists.
221 */
222
223 vbl = p->flags & TERMP_BRIND ?
224 p->tcol->rmargin : p->tcol->offset;
225 }
226
227 /* Reset output state in preparation for the next field. */
228
229 p->col = p->tcol->col = p->tcol->lastcol = 0;
230 p->minbl = p->trailspace;
231 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
232
233 if (p->flags & TERMP_MULTICOL)
234 return;
235
236 /*
237 * The HANG flag means that the next field
238 * always follows on the same line.
239 * The NOBREAK flag means that the next field
240 * follows on the same line unless the field was overrun.
241 * Normally, break the line at the end of each field.
242 */
243
244 if ((p->flags & TERMP_HANG) == 0 &&
245 ((p->flags & TERMP_NOBREAK) == 0 ||
246 vbr + term_len(p, p->trailspace) > vfield))
247 endline(p);
248 }
249
250 /*
251 * Store the number of input characters to print in this field in *nbr
252 * and their total visual width to print in *vbr.
253 * If there is only whitespace in the field, both remain zero.
254 * The desired visual width of the field is provided by vtarget.
255 * If the first word is longer, the field will be overrun.
256 */
257 static void
term_fill(struct termp * p,size_t * nbr,size_t * vbr,size_t vtarget)258 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
259 {
260 size_t ic; /* Character position in the input buffer. */
261 size_t vis; /* Visual position of the current character. */
262 size_t vn; /* Visual position of the next character. */
263 int breakline; /* Break at the end of this word. */
264 int graph; /* Last character was non-blank. */
265 int taboff; /* Temporary offset for literal tabs. */
266
267 *nbr = *vbr = vis = 0;
268 breakline = graph = 0;
269 taboff = p->tcol->taboff;
270 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
271 switch (p->tcol->buf[ic]) {
272 case '\b': /* Escape \o (overstrike) or backspace markup. */
273 assert(ic > 0);
274 vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
275 continue;
276
277 case ' ':
278 case ASCII_BREAK: /* Escape \: (breakpoint). */
279 vn = vis;
280 if (p->tcol->buf[ic] == ' ')
281 vn += (*p->width)(p, ' ');
282 /* Can break at the end of a word. */
283 if (breakline || vn > vtarget)
284 break;
285 if (graph) {
286 *nbr = ic;
287 *vbr = vis;
288 graph = 0;
289 }
290 vis = vn;
291 continue;
292
293 case '\n': /* Escape \p (break at the end of the word). */
294 breakline = 1;
295 continue;
296
297 case ASCII_HYPH: /* Breakable hyphen. */
298 graph = 1;
299 /*
300 * We are about to decide whether to break the
301 * line or not, so we no longer need this hyphen
302 * to be marked as breakable. Put back a real
303 * hyphen such that we get the correct width.
304 */
305 p->tcol->buf[ic] = '-';
306 vis += (*p->width)(p, '-');
307 if (vis > vtarget) {
308 ic++;
309 break;
310 }
311 *nbr = ic + 1;
312 *vbr = vis;
313 continue;
314
315 case ASCII_TABREF:
316 taboff = -vis - (*p->width)(p, ' ');
317 continue;
318
319 default:
320 switch (p->tcol->buf[ic]) {
321 case '\t':
322 if (taboff < 0 && (size_t)-taboff > vis)
323 vis = 0;
324 else
325 vis += taboff;
326 vis = term_tab_next(vis);
327 vis -= taboff;
328 break;
329 case ASCII_NBRZW: /* Non-breakable zero-width. */
330 break;
331 case ASCII_NBRSP: /* Non-breakable space. */
332 p->tcol->buf[ic] = ' ';
333 /* FALLTHROUGH */
334 default: /* Printable character. */
335 vis += (*p->width)(p, p->tcol->buf[ic]);
336 break;
337 }
338 graph = 1;
339 if (vis > vtarget && *nbr > 0)
340 return;
341 continue;
342 }
343 break;
344 }
345
346 /*
347 * If the last word extends to the end of the field without any
348 * trailing whitespace, the loop could not check yet whether it
349 * can remain on this line. So do the check now.
350 */
351
352 if (graph && (vis <= vtarget || *nbr == 0)) {
353 *nbr = ic;
354 *vbr = vis;
355 }
356 }
357
358 /*
359 * Print the contents of one field
360 * with an indentation of vbl visual columns,
361 * and an input string length of nbr characters.
362 */
363 static void
term_field(struct termp * p,size_t vbl,size_t nbr)364 term_field(struct termp *p, size_t vbl, size_t nbr)
365 {
366 size_t ic; /* Character position in the input buffer. */
367 size_t vis; /* Visual position of the current character. */
368 size_t vt; /* Visual position including tab offset. */
369 size_t dv; /* Visual width of the current character. */
370 int taboff; /* Temporary offset for literal tabs. */
371
372 vis = 0;
373 taboff = p->tcol->taboff;
374 for (ic = p->tcol->col; ic < nbr; ic++) {
375
376 /*
377 * To avoid the printing of trailing whitespace,
378 * do not print whitespace right away, only count it.
379 */
380
381 switch (p->tcol->buf[ic]) {
382 case '\n':
383 case ASCII_BREAK:
384 case ASCII_NBRZW:
385 continue;
386 case ASCII_TABREF:
387 taboff = -vis - (*p->width)(p, ' ');
388 continue;
389 case '\t':
390 case ' ':
391 case ASCII_NBRSP:
392 if (p->tcol->buf[ic] == '\t') {
393 if (taboff < 0 && (size_t)-taboff > vis)
394 vt = 0;
395 else
396 vt = vis + taboff;
397 dv = term_tab_next(vt) - vt;
398 } else
399 dv = (*p->width)(p, ' ');
400 vbl += dv;
401 vis += dv;
402 continue;
403 default:
404 break;
405 }
406
407 /*
408 * We found a non-blank character to print,
409 * so write preceding white space now.
410 */
411
412 if (vbl > 0) {
413 (*p->advance)(p, vbl);
414 p->viscol += vbl;
415 vbl = 0;
416 }
417
418 /* Print the character and adjust the visual position. */
419
420 (*p->letter)(p, p->tcol->buf[ic]);
421 if (p->tcol->buf[ic] == '\b') {
422 dv = (*p->width)(p, p->tcol->buf[ic - 1]);
423 p->viscol -= dv;
424 vis -= dv;
425 } else {
426 dv = (*p->width)(p, p->tcol->buf[ic]);
427 p->viscol += dv;
428 vis += dv;
429 }
430 }
431 p->tcol->col = nbr;
432 }
433
434 static void
endline(struct termp * p)435 endline(struct termp *p)
436 {
437 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
438 p->mc = NULL;
439 p->flags &= ~TERMP_ENDMC;
440 }
441 if (p->mc != NULL) {
442 if (p->viscol && p->maxrmargin >= p->viscol)
443 (*p->advance)(p, p->maxrmargin - p->viscol + 1);
444 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
445 term_word(p, p->mc);
446 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
447 }
448 p->viscol = 0;
449 p->minbl = 0;
450 (*p->endline)(p);
451 }
452
453 /*
454 * A newline only breaks an existing line; it won't assert vertical
455 * space. All data in the output buffer is flushed prior to the newline
456 * assertion.
457 */
458 void
term_newln(struct termp * p)459 term_newln(struct termp *p)
460 {
461 p->flags |= TERMP_NOSPACE;
462 if (p->tcol->lastcol || p->viscol)
463 term_flushln(p);
464 p->tcol->taboff = 0;
465 }
466
467 /*
468 * Asserts a vertical space (a full, empty line-break between lines).
469 * Note that if used twice, this will cause two blank spaces and so on.
470 * All data in the output buffer is flushed prior to the newline
471 * assertion.
472 */
473 void
term_vspace(struct termp * p)474 term_vspace(struct termp *p)
475 {
476
477 term_newln(p);
478 p->viscol = 0;
479 p->minbl = 0;
480 if (0 < p->skipvsp)
481 p->skipvsp--;
482 else
483 (*p->endline)(p);
484 }
485
486 /* Swap current and previous font; for \fP and .ft P */
487 void
term_fontlast(struct termp * p)488 term_fontlast(struct termp *p)
489 {
490 enum termfont f;
491
492 f = p->fontl;
493 p->fontl = p->fontq[p->fonti];
494 p->fontq[p->fonti] = f;
495 }
496
497 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
498 void
term_fontrepl(struct termp * p,enum termfont f)499 term_fontrepl(struct termp *p, enum termfont f)
500 {
501
502 p->fontl = p->fontq[p->fonti];
503 p->fontq[p->fonti] = f;
504 }
505
506 /* Set font, save previous. */
507 void
term_fontpush(struct termp * p,enum termfont f)508 term_fontpush(struct termp *p, enum termfont f)
509 {
510
511 p->fontl = p->fontq[p->fonti];
512 if (++p->fonti == p->fontsz) {
513 p->fontsz += 8;
514 p->fontq = mandoc_reallocarray(p->fontq,
515 p->fontsz, sizeof(*p->fontq));
516 }
517 p->fontq[p->fonti] = f;
518 }
519
520 /* Flush to make the saved pointer current again. */
521 void
term_fontpopq(struct termp * p,int i)522 term_fontpopq(struct termp *p, int i)
523 {
524
525 assert(i >= 0);
526 if (p->fonti > i)
527 p->fonti = i;
528 }
529
530 /* Pop one font off the stack. */
531 void
term_fontpop(struct termp * p)532 term_fontpop(struct termp *p)
533 {
534
535 assert(p->fonti);
536 p->fonti--;
537 }
538
539 /*
540 * Handle pwords, partial words, which may be either a single word or a
541 * phrase that cannot be broken down (such as a literal string). This
542 * handles word styling.
543 */
544 void
term_word(struct termp * p,const char * word)545 term_word(struct termp *p, const char *word)
546 {
547 struct roffsu su;
548 const char nbrsp[2] = { ASCII_NBRSP, 0 };
549 const char *seq, *cp;
550 int sz, uc;
551 size_t csz, lsz, ssz;
552 enum mandoc_esc esc;
553
554 if ((p->flags & TERMP_NOBUF) == 0) {
555 if ((p->flags & TERMP_NOSPACE) == 0) {
556 if ((p->flags & TERMP_KEEP) == 0) {
557 bufferc(p, ' ');
558 if (p->flags & TERMP_SENTENCE)
559 bufferc(p, ' ');
560 } else
561 bufferc(p, ASCII_NBRSP);
562 }
563 if (p->flags & TERMP_PREKEEP)
564 p->flags |= TERMP_KEEP;
565 if (p->flags & TERMP_NONOSPACE)
566 p->flags |= TERMP_NOSPACE;
567 else
568 p->flags &= ~TERMP_NOSPACE;
569 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
570 p->skipvsp = 0;
571 }
572
573 while ('\0' != *word) {
574 if ('\\' != *word) {
575 if (TERMP_NBRWORD & p->flags) {
576 if (' ' == *word) {
577 encode(p, nbrsp, 1);
578 word++;
579 continue;
580 }
581 ssz = strcspn(word, "\\ ");
582 } else
583 ssz = strcspn(word, "\\");
584 encode(p, word, ssz);
585 word += (int)ssz;
586 continue;
587 }
588
589 word++;
590 esc = mandoc_escape(&word, &seq, &sz);
591 switch (esc) {
592 case ESCAPE_UNICODE:
593 uc = mchars_num2uc(seq + 1, sz - 1);
594 break;
595 case ESCAPE_NUMBERED:
596 uc = mchars_num2char(seq, sz);
597 if (uc >= 0)
598 break;
599 bufferc(p, ASCII_NBRZW);
600 continue;
601 case ESCAPE_SPECIAL:
602 if (p->enc == TERMENC_ASCII) {
603 cp = mchars_spec2str(seq, sz, &ssz);
604 if (cp != NULL)
605 encode(p, cp, ssz);
606 else
607 bufferc(p, ASCII_NBRZW);
608 } else {
609 uc = mchars_spec2cp(seq, sz);
610 if (uc > 0)
611 encode1(p, uc);
612 else
613 bufferc(p, ASCII_NBRZW);
614 }
615 continue;
616 case ESCAPE_UNDEF:
617 uc = *seq;
618 break;
619 case ESCAPE_FONTBOLD:
620 case ESCAPE_FONTCB:
621 term_fontrepl(p, TERMFONT_BOLD);
622 continue;
623 case ESCAPE_FONTITALIC:
624 case ESCAPE_FONTCI:
625 term_fontrepl(p, TERMFONT_UNDER);
626 continue;
627 case ESCAPE_FONTBI:
628 term_fontrepl(p, TERMFONT_BI);
629 continue;
630 case ESCAPE_FONT:
631 case ESCAPE_FONTCR:
632 case ESCAPE_FONTROMAN:
633 term_fontrepl(p, TERMFONT_NONE);
634 continue;
635 case ESCAPE_FONTPREV:
636 term_fontlast(p);
637 continue;
638 case ESCAPE_BREAK:
639 bufferc(p, '\n');
640 continue;
641 case ESCAPE_NOSPACE:
642 if (p->flags & TERMP_BACKAFTER)
643 p->flags &= ~TERMP_BACKAFTER;
644 else if (*word == '\0')
645 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
646 continue;
647 case ESCAPE_DEVICE:
648 if (p->type == TERMTYPE_PDF)
649 encode(p, "pdf", 3);
650 else if (p->type == TERMTYPE_PS)
651 encode(p, "ps", 2);
652 else if (p->enc == TERMENC_ASCII)
653 encode(p, "ascii", 5);
654 else
655 encode(p, "utf8", 4);
656 continue;
657 case ESCAPE_HORIZ:
658 if (p->flags & TERMP_BACKAFTER) {
659 p->flags &= ~TERMP_BACKAFTER;
660 continue;
661 }
662 if (*seq == '|') {
663 seq++;
664 uc = -p->col;
665 } else
666 uc = 0;
667 if (a2roffsu(seq, &su, SCALE_EM) == NULL)
668 continue;
669 uc += term_hen(p, &su);
670 if (uc >= 0) {
671 while (uc > 0) {
672 uc -= term_len(p, 1);
673 if (p->flags & TERMP_BACKBEFORE)
674 p->flags &= ~TERMP_BACKBEFORE;
675 else
676 bufferc(p, ASCII_NBRSP);
677 }
678 continue;
679 }
680 if (p->flags & TERMP_BACKBEFORE) {
681 p->flags &= ~TERMP_BACKBEFORE;
682 assert(p->col > 0);
683 p->col--;
684 }
685 if (p->col >= (size_t)(-uc)) {
686 p->col += uc;
687 } else {
688 uc += p->col;
689 p->col = 0;
690 if (p->tcol->offset > (size_t)(-uc)) {
691 p->ti += uc;
692 p->tcol->offset += uc;
693 } else {
694 p->ti -= p->tcol->offset;
695 p->tcol->offset = 0;
696 }
697 }
698 continue;
699 case ESCAPE_HLINE:
700 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
701 continue;
702 uc = term_hen(p, &su);
703 if (uc <= 0) {
704 if (p->tcol->rmargin <= p->tcol->offset)
705 continue;
706 lsz = p->tcol->rmargin - p->tcol->offset;
707 } else
708 lsz = uc;
709 if (*cp == seq[-1])
710 uc = -1;
711 else if (*cp == '\\') {
712 seq = cp + 1;
713 esc = mandoc_escape(&seq, &cp, &sz);
714 switch (esc) {
715 case ESCAPE_UNICODE:
716 uc = mchars_num2uc(cp + 1, sz - 1);
717 break;
718 case ESCAPE_NUMBERED:
719 uc = mchars_num2char(cp, sz);
720 break;
721 case ESCAPE_SPECIAL:
722 uc = mchars_spec2cp(cp, sz);
723 break;
724 case ESCAPE_UNDEF:
725 uc = *seq;
726 break;
727 default:
728 uc = -1;
729 break;
730 }
731 } else
732 uc = *cp;
733 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
734 uc = '_';
735 if (p->enc == TERMENC_ASCII) {
736 cp = ascii_uc2str(uc);
737 csz = term_strlen(p, cp);
738 ssz = strlen(cp);
739 } else
740 csz = (*p->width)(p, uc);
741 while (lsz >= csz) {
742 if (p->enc == TERMENC_ASCII)
743 encode(p, cp, ssz);
744 else
745 encode1(p, uc);
746 lsz -= csz;
747 }
748 continue;
749 case ESCAPE_SKIPCHAR:
750 p->flags |= TERMP_BACKAFTER;
751 continue;
752 case ESCAPE_OVERSTRIKE:
753 cp = seq + sz;
754 while (seq < cp) {
755 if (*seq == '\\') {
756 mandoc_escape(&seq, NULL, NULL);
757 continue;
758 }
759 encode1(p, *seq++);
760 if (seq < cp) {
761 if (p->flags & TERMP_BACKBEFORE)
762 p->flags |= TERMP_BACKAFTER;
763 else
764 p->flags |= TERMP_BACKBEFORE;
765 }
766 }
767 /* Trim trailing backspace/blank pair. */
768 if (p->tcol->lastcol > 2 &&
769 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
770 p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
771 p->tcol->lastcol -= 2;
772 if (p->col > p->tcol->lastcol)
773 p->col = p->tcol->lastcol;
774 continue;
775 case ESCAPE_IGNORE:
776 bufferc(p, ASCII_NBRZW);
777 continue;
778 default:
779 continue;
780 }
781
782 /*
783 * Common handling for Unicode and numbered
784 * character escape sequences.
785 */
786
787 if (p->enc == TERMENC_ASCII) {
788 cp = ascii_uc2str(uc);
789 encode(p, cp, strlen(cp));
790 } else {
791 if ((uc < 0x20 && uc != 0x09) ||
792 (uc > 0x7E && uc < 0xA0))
793 uc = 0xFFFD;
794 encode1(p, uc);
795 }
796 }
797 p->flags &= ~TERMP_NBRWORD;
798 }
799
800 static void
adjbuf(struct termp_col * c,size_t sz)801 adjbuf(struct termp_col *c, size_t sz)
802 {
803 if (c->maxcols == 0)
804 c->maxcols = 1024;
805 while (c->maxcols <= sz)
806 c->maxcols <<= 2;
807 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
808 }
809
810 static void
bufferc(struct termp * p,char c)811 bufferc(struct termp *p, char c)
812 {
813 if (p->flags & TERMP_NOBUF) {
814 (*p->letter)(p, c);
815 return;
816 }
817 if (p->col + 1 >= p->tcol->maxcols)
818 adjbuf(p->tcol, p->col + 1);
819 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
820 p->tcol->buf[p->col] = c;
821 if (p->tcol->lastcol < ++p->col)
822 p->tcol->lastcol = p->col;
823 }
824
825 void
term_tab_ref(struct termp * p)826 term_tab_ref(struct termp *p)
827 {
828 if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
829 (p->flags & TERMP_NOBUF) == 0)
830 bufferc(p, ASCII_TABREF);
831 }
832
833 /*
834 * See encode().
835 * Do this for a single (probably unicode) value.
836 * Does not check for non-decorated glyphs.
837 */
838 static void
encode1(struct termp * p,int c)839 encode1(struct termp *p, int c)
840 {
841 enum termfont f;
842
843 if (p->flags & TERMP_NOBUF) {
844 (*p->letter)(p, c);
845 return;
846 }
847
848 if (p->col + 7 >= p->tcol->maxcols)
849 adjbuf(p->tcol, p->col + 7);
850
851 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
852 p->fontq[p->fonti] : TERMFONT_NONE;
853
854 if (p->flags & TERMP_BACKBEFORE) {
855 if (p->tcol->buf[p->col - 1] == ' ' ||
856 p->tcol->buf[p->col - 1] == '\t')
857 p->col--;
858 else
859 p->tcol->buf[p->col++] = '\b';
860 p->flags &= ~TERMP_BACKBEFORE;
861 }
862 if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
863 p->tcol->buf[p->col++] = '_';
864 p->tcol->buf[p->col++] = '\b';
865 }
866 if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
867 if (c == ASCII_HYPH)
868 p->tcol->buf[p->col++] = '-';
869 else
870 p->tcol->buf[p->col++] = c;
871 p->tcol->buf[p->col++] = '\b';
872 }
873 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
874 p->tcol->buf[p->col] = c;
875 if (p->tcol->lastcol < ++p->col)
876 p->tcol->lastcol = p->col;
877 if (p->flags & TERMP_BACKAFTER) {
878 p->flags |= TERMP_BACKBEFORE;
879 p->flags &= ~TERMP_BACKAFTER;
880 }
881 }
882
883 static void
encode(struct termp * p,const char * word,size_t sz)884 encode(struct termp *p, const char *word, size_t sz)
885 {
886 size_t i;
887
888 if (p->flags & TERMP_NOBUF) {
889 for (i = 0; i < sz; i++)
890 (*p->letter)(p, word[i]);
891 return;
892 }
893
894 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
895 adjbuf(p->tcol, p->col + 2 + (sz * 5));
896
897 for (i = 0; i < sz; i++) {
898 if (ASCII_HYPH == word[i] ||
899 isgraph((unsigned char)word[i]))
900 encode1(p, word[i]);
901 else {
902 if (p->tcol->lastcol <= p->col ||
903 (word[i] != ' ' && word[i] != ASCII_NBRSP))
904 p->tcol->buf[p->col] = word[i];
905 p->col++;
906
907 /*
908 * Postpone the effect of \z while handling
909 * an overstrike sequence from ascii_uc2str().
910 */
911
912 if (word[i] == '\b' &&
913 (p->flags & TERMP_BACKBEFORE)) {
914 p->flags &= ~TERMP_BACKBEFORE;
915 p->flags |= TERMP_BACKAFTER;
916 }
917 }
918 }
919 if (p->tcol->lastcol < p->col)
920 p->tcol->lastcol = p->col;
921 }
922
923 void
term_setwidth(struct termp * p,const char * wstr)924 term_setwidth(struct termp *p, const char *wstr)
925 {
926 struct roffsu su;
927 int iop, width;
928
929 iop = 0;
930 width = 0;
931 if (NULL != wstr) {
932 switch (*wstr) {
933 case '+':
934 iop = 1;
935 wstr++;
936 break;
937 case '-':
938 iop = -1;
939 wstr++;
940 break;
941 default:
942 break;
943 }
944 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
945 width = term_hspan(p, &su);
946 else
947 iop = 0;
948 }
949 (*p->setwidth)(p, iop, width);
950 }
951
952 size_t
term_len(const struct termp * p,size_t sz)953 term_len(const struct termp *p, size_t sz)
954 {
955
956 return (*p->width)(p, ' ') * sz;
957 }
958
959 static size_t
cond_width(const struct termp * p,int c,int * skip)960 cond_width(const struct termp *p, int c, int *skip)
961 {
962
963 if (*skip) {
964 (*skip) = 0;
965 return 0;
966 } else
967 return (*p->width)(p, c);
968 }
969
970 size_t
term_strlen(const struct termp * p,const char * cp)971 term_strlen(const struct termp *p, const char *cp)
972 {
973 size_t sz, rsz, i;
974 int ssz, skip, uc;
975 const char *seq, *rhs;
976 enum mandoc_esc esc;
977 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
978 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
979
980 /*
981 * Account for escaped sequences within string length
982 * calculations. This follows the logic in term_word() as we
983 * must calculate the width of produced strings.
984 */
985
986 sz = 0;
987 skip = 0;
988 while ('\0' != *cp) {
989 rsz = strcspn(cp, rej);
990 for (i = 0; i < rsz; i++)
991 sz += cond_width(p, *cp++, &skip);
992
993 switch (*cp) {
994 case '\\':
995 cp++;
996 rhs = NULL;
997 esc = mandoc_escape(&cp, &seq, &ssz);
998 switch (esc) {
999 case ESCAPE_UNICODE:
1000 uc = mchars_num2uc(seq + 1, ssz - 1);
1001 break;
1002 case ESCAPE_NUMBERED:
1003 uc = mchars_num2char(seq, ssz);
1004 if (uc < 0)
1005 continue;
1006 break;
1007 case ESCAPE_SPECIAL:
1008 if (p->enc == TERMENC_ASCII) {
1009 rhs = mchars_spec2str(seq, ssz, &rsz);
1010 if (rhs != NULL)
1011 break;
1012 } else {
1013 uc = mchars_spec2cp(seq, ssz);
1014 if (uc > 0)
1015 sz += cond_width(p, uc, &skip);
1016 }
1017 continue;
1018 case ESCAPE_UNDEF:
1019 uc = *seq;
1020 break;
1021 case ESCAPE_DEVICE:
1022 if (p->type == TERMTYPE_PDF) {
1023 rhs = "pdf";
1024 rsz = 3;
1025 } else if (p->type == TERMTYPE_PS) {
1026 rhs = "ps";
1027 rsz = 2;
1028 } else if (p->enc == TERMENC_ASCII) {
1029 rhs = "ascii";
1030 rsz = 5;
1031 } else {
1032 rhs = "utf8";
1033 rsz = 4;
1034 }
1035 break;
1036 case ESCAPE_SKIPCHAR:
1037 skip = 1;
1038 continue;
1039 case ESCAPE_OVERSTRIKE:
1040 rsz = 0;
1041 rhs = seq + ssz;
1042 while (seq < rhs) {
1043 if (*seq == '\\') {
1044 mandoc_escape(&seq, NULL, NULL);
1045 continue;
1046 }
1047 i = (*p->width)(p, *seq++);
1048 if (rsz < i)
1049 rsz = i;
1050 }
1051 sz += rsz;
1052 continue;
1053 default:
1054 continue;
1055 }
1056
1057 /*
1058 * Common handling for Unicode and numbered
1059 * character escape sequences.
1060 */
1061
1062 if (rhs == NULL) {
1063 if (p->enc == TERMENC_ASCII) {
1064 rhs = ascii_uc2str(uc);
1065 rsz = strlen(rhs);
1066 } else {
1067 if ((uc < 0x20 && uc != 0x09) ||
1068 (uc > 0x7E && uc < 0xA0))
1069 uc = 0xFFFD;
1070 sz += cond_width(p, uc, &skip);
1071 continue;
1072 }
1073 }
1074
1075 if (skip) {
1076 skip = 0;
1077 break;
1078 }
1079
1080 /*
1081 * Common handling for all escape sequences
1082 * printing more than one character.
1083 */
1084
1085 for (i = 0; i < rsz; i++)
1086 sz += (*p->width)(p, *rhs++);
1087 break;
1088 case ASCII_NBRSP:
1089 sz += cond_width(p, ' ', &skip);
1090 cp++;
1091 break;
1092 case ASCII_HYPH:
1093 sz += cond_width(p, '-', &skip);
1094 cp++;
1095 break;
1096 default:
1097 break;
1098 }
1099 }
1100
1101 return sz;
1102 }
1103
1104 int
term_vspan(const struct termp * p,const struct roffsu * su)1105 term_vspan(const struct termp *p, const struct roffsu *su)
1106 {
1107 double r;
1108 int ri;
1109
1110 switch (su->unit) {
1111 case SCALE_BU:
1112 r = su->scale / 40.0;
1113 break;
1114 case SCALE_CM:
1115 r = su->scale * 6.0 / 2.54;
1116 break;
1117 case SCALE_FS:
1118 r = su->scale * 65536.0 / 40.0;
1119 break;
1120 case SCALE_IN:
1121 r = su->scale * 6.0;
1122 break;
1123 case SCALE_MM:
1124 r = su->scale * 0.006;
1125 break;
1126 case SCALE_PC:
1127 r = su->scale;
1128 break;
1129 case SCALE_PT:
1130 r = su->scale / 12.0;
1131 break;
1132 case SCALE_EN:
1133 case SCALE_EM:
1134 r = su->scale * 0.6;
1135 break;
1136 case SCALE_VS:
1137 r = su->scale;
1138 break;
1139 default:
1140 abort();
1141 }
1142 ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1143 return ri < 66 ? ri : 1;
1144 }
1145
1146 /*
1147 * Convert a scaling width to basic units, rounding towards 0.
1148 */
1149 int
term_hspan(const struct termp * p,const struct roffsu * su)1150 term_hspan(const struct termp *p, const struct roffsu *su)
1151 {
1152
1153 return (*p->hspan)(p, su);
1154 }
1155
1156 /*
1157 * Convert a scaling width to basic units, rounding to closest.
1158 */
1159 int
term_hen(const struct termp * p,const struct roffsu * su)1160 term_hen(const struct termp *p, const struct roffsu *su)
1161 {
1162 int bu;
1163
1164 if ((bu = (*p->hspan)(p, su)) >= 0)
1165 return (bu + 11) / 24;
1166 else
1167 return -((-bu + 11) / 24);
1168 }
1169