1 /* $OpenBSD: term.c,v 1.96 2014/11/21 01:52:45 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010-2014 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <string.h> 25 26 #include "mandoc.h" 27 #include "mandoc_aux.h" 28 #include "out.h" 29 #include "term.h" 30 #include "main.h" 31 32 static size_t cond_width(const struct termp *, int, int *); 33 static void adjbuf(struct termp *p, size_t); 34 static void bufferc(struct termp *, char); 35 static void encode(struct termp *, const char *, size_t); 36 static void encode1(struct termp *, int); 37 38 39 void 40 term_free(struct termp *p) 41 { 42 43 free(p->buf); 44 free(p); 45 } 46 47 void 48 term_begin(struct termp *p, term_margin head, 49 term_margin foot, const void *arg) 50 { 51 52 p->headf = head; 53 p->footf = foot; 54 p->argf = arg; 55 (*p->begin)(p); 56 } 57 58 void 59 term_end(struct termp *p) 60 { 61 62 (*p->end)(p); 63 } 64 65 /* 66 * Flush a chunk of text. By default, break the output line each time 67 * the right margin is reached, and continue output on the next line 68 * at the same offset as the chunk itself. By default, also break the 69 * output line at the end of the chunk. 70 * The following flags may be specified: 71 * 72 * - TERMP_NOBREAK: Do not break the output line at the right margin, 73 * but only at the max right margin. Also, do not break the output 74 * line at the end of the chunk, such that the next call can pad to 75 * the next column. However, if less than p->trailspace blanks, 76 * which can be 0, 1, or 2, remain to the right margin, the line 77 * will be broken. 78 * - TERMP_BRIND: If the chunk does not fit and the output line has 79 * to be broken, start the next line at the right margin instead 80 * of at the offset. Used together with TERMP_NOBREAK for the tags 81 * in various kinds of tagged lists. 82 * - TERMP_DANGLE: Do not break the output line at the right margin, 83 * append the next chunk after it even if this one is too long. 84 * To be used together with TERMP_NOBREAK. 85 * - TERMP_HANG: Like TERMP_DANGLE, and also suppress padding before 86 * the next chunk if this column is not full. 87 */ 88 void 89 term_flushln(struct termp *p) 90 { 91 size_t i; /* current input position in p->buf */ 92 int ntab; /* number of tabs to prepend */ 93 size_t vis; /* current visual position on output */ 94 size_t vbl; /* number of blanks to prepend to output */ 95 size_t vend; /* end of word visual position on output */ 96 size_t bp; /* visual right border position */ 97 size_t dv; /* temporary for visual pos calculations */ 98 size_t j; /* temporary loop index for p->buf */ 99 size_t jhy; /* last hyph before overflow w/r/t j */ 100 size_t maxvis; /* output position of visible boundary */ 101 size_t rmargin; /* the rightmost of the two margins */ 102 103 /* 104 * First, establish the maximum columns of "visible" content. 105 * This is usually the difference between the right-margin and 106 * an indentation, but can be, for tagged lists or columns, a 107 * small set of values. 108 * 109 * The following unsigned-signed subtractions look strange, 110 * but they are actually correct. If the int p->overstep 111 * is negative, it gets sign extended. Subtracting that 112 * very large size_t effectively adds a small number to dv. 113 */ 114 rmargin = p->rmargin > p->offset ? p->rmargin : p->offset; 115 dv = p->rmargin - p->offset; 116 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 117 118 if (p->flags & TERMP_NOBREAK) { 119 dv = p->maxrmargin > p->offset ? 120 p->maxrmargin - p->offset : 0; 121 bp = (int)dv > p->overstep ? 122 dv - (size_t)p->overstep : 0; 123 } else 124 bp = maxvis; 125 126 /* 127 * Calculate the required amount of padding. 128 */ 129 vbl = p->offset + p->overstep > p->viscol ? 130 p->offset + p->overstep - p->viscol : 0; 131 132 vis = vend = 0; 133 i = 0; 134 135 while (i < p->col) { 136 /* 137 * Handle literal tab characters: collapse all 138 * subsequent tabs into a single huge set of spaces. 139 */ 140 ntab = 0; 141 while (i < p->col && '\t' == p->buf[i]) { 142 vend = (vis / p->tabwidth + 1) * p->tabwidth; 143 vbl += vend - vis; 144 vis = vend; 145 ntab++; 146 i++; 147 } 148 149 /* 150 * Count up visible word characters. Control sequences 151 * (starting with the CSI) aren't counted. A space 152 * generates a non-printing word, which is valid (the 153 * space is printed according to regular spacing rules). 154 */ 155 156 for (j = i, jhy = 0; j < p->col; j++) { 157 if (' ' == p->buf[j] || '\t' == p->buf[j]) 158 break; 159 160 /* Back over the the last printed character. */ 161 if (8 == p->buf[j]) { 162 assert(j); 163 vend -= (*p->width)(p, p->buf[j - 1]); 164 continue; 165 } 166 167 /* Regular word. */ 168 /* Break at the hyphen point if we overrun. */ 169 if (vend > vis && vend < bp && 170 (ASCII_HYPH == p->buf[j] || 171 ASCII_BREAK == p->buf[j])) 172 jhy = j; 173 174 /* 175 * Hyphenation now decided, put back a real 176 * hyphen such that we get the correct width. 177 */ 178 if (ASCII_HYPH == p->buf[j]) 179 p->buf[j] = '-'; 180 181 vend += (*p->width)(p, p->buf[j]); 182 } 183 184 /* 185 * Find out whether we would exceed the right margin. 186 * If so, break to the next line. 187 */ 188 if (vend > bp && 0 == jhy && vis > 0) { 189 vend -= vis; 190 (*p->endline)(p); 191 p->viscol = 0; 192 if (TERMP_BRIND & p->flags) { 193 vbl = rmargin; 194 vend += rmargin - p->offset; 195 } else 196 vbl = p->offset; 197 198 /* use pending tabs on the new line */ 199 200 if (0 < ntab) 201 vbl += ntab * p->tabwidth; 202 203 /* 204 * Remove the p->overstep width. 205 * Again, if p->overstep is negative, 206 * sign extension does the right thing. 207 */ 208 209 bp += (size_t)p->overstep; 210 p->overstep = 0; 211 } 212 213 /* Write out the [remaining] word. */ 214 for ( ; i < p->col; i++) { 215 if (vend > bp && jhy > 0 && i > jhy) 216 break; 217 if ('\t' == p->buf[i]) 218 break; 219 if (' ' == p->buf[i]) { 220 j = i; 221 while (i < p->col && ' ' == p->buf[i]) 222 i++; 223 dv = (i - j) * (*p->width)(p, ' '); 224 vbl += dv; 225 vend += dv; 226 break; 227 } 228 if (ASCII_NBRSP == p->buf[i]) { 229 vbl += (*p->width)(p, ' '); 230 continue; 231 } 232 if (ASCII_BREAK == p->buf[i]) 233 continue; 234 235 /* 236 * Now we definitely know there will be 237 * printable characters to output, 238 * so write preceding white space now. 239 */ 240 if (vbl) { 241 (*p->advance)(p, vbl); 242 p->viscol += vbl; 243 vbl = 0; 244 } 245 246 (*p->letter)(p, p->buf[i]); 247 if (8 == p->buf[i]) 248 p->viscol -= (*p->width)(p, p->buf[i-1]); 249 else 250 p->viscol += (*p->width)(p, p->buf[i]); 251 } 252 vis = vend; 253 } 254 255 /* 256 * If there was trailing white space, it was not printed; 257 * so reset the cursor position accordingly. 258 */ 259 if (vis > vbl) 260 vis -= vbl; 261 else 262 vis = 0; 263 264 p->col = 0; 265 p->overstep = 0; 266 267 if ( ! (TERMP_NOBREAK & p->flags)) { 268 p->viscol = 0; 269 (*p->endline)(p); 270 return; 271 } 272 273 if (TERMP_HANG & p->flags) { 274 p->overstep = (int)(vis - maxvis + 275 p->trailspace * (*p->width)(p, ' ')); 276 277 /* 278 * If we have overstepped the margin, temporarily move 279 * it to the right and flag the rest of the line to be 280 * shorter. 281 * If there is a request to keep the columns together, 282 * allow negative overstep when the column is not full. 283 */ 284 if (p->trailspace && p->overstep < 0) 285 p->overstep = 0; 286 return; 287 288 } else if (TERMP_DANGLE & p->flags) 289 return; 290 291 /* If the column was overrun, break the line. */ 292 if (maxvis < vis + p->trailspace * (*p->width)(p, ' ')) { 293 (*p->endline)(p); 294 p->viscol = 0; 295 } 296 } 297 298 /* 299 * A newline only breaks an existing line; it won't assert vertical 300 * space. All data in the output buffer is flushed prior to the newline 301 * assertion. 302 */ 303 void 304 term_newln(struct termp *p) 305 { 306 307 p->flags |= TERMP_NOSPACE; 308 if (p->col || p->viscol) 309 term_flushln(p); 310 } 311 312 /* 313 * Asserts a vertical space (a full, empty line-break between lines). 314 * Note that if used twice, this will cause two blank spaces and so on. 315 * All data in the output buffer is flushed prior to the newline 316 * assertion. 317 */ 318 void 319 term_vspace(struct termp *p) 320 { 321 322 term_newln(p); 323 p->viscol = 0; 324 if (0 < p->skipvsp) 325 p->skipvsp--; 326 else 327 (*p->endline)(p); 328 } 329 330 void 331 term_fontlast(struct termp *p) 332 { 333 enum termfont f; 334 335 f = p->fontl; 336 p->fontl = p->fontq[p->fonti]; 337 p->fontq[p->fonti] = f; 338 } 339 340 void 341 term_fontrepl(struct termp *p, enum termfont f) 342 { 343 344 p->fontl = p->fontq[p->fonti]; 345 p->fontq[p->fonti] = f; 346 } 347 348 void 349 term_fontpush(struct termp *p, enum termfont f) 350 { 351 352 assert(p->fonti + 1 < 10); 353 p->fontl = p->fontq[p->fonti]; 354 p->fontq[++p->fonti] = f; 355 } 356 357 const void * 358 term_fontq(struct termp *p) 359 { 360 361 return(&p->fontq[p->fonti]); 362 } 363 364 enum termfont 365 term_fonttop(struct termp *p) 366 { 367 368 return(p->fontq[p->fonti]); 369 } 370 371 void 372 term_fontpopq(struct termp *p, const void *key) 373 { 374 375 while (p->fonti >= 0 && key < (void *)(p->fontq + p->fonti)) 376 p->fonti--; 377 assert(p->fonti >= 0); 378 } 379 380 void 381 term_fontpop(struct termp *p) 382 { 383 384 assert(p->fonti); 385 p->fonti--; 386 } 387 388 /* 389 * Handle pwords, partial words, which may be either a single word or a 390 * phrase that cannot be broken down (such as a literal string). This 391 * handles word styling. 392 */ 393 void 394 term_word(struct termp *p, const char *word) 395 { 396 const char nbrsp[2] = { ASCII_NBRSP, 0 }; 397 const char *seq, *cp; 398 int sz, uc; 399 size_t ssz; 400 enum mandoc_esc esc; 401 402 if ( ! (TERMP_NOSPACE & p->flags)) { 403 if ( ! (TERMP_KEEP & p->flags)) { 404 bufferc(p, ' '); 405 if (TERMP_SENTENCE & p->flags) 406 bufferc(p, ' '); 407 } else 408 bufferc(p, ASCII_NBRSP); 409 } 410 if (TERMP_PREKEEP & p->flags) 411 p->flags |= TERMP_KEEP; 412 413 if ( ! (p->flags & TERMP_NONOSPACE)) 414 p->flags &= ~TERMP_NOSPACE; 415 else 416 p->flags |= TERMP_NOSPACE; 417 418 p->flags &= ~TERMP_SENTENCE; 419 420 while ('\0' != *word) { 421 if ('\\' != *word) { 422 if (TERMP_SKIPCHAR & p->flags) { 423 p->flags &= ~TERMP_SKIPCHAR; 424 word++; 425 continue; 426 } 427 if (TERMP_NBRWORD & p->flags) { 428 if (' ' == *word) { 429 encode(p, nbrsp, 1); 430 word++; 431 continue; 432 } 433 ssz = strcspn(word, "\\ "); 434 } else 435 ssz = strcspn(word, "\\"); 436 encode(p, word, ssz); 437 word += (int)ssz; 438 continue; 439 } 440 441 word++; 442 esc = mandoc_escape(&word, &seq, &sz); 443 if (ESCAPE_ERROR == esc) 444 continue; 445 446 switch (esc) { 447 case ESCAPE_UNICODE: 448 uc = mchars_num2uc(seq + 1, sz - 1); 449 break; 450 case ESCAPE_NUMBERED: 451 uc = mchars_num2char(seq, sz); 452 if (uc < 0) 453 continue; 454 break; 455 case ESCAPE_SPECIAL: 456 if (p->enc == TERMENC_ASCII) { 457 cp = mchars_spec2str(p->symtab, 458 seq, sz, &ssz); 459 if (cp != NULL) 460 encode(p, cp, ssz); 461 } else { 462 uc = mchars_spec2cp(p->symtab, seq, sz); 463 if (uc > 0) 464 encode1(p, uc); 465 } 466 continue; 467 case ESCAPE_FONTBOLD: 468 term_fontrepl(p, TERMFONT_BOLD); 469 continue; 470 case ESCAPE_FONTITALIC: 471 term_fontrepl(p, TERMFONT_UNDER); 472 continue; 473 case ESCAPE_FONTBI: 474 term_fontrepl(p, TERMFONT_BI); 475 continue; 476 case ESCAPE_FONT: 477 /* FALLTHROUGH */ 478 case ESCAPE_FONTROMAN: 479 term_fontrepl(p, TERMFONT_NONE); 480 continue; 481 case ESCAPE_FONTPREV: 482 term_fontlast(p); 483 continue; 484 case ESCAPE_NOSPACE: 485 if (TERMP_SKIPCHAR & p->flags) 486 p->flags &= ~TERMP_SKIPCHAR; 487 else if ('\0' == *word) 488 p->flags |= TERMP_NOSPACE; 489 continue; 490 case ESCAPE_SKIPCHAR: 491 p->flags |= TERMP_SKIPCHAR; 492 continue; 493 default: 494 continue; 495 } 496 497 /* 498 * Common handling for Unicode and numbered 499 * character escape sequences. 500 */ 501 502 if (p->enc == TERMENC_ASCII) { 503 cp = ascii_uc2str(uc); 504 encode(p, cp, strlen(cp)); 505 } else { 506 if ((uc < 0x20 && uc != 0x09) || 507 (uc > 0x7E && uc < 0xA0)) 508 uc = 0xFFFD; 509 encode1(p, uc); 510 } 511 } 512 p->flags &= ~TERMP_NBRWORD; 513 } 514 515 static void 516 adjbuf(struct termp *p, size_t sz) 517 { 518 519 if (0 == p->maxcols) 520 p->maxcols = 1024; 521 while (sz >= p->maxcols) 522 p->maxcols <<= 2; 523 524 p->buf = mandoc_reallocarray(p->buf, p->maxcols, sizeof(int)); 525 } 526 527 static void 528 bufferc(struct termp *p, char c) 529 { 530 531 if (p->col + 1 >= p->maxcols) 532 adjbuf(p, p->col + 1); 533 534 p->buf[p->col++] = c; 535 } 536 537 /* 538 * See encode(). 539 * Do this for a single (probably unicode) value. 540 * Does not check for non-decorated glyphs. 541 */ 542 static void 543 encode1(struct termp *p, int c) 544 { 545 enum termfont f; 546 547 if (TERMP_SKIPCHAR & p->flags) { 548 p->flags &= ~TERMP_SKIPCHAR; 549 return; 550 } 551 552 if (p->col + 6 >= p->maxcols) 553 adjbuf(p, p->col + 6); 554 555 f = term_fonttop(p); 556 557 if (TERMFONT_UNDER == f || TERMFONT_BI == f) { 558 p->buf[p->col++] = '_'; 559 p->buf[p->col++] = 8; 560 } 561 if (TERMFONT_BOLD == f || TERMFONT_BI == f) { 562 if (ASCII_HYPH == c) 563 p->buf[p->col++] = '-'; 564 else 565 p->buf[p->col++] = c; 566 p->buf[p->col++] = 8; 567 } 568 p->buf[p->col++] = c; 569 } 570 571 static void 572 encode(struct termp *p, const char *word, size_t sz) 573 { 574 size_t i; 575 576 if (TERMP_SKIPCHAR & p->flags) { 577 p->flags &= ~TERMP_SKIPCHAR; 578 return; 579 } 580 581 /* 582 * Encode and buffer a string of characters. If the current 583 * font mode is unset, buffer directly, else encode then buffer 584 * character by character. 585 */ 586 587 if (TERMFONT_NONE == term_fonttop(p)) { 588 if (p->col + sz >= p->maxcols) 589 adjbuf(p, p->col + sz); 590 for (i = 0; i < sz; i++) 591 p->buf[p->col++] = word[i]; 592 return; 593 } 594 595 /* Pre-buffer, assuming worst-case. */ 596 597 if (p->col + 1 + (sz * 5) >= p->maxcols) 598 adjbuf(p, p->col + 1 + (sz * 5)); 599 600 for (i = 0; i < sz; i++) { 601 if (ASCII_HYPH == word[i] || 602 isgraph((unsigned char)word[i])) 603 encode1(p, word[i]); 604 else 605 p->buf[p->col++] = word[i]; 606 } 607 } 608 609 void 610 term_setwidth(struct termp *p, const char *wstr) 611 { 612 struct roffsu su; 613 size_t width; 614 int iop; 615 616 iop = 0; 617 width = 0; 618 if (NULL != wstr) { 619 switch (*wstr) { 620 case '+': 621 iop = 1; 622 wstr++; 623 break; 624 case '-': 625 iop = -1; 626 wstr++; 627 break; 628 default: 629 break; 630 } 631 if (a2roffsu(wstr, &su, SCALE_MAX)) 632 width = term_hspan(p, &su); 633 else 634 iop = 0; 635 } 636 (*p->setwidth)(p, iop, width); 637 } 638 639 size_t 640 term_len(const struct termp *p, size_t sz) 641 { 642 643 return((*p->width)(p, ' ') * sz); 644 } 645 646 static size_t 647 cond_width(const struct termp *p, int c, int *skip) 648 { 649 650 if (*skip) { 651 (*skip) = 0; 652 return(0); 653 } else 654 return((*p->width)(p, c)); 655 } 656 657 size_t 658 term_strlen(const struct termp *p, const char *cp) 659 { 660 size_t sz, rsz, i; 661 int ssz, skip, uc; 662 const char *seq, *rhs; 663 enum mandoc_esc esc; 664 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH, 665 ASCII_BREAK, '\0' }; 666 667 /* 668 * Account for escaped sequences within string length 669 * calculations. This follows the logic in term_word() as we 670 * must calculate the width of produced strings. 671 */ 672 673 sz = 0; 674 skip = 0; 675 while ('\0' != *cp) { 676 rsz = strcspn(cp, rej); 677 for (i = 0; i < rsz; i++) 678 sz += cond_width(p, *cp++, &skip); 679 680 switch (*cp) { 681 case '\\': 682 cp++; 683 esc = mandoc_escape(&cp, &seq, &ssz); 684 if (ESCAPE_ERROR == esc) 685 continue; 686 687 rhs = NULL; 688 689 switch (esc) { 690 case ESCAPE_UNICODE: 691 uc = mchars_num2uc(seq + 1, ssz - 1); 692 break; 693 case ESCAPE_NUMBERED: 694 uc = mchars_num2char(seq, ssz); 695 if (uc < 0) 696 continue; 697 break; 698 case ESCAPE_SPECIAL: 699 if (p->enc == TERMENC_ASCII) { 700 rhs = mchars_spec2str(p->symtab, 701 seq, ssz, &rsz); 702 if (rhs != NULL) 703 break; 704 } else { 705 uc = mchars_spec2cp(p->symtab, 706 seq, ssz); 707 if (uc > 0) 708 sz += cond_width(p, uc, &skip); 709 } 710 continue; 711 case ESCAPE_SKIPCHAR: 712 skip = 1; 713 continue; 714 default: 715 continue; 716 } 717 718 /* 719 * Common handling for Unicode and numbered 720 * character escape sequences. 721 */ 722 723 if (rhs == NULL) { 724 if (p->enc == TERMENC_ASCII) { 725 rhs = ascii_uc2str(uc); 726 rsz = strlen(rhs); 727 } else { 728 if ((uc < 0x20 && uc != 0x09) || 729 (uc > 0x7E && uc < 0xA0)) 730 uc = 0xFFFD; 731 sz += cond_width(p, uc, &skip); 732 continue; 733 } 734 } 735 736 if (skip) { 737 skip = 0; 738 break; 739 } 740 741 /* 742 * Common handling for all escape sequences 743 * printing more than one character. 744 */ 745 746 for (i = 0; i < rsz; i++) 747 sz += (*p->width)(p, *rhs++); 748 break; 749 case ASCII_NBRSP: 750 sz += cond_width(p, ' ', &skip); 751 cp++; 752 break; 753 case ASCII_HYPH: 754 sz += cond_width(p, '-', &skip); 755 cp++; 756 /* FALLTHROUGH */ 757 case ASCII_BREAK: 758 break; 759 default: 760 break; 761 } 762 } 763 764 return(sz); 765 } 766 767 size_t 768 term_vspan(const struct termp *p, const struct roffsu *su) 769 { 770 double r; 771 772 switch (su->unit) { 773 case SCALE_CM: 774 r = su->scale * 2.0; 775 break; 776 case SCALE_IN: 777 r = su->scale * 6.0; 778 break; 779 case SCALE_PC: 780 r = su->scale; 781 break; 782 case SCALE_PT: 783 r = su->scale / 8.0; 784 break; 785 case SCALE_MM: 786 r = su->scale / 1000.0; 787 break; 788 case SCALE_VS: 789 r = su->scale; 790 break; 791 default: 792 r = su->scale - 1.0; 793 break; 794 } 795 796 if (r < 0.0) 797 r = 0.0; 798 return((size_t)(r + 0.0005)); 799 } 800 801 size_t 802 term_hspan(const struct termp *p, const struct roffsu *su) 803 { 804 double v; 805 806 v = (*p->hspan)(p, su); 807 if (v < 0.0) 808 v = 0.0; 809 return((size_t)(v + 0.0005)); 810 } 811