1 /* $Id: term.c,v 1.59 2011/05/29 21:22:18 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdint.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "mandoc.h" 28 #include "out.h" 29 #include "term.h" 30 #include "main.h" 31 32 static void adjbuf(struct termp *p, int); 33 static void bufferc(struct termp *, char); 34 static void encode(struct termp *, const char *, size_t); 35 static void encode1(struct termp *, int); 36 37 void 38 term_free(struct termp *p) 39 { 40 41 if (p->buf) 42 free(p->buf); 43 if (p->symtab) 44 mchars_free(p->symtab); 45 46 free(p); 47 } 48 49 50 void 51 term_begin(struct termp *p, term_margin head, 52 term_margin foot, const void *arg) 53 { 54 55 p->headf = head; 56 p->footf = foot; 57 p->argf = arg; 58 (*p->begin)(p); 59 } 60 61 62 void 63 term_end(struct termp *p) 64 { 65 66 (*p->end)(p); 67 } 68 69 /* 70 * Flush a line of text. A "line" is loosely defined as being something 71 * that should be followed by a newline, regardless of whether it's 72 * broken apart by newlines getting there. A line can also be a 73 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does 74 * not have a trailing newline. 75 * 76 * The following flags may be specified: 77 * 78 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the 79 * offset value. This is useful when doing columnar lists where the 80 * prior column has right-padded. 81 * 82 * - TERMP_NOBREAK: this is the most important and is used when making 83 * columns. In short: don't print a newline and instead pad to the 84 * right margin. Used in conjunction with TERMP_NOLPAD. 85 * 86 * - TERMP_TWOSPACE: when padding, make sure there are at least two 87 * space characters of padding. Otherwise, rather break the line. 88 * 89 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and 90 * the line is overrun, and don't pad-right if it's underrun. 91 * 92 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when 93 * overruning, instead save the position and continue at that point 94 * when the next invocation. 95 * 96 * In-line line breaking: 97 * 98 * If TERMP_NOBREAK is specified and the line overruns the right 99 * margin, it will break and pad-right to the right margin after 100 * writing. If maxrmargin is violated, it will break and continue 101 * writing from the right-margin, which will lead to the above scenario 102 * upon exit. Otherwise, the line will break at the right margin. 103 */ 104 void 105 term_flushln(struct termp *p) 106 { 107 int i; /* current input position in p->buf */ 108 size_t vis; /* current visual position on output */ 109 size_t vbl; /* number of blanks to prepend to output */ 110 size_t vend; /* end of word visual position on output */ 111 size_t bp; /* visual right border position */ 112 size_t dv; /* temporary for visual pos calculations */ 113 int j; /* temporary loop index for p->buf */ 114 int jhy; /* last hyph before overflow w/r/t j */ 115 size_t maxvis; /* output position of visible boundary */ 116 size_t mmax; /* used in calculating bp */ 117 118 /* 119 * First, establish the maximum columns of "visible" content. 120 * This is usually the difference between the right-margin and 121 * an indentation, but can be, for tagged lists or columns, a 122 * small set of values. 123 */ 124 assert (p->rmargin >= p->offset); 125 dv = p->rmargin - p->offset; 126 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 127 dv = p->maxrmargin - p->offset; 128 mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 129 130 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; 131 132 /* 133 * Indent the first line of a paragraph. 134 */ 135 vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset; 136 137 vis = vend = 0; 138 i = 0; 139 140 while (i < p->col) { 141 /* 142 * Handle literal tab characters: collapse all 143 * subsequent tabs into a single huge set of spaces. 144 */ 145 while (i < p->col && '\t' == p->buf[i]) { 146 vend = (vis / p->tabwidth + 1) * p->tabwidth; 147 vbl += vend - vis; 148 vis = vend; 149 i++; 150 } 151 152 /* 153 * Count up visible word characters. Control sequences 154 * (starting with the CSI) aren't counted. A space 155 * generates a non-printing word, which is valid (the 156 * space is printed according to regular spacing rules). 157 */ 158 159 for (j = i, jhy = 0; j < p->col; j++) { 160 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) 161 break; 162 163 /* Back over the the last printed character. */ 164 if (8 == p->buf[j]) { 165 assert(j); 166 vend -= (*p->width)(p, p->buf[j - 1]); 167 continue; 168 } 169 170 /* Regular word. */ 171 /* Break at the hyphen point if we overrun. */ 172 if (vend > vis && vend < bp && 173 ASCII_HYPH == p->buf[j]) 174 jhy = j; 175 176 vend += (*p->width)(p, p->buf[j]); 177 } 178 179 /* 180 * Find out whether we would exceed the right margin. 181 * If so, break to the next line. 182 */ 183 if (vend > bp && 0 == jhy && vis > 0) { 184 vend -= vis; 185 (*p->endline)(p); 186 if (TERMP_NOBREAK & p->flags) { 187 p->viscol = p->rmargin; 188 (*p->advance)(p, p->rmargin); 189 vend += p->rmargin - p->offset; 190 } else { 191 p->viscol = 0; 192 vbl = p->offset; 193 } 194 195 /* Remove the p->overstep width. */ 196 197 bp += (size_t)p->overstep; 198 p->overstep = 0; 199 } 200 201 /* Write out the [remaining] word. */ 202 for ( ; i < p->col; i++) { 203 if (vend > bp && jhy > 0 && i > jhy) 204 break; 205 if ('\t' == p->buf[i]) 206 break; 207 if (' ' == p->buf[i]) { 208 j = i; 209 while (' ' == p->buf[i]) 210 i++; 211 dv = (size_t)(i - j) * (*p->width)(p, ' '); 212 vbl += dv; 213 vend += dv; 214 break; 215 } 216 if (ASCII_NBRSP == p->buf[i]) { 217 vbl += (*p->width)(p, ' '); 218 continue; 219 } 220 221 /* 222 * Now we definitely know there will be 223 * printable characters to output, 224 * so write preceding white space now. 225 */ 226 if (vbl) { 227 (*p->advance)(p, vbl); 228 p->viscol += vbl; 229 vbl = 0; 230 } 231 232 if (ASCII_HYPH == p->buf[i]) { 233 (*p->letter)(p, '-'); 234 p->viscol += (*p->width)(p, '-'); 235 } else { 236 (*p->letter)(p, p->buf[i]); 237 p->viscol += (*p->width)(p, p->buf[i]); 238 } 239 } 240 vis = vend; 241 } 242 243 /* 244 * If there was trailing white space, it was not printed; 245 * so reset the cursor position accordingly. 246 */ 247 vis -= vbl; 248 249 p->col = 0; 250 p->overstep = 0; 251 252 if ( ! (TERMP_NOBREAK & p->flags)) { 253 p->viscol = 0; 254 (*p->endline)(p); 255 return; 256 } 257 258 if (TERMP_HANG & p->flags) { 259 /* We need one blank after the tag. */ 260 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' ')); 261 262 /* 263 * Behave exactly the same way as groff: 264 * If we have overstepped the margin, temporarily move 265 * it to the right and flag the rest of the line to be 266 * shorter. 267 * If we landed right at the margin, be happy. 268 * If we are one step before the margin, temporarily 269 * move it one step LEFT and flag the rest of the line 270 * to be longer. 271 */ 272 if (p->overstep >= -1) { 273 assert((int)maxvis + p->overstep >= 0); 274 maxvis += (size_t)p->overstep; 275 } else 276 p->overstep = 0; 277 278 } else if (TERMP_DANGLE & p->flags) 279 return; 280 281 /* Right-pad. */ 282 if (maxvis > vis + 283 ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) { 284 p->viscol += maxvis - vis; 285 (*p->advance)(p, maxvis - vis); 286 vis += (maxvis - vis); 287 } else { /* ...or newline break. */ 288 (*p->endline)(p); 289 p->viscol = p->rmargin; 290 (*p->advance)(p, p->rmargin); 291 } 292 } 293 294 295 /* 296 * A newline only breaks an existing line; it won't assert vertical 297 * space. All data in the output buffer is flushed prior to the newline 298 * assertion. 299 */ 300 void 301 term_newln(struct termp *p) 302 { 303 304 p->flags |= TERMP_NOSPACE; 305 if (0 == p->col && 0 == p->viscol) { 306 p->flags &= ~TERMP_NOLPAD; 307 return; 308 } 309 term_flushln(p); 310 p->flags &= ~TERMP_NOLPAD; 311 } 312 313 314 /* 315 * Asserts a vertical space (a full, empty line-break between lines). 316 * Note that if used twice, this will cause two blank spaces and so on. 317 * All data in the output buffer is flushed prior to the newline 318 * assertion. 319 */ 320 void 321 term_vspace(struct termp *p) 322 { 323 324 term_newln(p); 325 p->viscol = 0; 326 (*p->endline)(p); 327 } 328 329 void 330 term_fontlast(struct termp *p) 331 { 332 enum termfont f; 333 334 f = p->fontl; 335 p->fontl = p->fontq[p->fonti]; 336 p->fontq[p->fonti] = f; 337 } 338 339 340 void 341 term_fontrepl(struct termp *p, enum termfont f) 342 { 343 344 p->fontl = p->fontq[p->fonti]; 345 p->fontq[p->fonti] = f; 346 } 347 348 349 void 350 term_fontpush(struct termp *p, enum termfont f) 351 { 352 353 assert(p->fonti + 1 < 10); 354 p->fontl = p->fontq[p->fonti]; 355 p->fontq[++p->fonti] = f; 356 } 357 358 359 const void * 360 term_fontq(struct termp *p) 361 { 362 363 return(&p->fontq[p->fonti]); 364 } 365 366 367 enum termfont 368 term_fonttop(struct termp *p) 369 { 370 371 return(p->fontq[p->fonti]); 372 } 373 374 375 void 376 term_fontpopq(struct termp *p, const void *key) 377 { 378 379 while (p->fonti >= 0 && key != &p->fontq[p->fonti]) 380 p->fonti--; 381 assert(p->fonti >= 0); 382 } 383 384 385 void 386 term_fontpop(struct termp *p) 387 { 388 389 assert(p->fonti); 390 p->fonti--; 391 } 392 393 /* 394 * Handle pwords, partial words, which may be either a single word or a 395 * phrase that cannot be broken down (such as a literal string). This 396 * handles word styling. 397 */ 398 void 399 term_word(struct termp *p, const char *word) 400 { 401 const char *seq, *cp; 402 char c; 403 int sz, uc; 404 size_t ssz; 405 enum mandoc_esc esc; 406 407 if ( ! (TERMP_NOSPACE & p->flags)) { 408 if ( ! (TERMP_KEEP & p->flags)) { 409 if (TERMP_PREKEEP & p->flags) 410 p->flags |= TERMP_KEEP; 411 bufferc(p, ' '); 412 if (TERMP_SENTENCE & p->flags) 413 bufferc(p, ' '); 414 } else 415 bufferc(p, ASCII_NBRSP); 416 } 417 418 if ( ! (p->flags & TERMP_NONOSPACE)) 419 p->flags &= ~TERMP_NOSPACE; 420 else 421 p->flags |= TERMP_NOSPACE; 422 423 p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); 424 425 while ('\0' != *word) { 426 if ((ssz = strcspn(word, "\\")) > 0) 427 encode(p, word, ssz); 428 429 word += (int)ssz; 430 if ('\\' != *word) 431 continue; 432 433 word++; 434 esc = mandoc_escape(&word, &seq, &sz); 435 if (ESCAPE_ERROR == esc) 436 break; 437 438 if (TERMENC_ASCII != p->enc) 439 switch (esc) { 440 case (ESCAPE_UNICODE): 441 uc = mchars_num2uc(seq + 1, sz - 1); 442 if ('\0' == uc) 443 break; 444 encode1(p, uc); 445 continue; 446 case (ESCAPE_SPECIAL): 447 uc = mchars_spec2cp(p->symtab, seq, sz); 448 if (uc <= 0) 449 break; 450 encode1(p, uc); 451 continue; 452 default: 453 break; 454 } 455 456 switch (esc) { 457 case (ESCAPE_UNICODE): 458 encode1(p, '?'); 459 break; 460 case (ESCAPE_NUMBERED): 461 c = mchars_num2char(seq, sz); 462 if ('\0' != c) 463 encode(p, &c, 1); 464 break; 465 case (ESCAPE_SPECIAL): 466 cp = mchars_spec2str(p->symtab, seq, sz, &ssz); 467 if (NULL != cp) 468 encode(p, cp, ssz); 469 else if (1 == ssz) 470 encode(p, seq, sz); 471 break; 472 case (ESCAPE_FONTBOLD): 473 term_fontrepl(p, TERMFONT_BOLD); 474 break; 475 case (ESCAPE_FONTITALIC): 476 term_fontrepl(p, TERMFONT_UNDER); 477 break; 478 case (ESCAPE_FONT): 479 /* FALLTHROUGH */ 480 case (ESCAPE_FONTROMAN): 481 term_fontrepl(p, TERMFONT_NONE); 482 break; 483 case (ESCAPE_FONTPREV): 484 term_fontlast(p); 485 break; 486 case (ESCAPE_NOSPACE): 487 if ('\0' == *word) 488 p->flags |= TERMP_NOSPACE; 489 break; 490 default: 491 break; 492 } 493 } 494 } 495 496 static void 497 adjbuf(struct termp *p, int sz) 498 { 499 500 if (0 == p->maxcols) 501 p->maxcols = 1024; 502 while (sz >= p->maxcols) 503 p->maxcols <<= 2; 504 505 p->buf = mandoc_realloc 506 (p->buf, sizeof(int) * (size_t)p->maxcols); 507 } 508 509 static void 510 bufferc(struct termp *p, char c) 511 { 512 513 if (p->col + 1 >= p->maxcols) 514 adjbuf(p, p->col + 1); 515 516 p->buf[p->col++] = c; 517 } 518 519 /* 520 * See encode(). 521 * Do this for a single (probably unicode) value. 522 * Does not check for non-decorated glyphs. 523 */ 524 static void 525 encode1(struct termp *p, int c) 526 { 527 enum termfont f; 528 529 if (p->col + 4 >= p->maxcols) 530 adjbuf(p, p->col + 4); 531 532 f = term_fonttop(p); 533 534 if (TERMFONT_NONE == f) { 535 p->buf[p->col++] = c; 536 return; 537 } else if (TERMFONT_UNDER == f) { 538 p->buf[p->col++] = '_'; 539 } else 540 p->buf[p->col++] = c; 541 542 p->buf[p->col++] = 8; 543 p->buf[p->col++] = c; 544 } 545 546 static void 547 encode(struct termp *p, const char *word, size_t sz) 548 { 549 enum termfont f; 550 int i, len; 551 552 /* LINTED */ 553 len = sz; 554 555 /* 556 * Encode and buffer a string of characters. If the current 557 * font mode is unset, buffer directly, else encode then buffer 558 * character by character. 559 */ 560 561 if (TERMFONT_NONE == (f = term_fonttop(p))) { 562 if (p->col + len >= p->maxcols) 563 adjbuf(p, p->col + len); 564 for (i = 0; i < len; i++) 565 p->buf[p->col++] = word[i]; 566 return; 567 } 568 569 /* Pre-buffer, assuming worst-case. */ 570 571 if (p->col + 1 + (len * 3) >= p->maxcols) 572 adjbuf(p, p->col + 1 + (len * 3)); 573 574 for (i = 0; i < len; i++) { 575 if ( ! isgraph((unsigned char)word[i])) { 576 p->buf[p->col++] = word[i]; 577 continue; 578 } 579 580 if (TERMFONT_UNDER == f) 581 p->buf[p->col++] = '_'; 582 else 583 p->buf[p->col++] = word[i]; 584 585 p->buf[p->col++] = 8; 586 p->buf[p->col++] = word[i]; 587 } 588 } 589 590 size_t 591 term_len(const struct termp *p, size_t sz) 592 { 593 594 return((*p->width)(p, ' ') * sz); 595 } 596 597 598 size_t 599 term_strlen(const struct termp *p, const char *cp) 600 { 601 size_t sz, rsz, i; 602 int ssz, c; 603 const char *seq, *rhs; 604 enum mandoc_esc esc; 605 static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; 606 607 /* 608 * Account for escaped sequences within string length 609 * calculations. This follows the logic in term_word() as we 610 * must calculate the width of produced strings. 611 */ 612 613 sz = 0; 614 while ('\0' != *cp) { 615 rsz = strcspn(cp, rej); 616 for (i = 0; i < rsz; i++) 617 sz += (*p->width)(p, *cp++); 618 619 c = 0; 620 switch (*cp) { 621 case ('\\'): 622 cp++; 623 esc = mandoc_escape(&cp, &seq, &ssz); 624 if (ESCAPE_ERROR == esc) 625 return(sz); 626 627 if (TERMENC_ASCII != p->enc) 628 switch (esc) { 629 case (ESCAPE_UNICODE): 630 c = mchars_num2uc 631 (seq + 1, ssz - 1); 632 if ('\0' == c) 633 break; 634 sz += (*p->width)(p, c); 635 continue; 636 case (ESCAPE_SPECIAL): 637 c = mchars_spec2cp 638 (p->symtab, seq, ssz); 639 if (c <= 0) 640 break; 641 sz += (*p->width)(p, c); 642 continue; 643 default: 644 break; 645 } 646 647 rhs = NULL; 648 649 switch (esc) { 650 case (ESCAPE_UNICODE): 651 sz += (*p->width)(p, '?'); 652 break; 653 case (ESCAPE_NUMBERED): 654 c = mchars_num2char(seq, ssz); 655 if ('\0' != c) 656 sz += (*p->width)(p, c); 657 break; 658 case (ESCAPE_SPECIAL): 659 rhs = mchars_spec2str 660 (p->symtab, seq, ssz, &rsz); 661 662 if (ssz != 1 || rhs) 663 break; 664 665 rhs = seq; 666 rsz = ssz; 667 break; 668 default: 669 break; 670 } 671 672 if (NULL == rhs) 673 break; 674 675 for (i = 0; i < rsz; i++) 676 sz += (*p->width)(p, *rhs++); 677 break; 678 case (ASCII_NBRSP): 679 sz += (*p->width)(p, ' '); 680 cp++; 681 break; 682 case (ASCII_HYPH): 683 sz += (*p->width)(p, '-'); 684 cp++; 685 break; 686 default: 687 break; 688 } 689 } 690 691 return(sz); 692 } 693 694 /* ARGSUSED */ 695 size_t 696 term_vspan(const struct termp *p, const struct roffsu *su) 697 { 698 double r; 699 700 switch (su->unit) { 701 case (SCALE_CM): 702 r = su->scale * 2; 703 break; 704 case (SCALE_IN): 705 r = su->scale * 6; 706 break; 707 case (SCALE_PC): 708 r = su->scale; 709 break; 710 case (SCALE_PT): 711 r = su->scale / 8; 712 break; 713 case (SCALE_MM): 714 r = su->scale / 1000; 715 break; 716 case (SCALE_VS): 717 r = su->scale; 718 break; 719 default: 720 r = su->scale - 1; 721 break; 722 } 723 724 if (r < 0.0) 725 r = 0.0; 726 return(/* LINTED */(size_t) 727 r); 728 } 729 730 size_t 731 term_hspan(const struct termp *p, const struct roffsu *su) 732 { 733 double v; 734 735 v = ((*p->hspan)(p, su)); 736 if (v < 0.0) 737 v = 0.0; 738 return((size_t) /* LINTED */ 739 v); 740 } 741