1 /* $Id: term.c,v 1.197 2011/05/24 21:31:23 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdint.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 31 #include "mandoc.h" 32 #include "out.h" 33 #include "term.h" 34 #include "main.h" 35 36 static void adjbuf(struct termp *p, int); 37 static void bufferc(struct termp *, char); 38 static void encode(struct termp *, const char *, size_t); 39 static void encode1(struct termp *, int); 40 41 void 42 term_free(struct termp *p) 43 { 44 45 if (p->buf) 46 free(p->buf); 47 if (p->symtab) 48 mchars_free(p->symtab); 49 50 free(p); 51 } 52 53 54 void 55 term_begin(struct termp *p, term_margin head, 56 term_margin foot, const void *arg) 57 { 58 59 p->headf = head; 60 p->footf = foot; 61 p->argf = arg; 62 (*p->begin)(p); 63 } 64 65 66 void 67 term_end(struct termp *p) 68 { 69 70 (*p->end)(p); 71 } 72 73 /* 74 * Flush a line of text. A "line" is loosely defined as being something 75 * that should be followed by a newline, regardless of whether it's 76 * broken apart by newlines getting there. A line can also be a 77 * fragment of a columnar list (`Bl -tag' or `Bl -column'), which does 78 * not have a trailing newline. 79 * 80 * The following flags may be specified: 81 * 82 * - TERMP_NOLPAD: when beginning to write the line, don't left-pad the 83 * offset value. This is useful when doing columnar lists where the 84 * prior column has right-padded. 85 * 86 * - TERMP_NOBREAK: this is the most important and is used when making 87 * columns. In short: don't print a newline and instead pad to the 88 * right margin. Used in conjunction with TERMP_NOLPAD. 89 * 90 * - TERMP_TWOSPACE: when padding, make sure there are at least two 91 * space characters of padding. Otherwise, rather break the line. 92 * 93 * - TERMP_DANGLE: don't newline when TERMP_NOBREAK is specified and 94 * the line is overrun, and don't pad-right if it's underrun. 95 * 96 * - TERMP_HANG: like TERMP_DANGLE, but doesn't newline when 97 * overruning, instead save the position and continue at that point 98 * when the next invocation. 99 * 100 * In-line line breaking: 101 * 102 * If TERMP_NOBREAK is specified and the line overruns the right 103 * margin, it will break and pad-right to the right margin after 104 * writing. If maxrmargin is violated, it will break and continue 105 * writing from the right-margin, which will lead to the above scenario 106 * upon exit. Otherwise, the line will break at the right margin. 107 */ 108 void 109 term_flushln(struct termp *p) 110 { 111 int i; /* current input position in p->buf */ 112 size_t vis; /* current visual position on output */ 113 size_t vbl; /* number of blanks to prepend to output */ 114 size_t vend; /* end of word visual position on output */ 115 size_t bp; /* visual right border position */ 116 size_t dv; /* temporary for visual pos calculations */ 117 int j; /* temporary loop index for p->buf */ 118 int jhy; /* last hyph before overflow w/r/t j */ 119 size_t maxvis; /* output position of visible boundary */ 120 size_t mmax; /* used in calculating bp */ 121 122 /* 123 * First, establish the maximum columns of "visible" content. 124 * This is usually the difference between the right-margin and 125 * an indentation, but can be, for tagged lists or columns, a 126 * small set of values. 127 */ 128 assert (p->rmargin >= p->offset); 129 dv = p->rmargin - p->offset; 130 maxvis = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 131 dv = p->maxrmargin - p->offset; 132 mmax = (int)dv > p->overstep ? dv - (size_t)p->overstep : 0; 133 134 bp = TERMP_NOBREAK & p->flags ? mmax : maxvis; 135 136 /* 137 * Indent the first line of a paragraph. 138 */ 139 vbl = p->flags & TERMP_NOLPAD ? (size_t)0 : p->offset; 140 141 vis = vend = 0; 142 i = 0; 143 144 while (i < p->col) { 145 /* 146 * Handle literal tab characters: collapse all 147 * subsequent tabs into a single huge set of spaces. 148 */ 149 while (i < p->col && '\t' == p->buf[i]) { 150 vend = (vis / p->tabwidth + 1) * p->tabwidth; 151 vbl += vend - vis; 152 vis = vend; 153 i++; 154 } 155 156 /* 157 * Count up visible word characters. Control sequences 158 * (starting with the CSI) aren't counted. A space 159 * generates a non-printing word, which is valid (the 160 * space is printed according to regular spacing rules). 161 */ 162 163 for (j = i, jhy = 0; j < p->col; j++) { 164 if ((j && ' ' == p->buf[j]) || '\t' == p->buf[j]) 165 break; 166 167 /* Back over the the last printed character. */ 168 if (8 == p->buf[j]) { 169 assert(j); 170 vend -= (*p->width)(p, p->buf[j - 1]); 171 continue; 172 } 173 174 /* Regular word. */ 175 /* Break at the hyphen point if we overrun. */ 176 if (vend > vis && vend < bp && 177 ASCII_HYPH == p->buf[j]) 178 jhy = j; 179 180 vend += (*p->width)(p, p->buf[j]); 181 } 182 183 /* 184 * Find out whether we would exceed the right margin. 185 * If so, break to the next line. 186 */ 187 if (vend > bp && 0 == jhy && vis > 0) { 188 vend -= vis; 189 (*p->endline)(p); 190 if (TERMP_NOBREAK & p->flags) { 191 p->viscol = p->rmargin; 192 (*p->advance)(p, p->rmargin); 193 vend += p->rmargin - p->offset; 194 } else { 195 p->viscol = 0; 196 vbl = p->offset; 197 } 198 199 /* Remove the p->overstep width. */ 200 201 bp += (size_t)p->overstep; 202 p->overstep = 0; 203 } 204 205 /* Write out the [remaining] word. */ 206 for ( ; i < p->col; i++) { 207 if (vend > bp && jhy > 0 && i > jhy) 208 break; 209 if ('\t' == p->buf[i]) 210 break; 211 if (' ' == p->buf[i]) { 212 j = i; 213 while (' ' == p->buf[i]) 214 i++; 215 dv = (size_t)(i - j) * (*p->width)(p, ' '); 216 vbl += dv; 217 vend += dv; 218 break; 219 } 220 if (ASCII_NBRSP == p->buf[i]) { 221 vbl += (*p->width)(p, ' '); 222 continue; 223 } 224 225 /* 226 * Now we definitely know there will be 227 * printable characters to output, 228 * so write preceding white space now. 229 */ 230 if (vbl) { 231 (*p->advance)(p, vbl); 232 p->viscol += vbl; 233 vbl = 0; 234 } 235 236 if (ASCII_HYPH == p->buf[i]) { 237 (*p->letter)(p, '-'); 238 p->viscol += (*p->width)(p, '-'); 239 } else { 240 (*p->letter)(p, p->buf[i]); 241 p->viscol += (*p->width)(p, p->buf[i]); 242 } 243 } 244 vis = vend; 245 } 246 247 /* 248 * If there was trailing white space, it was not printed; 249 * so reset the cursor position accordingly. 250 */ 251 vis -= vbl; 252 253 p->col = 0; 254 p->overstep = 0; 255 256 if ( ! (TERMP_NOBREAK & p->flags)) { 257 p->viscol = 0; 258 (*p->endline)(p); 259 return; 260 } 261 262 if (TERMP_HANG & p->flags) { 263 /* We need one blank after the tag. */ 264 p->overstep = (int)(vis - maxvis + (*p->width)(p, ' ')); 265 266 /* 267 * Behave exactly the same way as groff: 268 * If we have overstepped the margin, temporarily move 269 * it to the right and flag the rest of the line to be 270 * shorter. 271 * If we landed right at the margin, be happy. 272 * If we are one step before the margin, temporarily 273 * move it one step LEFT and flag the rest of the line 274 * to be longer. 275 */ 276 if (p->overstep >= -1) { 277 assert((int)maxvis + p->overstep >= 0); 278 maxvis += (size_t)p->overstep; 279 } else 280 p->overstep = 0; 281 282 } else if (TERMP_DANGLE & p->flags) 283 return; 284 285 /* Right-pad. */ 286 if (maxvis > vis + 287 ((TERMP_TWOSPACE & p->flags) ? (*p->width)(p, ' ') : 0)) { 288 p->viscol += maxvis - vis; 289 (*p->advance)(p, maxvis - vis); 290 vis += (maxvis - vis); 291 } else { /* ...or newline break. */ 292 (*p->endline)(p); 293 p->viscol = p->rmargin; 294 (*p->advance)(p, p->rmargin); 295 } 296 } 297 298 299 /* 300 * A newline only breaks an existing line; it won't assert vertical 301 * space. All data in the output buffer is flushed prior to the newline 302 * assertion. 303 */ 304 void 305 term_newln(struct termp *p) 306 { 307 308 p->flags |= TERMP_NOSPACE; 309 if (0 == p->col && 0 == p->viscol) { 310 p->flags &= ~TERMP_NOLPAD; 311 return; 312 } 313 term_flushln(p); 314 p->flags &= ~TERMP_NOLPAD; 315 } 316 317 318 /* 319 * Asserts a vertical space (a full, empty line-break between lines). 320 * Note that if used twice, this will cause two blank spaces and so on. 321 * All data in the output buffer is flushed prior to the newline 322 * assertion. 323 */ 324 void 325 term_vspace(struct termp *p) 326 { 327 328 term_newln(p); 329 p->viscol = 0; 330 (*p->endline)(p); 331 } 332 333 void 334 term_fontlast(struct termp *p) 335 { 336 enum termfont f; 337 338 f = p->fontl; 339 p->fontl = p->fontq[p->fonti]; 340 p->fontq[p->fonti] = f; 341 } 342 343 344 void 345 term_fontrepl(struct termp *p, enum termfont f) 346 { 347 348 p->fontl = p->fontq[p->fonti]; 349 p->fontq[p->fonti] = f; 350 } 351 352 353 void 354 term_fontpush(struct termp *p, enum termfont f) 355 { 356 357 assert(p->fonti + 1 < 10); 358 p->fontl = p->fontq[p->fonti]; 359 p->fontq[++p->fonti] = f; 360 } 361 362 363 const void * 364 term_fontq(struct termp *p) 365 { 366 367 return(&p->fontq[p->fonti]); 368 } 369 370 371 enum termfont 372 term_fonttop(struct termp *p) 373 { 374 375 return(p->fontq[p->fonti]); 376 } 377 378 379 void 380 term_fontpopq(struct termp *p, const void *key) 381 { 382 383 while (p->fonti >= 0 && key != &p->fontq[p->fonti]) 384 p->fonti--; 385 assert(p->fonti >= 0); 386 } 387 388 389 void 390 term_fontpop(struct termp *p) 391 { 392 393 assert(p->fonti); 394 p->fonti--; 395 } 396 397 /* 398 * Handle pwords, partial words, which may be either a single word or a 399 * phrase that cannot be broken down (such as a literal string). This 400 * handles word styling. 401 */ 402 void 403 term_word(struct termp *p, const char *word) 404 { 405 const char *seq, *cp; 406 char c; 407 int sz, uc; 408 size_t ssz; 409 enum mandoc_esc esc; 410 411 if ( ! (TERMP_NOSPACE & p->flags)) { 412 if ( ! (TERMP_KEEP & p->flags)) { 413 if (TERMP_PREKEEP & p->flags) 414 p->flags |= TERMP_KEEP; 415 bufferc(p, ' '); 416 if (TERMP_SENTENCE & p->flags) 417 bufferc(p, ' '); 418 } else 419 bufferc(p, ASCII_NBRSP); 420 } 421 422 if ( ! (p->flags & TERMP_NONOSPACE)) 423 p->flags &= ~TERMP_NOSPACE; 424 else 425 p->flags |= TERMP_NOSPACE; 426 427 p->flags &= ~(TERMP_SENTENCE | TERMP_IGNDELIM); 428 429 while ('\0' != *word) { 430 if ((ssz = strcspn(word, "\\")) > 0) 431 encode(p, word, ssz); 432 433 word += (int)ssz; 434 if ('\\' != *word) 435 continue; 436 437 word++; 438 esc = mandoc_escape(&word, &seq, &sz); 439 if (ESCAPE_ERROR == esc) 440 break; 441 442 if (TERMENC_ASCII != p->enc) 443 switch (esc) { 444 case (ESCAPE_UNICODE): 445 uc = mchars_num2uc(seq + 1, sz - 1); 446 if ('\0' == uc) 447 break; 448 encode1(p, uc); 449 continue; 450 case (ESCAPE_SPECIAL): 451 uc = mchars_spec2cp(p->symtab, seq, sz); 452 if (uc <= 0) 453 break; 454 encode1(p, uc); 455 continue; 456 default: 457 break; 458 } 459 460 switch (esc) { 461 case (ESCAPE_UNICODE): 462 encode1(p, '?'); 463 break; 464 case (ESCAPE_NUMBERED): 465 c = mchars_num2char(seq, sz); 466 if ('\0' != c) 467 encode(p, &c, 1); 468 break; 469 case (ESCAPE_SPECIAL): 470 cp = mchars_spec2str(p->symtab, seq, sz, &ssz); 471 if (NULL != cp) 472 encode(p, cp, ssz); 473 else if (1 == ssz) 474 encode(p, seq, sz); 475 break; 476 case (ESCAPE_FONTBOLD): 477 term_fontrepl(p, TERMFONT_BOLD); 478 break; 479 case (ESCAPE_FONTITALIC): 480 term_fontrepl(p, TERMFONT_UNDER); 481 break; 482 case (ESCAPE_FONT): 483 /* FALLTHROUGH */ 484 case (ESCAPE_FONTROMAN): 485 term_fontrepl(p, TERMFONT_NONE); 486 break; 487 case (ESCAPE_FONTPREV): 488 term_fontlast(p); 489 break; 490 case (ESCAPE_NOSPACE): 491 if ('\0' == *word) 492 p->flags |= TERMP_NOSPACE; 493 break; 494 default: 495 break; 496 } 497 } 498 } 499 500 static void 501 adjbuf(struct termp *p, int sz) 502 { 503 504 if (0 == p->maxcols) 505 p->maxcols = 1024; 506 while (sz >= p->maxcols) 507 p->maxcols <<= 2; 508 509 p->buf = mandoc_realloc 510 (p->buf, sizeof(int) * (size_t)p->maxcols); 511 } 512 513 static void 514 bufferc(struct termp *p, char c) 515 { 516 517 if (p->col + 1 >= p->maxcols) 518 adjbuf(p, p->col + 1); 519 520 p->buf[p->col++] = c; 521 } 522 523 /* 524 * See encode(). 525 * Do this for a single (probably unicode) value. 526 * Does not check for non-decorated glyphs. 527 */ 528 static void 529 encode1(struct termp *p, int c) 530 { 531 enum termfont f; 532 533 if (p->col + 4 >= p->maxcols) 534 adjbuf(p, p->col + 4); 535 536 f = term_fonttop(p); 537 538 if (TERMFONT_NONE == f) { 539 p->buf[p->col++] = c; 540 return; 541 } else if (TERMFONT_UNDER == f) { 542 p->buf[p->col++] = '_'; 543 } else 544 p->buf[p->col++] = c; 545 546 p->buf[p->col++] = 8; 547 p->buf[p->col++] = c; 548 } 549 550 static void 551 encode(struct termp *p, const char *word, size_t sz) 552 { 553 enum termfont f; 554 int i, len; 555 556 /* LINTED */ 557 len = sz; 558 559 /* 560 * Encode and buffer a string of characters. If the current 561 * font mode is unset, buffer directly, else encode then buffer 562 * character by character. 563 */ 564 565 if (TERMFONT_NONE == (f = term_fonttop(p))) { 566 if (p->col + len >= p->maxcols) 567 adjbuf(p, p->col + len); 568 for (i = 0; i < len; i++) 569 p->buf[p->col++] = word[i]; 570 return; 571 } 572 573 /* Pre-buffer, assuming worst-case. */ 574 575 if (p->col + 1 + (len * 3) >= p->maxcols) 576 adjbuf(p, p->col + 1 + (len * 3)); 577 578 for (i = 0; i < len; i++) { 579 if ( ! isgraph((unsigned char)word[i])) { 580 p->buf[p->col++] = word[i]; 581 continue; 582 } 583 584 if (TERMFONT_UNDER == f) 585 p->buf[p->col++] = '_'; 586 else 587 p->buf[p->col++] = word[i]; 588 589 p->buf[p->col++] = 8; 590 p->buf[p->col++] = word[i]; 591 } 592 } 593 594 size_t 595 term_len(const struct termp *p, size_t sz) 596 { 597 598 return((*p->width)(p, ' ') * sz); 599 } 600 601 602 size_t 603 term_strlen(const struct termp *p, const char *cp) 604 { 605 size_t sz, rsz, i; 606 int ssz, c; 607 const char *seq, *rhs; 608 enum mandoc_esc esc; 609 static const char rej[] = { '\\', ASCII_HYPH, ASCII_NBRSP, '\0' }; 610 611 /* 612 * Account for escaped sequences within string length 613 * calculations. This follows the logic in term_word() as we 614 * must calculate the width of produced strings. 615 */ 616 617 sz = 0; 618 while ('\0' != *cp) { 619 rsz = strcspn(cp, rej); 620 for (i = 0; i < rsz; i++) 621 sz += (*p->width)(p, *cp++); 622 623 c = 0; 624 switch (*cp) { 625 case ('\\'): 626 cp++; 627 esc = mandoc_escape(&cp, &seq, &ssz); 628 if (ESCAPE_ERROR == esc) 629 return(sz); 630 631 if (TERMENC_ASCII != p->enc) 632 switch (esc) { 633 case (ESCAPE_UNICODE): 634 c = mchars_num2uc 635 (seq + 1, ssz - 1); 636 if ('\0' == c) 637 break; 638 sz += (*p->width)(p, c); 639 continue; 640 case (ESCAPE_SPECIAL): 641 c = mchars_spec2cp 642 (p->symtab, seq, ssz); 643 if (c <= 0) 644 break; 645 sz += (*p->width)(p, c); 646 continue; 647 default: 648 break; 649 } 650 651 rhs = NULL; 652 653 switch (esc) { 654 case (ESCAPE_UNICODE): 655 sz += (*p->width)(p, '?'); 656 break; 657 case (ESCAPE_NUMBERED): 658 c = mchars_num2char(seq, ssz); 659 if ('\0' != c) 660 sz += (*p->width)(p, c); 661 break; 662 case (ESCAPE_SPECIAL): 663 rhs = mchars_spec2str 664 (p->symtab, seq, ssz, &rsz); 665 666 if (ssz != 1 || rhs) 667 break; 668 669 rhs = seq; 670 rsz = ssz; 671 break; 672 default: 673 break; 674 } 675 676 if (NULL == rhs) 677 break; 678 679 for (i = 0; i < rsz; i++) 680 sz += (*p->width)(p, *rhs++); 681 break; 682 case (ASCII_NBRSP): 683 sz += (*p->width)(p, ' '); 684 cp++; 685 break; 686 case (ASCII_HYPH): 687 sz += (*p->width)(p, '-'); 688 cp++; 689 break; 690 default: 691 break; 692 } 693 } 694 695 return(sz); 696 } 697 698 /* ARGSUSED */ 699 size_t 700 term_vspan(const struct termp *p, const struct roffsu *su) 701 { 702 double r; 703 704 switch (su->unit) { 705 case (SCALE_CM): 706 r = su->scale * 2; 707 break; 708 case (SCALE_IN): 709 r = su->scale * 6; 710 break; 711 case (SCALE_PC): 712 r = su->scale; 713 break; 714 case (SCALE_PT): 715 r = su->scale / 8; 716 break; 717 case (SCALE_MM): 718 r = su->scale / 1000; 719 break; 720 case (SCALE_VS): 721 r = su->scale; 722 break; 723 default: 724 r = su->scale - 1; 725 break; 726 } 727 728 if (r < 0.0) 729 r = 0.0; 730 return(/* LINTED */(size_t) 731 r); 732 } 733 734 size_t 735 term_hspan(const struct termp *p, const struct roffsu *su) 736 { 737 double v; 738 739 v = ((*p->hspan)(p, su)); 740 if (v < 0.0) 741 v = 0.0; 742 return((size_t) /* LINTED */ 743 v); 744 } 745