1 /* $OpenBSD: mdoc.c,v 1.146 2016/08/20 14:43:39 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include <sys/types.h> 19 20 #include <assert.h> 21 #include <ctype.h> 22 #include <stdarg.h> 23 #include <stdio.h> 24 #include <stdlib.h> 25 #include <string.h> 26 #include <time.h> 27 28 #include "mandoc_aux.h" 29 #include "mandoc.h" 30 #include "roff.h" 31 #include "mdoc.h" 32 #include "libmandoc.h" 33 #include "roff_int.h" 34 #include "libmdoc.h" 35 36 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 37 "text", 38 "Ap", "Dd", "Dt", "Os", 39 "Sh", "Ss", "Pp", "D1", 40 "Dl", "Bd", "Ed", "Bl", 41 "El", "It", "Ad", "An", 42 "Ar", "Cd", "Cm", "Dv", 43 "Er", "Ev", "Ex", "Fa", 44 "Fd", "Fl", "Fn", "Ft", 45 "Ic", "In", "Li", "Nd", 46 "Nm", "Op", "Ot", "Pa", 47 "Rv", "St", "Va", "Vt", 48 "Xr", "%A", "%B", "%D", 49 "%I", "%J", "%N", "%O", 50 "%P", "%R", "%T", "%V", 51 "Ac", "Ao", "Aq", "At", 52 "Bc", "Bf", "Bo", "Bq", 53 "Bsx", "Bx", "Db", "Dc", 54 "Do", "Dq", "Ec", "Ef", 55 "Em", "Eo", "Fx", "Ms", 56 "No", "Ns", "Nx", "Ox", 57 "Pc", "Pf", "Po", "Pq", 58 "Qc", "Ql", "Qo", "Qq", 59 "Re", "Rs", "Sc", "So", 60 "Sq", "Sm", "Sx", "Sy", 61 "Tn", "Ux", "Xc", "Xo", 62 "Fo", "Fc", "Oo", "Oc", 63 "Bk", "Ek", "Bt", "Hf", 64 "Fr", "Ud", "Lb", "Lp", 65 "Lk", "Mt", "Brq", "Bro", 66 "Brc", "%C", "Es", "En", 67 "Dx", "%Q", "br", "sp", 68 "%U", "Ta", "ll", 69 }; 70 71 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 72 "split", "nosplit", "ragged", 73 "unfilled", "literal", "file", 74 "offset", "bullet", "dash", 75 "hyphen", "item", "enum", 76 "tag", "diag", "hang", 77 "ohang", "inset", "column", 78 "width", "compact", "std", 79 "filled", "words", "emphasis", 80 "symbolic", "nested", "centered" 81 }; 82 83 const char * const *mdoc_macronames = __mdoc_macronames + 1; 84 const char * const *mdoc_argnames = __mdoc_argnames; 85 86 static int mdoc_ptext(struct roff_man *, int, char *, int); 87 static int mdoc_pmacro(struct roff_man *, int, char *, int); 88 89 90 /* 91 * Main parse routine. Parses a single line -- really just hands off to 92 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 93 */ 94 int 95 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 96 { 97 98 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 99 mdoc->flags |= MDOC_NEWLINE; 100 101 /* 102 * Let the roff nS register switch SYNOPSIS mode early, 103 * such that the parser knows at all times 104 * whether this mode is on or off. 105 * Note that this mode is also switched by the Sh macro. 106 */ 107 if (roff_getreg(mdoc->roff, "nS")) 108 mdoc->flags |= MDOC_SYNOPSIS; 109 else 110 mdoc->flags &= ~MDOC_SYNOPSIS; 111 112 return roff_getcontrol(mdoc->roff, buf, &offs) ? 113 mdoc_pmacro(mdoc, ln, buf, offs) : 114 mdoc_ptext(mdoc, ln, buf, offs); 115 } 116 117 void 118 mdoc_macro(MACRO_PROT_ARGS) 119 { 120 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 121 122 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 123 } 124 125 void 126 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 127 { 128 struct roff_node *p; 129 130 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 131 roff_node_append(mdoc, p); 132 mdoc->next = ROFF_NEXT_CHILD; 133 } 134 135 struct roff_node * 136 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 137 struct roff_node *body, enum mdoc_endbody end) 138 { 139 struct roff_node *p; 140 141 body->flags |= MDOC_ENDED; 142 body->parent->flags |= MDOC_ENDED; 143 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 144 p->body = body; 145 p->norm = body->norm; 146 p->end = end; 147 roff_node_append(mdoc, p); 148 mdoc->next = ROFF_NEXT_SIBLING; 149 return p; 150 } 151 152 struct roff_node * 153 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 154 int tok, struct mdoc_arg *args) 155 { 156 struct roff_node *p; 157 158 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 159 p->args = args; 160 if (p->args) 161 (args->refcnt)++; 162 163 switch (tok) { 164 case MDOC_Bd: 165 case MDOC_Bf: 166 case MDOC_Bl: 167 case MDOC_En: 168 case MDOC_Rs: 169 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 170 break; 171 default: 172 break; 173 } 174 roff_node_append(mdoc, p); 175 mdoc->next = ROFF_NEXT_CHILD; 176 return p; 177 } 178 179 void 180 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 181 int tok, struct mdoc_arg *args) 182 { 183 struct roff_node *p; 184 185 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 186 p->args = args; 187 if (p->args) 188 (args->refcnt)++; 189 190 switch (tok) { 191 case MDOC_An: 192 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 193 break; 194 default: 195 break; 196 } 197 roff_node_append(mdoc, p); 198 mdoc->next = ROFF_NEXT_CHILD; 199 } 200 201 void 202 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 203 { 204 205 roff_node_unlink(mdoc, p); 206 p->prev = p->next = NULL; 207 roff_node_append(mdoc, p); 208 } 209 210 /* 211 * Parse free-form text, that is, a line that does not begin with the 212 * control character. 213 */ 214 static int 215 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 216 { 217 struct roff_node *n; 218 char *c, *ws, *end; 219 220 n = mdoc->last; 221 222 /* 223 * If a column list contains plain text, assume an implicit item 224 * macro. This can happen one or more times at the beginning 225 * of such a list, intermixed with non-It mdoc macros and with 226 * nodes generated on the roff level, for example by tbl. 227 */ 228 229 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 230 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 231 (n->parent != NULL && n->parent->tok == MDOC_Bl && 232 n->parent->norm->Bl.type == LIST_column)) { 233 mdoc->flags |= MDOC_FREECOL; 234 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 235 return 1; 236 } 237 238 /* 239 * Search for the beginning of unescaped trailing whitespace (ws) 240 * and for the first character not to be output (end). 241 */ 242 243 /* FIXME: replace with strcspn(). */ 244 ws = NULL; 245 for (c = end = buf + offs; *c; c++) { 246 switch (*c) { 247 case ' ': 248 if (NULL == ws) 249 ws = c; 250 continue; 251 case '\t': 252 /* 253 * Always warn about trailing tabs, 254 * even outside literal context, 255 * where they should be put on the next line. 256 */ 257 if (NULL == ws) 258 ws = c; 259 /* 260 * Strip trailing tabs in literal context only; 261 * outside, they affect the next line. 262 */ 263 if (MDOC_LITERAL & mdoc->flags) 264 continue; 265 break; 266 case '\\': 267 /* Skip the escaped character, too, if any. */ 268 if (c[1]) 269 c++; 270 /* FALLTHROUGH */ 271 default: 272 ws = NULL; 273 break; 274 } 275 end = c + 1; 276 } 277 *end = '\0'; 278 279 if (ws) 280 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 281 line, (int)(ws-buf), NULL); 282 283 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 284 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 285 line, (int)(c - buf), NULL); 286 287 /* 288 * Insert a `sp' in the case of a blank line. Technically, 289 * blank lines aren't allowed, but enough manuals assume this 290 * behaviour that we want to work around it. 291 */ 292 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 293 mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; 294 mdoc->next = ROFF_NEXT_SIBLING; 295 return 1; 296 } 297 298 roff_word_alloc(mdoc, line, offs, buf+offs); 299 300 if (mdoc->flags & MDOC_LITERAL) 301 return 1; 302 303 /* 304 * End-of-sentence check. If the last character is an unescaped 305 * EOS character, then flag the node as being the end of a 306 * sentence. The front-end will know how to interpret this. 307 */ 308 309 assert(buf < end); 310 311 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 312 mdoc->last->flags |= MDOC_EOS; 313 return 1; 314 } 315 316 /* 317 * Parse a macro line, that is, a line beginning with the control 318 * character. 319 */ 320 static int 321 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 322 { 323 struct roff_node *n; 324 const char *cp; 325 int tok; 326 int i, sv; 327 char mac[5]; 328 329 sv = offs; 330 331 /* 332 * Copy the first word into a nil-terminated buffer. 333 * Stop when a space, tab, escape, or eoln is encountered. 334 */ 335 336 i = 0; 337 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 338 mac[i++] = buf[offs++]; 339 340 mac[i] = '\0'; 341 342 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 343 344 if (tok == TOKEN_NONE) { 345 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 346 ln, sv, buf + sv - 1); 347 return 1; 348 } 349 350 /* Skip a leading escape sequence or tab. */ 351 352 switch (buf[offs]) { 353 case '\\': 354 cp = buf + offs + 1; 355 mandoc_escape(&cp, NULL, NULL); 356 offs = cp - buf; 357 break; 358 case '\t': 359 offs++; 360 break; 361 default: 362 break; 363 } 364 365 /* Jump to the next non-whitespace word. */ 366 367 while (buf[offs] && ' ' == buf[offs]) 368 offs++; 369 370 /* 371 * Trailing whitespace. Note that tabs are allowed to be passed 372 * into the parser as "text", so we only warn about spaces here. 373 */ 374 375 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 376 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 377 ln, offs - 1, NULL); 378 379 /* 380 * If an initial macro or a list invocation, divert directly 381 * into macro processing. 382 */ 383 384 n = mdoc->last; 385 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 386 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 387 return 1; 388 } 389 390 /* 391 * If a column list contains a non-It macro, assume an implicit 392 * item macro. This can happen one or more times at the 393 * beginning of such a list, intermixed with text lines and 394 * with nodes generated on the roff level, for example by tbl. 395 */ 396 397 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 398 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 399 (n->parent != NULL && n->parent->tok == MDOC_Bl && 400 n->parent->norm->Bl.type == LIST_column)) { 401 mdoc->flags |= MDOC_FREECOL; 402 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 403 return 1; 404 } 405 406 /* Normal processing of a macro. */ 407 408 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 409 410 /* In quick mode (for mandocdb), abort after the NAME section. */ 411 412 if (mdoc->quick && MDOC_Sh == tok && 413 SEC_NAME != mdoc->last->sec) 414 return 2; 415 416 return 1; 417 } 418 419 enum mdelim 420 mdoc_isdelim(const char *p) 421 { 422 423 if ('\0' == p[0]) 424 return DELIM_NONE; 425 426 if ('\0' == p[1]) 427 switch (p[0]) { 428 case '(': 429 case '[': 430 return DELIM_OPEN; 431 case '|': 432 return DELIM_MIDDLE; 433 case '.': 434 case ',': 435 case ';': 436 case ':': 437 case '?': 438 case '!': 439 case ')': 440 case ']': 441 return DELIM_CLOSE; 442 default: 443 return DELIM_NONE; 444 } 445 446 if ('\\' != p[0]) 447 return DELIM_NONE; 448 449 if (0 == strcmp(p + 1, ".")) 450 return DELIM_CLOSE; 451 if (0 == strcmp(p + 1, "fR|\\fP")) 452 return DELIM_MIDDLE; 453 454 return DELIM_NONE; 455 } 456 457 void 458 mdoc_validate(struct roff_man *mdoc) 459 { 460 461 mdoc->last = mdoc->first; 462 mdoc_node_validate(mdoc); 463 mdoc_state_reset(mdoc); 464 } 465