1*60e1e752SSascha Wildner /* $Id: read.c,v 1.10 2011/04/03 10:11:25 kristaps Exp $ */ 2*60e1e752SSascha Wildner /* 3*60e1e752SSascha Wildner * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*60e1e752SSascha Wildner * Copyright (c) 2010, 2011 Ingo Schwarze <schwarze@openbsd.org> 5*60e1e752SSascha Wildner * 6*60e1e752SSascha Wildner * Permission to use, copy, modify, and distribute this software for any 7*60e1e752SSascha Wildner * purpose with or without fee is hereby granted, provided that the above 8*60e1e752SSascha Wildner * copyright notice and this permission notice appear in all copies. 9*60e1e752SSascha Wildner * 10*60e1e752SSascha Wildner * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11*60e1e752SSascha Wildner * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*60e1e752SSascha Wildner * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13*60e1e752SSascha Wildner * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*60e1e752SSascha Wildner * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*60e1e752SSascha Wildner * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*60e1e752SSascha Wildner * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17*60e1e752SSascha Wildner */ 18*60e1e752SSascha Wildner #include <sys/stat.h> 19*60e1e752SSascha Wildner #include <sys/mman.h> 20*60e1e752SSascha Wildner 21*60e1e752SSascha Wildner #include <assert.h> 22*60e1e752SSascha Wildner #include <ctype.h> 23*60e1e752SSascha Wildner #include <fcntl.h> 24*60e1e752SSascha Wildner #include <stdarg.h> 25*60e1e752SSascha Wildner #include <stdio.h> 26*60e1e752SSascha Wildner #include <stdlib.h> 27*60e1e752SSascha Wildner #include <string.h> 28*60e1e752SSascha Wildner #include <unistd.h> 29*60e1e752SSascha Wildner 30*60e1e752SSascha Wildner #include "mandoc.h" 31*60e1e752SSascha Wildner #include "libmandoc.h" 32*60e1e752SSascha Wildner #include "mdoc.h" 33*60e1e752SSascha Wildner #include "man.h" 34*60e1e752SSascha Wildner 35*60e1e752SSascha Wildner #ifndef MAP_FILE 36*60e1e752SSascha Wildner #define MAP_FILE 0 37*60e1e752SSascha Wildner #endif 38*60e1e752SSascha Wildner 39*60e1e752SSascha Wildner #define REPARSE_LIMIT 1000 40*60e1e752SSascha Wildner 41*60e1e752SSascha Wildner struct buf { 42*60e1e752SSascha Wildner char *buf; /* binary input buffer */ 43*60e1e752SSascha Wildner size_t sz; /* size of binary buffer */ 44*60e1e752SSascha Wildner }; 45*60e1e752SSascha Wildner 46*60e1e752SSascha Wildner struct mparse { 47*60e1e752SSascha Wildner enum mandoclevel file_status; /* status of current parse */ 48*60e1e752SSascha Wildner enum mandoclevel wlevel; /* ignore messages below this */ 49*60e1e752SSascha Wildner int line; /* line number in the file */ 50*60e1e752SSascha Wildner enum mparset inttype; /* which parser to use */ 51*60e1e752SSascha Wildner struct man *pman; /* persistent man parser */ 52*60e1e752SSascha Wildner struct mdoc *pmdoc; /* persistent mdoc parser */ 53*60e1e752SSascha Wildner struct man *man; /* man parser */ 54*60e1e752SSascha Wildner struct mdoc *mdoc; /* mdoc parser */ 55*60e1e752SSascha Wildner struct roff *roff; /* roff parser (!NULL) */ 56*60e1e752SSascha Wildner struct regset regs; /* roff registers */ 57*60e1e752SSascha Wildner int reparse_count; /* finite interp. stack */ 58*60e1e752SSascha Wildner mandocmsg mmsg; /* warning/error message handler */ 59*60e1e752SSascha Wildner void *arg; /* argument to mmsg */ 60*60e1e752SSascha Wildner const char *file; 61*60e1e752SSascha Wildner }; 62*60e1e752SSascha Wildner 63*60e1e752SSascha Wildner static void resize_buf(struct buf *, size_t); 64*60e1e752SSascha Wildner static void mparse_buf_r(struct mparse *, struct buf, int); 65*60e1e752SSascha Wildner static void mparse_readfd_r(struct mparse *, int, const char *, int); 66*60e1e752SSascha Wildner static void pset(const char *, int, struct mparse *); 67*60e1e752SSascha Wildner static void pdesc(struct mparse *, const char *, int); 68*60e1e752SSascha Wildner static int read_whole_file(const char *, int, struct buf *, int *); 69*60e1e752SSascha Wildner static void mparse_end(struct mparse *); 70*60e1e752SSascha Wildner 71*60e1e752SSascha Wildner static const enum mandocerr mandoclimits[MANDOCLEVEL_MAX] = { 72*60e1e752SSascha Wildner MANDOCERR_OK, 73*60e1e752SSascha Wildner MANDOCERR_WARNING, 74*60e1e752SSascha Wildner MANDOCERR_WARNING, 75*60e1e752SSascha Wildner MANDOCERR_ERROR, 76*60e1e752SSascha Wildner MANDOCERR_FATAL, 77*60e1e752SSascha Wildner MANDOCERR_MAX, 78*60e1e752SSascha Wildner MANDOCERR_MAX 79*60e1e752SSascha Wildner }; 80*60e1e752SSascha Wildner 81*60e1e752SSascha Wildner static const char * const mandocerrs[MANDOCERR_MAX] = { 82*60e1e752SSascha Wildner "ok", 83*60e1e752SSascha Wildner 84*60e1e752SSascha Wildner "generic warning", 85*60e1e752SSascha Wildner 86*60e1e752SSascha Wildner /* related to the prologue */ 87*60e1e752SSascha Wildner "no title in document", 88*60e1e752SSascha Wildner "document title should be all caps", 89*60e1e752SSascha Wildner "unknown manual section", 90*60e1e752SSascha Wildner "date missing, using today's date", 91*60e1e752SSascha Wildner "cannot parse date, using it verbatim", 92*60e1e752SSascha Wildner "prologue macros out of order", 93*60e1e752SSascha Wildner "duplicate prologue macro", 94*60e1e752SSascha Wildner "macro not allowed in prologue", 95*60e1e752SSascha Wildner "macro not allowed in body", 96*60e1e752SSascha Wildner 97*60e1e752SSascha Wildner /* related to document structure */ 98*60e1e752SSascha Wildner ".so is fragile, better use ln(1)", 99*60e1e752SSascha Wildner "NAME section must come first", 100*60e1e752SSascha Wildner "bad NAME section contents", 101*60e1e752SSascha Wildner "manual name not yet set", 102*60e1e752SSascha Wildner "sections out of conventional order", 103*60e1e752SSascha Wildner "duplicate section name", 104*60e1e752SSascha Wildner "section not in conventional manual section", 105*60e1e752SSascha Wildner 106*60e1e752SSascha Wildner /* related to macros and nesting */ 107*60e1e752SSascha Wildner "skipping obsolete macro", 108*60e1e752SSascha Wildner "skipping paragraph macro", 109*60e1e752SSascha Wildner "skipping no-space macro", 110*60e1e752SSascha Wildner "blocks badly nested", 111*60e1e752SSascha Wildner "child violates parent syntax", 112*60e1e752SSascha Wildner "nested displays are not portable", 113*60e1e752SSascha Wildner "already in literal mode", 114*60e1e752SSascha Wildner "line scope broken", 115*60e1e752SSascha Wildner 116*60e1e752SSascha Wildner /* related to missing macro arguments */ 117*60e1e752SSascha Wildner "skipping empty macro", 118*60e1e752SSascha Wildner "argument count wrong", 119*60e1e752SSascha Wildner "missing display type", 120*60e1e752SSascha Wildner "list type must come first", 121*60e1e752SSascha Wildner "tag lists require a width argument", 122*60e1e752SSascha Wildner "missing font type", 123*60e1e752SSascha Wildner "skipping end of block that is not open", 124*60e1e752SSascha Wildner 125*60e1e752SSascha Wildner /* related to bad macro arguments */ 126*60e1e752SSascha Wildner "skipping argument", 127*60e1e752SSascha Wildner "duplicate argument", 128*60e1e752SSascha Wildner "duplicate display type", 129*60e1e752SSascha Wildner "duplicate list type", 130*60e1e752SSascha Wildner "unknown AT&T UNIX version", 131*60e1e752SSascha Wildner "bad Boolean value", 132*60e1e752SSascha Wildner "unknown font", 133*60e1e752SSascha Wildner "unknown standard specifier", 134*60e1e752SSascha Wildner "bad width argument", 135*60e1e752SSascha Wildner 136*60e1e752SSascha Wildner /* related to plain text */ 137*60e1e752SSascha Wildner "blank line in non-literal context", 138*60e1e752SSascha Wildner "tab in non-literal context", 139*60e1e752SSascha Wildner "end of line whitespace", 140*60e1e752SSascha Wildner "bad comment style", 141*60e1e752SSascha Wildner "unknown escape sequence", 142*60e1e752SSascha Wildner "unterminated quoted string", 143*60e1e752SSascha Wildner 144*60e1e752SSascha Wildner "generic error", 145*60e1e752SSascha Wildner 146*60e1e752SSascha Wildner /* related to tables */ 147*60e1e752SSascha Wildner "bad table syntax", 148*60e1e752SSascha Wildner "bad table option", 149*60e1e752SSascha Wildner "bad table layout", 150*60e1e752SSascha Wildner "no table layout cells specified", 151*60e1e752SSascha Wildner "no table data cells specified", 152*60e1e752SSascha Wildner "ignore data in cell", 153*60e1e752SSascha Wildner "data block still open", 154*60e1e752SSascha Wildner "ignoring extra data cells", 155*60e1e752SSascha Wildner 156*60e1e752SSascha Wildner "input stack limit exceeded, infinite loop?", 157*60e1e752SSascha Wildner "skipping bad character", 158*60e1e752SSascha Wildner "escaped character not allowed in a name", 159*60e1e752SSascha Wildner "skipping text before the first section header", 160*60e1e752SSascha Wildner "skipping unknown macro", 161*60e1e752SSascha Wildner "NOT IMPLEMENTED, please use groff: skipping request", 162*60e1e752SSascha Wildner "argument count wrong", 163*60e1e752SSascha Wildner "skipping end of block that is not open", 164*60e1e752SSascha Wildner "missing end of block", 165*60e1e752SSascha Wildner "scope open on exit", 166*60e1e752SSascha Wildner "uname(3) system call failed", 167*60e1e752SSascha Wildner "macro requires line argument(s)", 168*60e1e752SSascha Wildner "macro requires body argument(s)", 169*60e1e752SSascha Wildner "macro requires argument(s)", 170*60e1e752SSascha Wildner "missing list type", 171*60e1e752SSascha Wildner "line argument(s) will be lost", 172*60e1e752SSascha Wildner "body argument(s) will be lost", 173*60e1e752SSascha Wildner 174*60e1e752SSascha Wildner "generic fatal error", 175*60e1e752SSascha Wildner 176*60e1e752SSascha Wildner "not a manual", 177*60e1e752SSascha Wildner "column syntax is inconsistent", 178*60e1e752SSascha Wildner "NOT IMPLEMENTED: .Bd -file", 179*60e1e752SSascha Wildner "line scope broken, syntax violated", 180*60e1e752SSascha Wildner "argument count wrong, violates syntax", 181*60e1e752SSascha Wildner "child violates parent syntax", 182*60e1e752SSascha Wildner "argument count wrong, violates syntax", 183*60e1e752SSascha Wildner "NOT IMPLEMENTED: .so with absolute path or \"..\"", 184*60e1e752SSascha Wildner "no document body", 185*60e1e752SSascha Wildner "no document prologue", 186*60e1e752SSascha Wildner "static buffer exhausted", 187*60e1e752SSascha Wildner }; 188*60e1e752SSascha Wildner 189*60e1e752SSascha Wildner static const char * const mandoclevels[MANDOCLEVEL_MAX] = { 190*60e1e752SSascha Wildner "SUCCESS", 191*60e1e752SSascha Wildner "RESERVED", 192*60e1e752SSascha Wildner "WARNING", 193*60e1e752SSascha Wildner "ERROR", 194*60e1e752SSascha Wildner "FATAL", 195*60e1e752SSascha Wildner "BADARG", 196*60e1e752SSascha Wildner "SYSERR" 197*60e1e752SSascha Wildner }; 198*60e1e752SSascha Wildner 199*60e1e752SSascha Wildner static void 200*60e1e752SSascha Wildner resize_buf(struct buf *buf, size_t initial) 201*60e1e752SSascha Wildner { 202*60e1e752SSascha Wildner 203*60e1e752SSascha Wildner buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial; 204*60e1e752SSascha Wildner buf->buf = mandoc_realloc(buf->buf, buf->sz); 205*60e1e752SSascha Wildner } 206*60e1e752SSascha Wildner 207*60e1e752SSascha Wildner static void 208*60e1e752SSascha Wildner pset(const char *buf, int pos, struct mparse *curp) 209*60e1e752SSascha Wildner { 210*60e1e752SSascha Wildner int i; 211*60e1e752SSascha Wildner 212*60e1e752SSascha Wildner /* 213*60e1e752SSascha Wildner * Try to intuit which kind of manual parser should be used. If 214*60e1e752SSascha Wildner * passed in by command-line (-man, -mdoc), then use that 215*60e1e752SSascha Wildner * explicitly. If passed as -mandoc, then try to guess from the 216*60e1e752SSascha Wildner * line: either skip dot-lines, use -mdoc when finding `.Dt', or 217*60e1e752SSascha Wildner * default to -man, which is more lenient. 218*60e1e752SSascha Wildner * 219*60e1e752SSascha Wildner * Separate out pmdoc/pman from mdoc/man: the first persists 220*60e1e752SSascha Wildner * through all parsers, while the latter is used per-parse. 221*60e1e752SSascha Wildner */ 222*60e1e752SSascha Wildner 223*60e1e752SSascha Wildner if ('.' == buf[0] || '\'' == buf[0]) { 224*60e1e752SSascha Wildner for (i = 1; buf[i]; i++) 225*60e1e752SSascha Wildner if (' ' != buf[i] && '\t' != buf[i]) 226*60e1e752SSascha Wildner break; 227*60e1e752SSascha Wildner if ('\0' == buf[i]) 228*60e1e752SSascha Wildner return; 229*60e1e752SSascha Wildner } 230*60e1e752SSascha Wildner 231*60e1e752SSascha Wildner switch (curp->inttype) { 232*60e1e752SSascha Wildner case (MPARSE_MDOC): 233*60e1e752SSascha Wildner if (NULL == curp->pmdoc) 234*60e1e752SSascha Wildner curp->pmdoc = mdoc_alloc(&curp->regs, curp); 235*60e1e752SSascha Wildner assert(curp->pmdoc); 236*60e1e752SSascha Wildner curp->mdoc = curp->pmdoc; 237*60e1e752SSascha Wildner return; 238*60e1e752SSascha Wildner case (MPARSE_MAN): 239*60e1e752SSascha Wildner if (NULL == curp->pman) 240*60e1e752SSascha Wildner curp->pman = man_alloc(&curp->regs, curp); 241*60e1e752SSascha Wildner assert(curp->pman); 242*60e1e752SSascha Wildner curp->man = curp->pman; 243*60e1e752SSascha Wildner return; 244*60e1e752SSascha Wildner default: 245*60e1e752SSascha Wildner break; 246*60e1e752SSascha Wildner } 247*60e1e752SSascha Wildner 248*60e1e752SSascha Wildner if (pos >= 3 && 0 == memcmp(buf, ".Dd", 3)) { 249*60e1e752SSascha Wildner if (NULL == curp->pmdoc) 250*60e1e752SSascha Wildner curp->pmdoc = mdoc_alloc(&curp->regs, curp); 251*60e1e752SSascha Wildner assert(curp->pmdoc); 252*60e1e752SSascha Wildner curp->mdoc = curp->pmdoc; 253*60e1e752SSascha Wildner return; 254*60e1e752SSascha Wildner } 255*60e1e752SSascha Wildner 256*60e1e752SSascha Wildner if (NULL == curp->pman) 257*60e1e752SSascha Wildner curp->pman = man_alloc(&curp->regs, curp); 258*60e1e752SSascha Wildner assert(curp->pman); 259*60e1e752SSascha Wildner curp->man = curp->pman; 260*60e1e752SSascha Wildner } 261*60e1e752SSascha Wildner 262*60e1e752SSascha Wildner /* 263*60e1e752SSascha Wildner * Main parse routine for an opened file. This is called for each 264*60e1e752SSascha Wildner * opened file and simply loops around the full input file, possibly 265*60e1e752SSascha Wildner * nesting (i.e., with `so'). 266*60e1e752SSascha Wildner */ 267*60e1e752SSascha Wildner static void 268*60e1e752SSascha Wildner mparse_buf_r(struct mparse *curp, struct buf blk, int start) 269*60e1e752SSascha Wildner { 270*60e1e752SSascha Wildner const struct tbl_span *span; 271*60e1e752SSascha Wildner struct buf ln; 272*60e1e752SSascha Wildner enum rofferr rr; 273*60e1e752SSascha Wildner int i, of, rc; 274*60e1e752SSascha Wildner int pos; /* byte number in the ln buffer */ 275*60e1e752SSascha Wildner int lnn; /* line number in the real file */ 276*60e1e752SSascha Wildner unsigned char c; 277*60e1e752SSascha Wildner 278*60e1e752SSascha Wildner memset(&ln, 0, sizeof(struct buf)); 279*60e1e752SSascha Wildner 280*60e1e752SSascha Wildner lnn = curp->line; 281*60e1e752SSascha Wildner pos = 0; 282*60e1e752SSascha Wildner 283*60e1e752SSascha Wildner for (i = 0; i < (int)blk.sz; ) { 284*60e1e752SSascha Wildner if (0 == pos && '\0' == blk.buf[i]) 285*60e1e752SSascha Wildner break; 286*60e1e752SSascha Wildner 287*60e1e752SSascha Wildner if (start) { 288*60e1e752SSascha Wildner curp->line = lnn; 289*60e1e752SSascha Wildner curp->reparse_count = 0; 290*60e1e752SSascha Wildner } 291*60e1e752SSascha Wildner 292*60e1e752SSascha Wildner while (i < (int)blk.sz && (start || '\0' != blk.buf[i])) { 293*60e1e752SSascha Wildner 294*60e1e752SSascha Wildner /* 295*60e1e752SSascha Wildner * When finding an unescaped newline character, 296*60e1e752SSascha Wildner * leave the character loop to process the line. 297*60e1e752SSascha Wildner * Skip a preceding carriage return, if any. 298*60e1e752SSascha Wildner */ 299*60e1e752SSascha Wildner 300*60e1e752SSascha Wildner if ('\r' == blk.buf[i] && i + 1 < (int)blk.sz && 301*60e1e752SSascha Wildner '\n' == blk.buf[i + 1]) 302*60e1e752SSascha Wildner ++i; 303*60e1e752SSascha Wildner if ('\n' == blk.buf[i]) { 304*60e1e752SSascha Wildner ++i; 305*60e1e752SSascha Wildner ++lnn; 306*60e1e752SSascha Wildner break; 307*60e1e752SSascha Wildner } 308*60e1e752SSascha Wildner 309*60e1e752SSascha Wildner /* 310*60e1e752SSascha Wildner * Warn about bogus characters. If you're using 311*60e1e752SSascha Wildner * non-ASCII encoding, you're screwing your 312*60e1e752SSascha Wildner * readers. Since I'd rather this not happen, 313*60e1e752SSascha Wildner * I'll be helpful and drop these characters so 314*60e1e752SSascha Wildner * we don't display gibberish. Note to manual 315*60e1e752SSascha Wildner * writers: use special characters. 316*60e1e752SSascha Wildner */ 317*60e1e752SSascha Wildner 318*60e1e752SSascha Wildner c = (unsigned char) blk.buf[i]; 319*60e1e752SSascha Wildner 320*60e1e752SSascha Wildner if ( ! (isascii(c) && 321*60e1e752SSascha Wildner (isgraph(c) || isblank(c)))) { 322*60e1e752SSascha Wildner mandoc_msg(MANDOCERR_BADCHAR, curp, 323*60e1e752SSascha Wildner curp->line, pos, "ignoring byte"); 324*60e1e752SSascha Wildner i++; 325*60e1e752SSascha Wildner continue; 326*60e1e752SSascha Wildner } 327*60e1e752SSascha Wildner 328*60e1e752SSascha Wildner /* Trailing backslash = a plain char. */ 329*60e1e752SSascha Wildner 330*60e1e752SSascha Wildner if ('\\' != blk.buf[i] || i + 1 == (int)blk.sz) { 331*60e1e752SSascha Wildner if (pos >= (int)ln.sz) 332*60e1e752SSascha Wildner resize_buf(&ln, 256); 333*60e1e752SSascha Wildner ln.buf[pos++] = blk.buf[i++]; 334*60e1e752SSascha Wildner continue; 335*60e1e752SSascha Wildner } 336*60e1e752SSascha Wildner 337*60e1e752SSascha Wildner /* 338*60e1e752SSascha Wildner * Found escape and at least one other character. 339*60e1e752SSascha Wildner * When it's a newline character, skip it. 340*60e1e752SSascha Wildner * When there is a carriage return in between, 341*60e1e752SSascha Wildner * skip that one as well. 342*60e1e752SSascha Wildner */ 343*60e1e752SSascha Wildner 344*60e1e752SSascha Wildner if ('\r' == blk.buf[i + 1] && i + 2 < (int)blk.sz && 345*60e1e752SSascha Wildner '\n' == blk.buf[i + 2]) 346*60e1e752SSascha Wildner ++i; 347*60e1e752SSascha Wildner if ('\n' == blk.buf[i + 1]) { 348*60e1e752SSascha Wildner i += 2; 349*60e1e752SSascha Wildner ++lnn; 350*60e1e752SSascha Wildner continue; 351*60e1e752SSascha Wildner } 352*60e1e752SSascha Wildner 353*60e1e752SSascha Wildner if ('"' == blk.buf[i + 1]) { 354*60e1e752SSascha Wildner i += 2; 355*60e1e752SSascha Wildner /* Comment, skip to end of line */ 356*60e1e752SSascha Wildner for (; i < (int)blk.sz; ++i) { 357*60e1e752SSascha Wildner if ('\n' == blk.buf[i]) { 358*60e1e752SSascha Wildner ++i; 359*60e1e752SSascha Wildner ++lnn; 360*60e1e752SSascha Wildner break; 361*60e1e752SSascha Wildner } 362*60e1e752SSascha Wildner } 363*60e1e752SSascha Wildner 364*60e1e752SSascha Wildner /* Backout trailing whitespaces */ 365*60e1e752SSascha Wildner for (; pos > 0; --pos) { 366*60e1e752SSascha Wildner if (ln.buf[pos - 1] != ' ') 367*60e1e752SSascha Wildner break; 368*60e1e752SSascha Wildner if (pos > 2 && ln.buf[pos - 2] == '\\') 369*60e1e752SSascha Wildner break; 370*60e1e752SSascha Wildner } 371*60e1e752SSascha Wildner break; 372*60e1e752SSascha Wildner } 373*60e1e752SSascha Wildner 374*60e1e752SSascha Wildner /* Some other escape sequence, copy & cont. */ 375*60e1e752SSascha Wildner 376*60e1e752SSascha Wildner if (pos + 1 >= (int)ln.sz) 377*60e1e752SSascha Wildner resize_buf(&ln, 256); 378*60e1e752SSascha Wildner 379*60e1e752SSascha Wildner ln.buf[pos++] = blk.buf[i++]; 380*60e1e752SSascha Wildner ln.buf[pos++] = blk.buf[i++]; 381*60e1e752SSascha Wildner } 382*60e1e752SSascha Wildner 383*60e1e752SSascha Wildner if (pos >= (int)ln.sz) 384*60e1e752SSascha Wildner resize_buf(&ln, 256); 385*60e1e752SSascha Wildner 386*60e1e752SSascha Wildner ln.buf[pos] = '\0'; 387*60e1e752SSascha Wildner 388*60e1e752SSascha Wildner /* 389*60e1e752SSascha Wildner * A significant amount of complexity is contained by 390*60e1e752SSascha Wildner * the roff preprocessor. It's line-oriented but can be 391*60e1e752SSascha Wildner * expressed on one line, so we need at times to 392*60e1e752SSascha Wildner * readjust our starting point and re-run it. The roff 393*60e1e752SSascha Wildner * preprocessor can also readjust the buffers with new 394*60e1e752SSascha Wildner * data, so we pass them in wholesale. 395*60e1e752SSascha Wildner */ 396*60e1e752SSascha Wildner 397*60e1e752SSascha Wildner of = 0; 398*60e1e752SSascha Wildner 399*60e1e752SSascha Wildner rerun: 400*60e1e752SSascha Wildner rr = roff_parseln 401*60e1e752SSascha Wildner (curp->roff, curp->line, 402*60e1e752SSascha Wildner &ln.buf, &ln.sz, of, &of); 403*60e1e752SSascha Wildner 404*60e1e752SSascha Wildner switch (rr) { 405*60e1e752SSascha Wildner case (ROFF_REPARSE): 406*60e1e752SSascha Wildner if (REPARSE_LIMIT >= ++curp->reparse_count) 407*60e1e752SSascha Wildner mparse_buf_r(curp, ln, 0); 408*60e1e752SSascha Wildner else 409*60e1e752SSascha Wildner mandoc_msg(MANDOCERR_ROFFLOOP, curp, 410*60e1e752SSascha Wildner curp->line, pos, NULL); 411*60e1e752SSascha Wildner pos = 0; 412*60e1e752SSascha Wildner continue; 413*60e1e752SSascha Wildner case (ROFF_APPEND): 414*60e1e752SSascha Wildner pos = (int)strlen(ln.buf); 415*60e1e752SSascha Wildner continue; 416*60e1e752SSascha Wildner case (ROFF_RERUN): 417*60e1e752SSascha Wildner goto rerun; 418*60e1e752SSascha Wildner case (ROFF_IGN): 419*60e1e752SSascha Wildner pos = 0; 420*60e1e752SSascha Wildner continue; 421*60e1e752SSascha Wildner case (ROFF_ERR): 422*60e1e752SSascha Wildner assert(MANDOCLEVEL_FATAL <= curp->file_status); 423*60e1e752SSascha Wildner break; 424*60e1e752SSascha Wildner case (ROFF_SO): 425*60e1e752SSascha Wildner mparse_readfd_r(curp, -1, ln.buf + of, 1); 426*60e1e752SSascha Wildner if (MANDOCLEVEL_FATAL <= curp->file_status) 427*60e1e752SSascha Wildner break; 428*60e1e752SSascha Wildner pos = 0; 429*60e1e752SSascha Wildner continue; 430*60e1e752SSascha Wildner default: 431*60e1e752SSascha Wildner break; 432*60e1e752SSascha Wildner } 433*60e1e752SSascha Wildner 434*60e1e752SSascha Wildner /* 435*60e1e752SSascha Wildner * If we encounter errors in the recursive parse, make 436*60e1e752SSascha Wildner * sure we don't continue parsing. 437*60e1e752SSascha Wildner */ 438*60e1e752SSascha Wildner 439*60e1e752SSascha Wildner if (MANDOCLEVEL_FATAL <= curp->file_status) 440*60e1e752SSascha Wildner break; 441*60e1e752SSascha Wildner 442*60e1e752SSascha Wildner /* 443*60e1e752SSascha Wildner * If input parsers have not been allocated, do so now. 444*60e1e752SSascha Wildner * We keep these instanced betwen parsers, but set them 445*60e1e752SSascha Wildner * locally per parse routine since we can use different 446*60e1e752SSascha Wildner * parsers with each one. 447*60e1e752SSascha Wildner */ 448*60e1e752SSascha Wildner 449*60e1e752SSascha Wildner if ( ! (curp->man || curp->mdoc)) 450*60e1e752SSascha Wildner pset(ln.buf + of, pos - of, curp); 451*60e1e752SSascha Wildner 452*60e1e752SSascha Wildner /* 453*60e1e752SSascha Wildner * Lastly, push down into the parsers themselves. One 454*60e1e752SSascha Wildner * of these will have already been set in the pset() 455*60e1e752SSascha Wildner * routine. 456*60e1e752SSascha Wildner * If libroff returns ROFF_TBL, then add it to the 457*60e1e752SSascha Wildner * currently open parse. Since we only get here if 458*60e1e752SSascha Wildner * there does exist data (see tbl_data.c), we're 459*60e1e752SSascha Wildner * guaranteed that something's been allocated. 460*60e1e752SSascha Wildner * Do the same for ROFF_EQN. 461*60e1e752SSascha Wildner */ 462*60e1e752SSascha Wildner 463*60e1e752SSascha Wildner rc = -1; 464*60e1e752SSascha Wildner 465*60e1e752SSascha Wildner if (ROFF_TBL == rr) 466*60e1e752SSascha Wildner while (NULL != (span = roff_span(curp->roff))) { 467*60e1e752SSascha Wildner rc = curp->man ? 468*60e1e752SSascha Wildner man_addspan(curp->man, span) : 469*60e1e752SSascha Wildner mdoc_addspan(curp->mdoc, span); 470*60e1e752SSascha Wildner if (0 == rc) 471*60e1e752SSascha Wildner break; 472*60e1e752SSascha Wildner } 473*60e1e752SSascha Wildner else if (ROFF_EQN == rr) 474*60e1e752SSascha Wildner rc = curp->mdoc ? 475*60e1e752SSascha Wildner mdoc_addeqn(curp->mdoc, 476*60e1e752SSascha Wildner roff_eqn(curp->roff)) : 477*60e1e752SSascha Wildner man_addeqn(curp->man, 478*60e1e752SSascha Wildner roff_eqn(curp->roff)); 479*60e1e752SSascha Wildner else if (curp->man || curp->mdoc) 480*60e1e752SSascha Wildner rc = curp->man ? 481*60e1e752SSascha Wildner man_parseln(curp->man, 482*60e1e752SSascha Wildner curp->line, ln.buf, of) : 483*60e1e752SSascha Wildner mdoc_parseln(curp->mdoc, 484*60e1e752SSascha Wildner curp->line, ln.buf, of); 485*60e1e752SSascha Wildner 486*60e1e752SSascha Wildner if (0 == rc) { 487*60e1e752SSascha Wildner assert(MANDOCLEVEL_FATAL <= curp->file_status); 488*60e1e752SSascha Wildner break; 489*60e1e752SSascha Wildner } 490*60e1e752SSascha Wildner 491*60e1e752SSascha Wildner /* Temporary buffers typically are not full. */ 492*60e1e752SSascha Wildner 493*60e1e752SSascha Wildner if (0 == start && '\0' == blk.buf[i]) 494*60e1e752SSascha Wildner break; 495*60e1e752SSascha Wildner 496*60e1e752SSascha Wildner /* Start the next input line. */ 497*60e1e752SSascha Wildner 498*60e1e752SSascha Wildner pos = 0; 499*60e1e752SSascha Wildner } 500*60e1e752SSascha Wildner 501*60e1e752SSascha Wildner free(ln.buf); 502*60e1e752SSascha Wildner } 503*60e1e752SSascha Wildner 504*60e1e752SSascha Wildner static void 505*60e1e752SSascha Wildner pdesc(struct mparse *curp, const char *file, int fd) 506*60e1e752SSascha Wildner { 507*60e1e752SSascha Wildner struct buf blk; 508*60e1e752SSascha Wildner int with_mmap; 509*60e1e752SSascha Wildner 510*60e1e752SSascha Wildner /* 511*60e1e752SSascha Wildner * Run for each opened file; may be called more than once for 512*60e1e752SSascha Wildner * each full parse sequence if the opened file is nested (i.e., 513*60e1e752SSascha Wildner * from `so'). Simply sucks in the whole file and moves into 514*60e1e752SSascha Wildner * the parse phase for the file. 515*60e1e752SSascha Wildner */ 516*60e1e752SSascha Wildner 517*60e1e752SSascha Wildner if ( ! read_whole_file(file, fd, &blk, &with_mmap)) { 518*60e1e752SSascha Wildner curp->file_status = MANDOCLEVEL_SYSERR; 519*60e1e752SSascha Wildner return; 520*60e1e752SSascha Wildner } 521*60e1e752SSascha Wildner 522*60e1e752SSascha Wildner /* Line number is per-file. */ 523*60e1e752SSascha Wildner 524*60e1e752SSascha Wildner curp->line = 1; 525*60e1e752SSascha Wildner 526*60e1e752SSascha Wildner mparse_buf_r(curp, blk, 1); 527*60e1e752SSascha Wildner 528*60e1e752SSascha Wildner if (with_mmap) 529*60e1e752SSascha Wildner munmap(blk.buf, blk.sz); 530*60e1e752SSascha Wildner else 531*60e1e752SSascha Wildner free(blk.buf); 532*60e1e752SSascha Wildner } 533*60e1e752SSascha Wildner 534*60e1e752SSascha Wildner static int 535*60e1e752SSascha Wildner read_whole_file(const char *file, int fd, struct buf *fb, int *with_mmap) 536*60e1e752SSascha Wildner { 537*60e1e752SSascha Wildner struct stat st; 538*60e1e752SSascha Wildner size_t off; 539*60e1e752SSascha Wildner ssize_t ssz; 540*60e1e752SSascha Wildner 541*60e1e752SSascha Wildner if (-1 == fstat(fd, &st)) { 542*60e1e752SSascha Wildner perror(file); 543*60e1e752SSascha Wildner return(0); 544*60e1e752SSascha Wildner } 545*60e1e752SSascha Wildner 546*60e1e752SSascha Wildner /* 547*60e1e752SSascha Wildner * If we're a regular file, try just reading in the whole entry 548*60e1e752SSascha Wildner * via mmap(). This is faster than reading it into blocks, and 549*60e1e752SSascha Wildner * since each file is only a few bytes to begin with, I'm not 550*60e1e752SSascha Wildner * concerned that this is going to tank any machines. 551*60e1e752SSascha Wildner */ 552*60e1e752SSascha Wildner 553*60e1e752SSascha Wildner if (S_ISREG(st.st_mode)) { 554*60e1e752SSascha Wildner if (st.st_size >= (1U << 31)) { 555*60e1e752SSascha Wildner fprintf(stderr, "%s: input too large\n", file); 556*60e1e752SSascha Wildner return(0); 557*60e1e752SSascha Wildner } 558*60e1e752SSascha Wildner *with_mmap = 1; 559*60e1e752SSascha Wildner fb->sz = (size_t)st.st_size; 560*60e1e752SSascha Wildner fb->buf = mmap(NULL, fb->sz, PROT_READ, 561*60e1e752SSascha Wildner MAP_FILE|MAP_SHARED, fd, 0); 562*60e1e752SSascha Wildner if (fb->buf != MAP_FAILED) 563*60e1e752SSascha Wildner return(1); 564*60e1e752SSascha Wildner } 565*60e1e752SSascha Wildner 566*60e1e752SSascha Wildner /* 567*60e1e752SSascha Wildner * If this isn't a regular file (like, say, stdin), then we must 568*60e1e752SSascha Wildner * go the old way and just read things in bit by bit. 569*60e1e752SSascha Wildner */ 570*60e1e752SSascha Wildner 571*60e1e752SSascha Wildner *with_mmap = 0; 572*60e1e752SSascha Wildner off = 0; 573*60e1e752SSascha Wildner fb->sz = 0; 574*60e1e752SSascha Wildner fb->buf = NULL; 575*60e1e752SSascha Wildner for (;;) { 576*60e1e752SSascha Wildner if (off == fb->sz) { 577*60e1e752SSascha Wildner if (fb->sz == (1U << 31)) { 578*60e1e752SSascha Wildner fprintf(stderr, "%s: input too large\n", file); 579*60e1e752SSascha Wildner break; 580*60e1e752SSascha Wildner } 581*60e1e752SSascha Wildner resize_buf(fb, 65536); 582*60e1e752SSascha Wildner } 583*60e1e752SSascha Wildner ssz = read(fd, fb->buf + (int)off, fb->sz - off); 584*60e1e752SSascha Wildner if (ssz == 0) { 585*60e1e752SSascha Wildner fb->sz = off; 586*60e1e752SSascha Wildner return(1); 587*60e1e752SSascha Wildner } 588*60e1e752SSascha Wildner if (ssz == -1) { 589*60e1e752SSascha Wildner perror(file); 590*60e1e752SSascha Wildner break; 591*60e1e752SSascha Wildner } 592*60e1e752SSascha Wildner off += (size_t)ssz; 593*60e1e752SSascha Wildner } 594*60e1e752SSascha Wildner 595*60e1e752SSascha Wildner free(fb->buf); 596*60e1e752SSascha Wildner fb->buf = NULL; 597*60e1e752SSascha Wildner return(0); 598*60e1e752SSascha Wildner } 599*60e1e752SSascha Wildner 600*60e1e752SSascha Wildner static void 601*60e1e752SSascha Wildner mparse_end(struct mparse *curp) 602*60e1e752SSascha Wildner { 603*60e1e752SSascha Wildner 604*60e1e752SSascha Wildner if (MANDOCLEVEL_FATAL <= curp->file_status) 605*60e1e752SSascha Wildner return; 606*60e1e752SSascha Wildner 607*60e1e752SSascha Wildner if (curp->mdoc && ! mdoc_endparse(curp->mdoc)) { 608*60e1e752SSascha Wildner assert(MANDOCLEVEL_FATAL <= curp->file_status); 609*60e1e752SSascha Wildner return; 610*60e1e752SSascha Wildner } 611*60e1e752SSascha Wildner 612*60e1e752SSascha Wildner if (curp->man && ! man_endparse(curp->man)) { 613*60e1e752SSascha Wildner assert(MANDOCLEVEL_FATAL <= curp->file_status); 614*60e1e752SSascha Wildner return; 615*60e1e752SSascha Wildner } 616*60e1e752SSascha Wildner 617*60e1e752SSascha Wildner if ( ! (curp->man || curp->mdoc)) { 618*60e1e752SSascha Wildner mandoc_msg(MANDOCERR_NOTMANUAL, curp, 1, 0, NULL); 619*60e1e752SSascha Wildner curp->file_status = MANDOCLEVEL_FATAL; 620*60e1e752SSascha Wildner return; 621*60e1e752SSascha Wildner } 622*60e1e752SSascha Wildner 623*60e1e752SSascha Wildner roff_endparse(curp->roff); 624*60e1e752SSascha Wildner } 625*60e1e752SSascha Wildner 626*60e1e752SSascha Wildner static void 627*60e1e752SSascha Wildner mparse_readfd_r(struct mparse *curp, int fd, const char *file, int re) 628*60e1e752SSascha Wildner { 629*60e1e752SSascha Wildner const char *svfile; 630*60e1e752SSascha Wildner 631*60e1e752SSascha Wildner if (-1 == fd) 632*60e1e752SSascha Wildner if (-1 == (fd = open(file, O_RDONLY, 0))) { 633*60e1e752SSascha Wildner perror(file); 634*60e1e752SSascha Wildner curp->file_status = MANDOCLEVEL_SYSERR; 635*60e1e752SSascha Wildner return; 636*60e1e752SSascha Wildner } 637*60e1e752SSascha Wildner 638*60e1e752SSascha Wildner svfile = curp->file; 639*60e1e752SSascha Wildner curp->file = file; 640*60e1e752SSascha Wildner 641*60e1e752SSascha Wildner pdesc(curp, file, fd); 642*60e1e752SSascha Wildner 643*60e1e752SSascha Wildner if (0 == re && MANDOCLEVEL_FATAL > curp->file_status) 644*60e1e752SSascha Wildner mparse_end(curp); 645*60e1e752SSascha Wildner 646*60e1e752SSascha Wildner if (STDIN_FILENO != fd && -1 == close(fd)) 647*60e1e752SSascha Wildner perror(file); 648*60e1e752SSascha Wildner 649*60e1e752SSascha Wildner curp->file = svfile; 650*60e1e752SSascha Wildner } 651*60e1e752SSascha Wildner 652*60e1e752SSascha Wildner enum mandoclevel 653*60e1e752SSascha Wildner mparse_readfd(struct mparse *curp, int fd, const char *file) 654*60e1e752SSascha Wildner { 655*60e1e752SSascha Wildner 656*60e1e752SSascha Wildner mparse_readfd_r(curp, fd, file, 0); 657*60e1e752SSascha Wildner return(curp->file_status); 658*60e1e752SSascha Wildner } 659*60e1e752SSascha Wildner 660*60e1e752SSascha Wildner struct mparse * 661*60e1e752SSascha Wildner mparse_alloc(enum mparset inttype, enum mandoclevel wlevel, mandocmsg mmsg, void *arg) 662*60e1e752SSascha Wildner { 663*60e1e752SSascha Wildner struct mparse *curp; 664*60e1e752SSascha Wildner 665*60e1e752SSascha Wildner assert(wlevel <= MANDOCLEVEL_FATAL); 666*60e1e752SSascha Wildner 667*60e1e752SSascha Wildner curp = mandoc_calloc(1, sizeof(struct mparse)); 668*60e1e752SSascha Wildner 669*60e1e752SSascha Wildner curp->wlevel = wlevel; 670*60e1e752SSascha Wildner curp->mmsg = mmsg; 671*60e1e752SSascha Wildner curp->arg = arg; 672*60e1e752SSascha Wildner curp->inttype = inttype; 673*60e1e752SSascha Wildner 674*60e1e752SSascha Wildner curp->roff = roff_alloc(&curp->regs, curp); 675*60e1e752SSascha Wildner return(curp); 676*60e1e752SSascha Wildner } 677*60e1e752SSascha Wildner 678*60e1e752SSascha Wildner void 679*60e1e752SSascha Wildner mparse_reset(struct mparse *curp) 680*60e1e752SSascha Wildner { 681*60e1e752SSascha Wildner 682*60e1e752SSascha Wildner memset(&curp->regs, 0, sizeof(struct regset)); 683*60e1e752SSascha Wildner 684*60e1e752SSascha Wildner roff_reset(curp->roff); 685*60e1e752SSascha Wildner 686*60e1e752SSascha Wildner if (curp->mdoc) 687*60e1e752SSascha Wildner mdoc_reset(curp->mdoc); 688*60e1e752SSascha Wildner if (curp->man) 689*60e1e752SSascha Wildner man_reset(curp->man); 690*60e1e752SSascha Wildner 691*60e1e752SSascha Wildner curp->file_status = MANDOCLEVEL_OK; 692*60e1e752SSascha Wildner curp->mdoc = NULL; 693*60e1e752SSascha Wildner curp->man = NULL; 694*60e1e752SSascha Wildner } 695*60e1e752SSascha Wildner 696*60e1e752SSascha Wildner void 697*60e1e752SSascha Wildner mparse_free(struct mparse *curp) 698*60e1e752SSascha Wildner { 699*60e1e752SSascha Wildner 700*60e1e752SSascha Wildner if (curp->pmdoc) 701*60e1e752SSascha Wildner mdoc_free(curp->pmdoc); 702*60e1e752SSascha Wildner if (curp->pman) 703*60e1e752SSascha Wildner man_free(curp->pman); 704*60e1e752SSascha Wildner if (curp->roff) 705*60e1e752SSascha Wildner roff_free(curp->roff); 706*60e1e752SSascha Wildner 707*60e1e752SSascha Wildner free(curp); 708*60e1e752SSascha Wildner } 709*60e1e752SSascha Wildner 710*60e1e752SSascha Wildner void 711*60e1e752SSascha Wildner mparse_result(struct mparse *curp, struct mdoc **mdoc, struct man **man) 712*60e1e752SSascha Wildner { 713*60e1e752SSascha Wildner 714*60e1e752SSascha Wildner if (mdoc) 715*60e1e752SSascha Wildner *mdoc = curp->mdoc; 716*60e1e752SSascha Wildner if (man) 717*60e1e752SSascha Wildner *man = curp->man; 718*60e1e752SSascha Wildner } 719*60e1e752SSascha Wildner 720*60e1e752SSascha Wildner void 721*60e1e752SSascha Wildner mandoc_vmsg(enum mandocerr t, struct mparse *m, 722*60e1e752SSascha Wildner int ln, int pos, const char *fmt, ...) 723*60e1e752SSascha Wildner { 724*60e1e752SSascha Wildner char buf[256]; 725*60e1e752SSascha Wildner va_list ap; 726*60e1e752SSascha Wildner 727*60e1e752SSascha Wildner va_start(ap, fmt); 728*60e1e752SSascha Wildner vsnprintf(buf, sizeof(buf) - 1, fmt, ap); 729*60e1e752SSascha Wildner va_end(ap); 730*60e1e752SSascha Wildner 731*60e1e752SSascha Wildner mandoc_msg(t, m, ln, pos, buf); 732*60e1e752SSascha Wildner } 733*60e1e752SSascha Wildner 734*60e1e752SSascha Wildner void 735*60e1e752SSascha Wildner mandoc_msg(enum mandocerr er, struct mparse *m, 736*60e1e752SSascha Wildner int ln, int col, const char *msg) 737*60e1e752SSascha Wildner { 738*60e1e752SSascha Wildner enum mandoclevel level; 739*60e1e752SSascha Wildner 740*60e1e752SSascha Wildner level = MANDOCLEVEL_FATAL; 741*60e1e752SSascha Wildner while (er < mandoclimits[level]) 742*60e1e752SSascha Wildner level--; 743*60e1e752SSascha Wildner 744*60e1e752SSascha Wildner if (level < m->wlevel) 745*60e1e752SSascha Wildner return; 746*60e1e752SSascha Wildner 747*60e1e752SSascha Wildner if (m->mmsg) 748*60e1e752SSascha Wildner (*m->mmsg)(er, level, m->file, ln, col, msg); 749*60e1e752SSascha Wildner 750*60e1e752SSascha Wildner if (m->file_status < level) 751*60e1e752SSascha Wildner m->file_status = level; 752*60e1e752SSascha Wildner } 753*60e1e752SSascha Wildner 754*60e1e752SSascha Wildner const char * 755*60e1e752SSascha Wildner mparse_strerror(enum mandocerr er) 756*60e1e752SSascha Wildner { 757*60e1e752SSascha Wildner 758*60e1e752SSascha Wildner return(mandocerrs[er]); 759*60e1e752SSascha Wildner } 760*60e1e752SSascha Wildner 761*60e1e752SSascha Wildner const char * 762*60e1e752SSascha Wildner mparse_strlevel(enum mandoclevel lvl) 763*60e1e752SSascha Wildner { 764*60e1e752SSascha Wildner return(mandoclevels[lvl]); 765*60e1e752SSascha Wildner } 766