/* $Id: read.c,v 1.220 2021/06/27 17:57:54 schwarze Exp $ */
/*
 * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2010, 2012 Joerg Sonnenberger <joerg@netbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Top-level functions of the mandoc(3) parser:
 * Parser and input encoding selection, decompression,
 * handling of input bytes, characters, lines, and files,
 * handling of roff(7) loops and file inclusion,
 * and steering of the various parsers.
 */
#include "config.h"

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <zlib.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "man.h"
#include "mandoc_parse.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "tag.h"

/*
 * Upper bound for mparse->reparse_count: once more than this many
 * ROFF_REPARSE results have been seen since the counter was last
 * reset, mparse_buf_r() reports MANDOCERR_ROFFLOOP and gives up.
 */
#define	REPARSE_LIMIT	1000

/*
 * State of one parser instance, as created by mparse_alloc().
 */
struct	mparse {
	struct roff	 *roff; /* roff parser (!NULL) */
	struct roff_man	 *man; /* man parser */
	struct buf	 *primary; /* buffer currently being parsed */
	struct buf	 *secondary; /* copy of top level input */
	struct buf	 *loop; /* open .while request line */
	const char	 *os_s; /* default operating system */
	int		  options; /* parser options */
	int		  gzip; /* current input file is gzipped */
	int		  filenc; /* encoding of the current file */
	int		  reparse_count; /* finite interp. stack */
	int		  line; /* line number in the file */
};

/* Forward declarations of the helpers private to this file. */
static	void	  choose_parser(struct mparse *);
static	void	  free_buf_list(struct buf *);
static	void	  resize_buf(struct buf *, size_t);
static	int	  mparse_buf_r(struct mparse *, struct buf, size_t, int);
static	int	  read_whole_file(struct mparse *, int, struct buf *, int *);
static	void	  mparse_end(struct mparse *);


7660e1e752SSascha Wildner static void
resize_buf(struct buf * buf,size_t initial)7760e1e752SSascha Wildner resize_buf(struct buf *buf, size_t initial)
7860e1e752SSascha Wildner {
7960e1e752SSascha Wildner
8060e1e752SSascha Wildner buf->sz = buf->sz > initial/2 ? 2 * buf->sz : initial;
8160e1e752SSascha Wildner buf->buf = mandoc_realloc(buf->buf, buf->sz);
8260e1e752SSascha Wildner }
8360e1e752SSascha Wildner
8460e1e752SSascha Wildner static void
free_buf_list(struct buf * buf)8554ba9607SSascha Wildner free_buf_list(struct buf *buf)
8660e1e752SSascha Wildner {
8754ba9607SSascha Wildner struct buf *tmp;
8860e1e752SSascha Wildner
8954ba9607SSascha Wildner while (buf != NULL) {
9054ba9607SSascha Wildner tmp = buf;
9154ba9607SSascha Wildner buf = tmp->next;
9254ba9607SSascha Wildner free(tmp->buf);
9354ba9607SSascha Wildner free(tmp);
9454ba9607SSascha Wildner }
9560e1e752SSascha Wildner }
9660e1e752SSascha Wildner
9760e1e752SSascha Wildner static void
choose_parser(struct mparse * curp)9854ba9607SSascha Wildner choose_parser(struct mparse *curp)
9960e1e752SSascha Wildner {
10054ba9607SSascha Wildner char *cp, *ep;
10154ba9607SSascha Wildner int format;
10254ba9607SSascha Wildner
10354ba9607SSascha Wildner /*
10454ba9607SSascha Wildner * If neither command line arguments -mdoc or -man select
10554ba9607SSascha Wildner * a parser nor the roff parser found a .Dd or .TH macro
10654ba9607SSascha Wildner * yet, look ahead in the main input buffer.
10754ba9607SSascha Wildner */
10854ba9607SSascha Wildner
10954ba9607SSascha Wildner if ((format = roff_getformat(curp->roff)) == 0) {
11054ba9607SSascha Wildner cp = curp->primary->buf;
11154ba9607SSascha Wildner ep = cp + curp->primary->sz;
11254ba9607SSascha Wildner while (cp < ep) {
11354ba9607SSascha Wildner if (*cp == '.' || *cp == '\'') {
11454ba9607SSascha Wildner cp++;
11554ba9607SSascha Wildner if (cp[0] == 'D' && cp[1] == 'd') {
11654ba9607SSascha Wildner format = MPARSE_MDOC;
11754ba9607SSascha Wildner break;
11854ba9607SSascha Wildner }
11954ba9607SSascha Wildner if (cp[0] == 'T' && cp[1] == 'H') {
12054ba9607SSascha Wildner format = MPARSE_MAN;
12154ba9607SSascha Wildner break;
12254ba9607SSascha Wildner }
12354ba9607SSascha Wildner }
12454ba9607SSascha Wildner cp = memchr(cp, '\n', ep - cp);
12554ba9607SSascha Wildner if (cp == NULL)
12654ba9607SSascha Wildner break;
12754ba9607SSascha Wildner cp++;
12854ba9607SSascha Wildner }
12954ba9607SSascha Wildner }
13054ba9607SSascha Wildner
13154ba9607SSascha Wildner if (format == MPARSE_MDOC) {
13254ba9607SSascha Wildner curp->man->meta.macroset = MACROSET_MDOC;
13354ba9607SSascha Wildner if (curp->man->mdocmac == NULL)
13454ba9607SSascha Wildner curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
13554ba9607SSascha Wildner } else {
13654ba9607SSascha Wildner curp->man->meta.macroset = MACROSET_MAN;
13754ba9607SSascha Wildner if (curp->man->manmac == NULL)
13854ba9607SSascha Wildner curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
13954ba9607SSascha Wildner }
14054ba9607SSascha Wildner curp->man->meta.first->tok = TOKEN_NONE;
14154ba9607SSascha Wildner }
14254ba9607SSascha Wildner
14354ba9607SSascha Wildner /*
14454ba9607SSascha Wildner * Main parse routine for a buffer.
14554ba9607SSascha Wildner * It assumes encoding and line numbering are already set up.
14654ba9607SSascha Wildner * It can recurse directly (for invocations of user-defined
14754ba9607SSascha Wildner * macros, inline equations, and input line traps)
14854ba9607SSascha Wildner * and indirectly (for .so file inclusion).
14954ba9607SSascha Wildner */
15054ba9607SSascha Wildner static int
mparse_buf_r(struct mparse * curp,struct buf blk,size_t i,int start)15154ba9607SSascha Wildner mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
15254ba9607SSascha Wildner {
15360e1e752SSascha Wildner struct buf ln;
15454ba9607SSascha Wildner struct buf *firstln, *lastln, *thisln, *loop;
15554ba9607SSascha Wildner char *cp;
15654ba9607SSascha Wildner size_t pos; /* byte number in the ln buffer */
157*99db7d0eSSascha Wildner size_t spos; /* at the start of the current line parse */
15854ba9607SSascha Wildner int line_result, result;
15954ba9607SSascha Wildner int of;
16060e1e752SSascha Wildner int lnn; /* line number in the real file */
16154ba9607SSascha Wildner int fd;
16254ba9607SSascha Wildner int inloop; /* Saw .while on this level. */
16360e1e752SSascha Wildner unsigned char c;
16460e1e752SSascha Wildner
16554ba9607SSascha Wildner ln.sz = 256;
16654ba9607SSascha Wildner ln.buf = mandoc_malloc(ln.sz);
16754ba9607SSascha Wildner ln.next = NULL;
168*99db7d0eSSascha Wildner firstln = lastln = loop = NULL;
16960e1e752SSascha Wildner lnn = curp->line;
17060e1e752SSascha Wildner pos = 0;
17154ba9607SSascha Wildner inloop = 0;
17254ba9607SSascha Wildner result = ROFF_CONT;
17360e1e752SSascha Wildner
17454ba9607SSascha Wildner while (i < blk.sz && (blk.buf[i] != '\0' || pos != 0)) {
17560e1e752SSascha Wildner if (start) {
17660e1e752SSascha Wildner curp->line = lnn;
17760e1e752SSascha Wildner curp->reparse_count = 0;
17854ba9607SSascha Wildner
17954ba9607SSascha Wildner if (lnn < 3 &&
18054ba9607SSascha Wildner curp->filenc & MPARSE_UTF8 &&
18154ba9607SSascha Wildner curp->filenc & MPARSE_LATIN1)
18254ba9607SSascha Wildner curp->filenc = preconv_cue(&blk, i);
18360e1e752SSascha Wildner }
184*99db7d0eSSascha Wildner spos = pos;
18560e1e752SSascha Wildner
18654ba9607SSascha Wildner while (i < blk.sz && (start || blk.buf[i] != '\0')) {
18760e1e752SSascha Wildner
18860e1e752SSascha Wildner /*
18960e1e752SSascha Wildner * When finding an unescaped newline character,
19060e1e752SSascha Wildner * leave the character loop to process the line.
19160e1e752SSascha Wildner * Skip a preceding carriage return, if any.
19260e1e752SSascha Wildner */
19360e1e752SSascha Wildner
19454ba9607SSascha Wildner if ('\r' == blk.buf[i] && i + 1 < blk.sz &&
19560e1e752SSascha Wildner '\n' == blk.buf[i + 1])
19660e1e752SSascha Wildner ++i;
19760e1e752SSascha Wildner if ('\n' == blk.buf[i]) {
19860e1e752SSascha Wildner ++i;
19960e1e752SSascha Wildner ++lnn;
20060e1e752SSascha Wildner break;
20160e1e752SSascha Wildner }
20260e1e752SSascha Wildner
20360e1e752SSascha Wildner /*
20454ba9607SSascha Wildner * Make sure we have space for the worst
20554ba9607SSascha Wildner * case of 12 bytes: "\\[u10ffff]\n\0"
206f88b6c16SFranco Fichtner */
207f88b6c16SFranco Fichtner
20854ba9607SSascha Wildner if (pos + 12 > ln.sz)
209f88b6c16SFranco Fichtner resize_buf(&ln, 256);
210f88b6c16SFranco Fichtner
211f88b6c16SFranco Fichtner /*
21254ba9607SSascha Wildner * Encode 8-bit input.
21360e1e752SSascha Wildner */
21460e1e752SSascha Wildner
21554ba9607SSascha Wildner c = blk.buf[i];
21654ba9607SSascha Wildner if (c & 0x80) {
21754ba9607SSascha Wildner if ( ! (curp->filenc && preconv_encode(
21854ba9607SSascha Wildner &blk, &i, &ln, &pos, &curp->filenc))) {
21954ba9607SSascha Wildner mandoc_msg(MANDOCERR_CHAR_BAD,
22054ba9607SSascha Wildner curp->line, pos, "0x%x", c);
22154ba9607SSascha Wildner ln.buf[pos++] = '?';
22254ba9607SSascha Wildner i++;
22354ba9607SSascha Wildner }
22454ba9607SSascha Wildner continue;
22554ba9607SSascha Wildner }
22660e1e752SSascha Wildner
22754ba9607SSascha Wildner /*
22854ba9607SSascha Wildner * Exclude control characters.
22954ba9607SSascha Wildner */
23054ba9607SSascha Wildner
23154ba9607SSascha Wildner if (c == 0x7f || (c < 0x20 && c != 0x09)) {
23254ba9607SSascha Wildner mandoc_msg(c == 0x00 || c == 0x04 ||
23354ba9607SSascha Wildner c > 0x0a ? MANDOCERR_CHAR_BAD :
23454ba9607SSascha Wildner MANDOCERR_CHAR_UNSUPP,
235070c62a6SFranco Fichtner curp->line, pos, "0x%x", c);
23660e1e752SSascha Wildner i++;
23754ba9607SSascha Wildner if (c != '\r')
23836342e81SSascha Wildner ln.buf[pos++] = '?';
23960e1e752SSascha Wildner continue;
24060e1e752SSascha Wildner }
24160e1e752SSascha Wildner
24260e1e752SSascha Wildner ln.buf[pos++] = blk.buf[i++];
24360e1e752SSascha Wildner }
24454ba9607SSascha Wildner ln.buf[pos] = '\0';
24560e1e752SSascha Wildner
24660e1e752SSascha Wildner /*
24754ba9607SSascha Wildner * Maintain a lookaside buffer of all lines.
24854ba9607SSascha Wildner * parsed from this input source.
24960e1e752SSascha Wildner */
25060e1e752SSascha Wildner
25154ba9607SSascha Wildner thisln = mandoc_malloc(sizeof(*thisln));
25254ba9607SSascha Wildner thisln->buf = mandoc_strdup(ln.buf);
25354ba9607SSascha Wildner thisln->sz = strlen(ln.buf) + 1;
25454ba9607SSascha Wildner thisln->next = NULL;
25554ba9607SSascha Wildner if (firstln == NULL) {
25654ba9607SSascha Wildner firstln = lastln = thisln;
25754ba9607SSascha Wildner if (curp->secondary == NULL)
25854ba9607SSascha Wildner curp->secondary = firstln;
25954ba9607SSascha Wildner } else {
26054ba9607SSascha Wildner lastln->next = thisln;
26154ba9607SSascha Wildner lastln = thisln;
26260e1e752SSascha Wildner }
26360e1e752SSascha Wildner
26454ba9607SSascha Wildner /* XXX Ugly hack to mark the end of the input. */
26560e1e752SSascha Wildner
26654ba9607SSascha Wildner if (i == blk.sz || blk.buf[i] == '\0') {
267*99db7d0eSSascha Wildner if (pos + 2 > ln.sz)
268*99db7d0eSSascha Wildner resize_buf(&ln, 256);
26954ba9607SSascha Wildner ln.buf[pos++] = '\n';
27060e1e752SSascha Wildner ln.buf[pos] = '\0';
27154ba9607SSascha Wildner }
27260e1e752SSascha Wildner
27360e1e752SSascha Wildner /*
27460e1e752SSascha Wildner * A significant amount of complexity is contained by
27560e1e752SSascha Wildner * the roff preprocessor. It's line-oriented but can be
27660e1e752SSascha Wildner * expressed on one line, so we need at times to
27760e1e752SSascha Wildner * readjust our starting point and re-run it. The roff
27860e1e752SSascha Wildner * preprocessor can also readjust the buffers with new
27960e1e752SSascha Wildner * data, so we pass them in wholesale.
28060e1e752SSascha Wildner */
28160e1e752SSascha Wildner
28260e1e752SSascha Wildner of = 0;
28360e1e752SSascha Wildner rerun:
284*99db7d0eSSascha Wildner line_result = roff_parseln(curp->roff, curp->line,
285*99db7d0eSSascha Wildner &ln, &of, start && spos == 0 ? pos : 0);
28660e1e752SSascha Wildner
28754ba9607SSascha Wildner /* Process options. */
28854ba9607SSascha Wildner
28954ba9607SSascha Wildner if (line_result & ROFF_APPEND)
29054ba9607SSascha Wildner assert(line_result == (ROFF_IGN | ROFF_APPEND));
29154ba9607SSascha Wildner
29254ba9607SSascha Wildner if (line_result & ROFF_USERCALL)
29354ba9607SSascha Wildner assert((line_result & ROFF_MASK) == ROFF_REPARSE);
29454ba9607SSascha Wildner
29554ba9607SSascha Wildner if (line_result & ROFF_USERRET) {
29654ba9607SSascha Wildner assert(line_result == (ROFF_IGN | ROFF_USERRET));
29754ba9607SSascha Wildner if (start == 0) {
29854ba9607SSascha Wildner /* Return from the current macro. */
29954ba9607SSascha Wildner result = ROFF_USERRET;
30054ba9607SSascha Wildner goto out;
30154ba9607SSascha Wildner }
30254ba9607SSascha Wildner }
30354ba9607SSascha Wildner
30454ba9607SSascha Wildner switch (line_result & ROFF_LOOPMASK) {
30554ba9607SSascha Wildner case ROFF_IGN:
30654ba9607SSascha Wildner break;
30754ba9607SSascha Wildner case ROFF_WHILE:
30854ba9607SSascha Wildner if (curp->loop != NULL) {
30954ba9607SSascha Wildner if (loop == curp->loop)
31054ba9607SSascha Wildner break;
31154ba9607SSascha Wildner mandoc_msg(MANDOCERR_WHILE_NEST,
31260e1e752SSascha Wildner curp->line, pos, NULL);
31354ba9607SSascha Wildner }
31454ba9607SSascha Wildner curp->loop = thisln;
31554ba9607SSascha Wildner loop = NULL;
31654ba9607SSascha Wildner inloop = 1;
31754ba9607SSascha Wildner break;
31854ba9607SSascha Wildner case ROFF_LOOPCONT:
31954ba9607SSascha Wildner case ROFF_LOOPEXIT:
32054ba9607SSascha Wildner if (curp->loop == NULL) {
32154ba9607SSascha Wildner mandoc_msg(MANDOCERR_WHILE_FAIL,
32254ba9607SSascha Wildner curp->line, pos, NULL);
32354ba9607SSascha Wildner break;
32454ba9607SSascha Wildner }
32554ba9607SSascha Wildner if (inloop == 0) {
32654ba9607SSascha Wildner mandoc_msg(MANDOCERR_WHILE_INTO,
32754ba9607SSascha Wildner curp->line, pos, NULL);
32854ba9607SSascha Wildner curp->loop = loop = NULL;
32954ba9607SSascha Wildner break;
33054ba9607SSascha Wildner }
33154ba9607SSascha Wildner if (line_result & ROFF_LOOPCONT)
33254ba9607SSascha Wildner loop = curp->loop;
33354ba9607SSascha Wildner else {
33454ba9607SSascha Wildner curp->loop = loop = NULL;
33554ba9607SSascha Wildner inloop = 0;
33654ba9607SSascha Wildner }
33754ba9607SSascha Wildner break;
33854ba9607SSascha Wildner default:
33954ba9607SSascha Wildner abort();
34054ba9607SSascha Wildner }
34154ba9607SSascha Wildner
34254ba9607SSascha Wildner /* Process the main instruction from the roff parser. */
34354ba9607SSascha Wildner
34454ba9607SSascha Wildner switch (line_result & ROFF_MASK) {
34554ba9607SSascha Wildner case ROFF_IGN:
34654ba9607SSascha Wildner break;
34754ba9607SSascha Wildner case ROFF_CONT:
34854ba9607SSascha Wildner if (curp->man->meta.macroset == MACROSET_NONE)
34954ba9607SSascha Wildner choose_parser(curp);
35054ba9607SSascha Wildner if ((curp->man->meta.macroset == MACROSET_MDOC ?
35154ba9607SSascha Wildner mdoc_parseln(curp->man, curp->line, ln.buf, of) :
35254ba9607SSascha Wildner man_parseln(curp->man, curp->line, ln.buf, of)
35354ba9607SSascha Wildner ) == 2)
35454ba9607SSascha Wildner goto out;
35554ba9607SSascha Wildner break;
356070c62a6SFranco Fichtner case ROFF_RERUN:
35760e1e752SSascha Wildner goto rerun;
35854ba9607SSascha Wildner case ROFF_REPARSE:
35954ba9607SSascha Wildner if (++curp->reparse_count > REPARSE_LIMIT) {
36054ba9607SSascha Wildner /* Abort and return to the top level. */
36154ba9607SSascha Wildner result = ROFF_IGN;
36254ba9607SSascha Wildner mandoc_msg(MANDOCERR_ROFFLOOP,
36354ba9607SSascha Wildner curp->line, pos, NULL);
36454ba9607SSascha Wildner goto out;
36554ba9607SSascha Wildner }
36654ba9607SSascha Wildner result = mparse_buf_r(curp, ln, of, 0);
36754ba9607SSascha Wildner if (line_result & ROFF_USERCALL) {
36854ba9607SSascha Wildner roff_userret(curp->roff);
36954ba9607SSascha Wildner /* Continue normally. */
37054ba9607SSascha Wildner if (result & ROFF_USERRET)
37154ba9607SSascha Wildner result = ROFF_CONT;
37254ba9607SSascha Wildner }
37354ba9607SSascha Wildner if (start == 0 && result != ROFF_CONT)
37454ba9607SSascha Wildner goto out;
37560e1e752SSascha Wildner break;
376070c62a6SFranco Fichtner case ROFF_SO:
37754ba9607SSascha Wildner if ( ! (curp->options & MPARSE_SO) &&
37854ba9607SSascha Wildner (i >= blk.sz || blk.buf[i] == '\0')) {
37954ba9607SSascha Wildner curp->man->meta.sodest =
38054ba9607SSascha Wildner mandoc_strdup(ln.buf + of);
38154ba9607SSascha Wildner goto out;
38254ba9607SSascha Wildner }
38354ba9607SSascha Wildner if ((fd = mparse_open(curp, ln.buf + of)) != -1) {
38454ba9607SSascha Wildner mparse_readfd(curp, fd, ln.buf + of);
38554ba9607SSascha Wildner close(fd);
38654ba9607SSascha Wildner } else {
38754ba9607SSascha Wildner mandoc_msg(MANDOCERR_SO_FAIL,
38854ba9607SSascha Wildner curp->line, of, ".so %s: %s",
38954ba9607SSascha Wildner ln.buf + of, strerror(errno));
39054ba9607SSascha Wildner ln.sz = mandoc_asprintf(&cp,
39154ba9607SSascha Wildner ".sp\nSee the file %s.\n.sp",
39254ba9607SSascha Wildner ln.buf + of);
393070c62a6SFranco Fichtner free(ln.buf);
39454ba9607SSascha Wildner ln.buf = cp;
39554ba9607SSascha Wildner of = 0;
39654ba9607SSascha Wildner mparse_buf_r(curp, ln, of, 0);
397070c62a6SFranco Fichtner }
39860e1e752SSascha Wildner break;
39960e1e752SSascha Wildner default:
40054ba9607SSascha Wildner abort();
40160e1e752SSascha Wildner }
40260e1e752SSascha Wildner
40360e1e752SSascha Wildner /* Start the next input line. */
40460e1e752SSascha Wildner
40554ba9607SSascha Wildner if (loop != NULL &&
40654ba9607SSascha Wildner (line_result & ROFF_LOOPMASK) == ROFF_IGN)
40754ba9607SSascha Wildner loop = loop->next;
40854ba9607SSascha Wildner
40954ba9607SSascha Wildner if (loop != NULL) {
41054ba9607SSascha Wildner if ((line_result & ROFF_APPEND) == 0)
41154ba9607SSascha Wildner *ln.buf = '\0';
41254ba9607SSascha Wildner if (ln.sz < loop->sz)
41354ba9607SSascha Wildner resize_buf(&ln, loop->sz);
41454ba9607SSascha Wildner (void)strlcat(ln.buf, loop->buf, ln.sz);
41554ba9607SSascha Wildner of = 0;
41654ba9607SSascha Wildner goto rerun;
41760e1e752SSascha Wildner }
41860e1e752SSascha Wildner
41954ba9607SSascha Wildner pos = (line_result & ROFF_APPEND) ? strlen(ln.buf) : 0;
42054ba9607SSascha Wildner }
42154ba9607SSascha Wildner out:
42254ba9607SSascha Wildner if (inloop) {
42354ba9607SSascha Wildner if (result != ROFF_USERRET)
42454ba9607SSascha Wildner mandoc_msg(MANDOCERR_WHILE_OUTOF,
42554ba9607SSascha Wildner curp->line, pos, NULL);
42654ba9607SSascha Wildner curp->loop = NULL;
42754ba9607SSascha Wildner }
42860e1e752SSascha Wildner free(ln.buf);
42954ba9607SSascha Wildner if (firstln != curp->secondary)
43054ba9607SSascha Wildner free_buf_list(firstln);
43154ba9607SSascha Wildner return result;
43260e1e752SSascha Wildner }
43360e1e752SSascha Wildner
43460e1e752SSascha Wildner static int
read_whole_file(struct mparse * curp,int fd,struct buf * fb,int * with_mmap)43554ba9607SSascha Wildner read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
43660e1e752SSascha Wildner {
43754ba9607SSascha Wildner struct stat st;
43854ba9607SSascha Wildner gzFile gz;
43960e1e752SSascha Wildner size_t off;
44060e1e752SSascha Wildner ssize_t ssz;
44154ba9607SSascha Wildner int gzerrnum, retval;
44260e1e752SSascha Wildner
44354ba9607SSascha Wildner if (fstat(fd, &st) == -1) {
444*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno));
445*99db7d0eSSascha Wildner return -1;
44660e1e752SSascha Wildner }
44760e1e752SSascha Wildner
44860e1e752SSascha Wildner /*
44960e1e752SSascha Wildner * If we're a regular file, try just reading in the whole entry
45060e1e752SSascha Wildner * via mmap(). This is faster than reading it into blocks, and
45160e1e752SSascha Wildner * since each file is only a few bytes to begin with, I'm not
45260e1e752SSascha Wildner * concerned that this is going to tank any machines.
45360e1e752SSascha Wildner */
45460e1e752SSascha Wildner
45554ba9607SSascha Wildner if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
45654ba9607SSascha Wildner if (st.st_size > 0x7fffffff) {
45754ba9607SSascha Wildner mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
458*99db7d0eSSascha Wildner return -1;
45960e1e752SSascha Wildner }
46060e1e752SSascha Wildner *with_mmap = 1;
46160e1e752SSascha Wildner fb->sz = (size_t)st.st_size;
462f88b6c16SFranco Fichtner fb->buf = mmap(NULL, fb->sz, PROT_READ, MAP_SHARED, fd, 0);
46360e1e752SSascha Wildner if (fb->buf != MAP_FAILED)
464*99db7d0eSSascha Wildner return 0;
46560e1e752SSascha Wildner }
46654ba9607SSascha Wildner
46754ba9607SSascha Wildner if (curp->gzip) {
46854ba9607SSascha Wildner /*
46954ba9607SSascha Wildner * Duplicating the file descriptor is required
47054ba9607SSascha Wildner * because we will have to call gzclose(3)
47154ba9607SSascha Wildner * to free memory used internally by zlib,
47254ba9607SSascha Wildner * but that will also close the file descriptor,
47354ba9607SSascha Wildner * which this function must not do.
47454ba9607SSascha Wildner */
47554ba9607SSascha Wildner if ((fd = dup(fd)) == -1) {
476*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DUP, 0, 0,
477*99db7d0eSSascha Wildner "%s", strerror(errno));
478*99db7d0eSSascha Wildner return -1;
47954ba9607SSascha Wildner }
48054ba9607SSascha Wildner if ((gz = gzdopen(fd, "rb")) == NULL) {
481*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_GZDOPEN, 0, 0,
482*99db7d0eSSascha Wildner "%s", strerror(errno));
48354ba9607SSascha Wildner close(fd);
484*99db7d0eSSascha Wildner return -1;
48554ba9607SSascha Wildner }
48654ba9607SSascha Wildner } else
48754ba9607SSascha Wildner gz = NULL;
48860e1e752SSascha Wildner
48960e1e752SSascha Wildner /*
49060e1e752SSascha Wildner * If this isn't a regular file (like, say, stdin), then we must
49160e1e752SSascha Wildner * go the old way and just read things in bit by bit.
49260e1e752SSascha Wildner */
49360e1e752SSascha Wildner
49460e1e752SSascha Wildner *with_mmap = 0;
49560e1e752SSascha Wildner off = 0;
496*99db7d0eSSascha Wildner retval = -1;
49760e1e752SSascha Wildner fb->sz = 0;
49860e1e752SSascha Wildner fb->buf = NULL;
49960e1e752SSascha Wildner for (;;) {
50060e1e752SSascha Wildner if (off == fb->sz) {
50160e1e752SSascha Wildner if (fb->sz == (1U << 31)) {
50254ba9607SSascha Wildner mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
50360e1e752SSascha Wildner break;
50460e1e752SSascha Wildner }
50560e1e752SSascha Wildner resize_buf(fb, 65536);
50660e1e752SSascha Wildner }
50754ba9607SSascha Wildner ssz = curp->gzip ?
50854ba9607SSascha Wildner gzread(gz, fb->buf + (int)off, fb->sz - off) :
50954ba9607SSascha Wildner read(fd, fb->buf + (int)off, fb->sz - off);
51060e1e752SSascha Wildner if (ssz == 0) {
51160e1e752SSascha Wildner fb->sz = off;
512*99db7d0eSSascha Wildner retval = 0;
51354ba9607SSascha Wildner break;
51460e1e752SSascha Wildner }
51560e1e752SSascha Wildner if (ssz == -1) {
51654ba9607SSascha Wildner if (curp->gzip)
51754ba9607SSascha Wildner (void)gzerror(gz, &gzerrnum);
518*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_READ, 0, 0, "%s",
51954ba9607SSascha Wildner curp->gzip && gzerrnum != Z_ERRNO ?
52054ba9607SSascha Wildner zError(gzerrnum) : strerror(errno));
52160e1e752SSascha Wildner break;
52260e1e752SSascha Wildner }
52360e1e752SSascha Wildner off += (size_t)ssz;
52460e1e752SSascha Wildner }
52560e1e752SSascha Wildner
52654ba9607SSascha Wildner if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
527*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s",
52854ba9607SSascha Wildner gzerrnum == Z_ERRNO ? strerror(errno) :
52954ba9607SSascha Wildner zError(gzerrnum));
530*99db7d0eSSascha Wildner if (retval == -1) {
53160e1e752SSascha Wildner free(fb->buf);
53260e1e752SSascha Wildner fb->buf = NULL;
53354ba9607SSascha Wildner }
53454ba9607SSascha Wildner return retval;
53560e1e752SSascha Wildner }
53660e1e752SSascha Wildner
53760e1e752SSascha Wildner static void
mparse_end(struct mparse * curp)53860e1e752SSascha Wildner mparse_end(struct mparse *curp)
53960e1e752SSascha Wildner {
54054ba9607SSascha Wildner if (curp->man->meta.macroset == MACROSET_NONE)
54154ba9607SSascha Wildner curp->man->meta.macroset = MACROSET_MAN;
54254ba9607SSascha Wildner if (curp->man->meta.macroset == MACROSET_MDOC)
54354ba9607SSascha Wildner mdoc_endparse(curp->man);
54454ba9607SSascha Wildner else
54554ba9607SSascha Wildner man_endparse(curp->man);
54660e1e752SSascha Wildner roff_endparse(curp->roff);
54760e1e752SSascha Wildner }
54860e1e752SSascha Wildner
54954ba9607SSascha Wildner /*
55054ba9607SSascha Wildner * Read the whole file into memory and call the parsers.
55154ba9607SSascha Wildner * Called recursively when an .so request is encountered.
55254ba9607SSascha Wildner */
55354ba9607SSascha Wildner void
mparse_readfd(struct mparse * curp,int fd,const char * filename)55454ba9607SSascha Wildner mparse_readfd(struct mparse *curp, int fd, const char *filename)
55560e1e752SSascha Wildner {
556f88b6c16SFranco Fichtner static int recursion_depth;
557f88b6c16SFranco Fichtner
55836342e81SSascha Wildner struct buf blk;
55954ba9607SSascha Wildner struct buf *save_primary;
560*99db7d0eSSascha Wildner const char *save_filename, *cp;
56154ba9607SSascha Wildner size_t offset;
56254ba9607SSascha Wildner int save_filenc, save_lineno;
56336342e81SSascha Wildner int with_mmap;
56436342e81SSascha Wildner
56554ba9607SSascha Wildner if (recursion_depth > 64) {
56654ba9607SSascha Wildner mandoc_msg(MANDOCERR_ROFFLOOP, curp->line, 0, NULL);
56754ba9607SSascha Wildner return;
568*99db7d0eSSascha Wildner } else if (recursion_depth == 0 &&
569*99db7d0eSSascha Wildner (cp = strrchr(filename, '.')) != NULL &&
570*99db7d0eSSascha Wildner cp[1] >= '1' && cp[1] <= '9')
571*99db7d0eSSascha Wildner curp->man->filesec = cp[1];
572*99db7d0eSSascha Wildner else
573*99db7d0eSSascha Wildner curp->man->filesec = '\0';
574*99db7d0eSSascha Wildner
575*99db7d0eSSascha Wildner if (read_whole_file(curp, fd, &blk, &with_mmap) == -1)
57654ba9607SSascha Wildner return;
577070c62a6SFranco Fichtner
57836342e81SSascha Wildner /*
57954ba9607SSascha Wildner * Save some properties of the parent file.
58036342e81SSascha Wildner */
58160e1e752SSascha Wildner
58254ba9607SSascha Wildner save_primary = curp->primary;
58354ba9607SSascha Wildner save_filenc = curp->filenc;
58454ba9607SSascha Wildner save_lineno = curp->line;
58554ba9607SSascha Wildner save_filename = mandoc_msg_getinfilename();
58660e1e752SSascha Wildner
58754ba9607SSascha Wildner curp->primary = &blk;
58854ba9607SSascha Wildner curp->filenc = curp->options & (MPARSE_UTF8 | MPARSE_LATIN1);
58954ba9607SSascha Wildner curp->line = 1;
59054ba9607SSascha Wildner mandoc_msg_setinfilename(filename);
59160e1e752SSascha Wildner
59254ba9607SSascha Wildner /* Skip an UTF-8 byte order mark. */
59354ba9607SSascha Wildner if (curp->filenc & MPARSE_UTF8 && blk.sz > 2 &&
59454ba9607SSascha Wildner (unsigned char)blk.buf[0] == 0xef &&
59554ba9607SSascha Wildner (unsigned char)blk.buf[1] == 0xbb &&
59654ba9607SSascha Wildner (unsigned char)blk.buf[2] == 0xbf) {
59754ba9607SSascha Wildner offset = 3;
59854ba9607SSascha Wildner curp->filenc &= ~MPARSE_LATIN1;
59954ba9607SSascha Wildner } else
60054ba9607SSascha Wildner offset = 0;
60154ba9607SSascha Wildner
60254ba9607SSascha Wildner recursion_depth++;
60354ba9607SSascha Wildner mparse_buf_r(curp, blk, offset, 1);
60454ba9607SSascha Wildner if (--recursion_depth == 0)
60554ba9607SSascha Wildner mparse_end(curp);
60654ba9607SSascha Wildner
60754ba9607SSascha Wildner /*
60854ba9607SSascha Wildner * Clean up and restore saved parent properties.
60954ba9607SSascha Wildner */
61054ba9607SSascha Wildner
61136342e81SSascha Wildner if (with_mmap)
61236342e81SSascha Wildner munmap(blk.buf, blk.sz);
61336342e81SSascha Wildner else
61436342e81SSascha Wildner free(blk.buf);
61560e1e752SSascha Wildner
61654ba9607SSascha Wildner curp->primary = save_primary;
61754ba9607SSascha Wildner curp->filenc = save_filenc;
61854ba9607SSascha Wildner curp->line = save_lineno;
61954ba9607SSascha Wildner if (save_filename != NULL)
62054ba9607SSascha Wildner mandoc_msg_setinfilename(save_filename);
62154ba9607SSascha Wildner }
62254ba9607SSascha Wildner
62354ba9607SSascha Wildner int
mparse_open(struct mparse * curp,const char * file)62454ba9607SSascha Wildner mparse_open(struct mparse *curp, const char *file)
62554ba9607SSascha Wildner {
62654ba9607SSascha Wildner char *cp;
62754ba9607SSascha Wildner int fd, save_errno;
62854ba9607SSascha Wildner
62954ba9607SSascha Wildner cp = strrchr(file, '.');
63054ba9607SSascha Wildner curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
63154ba9607SSascha Wildner
63254ba9607SSascha Wildner /* First try to use the filename as it is. */
63354ba9607SSascha Wildner
63454ba9607SSascha Wildner if ((fd = open(file, O_RDONLY)) != -1)
63554ba9607SSascha Wildner return fd;
63654ba9607SSascha Wildner
63754ba9607SSascha Wildner /*
63854ba9607SSascha Wildner * If that doesn't work and the filename doesn't
63954ba9607SSascha Wildner * already end in .gz, try appending .gz.
64054ba9607SSascha Wildner */
64154ba9607SSascha Wildner
64254ba9607SSascha Wildner if ( ! curp->gzip) {
64354ba9607SSascha Wildner save_errno = errno;
64454ba9607SSascha Wildner mandoc_asprintf(&cp, "%s.gz", file);
64554ba9607SSascha Wildner fd = open(cp, O_RDONLY);
64654ba9607SSascha Wildner free(cp);
64754ba9607SSascha Wildner errno = save_errno;
64854ba9607SSascha Wildner if (fd != -1) {
64954ba9607SSascha Wildner curp->gzip = 1;
65054ba9607SSascha Wildner return fd;
65154ba9607SSascha Wildner }
65254ba9607SSascha Wildner }
65354ba9607SSascha Wildner
65454ba9607SSascha Wildner /* Neither worked, give up. */
65554ba9607SSascha Wildner
65654ba9607SSascha Wildner return -1;
65760e1e752SSascha Wildner }
65860e1e752SSascha Wildner
65960e1e752SSascha Wildner struct mparse *
mparse_alloc(int options,enum mandoc_os os_e,const char * os_s)66054ba9607SSascha Wildner mparse_alloc(int options, enum mandoc_os os_e, const char *os_s)
66160e1e752SSascha Wildner {
66260e1e752SSascha Wildner struct mparse *curp;
66360e1e752SSascha Wildner
66460e1e752SSascha Wildner curp = mandoc_calloc(1, sizeof(struct mparse));
66560e1e752SSascha Wildner
666070c62a6SFranco Fichtner curp->options = options;
66754ba9607SSascha Wildner curp->os_s = os_s;
66860e1e752SSascha Wildner
66954ba9607SSascha Wildner curp->roff = roff_alloc(options);
67054ba9607SSascha Wildner curp->man = roff_man_alloc(curp->roff, curp->os_s,
671070c62a6SFranco Fichtner curp->options & MPARSE_QUICK ? 1 : 0);
67254ba9607SSascha Wildner if (curp->options & MPARSE_MDOC) {
67354ba9607SSascha Wildner curp->man->meta.macroset = MACROSET_MDOC;
67454ba9607SSascha Wildner if (curp->man->mdocmac == NULL)
67554ba9607SSascha Wildner curp->man->mdocmac = roffhash_alloc(MDOC_Dd, MDOC_MAX);
67654ba9607SSascha Wildner } else if (curp->options & MPARSE_MAN) {
67754ba9607SSascha Wildner curp->man->meta.macroset = MACROSET_MAN;
67854ba9607SSascha Wildner if (curp->man->manmac == NULL)
67954ba9607SSascha Wildner curp->man->manmac = roffhash_alloc(MAN_TH, MAN_MAX);
68054ba9607SSascha Wildner }
68154ba9607SSascha Wildner curp->man->meta.first->tok = TOKEN_NONE;
68254ba9607SSascha Wildner curp->man->meta.os_e = os_e;
683*99db7d0eSSascha Wildner tag_alloc();
68454ba9607SSascha Wildner return curp;
68560e1e752SSascha Wildner }
68660e1e752SSascha Wildner
68760e1e752SSascha Wildner void
mparse_reset(struct mparse * curp)68860e1e752SSascha Wildner mparse_reset(struct mparse *curp)
68960e1e752SSascha Wildner {
690*99db7d0eSSascha Wildner tag_free();
69160e1e752SSascha Wildner roff_reset(curp->roff);
69254ba9607SSascha Wildner roff_man_reset(curp->man);
69354ba9607SSascha Wildner free_buf_list(curp->secondary);
69454ba9607SSascha Wildner curp->secondary = NULL;
69554ba9607SSascha Wildner curp->gzip = 0;
696*99db7d0eSSascha Wildner tag_alloc();
69760e1e752SSascha Wildner }
69860e1e752SSascha Wildner
69960e1e752SSascha Wildner void
mparse_free(struct mparse * curp)70060e1e752SSascha Wildner mparse_free(struct mparse *curp)
70160e1e752SSascha Wildner {
702*99db7d0eSSascha Wildner tag_free();
70354ba9607SSascha Wildner roffhash_free(curp->man->mdocmac);
70454ba9607SSascha Wildner roffhash_free(curp->man->manmac);
70554ba9607SSascha Wildner roff_man_free(curp->man);
70660e1e752SSascha Wildner roff_free(curp->roff);
70754ba9607SSascha Wildner free_buf_list(curp->secondary);
70860e1e752SSascha Wildner free(curp);
70960e1e752SSascha Wildner }
71060e1e752SSascha Wildner
71154ba9607SSascha Wildner struct roff_meta *
mparse_result(struct mparse * curp)71254ba9607SSascha Wildner mparse_result(struct mparse *curp)
71360e1e752SSascha Wildner {
71454ba9607SSascha Wildner roff_state_reset(curp->man);
71554ba9607SSascha Wildner if (curp->options & MPARSE_VALIDATE) {
71654ba9607SSascha Wildner if (curp->man->meta.macroset == MACROSET_MDOC)
71754ba9607SSascha Wildner mdoc_validate(curp->man);
71854ba9607SSascha Wildner else
71954ba9607SSascha Wildner man_validate(curp->man);
720*99db7d0eSSascha Wildner tag_postprocess(curp->man, curp->man->meta.first);
721070c62a6SFranco Fichtner }
72254ba9607SSascha Wildner return &curp->man->meta;
72360e1e752SSascha Wildner }
72460e1e752SSascha Wildner
72560e1e752SSascha Wildner void
mparse_copy(const struct mparse * p)72654ba9607SSascha Wildner mparse_copy(const struct mparse *p)
72760e1e752SSascha Wildner {
72854ba9607SSascha Wildner struct buf *buf;
72960e1e752SSascha Wildner
73054ba9607SSascha Wildner for (buf = p->secondary; buf != NULL; buf = buf->next)
73154ba9607SSascha Wildner puts(buf->buf);
73236342e81SSascha Wildner }
733