/* $Id: mandoc.c,v 1.119 2021/08/10 12:55:03 schwarze Exp $ */
/*
 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1880387638SSascha Wildner #include "config.h"
1980387638SSascha Wildner
2080387638SSascha Wildner #include <sys/types.h>
2180387638SSascha Wildner
2280387638SSascha Wildner #include <assert.h>
2380387638SSascha Wildner #include <ctype.h>
24a4c7eb57SSascha Wildner #include <errno.h>
25a4c7eb57SSascha Wildner #include <limits.h>
2680387638SSascha Wildner #include <stdlib.h>
2780387638SSascha Wildner #include <stdio.h>
2880387638SSascha Wildner #include <string.h>
2980387638SSascha Wildner #include <time.h>
3080387638SSascha Wildner
31070c62a6SFranco Fichtner #include "mandoc_aux.h"
3254ba9607SSascha Wildner #include "mandoc.h"
3354ba9607SSascha Wildner #include "roff.h"
3480387638SSascha Wildner #include "libmandoc.h"
3554ba9607SSascha Wildner #include "roff_int.h"
3680387638SSascha Wildner
/* Parse a date string against a strptime(3) format; returns 1 on success. */
static	int	 a2time(time_t *, const char *, const char *);
/* Format a time as "Month D, YYYY" in newly allocated memory. */
static	char	*time2a(time_t);
3980387638SSascha Wildner
40a4c7eb57SSascha Wildner
41a4c7eb57SSascha Wildner enum mandoc_esc
mandoc_font(const char * cp,int sz)4254ba9607SSascha Wildner mandoc_font(const char *cp, int sz)
4354ba9607SSascha Wildner {
4454ba9607SSascha Wildner switch (sz) {
4554ba9607SSascha Wildner case 0:
4654ba9607SSascha Wildner return ESCAPE_FONTPREV;
4754ba9607SSascha Wildner case 1:
4854ba9607SSascha Wildner switch (cp[0]) {
4954ba9607SSascha Wildner case 'B':
5054ba9607SSascha Wildner case '3':
5154ba9607SSascha Wildner return ESCAPE_FONTBOLD;
5254ba9607SSascha Wildner case 'I':
5354ba9607SSascha Wildner case '2':
5454ba9607SSascha Wildner return ESCAPE_FONTITALIC;
5554ba9607SSascha Wildner case 'P':
5654ba9607SSascha Wildner return ESCAPE_FONTPREV;
5754ba9607SSascha Wildner case 'R':
5854ba9607SSascha Wildner case '1':
5954ba9607SSascha Wildner return ESCAPE_FONTROMAN;
6054ba9607SSascha Wildner case '4':
6154ba9607SSascha Wildner return ESCAPE_FONTBI;
6254ba9607SSascha Wildner default:
6354ba9607SSascha Wildner return ESCAPE_ERROR;
6454ba9607SSascha Wildner }
6554ba9607SSascha Wildner case 2:
6654ba9607SSascha Wildner switch (cp[0]) {
6754ba9607SSascha Wildner case 'B':
6854ba9607SSascha Wildner switch (cp[1]) {
6954ba9607SSascha Wildner case 'I':
7054ba9607SSascha Wildner return ESCAPE_FONTBI;
7154ba9607SSascha Wildner default:
7254ba9607SSascha Wildner return ESCAPE_ERROR;
7354ba9607SSascha Wildner }
7454ba9607SSascha Wildner case 'C':
7554ba9607SSascha Wildner switch (cp[1]) {
7654ba9607SSascha Wildner case 'B':
77*99db7d0eSSascha Wildner return ESCAPE_FONTCB;
7854ba9607SSascha Wildner case 'I':
79*99db7d0eSSascha Wildner return ESCAPE_FONTCI;
8054ba9607SSascha Wildner case 'R':
8154ba9607SSascha Wildner case 'W':
82*99db7d0eSSascha Wildner return ESCAPE_FONTCR;
8354ba9607SSascha Wildner default:
8454ba9607SSascha Wildner return ESCAPE_ERROR;
8554ba9607SSascha Wildner }
8654ba9607SSascha Wildner default:
8754ba9607SSascha Wildner return ESCAPE_ERROR;
8854ba9607SSascha Wildner }
8954ba9607SSascha Wildner default:
9054ba9607SSascha Wildner return ESCAPE_ERROR;
9154ba9607SSascha Wildner }
9254ba9607SSascha Wildner }
9354ba9607SSascha Wildner
9454ba9607SSascha Wildner enum mandoc_esc
mandoc_escape(const char ** end,const char ** start,int * sz)95a4c7eb57SSascha Wildner mandoc_escape(const char **end, const char **start, int *sz)
96a4c7eb57SSascha Wildner {
97f88b6c16SFranco Fichtner const char *local_start;
9854ba9607SSascha Wildner int local_sz, c, i;
99f88b6c16SFranco Fichtner char term;
100a4c7eb57SSascha Wildner enum mandoc_esc gly;
101a4c7eb57SSascha Wildner
102f88b6c16SFranco Fichtner /*
103f88b6c16SFranco Fichtner * When the caller doesn't provide return storage,
104f88b6c16SFranco Fichtner * use local storage.
105f88b6c16SFranco Fichtner */
106a4c7eb57SSascha Wildner
107f88b6c16SFranco Fichtner if (NULL == start)
108f88b6c16SFranco Fichtner start = &local_start;
109f88b6c16SFranco Fichtner if (NULL == sz)
110f88b6c16SFranco Fichtner sz = &local_sz;
111f88b6c16SFranco Fichtner
112f88b6c16SFranco Fichtner /*
11354ba9607SSascha Wildner * Treat "\E" just like "\";
11454ba9607SSascha Wildner * it only makes a difference in copy mode.
11554ba9607SSascha Wildner */
11654ba9607SSascha Wildner
11754ba9607SSascha Wildner if (**end == 'E')
11854ba9607SSascha Wildner ++*end;
11954ba9607SSascha Wildner
12054ba9607SSascha Wildner /*
121f88b6c16SFranco Fichtner * Beyond the backslash, at least one input character
122f88b6c16SFranco Fichtner * is part of the escape sequence. With one exception
123f88b6c16SFranco Fichtner * (see below), that character won't be returned.
124f88b6c16SFranco Fichtner */
125f88b6c16SFranco Fichtner
126f88b6c16SFranco Fichtner gly = ESCAPE_ERROR;
127f88b6c16SFranco Fichtner *start = ++*end;
128f88b6c16SFranco Fichtner *sz = 0;
129f88b6c16SFranco Fichtner term = '\0';
130f88b6c16SFranco Fichtner
131f88b6c16SFranco Fichtner switch ((*start)[-1]) {
132a4c7eb57SSascha Wildner /*
133a4c7eb57SSascha Wildner * First the glyphs. There are several different forms of
134a4c7eb57SSascha Wildner * these, but each eventually returns a substring of the glyph
135a4c7eb57SSascha Wildner * name.
136a4c7eb57SSascha Wildner */
137070c62a6SFranco Fichtner case '(':
138a4c7eb57SSascha Wildner gly = ESCAPE_SPECIAL;
139f88b6c16SFranco Fichtner *sz = 2;
14080387638SSascha Wildner break;
141070c62a6SFranco Fichtner case '[':
14254ba9607SSascha Wildner if (**start == ' ') {
14354ba9607SSascha Wildner ++*end;
14454ba9607SSascha Wildner return ESCAPE_ERROR;
14554ba9607SSascha Wildner }
146a4c7eb57SSascha Wildner gly = ESCAPE_SPECIAL;
14780387638SSascha Wildner term = ']';
14880387638SSascha Wildner break;
149070c62a6SFranco Fichtner case 'C':
150f88b6c16SFranco Fichtner if ('\'' != **start)
15154ba9607SSascha Wildner return ESCAPE_ERROR;
152f88b6c16SFranco Fichtner *start = ++*end;
1537888c61dSFranco Fichtner gly = ESCAPE_SPECIAL;
15480387638SSascha Wildner term = '\'';
15580387638SSascha Wildner break;
156a4c7eb57SSascha Wildner
157a4c7eb57SSascha Wildner /*
1587888c61dSFranco Fichtner * Escapes taking no arguments at all.
1597888c61dSFranco Fichtner */
16054ba9607SSascha Wildner case '!':
16154ba9607SSascha Wildner case '?':
16254ba9607SSascha Wildner return ESCAPE_UNSUPP;
16354ba9607SSascha Wildner case '%':
16454ba9607SSascha Wildner case '&':
16554ba9607SSascha Wildner case ')':
16654ba9607SSascha Wildner case ',':
16754ba9607SSascha Wildner case '/':
16854ba9607SSascha Wildner case '^':
16954ba9607SSascha Wildner case 'a':
170070c62a6SFranco Fichtner case 'd':
17154ba9607SSascha Wildner case 'r':
17254ba9607SSascha Wildner case 't':
173070c62a6SFranco Fichtner case 'u':
17454ba9607SSascha Wildner case '{':
17554ba9607SSascha Wildner case '|':
17654ba9607SSascha Wildner case '}':
17754ba9607SSascha Wildner return ESCAPE_IGNORE;
17854ba9607SSascha Wildner case 'c':
17954ba9607SSascha Wildner return ESCAPE_NOSPACE;
18054ba9607SSascha Wildner case 'p':
18154ba9607SSascha Wildner return ESCAPE_BREAK;
1827888c61dSFranco Fichtner
1837888c61dSFranco Fichtner /*
184f88b6c16SFranco Fichtner * The \z escape is supposed to output the following
185f88b6c16SFranco Fichtner * character without advancing the cursor position.
186f88b6c16SFranco Fichtner * Since we are mostly dealing with terminal mode,
187f88b6c16SFranco Fichtner * let us just skip the next character.
188f88b6c16SFranco Fichtner */
189070c62a6SFranco Fichtner case 'z':
19054ba9607SSascha Wildner return ESCAPE_SKIPCHAR;
191f88b6c16SFranco Fichtner
192f88b6c16SFranco Fichtner /*
193a4c7eb57SSascha Wildner * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
194a4c7eb57SSascha Wildner * 'X' is the trigger. These have opaque sub-strings.
195a4c7eb57SSascha Wildner */
196070c62a6SFranco Fichtner case 'F':
197070c62a6SFranco Fichtner case 'f':
19854ba9607SSascha Wildner case 'g':
19954ba9607SSascha Wildner case 'k':
20054ba9607SSascha Wildner case 'M':
20154ba9607SSascha Wildner case 'm':
20254ba9607SSascha Wildner case 'n':
20354ba9607SSascha Wildner case 'O':
20454ba9607SSascha Wildner case 'V':
20554ba9607SSascha Wildner case 'Y':
206*99db7d0eSSascha Wildner case '*':
207*99db7d0eSSascha Wildner switch ((*start)[-1]) {
208*99db7d0eSSascha Wildner case 'f':
209*99db7d0eSSascha Wildner gly = ESCAPE_FONT;
210*99db7d0eSSascha Wildner break;
211*99db7d0eSSascha Wildner case '*':
212*99db7d0eSSascha Wildner gly = ESCAPE_DEVICE;
213*99db7d0eSSascha Wildner break;
214*99db7d0eSSascha Wildner default:
215*99db7d0eSSascha Wildner gly = ESCAPE_IGNORE;
216*99db7d0eSSascha Wildner break;
217*99db7d0eSSascha Wildner }
218f88b6c16SFranco Fichtner switch (**start) {
219070c62a6SFranco Fichtner case '(':
22054ba9607SSascha Wildner if ((*start)[-1] == 'O')
22154ba9607SSascha Wildner gly = ESCAPE_ERROR;
222f88b6c16SFranco Fichtner *start = ++*end;
223f88b6c16SFranco Fichtner *sz = 2;
22480387638SSascha Wildner break;
225070c62a6SFranco Fichtner case '[':
22654ba9607SSascha Wildner if ((*start)[-1] == 'O')
22754ba9607SSascha Wildner gly = (*start)[1] == '5' ?
22854ba9607SSascha Wildner ESCAPE_UNSUPP : ESCAPE_ERROR;
229f88b6c16SFranco Fichtner *start = ++*end;
23080387638SSascha Wildner term = ']';
23180387638SSascha Wildner break;
23280387638SSascha Wildner default:
23354ba9607SSascha Wildner if ((*start)[-1] == 'O') {
23454ba9607SSascha Wildner switch (**start) {
23554ba9607SSascha Wildner case '0':
23654ba9607SSascha Wildner gly = ESCAPE_UNSUPP;
23754ba9607SSascha Wildner break;
23854ba9607SSascha Wildner case '1':
23954ba9607SSascha Wildner case '2':
24054ba9607SSascha Wildner case '3':
24154ba9607SSascha Wildner case '4':
24254ba9607SSascha Wildner break;
24354ba9607SSascha Wildner default:
24454ba9607SSascha Wildner gly = ESCAPE_ERROR;
24554ba9607SSascha Wildner break;
24654ba9607SSascha Wildner }
24754ba9607SSascha Wildner }
248f88b6c16SFranco Fichtner *sz = 1;
24980387638SSascha Wildner break;
25080387638SSascha Wildner }
25180387638SSascha Wildner break;
252a4c7eb57SSascha Wildner
253a4c7eb57SSascha Wildner /*
254a4c7eb57SSascha Wildner * These escapes are of the form \X'Y', where 'X' is the trigger
255a4c7eb57SSascha Wildner * and 'Y' is any string. These have opaque sub-strings.
256070c62a6SFranco Fichtner * The \B and \w escapes are handled in roff.c, roff_res().
257a4c7eb57SSascha Wildner */
258070c62a6SFranco Fichtner case 'A':
259070c62a6SFranco Fichtner case 'b':
260070c62a6SFranco Fichtner case 'D':
26154ba9607SSascha Wildner case 'R':
26254ba9607SSascha Wildner case 'X':
26354ba9607SSascha Wildner case 'Z':
26454ba9607SSascha Wildner gly = ESCAPE_IGNORE;
2657888c61dSFranco Fichtner /* FALLTHROUGH */
266070c62a6SFranco Fichtner case 'o':
26754ba9607SSascha Wildner if (**start == '\0')
26854ba9607SSascha Wildner return ESCAPE_ERROR;
26954ba9607SSascha Wildner if (gly == ESCAPE_ERROR)
27054ba9607SSascha Wildner gly = ESCAPE_OVERSTRIKE;
271070c62a6SFranco Fichtner term = **start;
272f88b6c16SFranco Fichtner *start = ++*end;
27380387638SSascha Wildner break;
274a4c7eb57SSascha Wildner
275a4c7eb57SSascha Wildner /*
276a4c7eb57SSascha Wildner * These escapes are of the form \X'N', where 'X' is the trigger
277a4c7eb57SSascha Wildner * and 'N' resolves to a numerical expression.
278a4c7eb57SSascha Wildner */
279070c62a6SFranco Fichtner case 'h':
280070c62a6SFranco Fichtner case 'H':
281070c62a6SFranco Fichtner case 'L':
282070c62a6SFranco Fichtner case 'l':
283070c62a6SFranco Fichtner case 'S':
284070c62a6SFranco Fichtner case 'v':
285070c62a6SFranco Fichtner case 'x':
286070c62a6SFranco Fichtner if (strchr(" %&()*+-./0123456789:<=>", **start)) {
28754ba9607SSascha Wildner if ('\0' != **start)
288070c62a6SFranco Fichtner ++*end;
28954ba9607SSascha Wildner return ESCAPE_ERROR;
290070c62a6SFranco Fichtner }
29154ba9607SSascha Wildner switch ((*start)[-1]) {
29254ba9607SSascha Wildner case 'h':
29354ba9607SSascha Wildner gly = ESCAPE_HORIZ;
29454ba9607SSascha Wildner break;
29554ba9607SSascha Wildner case 'l':
29654ba9607SSascha Wildner gly = ESCAPE_HLINE;
29754ba9607SSascha Wildner break;
29854ba9607SSascha Wildner default:
299a4c7eb57SSascha Wildner gly = ESCAPE_IGNORE;
30054ba9607SSascha Wildner break;
30154ba9607SSascha Wildner }
302070c62a6SFranco Fichtner term = **start;
303f88b6c16SFranco Fichtner *start = ++*end;
304a4c7eb57SSascha Wildner break;
305a4c7eb57SSascha Wildner
306a4c7eb57SSascha Wildner /*
30736342e81SSascha Wildner * Special handling for the numbered character escape.
30836342e81SSascha Wildner * XXX Do any other escapes need similar handling?
30936342e81SSascha Wildner */
310070c62a6SFranco Fichtner case 'N':
311f88b6c16SFranco Fichtner if ('\0' == **start)
31254ba9607SSascha Wildner return ESCAPE_ERROR;
313f88b6c16SFranco Fichtner (*end)++;
314f88b6c16SFranco Fichtner if (isdigit((unsigned char)**start)) {
315f88b6c16SFranco Fichtner *sz = 1;
31654ba9607SSascha Wildner return ESCAPE_IGNORE;
317f88b6c16SFranco Fichtner }
318f88b6c16SFranco Fichtner (*start)++;
31936342e81SSascha Wildner while (isdigit((unsigned char)**end))
32036342e81SSascha Wildner (*end)++;
321f88b6c16SFranco Fichtner *sz = *end - *start;
32236342e81SSascha Wildner if ('\0' != **end)
32336342e81SSascha Wildner (*end)++;
32454ba9607SSascha Wildner return ESCAPE_NUMBERED;
32536342e81SSascha Wildner
32636342e81SSascha Wildner /*
327a4c7eb57SSascha Wildner * Sizes get a special category of their own.
328a4c7eb57SSascha Wildner */
329070c62a6SFranco Fichtner case 's':
330a4c7eb57SSascha Wildner gly = ESCAPE_IGNORE;
331a4c7eb57SSascha Wildner
332a4c7eb57SSascha Wildner /* See +/- counts as a sign. */
333f88b6c16SFranco Fichtner if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
33454ba9607SSascha Wildner *start = ++*end;
335a4c7eb57SSascha Wildner
336f88b6c16SFranco Fichtner switch (**end) {
337070c62a6SFranco Fichtner case '(':
338f88b6c16SFranco Fichtner *start = ++*end;
339f88b6c16SFranco Fichtner *sz = 2;
340a4c7eb57SSascha Wildner break;
341070c62a6SFranco Fichtner case '[':
342f88b6c16SFranco Fichtner *start = ++*end;
343f88b6c16SFranco Fichtner term = ']';
344a4c7eb57SSascha Wildner break;
345070c62a6SFranco Fichtner case '\'':
346f88b6c16SFranco Fichtner *start = ++*end;
347f88b6c16SFranco Fichtner term = '\'';
348a4c7eb57SSascha Wildner break;
34954ba9607SSascha Wildner case '3':
35054ba9607SSascha Wildner case '2':
35154ba9607SSascha Wildner case '1':
35254ba9607SSascha Wildner *sz = (*end)[-1] == 's' &&
35354ba9607SSascha Wildner isdigit((unsigned char)(*end)[1]) ? 2 : 1;
35454ba9607SSascha Wildner break;
35580387638SSascha Wildner default:
356f88b6c16SFranco Fichtner *sz = 1;
35780387638SSascha Wildner break;
35880387638SSascha Wildner }
35980387638SSascha Wildner
360a4c7eb57SSascha Wildner break;
361a4c7eb57SSascha Wildner
362a4c7eb57SSascha Wildner /*
36354ba9607SSascha Wildner * Several special characters can be encoded as
36454ba9607SSascha Wildner * one-byte escape sequences without using \[].
365a4c7eb57SSascha Wildner */
36654ba9607SSascha Wildner case ' ':
36754ba9607SSascha Wildner case '\'':
36854ba9607SSascha Wildner case '-':
36954ba9607SSascha Wildner case '.':
37054ba9607SSascha Wildner case '0':
37154ba9607SSascha Wildner case ':':
37254ba9607SSascha Wildner case '_':
37354ba9607SSascha Wildner case '`':
37454ba9607SSascha Wildner case 'e':
37554ba9607SSascha Wildner case '~':
376a4c7eb57SSascha Wildner gly = ESCAPE_SPECIAL;
37754ba9607SSascha Wildner /* FALLTHROUGH */
37854ba9607SSascha Wildner default:
37954ba9607SSascha Wildner if (gly == ESCAPE_ERROR)
38054ba9607SSascha Wildner gly = ESCAPE_UNDEF;
381f88b6c16SFranco Fichtner *start = --*end;
382f88b6c16SFranco Fichtner *sz = 1;
383a4c7eb57SSascha Wildner break;
38480387638SSascha Wildner }
38580387638SSascha Wildner
386a4c7eb57SSascha Wildner /*
387f88b6c16SFranco Fichtner * Read up to the terminating character,
388f88b6c16SFranco Fichtner * paying attention to nested escapes.
389a4c7eb57SSascha Wildner */
390a4c7eb57SSascha Wildner
391a4c7eb57SSascha Wildner if ('\0' != term) {
392f88b6c16SFranco Fichtner while (**end != term) {
393f88b6c16SFranco Fichtner switch (**end) {
394070c62a6SFranco Fichtner case '\0':
39554ba9607SSascha Wildner return ESCAPE_ERROR;
396070c62a6SFranco Fichtner case '\\':
397a4c7eb57SSascha Wildner (*end)++;
398f88b6c16SFranco Fichtner if (ESCAPE_ERROR ==
399f88b6c16SFranco Fichtner mandoc_escape(end, NULL, NULL))
40054ba9607SSascha Wildner return ESCAPE_ERROR;
401f88b6c16SFranco Fichtner break;
402f88b6c16SFranco Fichtner default:
403f88b6c16SFranco Fichtner (*end)++;
404f88b6c16SFranco Fichtner break;
405f88b6c16SFranco Fichtner }
406f88b6c16SFranco Fichtner }
407f88b6c16SFranco Fichtner *sz = (*end)++ - *start;
40854ba9607SSascha Wildner
40954ba9607SSascha Wildner /*
41054ba9607SSascha Wildner * The file chars.c only provides one common list
41154ba9607SSascha Wildner * of character names, but \[-] == \- is the only
41254ba9607SSascha Wildner * one of the characters with one-byte names that
41354ba9607SSascha Wildner * allows enclosing the name in brackets.
41454ba9607SSascha Wildner */
41554ba9607SSascha Wildner if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-')
41654ba9607SSascha Wildner return ESCAPE_ERROR;
417f88b6c16SFranco Fichtner } else {
418f88b6c16SFranco Fichtner assert(*sz > 0);
419f88b6c16SFranco Fichtner if ((size_t)*sz > strlen(*start))
42054ba9607SSascha Wildner return ESCAPE_ERROR;
421f88b6c16SFranco Fichtner *end += *sz;
422f88b6c16SFranco Fichtner }
423a4c7eb57SSascha Wildner
424a4c7eb57SSascha Wildner /* Run post-processors. */
425a4c7eb57SSascha Wildner
426a4c7eb57SSascha Wildner switch (gly) {
427070c62a6SFranco Fichtner case ESCAPE_FONT:
42854ba9607SSascha Wildner gly = mandoc_font(*start, *sz);
429a4c7eb57SSascha Wildner break;
430070c62a6SFranco Fichtner case ESCAPE_SPECIAL:
43154ba9607SSascha Wildner if (**start == 'c') {
43254ba9607SSascha Wildner if (*sz < 6 || *sz > 7 ||
43354ba9607SSascha Wildner strncmp(*start, "char", 4) != 0 ||
43454ba9607SSascha Wildner (int)strspn(*start + 4, "0123456789") + 4 < *sz)
43554ba9607SSascha Wildner break;
43654ba9607SSascha Wildner c = 0;
43754ba9607SSascha Wildner for (i = 4; i < *sz; i++)
43854ba9607SSascha Wildner c = 10 * c + ((*start)[i] - '0');
43954ba9607SSascha Wildner if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
44054ba9607SSascha Wildner break;
44154ba9607SSascha Wildner *start += 4;
44254ba9607SSascha Wildner *sz -= 4;
44354ba9607SSascha Wildner gly = ESCAPE_NUMBERED;
44454ba9607SSascha Wildner break;
44554ba9607SSascha Wildner }
44654ba9607SSascha Wildner
44754ba9607SSascha Wildner /*
44854ba9607SSascha Wildner * Unicode escapes are defined in groff as \[u0000]
44954ba9607SSascha Wildner * to \[u10FFFF], where the contained value must be
45054ba9607SSascha Wildner * a valid Unicode codepoint. Here, however, only
45154ba9607SSascha Wildner * check the length and range.
45254ba9607SSascha Wildner */
45354ba9607SSascha Wildner if (**start != 'u' || *sz < 5 || *sz > 7)
45454ba9607SSascha Wildner break;
45554ba9607SSascha Wildner if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
45654ba9607SSascha Wildner break;
45754ba9607SSascha Wildner if (*sz == 6 && (*start)[1] == '0')
45854ba9607SSascha Wildner break;
45954ba9607SSascha Wildner if (*sz == 5 && (*start)[1] == 'D' &&
46054ba9607SSascha Wildner strchr("89ABCDEF", (*start)[2]) != NULL)
46154ba9607SSascha Wildner break;
46254ba9607SSascha Wildner if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
46354ba9607SSascha Wildner + 1 == *sz)
46454ba9607SSascha Wildner gly = ESCAPE_UNICODE;
465a4c7eb57SSascha Wildner break;
466*99db7d0eSSascha Wildner case ESCAPE_DEVICE:
467*99db7d0eSSascha Wildner assert(*sz == 2 && (*start)[0] == '.' && (*start)[1] == 'T');
468*99db7d0eSSascha Wildner break;
469a4c7eb57SSascha Wildner default:
470a4c7eb57SSascha Wildner break;
471a4c7eb57SSascha Wildner }
472a4c7eb57SSascha Wildner
47354ba9607SSascha Wildner return gly;
47480387638SSascha Wildner }
47580387638SSascha Wildner
/*
 * Parse the date string p according to the strptime(3) format fmt.
 *
 * The whole string has to match the format; trailing characters
 * make the parse fail.  When the build lacks strptime(3)
 * (HAVE_STRPTIME is zero or undefined), every parse fails.
 *
 * Returns 1 and stores the result in *t on success.
 * Returns 0 and leaves *t untouched on failure, including the
 * case that the parsed date cannot be represented as a time_t.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 tm;
	char		*pp;
	time_t		 res;

	memset(&tm, 0, sizeof(struct tm));

	pp = NULL;
#if HAVE_STRPTIME
	pp = strptime(p, fmt, &tm);
#endif
	if (pp == NULL || *pp != '\0')
		return 0;

	/*
	 * (time_t)-1 is both the error return of mktime(3) and a
	 * valid timestamp.  On success, mktime(3) fills in tm_wday,
	 * so an untouched out-of-range marker identifies failure.
	 */
	tm.tm_wday = -1;
	res = mktime(&tm);
	if (res == (time_t)-1 && tm.tm_wday == -1)
		return 0;

	*t = res;
	return 1;
}
49580387638SSascha Wildner
/*
 * Format the time t in local time as "Month D, YYYY",
 * for example "August 10, 2021".
 *
 * Always returns newly allocated memory.  If the time cannot be
 * converted or any component does not fit into the space reserved
 * for it, the result is an empty string.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	buf = NULL;
	tm = localtime(&t);
	if (tm == NULL)
		goto fail;

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */

	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	/* Each size limit below is the field budget plus the '\0'. */

	if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0)
		goto fail;
	p += (int)ssz;

	/*
	 * The output format is just "%d" here, not "%2d" or "%02d".
	 * That's also the reason why we can't just format the
	 * date as a whole with "%B %e, %Y" or "%B %d, %Y".
	 * Besides, the present approach is less prone to buffer
	 * overflows, in case anybody should ever introduce the bug
	 * of looking at LC_TIME.
	 */

	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	/* Years needing more than four digits fail here. */

	if (strftime(p, 4 + 1, "%Y", tm) == 0)
		goto fail;
	return buf;

fail:
	free(buf);
	return mandoc_strdup("");
}
54460e1e752SSascha Wildner
54560e1e752SSascha Wildner char *
mandoc_normdate(struct roff_node * nch,struct roff_node * nbl)546*99db7d0eSSascha Wildner mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
54780387638SSascha Wildner {
54854ba9607SSascha Wildner char *cp;
54980387638SSascha Wildner time_t t;
55080387638SSascha Wildner
551*99db7d0eSSascha Wildner /* No date specified. */
55254ba9607SSascha Wildner
553*99db7d0eSSascha Wildner if (nch == NULL) {
554*99db7d0eSSascha Wildner if (nbl == NULL)
555*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
556*99db7d0eSSascha Wildner else
557*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
558*99db7d0eSSascha Wildner nbl->pos, "%s", roff_name[nbl->tok]);
559*99db7d0eSSascha Wildner return mandoc_strdup("");
56080387638SSascha Wildner }
561*99db7d0eSSascha Wildner if (*nch->string == '\0') {
562*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
563*99db7d0eSSascha Wildner nch->pos, "%s", roff_name[nbl->tok]);
564*99db7d0eSSascha Wildner return mandoc_strdup("");
565*99db7d0eSSascha Wildner }
566*99db7d0eSSascha Wildner if (strcmp(nch->string, "$" "Mdocdate$") == 0)
567*99db7d0eSSascha Wildner return time2a(time(NULL));
56854ba9607SSascha Wildner
56954ba9607SSascha Wildner /* Valid mdoc(7) date format. */
57054ba9607SSascha Wildner
571*99db7d0eSSascha Wildner if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
572*99db7d0eSSascha Wildner a2time(&t, "%b %d, %Y", nch->string)) {
57354ba9607SSascha Wildner cp = time2a(t);
57454ba9607SSascha Wildner if (t > time(NULL) + 86400)
575*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
576*99db7d0eSSascha Wildner nch->pos, "%s %s", roff_name[nbl->tok], cp);
577*99db7d0eSSascha Wildner else if (*nch->string != '$' &&
578*99db7d0eSSascha Wildner strcmp(nch->string, cp) != 0)
579*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
580*99db7d0eSSascha Wildner nch->pos, "%s %s", roff_name[nbl->tok], cp);
58154ba9607SSascha Wildner return cp;
58280387638SSascha Wildner }
58354ba9607SSascha Wildner
58454ba9607SSascha Wildner /* In man(7), do not warn about the legacy format. */
58554ba9607SSascha Wildner
586*99db7d0eSSascha Wildner if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
587*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
588*99db7d0eSSascha Wildner "%s %s", roff_name[nbl->tok], nch->string);
58954ba9607SSascha Wildner else if (t > time(NULL) + 86400)
590*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
591*99db7d0eSSascha Wildner "%s %s", roff_name[nbl->tok], nch->string);
592*99db7d0eSSascha Wildner else if (nbl->tok == MDOC_Dd)
593*99db7d0eSSascha Wildner mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
594*99db7d0eSSascha Wildner "Dd %s", nch->string);
59554ba9607SSascha Wildner
59654ba9607SSascha Wildner /* Use any non-mdoc(7) date verbatim. */
59754ba9607SSascha Wildner
598*99db7d0eSSascha Wildner return mandoc_strdup(nch->string);
59980387638SSascha Wildner }
60080387638SSascha Wildner
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing
 * delimiters (double quote, single quote, ']', ')') seen before
 * any end-of-sentence punctuation mark the punctuation as enclosed.
 * The first byte that is neither a closing delimiter nor one of
 * '.', '!', '?' settles the answer: the text ends a sentence if
 * punctuation was found and either it was not enclosed or that
 * byte is alphanumeric.
 *
 * If the buffer consists of delimiters and punctuation only, it
 * ends a sentence if punctuation was found and was not enclosed.
 *
 * Returns 1 at the end of a sentence, 0 otherwise; an empty
 * buffer never ends a sentence.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	enclosed = found = 0;

	/*
	 * Count the index down rather than a pointer, such that no
	 * pointer before the start of the buffer is ever formed
	 * and sz is never narrowed to an int.
	 */

	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			if (0 == found)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
63980387638SSascha Wildner
/*
 * Convert the first sz bytes of p to an int in the given base.
 *
 * The bytes are parsed with strtol(3), so leading white space and
 * a sign are accepted.  Values outside the range of int are
 * clamped to INT_MIN or INT_MAX rather than rejected; negative
 * values are returned as they are.
 *
 * Returns -1 if the input is empty, longer than 31 bytes, or not
 * completely consumed by the conversion.  That makes an invalid
 * string indistinguishable from the valid input "-1".
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 buf[32];
	char	*ep;
	long	 v;

	/*
	 * Reject empty input before copying: that avoids passing
	 * a null pointer to memcpy(3) when called as (NULL, 0).
	 * The upper limit leaves room for the terminating '\0'.
	 */

	if (sz == 0 || sz >= sizeof(buf))
		return -1;

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * NOTE(review): errno is reset but never inspected in this
	 * function; overflow is handled by the clamping below.
	 */

	errno = 0;
	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return -1;

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return (int)v;
}
670