xref: /openbsd/usr.bin/mandoc/mandoc.c (revision cd14d642)
1 /* $OpenBSD: mandoc.c,v 1.89 2022/05/19 15:17:50 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  * Utility functions to handle end of sentence punctuation
20  * and dates and times, for use by mdoc(7) and man(7) parsers.
21  * Utility functions to handle fonts and numbers,
22  * for use by mandoc(1) parsers and formatters.
23  */
24 #include <sys/types.h>
25 
26 #include <assert.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <time.h>
34 
35 #include "mandoc_aux.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40 
41 static	int	 a2time(time_t *, const char *, const char *);
42 static	char	*time2a(time_t);
43 
44 
45 enum mandoc_esc
mandoc_font(const char * cp,int sz)46 mandoc_font(const char *cp, int sz)
47 {
48 	switch (sz) {
49 	case 0:
50 		return ESCAPE_FONTPREV;
51 	case 1:
52 		switch (cp[0]) {
53 		case 'B':
54 		case '3':
55 			return ESCAPE_FONTBOLD;
56 		case 'I':
57 		case '2':
58 			return ESCAPE_FONTITALIC;
59 		case 'P':
60 			return ESCAPE_FONTPREV;
61 		case 'R':
62 		case '1':
63 			return ESCAPE_FONTROMAN;
64 		case '4':
65 			return ESCAPE_FONTBI;
66 		default:
67 			return ESCAPE_ERROR;
68 		}
69 	case 2:
70 		switch (cp[0]) {
71 		case 'B':
72 			switch (cp[1]) {
73 			case 'I':
74 				return ESCAPE_FONTBI;
75 			default:
76 				return ESCAPE_ERROR;
77 			}
78 		case 'C':
79 			switch (cp[1]) {
80 			case 'B':
81 				return ESCAPE_FONTCB;
82 			case 'I':
83 				return ESCAPE_FONTCI;
84 			case 'R':
85 			case 'W':
86 				return ESCAPE_FONTCR;
87 			default:
88 				return ESCAPE_ERROR;
89 			}
90 		default:
91 			return ESCAPE_ERROR;
92 		}
93 	default:
94 		return ESCAPE_ERROR;
95 	}
96 }
97 
/*
 * Parse the string p according to the strptime(3) format fmt.
 * The whole string must be consumed by the format.
 * On success, store the mktime(3) result in *t and return 1.
 * On failure, return 0 and leave *t untouched.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Start from an all-zero struct tm before parsing into it. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
114 
/*
 * Format the time t as "Month day, year" in a buffer obtained
 * from mandoc_malloc().  If localtime(3) fails or any part of the
 * formatting does not fit, the buffer is freed and the result of
 * mandoc_strdup("") is returned instead.
 */
static char *
time2a(time_t t)
{
	/*
	 * Space budget:
	 * the month name (up to 9 characters, "September") + blank,
	 * the day (up to 2 characters) + comma + blank,
	 * the year (4 characters), and the terminating '\0'.
	 */
	enum { MONTH_SZ = 10, DAY_SZ = 4, YEAR_SZ = 4 };

	struct tm	*parts;
	char		*out;
	size_t		 mlen;
	int		 dlen;

	out = NULL;
	if ((parts = localtime(&t)) == NULL)
		goto bad;

	out = mandoc_malloc(MONTH_SZ + DAY_SZ + YEAR_SZ + 1);

	mlen = strftime(out, MONTH_SZ + 1, "%B ", parts);
	if (mlen == 0)
		goto bad;

	/*
	 * The day is printed with plain "%d", neither "%2d" nor
	 * "%02d", which is why the date cannot be produced by a
	 * single strftime(3) call using "%B %e, %Y" or "%B %d, %Y".
	 * Formatting piecewise with explicit bounds also limits the
	 * damage should LC_TIME ever be taken into account.
	 */

	dlen = snprintf(out + mlen, DAY_SZ + 1, "%d, ", parts->tm_mday);
	if (dlen < 0 || dlen > DAY_SZ)
		goto bad;

	if (strftime(out + mlen + dlen, YEAR_SZ + 1, "%Y", parts) == 0)
		goto bad;

	return out;

bad:
	free(out);
	return mandoc_strdup("");
}
163 
164 char *
mandoc_normdate(struct roff_node * nch,struct roff_node * nbl)165 mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
166 {
167 	char		*cp;
168 	time_t		 t;
169 
170 	/* No date specified. */
171 
172 	if (nch == NULL) {
173 		if (nbl == NULL)
174 			mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
175 		else
176 			mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
177 			    nbl->pos, "%s", roff_name[nbl->tok]);
178 		return mandoc_strdup("");
179 	}
180 	if (*nch->string == '\0') {
181 		mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
182 		    nch->pos, "%s", roff_name[nbl->tok]);
183 		return mandoc_strdup("");
184 	}
185 	if (strcmp(nch->string, "$" "Mdocdate$") == 0)
186 		return time2a(time(NULL));
187 
188 	/* Valid mdoc(7) date format. */
189 
190 	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
191 	    a2time(&t, "%b %d, %Y", nch->string)) {
192 		cp = time2a(t);
193 		if (t > time(NULL) + 86400)
194 			mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
195 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
196 		else if (*nch->string != '$' &&
197 		    strcmp(nch->string, cp) != 0)
198 			mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
199 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
200 		return cp;
201 	}
202 
203 	/* In man(7), do not warn about the legacy format. */
204 
205 	if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
206 		mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
207 		    "%s %s", roff_name[nbl->tok], nch->string);
208 	else if (t > time(NULL) + 86400)
209 		mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
210 		    "%s %s", roff_name[nbl->tok], nch->string);
211 	else if (nbl->tok == MDOC_Dd)
212 		mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
213 		    "Dd %s", nch->string);
214 
215 	/* Use any non-mdoc(7) date verbatim. */
216 
217 	return mandoc_strdup(nch->string);
218 }
219 
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned from its last byte towards its first.
 * Closing delimiters (double quote, single quote, ']' and ')')
 * seen before any sentence punctuation mark the punctuation as
 * enclosed.  The characters '.', '!' and '?' count as sentence
 * punctuation.  The scan stops at the first byte that is neither.
 *
 * Returns 1 if punctuation was found and either it was not
 * enclosed or the byte stopping the scan is alphanumeric;
 * returns 0 otherwise, including for an empty buffer.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	if (sz == 0)
		return 0;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Walk backwards by index rather than by pointer: stepping a
	 * pointer to before p[0] is undefined behaviour, and an index
	 * of type size_t avoids narrowing sz to int.
	 */

	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			/* Only delimiters after the punctuation enclose it. */
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found &&
			    (!enclosed || isalnum((unsigned char)p[i - 1]));
		}
	}

	/* The whole buffer consisted of punctuation and delimiters. */
	return found && !enclosed;
}
258 
/*
 * Convert the first sz bytes of p to an int using strtol(3)
 * with the given base.
 * Return -1 if sz exceeds 31, if the copied text is empty, or if
 * strtol(3) does not consume all of it.
 * Otherwise return the value, clamped to the range INT_MIN..INT_MAX;
 * a valid negative number is returned as such, so a result of -1
 * alone does not distinguish an error from the input "-1".
 * errno is reset to 0 before the conversion.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 digits[32];
	char		*end;
	long		 val;

	/* Leave room for the terminating '\0'. */
	if (sz >= sizeof(digits))
		return -1;

	/* strtol(3) needs a terminated string, so work on a copy. */
	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &end, base);

	if (digits[0] == '\0' || *end != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;
	return (int)val;
}
289