1 /* $OpenBSD: mandoc.c,v 1.89 2022/05/19 15:17:50 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
4 * Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Utility functions to handle end of sentence punctuation
20 * and dates and times, for use by mdoc(7) and man(7) parsers.
21 * Utility functions to handle fonts and numbers,
22 * for use by mandoc(1) parsers and formatters.
23 */
24 #include <sys/types.h>
25
26 #include <assert.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <time.h>
34
35 #include "mandoc_aux.h"
36 #include "mandoc.h"
37 #include "roff.h"
38 #include "libmandoc.h"
39 #include "roff_int.h"
40
41 static int a2time(time_t *, const char *, const char *);
42 static char *time2a(time_t);
43
44
45 enum mandoc_esc
mandoc_font(const char * cp,int sz)46 mandoc_font(const char *cp, int sz)
47 {
48 switch (sz) {
49 case 0:
50 return ESCAPE_FONTPREV;
51 case 1:
52 switch (cp[0]) {
53 case 'B':
54 case '3':
55 return ESCAPE_FONTBOLD;
56 case 'I':
57 case '2':
58 return ESCAPE_FONTITALIC;
59 case 'P':
60 return ESCAPE_FONTPREV;
61 case 'R':
62 case '1':
63 return ESCAPE_FONTROMAN;
64 case '4':
65 return ESCAPE_FONTBI;
66 default:
67 return ESCAPE_ERROR;
68 }
69 case 2:
70 switch (cp[0]) {
71 case 'B':
72 switch (cp[1]) {
73 case 'I':
74 return ESCAPE_FONTBI;
75 default:
76 return ESCAPE_ERROR;
77 }
78 case 'C':
79 switch (cp[1]) {
80 case 'B':
81 return ESCAPE_FONTCB;
82 case 'I':
83 return ESCAPE_FONTCI;
84 case 'R':
85 case 'W':
86 return ESCAPE_FONTCR;
87 default:
88 return ESCAPE_ERROR;
89 }
90 default:
91 return ESCAPE_ERROR;
92 }
93 default:
94 return ESCAPE_ERROR;
95 }
96 }
97
/*
 * Parse the string p according to the strptime(3) format fmt.
 * The parse only counts if it consumes the string completely.
 * On success, store the mktime(3) result in *t and return 1.
 * Otherwise, leave *t untouched and return 0.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Start from all zeros: strptime(3) only fills what fmt names. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
114
/*
 * Format the time t, broken down with localtime(3), as
 * "Month day, year" with the day of the month unpadded.
 * The result is a buffer from mandoc_malloc(); if the conversion or
 * any formatting step fails, the result is mandoc_strdup("") instead.
 * NOTE(review): presumably the caller is expected to free the
 * result - confirm against the callers.
 */
static char *
time2a(time_t t)
{
	/*
	 * Space reserved for the pieces, not counting the final '\0':
	 * up to 9 characters for the month (September) + blank,
	 * up to 2 characters for the day + comma + blank,
	 * 4 characters for the year.
	 */
	enum { MONTH_SZ = 10, DAY_SZ = 4, YEAR_SZ = 4 };

	struct tm	*lt;
	char		*out;
	size_t		 mlen;
	int		 dlen;

	lt = localtime(&t);
	if (lt == NULL)
		return mandoc_strdup("");

	out = mandoc_malloc(MONTH_SZ + DAY_SZ + YEAR_SZ + 1);

	mlen = strftime(out, MONTH_SZ + 1, "%B ", lt);
	if (mlen != 0) {
		/*
		 * The day is printed with plain "%d", not "%2d" or
		 * "%02d", which is why the date is not formatted in
		 * one go with "%B %e, %Y" or "%B %d, %Y".
		 * Formatting piecewise with individual size limits
		 * also keeps each piece within its reserved space.
		 */
		dlen = snprintf(out + mlen, DAY_SZ + 1, "%d, ",
		    lt->tm_mday);
		if (dlen >= 0 && dlen <= DAY_SZ &&
		    strftime(out + mlen + dlen, YEAR_SZ + 1, "%Y", lt) != 0)
			return out;
	}

	/* Some piece did not fit or could not be formatted. */
	free(out);
	return mandoc_strdup("");
}
163
164 char *
mandoc_normdate(struct roff_node * nch,struct roff_node * nbl)165 mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
166 {
167 char *cp;
168 time_t t;
169
170 /* No date specified. */
171
172 if (nch == NULL) {
173 if (nbl == NULL)
174 mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
175 else
176 mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
177 nbl->pos, "%s", roff_name[nbl->tok]);
178 return mandoc_strdup("");
179 }
180 if (*nch->string == '\0') {
181 mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
182 nch->pos, "%s", roff_name[nbl->tok]);
183 return mandoc_strdup("");
184 }
185 if (strcmp(nch->string, "$" "Mdocdate$") == 0)
186 return time2a(time(NULL));
187
188 /* Valid mdoc(7) date format. */
189
190 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
191 a2time(&t, "%b %d, %Y", nch->string)) {
192 cp = time2a(t);
193 if (t > time(NULL) + 86400)
194 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
195 nch->pos, "%s %s", roff_name[nbl->tok], cp);
196 else if (*nch->string != '$' &&
197 strcmp(nch->string, cp) != 0)
198 mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
199 nch->pos, "%s %s", roff_name[nbl->tok], cp);
200 return cp;
201 }
202
203 /* In man(7), do not warn about the legacy format. */
204
205 if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
206 mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
207 "%s %s", roff_name[nbl->tok], nch->string);
208 else if (t > time(NULL) + 86400)
209 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
210 "%s %s", roff_name[nbl->tok], nch->string);
211 else if (nbl->tok == MDOC_Dd)
212 mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
213 "Dd %s", nch->string);
214
215 /* Use any non-mdoc(7) date verbatim. */
216
217 return mandoc_strdup(nch->string);
218 }
219
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards.  Closing delimiters (double quote,
 * single quote, ']' and ')') seen before any punctuation mark the
 * sentence as enclosed; '.', '!' and '?' count as end-of-sentence
 * punctuation.  The first byte that is neither decides:
 * the result is 1 if punctuation was found and the sentence is
 * either not enclosed or directly preceded by an alphanumeric byte.
 * If the whole buffer consists of delimiters and punctuation,
 * the result is 1 only for punctuation that is not enclosed.
 * An empty buffer never ends a sentence.
 *
 * Returns 1 for end of sentence, 0 otherwise.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t	 i;
	int	 enclosed, found;

	if (sz == 0)
		return 0;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 *
	 * Walk backwards by index rather than by pointer: a pointer
	 * loop would have to step to p - 1 to terminate, which is
	 * undefined behaviour, and the index keeps the full range
	 * of size_t instead of narrowing sz to int.
	 */

	enclosed = found = 0;
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '"':
		case '\'':
		case ']':
		case ')':
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	return found && !enclosed;
}
258
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Return -1 if sz exceeds 31 bytes, if the input is empty,
 * or if strtol(3) leaves any part of it unconsumed.
 * Results outside the range of int are clamped to INT_MIN or INT_MAX.
 * Negative numbers are converted like any other, so a return value
 * of -1 can also mean that the input was "-1".
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 digits[32];
	char	*end;
	long	 val;

	/* One byte of the buffer is needed for the terminator. */
	if (sz >= sizeof(digits))
		return -1;

	/* strtol(3) needs a terminated string, so work on a copy. */
	memcpy(digits, p, sz);
	digits[sz] = '\0';

	/* errno is reset here, but not inspected afterwards. */
	errno = 0;
	val = strtol(digits, &end, base);

	/* Test for empty input first, before looking at end. */
	if (digits[0] == '\0' || *end != '\0')
		return -1;

	if (val < INT_MIN)
		return INT_MIN;
	if (val > INT_MAX)
		return INT_MAX;
	return (int)val;
}
289