xref: /dragonfly/contrib/mdocml/mandoc.c (revision 80387638)
1 /*	$Id: mandoc.c,v 1.36 2011/01/03 22:42:37 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "mandoc.h"
32 #include "libmandoc.h"
33 
34 static	int	 a2time(time_t *, const char *, const char *);
35 
36 
37 int
38 mandoc_special(char *p)
39 {
40 	int		 len, i;
41 	char		 term;
42 	char		*sv;
43 
44 	len = 0;
45 	term = '\0';
46 	sv = p;
47 
48 	assert('\\' == *p);
49 	p++;
50 
51 	switch (*p++) {
52 #if 0
53 	case ('Z'):
54 		/* FALLTHROUGH */
55 	case ('X'):
56 		/* FALLTHROUGH */
57 	case ('x'):
58 		/* FALLTHROUGH */
59 	case ('S'):
60 		/* FALLTHROUGH */
61 	case ('R'):
62 		/* FALLTHROUGH */
63 	case ('N'):
64 		/* FALLTHROUGH */
65 	case ('l'):
66 		/* FALLTHROUGH */
67 	case ('L'):
68 		/* FALLTHROUGH */
69 	case ('H'):
70 		/* FALLTHROUGH */
71 	case ('h'):
72 		/* FALLTHROUGH */
73 	case ('D'):
74 		/* FALLTHROUGH */
75 	case ('C'):
76 		/* FALLTHROUGH */
77 	case ('b'):
78 		/* FALLTHROUGH */
79 	case ('B'):
80 		/* FALLTHROUGH */
81 	case ('a'):
82 		/* FALLTHROUGH */
83 	case ('A'):
84 		if (*p++ != '\'')
85 			return(0);
86 		term = '\'';
87 		break;
88 #endif
89 	case ('h'):
90 		/* FALLTHROUGH */
91 	case ('v'):
92 		/* FALLTHROUGH */
93 	case ('s'):
94 		if (ASCII_HYPH == *p)
95 			*p = '-';
96 
97 		i = 0;
98 		if ('+' == *p || '-' == *p) {
99 			p++;
100 			i = 1;
101 		}
102 
103 		switch (*p++) {
104 		case ('('):
105 			len = 2;
106 			break;
107 		case ('['):
108 			term = ']';
109 			break;
110 		case ('\''):
111 			term = '\'';
112 			break;
113 		case ('0'):
114 			i = 1;
115 			/* FALLTHROUGH */
116 		default:
117 			len = 1;
118 			p--;
119 			break;
120 		}
121 
122 		if (ASCII_HYPH == *p)
123 			*p = '-';
124 		if ('+' == *p || '-' == *p) {
125 			if (i)
126 				return(0);
127 			p++;
128 		}
129 
130 		/* Handle embedded numerical subexp or escape. */
131 
132 		if ('(' == *p) {
133 			while (*p && ')' != *p)
134 				if ('\\' == *p++) {
135 					i = mandoc_special(--p);
136 					if (0 == i)
137 						return(0);
138 					p += i;
139 				}
140 
141 			if (')' == *p++)
142 				break;
143 
144 			return(0);
145 		} else if ('\\' == *p) {
146 			if (0 == (i = mandoc_special(p)))
147 				return(0);
148 			p += i;
149 		}
150 
151 		break;
152 #if 0
153 	case ('Y'):
154 		/* FALLTHROUGH */
155 	case ('V'):
156 		/* FALLTHROUGH */
157 	case ('$'):
158 		/* FALLTHROUGH */
159 	case ('n'):
160 		/* FALLTHROUGH */
161 #endif
162 	case ('k'):
163 		/* FALLTHROUGH */
164 	case ('M'):
165 		/* FALLTHROUGH */
166 	case ('m'):
167 		/* FALLTHROUGH */
168 	case ('f'):
169 		/* FALLTHROUGH */
170 	case ('F'):
171 		/* FALLTHROUGH */
172 	case ('*'):
173 		switch (*p++) {
174 		case ('('):
175 			len = 2;
176 			break;
177 		case ('['):
178 			term = ']';
179 			break;
180 		default:
181 			len = 1;
182 			p--;
183 			break;
184 		}
185 		break;
186 	case ('('):
187 		len = 2;
188 		break;
189 	case ('['):
190 		term = ']';
191 		break;
192 	case ('z'):
193 		len = 1;
194 		if ('\\' == *p) {
195 			if (0 == (i = mandoc_special(p)))
196 				return(0);
197 			p += i;
198 			return(*p ? (int)(p - sv) : 0);
199 		}
200 		break;
201 	case ('o'):
202 		/* FALLTHROUGH */
203 	case ('w'):
204 		if ('\'' == *p++) {
205 			term = '\'';
206 			break;
207 		}
208 		/* FALLTHROUGH */
209 	default:
210 		len = 1;
211 		p--;
212 		break;
213 	}
214 
215 	if (term) {
216 		for ( ; *p && term != *p; p++)
217 			if (ASCII_HYPH == *p)
218 				*p = '-';
219 		return(*p ? (int)(p - sv) : 0);
220 	}
221 
222 	for (i = 0; *p && i < len; i++, p++)
223 		if (ASCII_HYPH == *p)
224 			*p = '-';
225 	return(i == len ? (int)(p - sv) : 0);
226 }
227 
228 
229 void *
230 mandoc_calloc(size_t num, size_t size)
231 {
232 	void		*ptr;
233 
234 	ptr = calloc(num, size);
235 	if (NULL == ptr) {
236 		perror(NULL);
237 		exit((int)MANDOCLEVEL_SYSERR);
238 	}
239 
240 	return(ptr);
241 }
242 
243 
244 void *
245 mandoc_malloc(size_t size)
246 {
247 	void		*ptr;
248 
249 	ptr = malloc(size);
250 	if (NULL == ptr) {
251 		perror(NULL);
252 		exit((int)MANDOCLEVEL_SYSERR);
253 	}
254 
255 	return(ptr);
256 }
257 
258 
259 void *
260 mandoc_realloc(void *ptr, size_t size)
261 {
262 
263 	ptr = realloc(ptr, size);
264 	if (NULL == ptr) {
265 		perror(NULL);
266 		exit((int)MANDOCLEVEL_SYSERR);
267 	}
268 
269 	return(ptr);
270 }
271 
272 
273 char *
274 mandoc_strdup(const char *ptr)
275 {
276 	char		*p;
277 
278 	p = strdup(ptr);
279 	if (NULL == p) {
280 		perror(NULL);
281 		exit((int)MANDOCLEVEL_SYSERR);
282 	}
283 
284 	return(p);
285 }
286 
287 /*
288  * Parse a quoted or unquoted roff-style request or macro argument.
289  * Return a pointer to the parsed argument, which is either the original
290  * pointer or advanced by one byte in case the argument is quoted.
291  * Null-terminate the argument in place.
292  * Collapse pairs of quotes inside quoted arguments.
293  * Advance the argument pointer to the next argument,
294  * or to the null byte terminating the argument line.
295  */
296 char *
297 mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos)
298 {
299 	char	 *start, *cp;
300 	int	  quoted, pairs, white;
301 
302 	/* Quoting can only start with a new word. */
303 	start = *cpp;
304 	if ('"' == *start) {
305 		quoted = 1;
306 		start++;
307 	} else
308 		quoted = 0;
309 
310 	pairs = 0;
311 	white = 0;
312 	for (cp = start; '\0' != *cp; cp++) {
313 		/* Move left after quoted quotes and escaped backslashes. */
314 		if (pairs)
315 			cp[-pairs] = cp[0];
316 		if ('\\' == cp[0]) {
317 			if ('\\' == cp[1]) {
318 				/* Poor man's copy mode. */
319 				pairs++;
320 				cp++;
321 			} else if (0 == quoted && ' ' == cp[1])
322 				/* Skip escaped blanks. */
323 				cp++;
324 		} else if (0 == quoted) {
325 			if (' ' == cp[0]) {
326 				/* Unescaped blanks end unquoted args. */
327 				white = 1;
328 				break;
329 			}
330 		} else if ('"' == cp[0]) {
331 			if ('"' == cp[1]) {
332 				/* Quoted quotes collapse. */
333 				pairs++;
334 				cp++;
335 			} else {
336 				/* Unquoted quotes end quoted args. */
337 				quoted = 2;
338 				break;
339 			}
340 		}
341 	}
342 
343 	/* Quoted argument without a closing quote. */
344 	if (1 == quoted && msg)
345 		(*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL);
346 
347 	/* Null-terminate this argument and move to the next one. */
348 	if (pairs)
349 		cp[-pairs] = '\0';
350 	if ('\0' != *cp) {
351 		*cp++ = '\0';
352 		while (' ' == *cp)
353 			cp++;
354 	}
355 	*pos += (cp - start) + (quoted ? 1 : 0);
356 	*cpp = cp;
357 
358 	if ('\0' == *cp && msg && (white || ' ' == cp[-1]))
359 		(*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL);
360 
361 	return(start);
362 }
363 
364 
/*
 * Try to parse all of `p' according to the strptime(3) format `fmt'.
 * On success, store the mktime(3) conversion of the result in `*t'
 * and return 1.  Return 0, leaving `*t' alone, if parsing fails or
 * stops before the end of the string.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest;

	/* Fields the format does not set must start out as zero. */
	memset(&parsed, 0, sizeof(parsed));

	rest = strptime(p, fmt, &parsed);
	if (NULL == rest || '\0' != *rest)
		return(0);

	*t = mktime(&parsed);
	return(1);
}
381 
382 
383 /*
384  * Convert from a manual date string (see mdoc(7) and man(7)) into a
385  * date according to the stipulated date type.
386  */
387 time_t
388 mandoc_a2time(int flags, const char *p)
389 {
390 	time_t		 t;
391 
392 	if (MTIME_MDOCDATE & flags) {
393 		if (0 == strcmp(p, "$" "Mdocdate$"))
394 			return(time(NULL));
395 		if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p))
396 			return(t);
397 	}
398 
399 	if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags)
400 		if (a2time(&t, "%b %d, %Y", p))
401 			return(t);
402 
403 	if (MTIME_ISO_8601 & flags)
404 		if (a2time(&t, "%Y-%m-%d", p))
405 			return(t);
406 
407 	if (MTIME_REDUCED & flags) {
408 		if (a2time(&t, "%d, %Y", p))
409 			return(t);
410 		if (a2time(&t, "%Y", p))
411 			return(t);
412 	}
413 
414 	return(0);
415 }
416 
417 
/*
 * Decide whether the `sz' bytes at `p' end a sentence.
 *
 * The buffer is scanned backwards from its last byte.  Closing quotes,
 * brackets and parentheses are skipped, as is end-of-sentence
 * punctuation (`.', `!', `?'); the first other byte decides the
 * outcome.  Returns 1 for an end of sentence and 0 otherwise; an
 * empty buffer is never an end of sentence.
 *
 * `enclosed' non-zero on entry, or a closing symbol found behind the
 * punctuation, makes the test stricter: the punctuation must then
 * directly follow an alphanumeric byte.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	int found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Walk backwards by index rather than by pointer, so that no
	 * pointer in front of the buffer is ever formed and `sz' is
	 * never narrowed to an int.
	 */
	while (sz > 0) {
		switch (p[--sz]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			/* Only closers behind the punctuation count. */
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[sz])));
		}
	}

	/* Nothing but punctuation and closing symbols. */
	return(found && !enclosed);
}
460 
461 
/*
 * Choose whether a line may be broken at the hyphen `c', which lies
 * in the null-terminated buffer beginning at `start'.  Breaking is
 * only allowed when the hyphen is free-standing within a word.
 * Returns 1 if a break is permitted, 0 if not.
 */
int
mandoc_hyph(const char *start, const char *c)
{
	char		 before, after;

	/* The first character of the buffer has no left neighbour. */
	if (start == c)
		return(0);

	/* Nor may the hyphen be the last character of the buffer. */
	after = c[1];
	if ('\0' == after)
		return(0);
	before = c[-1];

	/* Right neighbour: end of word or a doubled hyphen. */
	switch (after) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		return(0);
	default:
		break;
	}

	/* Left neighbour: start of word, doubled hyphen or escape. */
	switch (before) {
	case ('\t'):
		/* FALLTHROUGH */
	case (' '):
		/* FALLTHROUGH */
	case ('-'):
		/* FALLTHROUGH */
	case ('\\'):
		return(0);
	default:
		break;
	}

	return(1);
}
488