1 /* $Id: mandoc.c,v 1.44 2011/03/28 23:52:13 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "mandoc.h" 32 #include "libmandoc.h" 33 34 #define DATESIZE 32 35 36 static int a2time(time_t *, const char *, const char *); 37 static char *time2a(time_t); 38 39 int 40 mandoc_special(char *p) 41 { 42 int len, i; 43 char term; 44 char *sv; 45 46 len = 0; 47 term = '\0'; 48 sv = p; 49 50 assert('\\' == *p); 51 p++; 52 53 switch (*p++) { 54 #if 0 55 case ('Z'): 56 /* FALLTHROUGH */ 57 case ('X'): 58 /* FALLTHROUGH */ 59 case ('x'): 60 /* FALLTHROUGH */ 61 case ('S'): 62 /* FALLTHROUGH */ 63 case ('R'): 64 /* FALLTHROUGH */ 65 case ('N'): 66 /* FALLTHROUGH */ 67 case ('l'): 68 /* FALLTHROUGH */ 69 case ('L'): 70 /* FALLTHROUGH */ 71 case ('H'): 72 /* FALLTHROUGH */ 73 case ('h'): 74 /* FALLTHROUGH */ 75 case ('D'): 76 /* FALLTHROUGH */ 77 case ('C'): 78 /* FALLTHROUGH */ 79 case ('b'): 80 /* FALLTHROUGH */ 81 case ('B'): 82 /* FALLTHROUGH */ 83 case ('a'): 84 /* FALLTHROUGH */ 85 case ('A'): 86 if (*p++ != '\'') 87 return(0); 88 term = '\''; 89 break; 90 #endif 91 case ('h'): 92 /* FALLTHROUGH */ 93 case ('v'): 94 /* FALLTHROUGH */ 95 case ('s'): 96 if (ASCII_HYPH == *p) 97 *p = '-'; 98 99 i = 0; 100 if ('+' == *p || '-' == *p) { 101 p++; 102 i = 1; 103 } 104 105 switch (*p++) { 106 case ('('): 107 len = 2; 108 break; 109 case ('['): 110 term = ']'; 111 break; 112 case ('\''): 113 term = '\''; 114 break; 115 case ('0'): 116 i = 1; 117 /* FALLTHROUGH */ 118 default: 119 len = 1; 120 p--; 121 break; 122 } 123 124 if (ASCII_HYPH == *p) 125 *p = '-'; 126 if ('+' == *p || '-' == *p) { 127 if (i) 128 return(0); 129 p++; 130 } 131 132 /* Handle embedded numerical subexp or escape. */ 133 134 if ('(' == *p) { 135 while (*p && ')' != *p) 136 if ('\\' == *p++) { 137 i = mandoc_special(--p); 138 if (0 == i) 139 return(0); 140 p += i; 141 } 142 143 if (')' == *p++) 144 break; 145 146 return(0); 147 } else if ('\\' == *p) { 148 if (0 == (i = mandoc_special(p))) 149 return(0); 150 p += i; 151 } 152 153 break; 154 #if 0 155 case ('Y'): 156 /* FALLTHROUGH */ 157 case ('V'): 158 /* FALLTHROUGH */ 159 case ('$'): 160 /* FALLTHROUGH */ 161 case ('n'): 162 /* FALLTHROUGH */ 163 #endif 164 case ('k'): 165 /* FALLTHROUGH */ 166 case ('M'): 167 /* FALLTHROUGH */ 168 case ('m'): 169 /* FALLTHROUGH */ 170 case ('f'): 171 /* FALLTHROUGH */ 172 case ('F'): 173 /* FALLTHROUGH */ 174 case ('*'): 175 switch (*p++) { 176 case ('('): 177 len = 2; 178 break; 179 case ('['): 180 term = ']'; 181 break; 182 default: 183 len = 1; 184 p--; 185 break; 186 } 187 break; 188 case ('('): 189 len = 2; 190 break; 191 case ('['): 192 term = ']'; 193 break; 194 case ('z'): 195 len = 1; 196 if ('\\' == *p) { 197 if (0 == (i = mandoc_special(p))) 198 return(0); 199 p += i; 200 return(*p ? (int)(p - sv) : 0); 201 } 202 break; 203 case ('o'): 204 /* FALLTHROUGH */ 205 case ('w'): 206 if ('\'' == *p++) { 207 term = '\''; 208 break; 209 } 210 /* FALLTHROUGH */ 211 default: 212 len = 1; 213 p--; 214 break; 215 } 216 217 if (term) { 218 for ( ; *p && term != *p; p++) 219 if (ASCII_HYPH == *p) 220 *p = '-'; 221 return(*p ? (int)(p - sv) : 0); 222 } 223 224 for (i = 0; *p && i < len; i++, p++) 225 if (ASCII_HYPH == *p) 226 *p = '-'; 227 return(i == len ? (int)(p - sv) : 0); 228 } 229 230 231 void * 232 mandoc_calloc(size_t num, size_t size) 233 { 234 void *ptr; 235 236 ptr = calloc(num, size); 237 if (NULL == ptr) { 238 perror(NULL); 239 exit((int)MANDOCLEVEL_SYSERR); 240 } 241 242 return(ptr); 243 } 244 245 246 void * 247 mandoc_malloc(size_t size) 248 { 249 void *ptr; 250 251 ptr = malloc(size); 252 if (NULL == ptr) { 253 perror(NULL); 254 exit((int)MANDOCLEVEL_SYSERR); 255 } 256 257 return(ptr); 258 } 259 260 261 void * 262 mandoc_realloc(void *ptr, size_t size) 263 { 264 265 ptr = realloc(ptr, size); 266 if (NULL == ptr) { 267 perror(NULL); 268 exit((int)MANDOCLEVEL_SYSERR); 269 } 270 271 return(ptr); 272 } 273 274 275 char * 276 mandoc_strdup(const char *ptr) 277 { 278 char *p; 279 280 p = strdup(ptr); 281 if (NULL == p) { 282 perror(NULL); 283 exit((int)MANDOCLEVEL_SYSERR); 284 } 285 286 return(p); 287 } 288 289 /* 290 * Parse a quoted or unquoted roff-style request or macro argument. 291 * Return a pointer to the parsed argument, which is either the original 292 * pointer or advanced by one byte in case the argument is quoted. 293 * Null-terminate the argument in place. 294 * Collapse pairs of quotes inside quoted arguments. 295 * Advance the argument pointer to the next argument, 296 * or to the null byte terminating the argument line. 297 */ 298 char * 299 mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) 300 { 301 char *start, *cp; 302 int quoted, pairs, white; 303 304 /* Quoting can only start with a new word. */ 305 start = *cpp; 306 if ('"' == *start) { 307 quoted = 1; 308 start++; 309 } else 310 quoted = 0; 311 312 pairs = 0; 313 white = 0; 314 for (cp = start; '\0' != *cp; cp++) { 315 /* Move left after quoted quotes and escaped backslashes. */ 316 if (pairs) 317 cp[-pairs] = cp[0]; 318 if ('\\' == cp[0]) { 319 if ('\\' == cp[1]) { 320 /* Poor man's copy mode. */ 321 pairs++; 322 cp++; 323 } else if (0 == quoted && ' ' == cp[1]) 324 /* Skip escaped blanks. */ 325 cp++; 326 } else if (0 == quoted) { 327 if (' ' == cp[0]) { 328 /* Unescaped blanks end unquoted args. */ 329 white = 1; 330 break; 331 } 332 } else if ('"' == cp[0]) { 333 if ('"' == cp[1]) { 334 /* Quoted quotes collapse. */ 335 pairs++; 336 cp++; 337 } else { 338 /* Unquoted quotes end quoted args. */ 339 quoted = 2; 340 break; 341 } 342 } 343 } 344 345 /* Quoted argument without a closing quote. */ 346 if (1 == quoted) 347 mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); 348 349 /* Null-terminate this argument and move to the next one. */ 350 if (pairs) 351 cp[-pairs] = '\0'; 352 if ('\0' != *cp) { 353 *cp++ = '\0'; 354 while (' ' == *cp) 355 cp++; 356 } 357 *pos += (int)(cp - start) + (quoted ? 1 : 0); 358 *cpp = cp; 359 360 if ('\0' == *cp && (white || ' ' == cp[-1])) 361 mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); 362 363 return(start); 364 } 365 366 static int 367 a2time(time_t *t, const char *fmt, const char *p) 368 { 369 struct tm tm; 370 char *pp; 371 372 memset(&tm, 0, sizeof(struct tm)); 373 374 pp = strptime(p, fmt, &tm); 375 if (NULL != pp && '\0' == *pp) { 376 *t = mktime(&tm); 377 return(1); 378 } 379 380 return(0); 381 } 382 383 static char * 384 time2a(time_t t) 385 { 386 struct tm tm; 387 char *buf, *p; 388 size_t ssz; 389 int isz; 390 391 localtime_r(&t, &tm); 392 393 /* 394 * Reserve space: 395 * up to 9 characters for the month (September) + blank 396 * up to 2 characters for the day + comma + blank 397 * 4 characters for the year and a terminating '\0' 398 */ 399 p = buf = mandoc_malloc(10 + 4 + 4 + 1); 400 401 if (0 == (ssz = strftime(p, 10 + 1, "%B ", &tm))) 402 goto fail; 403 p += (int)ssz; 404 405 if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm.tm_mday))) 406 goto fail; 407 p += isz; 408 409 if (0 == strftime(p, 4 + 1, "%Y", &tm)) 410 goto fail; 411 return(buf); 412 413 fail: 414 free(buf); 415 return(NULL); 416 } 417 418 char * 419 mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) 420 { 421 char *out; 422 time_t t; 423 424 if (NULL == in || '\0' == *in || 425 0 == strcmp(in, "$" "Mdocdate$")) { 426 mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); 427 time(&t); 428 } 429 else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && 430 !a2time(&t, "%b %d, %Y", in) && 431 !a2time(&t, "%Y-%m-%d", in)) { 432 mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); 433 t = 0; 434 } 435 out = t ? time2a(t) : NULL; 436 return(out ? out : mandoc_strdup(in)); 437 } 438 439 int 440 mandoc_eos(const char *p, size_t sz, int enclosed) 441 { 442 const char *q; 443 int found; 444 445 if (0 == sz) 446 return(0); 447 448 /* 449 * End-of-sentence recognition must include situations where 450 * some symbols, such as `)', allow prior EOS punctuation to 451 * propogate outward. 452 */ 453 454 found = 0; 455 for (q = p + (int)sz - 1; q >= p; q--) { 456 switch (*q) { 457 case ('\"'): 458 /* FALLTHROUGH */ 459 case ('\''): 460 /* FALLTHROUGH */ 461 case (']'): 462 /* FALLTHROUGH */ 463 case (')'): 464 if (0 == found) 465 enclosed = 1; 466 break; 467 case ('.'): 468 /* FALLTHROUGH */ 469 case ('!'): 470 /* FALLTHROUGH */ 471 case ('?'): 472 found = 1; 473 break; 474 default: 475 return(found && (!enclosed || isalnum((unsigned char)*q))); 476 } 477 } 478 479 return(found && !enclosed); 480 } 481 482 int 483 mandoc_hyph(const char *start, const char *c) 484 { 485 486 /* 487 * Choose whether to break at a hyphenated character. We only 488 * do this if it's free-standing within a word. 489 */ 490 491 /* Skip first/last character of buffer. */ 492 if (c == start || '\0' == *(c + 1)) 493 return(0); 494 /* Skip first/last character of word. */ 495 if ('\t' == *(c + 1) || '\t' == *(c - 1)) 496 return(0); 497 if (' ' == *(c + 1) || ' ' == *(c - 1)) 498 return(0); 499 /* Skip double invocations. */ 500 if ('-' == *(c + 1) || '-' == *(c - 1)) 501 return(0); 502 /* Skip escapes. */ 503 if ('\\' == *(c - 1)) 504 return(0); 505 506 return(1); 507 } 508 509 /* 510 * Find out whether a line is a macro line or not. If it is, adjust the 511 * current position and return one; if it isn't, return zero and don't 512 * change the current position. 513 */ 514 int 515 mandoc_getcontrol(const char *cp, int *ppos) 516 { 517 int pos; 518 519 pos = *ppos; 520 521 if ('\\' == cp[pos] && '.' == cp[pos + 1]) 522 pos += 2; 523 else if ('.' == cp[pos] || '\'' == cp[pos]) 524 pos++; 525 else 526 return(0); 527 528 while (' ' == cp[pos] || '\t' == cp[pos]) 529 pos++; 530 531 *ppos = pos; 532 return(1); 533 } 534