1 /* $Id: mandoc.c,v 1.36 2011/01/03 22:42:37 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "mandoc.h" 32 #include "libmandoc.h" 33 34 static int a2time(time_t *, const char *, const char *); 35 36 37 int 38 mandoc_special(char *p) 39 { 40 int len, i; 41 char term; 42 char *sv; 43 44 len = 0; 45 term = '\0'; 46 sv = p; 47 48 assert('\\' == *p); 49 p++; 50 51 switch (*p++) { 52 #if 0 53 case ('Z'): 54 /* FALLTHROUGH */ 55 case ('X'): 56 /* FALLTHROUGH */ 57 case ('x'): 58 /* FALLTHROUGH */ 59 case ('S'): 60 /* FALLTHROUGH */ 61 case ('R'): 62 /* FALLTHROUGH */ 63 case ('N'): 64 /* FALLTHROUGH */ 65 case ('l'): 66 /* FALLTHROUGH */ 67 case ('L'): 68 /* FALLTHROUGH */ 69 case ('H'): 70 /* FALLTHROUGH */ 71 case ('h'): 72 /* FALLTHROUGH */ 73 case ('D'): 74 /* FALLTHROUGH */ 75 case ('C'): 76 /* FALLTHROUGH */ 77 case ('b'): 78 /* FALLTHROUGH */ 79 case ('B'): 80 /* FALLTHROUGH */ 81 case ('a'): 82 /* FALLTHROUGH */ 83 case ('A'): 84 if (*p++ != '\'') 85 return(0); 86 term = '\''; 87 break; 88 #endif 89 case ('h'): 90 /* FALLTHROUGH */ 91 case ('v'): 92 /* FALLTHROUGH */ 93 case ('s'): 94 if (ASCII_HYPH == *p) 95 *p = '-'; 96 97 i = 0; 98 if ('+' == *p || '-' == *p) { 99 p++; 100 i = 1; 101 } 102 103 switch (*p++) { 104 case ('('): 105 len = 2; 106 break; 107 case ('['): 108 term = ']'; 109 break; 110 case ('\''): 111 term = '\''; 112 break; 113 case ('0'): 114 i = 1; 115 /* FALLTHROUGH */ 116 default: 117 len = 1; 118 p--; 119 break; 120 } 121 122 if (ASCII_HYPH == *p) 123 *p = '-'; 124 if ('+' == *p || '-' == *p) { 125 if (i) 126 return(0); 127 p++; 128 } 129 130 /* Handle embedded numerical subexp or escape. */ 131 132 if ('(' == *p) { 133 while (*p && ')' != *p) 134 if ('\\' == *p++) { 135 i = mandoc_special(--p); 136 if (0 == i) 137 return(0); 138 p += i; 139 } 140 141 if (')' == *p++) 142 break; 143 144 return(0); 145 } else if ('\\' == *p) { 146 if (0 == (i = mandoc_special(p))) 147 return(0); 148 p += i; 149 } 150 151 break; 152 #if 0 153 case ('Y'): 154 /* FALLTHROUGH */ 155 case ('V'): 156 /* FALLTHROUGH */ 157 case ('$'): 158 /* FALLTHROUGH */ 159 case ('n'): 160 /* FALLTHROUGH */ 161 #endif 162 case ('k'): 163 /* FALLTHROUGH */ 164 case ('M'): 165 /* FALLTHROUGH */ 166 case ('m'): 167 /* FALLTHROUGH */ 168 case ('f'): 169 /* FALLTHROUGH */ 170 case ('F'): 171 /* FALLTHROUGH */ 172 case ('*'): 173 switch (*p++) { 174 case ('('): 175 len = 2; 176 break; 177 case ('['): 178 term = ']'; 179 break; 180 default: 181 len = 1; 182 p--; 183 break; 184 } 185 break; 186 case ('('): 187 len = 2; 188 break; 189 case ('['): 190 term = ']'; 191 break; 192 case ('z'): 193 len = 1; 194 if ('\\' == *p) { 195 if (0 == (i = mandoc_special(p))) 196 return(0); 197 p += i; 198 return(*p ? (int)(p - sv) : 0); 199 } 200 break; 201 case ('o'): 202 /* FALLTHROUGH */ 203 case ('w'): 204 if ('\'' == *p++) { 205 term = '\''; 206 break; 207 } 208 /* FALLTHROUGH */ 209 default: 210 len = 1; 211 p--; 212 break; 213 } 214 215 if (term) { 216 for ( ; *p && term != *p; p++) 217 if (ASCII_HYPH == *p) 218 *p = '-'; 219 return(*p ? (int)(p - sv) : 0); 220 } 221 222 for (i = 0; *p && i < len; i++, p++) 223 if (ASCII_HYPH == *p) 224 *p = '-'; 225 return(i == len ? (int)(p - sv) : 0); 226 } 227 228 229 void * 230 mandoc_calloc(size_t num, size_t size) 231 { 232 void *ptr; 233 234 ptr = calloc(num, size); 235 if (NULL == ptr) { 236 perror(NULL); 237 exit((int)MANDOCLEVEL_SYSERR); 238 } 239 240 return(ptr); 241 } 242 243 244 void * 245 mandoc_malloc(size_t size) 246 { 247 void *ptr; 248 249 ptr = malloc(size); 250 if (NULL == ptr) { 251 perror(NULL); 252 exit((int)MANDOCLEVEL_SYSERR); 253 } 254 255 return(ptr); 256 } 257 258 259 void * 260 mandoc_realloc(void *ptr, size_t size) 261 { 262 263 ptr = realloc(ptr, size); 264 if (NULL == ptr) { 265 perror(NULL); 266 exit((int)MANDOCLEVEL_SYSERR); 267 } 268 269 return(ptr); 270 } 271 272 273 char * 274 mandoc_strdup(const char *ptr) 275 { 276 char *p; 277 278 p = strdup(ptr); 279 if (NULL == p) { 280 perror(NULL); 281 exit((int)MANDOCLEVEL_SYSERR); 282 } 283 284 return(p); 285 } 286 287 /* 288 * Parse a quoted or unquoted roff-style request or macro argument. 289 * Return a pointer to the parsed argument, which is either the original 290 * pointer or advanced by one byte in case the argument is quoted. 291 * Null-terminate the argument in place. 292 * Collapse pairs of quotes inside quoted arguments. 293 * Advance the argument pointer to the next argument, 294 * or to the null byte terminating the argument line. 295 */ 296 char * 297 mandoc_getarg(char **cpp, mandocmsg msg, void *data, int ln, int *pos) 298 { 299 char *start, *cp; 300 int quoted, pairs, white; 301 302 /* Quoting can only start with a new word. */ 303 start = *cpp; 304 if ('"' == *start) { 305 quoted = 1; 306 start++; 307 } else 308 quoted = 0; 309 310 pairs = 0; 311 white = 0; 312 for (cp = start; '\0' != *cp; cp++) { 313 /* Move left after quoted quotes and escaped backslashes. */ 314 if (pairs) 315 cp[-pairs] = cp[0]; 316 if ('\\' == cp[0]) { 317 if ('\\' == cp[1]) { 318 /* Poor man's copy mode. */ 319 pairs++; 320 cp++; 321 } else if (0 == quoted && ' ' == cp[1]) 322 /* Skip escaped blanks. */ 323 cp++; 324 } else if (0 == quoted) { 325 if (' ' == cp[0]) { 326 /* Unescaped blanks end unquoted args. */ 327 white = 1; 328 break; 329 } 330 } else if ('"' == cp[0]) { 331 if ('"' == cp[1]) { 332 /* Quoted quotes collapse. */ 333 pairs++; 334 cp++; 335 } else { 336 /* Unquoted quotes end quoted args. */ 337 quoted = 2; 338 break; 339 } 340 } 341 } 342 343 /* Quoted argument without a closing quote. */ 344 if (1 == quoted && msg) 345 (*msg)(MANDOCERR_BADQUOTE, data, ln, *pos, NULL); 346 347 /* Null-terminate this argument and move to the next one. */ 348 if (pairs) 349 cp[-pairs] = '\0'; 350 if ('\0' != *cp) { 351 *cp++ = '\0'; 352 while (' ' == *cp) 353 cp++; 354 } 355 *pos += (cp - start) + (quoted ? 1 : 0); 356 *cpp = cp; 357 358 if ('\0' == *cp && msg && (white || ' ' == cp[-1])) 359 (*msg)(MANDOCERR_EOLNSPACE, data, ln, *pos, NULL); 360 361 return(start); 362 } 363 364 365 static int 366 a2time(time_t *t, const char *fmt, const char *p) 367 { 368 struct tm tm; 369 char *pp; 370 371 memset(&tm, 0, sizeof(struct tm)); 372 373 pp = strptime(p, fmt, &tm); 374 if (NULL != pp && '\0' == *pp) { 375 *t = mktime(&tm); 376 return(1); 377 } 378 379 return(0); 380 } 381 382 383 /* 384 * Convert from a manual date string (see mdoc(7) and man(7)) into a 385 * date according to the stipulated date type. 386 */ 387 time_t 388 mandoc_a2time(int flags, const char *p) 389 { 390 time_t t; 391 392 if (MTIME_MDOCDATE & flags) { 393 if (0 == strcmp(p, "$" "Mdocdate$")) 394 return(time(NULL)); 395 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", p)) 396 return(t); 397 } 398 399 if (MTIME_CANONICAL & flags || MTIME_REDUCED & flags) 400 if (a2time(&t, "%b %d, %Y", p)) 401 return(t); 402 403 if (MTIME_ISO_8601 & flags) 404 if (a2time(&t, "%Y-%m-%d", p)) 405 return(t); 406 407 if (MTIME_REDUCED & flags) { 408 if (a2time(&t, "%d, %Y", p)) 409 return(t); 410 if (a2time(&t, "%Y", p)) 411 return(t); 412 } 413 414 return(0); 415 } 416 417 418 int 419 mandoc_eos(const char *p, size_t sz, int enclosed) 420 { 421 const char *q; 422 int found; 423 424 if (0 == sz) 425 return(0); 426 427 /* 428 * End-of-sentence recognition must include situations where 429 * some symbols, such as `)', allow prior EOS punctuation to 430 * propogate outward. 431 */ 432 433 found = 0; 434 for (q = p + (int)sz - 1; q >= p; q--) { 435 switch (*q) { 436 case ('\"'): 437 /* FALLTHROUGH */ 438 case ('\''): 439 /* FALLTHROUGH */ 440 case (']'): 441 /* FALLTHROUGH */ 442 case (')'): 443 if (0 == found) 444 enclosed = 1; 445 break; 446 case ('.'): 447 /* FALLTHROUGH */ 448 case ('!'): 449 /* FALLTHROUGH */ 450 case ('?'): 451 found = 1; 452 break; 453 default: 454 return(found && (!enclosed || isalnum((unsigned char)*q))); 455 } 456 } 457 458 return(found && !enclosed); 459 } 460 461 462 int 463 mandoc_hyph(const char *start, const char *c) 464 { 465 466 /* 467 * Choose whether to break at a hyphenated character. We only 468 * do this if it's free-standing within a word. 469 */ 470 471 /* Skip first/last character of buffer. */ 472 if (c == start || '\0' == *(c + 1)) 473 return(0); 474 /* Skip first/last character of word. */ 475 if ('\t' == *(c + 1) || '\t' == *(c - 1)) 476 return(0); 477 if (' ' == *(c + 1) || ' ' == *(c - 1)) 478 return(0); 479 /* Skip double invocations. */ 480 if ('-' == *(c + 1) || '-' == *(c - 1)) 481 return(0); 482 /* Skip escapes. */ 483 if ('\\' == *(c - 1)) 484 return(0); 485 486 return(1); 487 } 488