1 /* $Id: mandocdb.c,v 1.49.2.10 2013/11/21 01:53:48 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <dirent.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <getopt.h> 30 #include <limits.h> 31 #include <stdio.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 37 #if defined(__APPLE__) 38 # include <libkern/OSByteOrder.h> 39 #elif defined(__linux__) 40 # include <endian.h> 41 #elif defined(__sun) 42 # include <sys/byteorder.h> 43 # include <sys/stat.h> 44 #else 45 # include <sys/endian.h> 46 #endif 47 48 #if defined(__linux__) || defined(__sun) 49 # include <db_185.h> 50 #else 51 # include <db.h> 52 #endif 53 54 #include "man.h" 55 #include "mdoc.h" 56 #include "mandoc.h" 57 #include "mandocdb.h" 58 #include "manpath.h" 59 60 #define MANDOC_BUFSZ BUFSIZ 61 #define MANDOC_SLOP 1024 62 63 #define MANDOC_SRC 0x1 64 #define MANDOC_FORM 0x2 65 66 /* Access to the mandoc database on disk. */ 67 68 struct mdb { 69 char idxn[PATH_MAX]; /* index db filename */ 70 char dbn[PATH_MAX]; /* keyword db filename */ 71 DB *idx; /* index recno database */ 72 DB *db; /* keyword btree database */ 73 }; 74 75 /* Stack of temporarily unused index records. */ 76 77 struct recs { 78 recno_t *stack; /* pointer to a malloc'ed array */ 79 size_t size; /* number of allocated slots */ 80 size_t cur; /* current number of empty records */ 81 recno_t last; /* last record number in the index */ 82 }; 83 84 /* Tiny list for files. No need to bring in QUEUE. */ 85 86 struct of { 87 char *fname; /* heap-allocated */ 88 char *sec; 89 char *arch; 90 char *title; 91 int src_form; 92 struct of *next; /* NULL for last one */ 93 struct of *first; /* first in list */ 94 }; 95 96 /* Buffer for storing growable data. */ 97 98 struct buf { 99 char *cp; 100 size_t len; /* current length */ 101 size_t size; /* total buffer size */ 102 }; 103 104 /* Operation we're going to perform. */ 105 106 enum op { 107 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 108 OP_CONFFILE, /* new databases from custom config file */ 109 OP_UPDATE, /* delete/add entries in existing database */ 110 OP_DELETE, /* delete entries from existing database */ 111 OP_TEST /* change no databases, report potential problems */ 112 }; 113 114 #define MAN_ARGS DB *hash, \ 115 struct buf *buf, \ 116 struct buf *dbuf, \ 117 const struct man_node *n 118 #define MDOC_ARGS DB *hash, \ 119 struct buf *buf, \ 120 struct buf *dbuf, \ 121 const struct mdoc_node *n, \ 122 const struct mdoc_meta *m 123 124 static void buf_appendmdoc(struct buf *, 125 const struct mdoc_node *, int); 126 static void buf_append(struct buf *, const char *); 127 static void buf_appendb(struct buf *, 128 const void *, size_t); 129 static void dbt_put(DB *, const char *, DBT *, DBT *); 130 static void hash_put(DB *, const struct buf *, uint64_t); 131 static void hash_reset(DB **); 132 static void index_merge(const struct of *, struct mparse *, 133 struct buf *, struct buf *, DB *, 134 struct mdb *, struct recs *); 135 static void index_prune(const struct of *, struct mdb *, 136 struct recs *); 137 static void ofile_argbuild(int, char *[], struct of **, 138 const char *); 139 static void ofile_dirbuild(const char *, const char *, 140 const char *, int, struct of **); 141 static void ofile_free(struct of *); 142 static void pformatted(DB *, struct buf *, 143 struct buf *, const struct of *); 144 static int pman_node(MAN_ARGS); 145 static void pmdoc_node(MDOC_ARGS); 146 static int pmdoc_head(MDOC_ARGS); 147 static int pmdoc_body(MDOC_ARGS); 148 static int pmdoc_Fd(MDOC_ARGS); 149 static int pmdoc_In(MDOC_ARGS); 150 static int pmdoc_Fn(MDOC_ARGS); 151 static int pmdoc_Nd(MDOC_ARGS); 152 static int pmdoc_Nm(MDOC_ARGS); 153 static int pmdoc_Sh(MDOC_ARGS); 154 static int pmdoc_St(MDOC_ARGS); 155 static int pmdoc_Xr(MDOC_ARGS); 156 157 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ 158 159 struct mdoc_handler { 160 int (*fp)(MDOC_ARGS); /* Optional handler. */ 161 uint64_t mask; /* Set unless handler returns 0. */ 162 int flags; /* For use by pmdoc_node. */ 163 }; 164 165 static const struct mdoc_handler mdocs[MDOC_MAX] = { 166 { NULL, 0, 0 }, /* Ap */ 167 { NULL, 0, 0 }, /* Dd */ 168 { NULL, 0, 0 }, /* Dt */ 169 { NULL, 0, 0 }, /* Os */ 170 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ 171 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ 172 { NULL, 0, 0 }, /* Pp */ 173 { NULL, 0, 0 }, /* D1 */ 174 { NULL, 0, 0 }, /* Dl */ 175 { NULL, 0, 0 }, /* Bd */ 176 { NULL, 0, 0 }, /* Ed */ 177 { NULL, 0, 0 }, /* Bl */ 178 { NULL, 0, 0 }, /* El */ 179 { NULL, 0, 0 }, /* It */ 180 { NULL, 0, 0 }, /* Ad */ 181 { NULL, TYPE_An, MDOCF_CHILD }, /* An */ 182 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ 183 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ 184 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ 185 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ 186 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ 187 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ 188 { NULL, 0, 0 }, /* Ex */ 189 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ 190 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ 191 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ 192 { pmdoc_Fn, 0, 0 }, /* Fn */ 193 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ 194 { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ 195 { pmdoc_In, TYPE_In, 0 }, /* In */ 196 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ 197 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ 198 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ 199 { NULL, 0, 0 }, /* Op */ 200 { NULL, 0, 0 }, /* Ot */ 201 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ 202 { NULL, 0, 0 }, /* Rv */ 203 { pmdoc_St, TYPE_St, 0 }, /* St */ 204 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ 205 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ 206 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ 207 { NULL, 0, 0 }, /* %A */ 208 { NULL, 0, 0 }, /* %B */ 209 { NULL, 0, 0 }, /* %D */ 210 { NULL, 0, 0 }, /* %I */ 211 { NULL, 0, 0 }, /* %J */ 212 { NULL, 0, 0 }, /* %N */ 213 { NULL, 0, 0 }, /* %O */ 214 { NULL, 0, 0 }, /* %P */ 215 { NULL, 0, 0 }, /* %R */ 216 { NULL, 0, 0 }, /* %T */ 217 { NULL, 0, 0 }, /* %V */ 218 { NULL, 0, 0 }, /* Ac */ 219 { NULL, 0, 0 }, /* Ao */ 220 { NULL, 0, 0 }, /* Aq */ 221 { NULL, TYPE_At, MDOCF_CHILD }, /* At */ 222 { NULL, 0, 0 }, /* Bc */ 223 { NULL, 0, 0 }, /* Bf */ 224 { NULL, 0, 0 }, /* Bo */ 225 { NULL, 0, 0 }, /* Bq */ 226 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ 227 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ 228 { NULL, 0, 0 }, /* Db */ 229 { NULL, 0, 0 }, /* Dc */ 230 { NULL, 0, 0 }, /* Do */ 231 { NULL, 0, 0 }, /* Dq */ 232 { NULL, 0, 0 }, /* Ec */ 233 { NULL, 0, 0 }, /* Ef */ 234 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ 235 { NULL, 0, 0 }, /* Eo */ 236 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ 237 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ 238 { NULL, 0, 0 }, /* No */ 239 { NULL, 0, 0 }, /* Ns */ 240 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ 241 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ 242 { NULL, 0, 0 }, /* Pc */ 243 { NULL, 0, 0 }, /* Pf */ 244 { NULL, 0, 0 }, /* Po */ 245 { NULL, 0, 0 }, /* Pq */ 246 { NULL, 0, 0 }, /* Qc */ 247 { NULL, 0, 0 }, /* Ql */ 248 { NULL, 0, 0 }, /* Qo */ 249 { NULL, 0, 0 }, /* Qq */ 250 { NULL, 0, 0 }, /* Re */ 251 { NULL, 0, 0 }, /* Rs */ 252 { NULL, 0, 0 }, /* Sc */ 253 { NULL, 0, 0 }, /* So */ 254 { NULL, 0, 0 }, /* Sq */ 255 { NULL, 0, 0 }, /* Sm */ 256 { NULL, 0, 0 }, /* Sx */ 257 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ 258 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ 259 { NULL, 0, 0 }, /* Ux */ 260 { NULL, 0, 0 }, /* Xc */ 261 { NULL, 0, 0 }, /* Xo */ 262 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ 263 { NULL, 0, 0 }, /* Fc */ 264 { NULL, 0, 0 }, /* Oo */ 265 { NULL, 0, 0 }, /* Oc */ 266 { NULL, 0, 0 }, /* Bk */ 267 { NULL, 0, 0 }, /* Ek */ 268 { NULL, 0, 0 }, /* Bt */ 269 { NULL, 0, 0 }, /* Hf */ 270 { NULL, 0, 0 }, /* Fr */ 271 { NULL, 0, 0 }, /* Ud */ 272 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ 273 { NULL, 0, 0 }, /* Lp */ 274 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ 275 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ 276 { NULL, 0, 0 }, /* Brq */ 277 { NULL, 0, 0 }, /* Bro */ 278 { NULL, 0, 0 }, /* Brc */ 279 { NULL, 0, 0 }, /* %C */ 280 { NULL, 0, 0 }, /* Es */ 281 { NULL, 0, 0 }, /* En */ 282 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ 283 { NULL, 0, 0 }, /* %Q */ 284 { NULL, 0, 0 }, /* br */ 285 { NULL, 0, 0 }, /* sp */ 286 { NULL, 0, 0 }, /* %U */ 287 { NULL, 0, 0 }, /* Ta */ 288 }; 289 290 static const char *progname; 291 static int use_all; /* Use all directories and files. */ 292 static int verb; /* Output verbosity level. */ 293 static int warnings; /* Potential problems in manuals. */ 294 295 int 296 main(int argc, char *argv[]) 297 { 298 struct mparse *mp; /* parse sequence */ 299 struct manpaths dirs; 300 struct mdb mdb; 301 struct recs recs; 302 enum op op; /* current operation */ 303 const char *dir; 304 char *cp; 305 char pbuf[PATH_MAX]; 306 int ch, i, flags; 307 DB *hash; /* temporary keyword hashtable */ 308 BTREEINFO info; /* btree configuration */ 309 size_t sz1, sz2, ipath; 310 struct buf buf, /* keyword buffer */ 311 dbuf; /* description buffer */ 312 struct of *of; /* list of files for processing */ 313 extern int optind; 314 extern char *optarg; 315 316 progname = strrchr(argv[0], '/'); 317 if (progname == NULL) 318 progname = argv[0]; 319 else 320 ++progname; 321 322 memset(&dirs, 0, sizeof(struct manpaths)); 323 memset(&mdb, 0, sizeof(struct mdb)); 324 memset(&recs, 0, sizeof(struct recs)); 325 326 of = NULL; 327 mp = NULL; 328 hash = NULL; 329 op = OP_DEFAULT; 330 dir = NULL; 331 332 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) 333 switch (ch) { 334 case ('a'): 335 use_all = 1; 336 break; 337 case ('C'): 338 if (op) { 339 fprintf(stderr, 340 "-C: conflicting options\n"); 341 goto usage; 342 } 343 dir = optarg; 344 op = OP_CONFFILE; 345 break; 346 case ('d'): 347 if (op) { 348 fprintf(stderr, 349 "-d: conflicting options\n"); 350 goto usage; 351 } 352 dir = optarg; 353 op = OP_UPDATE; 354 break; 355 case ('t'): 356 dup2(STDOUT_FILENO, STDERR_FILENO); 357 if (op) { 358 fprintf(stderr, 359 "-t: conflicting options\n"); 360 goto usage; 361 } 362 op = OP_TEST; 363 use_all = 1; 364 warnings = 1; 365 break; 366 case ('u'): 367 if (op) { 368 fprintf(stderr, 369 "-u: conflicting options\n"); 370 goto usage; 371 } 372 dir = optarg; 373 op = OP_DELETE; 374 break; 375 case ('v'): 376 verb++; 377 break; 378 case ('W'): 379 warnings = 1; 380 break; 381 default: 382 goto usage; 383 } 384 385 argc -= optind; 386 argv += optind; 387 388 if (OP_CONFFILE == op && argc > 0) { 389 fprintf(stderr, "-C: too many arguments\n"); 390 goto usage; 391 } 392 393 memset(&info, 0, sizeof(BTREEINFO)); 394 info.lorder = 4321; 395 info.flags = R_DUP; 396 397 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL); 398 399 memset(&buf, 0, sizeof(struct buf)); 400 memset(&dbuf, 0, sizeof(struct buf)); 401 402 buf.size = dbuf.size = MANDOC_BUFSZ; 403 404 buf.cp = mandoc_malloc(buf.size); 405 dbuf.cp = mandoc_malloc(dbuf.size); 406 407 if (OP_TEST == op) { 408 ofile_argbuild(argc, argv, &of, NULL); 409 if (NULL == of) 410 goto out; 411 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); 412 goto out; 413 } 414 415 if (OP_UPDATE == op || OP_DELETE == op) { 416 if (NULL == realpath(dir, pbuf)) { 417 perror(dir); 418 exit((int)MANDOCLEVEL_BADARG); 419 } 420 if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) { 421 fprintf(stderr, "%s: path too long\n", pbuf); 422 exit((int)MANDOCLEVEL_BADARG); 423 } 424 425 strlcat(mdb.dbn, pbuf, PATH_MAX); 426 sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX); 427 428 strlcat(mdb.idxn, pbuf, PATH_MAX); 429 sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX); 430 431 if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) { 432 fprintf(stderr, "%s: path too long\n", mdb.idxn); 433 exit((int)MANDOCLEVEL_BADARG); 434 } 435 436 flags = O_CREAT | O_RDWR; 437 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 438 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 439 440 if (NULL == mdb.db) { 441 perror(mdb.dbn); 442 exit((int)MANDOCLEVEL_SYSERR); 443 } else if (NULL == mdb.idx) { 444 perror(mdb.idxn); 445 exit((int)MANDOCLEVEL_SYSERR); 446 } 447 448 ofile_argbuild(argc, argv, &of, pbuf); 449 450 if (NULL == of) 451 goto out; 452 453 index_prune(of, &mdb, &recs); 454 455 /* 456 * Go to the root of the respective manual tree. 457 * This must work or no manuals may be found (they're 458 * indexed relative to the root). 459 */ 460 461 if (OP_UPDATE == op) { 462 if (-1 == chdir(dir)) { 463 perror(dir); 464 exit((int)MANDOCLEVEL_SYSERR); 465 } 466 index_merge(of, mp, &dbuf, &buf, hash, 467 &mdb, &recs); 468 } 469 470 goto out; 471 } 472 473 /* 474 * Configure the directories we're going to scan. 475 * If we have command-line arguments, use them. 476 * If not, we use man(1)'s method (see mandocdb.8). 477 */ 478 479 if (argc > 0) { 480 dirs.paths = mandoc_calloc(argc, sizeof(char *)); 481 dirs.sz = argc; 482 for (i = 0; i < argc; i++) { 483 if (NULL == (cp = realpath(argv[i], pbuf))) { 484 perror(argv[i]); 485 goto out; 486 } 487 dirs.paths[i] = mandoc_strdup(cp); 488 } 489 } else 490 manpath_parse(&dirs, dir, NULL, NULL); 491 492 for (ipath = 0; ipath < dirs.sz; ipath++) { 493 494 /* 495 * Go to the root of the respective manual tree. 496 * This must work or no manuals may be found: 497 * They are indexed relative to the root. 498 */ 499 500 if (-1 == chdir(dirs.paths[ipath])) { 501 perror(dirs.paths[ipath]); 502 exit((int)MANDOCLEVEL_SYSERR); 503 } 504 505 /* Create a new database in two temporary files. */ 506 507 flags = O_CREAT | O_EXCL | O_RDWR; 508 while (NULL == mdb.db) { 509 strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX); 510 strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX); 511 if (NULL == mktemp(mdb.dbn)) { 512 perror(mdb.dbn); 513 exit((int)MANDOCLEVEL_SYSERR); 514 } 515 mdb.db = dbopen(mdb.dbn, flags, 0644, 516 DB_BTREE, &info); 517 if (NULL == mdb.db && EEXIST != errno) { 518 perror(mdb.dbn); 519 exit((int)MANDOCLEVEL_SYSERR); 520 } 521 } 522 while (NULL == mdb.idx) { 523 strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX); 524 strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX); 525 if (NULL == mktemp(mdb.idxn)) { 526 perror(mdb.idxn); 527 unlink(mdb.dbn); 528 exit((int)MANDOCLEVEL_SYSERR); 529 } 530 mdb.idx = dbopen(mdb.idxn, flags, 0644, 531 DB_RECNO, NULL); 532 if (NULL == mdb.idx && EEXIST != errno) { 533 perror(mdb.idxn); 534 unlink(mdb.dbn); 535 exit((int)MANDOCLEVEL_SYSERR); 536 } 537 } 538 539 /* 540 * Search for manuals and fill the new database. 541 */ 542 543 ofile_dirbuild(".", "", "", 0, &of); 544 545 if (NULL != of) { 546 index_merge(of, mp, &dbuf, &buf, hash, 547 &mdb, &recs); 548 ofile_free(of); 549 of = NULL; 550 } 551 552 (*mdb.db->close)(mdb.db); 553 (*mdb.idx->close)(mdb.idx); 554 mdb.db = NULL; 555 mdb.idx = NULL; 556 557 /* 558 * Replace the old database with the new one. 559 * This is not perfectly atomic, 560 * but i cannot think of a better way. 561 */ 562 563 if (-1 == rename(mdb.dbn, MANDOC_DB)) { 564 perror(MANDOC_DB); 565 unlink(mdb.dbn); 566 unlink(mdb.idxn); 567 exit((int)MANDOCLEVEL_SYSERR); 568 } 569 if (-1 == rename(mdb.idxn, MANDOC_IDX)) { 570 perror(MANDOC_IDX); 571 unlink(MANDOC_DB); 572 unlink(MANDOC_IDX); 573 unlink(mdb.idxn); 574 exit((int)MANDOCLEVEL_SYSERR); 575 } 576 } 577 578 out: 579 if (mdb.db) 580 (*mdb.db->close)(mdb.db); 581 if (mdb.idx) 582 (*mdb.idx->close)(mdb.idx); 583 if (hash) 584 (*hash->close)(hash); 585 if (mp) 586 mparse_free(mp); 587 588 manpath_free(&dirs); 589 ofile_free(of); 590 free(buf.cp); 591 free(dbuf.cp); 592 free(recs.stack); 593 594 return(MANDOCLEVEL_OK); 595 596 usage: 597 fprintf(stderr, 598 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" 599 " -d dir [file ...] | " 600 "-u dir [file ...]\n", 601 progname); 602 603 return((int)MANDOCLEVEL_BADARG); 604 } 605 606 void 607 index_merge(const struct of *of, struct mparse *mp, 608 struct buf *dbuf, struct buf *buf, DB *hash, 609 struct mdb *mdb, struct recs *recs) 610 { 611 recno_t rec; 612 int ch, skip; 613 DBT key, val; 614 DB *files; /* temporary file name table */ 615 struct mdoc *mdoc; 616 struct man *man; 617 const char *fn, *msec, *march, *mtitle; 618 char *p; 619 uint64_t mask; 620 size_t sv; 621 unsigned seq; 622 uint64_t vbuf[2]; 623 char type; 624 625 static char emptystring[] = ""; 626 627 if (warnings) { 628 files = NULL; 629 hash_reset(&files); 630 } 631 632 rec = 0; 633 for (of = of->first; of; of = of->next) { 634 fn = of->fname; 635 636 /* 637 * Try interpreting the file as mdoc(7) or man(7) 638 * source code, unless it is already known to be 639 * formatted. Fall back to formatted mode. 640 */ 641 642 mparse_reset(mp); 643 mdoc = NULL; 644 man = NULL; 645 646 if ((MANDOC_SRC & of->src_form || 647 ! (MANDOC_FORM & of->src_form)) && 648 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) 649 mparse_result(mp, &mdoc, &man); 650 651 if (NULL != mdoc) { 652 msec = mdoc_meta(mdoc)->msec; 653 march = mdoc_meta(mdoc)->arch; 654 if (NULL == march) 655 march = ""; 656 mtitle = mdoc_meta(mdoc)->title; 657 } else if (NULL != man) { 658 msec = man_meta(man)->msec; 659 march = ""; 660 mtitle = man_meta(man)->title; 661 } else { 662 msec = of->sec; 663 march = of->arch; 664 mtitle = of->title; 665 } 666 667 /* 668 * Check whether the manual section given in a file 669 * agrees with the directory where the file is located. 670 * Some manuals have suffixes like (3p) on their 671 * section number either inside the file or in the 672 * directory name, some are linked into more than one 673 * section, like encrypt(1) = makekey(8). Do not skip 674 * manuals for such reasons. 675 */ 676 677 skip = 0; 678 assert(of->sec); 679 assert(msec); 680 if (warnings) 681 if (strcasecmp(msec, of->sec)) 682 fprintf(stderr, "%s: " 683 "section \"%s\" manual " 684 "in \"%s\" directory\n", 685 fn, msec, of->sec); 686 687 /* 688 * Manual page directories exist for each kernel 689 * architecture as returned by machine(1). 690 * However, many manuals only depend on the 691 * application architecture as returned by arch(1). 692 * For example, some (2/ARM) manuals are shared 693 * across the "armish" and "zaurus" kernel 694 * architectures. 695 * A few manuals are even shared across completely 696 * different architectures, for example fdformat(1) 697 * on amd64, i386, sparc, and sparc64. 698 * Thus, warn about architecture mismatches, 699 * but don't skip manuals for this reason. 700 */ 701 702 assert(of->arch); 703 assert(march); 704 if (warnings) 705 if (strcasecmp(march, of->arch)) 706 fprintf(stderr, "%s: " 707 "architecture \"%s\" manual " 708 "in \"%s\" directory\n", 709 fn, march, of->arch); 710 711 /* 712 * By default, skip a file if the title given 713 * in the file disagrees with the file name. 714 * Do not warn, this happens for all MLINKs. 715 */ 716 717 assert(of->title); 718 assert(mtitle); 719 if (strcasecmp(mtitle, of->title)) 720 skip = 1; 721 722 /* 723 * Build a title string for the file. If it matches 724 * the location of the file, remember the title as 725 * found; else, remember it as missing. 726 */ 727 728 if (warnings) { 729 buf->len = 0; 730 buf_appendb(buf, mtitle, strlen(mtitle)); 731 buf_appendb(buf, "(", 1); 732 buf_appendb(buf, msec, strlen(msec)); 733 if ('\0' != *march) { 734 buf_appendb(buf, "/", 1); 735 buf_appendb(buf, march, strlen(march)); 736 } 737 buf_appendb(buf, ")", 2); 738 for (p = buf->cp; '\0' != *p; p++) 739 *p = tolower((unsigned char)*p); 740 key.data = buf->cp; 741 key.size = buf->len; 742 val.data = NULL; 743 val.size = 0; 744 if (0 == skip) 745 val.data = emptystring; 746 else { 747 ch = (*files->get)(files, &key, &val, 0); 748 if (ch < 0) { 749 perror("hash"); 750 exit((int)MANDOCLEVEL_SYSERR); 751 } else if (ch > 0) { 752 val.data = (void *)fn; 753 val.size = strlen(fn) + 1; 754 } else 755 val.data = NULL; 756 } 757 if (NULL != val.data && 758 (*files->put)(files, &key, &val, 0) < 0) { 759 perror("hash"); 760 exit((int)MANDOCLEVEL_SYSERR); 761 } 762 } 763 764 if (skip && !use_all) 765 continue; 766 767 /* 768 * The index record value consists of a nil-terminated 769 * filename, a nil-terminated manual section, and a 770 * nil-terminated description. Use the actual 771 * location of the file, such that the user can find 772 * it with man(1). Since the description may not be 773 * set, we set a sentinel to see if we're going to 774 * write a nil byte in its place. 775 */ 776 777 dbuf->len = 0; 778 type = mdoc ? 'd' : (man ? 'a' : 'c'); 779 buf_appendb(dbuf, &type, 1); 780 buf_appendb(dbuf, fn, strlen(fn) + 1); 781 buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); 782 buf_appendb(dbuf, of->title, strlen(of->title) + 1); 783 buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); 784 785 sv = dbuf->len; 786 787 /* 788 * Collect keyword/mask pairs. 789 * Each pair will become a new btree node. 790 */ 791 792 hash_reset(&hash); 793 if (mdoc) 794 pmdoc_node(hash, buf, dbuf, 795 mdoc_node(mdoc), mdoc_meta(mdoc)); 796 else if (man) 797 pman_node(hash, buf, dbuf, man_node(man)); 798 else 799 pformatted(hash, buf, dbuf, of); 800 801 /* Test mode, do not access any database. */ 802 803 if (NULL == mdb->db || NULL == mdb->idx) 804 continue; 805 806 /* 807 * Make sure the file name is always registered 808 * as an .Nm search key. 809 */ 810 buf->len = 0; 811 buf_append(buf, of->title); 812 hash_put(hash, buf, TYPE_Nm); 813 814 /* 815 * Reclaim an empty index record, if available. 816 * Use its record number for all new btree nodes. 817 */ 818 819 if (recs->cur > 0) { 820 recs->cur--; 821 rec = recs->stack[(int)recs->cur]; 822 } else if (recs->last > 0) { 823 rec = recs->last; 824 recs->last = 0; 825 } else 826 rec++; 827 vbuf[1] = htobe64(rec); 828 829 /* 830 * Copy from the in-memory hashtable of pending 831 * keyword/mask pairs into the database. 832 */ 833 834 seq = R_FIRST; 835 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 836 seq = R_NEXT; 837 assert(sizeof(uint64_t) == val.size); 838 memcpy(&mask, val.data, val.size); 839 vbuf[0] = htobe64(mask); 840 val.size = sizeof(vbuf); 841 val.data = &vbuf; 842 dbt_put(mdb->db, mdb->dbn, &key, &val); 843 } 844 if (ch < 0) { 845 perror("hash"); 846 unlink(mdb->dbn); 847 unlink(mdb->idxn); 848 exit((int)MANDOCLEVEL_SYSERR); 849 } 850 851 /* 852 * Apply to the index. If we haven't had a description 853 * set, put an empty one in now. 854 */ 855 856 if (dbuf->len == sv) 857 buf_appendb(dbuf, "", 1); 858 859 key.data = &rec; 860 key.size = sizeof(recno_t); 861 862 val.data = dbuf->cp; 863 val.size = dbuf->len; 864 865 if (verb) 866 printf("%s: adding to index\n", fn); 867 868 dbt_put(mdb->idx, mdb->idxn, &key, &val); 869 } 870 871 /* 872 * Iterate the remembered file titles and check that 873 * all files can be found by their main title. 874 */ 875 876 if (warnings) { 877 seq = R_FIRST; 878 while (0 == (*files->seq)(files, &key, &val, seq)) { 879 seq = R_NEXT; 880 if (val.size) 881 fprintf(stderr, "%s: probably " 882 "unreachable, title is %s\n", 883 (char *)val.data, (char *)key.data); 884 } 885 (*files->close)(files); 886 } 887 } 888 889 /* 890 * Scan through all entries in the index file `idx' and prune those 891 * entries in `ofile'. 892 * Pruning consists of removing from `db', then invalidating the entry 893 * in `idx' (zeroing its value size). 894 */ 895 static void 896 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) 897 { 898 const struct of *of; 899 const char *fn; 900 uint64_t vbuf[2]; 901 unsigned seq, sseq; 902 DBT key, val; 903 int ch; 904 905 recs->cur = 0; 906 seq = R_FIRST; 907 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { 908 seq = R_NEXT; 909 assert(sizeof(recno_t) == key.size); 910 memcpy(&recs->last, key.data, key.size); 911 912 /* Deleted records are zero-sized. Skip them. */ 913 914 if (0 == val.size) 915 goto cont; 916 917 /* 918 * Make sure we're sane. 919 * Read past our mdoc/man/cat type to the next string, 920 * then make sure it's bounded by a NUL. 921 * Failing any of these, we go into our error handler. 922 */ 923 924 fn = (char *)val.data + 1; 925 if (NULL == memchr(fn, '\0', val.size - 1)) 926 break; 927 928 /* 929 * Search for the file in those we care about. 930 * XXX: build this into a tree. Too slow. 931 */ 932 933 for (of = ofile->first; of; of = of->next) 934 if (0 == strcmp(fn, of->fname)) 935 break; 936 937 if (NULL == of) 938 continue; 939 940 /* 941 * Search through the keyword database, throwing out all 942 * references to our file. 943 */ 944 945 sseq = R_FIRST; 946 while (0 == (ch = (*mdb->db->seq)(mdb->db, 947 &key, &val, sseq))) { 948 sseq = R_NEXT; 949 if (sizeof(vbuf) != val.size) 950 break; 951 952 memcpy(vbuf, val.data, val.size); 953 if (recs->last != betoh64(vbuf[1])) 954 continue; 955 956 if ((ch = (*mdb->db->del)(mdb->db, 957 &key, R_CURSOR)) < 0) 958 break; 959 } 960 961 if (ch < 0) { 962 perror(mdb->dbn); 963 exit((int)MANDOCLEVEL_SYSERR); 964 } else if (1 != ch) { 965 fprintf(stderr, "%s: corrupt database\n", 966 mdb->dbn); 967 exit((int)MANDOCLEVEL_SYSERR); 968 } 969 970 if (verb) 971 printf("%s: deleting from index\n", fn); 972 973 val.size = 0; 974 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); 975 976 if (ch < 0) 977 break; 978 cont: 979 if (recs->cur >= recs->size) { 980 recs->size += MANDOC_SLOP; 981 recs->stack = mandoc_realloc(recs->stack, 982 recs->size * sizeof(recno_t)); 983 } 984 985 recs->stack[(int)recs->cur] = recs->last; 986 recs->cur++; 987 } 988 989 if (ch < 0) { 990 perror(mdb->idxn); 991 exit((int)MANDOCLEVEL_SYSERR); 992 } else if (1 != ch) { 993 fprintf(stderr, "%s: corrupt index\n", mdb->idxn); 994 exit((int)MANDOCLEVEL_SYSERR); 995 } 996 997 recs->last++; 998 } 999 1000 /* 1001 * Grow the buffer (if necessary) and copy in a binary string. 1002 */ 1003 static void 1004 buf_appendb(struct buf *buf, const void *cp, size_t sz) 1005 { 1006 1007 /* Overshoot by MANDOC_BUFSZ. */ 1008 1009 while (buf->len + sz >= buf->size) { 1010 buf->size = buf->len + sz + MANDOC_BUFSZ; 1011 buf->cp = mandoc_realloc(buf->cp, buf->size); 1012 } 1013 1014 memcpy(buf->cp + (int)buf->len, cp, sz); 1015 buf->len += sz; 1016 } 1017 1018 /* 1019 * Append a nil-terminated string to the buffer. 1020 * This can be invoked multiple times. 1021 * The buffer string will be nil-terminated. 1022 * If invoked multiple times, a space is put between strings. 1023 */ 1024 static void 1025 buf_append(struct buf *buf, const char *cp) 1026 { 1027 size_t sz; 1028 1029 if (0 == (sz = strlen(cp))) 1030 return; 1031 1032 if (buf->len) 1033 buf->cp[(int)buf->len - 1] = ' '; 1034 1035 buf_appendb(buf, cp, sz + 1); 1036 } 1037 1038 /* 1039 * Recursively add all text from a given node. 1040 * This is optimised for general mdoc nodes in this context, which do 1041 * not consist of subexpressions and having a recursive call for n->next 1042 * would be wasteful. 1043 * The "f" variable should be 0 unless called from pmdoc_Nd for the 1044 * description buffer, which does not start at the beginning of the 1045 * buffer. 1046 */ 1047 static void 1048 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 1049 { 1050 1051 for ( ; n; n = n->next) { 1052 if (n->child) 1053 buf_appendmdoc(buf, n->child, f); 1054 1055 if (MDOC_TEXT == n->type && f) { 1056 f = 0; 1057 buf_appendb(buf, n->string, 1058 strlen(n->string) + 1); 1059 } else if (MDOC_TEXT == n->type) 1060 buf_append(buf, n->string); 1061 1062 } 1063 } 1064 1065 static void 1066 hash_reset(DB **db) 1067 { 1068 DB *hash; 1069 1070 if (NULL != (hash = *db)) 1071 (*hash->close)(hash); 1072 1073 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 1074 if (NULL == *db) { 1075 perror("hash"); 1076 exit((int)MANDOCLEVEL_SYSERR); 1077 } 1078 } 1079 1080 /* ARGSUSED */ 1081 static int 1082 pmdoc_head(MDOC_ARGS) 1083 { 1084 1085 return(MDOC_HEAD == n->type); 1086 } 1087 1088 /* ARGSUSED */ 1089 static int 1090 pmdoc_body(MDOC_ARGS) 1091 { 1092 1093 return(MDOC_BODY == n->type); 1094 } 1095 1096 /* ARGSUSED */ 1097 static int 1098 pmdoc_Fd(MDOC_ARGS) 1099 { 1100 const char *start, *end; 1101 size_t sz; 1102 1103 if (SEC_SYNOPSIS != n->sec) 1104 return(0); 1105 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 1106 return(0); 1107 1108 /* 1109 * Only consider those `Fd' macro fields that begin with an 1110 * "inclusion" token (versus, e.g., #define). 1111 */ 1112 if (strcmp("#include", n->string)) 1113 return(0); 1114 1115 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1116 return(0); 1117 1118 /* 1119 * Strip away the enclosing angle brackets and make sure we're 1120 * not zero-length. 1121 */ 1122 1123 start = n->string; 1124 if ('<' == *start || '"' == *start) 1125 start++; 1126 1127 if (0 == (sz = strlen(start))) 1128 return(0); 1129 1130 end = &start[(int)sz - 1]; 1131 if ('>' == *end || '"' == *end) 1132 end--; 1133 1134 assert(end >= start); 1135 1136 buf_appendb(buf, start, (size_t)(end - start + 1)); 1137 buf_appendb(buf, "", 1); 1138 return(1); 1139 } 1140 1141 /* ARGSUSED */ 1142 static int 1143 pmdoc_In(MDOC_ARGS) 1144 { 1145 1146 if (NULL == n->child || MDOC_TEXT != n->child->type) 1147 return(0); 1148 1149 buf_append(buf, n->child->string); 1150 return(1); 1151 } 1152 1153 /* ARGSUSED */ 1154 static int 1155 pmdoc_Fn(MDOC_ARGS) 1156 { 1157 struct mdoc_node *nn; 1158 const char *cp; 1159 1160 nn = n->child; 1161 1162 if (NULL == nn || MDOC_TEXT != nn->type) 1163 return(0); 1164 1165 /* .Fn "struct type *name" "char *arg" */ 1166 1167 cp = strrchr(nn->string, ' '); 1168 if (NULL == cp) 1169 cp = nn->string; 1170 1171 /* Strip away pointer symbol. */ 1172 1173 while ('*' == *cp) 1174 cp++; 1175 1176 /* Store the function name. */ 1177 1178 buf_append(buf, cp); 1179 hash_put(hash, buf, TYPE_Fn); 1180 1181 /* Store the function type. */ 1182 1183 if (nn->string < cp) { 1184 buf->len = 0; 1185 buf_appendb(buf, nn->string, cp - nn->string); 1186 buf_appendb(buf, "", 1); 1187 hash_put(hash, buf, TYPE_Ft); 1188 } 1189 1190 /* Store the arguments. */ 1191 1192 for (nn = nn->next; nn; nn = nn->next) { 1193 if (MDOC_TEXT != nn->type) 1194 continue; 1195 buf->len = 0; 1196 buf_append(buf, nn->string); 1197 hash_put(hash, buf, TYPE_Fa); 1198 } 1199 1200 return(0); 1201 } 1202 1203 /* ARGSUSED */ 1204 static int 1205 pmdoc_St(MDOC_ARGS) 1206 { 1207 1208 if (NULL == n->child || MDOC_TEXT != n->child->type) 1209 return(0); 1210 1211 buf_append(buf, n->child->string); 1212 return(1); 1213 } 1214 1215 /* ARGSUSED */ 1216 static int 1217 pmdoc_Xr(MDOC_ARGS) 1218 { 1219 1220 if (NULL == (n = n->child)) 1221 return(0); 1222 1223 buf_appendb(buf, n->string, strlen(n->string)); 1224 1225 if (NULL != (n = n->next)) { 1226 buf_appendb(buf, ".", 1); 1227 buf_appendb(buf, n->string, strlen(n->string) + 1); 1228 } else 1229 buf_appendb(buf, ".", 2); 1230 1231 return(1); 1232 } 1233 1234 /* ARGSUSED */ 1235 static int 1236 pmdoc_Nd(MDOC_ARGS) 1237 { 1238 1239 if (MDOC_BODY != n->type) 1240 return(0); 1241 1242 buf_appendmdoc(dbuf, n->child, 1); 1243 return(1); 1244 } 1245 1246 /* ARGSUSED */ 1247 static int 1248 pmdoc_Nm(MDOC_ARGS) 1249 { 1250 1251 if (SEC_NAME == n->sec) 1252 return(1); 1253 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 1254 return(0); 1255 1256 if (NULL == n->child) 1257 buf_append(buf, m->name); 1258 1259 return(1); 1260 } 1261 1262 /* ARGSUSED */ 1263 static int 1264 pmdoc_Sh(MDOC_ARGS) 1265 { 1266 1267 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1268 } 1269 1270 static void 1271 hash_put(DB *db, const struct buf *buf, uint64_t mask) 1272 { 1273 uint64_t oldmask; 1274 DBT key, val; 1275 int rc; 1276 1277 if (buf->len < 2) 1278 return; 1279 1280 key.data = buf->cp; 1281 key.size = buf->len; 1282 1283 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1284 perror("hash"); 1285 exit((int)MANDOCLEVEL_SYSERR); 1286 } else if (0 == rc) { 1287 assert(sizeof(uint64_t) == val.size); 1288 memcpy(&oldmask, val.data, val.size); 1289 mask |= oldmask; 1290 } 1291 1292 val.data = &mask; 1293 val.size = sizeof(uint64_t); 1294 1295 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1296 perror("hash"); 1297 exit((int)MANDOCLEVEL_SYSERR); 1298 } 1299 } 1300 1301 static void 1302 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1303 { 1304 1305 assert(key->size); 1306 assert(val->size); 1307 1308 if (0 == (*db->put)(db, key, val, 0)) 1309 return; 1310 1311 perror(dbn); 1312 exit((int)MANDOCLEVEL_SYSERR); 1313 /* NOTREACHED */ 1314 } 1315 1316 /* 1317 * Call out to per-macro handlers after clearing the persistent database 1318 * key. If the macro sets the database key, flush it to the database. 1319 */ 1320 static void 1321 pmdoc_node(MDOC_ARGS) 1322 { 1323 1324 if (NULL == n) 1325 return; 1326 1327 switch (n->type) { 1328 case (MDOC_HEAD): 1329 /* FALLTHROUGH */ 1330 case (MDOC_BODY): 1331 /* FALLTHROUGH */ 1332 case (MDOC_TAIL): 1333 /* FALLTHROUGH */ 1334 case (MDOC_BLOCK): 1335 /* FALLTHROUGH */ 1336 case (MDOC_ELEM): 1337 buf->len = 0; 1338 1339 /* 1340 * Both NULL handlers and handlers returning true 1341 * request using the data. Only skip the element 1342 * when the handler returns false. 1343 */ 1344 1345 if (NULL != mdocs[n->tok].fp && 1346 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) 1347 break; 1348 1349 /* 1350 * For many macros, use the text from all children. 1351 * Set zero flags for macros not needing this. 1352 * In that case, the handler must fill the buffer. 1353 */ 1354 1355 if (MDOCF_CHILD & mdocs[n->tok].flags) 1356 buf_appendmdoc(buf, n->child, 0); 1357 1358 /* 1359 * Cover the most common case: 1360 * Automatically stage one string per element. 1361 * Set a zero mask for macros not needing this. 1362 * Additional staging can be done in the handler. 1363 */ 1364 1365 if (mdocs[n->tok].mask) 1366 hash_put(hash, buf, mdocs[n->tok].mask); 1367 break; 1368 default: 1369 break; 1370 } 1371 1372 pmdoc_node(hash, buf, dbuf, n->child, m); 1373 pmdoc_node(hash, buf, dbuf, n->next, m); 1374 } 1375 1376 static int 1377 pman_node(MAN_ARGS) 1378 { 1379 const struct man_node *head, *body; 1380 char *start, *sv, *title; 1381 size_t sz, titlesz; 1382 1383 if (NULL == n) 1384 return(0); 1385 1386 /* 1387 * We're only searching for one thing: the first text child in 1388 * the BODY of a NAME section. Since we don't keep track of 1389 * sections in -man, run some hoops to find out whether we're in 1390 * the correct section or not. 1391 */ 1392 1393 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1394 body = n; 1395 assert(body->parent); 1396 if (NULL != (head = body->parent->head) && 1397 1 == head->nchild && 1398 NULL != (head = (head->child)) && 1399 MAN_TEXT == head->type && 1400 0 == strcmp(head->string, "NAME") && 1401 NULL != (body = body->child) && 1402 MAN_TEXT == body->type) { 1403 1404 title = NULL; 1405 titlesz = 0; 1406 /* 1407 * Suck the entire NAME section into memory. 1408 * Yes, we might run away. 1409 * But too many manuals have big, spread-out 1410 * NAME sections over many lines. 1411 */ 1412 for ( ; NULL != body; body = body->next) { 1413 if (MAN_TEXT != body->type) 1414 break; 1415 if (0 == (sz = strlen(body->string))) 1416 continue; 1417 title = mandoc_realloc 1418 (title, titlesz + sz + 1); 1419 memcpy(title + titlesz, body->string, sz); 1420 titlesz += sz + 1; 1421 title[(int)titlesz - 1] = ' '; 1422 } 1423 if (NULL == title) 1424 return(0); 1425 1426 title = mandoc_realloc(title, titlesz + 1); 1427 title[(int)titlesz] = '\0'; 1428 1429 /* Skip leading space. */ 1430 1431 sv = title; 1432 while (isspace((unsigned char)*sv)) 1433 sv++; 1434 1435 if (0 == (sz = strlen(sv))) { 1436 free(title); 1437 return(0); 1438 } 1439 1440 /* Erase trailing space. */ 1441 1442 start = &sv[sz - 1]; 1443 while (start > sv && isspace((unsigned char)*start)) 1444 *start-- = '\0'; 1445 1446 if (start == sv) { 1447 free(title); 1448 return(0); 1449 } 1450 1451 start = sv; 1452 1453 /* 1454 * Go through a special heuristic dance here. 1455 * This is why -man manuals are great! 1456 * (I'm being sarcastic: my eyes are bleeding.) 1457 * Conventionally, one or more manual names are 1458 * comma-specified prior to a whitespace, then a 1459 * dash, then a description. Try to puzzle out 1460 * the name parts here. 1461 */ 1462 1463 for ( ;; ) { 1464 sz = strcspn(start, " ,"); 1465 if ('\0' == start[(int)sz]) 1466 break; 1467 1468 buf->len = 0; 1469 buf_appendb(buf, start, sz); 1470 buf_appendb(buf, "", 1); 1471 1472 hash_put(hash, buf, TYPE_Nm); 1473 1474 if (' ' == start[(int)sz]) { 1475 start += (int)sz + 1; 1476 break; 1477 } 1478 1479 assert(',' == start[(int)sz]); 1480 start += (int)sz + 1; 1481 while (' ' == *start) 1482 start++; 1483 } 1484 1485 buf->len = 0; 1486 1487 if (sv == start) { 1488 buf_append(buf, start); 1489 free(title); 1490 return(1); 1491 } 1492 1493 while (isspace((unsigned char)*start)) 1494 start++; 1495 1496 if (0 == strncmp(start, "-", 1)) 1497 start += 1; 1498 else if (0 == strncmp(start, "\\-\\-", 4)) 1499 start += 4; 1500 else if (0 == strncmp(start, "\\-", 2)) 1501 start += 2; 1502 else if (0 == strncmp(start, "\\(en", 4)) 1503 start += 4; 1504 else if (0 == strncmp(start, "\\(em", 4)) 1505 start += 4; 1506 1507 while (' ' == *start) 1508 start++; 1509 1510 sz = strlen(start) + 1; 1511 buf_appendb(dbuf, start, sz); 1512 buf_appendb(buf, start, sz); 1513 1514 hash_put(hash, buf, TYPE_Nd); 1515 free(title); 1516 } 1517 } 1518 1519 for (n = n->child; n; n = n->next) 1520 if (pman_node(hash, buf, dbuf, n)) 1521 return(1); 1522 1523 return(0); 1524 } 1525 1526 /* 1527 * Parse a formatted manual page. 1528 * By necessity, this involves rather crude guesswork. 1529 */ 1530 static void 1531 pformatted(DB *hash, struct buf *buf, 1532 struct buf *dbuf, const struct of *of) 1533 { 1534 FILE *stream; 1535 char *line, *p, *title; 1536 size_t len, plen, titlesz; 1537 1538 if (NULL == (stream = fopen(of->fname, "r"))) { 1539 if (warnings) 1540 perror(of->fname); 1541 return; 1542 } 1543 1544 /* 1545 * Always use the title derived from the filename up front, 1546 * do not even try to find it in the file. This also makes 1547 * sure we don't end up with an orphan index record, even if 1548 * the file content turns out to be completely unintelligible. 1549 */ 1550 1551 buf->len = 0; 1552 buf_append(buf, of->title); 1553 hash_put(hash, buf, TYPE_Nm); 1554 1555 /* Skip to first blank line. */ 1556 1557 while (NULL != (line = fgetln(stream, &len))) 1558 if ('\n' == *line) 1559 break; 1560 1561 /* 1562 * Assume the first line that is not indented 1563 * is the first section header. Skip to it. 1564 */ 1565 1566 while (NULL != (line = fgetln(stream, &len))) 1567 if ('\n' != *line && ' ' != *line) 1568 break; 1569 1570 /* 1571 * Read up until the next section into a buffer. 1572 * Strip the leading and trailing newline from each read line, 1573 * appending a trailing space. 1574 * Ignore empty (whitespace-only) lines. 1575 */ 1576 1577 titlesz = 0; 1578 title = NULL; 1579 1580 while (NULL != (line = fgetln(stream, &len))) { 1581 if (' ' != *line || '\n' != line[(int)len - 1]) 1582 break; 1583 while (len > 0 && isspace((unsigned char)*line)) { 1584 line++; 1585 len--; 1586 } 1587 if (1 == len) 1588 continue; 1589 title = mandoc_realloc(title, titlesz + len); 1590 memcpy(title + titlesz, line, len); 1591 titlesz += len; 1592 title[(int)titlesz - 1] = ' '; 1593 } 1594 1595 1596 /* 1597 * If no page content can be found, or the input line 1598 * is already the next section header, or there is no 1599 * trailing newline, reuse the page title as the page 1600 * description. 1601 */ 1602 1603 if (NULL == title || '\0' == *title) { 1604 if (warnings) 1605 fprintf(stderr, "%s: cannot find NAME section\n", 1606 of->fname); 1607 buf_appendb(dbuf, buf->cp, buf->size); 1608 hash_put(hash, buf, TYPE_Nd); 1609 fclose(stream); 1610 free(title); 1611 return; 1612 } 1613 1614 title = mandoc_realloc(title, titlesz + 1); 1615 title[(int)titlesz] = '\0'; 1616 1617 /* 1618 * Skip to the first dash. 1619 * Use the remaining line as the description (no more than 70 1620 * bytes). 1621 */ 1622 1623 if (NULL != (p = strstr(title, "- "))) { 1624 for (p += 2; ' ' == *p || '\b' == *p; p++) 1625 /* Skip to next word. */ ; 1626 } else { 1627 if (warnings) 1628 fprintf(stderr, "%s: no dash in title line\n", 1629 of->fname); 1630 p = title; 1631 } 1632 1633 plen = strlen(p); 1634 1635 /* Strip backspace-encoding from line. */ 1636 1637 while (NULL != (line = memchr(p, '\b', plen))) { 1638 len = line - p; 1639 if (0 == len) { 1640 memmove(line, line + 1, plen--); 1641 continue; 1642 } 1643 memmove(line - 1, line + 1, plen - len); 1644 plen -= 2; 1645 } 1646 1647 buf_appendb(dbuf, p, plen + 1); 1648 buf->len = 0; 1649 buf_appendb(buf, p, plen + 1); 1650 hash_put(hash, buf, TYPE_Nd); 1651 fclose(stream); 1652 free(title); 1653 } 1654 1655 static void 1656 ofile_argbuild(int argc, char *argv[], struct of **of, 1657 const char *basedir) 1658 { 1659 char buf[PATH_MAX]; 1660 char pbuf[PATH_MAX]; 1661 const char *sec, *arch, *title; 1662 char *relpath, *p; 1663 int i, src_form; 1664 struct of *nof; 1665 1666 for (i = 0; i < argc; i++) { 1667 if (NULL == (relpath = realpath(argv[i], pbuf))) { 1668 perror(argv[i]); 1669 continue; 1670 } 1671 if (NULL != basedir) { 1672 if (strstr(pbuf, basedir) != pbuf) { 1673 fprintf(stderr, "%s: file outside " 1674 "base directory %s\n", 1675 pbuf, basedir); 1676 continue; 1677 } 1678 relpath = pbuf + strlen(basedir); 1679 } 1680 1681 /* 1682 * Try to infer the manual section, architecture and 1683 * page title from the path, assuming it looks like 1684 * man*[/<arch>]/<title>.<section> or 1685 * cat<section>[/<arch>]/<title>.0 1686 */ 1687 1688 if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) { 1689 fprintf(stderr, "%s: path too long\n", relpath); 1690 continue; 1691 } 1692 sec = arch = title = ""; 1693 src_form = 0; 1694 p = strrchr(buf, '\0'); 1695 while (p-- > buf) { 1696 if ('\0' == *sec && '.' == *p) { 1697 sec = p + 1; 1698 *p = '\0'; 1699 if ('0' == *sec) 1700 src_form |= MANDOC_FORM; 1701 else if ('1' <= *sec && '9' >= *sec) 1702 src_form |= MANDOC_SRC; 1703 continue; 1704 } 1705 if ('/' != *p) 1706 continue; 1707 if ('\0' == *title) { 1708 title = p + 1; 1709 *p = '\0'; 1710 continue; 1711 } 1712 if (0 == strncmp("man", p + 1, 3)) 1713 src_form |= MANDOC_SRC; 1714 else if (0 == strncmp("cat", p + 1, 3)) 1715 src_form |= MANDOC_FORM; 1716 else 1717 arch = p + 1; 1718 break; 1719 } 1720 if ('\0' == *title) { 1721 if (warnings) 1722 fprintf(stderr, 1723 "%s: cannot deduce title " 1724 "from filename\n", 1725 relpath); 1726 title = buf; 1727 } 1728 1729 /* 1730 * Build the file structure. 1731 */ 1732 1733 nof = mandoc_calloc(1, sizeof(struct of)); 1734 nof->fname = mandoc_strdup(relpath); 1735 nof->sec = mandoc_strdup(sec); 1736 nof->arch = mandoc_strdup(arch); 1737 nof->title = mandoc_strdup(title); 1738 nof->src_form = src_form; 1739 1740 /* 1741 * Add the structure to the list. 1742 */ 1743 1744 if (NULL == *of) { 1745 *of = nof; 1746 (*of)->first = nof; 1747 } else { 1748 nof->first = (*of)->first; 1749 (*of)->next = nof; 1750 *of = nof; 1751 } 1752 } 1753 } 1754 1755 /* 1756 * Recursively build up a list of files to parse. 1757 * We use this instead of ftw() and so on because I don't want global 1758 * variables hanging around. 1759 * This ignores the mandoc.db and mandoc.index files, but assumes that 1760 * everything else is a manual. 1761 * Pass in a pointer to a NULL structure for the first invocation. 1762 */ 1763 static void 1764 ofile_dirbuild(const char *dir, const char* psec, const char *parch, 1765 int p_src_form, struct of **of) 1766 { 1767 char buf[PATH_MAX]; 1768 #if defined(__sun) 1769 struct stat sb; 1770 #endif 1771 size_t sz; 1772 DIR *d; 1773 const char *fn, *sec, *arch; 1774 char *p, *q, *suffix; 1775 struct of *nof; 1776 struct dirent *dp; 1777 int src_form; 1778 1779 if (NULL == (d = opendir(dir))) { 1780 if (warnings) 1781 perror(dir); 1782 return; 1783 } 1784 1785 while (NULL != (dp = readdir(d))) { 1786 fn = dp->d_name; 1787 1788 if ('.' == *fn) 1789 continue; 1790 1791 src_form = p_src_form; 1792 1793 #if defined(__sun) 1794 stat(dp->d_name, &sb); 1795 if (S_IFDIR & sb.st_mode) { 1796 #else 1797 if (DT_DIR == dp->d_type) { 1798 #endif 1799 sec = psec; 1800 arch = parch; 1801 1802 /* 1803 * By default, only use directories called: 1804 * man<section>/[<arch>/] or 1805 * cat<section>/[<arch>/] 1806 */ 1807 1808 if ('\0' == *sec) { 1809 if(0 == strncmp("man", fn, 3)) { 1810 src_form |= MANDOC_SRC; 1811 sec = fn + 3; 1812 } else if (0 == strncmp("cat", fn, 3)) { 1813 src_form |= MANDOC_FORM; 1814 sec = fn + 3; 1815 } else { 1816 if (warnings) fprintf(stderr, 1817 "%s/%s: bad section\n", 1818 dir, fn); 1819 if (use_all) 1820 sec = fn; 1821 else 1822 continue; 1823 } 1824 } else if ('\0' == *arch) { 1825 if (NULL != strchr(fn, '.')) { 1826 if (warnings) fprintf(stderr, 1827 "%s/%s: bad architecture\n", 1828 dir, fn); 1829 if (0 == use_all) 1830 continue; 1831 } 1832 arch = fn; 1833 } else { 1834 if (warnings) fprintf(stderr, "%s/%s: " 1835 "excessive subdirectory\n", dir, fn); 1836 if (0 == use_all) 1837 continue; 1838 } 1839 1840 buf[0] = '\0'; 1841 strlcat(buf, dir, PATH_MAX); 1842 strlcat(buf, "/", PATH_MAX); 1843 sz = strlcat(buf, fn, PATH_MAX); 1844 1845 if (PATH_MAX <= sz) { 1846 if (warnings) fprintf(stderr, "%s/%s: " 1847 "path too long\n", dir, fn); 1848 continue; 1849 } 1850 1851 ofile_dirbuild(buf, sec, arch, src_form, of); 1852 continue; 1853 } 1854 1855 #if defined(__sun) 1856 if (0 == S_IFREG & sb.st_mode) { 1857 #else 1858 if (DT_REG != dp->d_type) { 1859 #endif 1860 if (warnings) 1861 fprintf(stderr, 1862 "%s/%s: not a regular file\n", 1863 dir, fn); 1864 continue; 1865 } 1866 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) 1867 continue; 1868 if ('\0' == *psec) { 1869 if (warnings) 1870 fprintf(stderr, 1871 "%s/%s: file outside section\n", 1872 dir, fn); 1873 if (0 == use_all) 1874 continue; 1875 } 1876 1877 /* 1878 * By default, skip files where the file name suffix 1879 * does not agree with the section directory 1880 * they are located in. 1881 */ 1882 1883 suffix = strrchr(fn, '.'); 1884 if (NULL == suffix) { 1885 if (warnings) 1886 fprintf(stderr, 1887 "%s/%s: no filename suffix\n", 1888 dir, fn); 1889 if (0 == use_all) 1890 continue; 1891 } else if ((MANDOC_SRC & src_form && 1892 strcmp(suffix + 1, psec)) || 1893 (MANDOC_FORM & src_form && 1894 strcmp(suffix + 1, "0"))) { 1895 if (warnings) 1896 fprintf(stderr, 1897 "%s/%s: wrong filename suffix\n", 1898 dir, fn); 1899 if (0 == use_all) 1900 continue; 1901 if ('0' == suffix[1]) 1902 src_form |= MANDOC_FORM; 1903 else if ('1' <= suffix[1] && '9' >= suffix[1]) 1904 src_form |= MANDOC_SRC; 1905 } 1906 1907 /* 1908 * Skip formatted manuals if a source version is 1909 * available. Ignore the age: it is very unlikely 1910 * that people install newer formatted base manuals 1911 * when they used to have source manuals before, 1912 * and in ports, old manuals get removed on update. 1913 */ 1914 if (0 == use_all && MANDOC_FORM & src_form && 1915 '\0' != *psec) { 1916 buf[0] = '\0'; 1917 strlcat(buf, dir, PATH_MAX); 1918 p = strrchr(buf, '/'); 1919 if ('\0' != *parch && NULL != p) 1920 for (p--; p > buf; p--) 1921 if ('/' == *p) 1922 break; 1923 if (NULL == p) 1924 p = buf; 1925 else 1926 p++; 1927 if (0 == strncmp("cat", p, 3)) 1928 memcpy(p, "man", 3); 1929 strlcat(buf, "/", PATH_MAX); 1930 sz = strlcat(buf, fn, PATH_MAX); 1931 if (sz >= PATH_MAX) { 1932 if (warnings) fprintf(stderr, 1933 "%s/%s: path too long\n", 1934 dir, fn); 1935 continue; 1936 } 1937 q = strrchr(buf, '.'); 1938 if (NULL != q && p < q++) { 1939 *q = '\0'; 1940 sz = strlcat(buf, psec, PATH_MAX); 1941 if (sz >= PATH_MAX) { 1942 if (warnings) fprintf(stderr, 1943 "%s/%s: path too long\n", 1944 dir, fn); 1945 continue; 1946 } 1947 if (0 == access(buf, R_OK)) 1948 continue; 1949 } 1950 } 1951 1952 buf[0] = '\0'; 1953 assert('.' == dir[0]); 1954 if ('/' == dir[1]) { 1955 strlcat(buf, dir + 2, PATH_MAX); 1956 strlcat(buf, "/", PATH_MAX); 1957 } 1958 sz = strlcat(buf, fn, PATH_MAX); 1959 if (sz >= PATH_MAX) { 1960 if (warnings) fprintf(stderr, 1961 "%s/%s: path too long\n", dir, fn); 1962 continue; 1963 } 1964 1965 nof = mandoc_calloc(1, sizeof(struct of)); 1966 nof->fname = mandoc_strdup(buf); 1967 nof->sec = mandoc_strdup(psec); 1968 nof->arch = mandoc_strdup(parch); 1969 nof->src_form = src_form; 1970 1971 /* 1972 * Remember the file name without the extension, 1973 * to be used as the page title in the database. 1974 */ 1975 1976 if (NULL != suffix) 1977 *suffix = '\0'; 1978 nof->title = mandoc_strdup(fn); 1979 1980 /* 1981 * Add the structure to the list. 1982 */ 1983 1984 if (NULL == *of) { 1985 *of = nof; 1986 (*of)->first = nof; 1987 } else { 1988 nof->first = (*of)->first; 1989 (*of)->next = nof; 1990 *of = nof; 1991 } 1992 } 1993 1994 closedir(d); 1995 } 1996 1997 static void 1998 ofile_free(struct of *of) 1999 { 2000 struct of *nof; 2001 2002 if (NULL != of) 2003 of = of->first; 2004 2005 while (NULL != of) { 2006 nof = of->next; 2007 free(of->fname); 2008 free(of->sec); 2009 free(of->arch); 2010 free(of->title); 2011 free(of); 2012 of = nof; 2013 } 2014 } 2015