1 /* $Vendor-Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/param.h> 23 #include <sys/types.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <dirent.h> 28 #include <fcntl.h> 29 #include <getopt.h> 30 #include <stdio.h> 31 #include <stdint.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 36 #if defined(__linux__) 37 # include <endian.h> 38 # include <db_185.h> 39 #elif defined(__APPLE__) 40 # include <libkern/OSByteOrder.h> 41 # include <db.h> 42 #else 43 # include <db.h> 44 #endif 45 46 #include "man.h" 47 #include "mdoc.h" 48 #include "mandoc.h" 49 #include "mandocdb.h" 50 #include "manpath.h" 51 52 #define MANDOC_BUFSZ BUFSIZ 53 #define MANDOC_SLOP 1024 54 55 #define MANDOC_SRC 0x1 56 #define MANDOC_FORM 0x2 57 58 /* Access to the mandoc database on disk. 
*/

struct	mdb {
	char		  idxn[MAXPATHLEN]; /* index db filename */
	char		  dbn[MAXPATHLEN]; /* keyword db filename */
	DB		 *idx; /* index recno database */
	DB		 *db; /* keyword btree database */
};

/* Stack of temporarily unused index records. */

struct	recs {
	recno_t		 *stack; /* pointer to a malloc'ed array */
	size_t		  size; /* number of allocated slots */
	size_t		  cur; /* current number of empty records */
	recno_t		  last; /* last record number in the index */
};

/* Tiny list for files.  No need to bring in QUEUE. */

struct	of {
	char		 *fname; /* heap-allocated */
	char		 *sec; /* section; index_merge() reports it as the directory's */
	char		 *arch; /* architecture; index_merge() reports it as the directory's */
	char		 *title; /* title; index_merge() reports it as the filename's */
	int		  src_form; /* tested against MANDOC_SRC and MANDOC_FORM */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};

/* Buffer for storing growable data. */

struct	buf {
	char		 *cp; /* storage, grown with mandoc_realloc() by buf_appendb() */
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};

/* Operation we're going to perform. */

enum	op {
	OP_DEFAULT = 0, /* new dbs from dir list or default config */
	OP_CONFFILE, /* new databases from custom config file */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE, /* delete entries from existing database */
	OP_TEST /* change no databases, report potential problems */
};

/*
 * Parameter lists shared by the man(7) and mdoc(7) node handlers:
 * the keyword hashtable, the keyword buffer, the description buffer,
 * the node to inspect and (mdoc only) the document meta data.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m

static	void		  buf_appendmdoc(struct buf *,
				const struct mdoc_node *, int);
static	void		  buf_append(struct buf *, const char *);
static	void		  buf_appendb(struct buf *,
				const void *, size_t);
static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
static	void		  hash_put(DB *, const struct buf *, uint64_t);
static	void		  hash_reset(DB **);
static	void		  index_merge(const struct of *, struct mparse *,
				struct buf *, struct buf *, DB *,
				struct mdb *, struct recs *);
static	void		  index_prune(const struct of *, struct mdb *,
				struct recs *);
static	void		  ofile_argbuild(int, char *[], struct of **);
static	void		  ofile_dirbuild(const char *, const char *,
				const char *, int, struct of **);
static	void		  ofile_free(struct of *);
static	void		  pformatted(DB *, struct buf *,
				struct buf *, const struct of *);
static	int		  pman_node(MAN_ARGS);
static	void		  pmdoc_node(MDOC_ARGS);
static	int		  pmdoc_head(MDOC_ARGS);
static	int		  pmdoc_body(MDOC_ARGS);
static	int		  pmdoc_Fd(MDOC_ARGS);
static	int		  pmdoc_In(MDOC_ARGS);
static	int		  pmdoc_Fn(MDOC_ARGS);
static	int		  pmdoc_Nd(MDOC_ARGS);
static	int		  pmdoc_Nm(MDOC_ARGS);
static	int		  pmdoc_Sh(MDOC_ARGS);
static	int		  pmdoc_St(MDOC_ARGS);
static	int		  pmdoc_Xr(MDOC_ARGS);

#define	MDOCF_CHILD	  0x01  /* Automatically index child nodes. */

struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS);  /* Optional handler. */
	uint64_t	  mask;  /* Set unless handler returns 0. */
	int		  flags;  /* For use by pmdoc_node. */
};

/*
 * Per-macro indexing rules.  pmdoc_node() looks an entry up by the
 * node's macro token (n->tok), so the order of the entries matters;
 * the trailing comment on each line names the macro it belongs to.
 */
static	const struct mdoc_handler mdocs[MDOC_MAX] = {
	{ NULL, 0, 0 },  /* Ap */
	{ NULL, 0, 0 },  /* Dd */
	{ NULL, 0, 0 },  /* Dt */
	{ NULL, 0, 0 },  /* Os */
	{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
	{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
	{ NULL, 0, 0 },  /* Pp */
	{ NULL, 0, 0 },  /* D1 */
	{ NULL, 0, 0 },  /* Dl */
	{ NULL, 0, 0 },  /* Bd */
	{ NULL, 0, 0 },  /* Ed */
	{ NULL, 0, 0 },  /* Bl */
	{ NULL, 0, 0 },  /* El */
	{ NULL, 0, 0 },  /* It */
	{ NULL, 0, 0 },  /* Ad */
	{ NULL, TYPE_An, MDOCF_CHILD },  /* An */
	{ NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
	{ NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
	{ NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
	{ NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
	{ NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
	{ NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
	{ NULL, 0, 0 },  /* Ex */
	{ NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
	{ pmdoc_Fd, TYPE_In, 0 },  /* Fd */
	{ NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
	{ pmdoc_Fn, 0, 0 },  /* Fn */
	{ NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
	{ NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
	{ pmdoc_In, TYPE_In, 0 },  /* In */
	{ NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
	{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
	{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
	{ NULL, 0, 0 },  /* Op */
	{ NULL, 0, 0 },  /* Ot */
	{ NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
	{ NULL, 0, 0 },  /* Rv */
	{ pmdoc_St, TYPE_St, 0 },  /* St */
	{ NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
	{ pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
	{ pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
	{ NULL, 0, 0 },  /* %A */
	{ NULL, 0, 0 },  /* %B */
	{ NULL, 0, 0 },  /* %D */
	{ NULL, 0, 0 },  /* %I */
	{ NULL, 0, 0 },  /* %J */
	{ NULL, 0, 0 },  /* %N */
	{ NULL, 0, 0 },  /* %O */
	{ NULL, 0, 0 },  /* %P */
	{ NULL, 0, 0 },  /* %R */
	{ NULL, 0, 0 },  /* %T */
	{ NULL, 0, 0 },  /* %V */
	{ NULL, 0, 0 },  /* Ac */
	{ NULL, 0, 0 },  /* Ao */
	{ NULL, 0, 0 },  /* Aq */
	{ NULL, TYPE_At, MDOCF_CHILD },  /* At */
	{ NULL, 0, 0 },  /* Bc */
	{ NULL, 0, 0 },  /* Bf */
	{ NULL, 0, 0 },  /* Bo */
	{ NULL, 0, 0 },  /* Bq */
	{ NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
	{ NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
	{ NULL, 0, 0 },  /* Db */
	{ NULL, 0, 0 },  /* Dc */
	{ NULL, 0, 0 },  /* Do */
	{ NULL, 0, 0 },  /* Dq */
	{ NULL, 0, 0 },  /* Ec */
	{ NULL, 0, 0 },  /* Ef */
	{ NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
	{ NULL, 0, 0 },  /* Eo */
	{ NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
	{ NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
	{ NULL, 0, 0 },  /* No */
	{ NULL, 0, 0 },  /* Ns */
	{ NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
	{ NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
	{ NULL, 0, 0 },  /* Pc */
	{ NULL, 0, 0 },  /* Pf */
	{ NULL, 0, 0 },  /* Po */
	{ NULL, 0, 0 },  /* Pq */
	{ NULL, 0, 0 },  /* Qc */
	{ NULL, 0, 0 },  /* Ql */
	{ NULL, 0, 0 },  /* Qo */
	{ NULL, 0, 0 },  /* Qq */
	{ NULL, 0, 0 },  /* Re */
	{ NULL, 0, 0 },  /* Rs */
	{ NULL, 0, 0 },  /* Sc */
	{ NULL, 0, 0 },  /* So */
	{ NULL, 0, 0 },  /* Sq */
	{ NULL, 0, 0 },  /* Sm */
	{ NULL, 0, 0 },  /* Sx */
	{ NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
	{ NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
	{ NULL, 0, 0 },  /* Ux */
	{ NULL, 0, 0 },  /* Xc */
	{ NULL, 0, 0 },  /* Xo */
	{ pmdoc_head, TYPE_Fn, 0 },  /* Fo */
	{ NULL, 0, 0 },  /* Fc */
	{ NULL, 0, 0 },  /* Oo */
	{ NULL, 0, 0 },  /* Oc */
	{ NULL, 0, 0 },  /* Bk */
	{ NULL, 0, 0 },  /* Ek */
	{ NULL, 0, 0 },  /* Bt */
	{ NULL, 0, 0 },  /* Hf */
	{ NULL, 0, 0 },  /* Fr */
	{ NULL, 0, 0 },  /* Ud */
	{ NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
	{ NULL, 0, 0 },  /* Lp */
	{ NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
	{ NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
	{ NULL, 0, 0 },  /* Brq */
	{ NULL, 0, 0 },  /* Bro */
	{ NULL, 0, 0 },  /* Brc */
	{ NULL, 0, 0 },  /* %C */
	{ NULL, 0, 0 },  /* Es
*/ 272 { NULL, 0, 0 }, /* En */ 273 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ 274 { NULL, 0, 0 }, /* %Q */ 275 { NULL, 0, 0 }, /* br */ 276 { NULL, 0, 0 }, /* sp */ 277 { NULL, 0, 0 }, /* %U */ 278 { NULL, 0, 0 }, /* Ta */ 279 }; 280 281 static const char *progname; 282 static int use_all; /* Use all directories and files. */ 283 static int verb; /* Output verbosity level. */ 284 static int warnings; /* Potential problems in manuals. */ 285 286 int 287 main(int argc, char *argv[]) 288 { 289 struct mparse *mp; /* parse sequence */ 290 struct manpaths dirs; 291 struct mdb mdb; 292 struct recs recs; 293 enum op op; /* current operation */ 294 const char *dir; 295 char *cp; 296 char pbuf[PATH_MAX]; 297 int ch, i, flags; 298 DB *hash; /* temporary keyword hashtable */ 299 BTREEINFO info; /* btree configuration */ 300 size_t sz1, sz2; 301 struct buf buf, /* keyword buffer */ 302 dbuf; /* description buffer */ 303 struct of *of; /* list of files for processing */ 304 extern int optind; 305 extern char *optarg; 306 307 progname = strrchr(argv[0], '/'); 308 if (progname == NULL) 309 progname = argv[0]; 310 else 311 ++progname; 312 313 memset(&dirs, 0, sizeof(struct manpaths)); 314 memset(&mdb, 0, sizeof(struct mdb)); 315 memset(&recs, 0, sizeof(struct recs)); 316 317 of = NULL; 318 mp = NULL; 319 hash = NULL; 320 op = OP_DEFAULT; 321 dir = NULL; 322 323 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) 324 switch (ch) { 325 case ('a'): 326 use_all = 1; 327 break; 328 case ('C'): 329 if (op) { 330 fprintf(stderr, 331 "-C: conflicting options\n"); 332 goto usage; 333 } 334 dir = optarg; 335 op = OP_CONFFILE; 336 break; 337 case ('d'): 338 if (op) { 339 fprintf(stderr, 340 "-d: conflicting options\n"); 341 goto usage; 342 } 343 dir = optarg; 344 op = OP_UPDATE; 345 break; 346 case ('t'): 347 dup2(STDOUT_FILENO, STDERR_FILENO); 348 if (op) { 349 fprintf(stderr, 350 "-t: conflicting options\n"); 351 goto usage; 352 } 353 op = OP_TEST; 354 use_all = 1; 355 warnings = 1; 356 
break; 357 case ('u'): 358 if (op) { 359 fprintf(stderr, 360 "-u: conflicting options\n"); 361 goto usage; 362 } 363 dir = optarg; 364 op = OP_DELETE; 365 break; 366 case ('v'): 367 verb++; 368 break; 369 case ('W'): 370 warnings = 1; 371 break; 372 default: 373 goto usage; 374 } 375 376 argc -= optind; 377 argv += optind; 378 379 if (OP_CONFFILE == op && argc > 0) { 380 fprintf(stderr, "-C: too many arguments\n"); 381 goto usage; 382 } 383 384 memset(&info, 0, sizeof(BTREEINFO)); 385 info.flags = R_DUP; 386 387 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 388 389 memset(&buf, 0, sizeof(struct buf)); 390 memset(&dbuf, 0, sizeof(struct buf)); 391 392 buf.size = dbuf.size = MANDOC_BUFSZ; 393 394 buf.cp = mandoc_malloc(buf.size); 395 dbuf.cp = mandoc_malloc(dbuf.size); 396 397 flags = O_CREAT | O_RDWR; 398 if (OP_DEFAULT == op || OP_CONFFILE == op) 399 flags |= O_TRUNC; 400 401 if (OP_TEST == op) { 402 ofile_argbuild(argc, argv, &of); 403 if (NULL == of) 404 goto out; 405 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); 406 goto out; 407 } 408 409 if (OP_UPDATE == op || OP_DELETE == op) { 410 strlcat(mdb.dbn, dir, MAXPATHLEN); 411 strlcat(mdb.dbn, "/", MAXPATHLEN); 412 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); 413 414 strlcat(mdb.idxn, dir, MAXPATHLEN); 415 strlcat(mdb.idxn, "/", MAXPATHLEN); 416 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 417 418 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 419 fprintf(stderr, "%s: path too long\n", dir); 420 exit((int)MANDOCLEVEL_BADARG); 421 } 422 423 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 424 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 425 426 if (NULL == mdb.db) { 427 perror(mdb.dbn); 428 exit((int)MANDOCLEVEL_SYSERR); 429 } else if (NULL == mdb.idx) { 430 perror(mdb.idxn); 431 exit((int)MANDOCLEVEL_SYSERR); 432 } 433 434 ofile_argbuild(argc, argv, &of); 435 436 if (NULL == of) 437 goto out; 438 439 index_prune(of, &mdb, &recs); 440 441 /* 442 * Go to the root of the 
respective manual tree. 443 * This must work or no manuals may be found (they're 444 * indexed relative to the root). 445 */ 446 447 if (OP_UPDATE == op) { 448 if (-1 == chdir(dir)) { 449 perror(dir); 450 exit((int)MANDOCLEVEL_SYSERR); 451 } 452 index_merge(of, mp, &dbuf, &buf, hash, 453 &mdb, &recs); 454 } 455 456 goto out; 457 } 458 459 /* 460 * Configure the directories we're going to scan. 461 * If we have command-line arguments, use them. 462 * If not, we use man(1)'s method (see mandocdb.8). 463 */ 464 465 if (argc > 0) { 466 dirs.paths = mandoc_calloc(argc, sizeof(char *)); 467 dirs.sz = argc; 468 for (i = 0; i < argc; i++) { 469 if (NULL == (cp = realpath(argv[i], pbuf))) { 470 perror(argv[i]); 471 goto out; 472 } 473 dirs.paths[i] = mandoc_strdup(cp); 474 } 475 } else 476 manpath_parse(&dirs, dir, NULL, NULL); 477 478 for (i = 0; i < dirs.sz; i++) { 479 mdb.idxn[0] = mdb.dbn[0] = '\0'; 480 481 strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN); 482 strlcat(mdb.dbn, "/", MAXPATHLEN); 483 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); 484 485 strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN); 486 strlcat(mdb.idxn, "/", MAXPATHLEN); 487 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 488 489 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 490 fprintf(stderr, "%s: path too long\n", 491 dirs.paths[i]); 492 exit((int)MANDOCLEVEL_BADARG); 493 } 494 495 if (mdb.db) 496 (*mdb.db->close)(mdb.db); 497 if (mdb.idx) 498 (*mdb.idx->close)(mdb.idx); 499 500 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 501 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 502 503 if (NULL == mdb.db) { 504 perror(mdb.dbn); 505 exit((int)MANDOCLEVEL_SYSERR); 506 } else if (NULL == mdb.idx) { 507 perror(mdb.idxn); 508 exit((int)MANDOCLEVEL_SYSERR); 509 } 510 511 ofile_free(of); 512 of = NULL; 513 514 if (-1 == chdir(dirs.paths[i])) { 515 perror(dirs.paths[i]); 516 exit((int)MANDOCLEVEL_SYSERR); 517 } 518 519 ofile_dirbuild(".", "", "", 0, &of); 520 if (NULL == of) 521 continue; 522 523 
/* 524 * Go to the root of the respective manual tree. 525 * This must work or no manuals may be found (they're 526 * indexed relative to the root). 527 */ 528 529 if (-1 == chdir(dirs.paths[i])) { 530 perror(dirs.paths[i]); 531 exit((int)MANDOCLEVEL_SYSERR); 532 } 533 534 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); 535 } 536 537 out: 538 if (mdb.db) 539 (*mdb.db->close)(mdb.db); 540 if (mdb.idx) 541 (*mdb.idx->close)(mdb.idx); 542 if (hash) 543 (*hash->close)(hash); 544 if (mp) 545 mparse_free(mp); 546 547 manpath_free(&dirs); 548 ofile_free(of); 549 free(buf.cp); 550 free(dbuf.cp); 551 free(recs.stack); 552 553 return(MANDOCLEVEL_OK); 554 555 usage: 556 fprintf(stderr, 557 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" 558 " -d dir [file ...] | " 559 "-u dir [file ...]\n", 560 progname); 561 562 return((int)MANDOCLEVEL_BADARG); 563 } 564 565 void 566 index_merge(const struct of *of, struct mparse *mp, 567 struct buf *dbuf, struct buf *buf, DB *hash, 568 struct mdb *mdb, struct recs *recs) 569 { 570 recno_t rec; 571 int ch, skip; 572 DBT key, val; 573 struct mdoc *mdoc; 574 struct man *man; 575 const char *fn, *msec, *march, *mtitle; 576 uint64_t mask; 577 size_t sv; 578 unsigned seq; 579 uint64_t vbuf[2]; 580 char type; 581 582 rec = 0; 583 for (of = of->first; of; of = of->next) { 584 fn = of->fname; 585 586 /* 587 * Try interpreting the file as mdoc(7) or man(7) 588 * source code, unless it is already known to be 589 * formatted. Fall back to formatted mode. 590 */ 591 592 mparse_reset(mp); 593 mdoc = NULL; 594 man = NULL; 595 596 if ((MANDOC_SRC & of->src_form || 597 ! 
(MANDOC_FORM & of->src_form)) && 598 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) 599 mparse_result(mp, &mdoc, &man); 600 601 if (NULL != mdoc) { 602 msec = mdoc_meta(mdoc)->msec; 603 march = mdoc_meta(mdoc)->arch; 604 if (NULL == march) 605 march = ""; 606 mtitle = mdoc_meta(mdoc)->title; 607 } else if (NULL != man) { 608 msec = man_meta(man)->msec; 609 march = ""; 610 mtitle = man_meta(man)->title; 611 } else { 612 msec = of->sec; 613 march = of->arch; 614 mtitle = of->title; 615 } 616 617 /* 618 * By default, skip a file if the manual section 619 * given in the file disagrees with the directory 620 * where the file is located. 621 */ 622 623 skip = 0; 624 assert(of->sec); 625 assert(msec); 626 if (strcasecmp(msec, of->sec)) { 627 if (warnings) 628 fprintf(stderr, "%s: " 629 "section \"%s\" manual " 630 "in \"%s\" directory\n", 631 fn, msec, of->sec); 632 skip = 1; 633 } 634 635 /* 636 * Manual page directories exist for each kernel 637 * architecture as returned by machine(1). 638 * However, many manuals only depend on the 639 * application architecture as returned by arch(1). 640 * For example, some (2/ARM) manuals are shared 641 * across the "armish" and "zaurus" kernel 642 * architectures. 643 * A few manuals are even shared across completely 644 * different architectures, for example fdformat(1) 645 * on amd64, i386, sparc, and sparc64. 646 * Thus, warn about architecture mismatches, 647 * but don't skip manuals for this reason. 648 */ 649 650 assert(of->arch); 651 assert(march); 652 if (strcasecmp(march, of->arch)) { 653 if (warnings) 654 fprintf(stderr, "%s: " 655 "architecture \"%s\" manual " 656 "in \"%s\" directory\n", 657 fn, march, of->arch); 658 march = of->arch; 659 } 660 661 /* 662 * By default, skip a file if the title given 663 * in the file disagrees with the file name. 664 * If both agree, use the file name as the title, 665 * because the one in the file usually is all caps. 
666 */ 667 668 assert(of->title); 669 assert(mtitle); 670 if (strcasecmp(mtitle, of->title)) { 671 if (warnings) 672 fprintf(stderr, "%s: " 673 "title \"%s\" in file " 674 "but \"%s\" in filename\n", 675 fn, mtitle, of->title); 676 skip = 1; 677 } else 678 mtitle = of->title; 679 680 if (skip && !use_all) 681 continue; 682 683 /* 684 * The index record value consists of a nil-terminated 685 * filename, a nil-terminated manual section, and a 686 * nil-terminated description. Since the description 687 * may not be set, we set a sentinel to see if we're 688 * going to write a nil byte in its place. 689 */ 690 691 dbuf->len = 0; 692 type = mdoc ? 'd' : (man ? 'a' : 'c'); 693 buf_appendb(dbuf, &type, 1); 694 buf_appendb(dbuf, fn, strlen(fn) + 1); 695 buf_appendb(dbuf, msec, strlen(msec) + 1); 696 buf_appendb(dbuf, mtitle, strlen(mtitle) + 1); 697 buf_appendb(dbuf, march, strlen(march) + 1); 698 699 sv = dbuf->len; 700 701 /* 702 * Collect keyword/mask pairs. 703 * Each pair will become a new btree node. 704 */ 705 706 hash_reset(&hash); 707 if (mdoc) 708 pmdoc_node(hash, buf, dbuf, 709 mdoc_node(mdoc), mdoc_meta(mdoc)); 710 else if (man) 711 pman_node(hash, buf, dbuf, man_node(man)); 712 else 713 pformatted(hash, buf, dbuf, of); 714 715 /* Test mode, do not access any database. */ 716 717 if (NULL == mdb->db || NULL == mdb->idx) 718 continue; 719 720 /* 721 * Reclaim an empty index record, if available. 722 * Use its record number for all new btree nodes. 723 */ 724 725 if (recs->cur > 0) { 726 recs->cur--; 727 rec = recs->stack[(int)recs->cur]; 728 } else if (recs->last > 0) { 729 rec = recs->last; 730 recs->last = 0; 731 } else 732 rec++; 733 vbuf[1] = htobe64(rec); 734 735 /* 736 * Copy from the in-memory hashtable of pending 737 * keyword/mask pairs into the database. 
738 */ 739 740 seq = R_FIRST; 741 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 742 seq = R_NEXT; 743 assert(sizeof(uint64_t) == val.size); 744 memcpy(&mask, val.data, val.size); 745 vbuf[0] = htobe64(mask); 746 val.size = sizeof(vbuf); 747 val.data = &vbuf; 748 dbt_put(mdb->db, mdb->dbn, &key, &val); 749 } 750 if (ch < 0) { 751 perror("hash"); 752 exit((int)MANDOCLEVEL_SYSERR); 753 } 754 755 /* 756 * Apply to the index. If we haven't had a description 757 * set, put an empty one in now. 758 */ 759 760 if (dbuf->len == sv) 761 buf_appendb(dbuf, "", 1); 762 763 key.data = &rec; 764 key.size = sizeof(recno_t); 765 766 val.data = dbuf->cp; 767 val.size = dbuf->len; 768 769 if (verb) 770 printf("%s: adding to index\n", fn); 771 772 dbt_put(mdb->idx, mdb->idxn, &key, &val); 773 } 774 } 775 776 /* 777 * Scan through all entries in the index file `idx' and prune those 778 * entries in `ofile'. 779 * Pruning consists of removing from `db', then invalidating the entry 780 * in `idx' (zeroing its value size). 781 */ 782 static void 783 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) 784 { 785 const struct of *of; 786 const char *fn; 787 uint64_t vbuf[2]; 788 unsigned seq, sseq; 789 DBT key, val; 790 int ch; 791 792 recs->cur = 0; 793 seq = R_FIRST; 794 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { 795 seq = R_NEXT; 796 assert(sizeof(recno_t) == key.size); 797 memcpy(&recs->last, key.data, key.size); 798 799 /* Deleted records are zero-sized. Skip them. */ 800 801 if (0 == val.size) 802 goto cont; 803 804 /* 805 * Make sure we're sane. 806 * Read past our mdoc/man/cat type to the next string, 807 * then make sure it's bounded by a NUL. 808 * Failing any of these, we go into our error handler. 809 */ 810 811 fn = (char *)val.data + 1; 812 if (NULL == memchr(fn, '\0', val.size - 1)) 813 break; 814 815 /* 816 * Search for the file in those we care about. 817 * XXX: build this into a tree. Too slow. 
818 */ 819 820 for (of = ofile->first; of; of = of->next) 821 if (0 == strcmp(fn, of->fname)) 822 break; 823 824 if (NULL == of) 825 continue; 826 827 /* 828 * Search through the keyword database, throwing out all 829 * references to our file. 830 */ 831 832 sseq = R_FIRST; 833 while (0 == (ch = (*mdb->db->seq)(mdb->db, 834 &key, &val, sseq))) { 835 sseq = R_NEXT; 836 if (sizeof(vbuf) != val.size) 837 break; 838 839 memcpy(vbuf, val.data, val.size); 840 if (recs->last != betoh64(vbuf[1])) 841 continue; 842 843 if ((ch = (*mdb->db->del)(mdb->db, 844 &key, R_CURSOR)) < 0) 845 break; 846 } 847 848 if (ch < 0) { 849 perror(mdb->dbn); 850 exit((int)MANDOCLEVEL_SYSERR); 851 } else if (1 != ch) { 852 fprintf(stderr, "%s: corrupt database\n", 853 mdb->dbn); 854 exit((int)MANDOCLEVEL_SYSERR); 855 } 856 857 if (verb) 858 printf("%s: deleting from index\n", fn); 859 860 val.size = 0; 861 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); 862 863 if (ch < 0) 864 break; 865 cont: 866 if (recs->cur >= recs->size) { 867 recs->size += MANDOC_SLOP; 868 recs->stack = mandoc_realloc(recs->stack, 869 recs->size * sizeof(recno_t)); 870 } 871 872 recs->stack[(int)recs->cur] = recs->last; 873 recs->cur++; 874 } 875 876 if (ch < 0) { 877 perror(mdb->idxn); 878 exit((int)MANDOCLEVEL_SYSERR); 879 } else if (1 != ch) { 880 fprintf(stderr, "%s: corrupt index\n", mdb->idxn); 881 exit((int)MANDOCLEVEL_SYSERR); 882 } 883 884 recs->last++; 885 } 886 887 /* 888 * Grow the buffer (if necessary) and copy in a binary string. 889 */ 890 static void 891 buf_appendb(struct buf *buf, const void *cp, size_t sz) 892 { 893 894 /* Overshoot by MANDOC_BUFSZ. */ 895 896 while (buf->len + sz >= buf->size) { 897 buf->size = buf->len + sz + MANDOC_BUFSZ; 898 buf->cp = mandoc_realloc(buf->cp, buf->size); 899 } 900 901 memcpy(buf->cp + (int)buf->len, cp, sz); 902 buf->len += sz; 903 } 904 905 /* 906 * Append a nil-terminated string to the buffer. 907 * This can be invoked multiple times. 
908 * The buffer string will be nil-terminated. 909 * If invoked multiple times, a space is put between strings. 910 */ 911 static void 912 buf_append(struct buf *buf, const char *cp) 913 { 914 size_t sz; 915 916 if (0 == (sz = strlen(cp))) 917 return; 918 919 if (buf->len) 920 buf->cp[(int)buf->len - 1] = ' '; 921 922 buf_appendb(buf, cp, sz + 1); 923 } 924 925 /* 926 * Recursively add all text from a given node. 927 * This is optimised for general mdoc nodes in this context, which do 928 * not consist of subexpressions and having a recursive call for n->next 929 * would be wasteful. 930 * The "f" variable should be 0 unless called from pmdoc_Nd for the 931 * description buffer, which does not start at the beginning of the 932 * buffer. 933 */ 934 static void 935 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 936 { 937 938 for ( ; n; n = n->next) { 939 if (n->child) 940 buf_appendmdoc(buf, n->child, f); 941 942 if (MDOC_TEXT == n->type && f) { 943 f = 0; 944 buf_appendb(buf, n->string, 945 strlen(n->string) + 1); 946 } else if (MDOC_TEXT == n->type) 947 buf_append(buf, n->string); 948 949 } 950 } 951 952 static void 953 hash_reset(DB **db) 954 { 955 DB *hash; 956 957 if (NULL != (hash = *db)) 958 (*hash->close)(hash); 959 960 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 961 if (NULL == *db) { 962 perror("hash"); 963 exit((int)MANDOCLEVEL_SYSERR); 964 } 965 } 966 967 /* ARGSUSED */ 968 static int 969 pmdoc_head(MDOC_ARGS) 970 { 971 972 return(MDOC_HEAD == n->type); 973 } 974 975 /* ARGSUSED */ 976 static int 977 pmdoc_body(MDOC_ARGS) 978 { 979 980 return(MDOC_BODY == n->type); 981 } 982 983 /* ARGSUSED */ 984 static int 985 pmdoc_Fd(MDOC_ARGS) 986 { 987 const char *start, *end; 988 size_t sz; 989 990 if (SEC_SYNOPSIS != n->sec) 991 return(0); 992 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 993 return(0); 994 995 /* 996 * Only consider those `Fd' macro fields that begin with an 997 * "inclusion" token (versus, e.g., #define). 
998 */ 999 if (strcmp("#include", n->string)) 1000 return(0); 1001 1002 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1003 return(0); 1004 1005 /* 1006 * Strip away the enclosing angle brackets and make sure we're 1007 * not zero-length. 1008 */ 1009 1010 start = n->string; 1011 if ('<' == *start || '"' == *start) 1012 start++; 1013 1014 if (0 == (sz = strlen(start))) 1015 return(0); 1016 1017 end = &start[(int)sz - 1]; 1018 if ('>' == *end || '"' == *end) 1019 end--; 1020 1021 assert(end >= start); 1022 1023 buf_appendb(buf, start, (size_t)(end - start + 1)); 1024 buf_appendb(buf, "", 1); 1025 return(1); 1026 } 1027 1028 /* ARGSUSED */ 1029 static int 1030 pmdoc_In(MDOC_ARGS) 1031 { 1032 1033 if (NULL == n->child || MDOC_TEXT != n->child->type) 1034 return(0); 1035 1036 buf_append(buf, n->child->string); 1037 return(1); 1038 } 1039 1040 /* ARGSUSED */ 1041 static int 1042 pmdoc_Fn(MDOC_ARGS) 1043 { 1044 struct mdoc_node *nn; 1045 const char *cp; 1046 1047 nn = n->child; 1048 1049 if (NULL == nn || MDOC_TEXT != nn->type) 1050 return(0); 1051 1052 /* .Fn "struct type *name" "char *arg" */ 1053 1054 cp = strrchr(nn->string, ' '); 1055 if (NULL == cp) 1056 cp = nn->string; 1057 1058 /* Strip away pointer symbol. */ 1059 1060 while ('*' == *cp) 1061 cp++; 1062 1063 /* Store the function name. */ 1064 1065 buf_append(buf, cp); 1066 hash_put(hash, buf, TYPE_Fn); 1067 1068 /* Store the function type. */ 1069 1070 if (nn->string < cp) { 1071 buf->len = 0; 1072 buf_appendb(buf, nn->string, cp - nn->string); 1073 buf_appendb(buf, "", 1); 1074 hash_put(hash, buf, TYPE_Ft); 1075 } 1076 1077 /* Store the arguments. 
*/ 1078 1079 for (nn = nn->next; nn; nn = nn->next) { 1080 if (MDOC_TEXT != nn->type) 1081 continue; 1082 buf->len = 0; 1083 buf_append(buf, nn->string); 1084 hash_put(hash, buf, TYPE_Fa); 1085 } 1086 1087 return(0); 1088 } 1089 1090 /* ARGSUSED */ 1091 static int 1092 pmdoc_St(MDOC_ARGS) 1093 { 1094 1095 if (NULL == n->child || MDOC_TEXT != n->child->type) 1096 return(0); 1097 1098 buf_append(buf, n->child->string); 1099 return(1); 1100 } 1101 1102 /* ARGSUSED */ 1103 static int 1104 pmdoc_Xr(MDOC_ARGS) 1105 { 1106 1107 if (NULL == (n = n->child)) 1108 return(0); 1109 1110 buf_appendb(buf, n->string, strlen(n->string)); 1111 1112 if (NULL != (n = n->next)) { 1113 buf_appendb(buf, ".", 1); 1114 buf_appendb(buf, n->string, strlen(n->string) + 1); 1115 } else 1116 buf_appendb(buf, ".", 2); 1117 1118 return(1); 1119 } 1120 1121 /* ARGSUSED */ 1122 static int 1123 pmdoc_Nd(MDOC_ARGS) 1124 { 1125 1126 if (MDOC_BODY != n->type) 1127 return(0); 1128 1129 buf_appendmdoc(dbuf, n->child, 1); 1130 return(1); 1131 } 1132 1133 /* ARGSUSED */ 1134 static int 1135 pmdoc_Nm(MDOC_ARGS) 1136 { 1137 1138 if (SEC_NAME == n->sec) 1139 return(1); 1140 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 1141 return(0); 1142 1143 if (NULL == n->child) 1144 buf_append(buf, m->name); 1145 1146 return(1); 1147 } 1148 1149 /* ARGSUSED */ 1150 static int 1151 pmdoc_Sh(MDOC_ARGS) 1152 { 1153 1154 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1155 } 1156 1157 static void 1158 hash_put(DB *db, const struct buf *buf, uint64_t mask) 1159 { 1160 uint64_t oldmask; 1161 DBT key, val; 1162 int rc; 1163 1164 if (buf->len < 2) 1165 return; 1166 1167 key.data = buf->cp; 1168 key.size = buf->len; 1169 1170 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1171 perror("hash"); 1172 exit((int)MANDOCLEVEL_SYSERR); 1173 } else if (0 == rc) { 1174 assert(sizeof(uint64_t) == val.size); 1175 memcpy(&oldmask, val.data, val.size); 1176 mask |= oldmask; 1177 } 1178 1179 val.data = &mask; 1180 val.size = 
sizeof(uint64_t); 1181 1182 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1183 perror("hash"); 1184 exit((int)MANDOCLEVEL_SYSERR); 1185 } 1186 } 1187 1188 static void 1189 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1190 { 1191 1192 assert(key->size); 1193 assert(val->size); 1194 1195 if (0 == (*db->put)(db, key, val, 0)) 1196 return; 1197 1198 perror(dbn); 1199 exit((int)MANDOCLEVEL_SYSERR); 1200 /* NOTREACHED */ 1201 } 1202 1203 /* 1204 * Call out to per-macro handlers after clearing the persistent database 1205 * key. If the macro sets the database key, flush it to the database. 1206 */ 1207 static void 1208 pmdoc_node(MDOC_ARGS) 1209 { 1210 1211 if (NULL == n) 1212 return; 1213 1214 switch (n->type) { 1215 case (MDOC_HEAD): 1216 /* FALLTHROUGH */ 1217 case (MDOC_BODY): 1218 /* FALLTHROUGH */ 1219 case (MDOC_TAIL): 1220 /* FALLTHROUGH */ 1221 case (MDOC_BLOCK): 1222 /* FALLTHROUGH */ 1223 case (MDOC_ELEM): 1224 buf->len = 0; 1225 1226 /* 1227 * Both NULL handlers and handlers returning true 1228 * request using the data. Only skip the element 1229 * when the handler returns false. 1230 */ 1231 1232 if (NULL != mdocs[n->tok].fp && 1233 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) 1234 break; 1235 1236 /* 1237 * For many macros, use the text from all children. 1238 * Set zero flags for macros not needing this. 1239 * In that case, the handler must fill the buffer. 1240 */ 1241 1242 if (MDOCF_CHILD & mdocs[n->tok].flags) 1243 buf_appendmdoc(buf, n->child, 0); 1244 1245 /* 1246 * Cover the most common case: 1247 * Automatically stage one string per element. 1248 * Set a zero mask for macros not needing this. 1249 * Additional staging can be done in the handler. 
1250 */ 1251 1252 if (mdocs[n->tok].mask) 1253 hash_put(hash, buf, mdocs[n->tok].mask); 1254 break; 1255 default: 1256 break; 1257 } 1258 1259 pmdoc_node(hash, buf, dbuf, n->child, m); 1260 pmdoc_node(hash, buf, dbuf, n->next, m); 1261 } 1262 1263 static int 1264 pman_node(MAN_ARGS) 1265 { 1266 const struct man_node *head, *body; 1267 const char *start, *sv; 1268 size_t sz; 1269 1270 if (NULL == n) 1271 return(0); 1272 1273 /* 1274 * We're only searching for one thing: the first text child in 1275 * the BODY of a NAME section. Since we don't keep track of 1276 * sections in -man, run some hoops to find out whether we're in 1277 * the correct section or not. 1278 */ 1279 1280 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1281 body = n; 1282 assert(body->parent); 1283 if (NULL != (head = body->parent->head) && 1284 1 == head->nchild && 1285 NULL != (head = (head->child)) && 1286 MAN_TEXT == head->type && 1287 0 == strcmp(head->string, "NAME") && 1288 NULL != (body = body->child) && 1289 MAN_TEXT == body->type) { 1290 1291 assert(body->string); 1292 start = sv = body->string; 1293 1294 /* 1295 * Go through a special heuristic dance here. 1296 * This is why -man manuals are great! 1297 * (I'm being sarcastic: my eyes are bleeding.) 1298 * Conventionally, one or more manual names are 1299 * comma-specified prior to a whitespace, then a 1300 * dash, then a description. Try to puzzle out 1301 * the name parts here. 
1302 */ 1303 1304 for ( ;; ) { 1305 sz = strcspn(start, " ,"); 1306 if ('\0' == start[(int)sz]) 1307 break; 1308 1309 buf->len = 0; 1310 buf_appendb(buf, start, sz); 1311 buf_appendb(buf, "", 1); 1312 1313 hash_put(hash, buf, TYPE_Nm); 1314 1315 if (' ' == start[(int)sz]) { 1316 start += (int)sz + 1; 1317 break; 1318 } 1319 1320 assert(',' == start[(int)sz]); 1321 start += (int)sz + 1; 1322 while (' ' == *start) 1323 start++; 1324 } 1325 1326 buf->len = 0; 1327 1328 if (sv == start) { 1329 buf_append(buf, start); 1330 return(1); 1331 } 1332 1333 while (' ' == *start) 1334 start++; 1335 1336 if (0 == strncmp(start, "-", 1)) 1337 start += 1; 1338 else if (0 == strncmp(start, "\\-\\-", 4)) 1339 start += 4; 1340 else if (0 == strncmp(start, "\\-", 2)) 1341 start += 2; 1342 else if (0 == strncmp(start, "\\(en", 4)) 1343 start += 4; 1344 else if (0 == strncmp(start, "\\(em", 4)) 1345 start += 4; 1346 1347 while (' ' == *start) 1348 start++; 1349 1350 sz = strlen(start) + 1; 1351 buf_appendb(dbuf, start, sz); 1352 buf_appendb(buf, start, sz); 1353 1354 hash_put(hash, buf, TYPE_Nd); 1355 } 1356 } 1357 1358 for (n = n->child; n; n = n->next) 1359 if (pman_node(hash, buf, dbuf, n)) 1360 return(1); 1361 1362 return(0); 1363 } 1364 1365 /* 1366 * Parse a formatted manual page. 1367 * By necessity, this involves rather crude guesswork. 1368 */ 1369 static void 1370 pformatted(DB *hash, struct buf *buf, 1371 struct buf *dbuf, const struct of *of) 1372 { 1373 FILE *stream; 1374 char *line, *p, *title; 1375 size_t len, plen, titlesz; 1376 1377 if (NULL == (stream = fopen(of->fname, "r"))) { 1378 if (warnings) 1379 perror(of->fname); 1380 return; 1381 } 1382 1383 /* 1384 * Always use the title derived from the filename up front, 1385 * do not even try to find it in the file. This also makes 1386 * sure we don't end up with an orphan index record, even if 1387 * the file content turns out to be completely unintelligible. 
1388 */ 1389 1390 buf->len = 0; 1391 buf_append(buf, of->title); 1392 hash_put(hash, buf, TYPE_Nm); 1393 1394 /* Skip to first blank line. */ 1395 1396 while (NULL != (line = fgetln(stream, &len))) 1397 if ('\n' == *line) 1398 break; 1399 1400 /* 1401 * Assume the first line that is not indented 1402 * is the first section header. Skip to it. 1403 */ 1404 1405 while (NULL != (line = fgetln(stream, &len))) 1406 if ('\n' != *line && ' ' != *line) 1407 break; 1408 1409 /* 1410 * Read up until the next section into a buffer. 1411 * Strip the leading and trailing newline from each read line, 1412 * appending a trailing space. 1413 * Ignore empty (whitespace-only) lines. 1414 */ 1415 1416 titlesz = 0; 1417 title = NULL; 1418 1419 while (NULL != (line = fgetln(stream, &len))) { 1420 if (' ' != *line || '\n' != line[(int)len - 1]) 1421 break; 1422 while (len > 0 && isspace((unsigned char)*line)) { 1423 line++; 1424 len--; 1425 } 1426 if (1 == len) 1427 continue; 1428 title = mandoc_realloc(title, titlesz + len); 1429 memcpy(title + titlesz, line, len); 1430 titlesz += len; 1431 title[(int)titlesz - 1] = ' '; 1432 } 1433 1434 1435 /* 1436 * If no page content can be found, or the input line 1437 * is already the next section header, or there is no 1438 * trailing newline, reuse the page title as the page 1439 * description. 1440 */ 1441 1442 if (NULL == title || '\0' == *title) { 1443 if (warnings) 1444 fprintf(stderr, "%s: cannot find NAME section\n", 1445 of->fname); 1446 buf_appendb(dbuf, buf->cp, buf->size); 1447 hash_put(hash, buf, TYPE_Nd); 1448 fclose(stream); 1449 free(title); 1450 return; 1451 } 1452 1453 title = mandoc_realloc(title, titlesz + 1); 1454 title[(int)titlesz] = '\0'; 1455 1456 /* 1457 * Skip to the first dash. 1458 * Use the remaining line as the description (no more than 70 1459 * bytes). 1460 */ 1461 1462 if (NULL != (p = strstr(title, "- "))) { 1463 for (p += 2; ' ' == *p || '\b' == *p; p++) 1464 /* Skip to next word. 
*/ ; 1465 } else { 1466 if (warnings) 1467 fprintf(stderr, "%s: no dash in title line\n", 1468 of->fname); 1469 p = title; 1470 } 1471 1472 plen = strlen(p); 1473 1474 /* Strip backspace-encoding from line. */ 1475 1476 while (NULL != (line = memchr(p, '\b', plen))) { 1477 len = line - p; 1478 if (0 == len) { 1479 memmove(line, line + 1, plen--); 1480 continue; 1481 } 1482 memmove(line - 1, line + 1, plen - len); 1483 plen -= 2; 1484 } 1485 1486 buf_appendb(dbuf, p, plen + 1); 1487 buf->len = 0; 1488 buf_appendb(buf, p, plen + 1); 1489 hash_put(hash, buf, TYPE_Nd); 1490 fclose(stream); 1491 free(title); 1492 } 1493 1494 static void 1495 ofile_argbuild(int argc, char *argv[], struct of **of) 1496 { 1497 char buf[MAXPATHLEN]; 1498 const char *sec, *arch, *title; 1499 char *p; 1500 int i, src_form; 1501 struct of *nof; 1502 1503 for (i = 0; i < argc; i++) { 1504 1505 /* 1506 * Try to infer the manual section, architecture and 1507 * page title from the path, assuming it looks like 1508 * man*[/<arch>]/<title>.<section> or 1509 * cat<section>[/<arch>]/<title>.0 1510 */ 1511 1512 if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { 1513 fprintf(stderr, "%s: path too long\n", argv[i]); 1514 continue; 1515 } 1516 sec = arch = title = ""; 1517 src_form = 0; 1518 p = strrchr(buf, '\0'); 1519 while (p-- > buf) { 1520 if ('\0' == *sec && '.' 
== *p) { 1521 sec = p + 1; 1522 *p = '\0'; 1523 if ('0' == *sec) 1524 src_form |= MANDOC_FORM; 1525 else if ('1' <= *sec && '9' >= *sec) 1526 src_form |= MANDOC_SRC; 1527 continue; 1528 } 1529 if ('/' != *p) 1530 continue; 1531 if ('\0' == *title) { 1532 title = p + 1; 1533 *p = '\0'; 1534 continue; 1535 } 1536 if (0 == strncmp("man", p + 1, 3)) 1537 src_form |= MANDOC_SRC; 1538 else if (0 == strncmp("cat", p + 1, 3)) 1539 src_form |= MANDOC_FORM; 1540 else 1541 arch = p + 1; 1542 break; 1543 } 1544 if ('\0' == *title) { 1545 if (warnings) 1546 fprintf(stderr, 1547 "%s: cannot deduce title " 1548 "from filename\n", 1549 argv[i]); 1550 title = buf; 1551 } 1552 1553 /* 1554 * Build the file structure. 1555 */ 1556 1557 nof = mandoc_calloc(1, sizeof(struct of)); 1558 nof->fname = mandoc_strdup(argv[i]); 1559 nof->sec = mandoc_strdup(sec); 1560 nof->arch = mandoc_strdup(arch); 1561 nof->title = mandoc_strdup(title); 1562 nof->src_form = src_form; 1563 1564 /* 1565 * Add the structure to the list. 1566 */ 1567 1568 if (verb > 1) 1569 printf("%s: scheduling\n", argv[i]); 1570 if (NULL == *of) { 1571 *of = nof; 1572 (*of)->first = nof; 1573 } else { 1574 nof->first = (*of)->first; 1575 (*of)->next = nof; 1576 *of = nof; 1577 } 1578 } 1579 } 1580 1581 /* 1582 * Recursively build up a list of files to parse. 1583 * We use this instead of ftw() and so on because I don't want global 1584 * variables hanging around. 1585 * This ignores the whatis.db and whatis.index files, but assumes that 1586 * everything else is a manual. 1587 * Pass in a pointer to a NULL structure for the first invocation. 
1588 */ 1589 static void 1590 ofile_dirbuild(const char *dir, const char* psec, const char *parch, 1591 int p_src_form, struct of **of) 1592 { 1593 char buf[MAXPATHLEN]; 1594 size_t sz; 1595 DIR *d; 1596 const char *fn, *sec, *arch; 1597 char *p, *q, *suffix; 1598 struct of *nof; 1599 struct dirent *dp; 1600 int src_form; 1601 1602 if (NULL == (d = opendir(dir))) { 1603 if (warnings) 1604 perror(dir); 1605 return; 1606 } 1607 1608 while (NULL != (dp = readdir(d))) { 1609 fn = dp->d_name; 1610 1611 if ('.' == *fn) 1612 continue; 1613 1614 src_form = p_src_form; 1615 1616 if (DT_DIR == dp->d_type) { 1617 sec = psec; 1618 arch = parch; 1619 1620 /* 1621 * By default, only use directories called: 1622 * man<section>/[<arch>/] or 1623 * cat<section>/[<arch>/] 1624 */ 1625 1626 if ('\0' == *sec) { 1627 if(0 == strncmp("man", fn, 3)) { 1628 src_form |= MANDOC_SRC; 1629 sec = fn + 3; 1630 } else if (0 == strncmp("cat", fn, 3)) { 1631 src_form |= MANDOC_FORM; 1632 sec = fn + 3; 1633 } else { 1634 if (warnings) fprintf(stderr, 1635 "%s/%s: bad section\n", 1636 dir, fn); 1637 if (use_all) 1638 sec = fn; 1639 else 1640 continue; 1641 } 1642 } else if ('\0' == *arch) { 1643 if (NULL != strchr(fn, '.')) { 1644 if (warnings) fprintf(stderr, 1645 "%s/%s: bad architecture\n", 1646 dir, fn); 1647 if (0 == use_all) 1648 continue; 1649 } 1650 arch = fn; 1651 } else { 1652 if (warnings) fprintf(stderr, "%s/%s: " 1653 "excessive subdirectory\n", dir, fn); 1654 if (0 == use_all) 1655 continue; 1656 } 1657 1658 buf[0] = '\0'; 1659 strlcat(buf, dir, MAXPATHLEN); 1660 strlcat(buf, "/", MAXPATHLEN); 1661 sz = strlcat(buf, fn, MAXPATHLEN); 1662 1663 if (MAXPATHLEN <= sz) { 1664 if (warnings) fprintf(stderr, "%s/%s: " 1665 "path too long\n", dir, fn); 1666 continue; 1667 } 1668 1669 if (verb > 1) 1670 printf("%s: scanning\n", buf); 1671 1672 ofile_dirbuild(buf, sec, arch, src_form, of); 1673 continue; 1674 } 1675 1676 if (DT_REG != dp->d_type) { 1677 if (warnings) 1678 fprintf(stderr, 1679 
"%s/%s: not a regular file\n", 1680 dir, fn); 1681 continue; 1682 } 1683 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) 1684 continue; 1685 if ('\0' == *psec) { 1686 if (warnings) 1687 fprintf(stderr, 1688 "%s/%s: file outside section\n", 1689 dir, fn); 1690 if (0 == use_all) 1691 continue; 1692 } 1693 1694 /* 1695 * By default, skip files where the file name suffix 1696 * does not agree with the section directory 1697 * they are located in. 1698 */ 1699 1700 suffix = strrchr(fn, '.'); 1701 if (NULL == suffix) { 1702 if (warnings) 1703 fprintf(stderr, 1704 "%s/%s: no filename suffix\n", 1705 dir, fn); 1706 if (0 == use_all) 1707 continue; 1708 } else if ((MANDOC_SRC & src_form && 1709 strcmp(suffix + 1, psec)) || 1710 (MANDOC_FORM & src_form && 1711 strcmp(suffix + 1, "0"))) { 1712 if (warnings) 1713 fprintf(stderr, 1714 "%s/%s: wrong filename suffix\n", 1715 dir, fn); 1716 if (0 == use_all) 1717 continue; 1718 if ('0' == suffix[1]) 1719 src_form |= MANDOC_FORM; 1720 else if ('1' <= suffix[1] && '9' >= suffix[1]) 1721 src_form |= MANDOC_SRC; 1722 } 1723 1724 /* 1725 * Skip formatted manuals if a source version is 1726 * available. Ignore the age: it is very unlikely 1727 * that people install newer formatted base manuals 1728 * when they used to have source manuals before, 1729 * and in ports, old manuals get removed on update. 
1730 */ 1731 if (0 == use_all && MANDOC_FORM & src_form && 1732 '\0' != *psec) { 1733 buf[0] = '\0'; 1734 strlcat(buf, dir, MAXPATHLEN); 1735 p = strrchr(buf, '/'); 1736 if ('\0' != *parch && NULL != p) 1737 for (p--; p > buf; p--) 1738 if ('/' == *p) 1739 break; 1740 if (NULL == p) 1741 p = buf; 1742 else 1743 p++; 1744 if (0 == strncmp("cat", p, 3)) 1745 memcpy(p, "man", 3); 1746 strlcat(buf, "/", MAXPATHLEN); 1747 sz = strlcat(buf, fn, MAXPATHLEN); 1748 if (sz >= MAXPATHLEN) { 1749 if (warnings) fprintf(stderr, 1750 "%s/%s: path too long\n", 1751 dir, fn); 1752 continue; 1753 } 1754 q = strrchr(buf, '.'); 1755 if (NULL != q && p < q++) { 1756 *q = '\0'; 1757 sz = strlcat(buf, psec, MAXPATHLEN); 1758 if (sz >= MAXPATHLEN) { 1759 if (warnings) fprintf(stderr, 1760 "%s/%s: path too long\n", 1761 dir, fn); 1762 continue; 1763 } 1764 if (0 == access(buf, R_OK)) 1765 continue; 1766 } 1767 } 1768 1769 buf[0] = '\0'; 1770 assert('.' == dir[0]); 1771 if ('/' == dir[1]) { 1772 strlcat(buf, dir + 2, MAXPATHLEN); 1773 strlcat(buf, "/", MAXPATHLEN); 1774 } 1775 sz = strlcat(buf, fn, MAXPATHLEN); 1776 if (sz >= MAXPATHLEN) { 1777 if (warnings) fprintf(stderr, 1778 "%s/%s: path too long\n", dir, fn); 1779 continue; 1780 } 1781 1782 nof = mandoc_calloc(1, sizeof(struct of)); 1783 nof->fname = mandoc_strdup(buf); 1784 nof->sec = mandoc_strdup(psec); 1785 nof->arch = mandoc_strdup(parch); 1786 nof->src_form = src_form; 1787 1788 /* 1789 * Remember the file name without the extension, 1790 * to be used as the page title in the database. 1791 */ 1792 1793 if (NULL != suffix) 1794 *suffix = '\0'; 1795 nof->title = mandoc_strdup(fn); 1796 1797 /* 1798 * Add the structure to the list. 
1799 */ 1800 1801 if (verb > 1) 1802 printf("%s: scheduling\n", buf); 1803 1804 if (NULL == *of) { 1805 *of = nof; 1806 (*of)->first = nof; 1807 } else { 1808 nof->first = (*of)->first; 1809 (*of)->next = nof; 1810 *of = nof; 1811 } 1812 } 1813 1814 closedir(d); 1815 } 1816 1817 static void 1818 ofile_free(struct of *of) 1819 { 1820 struct of *nof; 1821 1822 if (NULL != of) 1823 of = of->first; 1824 1825 while (NULL != of) { 1826 nof = of->next; 1827 free(of->fname); 1828 free(of->sec); 1829 free(of->arch); 1830 free(of->title); 1831 free(of); 1832 of = nof; 1833 } 1834 } 1835