1 /* $Id: mandocdb.c,v 1.49.2.7 2013/10/02 21:03:26 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <dirent.h> 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <getopt.h> 30 #include <limits.h> 31 #include <stdio.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 37 #if defined(__linux__) || defined(__sun) 38 # include <endian.h> 39 # include <db_185.h> 40 #elif defined(__APPLE__) 41 # include <libkern/OSByteOrder.h> 42 # include <db.h> 43 #else 44 # include <sys/endian.h> 45 # include <db.h> 46 #endif 47 48 #if defined(__sun) 49 #include <sys/stat.h> 50 #endif 51 52 #include "man.h" 53 #include "mdoc.h" 54 #include "mandoc.h" 55 #include "mandocdb.h" 56 #include "manpath.h" 57 58 #define MANDOC_BUFSZ BUFSIZ 59 #define MANDOC_SLOP 1024 60 61 #define MANDOC_SRC 0x1 62 #define MANDOC_FORM 0x2 63 64 /* Access to the mandoc database on disk. */ 65 66 struct mdb { 67 char idxn[PATH_MAX]; /* index db filename */ 68 char dbn[PATH_MAX]; /* keyword db filename */ 69 DB *idx; /* index recno database */ 70 DB *db; /* keyword btree database */ 71 }; 72 73 /* Stack of temporarily unused index records. */ 74 75 struct recs { 76 recno_t *stack; /* pointer to a malloc'ed array */ 77 size_t size; /* number of allocated slots */ 78 size_t cur; /* current number of empty records */ 79 recno_t last; /* last record number in the index */ 80 }; 81 82 /* Tiny list for files. No need to bring in QUEUE. */ 83 84 struct of { 85 char *fname; /* heap-allocated */ 86 char *sec; 87 char *arch; 88 char *title; 89 int src_form; 90 struct of *next; /* NULL for last one */ 91 struct of *first; /* first in list */ 92 }; 93 94 /* Buffer for storing growable data. */ 95 96 struct buf { 97 char *cp; 98 size_t len; /* current length */ 99 size_t size; /* total buffer size */ 100 }; 101 102 /* Operation we're going to perform. */ 103 104 enum op { 105 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 106 OP_CONFFILE, /* new databases from custom config file */ 107 OP_UPDATE, /* delete/add entries in existing database */ 108 OP_DELETE, /* delete entries from existing database */ 109 OP_TEST /* change no databases, report potential problems */ 110 }; 111 112 #define MAN_ARGS DB *hash, \ 113 struct buf *buf, \ 114 struct buf *dbuf, \ 115 const struct man_node *n 116 #define MDOC_ARGS DB *hash, \ 117 struct buf *buf, \ 118 struct buf *dbuf, \ 119 const struct mdoc_node *n, \ 120 const struct mdoc_meta *m 121 122 static void buf_appendmdoc(struct buf *, 123 const struct mdoc_node *, int); 124 static void buf_append(struct buf *, const char *); 125 static void buf_appendb(struct buf *, 126 const void *, size_t); 127 static void dbt_put(DB *, const char *, DBT *, DBT *); 128 static void hash_put(DB *, const struct buf *, uint64_t); 129 static void hash_reset(DB **); 130 static void index_merge(const struct of *, struct mparse *, 131 struct buf *, struct buf *, DB *, 132 struct mdb *, struct recs *); 133 static void index_prune(const struct of *, struct mdb *, 134 struct recs *); 135 static void ofile_argbuild(int, char *[], struct of **, 136 const char *); 137 static void ofile_dirbuild(const char *, const char *, 138 const char *, int, struct of **); 139 static void ofile_free(struct of *); 140 static void pformatted(DB *, struct buf *, 141 struct buf *, const struct of *); 142 static int pman_node(MAN_ARGS); 143 static void pmdoc_node(MDOC_ARGS); 144 static int pmdoc_head(MDOC_ARGS); 145 static int pmdoc_body(MDOC_ARGS); 146 static int pmdoc_Fd(MDOC_ARGS); 147 static int pmdoc_In(MDOC_ARGS); 148 static int pmdoc_Fn(MDOC_ARGS); 149 static int pmdoc_Nd(MDOC_ARGS); 150 static int pmdoc_Nm(MDOC_ARGS); 151 static int pmdoc_Sh(MDOC_ARGS); 152 static int pmdoc_St(MDOC_ARGS); 153 static int pmdoc_Xr(MDOC_ARGS); 154 155 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ 156 157 struct mdoc_handler { 158 int (*fp)(MDOC_ARGS); /* Optional handler. */ 159 uint64_t mask; /* Set unless handler returns 0. */ 160 int flags; /* For use by pmdoc_node. */ 161 }; 162 163 static const struct mdoc_handler mdocs[MDOC_MAX] = { 164 { NULL, 0, 0 }, /* Ap */ 165 { NULL, 0, 0 }, /* Dd */ 166 { NULL, 0, 0 }, /* Dt */ 167 { NULL, 0, 0 }, /* Os */ 168 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ 169 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ 170 { NULL, 0, 0 }, /* Pp */ 171 { NULL, 0, 0 }, /* D1 */ 172 { NULL, 0, 0 }, /* Dl */ 173 { NULL, 0, 0 }, /* Bd */ 174 { NULL, 0, 0 }, /* Ed */ 175 { NULL, 0, 0 }, /* Bl */ 176 { NULL, 0, 0 }, /* El */ 177 { NULL, 0, 0 }, /* It */ 178 { NULL, 0, 0 }, /* Ad */ 179 { NULL, TYPE_An, MDOCF_CHILD }, /* An */ 180 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ 181 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ 182 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ 183 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ 184 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ 185 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ 186 { NULL, 0, 0 }, /* Ex */ 187 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ 188 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ 189 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ 190 { pmdoc_Fn, 0, 0 }, /* Fn */ 191 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ 192 { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ 193 { pmdoc_In, TYPE_In, 0 }, /* In */ 194 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ 195 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ 196 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ 197 { NULL, 0, 0 }, /* Op */ 198 { NULL, 0, 0 }, /* Ot */ 199 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ 200 { NULL, 0, 0 }, /* Rv */ 201 { pmdoc_St, TYPE_St, 0 }, /* St */ 202 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ 203 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ 204 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ 205 { NULL, 0, 0 }, /* %A */ 206 { NULL, 0, 0 }, /* %B */ 207 { NULL, 0, 0 }, /* %D */ 208 { NULL, 0, 0 }, /* %I */ 209 { NULL, 0, 0 }, /* %J */ 210 { NULL, 0, 0 }, /* %N */ 211 { NULL, 0, 0 }, /* %O */ 212 { NULL, 0, 0 }, /* %P */ 213 { NULL, 0, 0 }, /* %R */ 214 { NULL, 0, 0 }, /* %T */ 215 { NULL, 0, 0 }, /* %V */ 216 { NULL, 0, 0 }, /* Ac */ 217 { NULL, 0, 0 }, /* Ao */ 218 { NULL, 0, 0 }, /* Aq */ 219 { NULL, TYPE_At, MDOCF_CHILD }, /* At */ 220 { NULL, 0, 0 }, /* Bc */ 221 { NULL, 0, 0 }, /* Bf */ 222 { NULL, 0, 0 }, /* Bo */ 223 { NULL, 0, 0 }, /* Bq */ 224 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ 225 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ 226 { NULL, 0, 0 }, /* Db */ 227 { NULL, 0, 0 }, /* Dc */ 228 { NULL, 0, 0 }, /* Do */ 229 { NULL, 0, 0 }, /* Dq */ 230 { NULL, 0, 0 }, /* Ec */ 231 { NULL, 0, 0 }, /* Ef */ 232 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ 233 { NULL, 0, 0 }, /* Eo */ 234 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ 235 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ 236 { NULL, 0, 0 }, /* No */ 237 { NULL, 0, 0 }, /* Ns */ 238 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ 239 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ 240 { NULL, 0, 0 }, /* Pc */ 241 { NULL, 0, 0 }, /* Pf */ 242 { NULL, 0, 0 }, /* Po */ 243 { NULL, 0, 0 }, /* Pq */ 244 { NULL, 0, 0 }, /* Qc */ 245 { NULL, 0, 0 }, /* Ql */ 246 { NULL, 0, 0 }, /* Qo */ 247 { NULL, 0, 0 }, /* Qq */ 248 { NULL, 0, 0 }, /* Re */ 249 { NULL, 0, 0 }, /* Rs */ 250 { NULL, 0, 0 }, /* Sc */ 251 { NULL, 0, 0 }, /* So */ 252 { NULL, 0, 0 }, /* Sq */ 253 { NULL, 0, 0 }, /* Sm */ 254 { NULL, 0, 0 }, /* Sx */ 255 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ 256 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ 257 { NULL, 0, 0 }, /* Ux */ 258 { NULL, 0, 0 }, /* Xc */ 259 { NULL, 0, 0 }, /* Xo */ 260 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ 261 { NULL, 0, 0 }, /* Fc */ 262 { NULL, 0, 0 }, /* Oo */ 263 { NULL, 0, 0 }, /* Oc */ 264 { NULL, 0, 0 }, /* Bk */ 265 { NULL, 0, 0 }, /* Ek */ 266 { NULL, 0, 0 }, /* Bt */ 267 { NULL, 0, 0 }, /* Hf */ 268 { NULL, 0, 0 }, /* Fr */ 269 { NULL, 0, 0 }, /* Ud */ 270 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ 271 { NULL, 0, 0 }, /* Lp */ 272 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ 273 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ 274 { NULL, 0, 0 }, /* Brq */ 275 { NULL, 0, 0 }, /* Bro */ 276 { NULL, 0, 0 }, /* Brc */ 277 { NULL, 0, 0 }, /* %C */ 278 { NULL, 0, 0 }, /* Es */ 279 { NULL, 0, 0 }, /* En */ 280 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ 281 { NULL, 0, 0 }, /* %Q */ 282 { NULL, 0, 0 }, /* br */ 283 { NULL, 0, 0 }, /* sp */ 284 { NULL, 0, 0 }, /* %U */ 285 { NULL, 0, 0 }, /* Ta */ 286 }; 287 288 static const char *progname; 289 static int use_all; /* Use all directories and files. */ 290 static int verb; /* Output verbosity level. */ 291 static int warnings; /* Potential problems in manuals. */ 292 293 int 294 main(int argc, char *argv[]) 295 { 296 struct mparse *mp; /* parse sequence */ 297 struct manpaths dirs; 298 struct mdb mdb; 299 struct recs recs; 300 enum op op; /* current operation */ 301 const char *dir; 302 char *cp; 303 char pbuf[PATH_MAX]; 304 int ch, i, flags; 305 DB *hash; /* temporary keyword hashtable */ 306 BTREEINFO info; /* btree configuration */ 307 size_t sz1, sz2, ipath; 308 struct buf buf, /* keyword buffer */ 309 dbuf; /* description buffer */ 310 struct of *of; /* list of files for processing */ 311 extern int optind; 312 extern char *optarg; 313 314 progname = strrchr(argv[0], '/'); 315 if (progname == NULL) 316 progname = argv[0]; 317 else 318 ++progname; 319 320 memset(&dirs, 0, sizeof(struct manpaths)); 321 memset(&mdb, 0, sizeof(struct mdb)); 322 memset(&recs, 0, sizeof(struct recs)); 323 324 of = NULL; 325 mp = NULL; 326 hash = NULL; 327 op = OP_DEFAULT; 328 dir = NULL; 329 330 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) 331 switch (ch) { 332 case ('a'): 333 use_all = 1; 334 break; 335 case ('C'): 336 if (op) { 337 fprintf(stderr, 338 "-C: conflicting options\n"); 339 goto usage; 340 } 341 dir = optarg; 342 op = OP_CONFFILE; 343 break; 344 case ('d'): 345 if (op) { 346 fprintf(stderr, 347 "-d: conflicting options\n"); 348 goto usage; 349 } 350 dir = optarg; 351 op = OP_UPDATE; 352 break; 353 case ('t'): 354 dup2(STDOUT_FILENO, STDERR_FILENO); 355 if (op) { 356 fprintf(stderr, 357 "-t: conflicting options\n"); 358 goto usage; 359 } 360 op = OP_TEST; 361 use_all = 1; 362 warnings = 1; 363 break; 364 case ('u'): 365 if (op) { 366 fprintf(stderr, 367 "-u: conflicting options\n"); 368 goto usage; 369 } 370 dir = optarg; 371 op = OP_DELETE; 372 break; 373 case ('v'): 374 verb++; 375 break; 376 case ('W'): 377 warnings = 1; 378 break; 379 default: 380 goto usage; 381 } 382 383 argc -= optind; 384 argv += optind; 385 386 if (OP_CONFFILE == op && argc > 0) { 387 fprintf(stderr, "-C: too many arguments\n"); 388 goto usage; 389 } 390 391 memset(&info, 0, sizeof(BTREEINFO)); 392 info.lorder = 4321; 393 info.flags = R_DUP; 394 395 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL); 396 397 memset(&buf, 0, sizeof(struct buf)); 398 memset(&dbuf, 0, sizeof(struct buf)); 399 400 buf.size = dbuf.size = MANDOC_BUFSZ; 401 402 buf.cp = mandoc_malloc(buf.size); 403 dbuf.cp = mandoc_malloc(dbuf.size); 404 405 if (OP_TEST == op) { 406 ofile_argbuild(argc, argv, &of, NULL); 407 if (NULL == of) 408 goto out; 409 index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs); 410 goto out; 411 } 412 413 if (OP_UPDATE == op || OP_DELETE == op) { 414 if (NULL == realpath(dir, pbuf)) { 415 perror(dir); 416 exit((int)MANDOCLEVEL_BADARG); 417 } 418 if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) { 419 fprintf(stderr, "%s: path too long\n", pbuf); 420 exit((int)MANDOCLEVEL_BADARG); 421 } 422 423 strlcat(mdb.dbn, pbuf, PATH_MAX); 424 sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX); 425 426 strlcat(mdb.idxn, pbuf, PATH_MAX); 427 sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX); 428 429 if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) { 430 fprintf(stderr, "%s: path too long\n", mdb.idxn); 431 exit((int)MANDOCLEVEL_BADARG); 432 } 433 434 flags = O_CREAT | O_RDWR; 435 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 436 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 437 438 if (NULL == mdb.db) { 439 perror(mdb.dbn); 440 exit((int)MANDOCLEVEL_SYSERR); 441 } else if (NULL == mdb.idx) { 442 perror(mdb.idxn); 443 exit((int)MANDOCLEVEL_SYSERR); 444 } 445 446 ofile_argbuild(argc, argv, &of, pbuf); 447 448 if (NULL == of) 449 goto out; 450 451 index_prune(of, &mdb, &recs); 452 453 /* 454 * Go to the root of the respective manual tree. 455 * This must work or no manuals may be found (they're 456 * indexed relative to the root). 457 */ 458 459 if (OP_UPDATE == op) { 460 if (-1 == chdir(dir)) { 461 perror(dir); 462 exit((int)MANDOCLEVEL_SYSERR); 463 } 464 index_merge(of, mp, &dbuf, &buf, hash, 465 &mdb, &recs); 466 } 467 468 goto out; 469 } 470 471 /* 472 * Configure the directories we're going to scan. 473 * If we have command-line arguments, use them. 474 * If not, we use man(1)'s method (see mandocdb.8). 475 */ 476 477 if (argc > 0) { 478 dirs.paths = mandoc_calloc(argc, sizeof(char *)); 479 dirs.sz = argc; 480 for (i = 0; i < argc; i++) { 481 if (NULL == (cp = realpath(argv[i], pbuf))) { 482 perror(argv[i]); 483 goto out; 484 } 485 dirs.paths[i] = mandoc_strdup(cp); 486 } 487 } else 488 manpath_parse(&dirs, dir, NULL, NULL); 489 490 for (ipath = 0; ipath < dirs.sz; ipath++) { 491 492 /* 493 * Go to the root of the respective manual tree. 494 * This must work or no manuals may be found: 495 * They are indexed relative to the root. 496 */ 497 498 if (-1 == chdir(dirs.paths[ipath])) { 499 perror(dirs.paths[ipath]); 500 exit((int)MANDOCLEVEL_SYSERR); 501 } 502 503 /* Create a new database in two temporary files. */ 504 505 flags = O_CREAT | O_EXCL | O_RDWR; 506 while (NULL == mdb.db) { 507 strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX); 508 strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX); 509 if (NULL == mktemp(mdb.dbn)) { 510 perror(mdb.dbn); 511 exit((int)MANDOCLEVEL_SYSERR); 512 } 513 mdb.db = dbopen(mdb.dbn, flags, 0644, 514 DB_BTREE, &info); 515 if (NULL == mdb.db && EEXIST != errno) { 516 perror(mdb.dbn); 517 exit((int)MANDOCLEVEL_SYSERR); 518 } 519 } 520 while (NULL == mdb.idx) { 521 strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX); 522 strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX); 523 if (NULL == mktemp(mdb.idxn)) { 524 perror(mdb.idxn); 525 unlink(mdb.dbn); 526 exit((int)MANDOCLEVEL_SYSERR); 527 } 528 mdb.idx = dbopen(mdb.idxn, flags, 0644, 529 DB_RECNO, NULL); 530 if (NULL == mdb.idx && EEXIST != errno) { 531 perror(mdb.idxn); 532 unlink(mdb.dbn); 533 exit((int)MANDOCLEVEL_SYSERR); 534 } 535 } 536 537 /* 538 * Search for manuals and fill the new database. 539 */ 540 541 ofile_dirbuild(".", "", "", 0, &of); 542 543 if (NULL != of) { 544 index_merge(of, mp, &dbuf, &buf, hash, 545 &mdb, &recs); 546 ofile_free(of); 547 of = NULL; 548 } 549 550 (*mdb.db->close)(mdb.db); 551 (*mdb.idx->close)(mdb.idx); 552 mdb.db = NULL; 553 mdb.idx = NULL; 554 555 /* 556 * Replace the old database with the new one. 557 * This is not perfectly atomic, 558 * but i cannot think of a better way. 559 */ 560 561 if (-1 == rename(mdb.dbn, MANDOC_DB)) { 562 perror(MANDOC_DB); 563 unlink(mdb.dbn); 564 unlink(mdb.idxn); 565 exit((int)MANDOCLEVEL_SYSERR); 566 } 567 if (-1 == rename(mdb.idxn, MANDOC_IDX)) { 568 perror(MANDOC_IDX); 569 unlink(MANDOC_DB); 570 unlink(MANDOC_IDX); 571 unlink(mdb.idxn); 572 exit((int)MANDOCLEVEL_SYSERR); 573 } 574 } 575 576 out: 577 if (mdb.db) 578 (*mdb.db->close)(mdb.db); 579 if (mdb.idx) 580 (*mdb.idx->close)(mdb.idx); 581 if (hash) 582 (*hash->close)(hash); 583 if (mp) 584 mparse_free(mp); 585 586 manpath_free(&dirs); 587 ofile_free(of); 588 free(buf.cp); 589 free(dbuf.cp); 590 free(recs.stack); 591 592 return(MANDOCLEVEL_OK); 593 594 usage: 595 fprintf(stderr, 596 "usage: %s [-avvv] [-C file] | dir ... | -t file ...\n" 597 " -d dir [file ...] | " 598 "-u dir [file ...]\n", 599 progname); 600 601 return((int)MANDOCLEVEL_BADARG); 602 } 603 604 void 605 index_merge(const struct of *of, struct mparse *mp, 606 struct buf *dbuf, struct buf *buf, DB *hash, 607 struct mdb *mdb, struct recs *recs) 608 { 609 recno_t rec; 610 int ch, skip; 611 DBT key, val; 612 DB *files; /* temporary file name table */ 613 struct mdoc *mdoc; 614 struct man *man; 615 const char *fn, *msec, *march, *mtitle; 616 char *p; 617 uint64_t mask; 618 size_t sv; 619 unsigned seq; 620 uint64_t vbuf[2]; 621 char type; 622 623 if (warnings) { 624 files = NULL; 625 hash_reset(&files); 626 } 627 628 rec = 0; 629 for (of = of->first; of; of = of->next) { 630 fn = of->fname; 631 632 /* 633 * Try interpreting the file as mdoc(7) or man(7) 634 * source code, unless it is already known to be 635 * formatted. Fall back to formatted mode. 636 */ 637 638 mparse_reset(mp); 639 mdoc = NULL; 640 man = NULL; 641 642 if ((MANDOC_SRC & of->src_form || 643 ! (MANDOC_FORM & of->src_form)) && 644 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) 645 mparse_result(mp, &mdoc, &man); 646 647 if (NULL != mdoc) { 648 msec = mdoc_meta(mdoc)->msec; 649 march = mdoc_meta(mdoc)->arch; 650 if (NULL == march) 651 march = ""; 652 mtitle = mdoc_meta(mdoc)->title; 653 } else if (NULL != man) { 654 msec = man_meta(man)->msec; 655 march = ""; 656 mtitle = man_meta(man)->title; 657 } else { 658 msec = of->sec; 659 march = of->arch; 660 mtitle = of->title; 661 } 662 663 /* 664 * Check whether the manual section given in a file 665 * agrees with the directory where the file is located. 666 * Some manuals have suffixes like (3p) on their 667 * section number either inside the file or in the 668 * directory name, some are linked into more than one 669 * section, like encrypt(1) = makekey(8). Do not skip 670 * manuals for such reasons. 671 */ 672 673 skip = 0; 674 assert(of->sec); 675 assert(msec); 676 if (warnings) 677 if (strcasecmp(msec, of->sec)) 678 fprintf(stderr, "%s: " 679 "section \"%s\" manual " 680 "in \"%s\" directory\n", 681 fn, msec, of->sec); 682 683 /* 684 * Manual page directories exist for each kernel 685 * architecture as returned by machine(1). 686 * However, many manuals only depend on the 687 * application architecture as returned by arch(1). 688 * For example, some (2/ARM) manuals are shared 689 * across the "armish" and "zaurus" kernel 690 * architectures. 691 * A few manuals are even shared across completely 692 * different architectures, for example fdformat(1) 693 * on amd64, i386, sparc, and sparc64. 694 * Thus, warn about architecture mismatches, 695 * but don't skip manuals for this reason. 696 */ 697 698 assert(of->arch); 699 assert(march); 700 if (warnings) 701 if (strcasecmp(march, of->arch)) 702 fprintf(stderr, "%s: " 703 "architecture \"%s\" manual " 704 "in \"%s\" directory\n", 705 fn, march, of->arch); 706 707 /* 708 * By default, skip a file if the title given 709 * in the file disagrees with the file name. 710 * Do not warn, this happens for all MLINKs. 711 */ 712 713 assert(of->title); 714 assert(mtitle); 715 #if 0 716 if (strcasecmp(mtitle, of->title)) 717 skip = 1; 718 #endif 719 720 /* 721 * Build a title string for the file. If it matches 722 * the location of the file, remember the title as 723 * found; else, remember it as missing. 724 */ 725 726 if (warnings) { 727 buf->len = 0; 728 buf_appendb(buf, mtitle, strlen(mtitle)); 729 buf_appendb(buf, "(", 1); 730 buf_appendb(buf, msec, strlen(msec)); 731 if ('\0' != *march) { 732 buf_appendb(buf, "/", 1); 733 buf_appendb(buf, march, strlen(march)); 734 } 735 buf_appendb(buf, ")", 2); 736 for (p = buf->cp; '\0' != *p; p++) 737 *p = tolower(*p); 738 key.data = buf->cp; 739 key.size = buf->len; 740 val.data = NULL; 741 val.size = 0; 742 if (0 == skip) 743 val.data = ""; 744 else { 745 ch = (*files->get)(files, &key, &val, 0); 746 if (ch < 0) { 747 perror("hash"); 748 exit((int)MANDOCLEVEL_SYSERR); 749 } else if (ch > 0) { 750 val.data = (void *)fn; 751 val.size = strlen(fn) + 1; 752 } else 753 val.data = NULL; 754 } 755 if (NULL != val.data && 756 (*files->put)(files, &key, &val, 0) < 0) { 757 perror("hash"); 758 exit((int)MANDOCLEVEL_SYSERR); 759 } 760 } 761 762 if (skip && !use_all) 763 continue; 764 765 /* 766 * The index record value consists of a nil-terminated 767 * filename, a nil-terminated manual section, and a 768 * nil-terminated description. Use the actual 769 * location of the file, such that the user can find 770 * it with man(1). Since the description may not be 771 * set, we set a sentinel to see if we're going to 772 * write a nil byte in its place. 773 */ 774 775 dbuf->len = 0; 776 type = mdoc ? 'd' : (man ? 'a' : 'c'); 777 buf_appendb(dbuf, &type, 1); 778 buf_appendb(dbuf, fn, strlen(fn) + 1); 779 buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); 780 buf_appendb(dbuf, of->title, strlen(of->title) + 1); 781 buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); 782 783 sv = dbuf->len; 784 785 /* 786 * Collect keyword/mask pairs. 787 * Each pair will become a new btree node. 788 */ 789 790 hash_reset(&hash); 791 if (mdoc) 792 pmdoc_node(hash, buf, dbuf, 793 mdoc_node(mdoc), mdoc_meta(mdoc)); 794 else if (man) 795 pman_node(hash, buf, dbuf, man_node(man)); 796 else 797 pformatted(hash, buf, dbuf, of); 798 799 /* Test mode, do not access any database. */ 800 801 if (NULL == mdb->db || NULL == mdb->idx) 802 continue; 803 804 /* 805 * Make sure the file name is always registered 806 * as an .Nm search key. 807 */ 808 buf->len = 0; 809 buf_append(buf, of->title); 810 hash_put(hash, buf, TYPE_Nm); 811 812 /* 813 * Reclaim an empty index record, if available. 814 * Use its record number for all new btree nodes. 815 */ 816 817 if (recs->cur > 0) { 818 recs->cur--; 819 rec = recs->stack[(int)recs->cur]; 820 } else if (recs->last > 0) { 821 rec = recs->last; 822 recs->last = 0; 823 } else 824 rec++; 825 vbuf[1] = htobe64(rec); 826 827 /* 828 * Copy from the in-memory hashtable of pending 829 * keyword/mask pairs into the database. 830 */ 831 832 seq = R_FIRST; 833 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 834 seq = R_NEXT; 835 assert(sizeof(uint64_t) == val.size); 836 memcpy(&mask, val.data, val.size); 837 vbuf[0] = htobe64(mask); 838 val.size = sizeof(vbuf); 839 val.data = &vbuf; 840 dbt_put(mdb->db, mdb->dbn, &key, &val); 841 } 842 if (ch < 0) { 843 perror("hash"); 844 unlink(mdb->dbn); 845 unlink(mdb->idxn); 846 exit((int)MANDOCLEVEL_SYSERR); 847 } 848 849 /* 850 * Apply to the index. If we haven't had a description 851 * set, put an empty one in now. 852 */ 853 854 if (dbuf->len == sv) 855 buf_appendb(dbuf, "", 1); 856 857 key.data = &rec; 858 key.size = sizeof(recno_t); 859 860 val.data = dbuf->cp; 861 val.size = dbuf->len; 862 863 if (verb) 864 printf("%s: adding to index\n", fn); 865 866 dbt_put(mdb->idx, mdb->idxn, &key, &val); 867 } 868 869 /* 870 * Iterate the remembered file titles and check that 871 * all files can be found by their main title. 872 */ 873 874 if (warnings) { 875 seq = R_FIRST; 876 while (0 == (*files->seq)(files, &key, &val, seq)) { 877 seq = R_NEXT; 878 if (val.size) 879 fprintf(stderr, "%s: probably " 880 "unreachable, title is %s\n", 881 (char *)val.data, (char *)key.data); 882 } 883 (*files->close)(files); 884 } 885 } 886 887 /* 888 * Scan through all entries in the index file `idx' and prune those 889 * entries in `ofile'. 890 * Pruning consists of removing from `db', then invalidating the entry 891 * in `idx' (zeroing its value size). 892 */ 893 static void 894 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs) 895 { 896 const struct of *of; 897 const char *fn; 898 uint64_t vbuf[2]; 899 unsigned seq, sseq; 900 DBT key, val; 901 int ch; 902 903 recs->cur = 0; 904 seq = R_FIRST; 905 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { 906 seq = R_NEXT; 907 assert(sizeof(recno_t) == key.size); 908 memcpy(&recs->last, key.data, key.size); 909 910 /* Deleted records are zero-sized. Skip them. */ 911 912 if (0 == val.size) 913 goto cont; 914 915 /* 916 * Make sure we're sane. 917 * Read past our mdoc/man/cat type to the next string, 918 * then make sure it's bounded by a NUL. 919 * Failing any of these, we go into our error handler. 920 */ 921 922 fn = (char *)val.data + 1; 923 if (NULL == memchr(fn, '\0', val.size - 1)) 924 break; 925 926 /* 927 * Search for the file in those we care about. 928 * XXX: build this into a tree. Too slow. 929 */ 930 931 for (of = ofile->first; of; of = of->next) 932 if (0 == strcmp(fn, of->fname)) 933 break; 934 935 if (NULL == of) 936 continue; 937 938 /* 939 * Search through the keyword database, throwing out all 940 * references to our file. 941 */ 942 943 sseq = R_FIRST; 944 while (0 == (ch = (*mdb->db->seq)(mdb->db, 945 &key, &val, sseq))) { 946 sseq = R_NEXT; 947 if (sizeof(vbuf) != val.size) 948 break; 949 950 memcpy(vbuf, val.data, val.size); 951 if (recs->last != betoh64(vbuf[1])) 952 continue; 953 954 if ((ch = (*mdb->db->del)(mdb->db, 955 &key, R_CURSOR)) < 0) 956 break; 957 } 958 959 if (ch < 0) { 960 perror(mdb->dbn); 961 exit((int)MANDOCLEVEL_SYSERR); 962 } else if (1 != ch) { 963 fprintf(stderr, "%s: corrupt database\n", 964 mdb->dbn); 965 exit((int)MANDOCLEVEL_SYSERR); 966 } 967 968 if (verb) 969 printf("%s: deleting from index\n", fn); 970 971 val.size = 0; 972 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); 973 974 if (ch < 0) 975 break; 976 cont: 977 if (recs->cur >= recs->size) { 978 recs->size += MANDOC_SLOP; 979 recs->stack = mandoc_realloc(recs->stack, 980 recs->size * sizeof(recno_t)); 981 } 982 983 recs->stack[(int)recs->cur] = recs->last; 984 recs->cur++; 985 } 986 987 if (ch < 0) { 988 perror(mdb->idxn); 989 exit((int)MANDOCLEVEL_SYSERR); 990 } else if (1 != ch) { 991 fprintf(stderr, "%s: corrupt index\n", mdb->idxn); 992 exit((int)MANDOCLEVEL_SYSERR); 993 } 994 995 recs->last++; 996 } 997 998 /* 999 * Grow the buffer (if necessary) and copy in a binary string. 1000 */ 1001 static void 1002 buf_appendb(struct buf *buf, const void *cp, size_t sz) 1003 { 1004 1005 /* Overshoot by MANDOC_BUFSZ. */ 1006 1007 while (buf->len + sz >= buf->size) { 1008 buf->size = buf->len + sz + MANDOC_BUFSZ; 1009 buf->cp = mandoc_realloc(buf->cp, buf->size); 1010 } 1011 1012 memcpy(buf->cp + (int)buf->len, cp, sz); 1013 buf->len += sz; 1014 } 1015 1016 /* 1017 * Append a nil-terminated string to the buffer. 1018 * This can be invoked multiple times. 1019 * The buffer string will be nil-terminated. 1020 * If invoked multiple times, a space is put between strings. 1021 */ 1022 static void 1023 buf_append(struct buf *buf, const char *cp) 1024 { 1025 size_t sz; 1026 1027 if (0 == (sz = strlen(cp))) 1028 return; 1029 1030 if (buf->len) 1031 buf->cp[(int)buf->len - 1] = ' '; 1032 1033 buf_appendb(buf, cp, sz + 1); 1034 } 1035 1036 /* 1037 * Recursively add all text from a given node. 1038 * This is optimised for general mdoc nodes in this context, which do 1039 * not consist of subexpressions and having a recursive call for n->next 1040 * would be wasteful. 1041 * The "f" variable should be 0 unless called from pmdoc_Nd for the 1042 * description buffer, which does not start at the beginning of the 1043 * buffer. 1044 */ 1045 static void 1046 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 1047 { 1048 1049 for ( ; n; n = n->next) { 1050 if (n->child) 1051 buf_appendmdoc(buf, n->child, f); 1052 1053 if (MDOC_TEXT == n->type && f) { 1054 f = 0; 1055 buf_appendb(buf, n->string, 1056 strlen(n->string) + 1); 1057 } else if (MDOC_TEXT == n->type) 1058 buf_append(buf, n->string); 1059 1060 } 1061 } 1062 1063 static void 1064 hash_reset(DB **db) 1065 { 1066 DB *hash; 1067 1068 if (NULL != (hash = *db)) 1069 (*hash->close)(hash); 1070 1071 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 1072 if (NULL == *db) { 1073 perror("hash"); 1074 exit((int)MANDOCLEVEL_SYSERR); 1075 } 1076 } 1077 1078 /* ARGSUSED */ 1079 static int 1080 pmdoc_head(MDOC_ARGS) 1081 { 1082 1083 return(MDOC_HEAD == n->type); 1084 } 1085 1086 /* ARGSUSED */ 1087 static int 1088 pmdoc_body(MDOC_ARGS) 1089 { 1090 1091 return(MDOC_BODY == n->type); 1092 } 1093 1094 /* ARGSUSED */ 1095 static int 1096 pmdoc_Fd(MDOC_ARGS) 1097 { 1098 const char *start, *end; 1099 size_t sz; 1100 1101 if (SEC_SYNOPSIS != n->sec) 1102 return(0); 1103 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 1104 return(0); 1105 1106 /* 1107 * Only consider those `Fd' macro fields that begin with an 1108 * "inclusion" token (versus, e.g., #define). 1109 */ 1110 if (strcmp("#include", n->string)) 1111 return(0); 1112 1113 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1114 return(0); 1115 1116 /* 1117 * Strip away the enclosing angle brackets and make sure we're 1118 * not zero-length. 1119 */ 1120 1121 start = n->string; 1122 if ('<' == *start || '"' == *start) 1123 start++; 1124 1125 if (0 == (sz = strlen(start))) 1126 return(0); 1127 1128 end = &start[(int)sz - 1]; 1129 if ('>' == *end || '"' == *end) 1130 end--; 1131 1132 assert(end >= start); 1133 1134 buf_appendb(buf, start, (size_t)(end - start + 1)); 1135 buf_appendb(buf, "", 1); 1136 return(1); 1137 } 1138 1139 /* ARGSUSED */ 1140 static int 1141 pmdoc_In(MDOC_ARGS) 1142 { 1143 1144 if (NULL == n->child || MDOC_TEXT != n->child->type) 1145 return(0); 1146 1147 buf_append(buf, n->child->string); 1148 return(1); 1149 } 1150 1151 /* ARGSUSED */ 1152 static int 1153 pmdoc_Fn(MDOC_ARGS) 1154 { 1155 struct mdoc_node *nn; 1156 const char *cp; 1157 1158 nn = n->child; 1159 1160 if (NULL == nn || MDOC_TEXT != nn->type) 1161 return(0); 1162 1163 /* .Fn "struct type *name" "char *arg" */ 1164 1165 cp = strrchr(nn->string, ' '); 1166 if (NULL == cp) 1167 cp = nn->string; 1168 1169 /* Strip away pointer symbol. */ 1170 1171 while ('*' == *cp) 1172 cp++; 1173 1174 /* Store the function name. */ 1175 1176 buf_append(buf, cp); 1177 hash_put(hash, buf, TYPE_Fn); 1178 1179 /* Store the function type. */ 1180 1181 if (nn->string < cp) { 1182 buf->len = 0; 1183 buf_appendb(buf, nn->string, cp - nn->string); 1184 buf_appendb(buf, "", 1); 1185 hash_put(hash, buf, TYPE_Ft); 1186 } 1187 1188 /* Store the arguments. */ 1189 1190 for (nn = nn->next; nn; nn = nn->next) { 1191 if (MDOC_TEXT != nn->type) 1192 continue; 1193 buf->len = 0; 1194 buf_append(buf, nn->string); 1195 hash_put(hash, buf, TYPE_Fa); 1196 } 1197 1198 return(0); 1199 } 1200 1201 /* ARGSUSED */ 1202 static int 1203 pmdoc_St(MDOC_ARGS) 1204 { 1205 1206 if (NULL == n->child || MDOC_TEXT != n->child->type) 1207 return(0); 1208 1209 buf_append(buf, n->child->string); 1210 return(1); 1211 } 1212 1213 /* ARGSUSED */ 1214 static int 1215 pmdoc_Xr(MDOC_ARGS) 1216 { 1217 1218 if (NULL == (n = n->child)) 1219 return(0); 1220 1221 buf_appendb(buf, n->string, strlen(n->string)); 1222 1223 if (NULL != (n = n->next)) { 1224 buf_appendb(buf, ".", 1); 1225 buf_appendb(buf, n->string, strlen(n->string) + 1); 1226 } else 1227 buf_appendb(buf, ".", 2); 1228 1229 return(1); 1230 } 1231 1232 /* ARGSUSED */ 1233 static int 1234 pmdoc_Nd(MDOC_ARGS) 1235 { 1236 1237 if (MDOC_BODY != n->type) 1238 return(0); 1239 1240 buf_appendmdoc(dbuf, n->child, 1); 1241 return(1); 1242 } 1243 1244 /* ARGSUSED */ 1245 static int 1246 pmdoc_Nm(MDOC_ARGS) 1247 { 1248 1249 if (SEC_NAME == n->sec) 1250 return(1); 1251 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 1252 return(0); 1253 1254 if (NULL == n->child) 1255 buf_append(buf, m->name); 1256 1257 return(1); 1258 } 1259 1260 /* ARGSUSED */ 1261 static int 1262 pmdoc_Sh(MDOC_ARGS) 1263 { 1264 1265 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1266 } 1267 1268 static void 1269 hash_put(DB *db, const struct buf *buf, uint64_t mask) 1270 { 1271 uint64_t oldmask; 1272 DBT key, val; 1273 int rc; 1274 1275 if (buf->len < 2) 1276 return; 1277 1278 key.data = buf->cp; 1279 key.size = buf->len; 1280 1281 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1282 perror("hash"); 1283 exit((int)MANDOCLEVEL_SYSERR); 1284 } else if (0 == rc) { 1285 assert(sizeof(uint64_t) == val.size); 1286 memcpy(&oldmask, val.data, val.size); 1287 mask |= oldmask; 1288 } 1289 1290 val.data = &mask; 1291 val.size = sizeof(uint64_t); 1292 1293 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1294 perror("hash"); 1295 exit((int)MANDOCLEVEL_SYSERR); 1296 } 1297 } 1298 1299 static void 1300 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1301 { 1302 1303 assert(key->size); 1304 assert(val->size); 1305 1306 if (0 == (*db->put)(db, key, val, 0)) 1307 return; 1308 1309 perror(dbn); 1310 exit((int)MANDOCLEVEL_SYSERR); 1311 /* NOTREACHED */ 1312 } 1313 1314 /* 1315 * Call out to per-macro handlers after clearing the persistent database 1316 * key. If the macro sets the database key, flush it to the database. 1317 */ 1318 static void 1319 pmdoc_node(MDOC_ARGS) 1320 { 1321 1322 if (NULL == n) 1323 return; 1324 1325 switch (n->type) { 1326 case (MDOC_HEAD): 1327 /* FALLTHROUGH */ 1328 case (MDOC_BODY): 1329 /* FALLTHROUGH */ 1330 case (MDOC_TAIL): 1331 /* FALLTHROUGH */ 1332 case (MDOC_BLOCK): 1333 /* FALLTHROUGH */ 1334 case (MDOC_ELEM): 1335 buf->len = 0; 1336 1337 /* 1338 * Both NULL handlers and handlers returning true 1339 * request using the data. Only skip the element 1340 * when the handler returns false. 1341 */ 1342 1343 if (NULL != mdocs[n->tok].fp && 1344 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) 1345 break; 1346 1347 /* 1348 * For many macros, use the text from all children. 1349 * Set zero flags for macros not needing this. 1350 * In that case, the handler must fill the buffer. 1351 */ 1352 1353 if (MDOCF_CHILD & mdocs[n->tok].flags) 1354 buf_appendmdoc(buf, n->child, 0); 1355 1356 /* 1357 * Cover the most common case: 1358 * Automatically stage one string per element. 1359 * Set a zero mask for macros not needing this. 1360 * Additional staging can be done in the handler. 1361 */ 1362 1363 if (mdocs[n->tok].mask) 1364 hash_put(hash, buf, mdocs[n->tok].mask); 1365 break; 1366 default: 1367 break; 1368 } 1369 1370 pmdoc_node(hash, buf, dbuf, n->child, m); 1371 pmdoc_node(hash, buf, dbuf, n->next, m); 1372 } 1373 1374 static int 1375 pman_node(MAN_ARGS) 1376 { 1377 const struct man_node *head, *body; 1378 char *start, *sv, *title; 1379 size_t sz, titlesz; 1380 1381 if (NULL == n) 1382 return(0); 1383 1384 /* 1385 * We're only searching for one thing: the first text child in 1386 * the BODY of a NAME section. Since we don't keep track of 1387 * sections in -man, run some hoops to find out whether we're in 1388 * the correct section or not. 1389 */ 1390 1391 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1392 body = n; 1393 assert(body->parent); 1394 if (NULL != (head = body->parent->head) && 1395 1 == head->nchild && 1396 NULL != (head = (head->child)) && 1397 MAN_TEXT == head->type && 1398 0 == strcmp(head->string, "NAME") && 1399 NULL != (body = body->child) && 1400 MAN_TEXT == body->type) { 1401 1402 title = NULL; 1403 titlesz = 0; 1404 /* 1405 * Suck the entire NAME section into memory. 1406 * Yes, we might run away. 1407 * But too many manuals have big, spread-out 1408 * NAME sections over many lines. 1409 */ 1410 for ( ; NULL != body; body = body->next) { 1411 if (MAN_TEXT != body->type) 1412 break; 1413 if (0 == (sz = strlen(body->string))) 1414 continue; 1415 title = mandoc_realloc 1416 (title, titlesz + sz + 1); 1417 memcpy(title + titlesz, body->string, sz); 1418 titlesz += sz + 1; 1419 title[(int)titlesz - 1] = ' '; 1420 } 1421 if (NULL == title) 1422 return(0); 1423 1424 title = mandoc_realloc(title, titlesz + 1); 1425 title[(int)titlesz] = '\0'; 1426 1427 /* Skip leading space. */ 1428 1429 sv = title; 1430 while (isspace((unsigned char)*sv)) 1431 sv++; 1432 1433 if (0 == (sz = strlen(sv))) { 1434 free(title); 1435 return(0); 1436 } 1437 1438 /* Erase trailing space. */ 1439 1440 start = &sv[sz - 1]; 1441 while (start > sv && isspace((unsigned char)*start)) 1442 *start-- = '\0'; 1443 1444 if (start == sv) { 1445 free(title); 1446 return(0); 1447 } 1448 1449 start = sv; 1450 1451 /* 1452 * Go through a special heuristic dance here. 1453 * This is why -man manuals are great! 1454 * (I'm being sarcastic: my eyes are bleeding.) 1455 * Conventionally, one or more manual names are 1456 * comma-specified prior to a whitespace, then a 1457 * dash, then a description. Try to puzzle out 1458 * the name parts here. 1459 */ 1460 1461 for ( ;; ) { 1462 sz = strcspn(start, " ,"); 1463 if ('\0' == start[(int)sz]) 1464 break; 1465 1466 buf->len = 0; 1467 buf_appendb(buf, start, sz); 1468 buf_appendb(buf, "", 1); 1469 1470 hash_put(hash, buf, TYPE_Nm); 1471 1472 if (' ' == start[(int)sz]) { 1473 start += (int)sz + 1; 1474 break; 1475 } 1476 1477 assert(',' == start[(int)sz]); 1478 start += (int)sz + 1; 1479 while (' ' == *start) 1480 start++; 1481 } 1482 1483 buf->len = 0; 1484 1485 if (sv == start) { 1486 buf_append(buf, start); 1487 free(title); 1488 return(1); 1489 } 1490 1491 while (isspace((unsigned char)*start)) 1492 start++; 1493 1494 if (0 == strncmp(start, "-", 1)) 1495 start += 1; 1496 else if (0 == strncmp(start, "\\-\\-", 4)) 1497 start += 4; 1498 else if (0 == strncmp(start, "\\-", 2)) 1499 start += 2; 1500 else if (0 == strncmp(start, "\\(en", 4)) 1501 start += 4; 1502 else if (0 == strncmp(start, "\\(em", 4)) 1503 start += 4; 1504 1505 while (' ' == *start) 1506 start++; 1507 1508 sz = strlen(start) + 1; 1509 buf_appendb(dbuf, start, sz); 1510 buf_appendb(buf, start, sz); 1511 1512 hash_put(hash, buf, TYPE_Nd); 1513 free(title); 1514 } 1515 } 1516 1517 for (n = n->child; n; n = n->next) 1518 if (pman_node(hash, buf, dbuf, n)) 1519 return(1); 1520 1521 return(0); 1522 } 1523 1524 /* 1525 * Parse a formatted manual page. 1526 * By necessity, this involves rather crude guesswork. 1527 */ 1528 static void 1529 pformatted(DB *hash, struct buf *buf, 1530 struct buf *dbuf, const struct of *of) 1531 { 1532 FILE *stream; 1533 char *line, *p, *title; 1534 size_t len, plen, titlesz; 1535 1536 if (NULL == (stream = fopen(of->fname, "r"))) { 1537 if (warnings) 1538 perror(of->fname); 1539 return; 1540 } 1541 1542 /* 1543 * Always use the title derived from the filename up front, 1544 * do not even try to find it in the file. This also makes 1545 * sure we don't end up with an orphan index record, even if 1546 * the file content turns out to be completely unintelligible. 1547 */ 1548 1549 buf->len = 0; 1550 buf_append(buf, of->title); 1551 hash_put(hash, buf, TYPE_Nm); 1552 1553 /* Skip to first blank line. */ 1554 1555 while (NULL != (line = fgetln(stream, &len))) 1556 if ('\n' == *line) 1557 break; 1558 1559 /* 1560 * Assume the first line that is not indented 1561 * is the first section header. Skip to it. 1562 */ 1563 1564 while (NULL != (line = fgetln(stream, &len))) 1565 if ('\n' != *line && ' ' != *line) 1566 break; 1567 1568 /* 1569 * Read up until the next section into a buffer. 1570 * Strip the leading and trailing newline from each read line, 1571 * appending a trailing space. 1572 * Ignore empty (whitespace-only) lines. 1573 */ 1574 1575 titlesz = 0; 1576 title = NULL; 1577 1578 while (NULL != (line = fgetln(stream, &len))) { 1579 if (' ' != *line || '\n' != line[(int)len - 1]) 1580 break; 1581 while (len > 0 && isspace((unsigned char)*line)) { 1582 line++; 1583 len--; 1584 } 1585 if (1 == len) 1586 continue; 1587 title = mandoc_realloc(title, titlesz + len); 1588 memcpy(title + titlesz, line, len); 1589 titlesz += len; 1590 title[(int)titlesz - 1] = ' '; 1591 } 1592 1593 1594 /* 1595 * If no page content can be found, or the input line 1596 * is already the next section header, or there is no 1597 * trailing newline, reuse the page title as the page 1598 * description. 1599 */ 1600 1601 if (NULL == title || '\0' == *title) { 1602 if (warnings) 1603 fprintf(stderr, "%s: cannot find NAME section\n", 1604 of->fname); 1605 buf_appendb(dbuf, buf->cp, buf->size); 1606 hash_put(hash, buf, TYPE_Nd); 1607 fclose(stream); 1608 free(title); 1609 return; 1610 } 1611 1612 title = mandoc_realloc(title, titlesz + 1); 1613 title[(int)titlesz] = '\0'; 1614 1615 /* 1616 * Skip to the first dash. 1617 * Use the remaining line as the description (no more than 70 1618 * bytes). 1619 */ 1620 1621 if (NULL != (p = strstr(title, "- "))) { 1622 for (p += 2; ' ' == *p || '\b' == *p; p++) 1623 /* Skip to next word. */ ; 1624 } else { 1625 if (warnings) 1626 fprintf(stderr, "%s: no dash in title line\n", 1627 of->fname); 1628 p = title; 1629 } 1630 1631 plen = strlen(p); 1632 1633 /* Strip backspace-encoding from line. */ 1634 1635 while (NULL != (line = memchr(p, '\b', plen))) { 1636 len = line - p; 1637 if (0 == len) { 1638 memmove(line, line + 1, plen--); 1639 continue; 1640 } 1641 memmove(line - 1, line + 1, plen - len); 1642 plen -= 2; 1643 } 1644 1645 buf_appendb(dbuf, p, plen + 1); 1646 buf->len = 0; 1647 buf_appendb(buf, p, plen + 1); 1648 hash_put(hash, buf, TYPE_Nd); 1649 fclose(stream); 1650 free(title); 1651 } 1652 1653 static void 1654 ofile_argbuild(int argc, char *argv[], struct of **of, 1655 const char *basedir) 1656 { 1657 char buf[PATH_MAX]; 1658 char pbuf[PATH_MAX]; 1659 const char *sec, *arch, *title; 1660 char *relpath, *p; 1661 int i, src_form; 1662 struct of *nof; 1663 1664 for (i = 0; i < argc; i++) { 1665 if (NULL == (relpath = realpath(argv[i], pbuf))) { 1666 perror(argv[i]); 1667 continue; 1668 } 1669 if (NULL != basedir) { 1670 if (strstr(pbuf, basedir) != pbuf) { 1671 fprintf(stderr, "%s: file outside " 1672 "base directory %s\n", 1673 pbuf, basedir); 1674 continue; 1675 } 1676 relpath = pbuf + strlen(basedir); 1677 } 1678 1679 /* 1680 * Try to infer the manual section, architecture and 1681 * page title from the path, assuming it looks like 1682 * man*[/<arch>]/<title>.<section> or 1683 * cat<section>[/<arch>]/<title>.0 1684 */ 1685 1686 if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) { 1687 fprintf(stderr, "%s: path too long\n", relpath); 1688 continue; 1689 } 1690 sec = arch = title = ""; 1691 src_form = 0; 1692 p = strrchr(buf, '\0'); 1693 while (p-- > buf) { 1694 if ('\0' == *sec && '.' == *p) { 1695 sec = p + 1; 1696 *p = '\0'; 1697 if ('0' == *sec) 1698 src_form |= MANDOC_FORM; 1699 else if ('1' <= *sec && '9' >= *sec) 1700 src_form |= MANDOC_SRC; 1701 continue; 1702 } 1703 if ('/' != *p) 1704 continue; 1705 if ('\0' == *title) { 1706 title = p + 1; 1707 *p = '\0'; 1708 continue; 1709 } 1710 if (0 == strncmp("man", p + 1, 3)) 1711 src_form |= MANDOC_SRC; 1712 else if (0 == strncmp("cat", p + 1, 3)) 1713 src_form |= MANDOC_FORM; 1714 else 1715 arch = p + 1; 1716 break; 1717 } 1718 if ('\0' == *title) { 1719 if (warnings) 1720 fprintf(stderr, 1721 "%s: cannot deduce title " 1722 "from filename\n", 1723 relpath); 1724 title = buf; 1725 } 1726 1727 /* 1728 * Build the file structure. 1729 */ 1730 1731 nof = mandoc_calloc(1, sizeof(struct of)); 1732 nof->fname = mandoc_strdup(relpath); 1733 nof->sec = mandoc_strdup(sec); 1734 nof->arch = mandoc_strdup(arch); 1735 nof->title = mandoc_strdup(title); 1736 nof->src_form = src_form; 1737 1738 /* 1739 * Add the structure to the list. 1740 */ 1741 1742 if (NULL == *of) { 1743 *of = nof; 1744 (*of)->first = nof; 1745 } else { 1746 nof->first = (*of)->first; 1747 (*of)->next = nof; 1748 *of = nof; 1749 } 1750 } 1751 } 1752 1753 /* 1754 * Recursively build up a list of files to parse. 1755 * We use this instead of ftw() and so on because I don't want global 1756 * variables hanging around. 1757 * This ignores the mandoc.db and mandoc.index files, but assumes that 1758 * everything else is a manual. 1759 * Pass in a pointer to a NULL structure for the first invocation. 1760 */ 1761 static void 1762 ofile_dirbuild(const char *dir, const char* psec, const char *parch, 1763 int p_src_form, struct of **of) 1764 { 1765 char buf[PATH_MAX]; 1766 #if defined(__sun) 1767 struct stat sb; 1768 #endif 1769 size_t sz; 1770 DIR *d; 1771 const char *fn, *sec, *arch; 1772 char *p, *q, *suffix; 1773 struct of *nof; 1774 struct dirent *dp; 1775 int src_form; 1776 1777 if (NULL == (d = opendir(dir))) { 1778 if (warnings) 1779 perror(dir); 1780 return; 1781 } 1782 1783 while (NULL != (dp = readdir(d))) { 1784 fn = dp->d_name; 1785 1786 if ('.' == *fn) 1787 continue; 1788 1789 src_form = p_src_form; 1790 1791 #if defined(__sun) 1792 stat(dp->d_name, &sb); 1793 if (S_IFDIR & sb.st_mode) { 1794 #else 1795 if (DT_DIR == dp->d_type) { 1796 #endif 1797 sec = psec; 1798 arch = parch; 1799 1800 /* 1801 * By default, only use directories called: 1802 * man<section>/[<arch>/] or 1803 * cat<section>/[<arch>/] 1804 */ 1805 1806 if ('\0' == *sec) { 1807 if(0 == strncmp("man", fn, 3)) { 1808 src_form |= MANDOC_SRC; 1809 sec = fn + 3; 1810 } else if (0 == strncmp("cat", fn, 3)) { 1811 src_form |= MANDOC_FORM; 1812 sec = fn + 3; 1813 } else { 1814 if (warnings) fprintf(stderr, 1815 "%s/%s: bad section\n", 1816 dir, fn); 1817 if (use_all) 1818 sec = fn; 1819 else 1820 continue; 1821 } 1822 } else if ('\0' == *arch) { 1823 if (NULL != strchr(fn, '.')) { 1824 if (warnings) fprintf(stderr, 1825 "%s/%s: bad architecture\n", 1826 dir, fn); 1827 if (0 == use_all) 1828 continue; 1829 } 1830 arch = fn; 1831 } else { 1832 if (warnings) fprintf(stderr, "%s/%s: " 1833 "excessive subdirectory\n", dir, fn); 1834 if (0 == use_all) 1835 continue; 1836 } 1837 1838 buf[0] = '\0'; 1839 strlcat(buf, dir, PATH_MAX); 1840 strlcat(buf, "/", PATH_MAX); 1841 sz = strlcat(buf, fn, PATH_MAX); 1842 1843 if (PATH_MAX <= sz) { 1844 if (warnings) fprintf(stderr, "%s/%s: " 1845 "path too long\n", dir, fn); 1846 continue; 1847 } 1848 1849 ofile_dirbuild(buf, sec, arch, src_form, of); 1850 continue; 1851 } 1852 1853 #if defined(__sun) 1854 if (0 == S_IFREG & sb.st_mode) { 1855 #else 1856 if (DT_REG != dp->d_type) { 1857 #endif 1858 if (warnings) 1859 fprintf(stderr, 1860 "%s/%s: not a regular file\n", 1861 dir, fn); 1862 continue; 1863 } 1864 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) 1865 continue; 1866 if ('\0' == *psec) { 1867 if (warnings) 1868 fprintf(stderr, 1869 "%s/%s: file outside section\n", 1870 dir, fn); 1871 if (0 == use_all) 1872 continue; 1873 } 1874 1875 /* 1876 * By default, skip files where the file name suffix 1877 * does not agree with the section directory 1878 * they are located in. 1879 */ 1880 1881 suffix = strrchr(fn, '.'); 1882 if (NULL == suffix) { 1883 if (warnings) 1884 fprintf(stderr, 1885 "%s/%s: no filename suffix\n", 1886 dir, fn); 1887 if (0 == use_all) 1888 continue; 1889 } else if ((MANDOC_SRC & src_form && 1890 strcmp(suffix + 1, psec)) || 1891 (MANDOC_FORM & src_form && 1892 strcmp(suffix + 1, "0"))) { 1893 if (warnings) 1894 fprintf(stderr, 1895 "%s/%s: wrong filename suffix\n", 1896 dir, fn); 1897 if (0 == use_all) 1898 continue; 1899 if ('0' == suffix[1]) 1900 src_form |= MANDOC_FORM; 1901 else if ('1' <= suffix[1] && '9' >= suffix[1]) 1902 src_form |= MANDOC_SRC; 1903 } 1904 1905 /* 1906 * Skip formatted manuals if a source version is 1907 * available. Ignore the age: it is very unlikely 1908 * that people install newer formatted base manuals 1909 * when they used to have source manuals before, 1910 * and in ports, old manuals get removed on update. 1911 */ 1912 if (0 == use_all && MANDOC_FORM & src_form && 1913 '\0' != *psec) { 1914 buf[0] = '\0'; 1915 strlcat(buf, dir, PATH_MAX); 1916 p = strrchr(buf, '/'); 1917 if ('\0' != *parch && NULL != p) 1918 for (p--; p > buf; p--) 1919 if ('/' == *p) 1920 break; 1921 if (NULL == p) 1922 p = buf; 1923 else 1924 p++; 1925 if (0 == strncmp("cat", p, 3)) 1926 memcpy(p, "man", 3); 1927 strlcat(buf, "/", PATH_MAX); 1928 sz = strlcat(buf, fn, PATH_MAX); 1929 if (sz >= PATH_MAX) { 1930 if (warnings) fprintf(stderr, 1931 "%s/%s: path too long\n", 1932 dir, fn); 1933 continue; 1934 } 1935 q = strrchr(buf, '.'); 1936 if (NULL != q && p < q++) { 1937 *q = '\0'; 1938 sz = strlcat(buf, psec, PATH_MAX); 1939 if (sz >= PATH_MAX) { 1940 if (warnings) fprintf(stderr, 1941 "%s/%s: path too long\n", 1942 dir, fn); 1943 continue; 1944 } 1945 if (0 == access(buf, R_OK)) 1946 continue; 1947 } 1948 } 1949 1950 buf[0] = '\0'; 1951 assert('.' == dir[0]); 1952 if ('/' == dir[1]) { 1953 strlcat(buf, dir + 2, PATH_MAX); 1954 strlcat(buf, "/", PATH_MAX); 1955 } 1956 sz = strlcat(buf, fn, PATH_MAX); 1957 if (sz >= PATH_MAX) { 1958 if (warnings) fprintf(stderr, 1959 "%s/%s: path too long\n", dir, fn); 1960 continue; 1961 } 1962 1963 nof = mandoc_calloc(1, sizeof(struct of)); 1964 nof->fname = mandoc_strdup(buf); 1965 nof->sec = mandoc_strdup(psec); 1966 nof->arch = mandoc_strdup(parch); 1967 nof->src_form = src_form; 1968 1969 /* 1970 * Remember the file name without the extension, 1971 * to be used as the page title in the database. 1972 */ 1973 1974 if (NULL != suffix) 1975 *suffix = '\0'; 1976 nof->title = mandoc_strdup(fn); 1977 1978 /* 1979 * Add the structure to the list. 1980 */ 1981 1982 if (NULL == *of) { 1983 *of = nof; 1984 (*of)->first = nof; 1985 } else { 1986 nof->first = (*of)->first; 1987 (*of)->next = nof; 1988 *of = nof; 1989 } 1990 } 1991 1992 closedir(d); 1993 } 1994 1995 static void 1996 ofile_free(struct of *of) 1997 { 1998 struct of *nof; 1999 2000 if (NULL != of) 2001 of = of->first; 2002 2003 while (NULL != of) { 2004 nof = of->next; 2005 free(of->fname); 2006 free(of->sec); 2007 free(of->arch); 2008 free(of->title); 2009 free(of); 2010 of = nof; 2011 } 2012 } 2013