1 /* $Id: mandocdb.c,v 1.46 2012/03/23 06:52:17 kristaps Exp $ */ 2 /* 3 * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/param.h> 23 #include <sys/types.h> 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <dirent.h> 28 #include <errno.h> 29 #include <fcntl.h> 30 #include <getopt.h> 31 #include <stdio.h> 32 #include <stdint.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <unistd.h> 36 37 #if defined(__linux__) 38 # include <endian.h> 39 # include <db_185.h> 40 #elif defined(__APPLE__) 41 # include <libkern/OSByteOrder.h> 42 # include <db.h> 43 #else 44 # include <db.h> 45 #endif 46 47 #include "man.h" 48 #include "mdoc.h" 49 #include "mandoc.h" 50 #include "mandocdb.h" 51 #include "manpath.h" 52 53 #define MANDOC_BUFSZ BUFSIZ 54 #define MANDOC_SLOP 1024 55 56 #define MANDOC_SRC 0x1 57 #define MANDOC_FORM 0x2 58 59 #define WARNING(_f, _b, _fmt, _args...) \ 60 do if (warnings) { \ 61 fprintf(stderr, "%s: ", (_b)); \ 62 fprintf(stderr, (_fmt), ##_args); \ 63 if ('\0' != *(_f)) \ 64 fprintf(stderr, ": %s", (_f)); \ 65 fprintf(stderr, "\n"); \ 66 } while (/* CONSTCOND */ 0) 67 68 /* Access to the mandoc database on disk. */ 69 70 struct mdb { 71 char idxn[MAXPATHLEN]; /* index db filename */ 72 char dbn[MAXPATHLEN]; /* keyword db filename */ 73 DB *idx; /* index recno database */ 74 DB *db; /* keyword btree database */ 75 }; 76 77 /* Stack of temporarily unused index records. */ 78 79 struct recs { 80 recno_t *stack; /* pointer to a malloc'ed array */ 81 size_t size; /* number of allocated slots */ 82 size_t cur; /* current number of empty records */ 83 recno_t last; /* last record number in the index */ 84 }; 85 86 /* Tiny list for files. No need to bring in QUEUE. */ 87 88 struct of { 89 char *fname; /* heap-allocated */ 90 char *sec; 91 char *arch; 92 char *title; 93 int src_form; 94 struct of *next; /* NULL for last one */ 95 struct of *first; /* first in list */ 96 }; 97 98 /* Buffer for storing growable data. */ 99 100 struct buf { 101 char *cp; 102 size_t len; /* current length */ 103 size_t size; /* total buffer size */ 104 }; 105 106 /* Operation we're going to perform. */ 107 108 enum op { 109 OP_DEFAULT = 0, /* new dbs from dir list or default config */ 110 OP_CONFFILE, /* new databases from custom config file */ 111 OP_UPDATE, /* delete/add entries in existing database */ 112 OP_DELETE, /* delete entries from existing database */ 113 OP_TEST /* change no databases, report potential problems */ 114 }; 115 116 #define MAN_ARGS DB *hash, \ 117 struct buf *buf, \ 118 struct buf *dbuf, \ 119 const struct man_node *n 120 #define MDOC_ARGS DB *hash, \ 121 struct buf *buf, \ 122 struct buf *dbuf, \ 123 const struct mdoc_node *n, \ 124 const struct mdoc_meta *m 125 126 static void buf_appendmdoc(struct buf *, 127 const struct mdoc_node *, int); 128 static void buf_append(struct buf *, const char *); 129 static void buf_appendb(struct buf *, 130 const void *, size_t); 131 static void dbt_put(DB *, const char *, DBT *, DBT *); 132 static void hash_put(DB *, const struct buf *, uint64_t); 133 static void hash_reset(DB **); 134 static void index_merge(const struct of *, struct mparse *, 135 struct buf *, struct buf *, DB *, 136 struct mdb *, struct recs *, 137 const char *); 138 static void index_prune(const struct of *, struct mdb *, 139 struct recs *, const char *); 140 static void ofile_argbuild(int, char *[], 141 struct of **, const char *); 142 static void ofile_dirbuild(const char *, const char *, 143 const char *, int, struct of **, char *); 144 static void ofile_free(struct of *); 145 static void pformatted(DB *, struct buf *, struct buf *, 146 const struct of *, const char *); 147 static int pman_node(MAN_ARGS); 148 static void pmdoc_node(MDOC_ARGS); 149 static int pmdoc_head(MDOC_ARGS); 150 static int pmdoc_body(MDOC_ARGS); 151 static int pmdoc_Fd(MDOC_ARGS); 152 static int pmdoc_In(MDOC_ARGS); 153 static int pmdoc_Fn(MDOC_ARGS); 154 static int pmdoc_Nd(MDOC_ARGS); 155 static int pmdoc_Nm(MDOC_ARGS); 156 static int pmdoc_Sh(MDOC_ARGS); 157 static int pmdoc_St(MDOC_ARGS); 158 static int pmdoc_Xr(MDOC_ARGS); 159 160 #define MDOCF_CHILD 0x01 /* Automatically index child nodes. */ 161 162 struct mdoc_handler { 163 int (*fp)(MDOC_ARGS); /* Optional handler. */ 164 uint64_t mask; /* Set unless handler returns 0. */ 165 int flags; /* For use by pmdoc_node. */ 166 }; 167 168 static const struct mdoc_handler mdocs[MDOC_MAX] = { 169 { NULL, 0, 0 }, /* Ap */ 170 { NULL, 0, 0 }, /* Dd */ 171 { NULL, 0, 0 }, /* Dt */ 172 { NULL, 0, 0 }, /* Os */ 173 { pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */ 174 { pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */ 175 { NULL, 0, 0 }, /* Pp */ 176 { NULL, 0, 0 }, /* D1 */ 177 { NULL, 0, 0 }, /* Dl */ 178 { NULL, 0, 0 }, /* Bd */ 179 { NULL, 0, 0 }, /* Ed */ 180 { NULL, 0, 0 }, /* Bl */ 181 { NULL, 0, 0 }, /* El */ 182 { NULL, 0, 0 }, /* It */ 183 { NULL, 0, 0 }, /* Ad */ 184 { NULL, TYPE_An, MDOCF_CHILD }, /* An */ 185 { NULL, TYPE_Ar, MDOCF_CHILD }, /* Ar */ 186 { NULL, TYPE_Cd, MDOCF_CHILD }, /* Cd */ 187 { NULL, TYPE_Cm, MDOCF_CHILD }, /* Cm */ 188 { NULL, TYPE_Dv, MDOCF_CHILD }, /* Dv */ 189 { NULL, TYPE_Er, MDOCF_CHILD }, /* Er */ 190 { NULL, TYPE_Ev, MDOCF_CHILD }, /* Ev */ 191 { NULL, 0, 0 }, /* Ex */ 192 { NULL, TYPE_Fa, MDOCF_CHILD }, /* Fa */ 193 { pmdoc_Fd, TYPE_In, 0 }, /* Fd */ 194 { NULL, TYPE_Fl, MDOCF_CHILD }, /* Fl */ 195 { pmdoc_Fn, 0, 0 }, /* Fn */ 196 { NULL, TYPE_Ft, MDOCF_CHILD }, /* Ft */ 197 { NULL, TYPE_Ic, MDOCF_CHILD }, /* Ic */ 198 { pmdoc_In, TYPE_In, 0 }, /* In */ 199 { NULL, TYPE_Li, MDOCF_CHILD }, /* Li */ 200 { pmdoc_Nd, TYPE_Nd, MDOCF_CHILD }, /* Nd */ 201 { pmdoc_Nm, TYPE_Nm, MDOCF_CHILD }, /* Nm */ 202 { NULL, 0, 0 }, /* Op */ 203 { NULL, 0, 0 }, /* Ot */ 204 { NULL, TYPE_Pa, MDOCF_CHILD }, /* Pa */ 205 { NULL, 0, 0 }, /* Rv */ 206 { pmdoc_St, TYPE_St, 0 }, /* St */ 207 { NULL, TYPE_Va, MDOCF_CHILD }, /* Va */ 208 { pmdoc_body, TYPE_Va, MDOCF_CHILD }, /* Vt */ 209 { pmdoc_Xr, TYPE_Xr, 0 }, /* Xr */ 210 { NULL, 0, 0 }, /* %A */ 211 { NULL, 0, 0 }, /* %B */ 212 { NULL, 0, 0 }, /* %D */ 213 { NULL, 0, 0 }, /* %I */ 214 { NULL, 0, 0 }, /* %J */ 215 { NULL, 0, 0 }, /* %N */ 216 { NULL, 0, 0 }, /* %O */ 217 { NULL, 0, 0 }, /* %P */ 218 { NULL, 0, 0 }, /* %R */ 219 { NULL, 0, 0 }, /* %T */ 220 { NULL, 0, 0 }, /* %V */ 221 { NULL, 0, 0 }, /* Ac */ 222 { NULL, 0, 0 }, /* Ao */ 223 { NULL, 0, 0 }, /* Aq */ 224 { NULL, TYPE_At, MDOCF_CHILD }, /* At */ 225 { NULL, 0, 0 }, /* Bc */ 226 { NULL, 0, 0 }, /* Bf */ 227 { NULL, 0, 0 }, /* Bo */ 228 { NULL, 0, 0 }, /* Bq */ 229 { NULL, TYPE_Bsx, MDOCF_CHILD }, /* Bsx */ 230 { NULL, TYPE_Bx, MDOCF_CHILD }, /* Bx */ 231 { NULL, 0, 0 }, /* Db */ 232 { NULL, 0, 0 }, /* Dc */ 233 { NULL, 0, 0 }, /* Do */ 234 { NULL, 0, 0 }, /* Dq */ 235 { NULL, 0, 0 }, /* Ec */ 236 { NULL, 0, 0 }, /* Ef */ 237 { NULL, TYPE_Em, MDOCF_CHILD }, /* Em */ 238 { NULL, 0, 0 }, /* Eo */ 239 { NULL, TYPE_Fx, MDOCF_CHILD }, /* Fx */ 240 { NULL, TYPE_Ms, MDOCF_CHILD }, /* Ms */ 241 { NULL, 0, 0 }, /* No */ 242 { NULL, 0, 0 }, /* Ns */ 243 { NULL, TYPE_Nx, MDOCF_CHILD }, /* Nx */ 244 { NULL, TYPE_Ox, MDOCF_CHILD }, /* Ox */ 245 { NULL, 0, 0 }, /* Pc */ 246 { NULL, 0, 0 }, /* Pf */ 247 { NULL, 0, 0 }, /* Po */ 248 { NULL, 0, 0 }, /* Pq */ 249 { NULL, 0, 0 }, /* Qc */ 250 { NULL, 0, 0 }, /* Ql */ 251 { NULL, 0, 0 }, /* Qo */ 252 { NULL, 0, 0 }, /* Qq */ 253 { NULL, 0, 0 }, /* Re */ 254 { NULL, 0, 0 }, /* Rs */ 255 { NULL, 0, 0 }, /* Sc */ 256 { NULL, 0, 0 }, /* So */ 257 { NULL, 0, 0 }, /* Sq */ 258 { NULL, 0, 0 }, /* Sm */ 259 { NULL, 0, 0 }, /* Sx */ 260 { NULL, TYPE_Sy, MDOCF_CHILD }, /* Sy */ 261 { NULL, TYPE_Tn, MDOCF_CHILD }, /* Tn */ 262 { NULL, 0, 0 }, /* Ux */ 263 { NULL, 0, 0 }, /* Xc */ 264 { NULL, 0, 0 }, /* Xo */ 265 { pmdoc_head, TYPE_Fn, 0 }, /* Fo */ 266 { NULL, 0, 0 }, /* Fc */ 267 { NULL, 0, 0 }, /* Oo */ 268 { NULL, 0, 0 }, /* Oc */ 269 { NULL, 0, 0 }, /* Bk */ 270 { NULL, 0, 0 }, /* Ek */ 271 { NULL, 0, 0 }, /* Bt */ 272 { NULL, 0, 0 }, /* Hf */ 273 { NULL, 0, 0 }, /* Fr */ 274 { NULL, 0, 0 }, /* Ud */ 275 { NULL, TYPE_Lb, MDOCF_CHILD }, /* Lb */ 276 { NULL, 0, 0 }, /* Lp */ 277 { NULL, TYPE_Lk, MDOCF_CHILD }, /* Lk */ 278 { NULL, TYPE_Mt, MDOCF_CHILD }, /* Mt */ 279 { NULL, 0, 0 }, /* Brq */ 280 { NULL, 0, 0 }, /* Bro */ 281 { NULL, 0, 0 }, /* Brc */ 282 { NULL, 0, 0 }, /* %C */ 283 { NULL, 0, 0 }, /* Es */ 284 { NULL, 0, 0 }, /* En */ 285 { NULL, TYPE_Dx, MDOCF_CHILD }, /* Dx */ 286 { NULL, 0, 0 }, /* %Q */ 287 { NULL, 0, 0 }, /* br */ 288 { NULL, 0, 0 }, /* sp */ 289 { NULL, 0, 0 }, /* %U */ 290 { NULL, 0, 0 }, /* Ta */ 291 }; 292 293 static const char *progname; 294 static int use_all; /* Use all directories and files. */ 295 static int verb; /* Output verbosity level. */ 296 static int warnings; /* Potential problems in manuals. */ 297 298 int 299 main(int argc, char *argv[]) 300 { 301 struct mparse *mp; /* parse sequence */ 302 struct manpaths dirs; 303 struct mdb mdb; 304 struct recs recs; 305 enum op op; /* current operation */ 306 const char *dir; 307 int ch, i, flags; 308 char dirbuf[MAXPATHLEN]; 309 DB *hash; /* temporary keyword hashtable */ 310 BTREEINFO info; /* btree configuration */ 311 size_t sz1, sz2; 312 struct buf buf, /* keyword buffer */ 313 dbuf; /* description buffer */ 314 struct of *of; /* list of files for processing */ 315 extern int optind; 316 extern char *optarg; 317 318 progname = strrchr(argv[0], '/'); 319 if (progname == NULL) 320 progname = argv[0]; 321 else 322 ++progname; 323 324 memset(&dirs, 0, sizeof(struct manpaths)); 325 memset(&mdb, 0, sizeof(struct mdb)); 326 memset(&recs, 0, sizeof(struct recs)); 327 328 of = NULL; 329 mp = NULL; 330 hash = NULL; 331 op = OP_DEFAULT; 332 dir = NULL; 333 334 while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW"))) 335 switch (ch) { 336 case ('a'): 337 use_all = 1; 338 break; 339 case ('C'): 340 if (op) { 341 fprintf(stderr, 342 "-C: conflicting options\n"); 343 goto usage; 344 } 345 dir = optarg; 346 op = OP_CONFFILE; 347 break; 348 case ('d'): 349 if (op) { 350 fprintf(stderr, 351 "-d: conflicting options\n"); 352 goto usage; 353 } 354 dir = optarg; 355 op = OP_UPDATE; 356 break; 357 case ('t'): 358 dup2(STDOUT_FILENO, STDERR_FILENO); 359 if (op) { 360 fprintf(stderr, 361 "-t: conflicting options\n"); 362 goto usage; 363 } 364 op = OP_TEST; 365 use_all = 1; 366 warnings = 1; 367 break; 368 case ('u'): 369 if (op) { 370 fprintf(stderr, 371 "-u: conflicting options\n"); 372 goto usage; 373 } 374 dir = optarg; 375 op = OP_DELETE; 376 break; 377 case ('v'): 378 verb++; 379 break; 380 case ('W'): 381 warnings = 1; 382 break; 383 default: 384 goto usage; 385 } 386 387 argc -= optind; 388 argv += optind; 389 390 if (OP_CONFFILE == op && argc > 0) { 391 fprintf(stderr, "-C: too many arguments\n"); 392 goto usage; 393 } 394 395 memset(&info, 0, sizeof(BTREEINFO)); 396 info.lorder = 4321; 397 info.flags = R_DUP; 398 399 mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL); 400 401 memset(&buf, 0, sizeof(struct buf)); 402 memset(&dbuf, 0, sizeof(struct buf)); 403 404 buf.size = dbuf.size = MANDOC_BUFSZ; 405 406 buf.cp = mandoc_malloc(buf.size); 407 dbuf.cp = mandoc_malloc(dbuf.size); 408 409 if (OP_TEST == op) { 410 ofile_argbuild(argc, argv, &of, "."); 411 if (NULL == of) 412 goto out; 413 index_merge(of, mp, &dbuf, &buf, 414 hash, &mdb, &recs, "."); 415 goto out; 416 } 417 418 if (OP_UPDATE == op || OP_DELETE == op) { 419 strlcat(mdb.dbn, dir, MAXPATHLEN); 420 strlcat(mdb.dbn, "/", MAXPATHLEN); 421 sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN); 422 423 strlcat(mdb.idxn, dir, MAXPATHLEN); 424 strlcat(mdb.idxn, "/", MAXPATHLEN); 425 sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 426 427 if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) { 428 fprintf(stderr, "%s: path too long\n", dir); 429 exit((int)MANDOCLEVEL_BADARG); 430 } 431 432 flags = O_CREAT | O_RDWR; 433 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 434 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 435 436 if (NULL == mdb.db) { 437 perror(mdb.dbn); 438 exit((int)MANDOCLEVEL_SYSERR); 439 } else if (NULL == mdb.idx) { 440 perror(mdb.idxn); 441 exit((int)MANDOCLEVEL_SYSERR); 442 } 443 444 ofile_argbuild(argc, argv, &of, dir); 445 446 if (NULL == of) 447 goto out; 448 449 index_prune(of, &mdb, &recs, dir); 450 451 /* 452 * Go to the root of the respective manual tree. 453 * This must work or no manuals may be found (they're 454 * indexed relative to the root). 455 */ 456 457 if (OP_UPDATE == op) { 458 if (-1 == chdir(dir)) { 459 perror(dir); 460 exit((int)MANDOCLEVEL_SYSERR); 461 } 462 index_merge(of, mp, &dbuf, &buf, hash, 463 &mdb, &recs, dir); 464 } 465 466 goto out; 467 } 468 469 /* 470 * Configure the directories we're going to scan. 471 * If we have command-line arguments, use them. 472 * If not, we use man(1)'s method (see mandocdb.8). 473 */ 474 475 if (argc > 0) { 476 dirs.paths = mandoc_calloc(argc, sizeof(char *)); 477 dirs.sz = argc; 478 for (i = 0; i < argc; i++) 479 dirs.paths[i] = mandoc_strdup(argv[i]); 480 } else 481 manpath_parse(&dirs, dir, NULL, NULL); 482 483 for (i = 0; i < dirs.sz; i++) { 484 /* 485 * Go to the root of the respective manual tree. 486 * This must work or no manuals may be found: 487 * They are indexed relative to the root. 488 */ 489 490 if (-1 == chdir(dirs.paths[i])) { 491 perror(dirs.paths[i]); 492 exit((int)MANDOCLEVEL_SYSERR); 493 } 494 495 strlcpy(mdb.dbn, MANDOC_DB, MAXPATHLEN); 496 strlcpy(mdb.idxn, MANDOC_IDX, MAXPATHLEN); 497 498 flags = O_CREAT | O_TRUNC | O_RDWR; 499 mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info); 500 mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL); 501 502 if (NULL == mdb.db) { 503 perror(mdb.dbn); 504 exit((int)MANDOCLEVEL_SYSERR); 505 } else if (NULL == mdb.idx) { 506 perror(mdb.idxn); 507 exit((int)MANDOCLEVEL_SYSERR); 508 } 509 510 /* 511 * Search for manuals and fill the new database. 512 */ 513 514 strlcpy(dirbuf, dirs.paths[i], MAXPATHLEN); 515 ofile_dirbuild(".", "", "", 0, &of, dirbuf); 516 517 if (NULL != of) { 518 index_merge(of, mp, &dbuf, &buf, hash, 519 &mdb, &recs, dirs.paths[i]); 520 ofile_free(of); 521 of = NULL; 522 } 523 524 (*mdb.db->close)(mdb.db); 525 (*mdb.idx->close)(mdb.idx); 526 mdb.db = NULL; 527 mdb.idx = NULL; 528 } 529 530 out: 531 if (mdb.db) 532 (*mdb.db->close)(mdb.db); 533 if (mdb.idx) 534 (*mdb.idx->close)(mdb.idx); 535 if (hash) 536 (*hash->close)(hash); 537 if (mp) 538 mparse_free(mp); 539 540 manpath_free(&dirs); 541 ofile_free(of); 542 free(buf.cp); 543 free(dbuf.cp); 544 free(recs.stack); 545 546 return(MANDOCLEVEL_OK); 547 548 usage: 549 fprintf(stderr, 550 "usage: %s [-av] [-C file] | dir ... | -t file ...\n" 551 " -d dir [file ...] | " 552 "-u dir [file ...]\n", 553 progname); 554 555 return((int)MANDOCLEVEL_BADARG); 556 } 557 558 void 559 index_merge(const struct of *of, struct mparse *mp, 560 struct buf *dbuf, struct buf *buf, DB *hash, 561 struct mdb *mdb, struct recs *recs, 562 const char *basedir) 563 { 564 recno_t rec; 565 int ch, skip; 566 DBT key, val; 567 DB *files; /* temporary file name table */ 568 char emptystring[1] = {'\0'}; 569 struct mdoc *mdoc; 570 struct man *man; 571 char *p; 572 const char *fn, *msec, *march, *mtitle; 573 uint64_t mask; 574 size_t sv; 575 unsigned seq; 576 uint64_t vbuf[2]; 577 char type; 578 579 if (warnings) { 580 files = NULL; 581 hash_reset(&files); 582 } 583 584 rec = 0; 585 for (of = of->first; of; of = of->next) { 586 fn = of->fname; 587 588 /* 589 * Try interpreting the file as mdoc(7) or man(7) 590 * source code, unless it is already known to be 591 * formatted. Fall back to formatted mode. 592 */ 593 594 mparse_reset(mp); 595 mdoc = NULL; 596 man = NULL; 597 598 if ((MANDOC_SRC & of->src_form || 599 ! (MANDOC_FORM & of->src_form)) && 600 MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn)) 601 mparse_result(mp, &mdoc, &man); 602 603 if (NULL != mdoc) { 604 msec = mdoc_meta(mdoc)->msec; 605 march = mdoc_meta(mdoc)->arch; 606 if (NULL == march) 607 march = ""; 608 mtitle = mdoc_meta(mdoc)->title; 609 } else if (NULL != man) { 610 msec = man_meta(man)->msec; 611 march = ""; 612 mtitle = man_meta(man)->title; 613 } else { 614 msec = of->sec; 615 march = of->arch; 616 mtitle = of->title; 617 } 618 619 /* 620 * Check whether the manual section given in a file 621 * agrees with the directory where the file is located. 622 * Some manuals have suffixes like (3p) on their 623 * section number either inside the file or in the 624 * directory name, some are linked into more than one 625 * section, like encrypt(1) = makekey(8). Do not skip 626 * manuals for such reasons. 627 */ 628 629 skip = 0; 630 assert(of->sec); 631 assert(msec); 632 if (strcasecmp(msec, of->sec)) 633 WARNING(fn, basedir, "Section \"%s\" manual " 634 "in \"%s\" directory", msec, of->sec); 635 /* 636 * Manual page directories exist for each kernel 637 * architecture as returned by machine(1). 638 * However, many manuals only depend on the 639 * application architecture as returned by arch(1). 640 * For example, some (2/ARM) manuals are shared 641 * across the "armish" and "zaurus" kernel 642 * architectures. 643 * A few manuals are even shared across completely 644 * different architectures, for example fdformat(1) 645 * on amd64, i386, sparc, and sparc64. 646 * Thus, warn about architecture mismatches, 647 * but don't skip manuals for this reason. 648 */ 649 650 assert(of->arch); 651 assert(march); 652 if (strcasecmp(march, of->arch)) 653 WARNING(fn, basedir, "Architecture \"%s\" " 654 "manual in \"%s\" directory", 655 march, of->arch); 656 657 /* 658 * By default, skip a file if the title given 659 * in the file disagrees with the file name. 660 * Do not warn, this happens for all MLINKs. 661 */ 662 663 assert(of->title); 664 assert(mtitle); 665 if (strcasecmp(mtitle, of->title)) 666 skip = 1; 667 668 /* 669 * Build a title string for the file. If it matches 670 * the location of the file, remember the title as 671 * found; else, remember it as missing. 672 */ 673 674 if (warnings) { 675 buf->len = 0; 676 buf_appendb(buf, mtitle, strlen(mtitle)); 677 buf_appendb(buf, "(", 1); 678 buf_appendb(buf, msec, strlen(msec)); 679 if ('\0' != *march) { 680 buf_appendb(buf, "/", 1); 681 buf_appendb(buf, march, strlen(march)); 682 } 683 buf_appendb(buf, ")", 2); 684 for (p = buf->cp; '\0' != *p; p++) 685 *p = tolower(*p); 686 key.data = buf->cp; 687 key.size = buf->len; 688 val.data = NULL; 689 val.size = 0; 690 if (0 == skip) 691 val.data = emptystring; 692 else { 693 ch = (*files->get)(files, &key, &val, 0); 694 if (ch < 0) { 695 perror("hash"); 696 exit((int)MANDOCLEVEL_SYSERR); 697 } else if (ch > 0) { 698 val.data = (void *)fn; 699 val.size = strlen(fn) + 1; 700 } else 701 val.data = NULL; 702 } 703 if (NULL != val.data && 704 (*files->put)(files, &key, &val, 0) < 0) { 705 perror("hash"); 706 exit((int)MANDOCLEVEL_SYSERR); 707 } 708 } 709 710 if (skip && !use_all) 711 continue; 712 713 /* 714 * The index record value consists of a nil-terminated 715 * filename, a nil-terminated manual section, and a 716 * nil-terminated description. Use the actual 717 * location of the file, such that the user can find 718 * it with man(1). Since the description may not be 719 * set, we set a sentinel to see if we're going to 720 * write a nil byte in its place. 721 */ 722 723 dbuf->len = 0; 724 type = mdoc ? 'd' : (man ? 'a' : 'c'); 725 buf_appendb(dbuf, &type, 1); 726 buf_appendb(dbuf, fn, strlen(fn) + 1); 727 buf_appendb(dbuf, of->sec, strlen(of->sec) + 1); 728 buf_appendb(dbuf, of->title, strlen(of->title) + 1); 729 buf_appendb(dbuf, of->arch, strlen(of->arch) + 1); 730 731 sv = dbuf->len; 732 733 /* 734 * Collect keyword/mask pairs. 735 * Each pair will become a new btree node. 736 */ 737 738 hash_reset(&hash); 739 if (mdoc) 740 pmdoc_node(hash, buf, dbuf, 741 mdoc_node(mdoc), mdoc_meta(mdoc)); 742 else if (man) 743 pman_node(hash, buf, dbuf, man_node(man)); 744 else 745 pformatted(hash, buf, dbuf, of, basedir); 746 747 /* Test mode, do not access any database. */ 748 749 if (NULL == mdb->db || NULL == mdb->idx) 750 continue; 751 752 /* 753 * Make sure the file name is always registered 754 * as an .Nm search key. 755 */ 756 buf->len = 0; 757 buf_append(buf, of->title); 758 hash_put(hash, buf, TYPE_Nm); 759 760 /* 761 * Reclaim an empty index record, if available. 762 * Use its record number for all new btree nodes. 763 */ 764 765 if (recs->cur > 0) { 766 recs->cur--; 767 rec = recs->stack[(int)recs->cur]; 768 } else if (recs->last > 0) { 769 rec = recs->last; 770 recs->last = 0; 771 } else 772 rec++; 773 vbuf[1] = htobe64(rec); 774 775 /* 776 * Copy from the in-memory hashtable of pending 777 * keyword/mask pairs into the database. 778 */ 779 780 seq = R_FIRST; 781 while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) { 782 seq = R_NEXT; 783 assert(sizeof(uint64_t) == val.size); 784 memcpy(&mask, val.data, val.size); 785 vbuf[0] = htobe64(mask); 786 val.size = sizeof(vbuf); 787 val.data = &vbuf; 788 dbt_put(mdb->db, mdb->dbn, &key, &val); 789 } 790 if (ch < 0) { 791 perror("hash"); 792 exit((int)MANDOCLEVEL_SYSERR); 793 } 794 795 /* 796 * Apply to the index. If we haven't had a description 797 * set, put an empty one in now. 798 */ 799 800 if (dbuf->len == sv) 801 buf_appendb(dbuf, "", 1); 802 803 key.data = &rec; 804 key.size = sizeof(recno_t); 805 806 val.data = dbuf->cp; 807 val.size = dbuf->len; 808 809 if (verb) 810 printf("%s: Adding to index: %s\n", basedir, fn); 811 812 dbt_put(mdb->idx, mdb->idxn, &key, &val); 813 } 814 815 /* 816 * Iterate the remembered file titles and check that 817 * all files can be found by their main title. 818 */ 819 820 if (warnings) { 821 seq = R_FIRST; 822 while (0 == (*files->seq)(files, &key, &val, seq)) { 823 seq = R_NEXT; 824 if (val.size) 825 WARNING((char *)val.data, basedir, 826 "Probably unreachable, title " 827 "is %s", (char *)key.data); 828 } 829 (*files->close)(files); 830 } 831 } 832 833 /* 834 * Scan through all entries in the index file `idx' and prune those 835 * entries in `ofile'. 836 * Pruning consists of removing from `db', then invalidating the entry 837 * in `idx' (zeroing its value size). 838 */ 839 static void 840 index_prune(const struct of *ofile, struct mdb *mdb, 841 struct recs *recs, const char *basedir) 842 { 843 const struct of *of; 844 const char *fn; 845 uint64_t vbuf[2]; 846 unsigned seq, sseq; 847 DBT key, val; 848 int ch; 849 850 recs->cur = 0; 851 seq = R_FIRST; 852 while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) { 853 seq = R_NEXT; 854 assert(sizeof(recno_t) == key.size); 855 memcpy(&recs->last, key.data, key.size); 856 857 /* Deleted records are zero-sized. Skip them. */ 858 859 if (0 == val.size) 860 goto cont; 861 862 /* 863 * Make sure we're sane. 864 * Read past our mdoc/man/cat type to the next string, 865 * then make sure it's bounded by a NUL. 866 * Failing any of these, we go into our error handler. 867 */ 868 869 fn = (char *)val.data + 1; 870 if (NULL == memchr(fn, '\0', val.size - 1)) 871 break; 872 873 /* 874 * Search for the file in those we care about. 875 * XXX: build this into a tree. Too slow. 876 */ 877 878 for (of = ofile->first; of; of = of->next) 879 if (0 == strcmp(fn, of->fname)) 880 break; 881 882 if (NULL == of) 883 continue; 884 885 /* 886 * Search through the keyword database, throwing out all 887 * references to our file. 888 */ 889 890 sseq = R_FIRST; 891 while (0 == (ch = (*mdb->db->seq)(mdb->db, 892 &key, &val, sseq))) { 893 sseq = R_NEXT; 894 if (sizeof(vbuf) != val.size) 895 break; 896 897 memcpy(vbuf, val.data, val.size); 898 if (recs->last != betoh64(vbuf[1])) 899 continue; 900 901 if ((ch = (*mdb->db->del)(mdb->db, 902 &key, R_CURSOR)) < 0) 903 break; 904 } 905 906 if (ch < 0) { 907 perror(mdb->dbn); 908 exit((int)MANDOCLEVEL_SYSERR); 909 } else if (1 != ch) { 910 fprintf(stderr, "%s: corrupt database\n", 911 mdb->dbn); 912 exit((int)MANDOCLEVEL_SYSERR); 913 } 914 915 if (verb) 916 printf("%s: Deleting from index: %s\n", 917 basedir, fn); 918 919 val.size = 0; 920 ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR); 921 922 if (ch < 0) 923 break; 924 cont: 925 if (recs->cur >= recs->size) { 926 recs->size += MANDOC_SLOP; 927 recs->stack = mandoc_realloc(recs->stack, 928 recs->size * sizeof(recno_t)); 929 } 930 931 recs->stack[(int)recs->cur] = recs->last; 932 recs->cur++; 933 } 934 935 if (ch < 0) { 936 perror(mdb->idxn); 937 exit((int)MANDOCLEVEL_SYSERR); 938 } else if (1 != ch) { 939 fprintf(stderr, "%s: corrupt index\n", mdb->idxn); 940 exit((int)MANDOCLEVEL_SYSERR); 941 } 942 943 recs->last++; 944 } 945 946 /* 947 * Grow the buffer (if necessary) and copy in a binary string. 948 */ 949 static void 950 buf_appendb(struct buf *buf, const void *cp, size_t sz) 951 { 952 953 /* Overshoot by MANDOC_BUFSZ. */ 954 955 while (buf->len + sz >= buf->size) { 956 buf->size = buf->len + sz + MANDOC_BUFSZ; 957 buf->cp = mandoc_realloc(buf->cp, buf->size); 958 } 959 960 memcpy(buf->cp + (int)buf->len, cp, sz); 961 buf->len += sz; 962 } 963 964 /* 965 * Append a nil-terminated string to the buffer. 966 * This can be invoked multiple times. 967 * The buffer string will be nil-terminated. 968 * If invoked multiple times, a space is put between strings. 969 */ 970 static void 971 buf_append(struct buf *buf, const char *cp) 972 { 973 size_t sz; 974 975 if (0 == (sz = strlen(cp))) 976 return; 977 978 if (buf->len) 979 buf->cp[(int)buf->len - 1] = ' '; 980 981 buf_appendb(buf, cp, sz + 1); 982 } 983 984 /* 985 * Recursively add all text from a given node. 986 * This is optimised for general mdoc nodes in this context, which do 987 * not consist of subexpressions and having a recursive call for n->next 988 * would be wasteful. 989 * The "f" variable should be 0 unless called from pmdoc_Nd for the 990 * description buffer, which does not start at the beginning of the 991 * buffer. 992 */ 993 static void 994 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f) 995 { 996 997 for ( ; n; n = n->next) { 998 if (n->child) 999 buf_appendmdoc(buf, n->child, f); 1000 1001 if (MDOC_TEXT == n->type && f) { 1002 f = 0; 1003 buf_appendb(buf, n->string, 1004 strlen(n->string) + 1); 1005 } else if (MDOC_TEXT == n->type) 1006 buf_append(buf, n->string); 1007 1008 } 1009 } 1010 1011 static void 1012 hash_reset(DB **db) 1013 { 1014 DB *hash; 1015 1016 if (NULL != (hash = *db)) 1017 (*hash->close)(hash); 1018 1019 *db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL); 1020 if (NULL == *db) { 1021 perror("hash"); 1022 exit((int)MANDOCLEVEL_SYSERR); 1023 } 1024 } 1025 1026 /* ARGSUSED */ 1027 static int 1028 pmdoc_head(MDOC_ARGS) 1029 { 1030 1031 return(MDOC_HEAD == n->type); 1032 } 1033 1034 /* ARGSUSED */ 1035 static int 1036 pmdoc_body(MDOC_ARGS) 1037 { 1038 1039 return(MDOC_BODY == n->type); 1040 } 1041 1042 /* ARGSUSED */ 1043 static int 1044 pmdoc_Fd(MDOC_ARGS) 1045 { 1046 const char *start, *end; 1047 size_t sz; 1048 1049 if (SEC_SYNOPSIS != n->sec) 1050 return(0); 1051 if (NULL == (n = n->child) || MDOC_TEXT != n->type) 1052 return(0); 1053 1054 /* 1055 * Only consider those `Fd' macro fields that begin with an 1056 * "inclusion" token (versus, e.g., #define). 1057 */ 1058 if (strcmp("#include", n->string)) 1059 return(0); 1060 1061 if (NULL == (n = n->next) || MDOC_TEXT != n->type) 1062 return(0); 1063 1064 /* 1065 * Strip away the enclosing angle brackets and make sure we're 1066 * not zero-length. 1067 */ 1068 1069 start = n->string; 1070 if ('<' == *start || '"' == *start) 1071 start++; 1072 1073 if (0 == (sz = strlen(start))) 1074 return(0); 1075 1076 end = &start[(int)sz - 1]; 1077 if ('>' == *end || '"' == *end) 1078 end--; 1079 1080 assert(end >= start); 1081 1082 buf_appendb(buf, start, (size_t)(end - start + 1)); 1083 buf_appendb(buf, "", 1); 1084 return(1); 1085 } 1086 1087 /* ARGSUSED */ 1088 static int 1089 pmdoc_In(MDOC_ARGS) 1090 { 1091 1092 if (NULL == n->child || MDOC_TEXT != n->child->type) 1093 return(0); 1094 1095 buf_append(buf, n->child->string); 1096 return(1); 1097 } 1098 1099 /* ARGSUSED */ 1100 static int 1101 pmdoc_Fn(MDOC_ARGS) 1102 { 1103 struct mdoc_node *nn; 1104 const char *cp; 1105 1106 nn = n->child; 1107 1108 if (NULL == nn || MDOC_TEXT != nn->type) 1109 return(0); 1110 1111 /* .Fn "struct type *name" "char *arg" */ 1112 1113 cp = strrchr(nn->string, ' '); 1114 if (NULL == cp) 1115 cp = nn->string; 1116 1117 /* Strip away pointer symbol. */ 1118 1119 while ('*' == *cp) 1120 cp++; 1121 1122 /* Store the function name. */ 1123 1124 buf_append(buf, cp); 1125 hash_put(hash, buf, TYPE_Fn); 1126 1127 /* Store the function type. */ 1128 1129 if (nn->string < cp) { 1130 buf->len = 0; 1131 buf_appendb(buf, nn->string, cp - nn->string); 1132 buf_appendb(buf, "", 1); 1133 hash_put(hash, buf, TYPE_Ft); 1134 } 1135 1136 /* Store the arguments. */ 1137 1138 for (nn = nn->next; nn; nn = nn->next) { 1139 if (MDOC_TEXT != nn->type) 1140 continue; 1141 buf->len = 0; 1142 buf_append(buf, nn->string); 1143 hash_put(hash, buf, TYPE_Fa); 1144 } 1145 1146 return(0); 1147 } 1148 1149 /* ARGSUSED */ 1150 static int 1151 pmdoc_St(MDOC_ARGS) 1152 { 1153 1154 if (NULL == n->child || MDOC_TEXT != n->child->type) 1155 return(0); 1156 1157 buf_append(buf, n->child->string); 1158 return(1); 1159 } 1160 1161 /* ARGSUSED */ 1162 static int 1163 pmdoc_Xr(MDOC_ARGS) 1164 { 1165 1166 if (NULL == (n = n->child)) 1167 return(0); 1168 1169 buf_appendb(buf, n->string, strlen(n->string)); 1170 1171 if (NULL != (n = n->next)) { 1172 buf_appendb(buf, ".", 1); 1173 buf_appendb(buf, n->string, strlen(n->string) + 1); 1174 } else 1175 buf_appendb(buf, ".", 2); 1176 1177 return(1); 1178 } 1179 1180 /* ARGSUSED */ 1181 static int 1182 pmdoc_Nd(MDOC_ARGS) 1183 { 1184 1185 if (MDOC_BODY != n->type) 1186 return(0); 1187 1188 buf_appendmdoc(dbuf, n->child, 1); 1189 return(1); 1190 } 1191 1192 /* ARGSUSED */ 1193 static int 1194 pmdoc_Nm(MDOC_ARGS) 1195 { 1196 1197 if (SEC_NAME == n->sec) 1198 return(1); 1199 else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type) 1200 return(0); 1201 1202 if (NULL == n->child) 1203 buf_append(buf, m->name); 1204 1205 return(1); 1206 } 1207 1208 /* ARGSUSED */ 1209 static int 1210 pmdoc_Sh(MDOC_ARGS) 1211 { 1212 1213 return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type); 1214 } 1215 1216 static void 1217 hash_put(DB *db, const struct buf *buf, uint64_t mask) 1218 { 1219 uint64_t oldmask; 1220 DBT key, val; 1221 int rc; 1222 1223 if (buf->len < 2) 1224 return; 1225 1226 key.data = buf->cp; 1227 key.size = buf->len; 1228 1229 if ((rc = (*db->get)(db, &key, &val, 0)) < 0) { 1230 perror("hash"); 1231 exit((int)MANDOCLEVEL_SYSERR); 1232 } else if (0 == rc) { 1233 assert(sizeof(uint64_t) == val.size); 1234 memcpy(&oldmask, val.data, val.size); 1235 mask |= oldmask; 1236 } 1237 1238 val.data = &mask; 1239 val.size = sizeof(uint64_t); 1240 1241 if ((rc = (*db->put)(db, &key, &val, 0)) < 0) { 1242 perror("hash"); 1243 exit((int)MANDOCLEVEL_SYSERR); 1244 } 1245 } 1246 1247 static void 1248 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val) 1249 { 1250 1251 assert(key->size); 1252 assert(val->size); 1253 1254 if (0 == (*db->put)(db, key, val, 0)) 1255 return; 1256 1257 perror(dbn); 1258 exit((int)MANDOCLEVEL_SYSERR); 1259 /* NOTREACHED */ 1260 } 1261 1262 /* 1263 * Call out to per-macro handlers after clearing the persistent database 1264 * key. If the macro sets the database key, flush it to the database. 1265 */ 1266 static void 1267 pmdoc_node(MDOC_ARGS) 1268 { 1269 1270 if (NULL == n) 1271 return; 1272 1273 switch (n->type) { 1274 case (MDOC_HEAD): 1275 /* FALLTHROUGH */ 1276 case (MDOC_BODY): 1277 /* FALLTHROUGH */ 1278 case (MDOC_TAIL): 1279 /* FALLTHROUGH */ 1280 case (MDOC_BLOCK): 1281 /* FALLTHROUGH */ 1282 case (MDOC_ELEM): 1283 buf->len = 0; 1284 1285 /* 1286 * Both NULL handlers and handlers returning true 1287 * request using the data. Only skip the element 1288 * when the handler returns false. 1289 */ 1290 1291 if (NULL != mdocs[n->tok].fp && 1292 0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m)) 1293 break; 1294 1295 /* 1296 * For many macros, use the text from all children. 1297 * Set zero flags for macros not needing this. 1298 * In that case, the handler must fill the buffer. 1299 */ 1300 1301 if (MDOCF_CHILD & mdocs[n->tok].flags) 1302 buf_appendmdoc(buf, n->child, 0); 1303 1304 /* 1305 * Cover the most common case: 1306 * Automatically stage one string per element. 1307 * Set a zero mask for macros not needing this. 1308 * Additional staging can be done in the handler. 1309 */ 1310 1311 if (mdocs[n->tok].mask) 1312 hash_put(hash, buf, mdocs[n->tok].mask); 1313 break; 1314 default: 1315 break; 1316 } 1317 1318 pmdoc_node(hash, buf, dbuf, n->child, m); 1319 pmdoc_node(hash, buf, dbuf, n->next, m); 1320 } 1321 1322 static int 1323 pman_node(MAN_ARGS) 1324 { 1325 const struct man_node *head, *body; 1326 char *start, *sv, *title; 1327 size_t sz, titlesz; 1328 1329 if (NULL == n) 1330 return(0); 1331 1332 /* 1333 * We're only searching for one thing: the first text child in 1334 * the BODY of a NAME section. Since we don't keep track of 1335 * sections in -man, run some hoops to find out whether we're in 1336 * the correct section or not. 1337 */ 1338 1339 if (MAN_BODY == n->type && MAN_SH == n->tok) { 1340 body = n; 1341 assert(body->parent); 1342 if (NULL != (head = body->parent->head) && 1343 1 == head->nchild && 1344 NULL != (head = (head->child)) && 1345 MAN_TEXT == head->type && 1346 0 == strcmp(head->string, "NAME") && 1347 NULL != (body = body->child) && 1348 MAN_TEXT == body->type) { 1349 1350 title = NULL; 1351 titlesz = 0; 1352 /* 1353 * Suck the entire NAME section into memory. 1354 * Yes, we might run away. 1355 * But too many manuals have big, spread-out 1356 * NAME sections over many lines. 1357 */ 1358 for ( ; NULL != body; body = body->next) { 1359 if (MAN_TEXT != body->type) 1360 break; 1361 if (0 == (sz = strlen(body->string))) 1362 continue; 1363 title = mandoc_realloc 1364 (title, titlesz + sz + 1); 1365 memcpy(title + titlesz, body->string, sz); 1366 titlesz += sz + 1; 1367 title[(int)titlesz - 1] = ' '; 1368 } 1369 if (NULL == title) 1370 return(0); 1371 1372 title = mandoc_realloc(title, titlesz + 1); 1373 title[(int)titlesz] = '\0'; 1374 1375 /* Skip leading space. */ 1376 1377 sv = title; 1378 while (isspace((unsigned char)*sv)) 1379 sv++; 1380 1381 if (0 == (sz = strlen(sv))) { 1382 free(title); 1383 return(0); 1384 } 1385 1386 /* Erase trailing space. */ 1387 1388 start = &sv[sz - 1]; 1389 while (start > sv && isspace((unsigned char)*start)) 1390 *start-- = '\0'; 1391 1392 if (start == sv) { 1393 free(title); 1394 return(0); 1395 } 1396 1397 start = sv; 1398 1399 /* 1400 * Go through a special heuristic dance here. 1401 * This is why -man manuals are great! 1402 * (I'm being sarcastic: my eyes are bleeding.) 1403 * Conventionally, one or more manual names are 1404 * comma-specified prior to a whitespace, then a 1405 * dash, then a description. Try to puzzle out 1406 * the name parts here. 1407 */ 1408 1409 for ( ;; ) { 1410 sz = strcspn(start, " ,"); 1411 if ('\0' == start[(int)sz]) 1412 break; 1413 1414 buf->len = 0; 1415 buf_appendb(buf, start, sz); 1416 buf_appendb(buf, "", 1); 1417 1418 hash_put(hash, buf, TYPE_Nm); 1419 1420 if (' ' == start[(int)sz]) { 1421 start += (int)sz + 1; 1422 break; 1423 } 1424 1425 assert(',' == start[(int)sz]); 1426 start += (int)sz + 1; 1427 while (' ' == *start) 1428 start++; 1429 } 1430 1431 buf->len = 0; 1432 1433 if (sv == start) { 1434 buf_append(buf, start); 1435 free(title); 1436 return(1); 1437 } 1438 1439 while (isspace((unsigned char)*start)) 1440 start++; 1441 1442 if (0 == strncmp(start, "-", 1)) 1443 start += 1; 1444 else if (0 == strncmp(start, "\\-\\-", 4)) 1445 start += 4; 1446 else if (0 == strncmp(start, "\\-", 2)) 1447 start += 2; 1448 else if (0 == strncmp(start, "\\(en", 4)) 1449 start += 4; 1450 else if (0 == strncmp(start, "\\(em", 4)) 1451 start += 4; 1452 1453 while (' ' == *start) 1454 start++; 1455 1456 sz = strlen(start) + 1; 1457 buf_appendb(dbuf, start, sz); 1458 buf_appendb(buf, start, sz); 1459 1460 hash_put(hash, buf, TYPE_Nd); 1461 free(title); 1462 } 1463 } 1464 1465 for (n = n->child; n; n = n->next) 1466 if (pman_node(hash, buf, dbuf, n)) 1467 return(1); 1468 1469 return(0); 1470 } 1471 1472 /* 1473 * Parse a formatted manual page. 1474 * By necessity, this involves rather crude guesswork. 1475 */ 1476 static void 1477 pformatted(DB *hash, struct buf *buf, struct buf *dbuf, 1478 const struct of *of, const char *basedir) 1479 { 1480 FILE *stream; 1481 char *line, *p, *title; 1482 size_t len, plen, titlesz; 1483 1484 if (NULL == (stream = fopen(of->fname, "r"))) { 1485 WARNING(of->fname, basedir, "%s", strerror(errno)); 1486 return; 1487 } 1488 1489 /* 1490 * Always use the title derived from the filename up front, 1491 * do not even try to find it in the file. This also makes 1492 * sure we don't end up with an orphan index record, even if 1493 * the file content turns out to be completely unintelligible. 1494 */ 1495 1496 buf->len = 0; 1497 buf_append(buf, of->title); 1498 hash_put(hash, buf, TYPE_Nm); 1499 1500 /* Skip to first blank line. */ 1501 1502 while (NULL != (line = fgetln(stream, &len))) 1503 if ('\n' == *line) 1504 break; 1505 1506 /* 1507 * Assume the first line that is not indented 1508 * is the first section header. Skip to it. 1509 */ 1510 1511 while (NULL != (line = fgetln(stream, &len))) 1512 if ('\n' != *line && ' ' != *line) 1513 break; 1514 1515 /* 1516 * Read up until the next section into a buffer. 1517 * Strip the leading and trailing newline from each read line, 1518 * appending a trailing space. 1519 * Ignore empty (whitespace-only) lines. 1520 */ 1521 1522 titlesz = 0; 1523 title = NULL; 1524 1525 while (NULL != (line = fgetln(stream, &len))) { 1526 if (' ' != *line || '\n' != line[(int)len - 1]) 1527 break; 1528 while (len > 0 && isspace((unsigned char)*line)) { 1529 line++; 1530 len--; 1531 } 1532 if (1 == len) 1533 continue; 1534 title = mandoc_realloc(title, titlesz + len); 1535 memcpy(title + titlesz, line, len); 1536 titlesz += len; 1537 title[(int)titlesz - 1] = ' '; 1538 } 1539 1540 /* 1541 * If no page content can be found, or the input line 1542 * is already the next section header, or there is no 1543 * trailing newline, reuse the page title as the page 1544 * description. 1545 */ 1546 1547 if (NULL == title || '\0' == *title) { 1548 WARNING(of->fname, basedir, 1549 "Cannot find NAME section"); 1550 buf_appendb(dbuf, buf->cp, buf->size); 1551 hash_put(hash, buf, TYPE_Nd); 1552 fclose(stream); 1553 free(title); 1554 return; 1555 } 1556 1557 title = mandoc_realloc(title, titlesz + 1); 1558 title[(int)titlesz] = '\0'; 1559 1560 /* 1561 * Skip to the first dash. 1562 * Use the remaining line as the description (no more than 70 1563 * bytes). 1564 */ 1565 1566 if (NULL != (p = strstr(title, "- "))) { 1567 for (p += 2; ' ' == *p || '\b' == *p; p++) 1568 /* Skip to next word. */ ; 1569 } else { 1570 WARNING(of->fname, basedir, 1571 "No dash in title line"); 1572 p = title; 1573 } 1574 1575 plen = strlen(p); 1576 1577 /* Strip backspace-encoding from line. */ 1578 1579 while (NULL != (line = memchr(p, '\b', plen))) { 1580 len = line - p; 1581 if (0 == len) { 1582 memmove(line, line + 1, plen--); 1583 continue; 1584 } 1585 memmove(line - 1, line + 1, plen - len); 1586 plen -= 2; 1587 } 1588 1589 buf_appendb(dbuf, p, plen + 1); 1590 buf->len = 0; 1591 buf_appendb(buf, p, plen + 1); 1592 hash_put(hash, buf, TYPE_Nd); 1593 fclose(stream); 1594 free(title); 1595 } 1596 1597 static void 1598 ofile_argbuild(int argc, char *argv[], 1599 struct of **of, const char *basedir) 1600 { 1601 char buf[MAXPATHLEN]; 1602 const char *sec, *arch, *title; 1603 char *p; 1604 int i, src_form; 1605 struct of *nof; 1606 1607 for (i = 0; i < argc; i++) { 1608 1609 /* 1610 * Try to infer the manual section, architecture and 1611 * page title from the path, assuming it looks like 1612 * man*[/<arch>]/<title>.<section> or 1613 * cat<section>[/<arch>]/<title>.0 1614 */ 1615 1616 if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) { 1617 fprintf(stderr, "%s: Path too long\n", argv[i]); 1618 continue; 1619 } 1620 sec = arch = title = ""; 1621 src_form = 0; 1622 p = strrchr(buf, '\0'); 1623 while (p-- > buf) { 1624 if ('\0' == *sec && '.' == *p) { 1625 sec = p + 1; 1626 *p = '\0'; 1627 if ('0' == *sec) 1628 src_form |= MANDOC_FORM; 1629 else if ('1' <= *sec && '9' >= *sec) 1630 src_form |= MANDOC_SRC; 1631 continue; 1632 } 1633 if ('/' != *p) 1634 continue; 1635 if ('\0' == *title) { 1636 title = p + 1; 1637 *p = '\0'; 1638 continue; 1639 } 1640 if (0 == strncmp("man", p + 1, 3)) 1641 src_form |= MANDOC_SRC; 1642 else if (0 == strncmp("cat", p + 1, 3)) 1643 src_form |= MANDOC_FORM; 1644 else 1645 arch = p + 1; 1646 break; 1647 } 1648 if ('\0' == *title) { 1649 WARNING(argv[i], basedir, 1650 "Cannot deduce title from filename"); 1651 title = buf; 1652 } 1653 1654 /* 1655 * Build the file structure. 1656 */ 1657 1658 nof = mandoc_calloc(1, sizeof(struct of)); 1659 nof->fname = mandoc_strdup(argv[i]); 1660 nof->sec = mandoc_strdup(sec); 1661 nof->arch = mandoc_strdup(arch); 1662 nof->title = mandoc_strdup(title); 1663 nof->src_form = src_form; 1664 1665 /* 1666 * Add the structure to the list. 1667 */ 1668 1669 if (NULL == *of) { 1670 *of = nof; 1671 (*of)->first = nof; 1672 } else { 1673 nof->first = (*of)->first; 1674 (*of)->next = nof; 1675 *of = nof; 1676 } 1677 } 1678 } 1679 1680 /* 1681 * Recursively build up a list of files to parse. 1682 * We use this instead of ftw() and so on because I don't want global 1683 * variables hanging around. 1684 * This ignores the mandocdb.db and mandocdb.index files, but assumes that 1685 * everything else is a manual. 1686 * Pass in a pointer to a NULL structure for the first invocation. 1687 */ 1688 static void 1689 ofile_dirbuild(const char *dir, const char* psec, const char *parch, 1690 int p_src_form, struct of **of, char *basedir) 1691 { 1692 char buf[MAXPATHLEN]; 1693 size_t sz; 1694 DIR *d; 1695 const char *fn, *sec, *arch; 1696 char *p, *q, *suffix; 1697 struct of *nof; 1698 struct dirent *dp; 1699 int src_form; 1700 1701 if (NULL == (d = opendir(dir))) { 1702 WARNING("", dir, "%s", strerror(errno)); 1703 return; 1704 } 1705 1706 while (NULL != (dp = readdir(d))) { 1707 fn = dp->d_name; 1708 1709 if ('.' == *fn) 1710 continue; 1711 1712 src_form = p_src_form; 1713 1714 if (DT_DIR == dp->d_type) { 1715 sec = psec; 1716 arch = parch; 1717 1718 /* 1719 * By default, only use directories called: 1720 * man<section>/[<arch>/] or 1721 * cat<section>/[<arch>/] 1722 */ 1723 1724 if ('\0' == *sec) { 1725 if(0 == strncmp("man", fn, 3)) { 1726 src_form |= MANDOC_SRC; 1727 sec = fn + 3; 1728 } else if (0 == strncmp("cat", fn, 3)) { 1729 src_form |= MANDOC_FORM; 1730 sec = fn + 3; 1731 } else { 1732 WARNING(fn, basedir, "Bad section"); 1733 if (use_all) 1734 sec = fn; 1735 else 1736 continue; 1737 } 1738 } else if ('\0' == *arch) { 1739 if (NULL != strchr(fn, '.')) { 1740 WARNING(fn, basedir, "Bad architecture"); 1741 if (0 == use_all) 1742 continue; 1743 } 1744 arch = fn; 1745 } else { 1746 WARNING(fn, basedir, "Excessive subdirectory"); 1747 if (0 == use_all) 1748 continue; 1749 } 1750 1751 buf[0] = '\0'; 1752 strlcat(buf, dir, MAXPATHLEN); 1753 strlcat(buf, "/", MAXPATHLEN); 1754 strlcat(basedir, "/", MAXPATHLEN); 1755 strlcat(basedir, fn, MAXPATHLEN); 1756 sz = strlcat(buf, fn, MAXPATHLEN); 1757 1758 if (MAXPATHLEN <= sz) { 1759 WARNING(fn, basedir, "Path too long"); 1760 continue; 1761 } 1762 1763 ofile_dirbuild(buf, sec, arch, 1764 src_form, of, basedir); 1765 1766 p = strrchr(basedir, '/'); 1767 *p = '\0'; 1768 continue; 1769 } 1770 1771 if (DT_REG != dp->d_type) { 1772 WARNING(fn, basedir, "Not a regular file"); 1773 continue; 1774 } 1775 if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn)) 1776 continue; 1777 if ('\0' == *psec) { 1778 WARNING(fn, basedir, "File outside section"); 1779 if (0 == use_all) 1780 continue; 1781 } 1782 1783 /* 1784 * By default, skip files where the file name suffix 1785 * does not agree with the section directory 1786 * they are located in. 1787 */ 1788 1789 suffix = strrchr(fn, '.'); 1790 if (NULL == suffix) { 1791 WARNING(fn, basedir, "No filename suffix"); 1792 if (0 == use_all) 1793 continue; 1794 } else if ((MANDOC_SRC & src_form && 1795 strcmp(suffix + 1, psec)) || 1796 (MANDOC_FORM & src_form && 1797 strcmp(suffix + 1, "0"))) { 1798 WARNING(fn, basedir, "Wrong filename suffix"); 1799 if (0 == use_all) 1800 continue; 1801 if ('0' == suffix[1]) 1802 src_form |= MANDOC_FORM; 1803 else if ('1' <= suffix[1] && '9' >= suffix[1]) 1804 src_form |= MANDOC_SRC; 1805 } 1806 1807 /* 1808 * Skip formatted manuals if a source version is 1809 * available. Ignore the age: it is very unlikely 1810 * that people install newer formatted base manuals 1811 * when they used to have source manuals before, 1812 * and in ports, old manuals get removed on update. 1813 */ 1814 if (0 == use_all && MANDOC_FORM & src_form && 1815 '\0' != *psec) { 1816 buf[0] = '\0'; 1817 strlcat(buf, dir, MAXPATHLEN); 1818 p = strrchr(buf, '/'); 1819 if ('\0' != *parch && NULL != p) 1820 for (p--; p > buf; p--) 1821 if ('/' == *p) 1822 break; 1823 if (NULL == p) 1824 p = buf; 1825 else 1826 p++; 1827 if (0 == strncmp("cat", p, 3)) 1828 memcpy(p, "man", 3); 1829 strlcat(buf, "/", MAXPATHLEN); 1830 sz = strlcat(buf, fn, MAXPATHLEN); 1831 if (sz >= MAXPATHLEN) { 1832 WARNING(fn, basedir, "Path too long"); 1833 continue; 1834 } 1835 q = strrchr(buf, '.'); 1836 if (NULL != q && p < q++) { 1837 *q = '\0'; 1838 sz = strlcat(buf, psec, MAXPATHLEN); 1839 if (sz >= MAXPATHLEN) { 1840 WARNING(fn, basedir, "Path too long"); 1841 continue; 1842 } 1843 if (0 == access(buf, R_OK)) 1844 continue; 1845 } 1846 } 1847 1848 buf[0] = '\0'; 1849 assert('.' == dir[0]); 1850 if ('/' == dir[1]) { 1851 strlcat(buf, dir + 2, MAXPATHLEN); 1852 strlcat(buf, "/", MAXPATHLEN); 1853 } 1854 sz = strlcat(buf, fn, MAXPATHLEN); 1855 if (sz >= MAXPATHLEN) { 1856 WARNING(fn, basedir, "Path too long"); 1857 continue; 1858 } 1859 1860 nof = mandoc_calloc(1, sizeof(struct of)); 1861 nof->fname = mandoc_strdup(buf); 1862 nof->sec = mandoc_strdup(psec); 1863 nof->arch = mandoc_strdup(parch); 1864 nof->src_form = src_form; 1865 1866 /* 1867 * Remember the file name without the extension, 1868 * to be used as the page title in the database. 1869 */ 1870 1871 if (NULL != suffix) 1872 *suffix = '\0'; 1873 nof->title = mandoc_strdup(fn); 1874 1875 /* 1876 * Add the structure to the list. 1877 */ 1878 1879 if (NULL == *of) { 1880 *of = nof; 1881 (*of)->first = nof; 1882 } else { 1883 nof->first = (*of)->first; 1884 (*of)->next = nof; 1885 *of = nof; 1886 } 1887 } 1888 1889 closedir(d); 1890 } 1891 1892 static void 1893 ofile_free(struct of *of) 1894 { 1895 struct of *nof; 1896 1897 if (NULL != of) 1898 of = of->first; 1899 1900 while (NULL != of) { 1901 nof = of->next; 1902 free(of->fname); 1903 free(of->sec); 1904 free(of->arch); 1905 free(of->title); 1906 free(of); 1907 of = nof; 1908 } 1909 } 1910