xref: /minix/external/bsd/mdocml/dist/mandocdb.c (revision 6c8f7fc3)
1 /*	$Vendor-Id: mandocdb.c,v 1.43 2011/12/31 18:47:52 kristaps Exp $ */
2 /*
3  * Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/param.h>
23 #include <sys/types.h>
24 
25 #include <assert.h>
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <fcntl.h>
29 #include <getopt.h>
30 #include <stdio.h>
31 #include <stdint.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #if defined(__linux__)
37 # include <endian.h>
38 # include <db_185.h>
39 #elif defined(__APPLE__)
40 # include <libkern/OSByteOrder.h>
41 # include <db.h>
42 #else
43 # include <db.h>
44 #endif
45 
46 #include "man.h"
47 #include "mdoc.h"
48 #include "mandoc.h"
49 #include "mandocdb.h"
50 #include "manpath.h"
51 
/* Initial size of a struct buf and the amount it overshoots on growth. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of slots added whenever the stack of free records grows. */
#define	MANDOC_SLOP	  1024

/* Bits tested against the src_form member of struct of. */
#define	MANDOC_SRC	  0x01
#define	MANDOC_FORM	  0x02
57 
58 /* Access to the mandoc database on disk. */
59 
60 struct	mdb {
61 	char		  idxn[MAXPATHLEN]; /* index db filename */
62 	char		  dbn[MAXPATHLEN]; /* keyword db filename */
63 	DB		 *idx; /* index recno database */
64 	DB		 *db; /* keyword btree database */
65 };
66 
67 /* Stack of temporarily unused index records. */
68 
69 struct	recs {
70 	recno_t		 *stack; /* pointer to a malloc'ed array */
71 	size_t		  size; /* number of allocated slots */
72 	size_t		  cur; /* current number of empty records */
73 	recno_t		  last; /* last record number in the index */
74 };
75 
/*
 * Minimal singly linked list of files to process.
 * Kept hand-rolled; there is no need to bring in QUEUE.
 */

struct	of {
	char		 *fname;	/* heap-allocated */
	char		 *sec;		/* compared with the section in the file */
	char		 *arch;		/* compared with the arch in the file */
	char		 *title;	/* compared with the title in the file */
	int		  src_form;	/* MANDOC_SRC and/or MANDOC_FORM */
	struct of	 *next;		/* NULL for last one */
	struct of	 *first;	/* first in list */
};
87 
/*
 * Growable byte buffer; see buf_appendb() for the growth policy.
 */

struct	buf {
	char		 *cp;		/* start of the storage */
	size_t		  len;		/* current length */
	size_t		  size;		/* total buffer size */
};
95 
/*
 * The operation selected on the command line.
 * OP_DEFAULT must stay zero: main() tests "if (op)" to detect
 * conflicting options.
 */

enum	op {
	OP_DEFAULT = 0,	/* new dbs from dir list or default config */
	OP_CONFFILE,	/* new databases from custom config file (-C) */
	OP_UPDATE,	/* delete/add entries in existing database (-d) */
	OP_DELETE,	/* delete entries from existing database (-u) */
	OP_TEST		/* change no databases, report potential problems (-t) */
};
105 
/*
 * Common parameter lists of the man(7) and mdoc(7) node handlers:
 * the temporary keyword hashtable, the keyword buffer, the description
 * buffer, the node to examine and, for mdoc(7), the document meta data.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n

#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
115 
116 static	void		  buf_appendmdoc(struct buf *,
117 				const struct mdoc_node *, int);
118 static	void		  buf_append(struct buf *, const char *);
119 static	void		  buf_appendb(struct buf *,
120 				const void *, size_t);
121 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
122 static	void		  hash_put(DB *, const struct buf *, uint64_t);
123 static	void		  hash_reset(DB **);
124 static	void		  index_merge(const struct of *, struct mparse *,
125 				struct buf *, struct buf *, DB *,
126 				struct mdb *, struct recs *);
127 static	void		  index_prune(const struct of *, struct mdb *,
128 				struct recs *);
129 static	void		  ofile_argbuild(int, char *[], struct of **);
130 static	void		  ofile_dirbuild(const char *, const char *,
131 				const char *, int, struct of **);
132 static	void		  ofile_free(struct of *);
133 static	void		  pformatted(DB *, struct buf *,
134 				struct buf *, const struct of *);
135 static	int		  pman_node(MAN_ARGS);
136 static	void		  pmdoc_node(MDOC_ARGS);
137 static	int		  pmdoc_head(MDOC_ARGS);
138 static	int		  pmdoc_body(MDOC_ARGS);
139 static	int		  pmdoc_Fd(MDOC_ARGS);
140 static	int		  pmdoc_In(MDOC_ARGS);
141 static	int		  pmdoc_Fn(MDOC_ARGS);
142 static	int		  pmdoc_Nd(MDOC_ARGS);
143 static	int		  pmdoc_Nm(MDOC_ARGS);
144 static	int		  pmdoc_Sh(MDOC_ARGS);
145 static	int		  pmdoc_St(MDOC_ARGS);
146 static	int		  pmdoc_Xr(MDOC_ARGS);
147 
/* Handler flag: automatically index the text of all child nodes. */
#define	MDOCF_CHILD	  0x01

/*
 * One entry of the per-macro dispatch table consulted by pmdoc_node().
 */
struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS);	/* Optional handler. */
	uint64_t	  mask;			/* Set unless handler returns 0. */
	int		  flags;		/* For use by pmdoc_node. */
};
155 
156 static	const struct mdoc_handler mdocs[MDOC_MAX] = {
157 	{ NULL, 0, 0 },  /* Ap */
158 	{ NULL, 0, 0 },  /* Dd */
159 	{ NULL, 0, 0 },  /* Dt */
160 	{ NULL, 0, 0 },  /* Os */
161 	{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
162 	{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
163 	{ NULL, 0, 0 },  /* Pp */
164 	{ NULL, 0, 0 },  /* D1 */
165 	{ NULL, 0, 0 },  /* Dl */
166 	{ NULL, 0, 0 },  /* Bd */
167 	{ NULL, 0, 0 },  /* Ed */
168 	{ NULL, 0, 0 },  /* Bl */
169 	{ NULL, 0, 0 },  /* El */
170 	{ NULL, 0, 0 },  /* It */
171 	{ NULL, 0, 0 },  /* Ad */
172 	{ NULL, TYPE_An, MDOCF_CHILD },  /* An */
173 	{ NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
174 	{ NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
175 	{ NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
176 	{ NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
177 	{ NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
178 	{ NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
179 	{ NULL, 0, 0 },  /* Ex */
180 	{ NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
181 	{ pmdoc_Fd, TYPE_In, 0 },  /* Fd */
182 	{ NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
183 	{ pmdoc_Fn, 0, 0 },  /* Fn */
184 	{ NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
185 	{ NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
186 	{ pmdoc_In, TYPE_In, 0 },  /* In */
187 	{ NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
188 	{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
189 	{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
190 	{ NULL, 0, 0 },  /* Op */
191 	{ NULL, 0, 0 },  /* Ot */
192 	{ NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
193 	{ NULL, 0, 0 },  /* Rv */
194 	{ pmdoc_St, TYPE_St, 0 },  /* St */
195 	{ NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
196 	{ pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
197 	{ pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
198 	{ NULL, 0, 0 },  /* %A */
199 	{ NULL, 0, 0 },  /* %B */
200 	{ NULL, 0, 0 },  /* %D */
201 	{ NULL, 0, 0 },  /* %I */
202 	{ NULL, 0, 0 },  /* %J */
203 	{ NULL, 0, 0 },  /* %N */
204 	{ NULL, 0, 0 },  /* %O */
205 	{ NULL, 0, 0 },  /* %P */
206 	{ NULL, 0, 0 },  /* %R */
207 	{ NULL, 0, 0 },  /* %T */
208 	{ NULL, 0, 0 },  /* %V */
209 	{ NULL, 0, 0 },  /* Ac */
210 	{ NULL, 0, 0 },  /* Ao */
211 	{ NULL, 0, 0 },  /* Aq */
212 	{ NULL, TYPE_At, MDOCF_CHILD },  /* At */
213 	{ NULL, 0, 0 },  /* Bc */
214 	{ NULL, 0, 0 },  /* Bf */
215 	{ NULL, 0, 0 },  /* Bo */
216 	{ NULL, 0, 0 },  /* Bq */
217 	{ NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
218 	{ NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
219 	{ NULL, 0, 0 },  /* Db */
220 	{ NULL, 0, 0 },  /* Dc */
221 	{ NULL, 0, 0 },  /* Do */
222 	{ NULL, 0, 0 },  /* Dq */
223 	{ NULL, 0, 0 },  /* Ec */
224 	{ NULL, 0, 0 },  /* Ef */
225 	{ NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
226 	{ NULL, 0, 0 },  /* Eo */
227 	{ NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
228 	{ NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
229 	{ NULL, 0, 0 },  /* No */
230 	{ NULL, 0, 0 },  /* Ns */
231 	{ NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
232 	{ NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
233 	{ NULL, 0, 0 },  /* Pc */
234 	{ NULL, 0, 0 },  /* Pf */
235 	{ NULL, 0, 0 },  /* Po */
236 	{ NULL, 0, 0 },  /* Pq */
237 	{ NULL, 0, 0 },  /* Qc */
238 	{ NULL, 0, 0 },  /* Ql */
239 	{ NULL, 0, 0 },  /* Qo */
240 	{ NULL, 0, 0 },  /* Qq */
241 	{ NULL, 0, 0 },  /* Re */
242 	{ NULL, 0, 0 },  /* Rs */
243 	{ NULL, 0, 0 },  /* Sc */
244 	{ NULL, 0, 0 },  /* So */
245 	{ NULL, 0, 0 },  /* Sq */
246 	{ NULL, 0, 0 },  /* Sm */
247 	{ NULL, 0, 0 },  /* Sx */
248 	{ NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
249 	{ NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
250 	{ NULL, 0, 0 },  /* Ux */
251 	{ NULL, 0, 0 },  /* Xc */
252 	{ NULL, 0, 0 },  /* Xo */
253 	{ pmdoc_head, TYPE_Fn, 0 },  /* Fo */
254 	{ NULL, 0, 0 },  /* Fc */
255 	{ NULL, 0, 0 },  /* Oo */
256 	{ NULL, 0, 0 },  /* Oc */
257 	{ NULL, 0, 0 },  /* Bk */
258 	{ NULL, 0, 0 },  /* Ek */
259 	{ NULL, 0, 0 },  /* Bt */
260 	{ NULL, 0, 0 },  /* Hf */
261 	{ NULL, 0, 0 },  /* Fr */
262 	{ NULL, 0, 0 },  /* Ud */
263 	{ NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
264 	{ NULL, 0, 0 },  /* Lp */
265 	{ NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
266 	{ NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
267 	{ NULL, 0, 0 },  /* Brq */
268 	{ NULL, 0, 0 },  /* Bro */
269 	{ NULL, 0, 0 },  /* Brc */
270 	{ NULL, 0, 0 },  /* %C */
271 	{ NULL, 0, 0 },  /* Es */
272 	{ NULL, 0, 0 },  /* En */
273 	{ NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
274 	{ NULL, 0, 0 },  /* %Q */
275 	{ NULL, 0, 0 },  /* br */
276 	{ NULL, 0, 0 },  /* sp */
277 	{ NULL, 0, 0 },  /* %U */
278 	{ NULL, 0, 0 },  /* Ta */
279 };
280 
/* Basename of argv[0], used in the usage message. */
static	const char	 *progname;
/* Use all directories and files (-a, implied by -t). */
static	int		  use_all;
/* Output verbosity level (each -v increments it). */
static	int		  verb;
/* Report potential problems in manuals (-W, implied by -t). */
static	int		  warnings;
285 
286 int
287 main(int argc, char *argv[])
288 {
289 	struct mparse	*mp; /* parse sequence */
290 	struct manpaths	 dirs;
291 	struct mdb	 mdb;
292 	struct recs	 recs;
293 	enum op		 op; /* current operation */
294 	const char	*dir;
295 	char		*cp;
296 	char		 pbuf[PATH_MAX];
297 	int		 ch, i, flags;
298 	DB		*hash; /* temporary keyword hashtable */
299 	BTREEINFO	 info; /* btree configuration */
300 	size_t		 sz1, sz2;
301 	struct buf	 buf, /* keyword buffer */
302 			 dbuf; /* description buffer */
303 	struct of	*of; /* list of files for processing */
304 	extern int	 optind;
305 	extern char	*optarg;
306 
307 	progname = strrchr(argv[0], '/');
308 	if (progname == NULL)
309 		progname = argv[0];
310 	else
311 		++progname;
312 
313 	memset(&dirs, 0, sizeof(struct manpaths));
314 	memset(&mdb, 0, sizeof(struct mdb));
315 	memset(&recs, 0, sizeof(struct recs));
316 
317 	of = NULL;
318 	mp = NULL;
319 	hash = NULL;
320 	op = OP_DEFAULT;
321 	dir = NULL;
322 
323 	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
324 		switch (ch) {
325 		case ('a'):
326 			use_all = 1;
327 			break;
328 		case ('C'):
329 			if (op) {
330 				fprintf(stderr,
331 				    "-C: conflicting options\n");
332 				goto usage;
333 			}
334 			dir = optarg;
335 			op = OP_CONFFILE;
336 			break;
337 		case ('d'):
338 			if (op) {
339 				fprintf(stderr,
340 				    "-d: conflicting options\n");
341 				goto usage;
342 			}
343 			dir = optarg;
344 			op = OP_UPDATE;
345 			break;
346 		case ('t'):
347 			dup2(STDOUT_FILENO, STDERR_FILENO);
348 			if (op) {
349 				fprintf(stderr,
350 				    "-t: conflicting options\n");
351 				goto usage;
352 			}
353 			op = OP_TEST;
354 			use_all = 1;
355 			warnings = 1;
356 			break;
357 		case ('u'):
358 			if (op) {
359 				fprintf(stderr,
360 				    "-u: conflicting options\n");
361 				goto usage;
362 			}
363 			dir = optarg;
364 			op = OP_DELETE;
365 			break;
366 		case ('v'):
367 			verb++;
368 			break;
369 		case ('W'):
370 			warnings = 1;
371 			break;
372 		default:
373 			goto usage;
374 		}
375 
376 	argc -= optind;
377 	argv += optind;
378 
379 	if (OP_CONFFILE == op && argc > 0) {
380 		fprintf(stderr, "-C: too many arguments\n");
381 		goto usage;
382 	}
383 
384 	memset(&info, 0, sizeof(BTREEINFO));
385 	info.flags = R_DUP;
386 
387 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL);
388 
389 	memset(&buf, 0, sizeof(struct buf));
390 	memset(&dbuf, 0, sizeof(struct buf));
391 
392 	buf.size = dbuf.size = MANDOC_BUFSZ;
393 
394 	buf.cp = mandoc_malloc(buf.size);
395 	dbuf.cp = mandoc_malloc(dbuf.size);
396 
397 	flags = O_CREAT | O_RDWR;
398 	if (OP_DEFAULT == op || OP_CONFFILE == op)
399 		flags |= O_TRUNC;
400 
401 	if (OP_TEST == op) {
402 		ofile_argbuild(argc, argv, &of);
403 		if (NULL == of)
404 			goto out;
405 		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
406 		goto out;
407 	}
408 
409 	if (OP_UPDATE == op || OP_DELETE == op) {
410 		strlcat(mdb.dbn, dir, MAXPATHLEN);
411 		strlcat(mdb.dbn, "/", MAXPATHLEN);
412 		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
413 
414 		strlcat(mdb.idxn, dir, MAXPATHLEN);
415 		strlcat(mdb.idxn, "/", MAXPATHLEN);
416 		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
417 
418 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
419 			fprintf(stderr, "%s: path too long\n", dir);
420 			exit((int)MANDOCLEVEL_BADARG);
421 		}
422 
423 		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
424 		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
425 
426 		if (NULL == mdb.db) {
427 			perror(mdb.dbn);
428 			exit((int)MANDOCLEVEL_SYSERR);
429 		} else if (NULL == mdb.idx) {
430 			perror(mdb.idxn);
431 			exit((int)MANDOCLEVEL_SYSERR);
432 		}
433 
434 		ofile_argbuild(argc, argv, &of);
435 
436 		if (NULL == of)
437 			goto out;
438 
439 		index_prune(of, &mdb, &recs);
440 
441 		/*
442 		 * Go to the root of the respective manual tree.
443 		 * This must work or no manuals may be found (they're
444 		 * indexed relative to the root).
445 		 */
446 
447 		if (OP_UPDATE == op) {
448 			if (-1 == chdir(dir)) {
449 				perror(dir);
450 				exit((int)MANDOCLEVEL_SYSERR);
451 			}
452 			index_merge(of, mp, &dbuf, &buf, hash,
453 					&mdb, &recs);
454 		}
455 
456 		goto out;
457 	}
458 
459 	/*
460 	 * Configure the directories we're going to scan.
461 	 * If we have command-line arguments, use them.
462 	 * If not, we use man(1)'s method (see mandocdb.8).
463 	 */
464 
465 	if (argc > 0) {
466 		dirs.paths = mandoc_calloc(argc, sizeof(char *));
467 		dirs.sz = argc;
468 		for (i = 0; i < argc; i++) {
469 			if (NULL == (cp = realpath(argv[i], pbuf))) {
470 				perror(argv[i]);
471 				goto out;
472 			}
473 			dirs.paths[i] = mandoc_strdup(cp);
474 		}
475 	} else
476 		manpath_parse(&dirs, dir, NULL, NULL);
477 
478 	for (i = 0; i < dirs.sz; i++) {
479 		mdb.idxn[0] = mdb.dbn[0] = '\0';
480 
481 		strlcat(mdb.dbn, dirs.paths[i], MAXPATHLEN);
482 		strlcat(mdb.dbn, "/", MAXPATHLEN);
483 		sz1 = strlcat(mdb.dbn, MANDOC_DB, MAXPATHLEN);
484 
485 		strlcat(mdb.idxn, dirs.paths[i], MAXPATHLEN);
486 		strlcat(mdb.idxn, "/", MAXPATHLEN);
487 		sz2 = strlcat(mdb.idxn, MANDOC_IDX, MAXPATHLEN);
488 
489 		if (sz1 >= MAXPATHLEN || sz2 >= MAXPATHLEN) {
490 			fprintf(stderr, "%s: path too long\n",
491 					dirs.paths[i]);
492 			exit((int)MANDOCLEVEL_BADARG);
493 		}
494 
495 		if (mdb.db)
496 			(*mdb.db->close)(mdb.db);
497 		if (mdb.idx)
498 			(*mdb.idx->close)(mdb.idx);
499 
500 		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
501 		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
502 
503 		if (NULL == mdb.db) {
504 			perror(mdb.dbn);
505 			exit((int)MANDOCLEVEL_SYSERR);
506 		} else if (NULL == mdb.idx) {
507 			perror(mdb.idxn);
508 			exit((int)MANDOCLEVEL_SYSERR);
509 		}
510 
511 		ofile_free(of);
512 		of = NULL;
513 
514 		if (-1 == chdir(dirs.paths[i])) {
515 			perror(dirs.paths[i]);
516 			exit((int)MANDOCLEVEL_SYSERR);
517 		}
518 
519 	       	ofile_dirbuild(".", "", "", 0, &of);
520 		if (NULL == of)
521 			continue;
522 
523 		/*
524 		 * Go to the root of the respective manual tree.
525 		 * This must work or no manuals may be found (they're
526 		 * indexed relative to the root).
527 		 */
528 
529 		if (-1 == chdir(dirs.paths[i])) {
530 			perror(dirs.paths[i]);
531 			exit((int)MANDOCLEVEL_SYSERR);
532 		}
533 
534 		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
535 	}
536 
537 out:
538 	if (mdb.db)
539 		(*mdb.db->close)(mdb.db);
540 	if (mdb.idx)
541 		(*mdb.idx->close)(mdb.idx);
542 	if (hash)
543 		(*hash->close)(hash);
544 	if (mp)
545 		mparse_free(mp);
546 
547 	manpath_free(&dirs);
548 	ofile_free(of);
549 	free(buf.cp);
550 	free(dbuf.cp);
551 	free(recs.stack);
552 
553 	return(MANDOCLEVEL_OK);
554 
555 usage:
556 	fprintf(stderr,
557 		"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
558 		"                        -d dir [file ...] | "
559 		"-u dir [file ...]\n",
560 		progname);
561 
562 	return((int)MANDOCLEVEL_BADARG);
563 }
564 
565 void
566 index_merge(const struct of *of, struct mparse *mp,
567 		struct buf *dbuf, struct buf *buf, DB *hash,
568 		struct mdb *mdb, struct recs *recs)
569 {
570 	recno_t		 rec;
571 	int		 ch, skip;
572 	DBT		 key, val;
573 	struct mdoc	*mdoc;
574 	struct man	*man;
575 	const char	*fn, *msec, *march, *mtitle;
576 	uint64_t	 mask;
577 	size_t		 sv;
578 	unsigned	 seq;
579 	uint64_t	 vbuf[2];
580 	char		 type;
581 
582 	rec = 0;
583 	for (of = of->first; of; of = of->next) {
584 		fn = of->fname;
585 
586 		/*
587 		 * Try interpreting the file as mdoc(7) or man(7)
588 		 * source code, unless it is already known to be
589 		 * formatted.  Fall back to formatted mode.
590 		 */
591 
592 		mparse_reset(mp);
593 		mdoc = NULL;
594 		man = NULL;
595 
596 		if ((MANDOC_SRC & of->src_form ||
597 		    ! (MANDOC_FORM & of->src_form)) &&
598 		    MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
599 			mparse_result(mp, &mdoc, &man);
600 
601 		if (NULL != mdoc) {
602 			msec = mdoc_meta(mdoc)->msec;
603 			march = mdoc_meta(mdoc)->arch;
604 			if (NULL == march)
605 				march = "";
606 			mtitle = mdoc_meta(mdoc)->title;
607 		} else if (NULL != man) {
608 			msec = man_meta(man)->msec;
609 			march = "";
610 			mtitle = man_meta(man)->title;
611 		} else {
612 			msec = of->sec;
613 			march = of->arch;
614 			mtitle = of->title;
615 		}
616 
617 		/*
618 		 * By default, skip a file if the manual section
619 		 * given in the file disagrees with the directory
620 		 * where the file is located.
621 		 */
622 
623 		skip = 0;
624 		assert(of->sec);
625 		assert(msec);
626 		if (strcasecmp(msec, of->sec)) {
627 			if (warnings)
628 				fprintf(stderr, "%s: "
629 					"section \"%s\" manual "
630 					"in \"%s\" directory\n",
631 					fn, msec, of->sec);
632 			skip = 1;
633 		}
634 
635 		/*
636 		 * Manual page directories exist for each kernel
637 		 * architecture as returned by machine(1).
638 		 * However, many manuals only depend on the
639 		 * application architecture as returned by arch(1).
640 		 * For example, some (2/ARM) manuals are shared
641 		 * across the "armish" and "zaurus" kernel
642 		 * architectures.
643 		 * A few manuals are even shared across completely
644 		 * different architectures, for example fdformat(1)
645 		 * on amd64, i386, sparc, and sparc64.
646 		 * Thus, warn about architecture mismatches,
647 		 * but don't skip manuals for this reason.
648 		 */
649 
650 		assert(of->arch);
651 		assert(march);
652 		if (strcasecmp(march, of->arch)) {
653 			if (warnings)
654 				fprintf(stderr, "%s: "
655 					"architecture \"%s\" manual "
656 					"in \"%s\" directory\n",
657 					fn, march, of->arch);
658 			march = of->arch;
659 		}
660 
661 		/*
662 		 * By default, skip a file if the title given
663 		 * in the file disagrees with the file name.
664 		 * If both agree, use the file name as the title,
665 		 * because the one in the file usually is all caps.
666 		 */
667 
668 		assert(of->title);
669 		assert(mtitle);
670 		if (strcasecmp(mtitle, of->title)) {
671 			if (warnings)
672 				fprintf(stderr, "%s: "
673 					"title \"%s\" in file "
674 					"but \"%s\" in filename\n",
675 					fn, mtitle, of->title);
676 			skip = 1;
677 		} else
678 			mtitle = of->title;
679 
680 		if (skip && !use_all)
681 			continue;
682 
683 		/*
684 		 * The index record value consists of a nil-terminated
685 		 * filename, a nil-terminated manual section, and a
686 		 * nil-terminated description.  Since the description
687 		 * may not be set, we set a sentinel to see if we're
688 		 * going to write a nil byte in its place.
689 		 */
690 
691 		dbuf->len = 0;
692 		type = mdoc ? 'd' : (man ? 'a' : 'c');
693 		buf_appendb(dbuf, &type, 1);
694 		buf_appendb(dbuf, fn, strlen(fn) + 1);
695 		buf_appendb(dbuf, msec, strlen(msec) + 1);
696 		buf_appendb(dbuf, mtitle, strlen(mtitle) + 1);
697 		buf_appendb(dbuf, march, strlen(march) + 1);
698 
699 		sv = dbuf->len;
700 
701 		/*
702 		 * Collect keyword/mask pairs.
703 		 * Each pair will become a new btree node.
704 		 */
705 
706 		hash_reset(&hash);
707 		if (mdoc)
708 			pmdoc_node(hash, buf, dbuf,
709 				mdoc_node(mdoc), mdoc_meta(mdoc));
710 		else if (man)
711 			pman_node(hash, buf, dbuf, man_node(man));
712 		else
713 			pformatted(hash, buf, dbuf, of);
714 
715 		/* Test mode, do not access any database. */
716 
717 		if (NULL == mdb->db || NULL == mdb->idx)
718 			continue;
719 
720 		/*
721 		 * Reclaim an empty index record, if available.
722 		 * Use its record number for all new btree nodes.
723 		 */
724 
725 		if (recs->cur > 0) {
726 			recs->cur--;
727 			rec = recs->stack[(int)recs->cur];
728 		} else if (recs->last > 0) {
729 			rec = recs->last;
730 			recs->last = 0;
731 		} else
732 			rec++;
733 		vbuf[1] = htobe64(rec);
734 
735 		/*
736 		 * Copy from the in-memory hashtable of pending
737 		 * keyword/mask pairs into the database.
738 		 */
739 
740 		seq = R_FIRST;
741 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
742 			seq = R_NEXT;
743 			assert(sizeof(uint64_t) == val.size);
744 			memcpy(&mask, val.data, val.size);
745 			vbuf[0] = htobe64(mask);
746 			val.size = sizeof(vbuf);
747 			val.data = &vbuf;
748 			dbt_put(mdb->db, mdb->dbn, &key, &val);
749 		}
750 		if (ch < 0) {
751 			perror("hash");
752 			exit((int)MANDOCLEVEL_SYSERR);
753 		}
754 
755 		/*
756 		 * Apply to the index.  If we haven't had a description
757 		 * set, put an empty one in now.
758 		 */
759 
760 		if (dbuf->len == sv)
761 			buf_appendb(dbuf, "", 1);
762 
763 		key.data = &rec;
764 		key.size = sizeof(recno_t);
765 
766 		val.data = dbuf->cp;
767 		val.size = dbuf->len;
768 
769 		if (verb)
770 			printf("%s: adding to index\n", fn);
771 
772 		dbt_put(mdb->idx, mdb->idxn, &key, &val);
773 	}
774 }
775 
776 /*
777  * Scan through all entries in the index file `idx' and prune those
778  * entries in `ofile'.
779  * Pruning consists of removing from `db', then invalidating the entry
780  * in `idx' (zeroing its value size).
781  */
782 static void
783 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
784 {
785 	const struct of	*of;
786 	const char	*fn;
787 	uint64_t	 vbuf[2];
788 	unsigned	 seq, sseq;
789 	DBT		 key, val;
790 	int		 ch;
791 
792 	recs->cur = 0;
793 	seq = R_FIRST;
794 	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
795 		seq = R_NEXT;
796 		assert(sizeof(recno_t) == key.size);
797 		memcpy(&recs->last, key.data, key.size);
798 
799 		/* Deleted records are zero-sized.  Skip them. */
800 
801 		if (0 == val.size)
802 			goto cont;
803 
804 		/*
805 		 * Make sure we're sane.
806 		 * Read past our mdoc/man/cat type to the next string,
807 		 * then make sure it's bounded by a NUL.
808 		 * Failing any of these, we go into our error handler.
809 		 */
810 
811 		fn = (char *)val.data + 1;
812 		if (NULL == memchr(fn, '\0', val.size - 1))
813 			break;
814 
815 		/*
816 		 * Search for the file in those we care about.
817 		 * XXX: build this into a tree.  Too slow.
818 		 */
819 
820 		for (of = ofile->first; of; of = of->next)
821 			if (0 == strcmp(fn, of->fname))
822 				break;
823 
824 		if (NULL == of)
825 			continue;
826 
827 		/*
828 		 * Search through the keyword database, throwing out all
829 		 * references to our file.
830 		 */
831 
832 		sseq = R_FIRST;
833 		while (0 == (ch = (*mdb->db->seq)(mdb->db,
834 					&key, &val, sseq))) {
835 			sseq = R_NEXT;
836 			if (sizeof(vbuf) != val.size)
837 				break;
838 
839 			memcpy(vbuf, val.data, val.size);
840 			if (recs->last != betoh64(vbuf[1]))
841 				continue;
842 
843 			if ((ch = (*mdb->db->del)(mdb->db,
844 					&key, R_CURSOR)) < 0)
845 				break;
846 		}
847 
848 		if (ch < 0) {
849 			perror(mdb->dbn);
850 			exit((int)MANDOCLEVEL_SYSERR);
851 		} else if (1 != ch) {
852 			fprintf(stderr, "%s: corrupt database\n",
853 					mdb->dbn);
854 			exit((int)MANDOCLEVEL_SYSERR);
855 		}
856 
857 		if (verb)
858 			printf("%s: deleting from index\n", fn);
859 
860 		val.size = 0;
861 		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
862 
863 		if (ch < 0)
864 			break;
865 cont:
866 		if (recs->cur >= recs->size) {
867 			recs->size += MANDOC_SLOP;
868 			recs->stack = mandoc_realloc(recs->stack,
869 					recs->size * sizeof(recno_t));
870 		}
871 
872 		recs->stack[(int)recs->cur] = recs->last;
873 		recs->cur++;
874 	}
875 
876 	if (ch < 0) {
877 		perror(mdb->idxn);
878 		exit((int)MANDOCLEVEL_SYSERR);
879 	} else if (1 != ch) {
880 		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
881 		exit((int)MANDOCLEVEL_SYSERR);
882 	}
883 
884 	recs->last++;
885 }
886 
887 /*
888  * Grow the buffer (if necessary) and copy in a binary string.
889  */
890 static void
891 buf_appendb(struct buf *buf, const void *cp, size_t sz)
892 {
893 
894 	/* Overshoot by MANDOC_BUFSZ. */
895 
896 	while (buf->len + sz >= buf->size) {
897 		buf->size = buf->len + sz + MANDOC_BUFSZ;
898 		buf->cp = mandoc_realloc(buf->cp, buf->size);
899 	}
900 
901 	memcpy(buf->cp + (int)buf->len, cp, sz);
902 	buf->len += sz;
903 }
904 
905 /*
906  * Append a nil-terminated string to the buffer.
907  * This can be invoked multiple times.
908  * The buffer string will be nil-terminated.
909  * If invoked multiple times, a space is put between strings.
910  */
911 static void
912 buf_append(struct buf *buf, const char *cp)
913 {
914 	size_t		 sz;
915 
916 	if (0 == (sz = strlen(cp)))
917 		return;
918 
919 	if (buf->len)
920 		buf->cp[(int)buf->len - 1] = ' ';
921 
922 	buf_appendb(buf, cp, sz + 1);
923 }
924 
925 /*
926  * Recursively add all text from a given node.
927  * This is optimised for general mdoc nodes in this context, which do
928  * not consist of subexpressions and having a recursive call for n->next
929  * would be wasteful.
930  * The "f" variable should be 0 unless called from pmdoc_Nd for the
931  * description buffer, which does not start at the beginning of the
932  * buffer.
933  */
934 static void
935 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
936 {
937 
938 	for ( ; n; n = n->next) {
939 		if (n->child)
940 			buf_appendmdoc(buf, n->child, f);
941 
942 		if (MDOC_TEXT == n->type && f) {
943 			f = 0;
944 			buf_appendb(buf, n->string,
945 					strlen(n->string) + 1);
946 		} else if (MDOC_TEXT == n->type)
947 			buf_append(buf, n->string);
948 
949 	}
950 }
951 
952 static void
953 hash_reset(DB **db)
954 {
955 	DB		*hash;
956 
957 	if (NULL != (hash = *db))
958 		(*hash->close)(hash);
959 
960 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
961 	if (NULL == *db) {
962 		perror("hash");
963 		exit((int)MANDOCLEVEL_SYSERR);
964 	}
965 }
966 
967 /* ARGSUSED */
968 static int
969 pmdoc_head(MDOC_ARGS)
970 {
971 
972 	return(MDOC_HEAD == n->type);
973 }
974 
975 /* ARGSUSED */
976 static int
977 pmdoc_body(MDOC_ARGS)
978 {
979 
980 	return(MDOC_BODY == n->type);
981 }
982 
983 /* ARGSUSED */
984 static int
985 pmdoc_Fd(MDOC_ARGS)
986 {
987 	const char	*start, *end;
988 	size_t		 sz;
989 
990 	if (SEC_SYNOPSIS != n->sec)
991 		return(0);
992 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
993 		return(0);
994 
995 	/*
996 	 * Only consider those `Fd' macro fields that begin with an
997 	 * "inclusion" token (versus, e.g., #define).
998 	 */
999 	if (strcmp("#include", n->string))
1000 		return(0);
1001 
1002 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1003 		return(0);
1004 
1005 	/*
1006 	 * Strip away the enclosing angle brackets and make sure we're
1007 	 * not zero-length.
1008 	 */
1009 
1010 	start = n->string;
1011 	if ('<' == *start || '"' == *start)
1012 		start++;
1013 
1014 	if (0 == (sz = strlen(start)))
1015 		return(0);
1016 
1017 	end = &start[(int)sz - 1];
1018 	if ('>' == *end || '"' == *end)
1019 		end--;
1020 
1021 	assert(end >= start);
1022 
1023 	buf_appendb(buf, start, (size_t)(end - start + 1));
1024 	buf_appendb(buf, "", 1);
1025 	return(1);
1026 }
1027 
1028 /* ARGSUSED */
1029 static int
1030 pmdoc_In(MDOC_ARGS)
1031 {
1032 
1033 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1034 		return(0);
1035 
1036 	buf_append(buf, n->child->string);
1037 	return(1);
1038 }
1039 
1040 /* ARGSUSED */
1041 static int
1042 pmdoc_Fn(MDOC_ARGS)
1043 {
1044 	struct mdoc_node *nn;
1045 	const char	*cp;
1046 
1047 	nn = n->child;
1048 
1049 	if (NULL == nn || MDOC_TEXT != nn->type)
1050 		return(0);
1051 
1052 	/* .Fn "struct type *name" "char *arg" */
1053 
1054 	cp = strrchr(nn->string, ' ');
1055 	if (NULL == cp)
1056 		cp = nn->string;
1057 
1058 	/* Strip away pointer symbol. */
1059 
1060 	while ('*' == *cp)
1061 		cp++;
1062 
1063 	/* Store the function name. */
1064 
1065 	buf_append(buf, cp);
1066 	hash_put(hash, buf, TYPE_Fn);
1067 
1068 	/* Store the function type. */
1069 
1070 	if (nn->string < cp) {
1071 		buf->len = 0;
1072 		buf_appendb(buf, nn->string, cp - nn->string);
1073 		buf_appendb(buf, "", 1);
1074 		hash_put(hash, buf, TYPE_Ft);
1075 	}
1076 
1077 	/* Store the arguments. */
1078 
1079 	for (nn = nn->next; nn; nn = nn->next) {
1080 		if (MDOC_TEXT != nn->type)
1081 			continue;
1082 		buf->len = 0;
1083 		buf_append(buf, nn->string);
1084 		hash_put(hash, buf, TYPE_Fa);
1085 	}
1086 
1087 	return(0);
1088 }
1089 
1090 /* ARGSUSED */
1091 static int
1092 pmdoc_St(MDOC_ARGS)
1093 {
1094 
1095 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1096 		return(0);
1097 
1098 	buf_append(buf, n->child->string);
1099 	return(1);
1100 }
1101 
1102 /* ARGSUSED */
1103 static int
1104 pmdoc_Xr(MDOC_ARGS)
1105 {
1106 
1107 	if (NULL == (n = n->child))
1108 		return(0);
1109 
1110 	buf_appendb(buf, n->string, strlen(n->string));
1111 
1112 	if (NULL != (n = n->next)) {
1113 		buf_appendb(buf, ".", 1);
1114 		buf_appendb(buf, n->string, strlen(n->string) + 1);
1115 	} else
1116 		buf_appendb(buf, ".", 2);
1117 
1118 	return(1);
1119 }
1120 
1121 /* ARGSUSED */
1122 static int
1123 pmdoc_Nd(MDOC_ARGS)
1124 {
1125 
1126 	if (MDOC_BODY != n->type)
1127 		return(0);
1128 
1129 	buf_appendmdoc(dbuf, n->child, 1);
1130 	return(1);
1131 }
1132 
1133 /* ARGSUSED */
1134 static int
1135 pmdoc_Nm(MDOC_ARGS)
1136 {
1137 
1138 	if (SEC_NAME == n->sec)
1139 		return(1);
1140 	else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1141 		return(0);
1142 
1143 	if (NULL == n->child)
1144 		buf_append(buf, m->name);
1145 
1146 	return(1);
1147 }
1148 
1149 /* ARGSUSED */
1150 static int
1151 pmdoc_Sh(MDOC_ARGS)
1152 {
1153 
1154 	return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1155 }
1156 
1157 static void
1158 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1159 {
1160 	uint64_t	 oldmask;
1161 	DBT		 key, val;
1162 	int		 rc;
1163 
1164 	if (buf->len < 2)
1165 		return;
1166 
1167 	key.data = buf->cp;
1168 	key.size = buf->len;
1169 
1170 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1171 		perror("hash");
1172 		exit((int)MANDOCLEVEL_SYSERR);
1173 	} else if (0 == rc) {
1174 		assert(sizeof(uint64_t) == val.size);
1175 		memcpy(&oldmask, val.data, val.size);
1176 		mask |= oldmask;
1177 	}
1178 
1179 	val.data = &mask;
1180 	val.size = sizeof(uint64_t);
1181 
1182 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1183 		perror("hash");
1184 		exit((int)MANDOCLEVEL_SYSERR);
1185 	}
1186 }
1187 
1188 static void
1189 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1190 {
1191 
1192 	assert(key->size);
1193 	assert(val->size);
1194 
1195 	if (0 == (*db->put)(db, key, val, 0))
1196 		return;
1197 
1198 	perror(dbn);
1199 	exit((int)MANDOCLEVEL_SYSERR);
1200 	/* NOTREACHED */
1201 }
1202 
1203 /*
1204  * Call out to per-macro handlers after clearing the persistent database
1205  * key.  If the macro sets the database key, flush it to the database.
1206  */
1207 static void
1208 pmdoc_node(MDOC_ARGS)
1209 {
1210 
1211 	if (NULL == n)
1212 		return;
1213 
1214 	switch (n->type) {
1215 	case (MDOC_HEAD):
1216 		/* FALLTHROUGH */
1217 	case (MDOC_BODY):
1218 		/* FALLTHROUGH */
1219 	case (MDOC_TAIL):
1220 		/* FALLTHROUGH */
1221 	case (MDOC_BLOCK):
1222 		/* FALLTHROUGH */
1223 	case (MDOC_ELEM):
1224 		buf->len = 0;
1225 
1226 		/*
1227 		 * Both NULL handlers and handlers returning true
1228 		 * request using the data.  Only skip the element
1229 		 * when the handler returns false.
1230 		 */
1231 
1232 		if (NULL != mdocs[n->tok].fp &&
1233 		    0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1234 			break;
1235 
1236 		/*
1237 		 * For many macros, use the text from all children.
1238 		 * Set zero flags for macros not needing this.
1239 		 * In that case, the handler must fill the buffer.
1240 		 */
1241 
1242 		if (MDOCF_CHILD & mdocs[n->tok].flags)
1243 			buf_appendmdoc(buf, n->child, 0);
1244 
1245 		/*
1246 		 * Cover the most common case:
1247 		 * Automatically stage one string per element.
1248 		 * Set a zero mask for macros not needing this.
1249 		 * Additional staging can be done in the handler.
1250 		 */
1251 
1252 		if (mdocs[n->tok].mask)
1253 			hash_put(hash, buf, mdocs[n->tok].mask);
1254 		break;
1255 	default:
1256 		break;
1257 	}
1258 
1259 	pmdoc_node(hash, buf, dbuf, n->child, m);
1260 	pmdoc_node(hash, buf, dbuf, n->next, m);
1261 }
1262 
1263 static int
1264 pman_node(MAN_ARGS)
1265 {
1266 	const struct man_node *head, *body;
1267 	const char	*start, *sv;
1268 	size_t		 sz;
1269 
1270 	if (NULL == n)
1271 		return(0);
1272 
1273 	/*
1274 	 * We're only searching for one thing: the first text child in
1275 	 * the BODY of a NAME section.  Since we don't keep track of
1276 	 * sections in -man, run some hoops to find out whether we're in
1277 	 * the correct section or not.
1278 	 */
1279 
1280 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1281 		body = n;
1282 		assert(body->parent);
1283 		if (NULL != (head = body->parent->head) &&
1284 				1 == head->nchild &&
1285 				NULL != (head = (head->child)) &&
1286 				MAN_TEXT == head->type &&
1287 				0 == strcmp(head->string, "NAME") &&
1288 				NULL != (body = body->child) &&
1289 				MAN_TEXT == body->type) {
1290 
1291 			assert(body->string);
1292 			start = sv = body->string;
1293 
1294 			/*
1295 			 * Go through a special heuristic dance here.
1296 			 * This is why -man manuals are great!
1297 			 * (I'm being sarcastic: my eyes are bleeding.)
1298 			 * Conventionally, one or more manual names are
1299 			 * comma-specified prior to a whitespace, then a
1300 			 * dash, then a description.  Try to puzzle out
1301 			 * the name parts here.
1302 			 */
1303 
1304 			for ( ;; ) {
1305 				sz = strcspn(start, " ,");
1306 				if ('\0' == start[(int)sz])
1307 					break;
1308 
1309 				buf->len = 0;
1310 				buf_appendb(buf, start, sz);
1311 				buf_appendb(buf, "", 1);
1312 
1313 				hash_put(hash, buf, TYPE_Nm);
1314 
1315 				if (' ' == start[(int)sz]) {
1316 					start += (int)sz + 1;
1317 					break;
1318 				}
1319 
1320 				assert(',' == start[(int)sz]);
1321 				start += (int)sz + 1;
1322 				while (' ' == *start)
1323 					start++;
1324 			}
1325 
1326 			buf->len = 0;
1327 
1328 			if (sv == start) {
1329 				buf_append(buf, start);
1330 				return(1);
1331 			}
1332 
1333 			while (' ' == *start)
1334 				start++;
1335 
1336 			if (0 == strncmp(start, "-", 1))
1337 				start += 1;
1338 			else if (0 == strncmp(start, "\\-\\-", 4))
1339 				start += 4;
1340 			else if (0 == strncmp(start, "\\-", 2))
1341 				start += 2;
1342 			else if (0 == strncmp(start, "\\(en", 4))
1343 				start += 4;
1344 			else if (0 == strncmp(start, "\\(em", 4))
1345 				start += 4;
1346 
1347 			while (' ' == *start)
1348 				start++;
1349 
1350 			sz = strlen(start) + 1;
1351 			buf_appendb(dbuf, start, sz);
1352 			buf_appendb(buf, start, sz);
1353 
1354 			hash_put(hash, buf, TYPE_Nd);
1355 		}
1356 	}
1357 
1358 	for (n = n->child; n; n = n->next)
1359 		if (pman_node(hash, buf, dbuf, n))
1360 			return(1);
1361 
1362 	return(0);
1363 }
1364 
1365 /*
1366  * Parse a formatted manual page.
1367  * By necessity, this involves rather crude guesswork.
1368  */
1369 static void
1370 pformatted(DB *hash, struct buf *buf,
1371 		struct buf *dbuf, const struct of *of)
1372 {
1373 	FILE		*stream;
1374 	char		*line, *p, *title;
1375 	size_t		 len, plen, titlesz;
1376 
1377 	if (NULL == (stream = fopen(of->fname, "r"))) {
1378 		if (warnings)
1379 			perror(of->fname);
1380 		return;
1381 	}
1382 
1383 	/*
1384 	 * Always use the title derived from the filename up front,
1385 	 * do not even try to find it in the file.  This also makes
1386 	 * sure we don't end up with an orphan index record, even if
1387 	 * the file content turns out to be completely unintelligible.
1388 	 */
1389 
1390 	buf->len = 0;
1391 	buf_append(buf, of->title);
1392 	hash_put(hash, buf, TYPE_Nm);
1393 
1394 	/* Skip to first blank line. */
1395 
1396 	while (NULL != (line = fgetln(stream, &len)))
1397 		if ('\n' == *line)
1398 			break;
1399 
1400 	/*
1401 	 * Assume the first line that is not indented
1402 	 * is the first section header.  Skip to it.
1403 	 */
1404 
1405 	while (NULL != (line = fgetln(stream, &len)))
1406 		if ('\n' != *line && ' ' != *line)
1407 			break;
1408 
1409 	/*
1410 	 * Read up until the next section into a buffer.
1411 	 * Strip the leading and trailing newline from each read line,
1412 	 * appending a trailing space.
1413 	 * Ignore empty (whitespace-only) lines.
1414 	 */
1415 
1416 	titlesz = 0;
1417 	title = NULL;
1418 
1419 	while (NULL != (line = fgetln(stream, &len))) {
1420 		if (' ' != *line || '\n' != line[(int)len - 1])
1421 			break;
1422 		while (len > 0 && isspace((unsigned char)*line)) {
1423 			line++;
1424 			len--;
1425 		}
1426 		if (1 == len)
1427 			continue;
1428 		title = mandoc_realloc(title, titlesz + len);
1429 		memcpy(title + titlesz, line, len);
1430 		titlesz += len;
1431 		title[(int)titlesz - 1] = ' ';
1432 	}
1433 
1434 
1435 	/*
1436 	 * If no page content can be found, or the input line
1437 	 * is already the next section header, or there is no
1438 	 * trailing newline, reuse the page title as the page
1439 	 * description.
1440 	 */
1441 
1442 	if (NULL == title || '\0' == *title) {
1443 		if (warnings)
1444 			fprintf(stderr, "%s: cannot find NAME section\n",
1445 					of->fname);
1446 		buf_appendb(dbuf, buf->cp, buf->size);
1447 		hash_put(hash, buf, TYPE_Nd);
1448 		fclose(stream);
1449 		free(title);
1450 		return;
1451 	}
1452 
1453 	title = mandoc_realloc(title, titlesz + 1);
1454 	title[(int)titlesz] = '\0';
1455 
1456 	/*
1457 	 * Skip to the first dash.
1458 	 * Use the remaining line as the description (no more than 70
1459 	 * bytes).
1460 	 */
1461 
1462 	if (NULL != (p = strstr(title, "- "))) {
1463 		for (p += 2; ' ' == *p || '\b' == *p; p++)
1464 			/* Skip to next word. */ ;
1465 	} else {
1466 		if (warnings)
1467 			fprintf(stderr, "%s: no dash in title line\n",
1468 					of->fname);
1469 		p = title;
1470 	}
1471 
1472 	plen = strlen(p);
1473 
1474 	/* Strip backspace-encoding from line. */
1475 
1476 	while (NULL != (line = memchr(p, '\b', plen))) {
1477 		len = line - p;
1478 		if (0 == len) {
1479 			memmove(line, line + 1, plen--);
1480 			continue;
1481 		}
1482 		memmove(line - 1, line + 1, plen - len);
1483 		plen -= 2;
1484 	}
1485 
1486 	buf_appendb(dbuf, p, plen + 1);
1487 	buf->len = 0;
1488 	buf_appendb(buf, p, plen + 1);
1489 	hash_put(hash, buf, TYPE_Nd);
1490 	fclose(stream);
1491 	free(title);
1492 }
1493 
1494 static void
1495 ofile_argbuild(int argc, char *argv[], struct of **of)
1496 {
1497 	char		 buf[MAXPATHLEN];
1498 	const char	*sec, *arch, *title;
1499 	char		*p;
1500 	int		 i, src_form;
1501 	struct of	*nof;
1502 
1503 	for (i = 0; i < argc; i++) {
1504 
1505 		/*
1506 		 * Try to infer the manual section, architecture and
1507 		 * page title from the path, assuming it looks like
1508 		 *   man*[/<arch>]/<title>.<section>   or
1509 		 *   cat<section>[/<arch>]/<title>.0
1510 		 */
1511 
1512 		if (strlcpy(buf, argv[i], sizeof(buf)) >= sizeof(buf)) {
1513 			fprintf(stderr, "%s: path too long\n", argv[i]);
1514 			continue;
1515 		}
1516 		sec = arch = title = "";
1517 		src_form = 0;
1518 		p = strrchr(buf, '\0');
1519 		while (p-- > buf) {
1520 			if ('\0' == *sec && '.' == *p) {
1521 				sec = p + 1;
1522 				*p = '\0';
1523 				if ('0' == *sec)
1524 					src_form |= MANDOC_FORM;
1525 				else if ('1' <= *sec && '9' >= *sec)
1526 					src_form |= MANDOC_SRC;
1527 				continue;
1528 			}
1529 			if ('/' != *p)
1530 				continue;
1531 			if ('\0' == *title) {
1532 				title = p + 1;
1533 				*p = '\0';
1534 				continue;
1535 			}
1536 			if (0 == strncmp("man", p + 1, 3))
1537 				src_form |= MANDOC_SRC;
1538 			else if (0 == strncmp("cat", p + 1, 3))
1539 				src_form |= MANDOC_FORM;
1540 			else
1541 				arch = p + 1;
1542 			break;
1543 		}
1544 		if ('\0' == *title) {
1545 			if (warnings)
1546 				fprintf(stderr,
1547 				    "%s: cannot deduce title "
1548 				    "from filename\n",
1549 				    argv[i]);
1550 			title = buf;
1551 		}
1552 
1553 		/*
1554 		 * Build the file structure.
1555 		 */
1556 
1557 		nof = mandoc_calloc(1, sizeof(struct of));
1558 		nof->fname = mandoc_strdup(argv[i]);
1559 		nof->sec = mandoc_strdup(sec);
1560 		nof->arch = mandoc_strdup(arch);
1561 		nof->title = mandoc_strdup(title);
1562 		nof->src_form = src_form;
1563 
1564 		/*
1565 		 * Add the structure to the list.
1566 		 */
1567 
1568 		if (verb > 1)
1569 			printf("%s: scheduling\n", argv[i]);
1570 		if (NULL == *of) {
1571 			*of = nof;
1572 			(*of)->first = nof;
1573 		} else {
1574 			nof->first = (*of)->first;
1575 			(*of)->next = nof;
1576 			*of = nof;
1577 		}
1578 	}
1579 }
1580 
1581 /*
1582  * Recursively build up a list of files to parse.
1583  * We use this instead of ftw() and so on because I don't want global
1584  * variables hanging around.
1585  * This ignores the whatis.db and whatis.index files, but assumes that
1586  * everything else is a manual.
1587  * Pass in a pointer to a NULL structure for the first invocation.
1588  */
1589 static void
1590 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1591 		int p_src_form, struct of **of)
1592 {
1593 	char		 buf[MAXPATHLEN];
1594 	size_t		 sz;
1595 	DIR		*d;
1596 	const char	*fn, *sec, *arch;
1597 	char		*p, *q, *suffix;
1598 	struct of	*nof;
1599 	struct dirent	*dp;
1600 	int		 src_form;
1601 
1602 	if (NULL == (d = opendir(dir))) {
1603 		if (warnings)
1604 			perror(dir);
1605 		return;
1606 	}
1607 
1608 	while (NULL != (dp = readdir(d))) {
1609 		fn = dp->d_name;
1610 
1611 		if ('.' == *fn)
1612 			continue;
1613 
1614 		src_form = p_src_form;
1615 
1616 		if (DT_DIR == dp->d_type) {
1617 			sec = psec;
1618 			arch = parch;
1619 
1620 			/*
1621 			 * By default, only use directories called:
1622 			 *   man<section>/[<arch>/]   or
1623 			 *   cat<section>/[<arch>/]
1624 			 */
1625 
1626 			if ('\0' == *sec) {
1627 				if(0 == strncmp("man", fn, 3)) {
1628 					src_form |= MANDOC_SRC;
1629 					sec = fn + 3;
1630 				} else if (0 == strncmp("cat", fn, 3)) {
1631 					src_form |= MANDOC_FORM;
1632 					sec = fn + 3;
1633 				} else {
1634 					if (warnings) fprintf(stderr,
1635 					    "%s/%s: bad section\n",
1636 					    dir, fn);
1637 					if (use_all)
1638 						sec = fn;
1639 					else
1640 						continue;
1641 				}
1642 			} else if ('\0' == *arch) {
1643 				if (NULL != strchr(fn, '.')) {
1644 					if (warnings) fprintf(stderr,
1645 					    "%s/%s: bad architecture\n",
1646 					    dir, fn);
1647 					if (0 == use_all)
1648 						continue;
1649 				}
1650 				arch = fn;
1651 			} else {
1652 				if (warnings) fprintf(stderr, "%s/%s: "
1653 				    "excessive subdirectory\n", dir, fn);
1654 				if (0 == use_all)
1655 					continue;
1656 			}
1657 
1658 			buf[0] = '\0';
1659 			strlcat(buf, dir, MAXPATHLEN);
1660 			strlcat(buf, "/", MAXPATHLEN);
1661 			sz = strlcat(buf, fn, MAXPATHLEN);
1662 
1663 			if (MAXPATHLEN <= sz) {
1664 				if (warnings) fprintf(stderr, "%s/%s: "
1665 				    "path too long\n", dir, fn);
1666 				continue;
1667 			}
1668 
1669 			if (verb > 1)
1670 				printf("%s: scanning\n", buf);
1671 
1672 			ofile_dirbuild(buf, sec, arch, src_form, of);
1673 			continue;
1674 		}
1675 
1676 		if (DT_REG != dp->d_type) {
1677 			if (warnings)
1678 				fprintf(stderr,
1679 				    "%s/%s: not a regular file\n",
1680 				    dir, fn);
1681 			continue;
1682 		}
1683 		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1684 			continue;
1685 		if ('\0' == *psec) {
1686 			if (warnings)
1687 				fprintf(stderr,
1688 				    "%s/%s: file outside section\n",
1689 				    dir, fn);
1690 			if (0 == use_all)
1691 				continue;
1692 		}
1693 
1694 		/*
1695 		 * By default, skip files where the file name suffix
1696 		 * does not agree with the section directory
1697 		 * they are located in.
1698 		 */
1699 
1700 		suffix = strrchr(fn, '.');
1701 		if (NULL == suffix) {
1702 			if (warnings)
1703 				fprintf(stderr,
1704 				    "%s/%s: no filename suffix\n",
1705 				    dir, fn);
1706 			if (0 == use_all)
1707 				continue;
1708 		} else if ((MANDOC_SRC & src_form &&
1709 				strcmp(suffix + 1, psec)) ||
1710 			    (MANDOC_FORM & src_form &&
1711 				strcmp(suffix + 1, "0"))) {
1712 			if (warnings)
1713 				fprintf(stderr,
1714 				    "%s/%s: wrong filename suffix\n",
1715 				    dir, fn);
1716 			if (0 == use_all)
1717 				continue;
1718 			if ('0' == suffix[1])
1719 				src_form |= MANDOC_FORM;
1720 			else if ('1' <= suffix[1] && '9' >= suffix[1])
1721 				src_form |= MANDOC_SRC;
1722 		}
1723 
1724 		/*
1725 		 * Skip formatted manuals if a source version is
1726 		 * available.  Ignore the age: it is very unlikely
1727 		 * that people install newer formatted base manuals
1728 		 * when they used to have source manuals before,
1729 		 * and in ports, old manuals get removed on update.
1730 		 */
1731 		if (0 == use_all && MANDOC_FORM & src_form &&
1732 				'\0' != *psec) {
1733 			buf[0] = '\0';
1734 			strlcat(buf, dir, MAXPATHLEN);
1735 			p = strrchr(buf, '/');
1736 			if ('\0' != *parch && NULL != p)
1737 				for (p--; p > buf; p--)
1738 					if ('/' == *p)
1739 						break;
1740 			if (NULL == p)
1741 				p = buf;
1742 			else
1743 				p++;
1744 			if (0 == strncmp("cat", p, 3))
1745 				memcpy(p, "man", 3);
1746 			strlcat(buf, "/", MAXPATHLEN);
1747 			sz = strlcat(buf, fn, MAXPATHLEN);
1748 			if (sz >= MAXPATHLEN) {
1749 				if (warnings) fprintf(stderr,
1750 				    "%s/%s: path too long\n",
1751 				    dir, fn);
1752 				continue;
1753 			}
1754 			q = strrchr(buf, '.');
1755 			if (NULL != q && p < q++) {
1756 				*q = '\0';
1757 				sz = strlcat(buf, psec, MAXPATHLEN);
1758 				if (sz >= MAXPATHLEN) {
1759 					if (warnings) fprintf(stderr,
1760 					    "%s/%s: path too long\n",
1761 					    dir, fn);
1762 					continue;
1763 				}
1764 				if (0 == access(buf, R_OK))
1765 					continue;
1766 			}
1767 		}
1768 
1769 		buf[0] = '\0';
1770 		assert('.' == dir[0]);
1771 		if ('/' == dir[1]) {
1772 			strlcat(buf, dir + 2, MAXPATHLEN);
1773 			strlcat(buf, "/", MAXPATHLEN);
1774 		}
1775 		sz = strlcat(buf, fn, MAXPATHLEN);
1776 		if (sz >= MAXPATHLEN) {
1777 			if (warnings) fprintf(stderr,
1778 			    "%s/%s: path too long\n", dir, fn);
1779 			continue;
1780 		}
1781 
1782 		nof = mandoc_calloc(1, sizeof(struct of));
1783 		nof->fname = mandoc_strdup(buf);
1784 		nof->sec = mandoc_strdup(psec);
1785 		nof->arch = mandoc_strdup(parch);
1786 		nof->src_form = src_form;
1787 
1788 		/*
1789 		 * Remember the file name without the extension,
1790 		 * to be used as the page title in the database.
1791 		 */
1792 
1793 		if (NULL != suffix)
1794 			*suffix = '\0';
1795 		nof->title = mandoc_strdup(fn);
1796 
1797 		/*
1798 		 * Add the structure to the list.
1799 		 */
1800 
1801 		if (verb > 1)
1802 			printf("%s: scheduling\n", buf);
1803 
1804 		if (NULL == *of) {
1805 			*of = nof;
1806 			(*of)->first = nof;
1807 		} else {
1808 			nof->first = (*of)->first;
1809 			(*of)->next = nof;
1810 			*of = nof;
1811 		}
1812 	}
1813 
1814 	closedir(d);
1815 }
1816 
1817 static void
1818 ofile_free(struct of *of)
1819 {
1820 	struct of	*nof;
1821 
1822 	if (NULL != of)
1823 		of = of->first;
1824 
1825 	while (NULL != of) {
1826 		nof = of->next;
1827 		free(of->fname);
1828 		free(of->sec);
1829 		free(of->arch);
1830 		free(of->title);
1831 		free(of);
1832 		of = nof;
1833 	}
1834 }
1835