xref: /dragonfly/contrib/mdocml/mandocdb.c (revision 38b930d0)
1 /*	$Id: mandocdb.c,v 1.49.2.7 2013/10/02 21:03:26 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <dirent.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <getopt.h>
30 #include <limits.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #if defined(__linux__) || defined(__sun)
38 # include <endian.h>
39 # include <db_185.h>
40 #elif defined(__APPLE__)
41 # include <libkern/OSByteOrder.h>
42 # include <db.h>
43 #else
44 # include <sys/endian.h>
45 # include <db.h>
46 #endif
47 
48 #if defined(__sun)
49 #include <sys/stat.h>
50 #endif
51 
52 #include "man.h"
53 #include "mdoc.h"
54 #include "mandoc.h"
55 #include "mandocdb.h"
56 #include "manpath.h"
57 
/* Initial size of a struct buf and the overshoot used when one grows. */
#define	MANDOC_BUFSZ	  BUFSIZ
/* Number of slots added each time the struct recs stack grows. */
#define	MANDOC_SLOP	  1024

/*
 * Bits of the src_form member of struct of.
 * index_merge() tries to parse a file as source when MANDOC_SRC is
 * set or MANDOC_FORM is clear.
 */
#define	MANDOC_SRC	  (1 << 0)
#define	MANDOC_FORM	  (1 << 1)
63 
64 /* Access to the mandoc database on disk. */
65 
66 struct	mdb {
67 	char		  idxn[PATH_MAX]; /* index db filename */
68 	char		  dbn[PATH_MAX]; /* keyword db filename */
69 	DB		 *idx; /* index recno database */
70 	DB		 *db; /* keyword btree database */
71 };
72 
73 /* Stack of temporarily unused index records. */
74 
75 struct	recs {
76 	recno_t		 *stack; /* pointer to a malloc'ed array */
77 	size_t		  size; /* number of allocated slots */
78 	size_t		  cur; /* current number of empty records */
79 	recno_t		  last; /* last record number in the index */
80 };
81 
/*
 * One manual file queued for processing.
 * Files form a hand-rolled singly linked list (no QUEUE macros);
 * every element also points back at the head of its list.
 */
struct	of {
	/* File name; heap-allocated. */
	char		 *fname;
	/* Section, architecture and title associated with the file. */
	char		 *sec;
	char		 *arch;
	char		 *title;
	/* Combination of MANDOC_SRC and MANDOC_FORM. */
	int		  src_form;
	/* Next element; NULL for the last one. */
	struct of	 *next;
	/* First element of the list. */
	struct of	 *first;
};
93 
/* Growable byte buffer; see buf_appendb() and buf_append(). */
struct	buf {
	/* Storage, reallocated as needed. */
	char		 *cp;
	/* Number of bytes currently in use. */
	size_t		  len;
	/* Number of bytes allocated. */
	size_t		  size;
};
101 
/*
 * The operation selected on the command line.
 * OP_DEFAULT must stay zero: main() uses "if (op)" to detect that an
 * operation was already chosen.
 */
enum	op {
	/* New databases from a directory list or the default config. */
	OP_DEFAULT = 0,
	/* New databases from a custom configuration file. */
	OP_CONFFILE = 1,
	/* Delete and re-add entries in an existing database. */
	OP_UPDATE = 2,
	/* Delete entries from an existing database. */
	OP_DELETE = 3,
	/* Change no databases, only report potential problems. */
	OP_TEST = 4
};
111 
/*
 * Parameter lists shared by the node handlers: the pending keyword
 * hash table, the keyword buffer, the description buffer and the
 * node to look at (plus the document meta data for mdoc).
 */
#define	MAN_ARGS \
	DB *hash, struct buf *buf, struct buf *dbuf, \
	const struct man_node *n
#define	MDOC_ARGS \
	DB *hash, struct buf *buf, struct buf *dbuf, \
	const struct mdoc_node *n, const struct mdoc_meta *m
121 
122 static	void		  buf_appendmdoc(struct buf *,
123 				const struct mdoc_node *, int);
124 static	void		  buf_append(struct buf *, const char *);
125 static	void		  buf_appendb(struct buf *,
126 				const void *, size_t);
127 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
128 static	void		  hash_put(DB *, const struct buf *, uint64_t);
129 static	void		  hash_reset(DB **);
130 static	void		  index_merge(const struct of *, struct mparse *,
131 				struct buf *, struct buf *, DB *,
132 				struct mdb *, struct recs *);
133 static	void		  index_prune(const struct of *, struct mdb *,
134 				struct recs *);
135 static	void		  ofile_argbuild(int, char *[], struct of **,
136 				const char *);
137 static	void		  ofile_dirbuild(const char *, const char *,
138 				const char *, int, struct of **);
139 static	void		  ofile_free(struct of *);
140 static	void		  pformatted(DB *, struct buf *,
141 				struct buf *, const struct of *);
142 static	int		  pman_node(MAN_ARGS);
143 static	void		  pmdoc_node(MDOC_ARGS);
144 static	int		  pmdoc_head(MDOC_ARGS);
145 static	int		  pmdoc_body(MDOC_ARGS);
146 static	int		  pmdoc_Fd(MDOC_ARGS);
147 static	int		  pmdoc_In(MDOC_ARGS);
148 static	int		  pmdoc_Fn(MDOC_ARGS);
149 static	int		  pmdoc_Nd(MDOC_ARGS);
150 static	int		  pmdoc_Nm(MDOC_ARGS);
151 static	int		  pmdoc_Sh(MDOC_ARGS);
152 static	int		  pmdoc_St(MDOC_ARGS);
153 static	int		  pmdoc_Xr(MDOC_ARGS);
154 
/* Flag for mdoc_handler.flags: automatically index child nodes. */
#define	MDOCF_CHILD	  0x01

/* How one mdoc(7) macro is indexed; see the mdocs[] table. */
struct	mdoc_handler {
	/* Optional handler. */
	int		(*fp)(MDOC_ARGS);
	/* Keyword type mask; set unless the handler returns 0. */
	uint64_t	  mask;
	/* MDOCF_* bits, for use by pmdoc_node. */
	int		  flags;
};
162 
163 static	const struct mdoc_handler mdocs[MDOC_MAX] = {
164 	{ NULL, 0, 0 },  /* Ap */
165 	{ NULL, 0, 0 },  /* Dd */
166 	{ NULL, 0, 0 },  /* Dt */
167 	{ NULL, 0, 0 },  /* Os */
168 	{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
169 	{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
170 	{ NULL, 0, 0 },  /* Pp */
171 	{ NULL, 0, 0 },  /* D1 */
172 	{ NULL, 0, 0 },  /* Dl */
173 	{ NULL, 0, 0 },  /* Bd */
174 	{ NULL, 0, 0 },  /* Ed */
175 	{ NULL, 0, 0 },  /* Bl */
176 	{ NULL, 0, 0 },  /* El */
177 	{ NULL, 0, 0 },  /* It */
178 	{ NULL, 0, 0 },  /* Ad */
179 	{ NULL, TYPE_An, MDOCF_CHILD },  /* An */
180 	{ NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
181 	{ NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
182 	{ NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
183 	{ NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
184 	{ NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
185 	{ NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
186 	{ NULL, 0, 0 },  /* Ex */
187 	{ NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
188 	{ pmdoc_Fd, TYPE_In, 0 },  /* Fd */
189 	{ NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
190 	{ pmdoc_Fn, 0, 0 },  /* Fn */
191 	{ NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
192 	{ NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
193 	{ pmdoc_In, TYPE_In, 0 },  /* In */
194 	{ NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
195 	{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
196 	{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
197 	{ NULL, 0, 0 },  /* Op */
198 	{ NULL, 0, 0 },  /* Ot */
199 	{ NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
200 	{ NULL, 0, 0 },  /* Rv */
201 	{ pmdoc_St, TYPE_St, 0 },  /* St */
202 	{ NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
203 	{ pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
204 	{ pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
205 	{ NULL, 0, 0 },  /* %A */
206 	{ NULL, 0, 0 },  /* %B */
207 	{ NULL, 0, 0 },  /* %D */
208 	{ NULL, 0, 0 },  /* %I */
209 	{ NULL, 0, 0 },  /* %J */
210 	{ NULL, 0, 0 },  /* %N */
211 	{ NULL, 0, 0 },  /* %O */
212 	{ NULL, 0, 0 },  /* %P */
213 	{ NULL, 0, 0 },  /* %R */
214 	{ NULL, 0, 0 },  /* %T */
215 	{ NULL, 0, 0 },  /* %V */
216 	{ NULL, 0, 0 },  /* Ac */
217 	{ NULL, 0, 0 },  /* Ao */
218 	{ NULL, 0, 0 },  /* Aq */
219 	{ NULL, TYPE_At, MDOCF_CHILD },  /* At */
220 	{ NULL, 0, 0 },  /* Bc */
221 	{ NULL, 0, 0 },  /* Bf */
222 	{ NULL, 0, 0 },  /* Bo */
223 	{ NULL, 0, 0 },  /* Bq */
224 	{ NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
225 	{ NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
226 	{ NULL, 0, 0 },  /* Db */
227 	{ NULL, 0, 0 },  /* Dc */
228 	{ NULL, 0, 0 },  /* Do */
229 	{ NULL, 0, 0 },  /* Dq */
230 	{ NULL, 0, 0 },  /* Ec */
231 	{ NULL, 0, 0 },  /* Ef */
232 	{ NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
233 	{ NULL, 0, 0 },  /* Eo */
234 	{ NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
235 	{ NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
236 	{ NULL, 0, 0 },  /* No */
237 	{ NULL, 0, 0 },  /* Ns */
238 	{ NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
239 	{ NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
240 	{ NULL, 0, 0 },  /* Pc */
241 	{ NULL, 0, 0 },  /* Pf */
242 	{ NULL, 0, 0 },  /* Po */
243 	{ NULL, 0, 0 },  /* Pq */
244 	{ NULL, 0, 0 },  /* Qc */
245 	{ NULL, 0, 0 },  /* Ql */
246 	{ NULL, 0, 0 },  /* Qo */
247 	{ NULL, 0, 0 },  /* Qq */
248 	{ NULL, 0, 0 },  /* Re */
249 	{ NULL, 0, 0 },  /* Rs */
250 	{ NULL, 0, 0 },  /* Sc */
251 	{ NULL, 0, 0 },  /* So */
252 	{ NULL, 0, 0 },  /* Sq */
253 	{ NULL, 0, 0 },  /* Sm */
254 	{ NULL, 0, 0 },  /* Sx */
255 	{ NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
256 	{ NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
257 	{ NULL, 0, 0 },  /* Ux */
258 	{ NULL, 0, 0 },  /* Xc */
259 	{ NULL, 0, 0 },  /* Xo */
260 	{ pmdoc_head, TYPE_Fn, 0 },  /* Fo */
261 	{ NULL, 0, 0 },  /* Fc */
262 	{ NULL, 0, 0 },  /* Oo */
263 	{ NULL, 0, 0 },  /* Oc */
264 	{ NULL, 0, 0 },  /* Bk */
265 	{ NULL, 0, 0 },  /* Ek */
266 	{ NULL, 0, 0 },  /* Bt */
267 	{ NULL, 0, 0 },  /* Hf */
268 	{ NULL, 0, 0 },  /* Fr */
269 	{ NULL, 0, 0 },  /* Ud */
270 	{ NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
271 	{ NULL, 0, 0 },  /* Lp */
272 	{ NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
273 	{ NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
274 	{ NULL, 0, 0 },  /* Brq */
275 	{ NULL, 0, 0 },  /* Bro */
276 	{ NULL, 0, 0 },  /* Brc */
277 	{ NULL, 0, 0 },  /* %C */
278 	{ NULL, 0, 0 },  /* Es */
279 	{ NULL, 0, 0 },  /* En */
280 	{ NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
281 	{ NULL, 0, 0 },  /* %Q */
282 	{ NULL, 0, 0 },  /* br */
283 	{ NULL, 0, 0 },  /* sp */
284 	{ NULL, 0, 0 },  /* %U */
285 	{ NULL, 0, 0 },  /* Ta */
286 };
287 
/* Basename of argv[0], used in the usage message. */
static	const char	 *progname;
/* Set by -a and -t: use all directories and files. */
static	int		  use_all;
/* Output verbosity level, incremented by each -v. */
static	int		  verb;
/* Set by -W and -t: report potential problems in manuals. */
static	int		  warnings;
292 
293 int
294 main(int argc, char *argv[])
295 {
296 	struct mparse	*mp; /* parse sequence */
297 	struct manpaths	 dirs;
298 	struct mdb	 mdb;
299 	struct recs	 recs;
300 	enum op		 op; /* current operation */
301 	const char	*dir;
302 	char		*cp;
303 	char		 pbuf[PATH_MAX];
304 	int		 ch, i, flags;
305 	DB		*hash; /* temporary keyword hashtable */
306 	BTREEINFO	 info; /* btree configuration */
307 	size_t		 sz1, sz2, ipath;
308 	struct buf	 buf, /* keyword buffer */
309 			 dbuf; /* description buffer */
310 	struct of	*of; /* list of files for processing */
311 	extern int	 optind;
312 	extern char	*optarg;
313 
314 	progname = strrchr(argv[0], '/');
315 	if (progname == NULL)
316 		progname = argv[0];
317 	else
318 		++progname;
319 
320 	memset(&dirs, 0, sizeof(struct manpaths));
321 	memset(&mdb, 0, sizeof(struct mdb));
322 	memset(&recs, 0, sizeof(struct recs));
323 
324 	of = NULL;
325 	mp = NULL;
326 	hash = NULL;
327 	op = OP_DEFAULT;
328 	dir = NULL;
329 
330 	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
331 		switch (ch) {
332 		case ('a'):
333 			use_all = 1;
334 			break;
335 		case ('C'):
336 			if (op) {
337 				fprintf(stderr,
338 				    "-C: conflicting options\n");
339 				goto usage;
340 			}
341 			dir = optarg;
342 			op = OP_CONFFILE;
343 			break;
344 		case ('d'):
345 			if (op) {
346 				fprintf(stderr,
347 				    "-d: conflicting options\n");
348 				goto usage;
349 			}
350 			dir = optarg;
351 			op = OP_UPDATE;
352 			break;
353 		case ('t'):
354 			dup2(STDOUT_FILENO, STDERR_FILENO);
355 			if (op) {
356 				fprintf(stderr,
357 				    "-t: conflicting options\n");
358 				goto usage;
359 			}
360 			op = OP_TEST;
361 			use_all = 1;
362 			warnings = 1;
363 			break;
364 		case ('u'):
365 			if (op) {
366 				fprintf(stderr,
367 				    "-u: conflicting options\n");
368 				goto usage;
369 			}
370 			dir = optarg;
371 			op = OP_DELETE;
372 			break;
373 		case ('v'):
374 			verb++;
375 			break;
376 		case ('W'):
377 			warnings = 1;
378 			break;
379 		default:
380 			goto usage;
381 		}
382 
383 	argc -= optind;
384 	argv += optind;
385 
386 	if (OP_CONFFILE == op && argc > 0) {
387 		fprintf(stderr, "-C: too many arguments\n");
388 		goto usage;
389 	}
390 
391 	memset(&info, 0, sizeof(BTREEINFO));
392 	info.lorder = 4321;
393 	info.flags = R_DUP;
394 
395 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
396 
397 	memset(&buf, 0, sizeof(struct buf));
398 	memset(&dbuf, 0, sizeof(struct buf));
399 
400 	buf.size = dbuf.size = MANDOC_BUFSZ;
401 
402 	buf.cp = mandoc_malloc(buf.size);
403 	dbuf.cp = mandoc_malloc(dbuf.size);
404 
405 	if (OP_TEST == op) {
406 		ofile_argbuild(argc, argv, &of, NULL);
407 		if (NULL == of)
408 			goto out;
409 		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
410 		goto out;
411 	}
412 
413 	if (OP_UPDATE == op || OP_DELETE == op) {
414 		if (NULL == realpath(dir, pbuf)) {
415 			perror(dir);
416 			exit((int)MANDOCLEVEL_BADARG);
417 		}
418 		if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
419 			fprintf(stderr, "%s: path too long\n", pbuf);
420 			exit((int)MANDOCLEVEL_BADARG);
421 		}
422 
423 		strlcat(mdb.dbn, pbuf, PATH_MAX);
424 		sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX);
425 
426 		strlcat(mdb.idxn, pbuf, PATH_MAX);
427 		sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX);
428 
429 		if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) {
430 			fprintf(stderr, "%s: path too long\n", mdb.idxn);
431 			exit((int)MANDOCLEVEL_BADARG);
432 		}
433 
434 		flags = O_CREAT | O_RDWR;
435 		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
436 		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
437 
438 		if (NULL == mdb.db) {
439 			perror(mdb.dbn);
440 			exit((int)MANDOCLEVEL_SYSERR);
441 		} else if (NULL == mdb.idx) {
442 			perror(mdb.idxn);
443 			exit((int)MANDOCLEVEL_SYSERR);
444 		}
445 
446 		ofile_argbuild(argc, argv, &of, pbuf);
447 
448 		if (NULL == of)
449 			goto out;
450 
451 		index_prune(of, &mdb, &recs);
452 
453 		/*
454 		 * Go to the root of the respective manual tree.
455 		 * This must work or no manuals may be found (they're
456 		 * indexed relative to the root).
457 		 */
458 
459 		if (OP_UPDATE == op) {
460 			if (-1 == chdir(dir)) {
461 				perror(dir);
462 				exit((int)MANDOCLEVEL_SYSERR);
463 			}
464 			index_merge(of, mp, &dbuf, &buf, hash,
465 					&mdb, &recs);
466 		}
467 
468 		goto out;
469 	}
470 
471 	/*
472 	 * Configure the directories we're going to scan.
473 	 * If we have command-line arguments, use them.
474 	 * If not, we use man(1)'s method (see mandocdb.8).
475 	 */
476 
477 	if (argc > 0) {
478 		dirs.paths = mandoc_calloc(argc, sizeof(char *));
479 		dirs.sz = argc;
480 		for (i = 0; i < argc; i++) {
481 			if (NULL == (cp = realpath(argv[i], pbuf))) {
482 				perror(argv[i]);
483 				goto out;
484 			}
485 			dirs.paths[i] = mandoc_strdup(cp);
486 		}
487 	} else
488 		manpath_parse(&dirs, dir, NULL, NULL);
489 
490 	for (ipath = 0; ipath < dirs.sz; ipath++) {
491 
492 		/*
493 		 * Go to the root of the respective manual tree.
494 		 * This must work or no manuals may be found:
495 		 * They are indexed relative to the root.
496 		 */
497 
498 		if (-1 == chdir(dirs.paths[ipath])) {
499 			perror(dirs.paths[ipath]);
500 			exit((int)MANDOCLEVEL_SYSERR);
501 		}
502 
503 		/* Create a new database in two temporary files. */
504 
505 		flags = O_CREAT | O_EXCL | O_RDWR;
506 		while (NULL == mdb.db) {
507 			strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX);
508 			strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX);
509 			if (NULL == mktemp(mdb.dbn)) {
510 				perror(mdb.dbn);
511 				exit((int)MANDOCLEVEL_SYSERR);
512 			}
513 			mdb.db = dbopen(mdb.dbn, flags, 0644,
514 					DB_BTREE, &info);
515 			if (NULL == mdb.db && EEXIST != errno) {
516 				perror(mdb.dbn);
517 				exit((int)MANDOCLEVEL_SYSERR);
518 			}
519 		}
520 		while (NULL == mdb.idx) {
521 			strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX);
522 			strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX);
523 			if (NULL == mktemp(mdb.idxn)) {
524 				perror(mdb.idxn);
525 				unlink(mdb.dbn);
526 				exit((int)MANDOCLEVEL_SYSERR);
527 			}
528 			mdb.idx = dbopen(mdb.idxn, flags, 0644,
529 					DB_RECNO, NULL);
530 			if (NULL == mdb.idx && EEXIST != errno) {
531 				perror(mdb.idxn);
532 				unlink(mdb.dbn);
533 				exit((int)MANDOCLEVEL_SYSERR);
534 			}
535 		}
536 
537 		/*
538 		 * Search for manuals and fill the new database.
539 		 */
540 
541 	       	ofile_dirbuild(".", "", "", 0, &of);
542 
543 		if (NULL != of) {
544 			index_merge(of, mp, &dbuf, &buf, hash,
545 			     &mdb, &recs);
546 			ofile_free(of);
547 			of = NULL;
548 		}
549 
550 		(*mdb.db->close)(mdb.db);
551 		(*mdb.idx->close)(mdb.idx);
552 		mdb.db = NULL;
553 		mdb.idx = NULL;
554 
555 		/*
556 		 * Replace the old database with the new one.
557 		 * This is not perfectly atomic,
558 		 * but i cannot think of a better way.
559 		 */
560 
561 		if (-1 == rename(mdb.dbn, MANDOC_DB)) {
562 			perror(MANDOC_DB);
563 			unlink(mdb.dbn);
564 			unlink(mdb.idxn);
565 			exit((int)MANDOCLEVEL_SYSERR);
566 		}
567 		if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
568 			perror(MANDOC_IDX);
569 			unlink(MANDOC_DB);
570 			unlink(MANDOC_IDX);
571 			unlink(mdb.idxn);
572 			exit((int)MANDOCLEVEL_SYSERR);
573 		}
574 	}
575 
576 out:
577 	if (mdb.db)
578 		(*mdb.db->close)(mdb.db);
579 	if (mdb.idx)
580 		(*mdb.idx->close)(mdb.idx);
581 	if (hash)
582 		(*hash->close)(hash);
583 	if (mp)
584 		mparse_free(mp);
585 
586 	manpath_free(&dirs);
587 	ofile_free(of);
588 	free(buf.cp);
589 	free(dbuf.cp);
590 	free(recs.stack);
591 
592 	return(MANDOCLEVEL_OK);
593 
594 usage:
595 	fprintf(stderr,
596 		"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
597 		"                        -d dir [file ...] | "
598 		"-u dir [file ...]\n",
599 		progname);
600 
601 	return((int)MANDOCLEVEL_BADARG);
602 }
603 
604 void
605 index_merge(const struct of *of, struct mparse *mp,
606 		struct buf *dbuf, struct buf *buf, DB *hash,
607 		struct mdb *mdb, struct recs *recs)
608 {
609 	recno_t		 rec;
610 	int		 ch, skip;
611 	DBT		 key, val;
612 	DB		*files;  /* temporary file name table */
613 	struct mdoc	*mdoc;
614 	struct man	*man;
615 	const char	*fn, *msec, *march, *mtitle;
616 	char		*p;
617 	uint64_t	 mask;
618 	size_t		 sv;
619 	unsigned	 seq;
620 	uint64_t	 vbuf[2];
621 	char		 type;
622 
623 	if (warnings) {
624 		files = NULL;
625 		hash_reset(&files);
626 	}
627 
628 	rec = 0;
629 	for (of = of->first; of; of = of->next) {
630 		fn = of->fname;
631 
632 		/*
633 		 * Try interpreting the file as mdoc(7) or man(7)
634 		 * source code, unless it is already known to be
635 		 * formatted.  Fall back to formatted mode.
636 		 */
637 
638 		mparse_reset(mp);
639 		mdoc = NULL;
640 		man = NULL;
641 
642 		if ((MANDOC_SRC & of->src_form ||
643 		    ! (MANDOC_FORM & of->src_form)) &&
644 		    MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
645 			mparse_result(mp, &mdoc, &man);
646 
647 		if (NULL != mdoc) {
648 			msec = mdoc_meta(mdoc)->msec;
649 			march = mdoc_meta(mdoc)->arch;
650 			if (NULL == march)
651 				march = "";
652 			mtitle = mdoc_meta(mdoc)->title;
653 		} else if (NULL != man) {
654 			msec = man_meta(man)->msec;
655 			march = "";
656 			mtitle = man_meta(man)->title;
657 		} else {
658 			msec = of->sec;
659 			march = of->arch;
660 			mtitle = of->title;
661 		}
662 
663 		/*
664 		 * Check whether the manual section given in a file
665 		 * agrees with the directory where the file is located.
666 		 * Some manuals have suffixes like (3p) on their
667 		 * section number either inside the file or in the
668 		 * directory name, some are linked into more than one
669 		 * section, like encrypt(1) = makekey(8).  Do not skip
670 		 * manuals for such reasons.
671 		 */
672 
673 		skip = 0;
674 		assert(of->sec);
675 		assert(msec);
676 		if (warnings)
677 			if (strcasecmp(msec, of->sec))
678 				fprintf(stderr, "%s: "
679 					"section \"%s\" manual "
680 					"in \"%s\" directory\n",
681 					fn, msec, of->sec);
682 
683 		/*
684 		 * Manual page directories exist for each kernel
685 		 * architecture as returned by machine(1).
686 		 * However, many manuals only depend on the
687 		 * application architecture as returned by arch(1).
688 		 * For example, some (2/ARM) manuals are shared
689 		 * across the "armish" and "zaurus" kernel
690 		 * architectures.
691 		 * A few manuals are even shared across completely
692 		 * different architectures, for example fdformat(1)
693 		 * on amd64, i386, sparc, and sparc64.
694 		 * Thus, warn about architecture mismatches,
695 		 * but don't skip manuals for this reason.
696 		 */
697 
698 		assert(of->arch);
699 		assert(march);
700 		if (warnings)
701 			if (strcasecmp(march, of->arch))
702 				fprintf(stderr, "%s: "
703 					"architecture \"%s\" manual "
704 					"in \"%s\" directory\n",
705 					fn, march, of->arch);
706 
707 		/*
708 		 * By default, skip a file if the title given
709 		 * in the file disagrees with the file name.
710 		 * Do not warn, this happens for all MLINKs.
711 		 */
712 
713 		assert(of->title);
714 		assert(mtitle);
715 #if 0
716 		if (strcasecmp(mtitle, of->title))
717 			skip = 1;
718 #endif
719 
720 		/*
721 		 * Build a title string for the file.  If it matches
722 		 * the location of the file, remember the title as
723 		 * found; else, remember it as missing.
724 		 */
725 
726 		if (warnings) {
727 			buf->len = 0;
728 			buf_appendb(buf, mtitle, strlen(mtitle));
729 			buf_appendb(buf, "(", 1);
730 			buf_appendb(buf, msec, strlen(msec));
731 			if ('\0' != *march) {
732 				buf_appendb(buf, "/", 1);
733 				buf_appendb(buf, march, strlen(march));
734 			}
735 			buf_appendb(buf, ")", 2);
736 			for (p = buf->cp; '\0' != *p; p++)
737 				*p = tolower(*p);
738 			key.data = buf->cp;
739 			key.size = buf->len;
740 			val.data = NULL;
741 			val.size = 0;
742 			if (0 == skip)
743 				val.data = "";
744 			else {
745 				ch = (*files->get)(files, &key, &val, 0);
746 				if (ch < 0) {
747 					perror("hash");
748 					exit((int)MANDOCLEVEL_SYSERR);
749 				} else if (ch > 0) {
750 					val.data = (void *)fn;
751 					val.size = strlen(fn) + 1;
752 				} else
753 					val.data = NULL;
754 			}
755 			if (NULL != val.data &&
756 			    (*files->put)(files, &key, &val, 0) < 0) {
757 				perror("hash");
758 				exit((int)MANDOCLEVEL_SYSERR);
759 			}
760 		}
761 
762 		if (skip && !use_all)
763 			continue;
764 
765 		/*
766 		 * The index record value consists of a nil-terminated
767 		 * filename, a nil-terminated manual section, and a
768 		 * nil-terminated description.  Use the actual
769 		 * location of the file, such that the user can find
770 		 * it with man(1).  Since the description may not be
771 		 * set, we set a sentinel to see if we're going to
772 		 * write a nil byte in its place.
773 		 */
774 
775 		dbuf->len = 0;
776 		type = mdoc ? 'd' : (man ? 'a' : 'c');
777 		buf_appendb(dbuf, &type, 1);
778 		buf_appendb(dbuf, fn, strlen(fn) + 1);
779 		buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
780 		buf_appendb(dbuf, of->title, strlen(of->title) + 1);
781 		buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
782 
783 		sv = dbuf->len;
784 
785 		/*
786 		 * Collect keyword/mask pairs.
787 		 * Each pair will become a new btree node.
788 		 */
789 
790 		hash_reset(&hash);
791 		if (mdoc)
792 			pmdoc_node(hash, buf, dbuf,
793 				mdoc_node(mdoc), mdoc_meta(mdoc));
794 		else if (man)
795 			pman_node(hash, buf, dbuf, man_node(man));
796 		else
797 			pformatted(hash, buf, dbuf, of);
798 
799 		/* Test mode, do not access any database. */
800 
801 		if (NULL == mdb->db || NULL == mdb->idx)
802 			continue;
803 
804 		/*
805 		 * Make sure the file name is always registered
806 		 * as an .Nm search key.
807 		 */
808 		buf->len = 0;
809 		buf_append(buf, of->title);
810 		hash_put(hash, buf, TYPE_Nm);
811 
812 		/*
813 		 * Reclaim an empty index record, if available.
814 		 * Use its record number for all new btree nodes.
815 		 */
816 
817 		if (recs->cur > 0) {
818 			recs->cur--;
819 			rec = recs->stack[(int)recs->cur];
820 		} else if (recs->last > 0) {
821 			rec = recs->last;
822 			recs->last = 0;
823 		} else
824 			rec++;
825 		vbuf[1] = htobe64(rec);
826 
827 		/*
828 		 * Copy from the in-memory hashtable of pending
829 		 * keyword/mask pairs into the database.
830 		 */
831 
832 		seq = R_FIRST;
833 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
834 			seq = R_NEXT;
835 			assert(sizeof(uint64_t) == val.size);
836 			memcpy(&mask, val.data, val.size);
837 			vbuf[0] = htobe64(mask);
838 			val.size = sizeof(vbuf);
839 			val.data = &vbuf;
840 			dbt_put(mdb->db, mdb->dbn, &key, &val);
841 		}
842 		if (ch < 0) {
843 			perror("hash");
844 			unlink(mdb->dbn);
845 			unlink(mdb->idxn);
846 			exit((int)MANDOCLEVEL_SYSERR);
847 		}
848 
849 		/*
850 		 * Apply to the index.  If we haven't had a description
851 		 * set, put an empty one in now.
852 		 */
853 
854 		if (dbuf->len == sv)
855 			buf_appendb(dbuf, "", 1);
856 
857 		key.data = &rec;
858 		key.size = sizeof(recno_t);
859 
860 		val.data = dbuf->cp;
861 		val.size = dbuf->len;
862 
863 		if (verb)
864 			printf("%s: adding to index\n", fn);
865 
866 		dbt_put(mdb->idx, mdb->idxn, &key, &val);
867 	}
868 
869 	/*
870 	 * Iterate the remembered file titles and check that
871 	 * all files can be found by their main title.
872 	 */
873 
874 	if (warnings) {
875 		seq = R_FIRST;
876 		while (0 == (*files->seq)(files, &key, &val, seq)) {
877 			seq = R_NEXT;
878 			if (val.size)
879 				fprintf(stderr, "%s: probably "
880 				    "unreachable, title is %s\n",
881 				    (char *)val.data, (char *)key.data);
882 		}
883 		(*files->close)(files);
884 	}
885 }
886 
887 /*
888  * Scan through all entries in the index file `idx' and prune those
889  * entries in `ofile'.
890  * Pruning consists of removing from `db', then invalidating the entry
891  * in `idx' (zeroing its value size).
892  */
893 static void
894 index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
895 {
896 	const struct of	*of;
897 	const char	*fn;
898 	uint64_t	 vbuf[2];
899 	unsigned	 seq, sseq;
900 	DBT		 key, val;
901 	int		 ch;
902 
903 	recs->cur = 0;
904 	seq = R_FIRST;
905 	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
906 		seq = R_NEXT;
907 		assert(sizeof(recno_t) == key.size);
908 		memcpy(&recs->last, key.data, key.size);
909 
910 		/* Deleted records are zero-sized.  Skip them. */
911 
912 		if (0 == val.size)
913 			goto cont;
914 
915 		/*
916 		 * Make sure we're sane.
917 		 * Read past our mdoc/man/cat type to the next string,
918 		 * then make sure it's bounded by a NUL.
919 		 * Failing any of these, we go into our error handler.
920 		 */
921 
922 		fn = (char *)val.data + 1;
923 		if (NULL == memchr(fn, '\0', val.size - 1))
924 			break;
925 
926 		/*
927 		 * Search for the file in those we care about.
928 		 * XXX: build this into a tree.  Too slow.
929 		 */
930 
931 		for (of = ofile->first; of; of = of->next)
932 			if (0 == strcmp(fn, of->fname))
933 				break;
934 
935 		if (NULL == of)
936 			continue;
937 
938 		/*
939 		 * Search through the keyword database, throwing out all
940 		 * references to our file.
941 		 */
942 
943 		sseq = R_FIRST;
944 		while (0 == (ch = (*mdb->db->seq)(mdb->db,
945 					&key, &val, sseq))) {
946 			sseq = R_NEXT;
947 			if (sizeof(vbuf) != val.size)
948 				break;
949 
950 			memcpy(vbuf, val.data, val.size);
951 			if (recs->last != betoh64(vbuf[1]))
952 				continue;
953 
954 			if ((ch = (*mdb->db->del)(mdb->db,
955 					&key, R_CURSOR)) < 0)
956 				break;
957 		}
958 
959 		if (ch < 0) {
960 			perror(mdb->dbn);
961 			exit((int)MANDOCLEVEL_SYSERR);
962 		} else if (1 != ch) {
963 			fprintf(stderr, "%s: corrupt database\n",
964 					mdb->dbn);
965 			exit((int)MANDOCLEVEL_SYSERR);
966 		}
967 
968 		if (verb)
969 			printf("%s: deleting from index\n", fn);
970 
971 		val.size = 0;
972 		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);
973 
974 		if (ch < 0)
975 			break;
976 cont:
977 		if (recs->cur >= recs->size) {
978 			recs->size += MANDOC_SLOP;
979 			recs->stack = mandoc_realloc(recs->stack,
980 					recs->size * sizeof(recno_t));
981 		}
982 
983 		recs->stack[(int)recs->cur] = recs->last;
984 		recs->cur++;
985 	}
986 
987 	if (ch < 0) {
988 		perror(mdb->idxn);
989 		exit((int)MANDOCLEVEL_SYSERR);
990 	} else if (1 != ch) {
991 		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
992 		exit((int)MANDOCLEVEL_SYSERR);
993 	}
994 
995 	recs->last++;
996 }
997 
998 /*
999  * Grow the buffer (if necessary) and copy in a binary string.
1000  */
1001 static void
1002 buf_appendb(struct buf *buf, const void *cp, size_t sz)
1003 {
1004 
1005 	/* Overshoot by MANDOC_BUFSZ. */
1006 
1007 	while (buf->len + sz >= buf->size) {
1008 		buf->size = buf->len + sz + MANDOC_BUFSZ;
1009 		buf->cp = mandoc_realloc(buf->cp, buf->size);
1010 	}
1011 
1012 	memcpy(buf->cp + (int)buf->len, cp, sz);
1013 	buf->len += sz;
1014 }
1015 
1016 /*
1017  * Append a nil-terminated string to the buffer.
1018  * This can be invoked multiple times.
1019  * The buffer string will be nil-terminated.
1020  * If invoked multiple times, a space is put between strings.
1021  */
1022 static void
1023 buf_append(struct buf *buf, const char *cp)
1024 {
1025 	size_t		 sz;
1026 
1027 	if (0 == (sz = strlen(cp)))
1028 		return;
1029 
1030 	if (buf->len)
1031 		buf->cp[(int)buf->len - 1] = ' ';
1032 
1033 	buf_appendb(buf, cp, sz + 1);
1034 }
1035 
1036 /*
1037  * Recursively add all text from a given node.
1038  * This is optimised for general mdoc nodes in this context, which do
1039  * not consist of subexpressions and having a recursive call for n->next
1040  * would be wasteful.
1041  * The "f" variable should be 0 unless called from pmdoc_Nd for the
1042  * description buffer, which does not start at the beginning of the
1043  * buffer.
1044  */
1045 static void
1046 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
1047 {
1048 
1049 	for ( ; n; n = n->next) {
1050 		if (n->child)
1051 			buf_appendmdoc(buf, n->child, f);
1052 
1053 		if (MDOC_TEXT == n->type && f) {
1054 			f = 0;
1055 			buf_appendb(buf, n->string,
1056 					strlen(n->string) + 1);
1057 		} else if (MDOC_TEXT == n->type)
1058 			buf_append(buf, n->string);
1059 
1060 	}
1061 }
1062 
1063 static void
1064 hash_reset(DB **db)
1065 {
1066 	DB		*hash;
1067 
1068 	if (NULL != (hash = *db))
1069 		(*hash->close)(hash);
1070 
1071 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1072 	if (NULL == *db) {
1073 		perror("hash");
1074 		exit((int)MANDOCLEVEL_SYSERR);
1075 	}
1076 }
1077 
1078 /* ARGSUSED */
1079 static int
1080 pmdoc_head(MDOC_ARGS)
1081 {
1082 
1083 	return(MDOC_HEAD == n->type);
1084 }
1085 
1086 /* ARGSUSED */
1087 static int
1088 pmdoc_body(MDOC_ARGS)
1089 {
1090 
1091 	return(MDOC_BODY == n->type);
1092 }
1093 
1094 /* ARGSUSED */
1095 static int
1096 pmdoc_Fd(MDOC_ARGS)
1097 {
1098 	const char	*start, *end;
1099 	size_t		 sz;
1100 
1101 	if (SEC_SYNOPSIS != n->sec)
1102 		return(0);
1103 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1104 		return(0);
1105 
1106 	/*
1107 	 * Only consider those `Fd' macro fields that begin with an
1108 	 * "inclusion" token (versus, e.g., #define).
1109 	 */
1110 	if (strcmp("#include", n->string))
1111 		return(0);
1112 
1113 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1114 		return(0);
1115 
1116 	/*
1117 	 * Strip away the enclosing angle brackets and make sure we're
1118 	 * not zero-length.
1119 	 */
1120 
1121 	start = n->string;
1122 	if ('<' == *start || '"' == *start)
1123 		start++;
1124 
1125 	if (0 == (sz = strlen(start)))
1126 		return(0);
1127 
1128 	end = &start[(int)sz - 1];
1129 	if ('>' == *end || '"' == *end)
1130 		end--;
1131 
1132 	assert(end >= start);
1133 
1134 	buf_appendb(buf, start, (size_t)(end - start + 1));
1135 	buf_appendb(buf, "", 1);
1136 	return(1);
1137 }
1138 
1139 /* ARGSUSED */
1140 static int
1141 pmdoc_In(MDOC_ARGS)
1142 {
1143 
1144 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1145 		return(0);
1146 
1147 	buf_append(buf, n->child->string);
1148 	return(1);
1149 }
1150 
1151 /* ARGSUSED */
1152 static int
1153 pmdoc_Fn(MDOC_ARGS)
1154 {
1155 	struct mdoc_node *nn;
1156 	const char	*cp;
1157 
1158 	nn = n->child;
1159 
1160 	if (NULL == nn || MDOC_TEXT != nn->type)
1161 		return(0);
1162 
1163 	/* .Fn "struct type *name" "char *arg" */
1164 
1165 	cp = strrchr(nn->string, ' ');
1166 	if (NULL == cp)
1167 		cp = nn->string;
1168 
1169 	/* Strip away pointer symbol. */
1170 
1171 	while ('*' == *cp)
1172 		cp++;
1173 
1174 	/* Store the function name. */
1175 
1176 	buf_append(buf, cp);
1177 	hash_put(hash, buf, TYPE_Fn);
1178 
1179 	/* Store the function type. */
1180 
1181 	if (nn->string < cp) {
1182 		buf->len = 0;
1183 		buf_appendb(buf, nn->string, cp - nn->string);
1184 		buf_appendb(buf, "", 1);
1185 		hash_put(hash, buf, TYPE_Ft);
1186 	}
1187 
1188 	/* Store the arguments. */
1189 
1190 	for (nn = nn->next; nn; nn = nn->next) {
1191 		if (MDOC_TEXT != nn->type)
1192 			continue;
1193 		buf->len = 0;
1194 		buf_append(buf, nn->string);
1195 		hash_put(hash, buf, TYPE_Fa);
1196 	}
1197 
1198 	return(0);
1199 }
1200 
1201 /* ARGSUSED */
1202 static int
1203 pmdoc_St(MDOC_ARGS)
1204 {
1205 
1206 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1207 		return(0);
1208 
1209 	buf_append(buf, n->child->string);
1210 	return(1);
1211 }
1212 
1213 /* ARGSUSED */
1214 static int
1215 pmdoc_Xr(MDOC_ARGS)
1216 {
1217 
1218 	if (NULL == (n = n->child))
1219 		return(0);
1220 
1221 	buf_appendb(buf, n->string, strlen(n->string));
1222 
1223 	if (NULL != (n = n->next)) {
1224 		buf_appendb(buf, ".", 1);
1225 		buf_appendb(buf, n->string, strlen(n->string) + 1);
1226 	} else
1227 		buf_appendb(buf, ".", 2);
1228 
1229 	return(1);
1230 }
1231 
1232 /* ARGSUSED */
1233 static int
1234 pmdoc_Nd(MDOC_ARGS)
1235 {
1236 
1237 	if (MDOC_BODY != n->type)
1238 		return(0);
1239 
1240 	buf_appendmdoc(dbuf, n->child, 1);
1241 	return(1);
1242 }
1243 
1244 /* ARGSUSED */
1245 static int
1246 pmdoc_Nm(MDOC_ARGS)
1247 {
1248 
1249 	if (SEC_NAME == n->sec)
1250 		return(1);
1251 	else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1252 		return(0);
1253 
1254 	if (NULL == n->child)
1255 		buf_append(buf, m->name);
1256 
1257 	return(1);
1258 }
1259 
1260 /* ARGSUSED */
1261 static int
1262 pmdoc_Sh(MDOC_ARGS)
1263 {
1264 
1265 	return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1266 }
1267 
1268 static void
1269 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1270 {
1271 	uint64_t	 oldmask;
1272 	DBT		 key, val;
1273 	int		 rc;
1274 
1275 	if (buf->len < 2)
1276 		return;
1277 
1278 	key.data = buf->cp;
1279 	key.size = buf->len;
1280 
1281 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1282 		perror("hash");
1283 		exit((int)MANDOCLEVEL_SYSERR);
1284 	} else if (0 == rc) {
1285 		assert(sizeof(uint64_t) == val.size);
1286 		memcpy(&oldmask, val.data, val.size);
1287 		mask |= oldmask;
1288 	}
1289 
1290 	val.data = &mask;
1291 	val.size = sizeof(uint64_t);
1292 
1293 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1294 		perror("hash");
1295 		exit((int)MANDOCLEVEL_SYSERR);
1296 	}
1297 }
1298 
1299 static void
1300 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1301 {
1302 
1303 	assert(key->size);
1304 	assert(val->size);
1305 
1306 	if (0 == (*db->put)(db, key, val, 0))
1307 		return;
1308 
1309 	perror(dbn);
1310 	exit((int)MANDOCLEVEL_SYSERR);
1311 	/* NOTREACHED */
1312 }
1313 
1314 /*
1315  * Call out to per-macro handlers after clearing the persistent database
1316  * key.  If the macro sets the database key, flush it to the database.
1317  */
1318 static void
1319 pmdoc_node(MDOC_ARGS)
1320 {
1321 
1322 	if (NULL == n)
1323 		return;
1324 
1325 	switch (n->type) {
1326 	case (MDOC_HEAD):
1327 		/* FALLTHROUGH */
1328 	case (MDOC_BODY):
1329 		/* FALLTHROUGH */
1330 	case (MDOC_TAIL):
1331 		/* FALLTHROUGH */
1332 	case (MDOC_BLOCK):
1333 		/* FALLTHROUGH */
1334 	case (MDOC_ELEM):
1335 		buf->len = 0;
1336 
1337 		/*
1338 		 * Both NULL handlers and handlers returning true
1339 		 * request using the data.  Only skip the element
1340 		 * when the handler returns false.
1341 		 */
1342 
1343 		if (NULL != mdocs[n->tok].fp &&
1344 		    0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1345 			break;
1346 
1347 		/*
1348 		 * For many macros, use the text from all children.
1349 		 * Set zero flags for macros not needing this.
1350 		 * In that case, the handler must fill the buffer.
1351 		 */
1352 
1353 		if (MDOCF_CHILD & mdocs[n->tok].flags)
1354 			buf_appendmdoc(buf, n->child, 0);
1355 
1356 		/*
1357 		 * Cover the most common case:
1358 		 * Automatically stage one string per element.
1359 		 * Set a zero mask for macros not needing this.
1360 		 * Additional staging can be done in the handler.
1361 		 */
1362 
1363 		if (mdocs[n->tok].mask)
1364 			hash_put(hash, buf, mdocs[n->tok].mask);
1365 		break;
1366 	default:
1367 		break;
1368 	}
1369 
1370 	pmdoc_node(hash, buf, dbuf, n->child, m);
1371 	pmdoc_node(hash, buf, dbuf, n->next, m);
1372 }
1373 
1374 static int
1375 pman_node(MAN_ARGS)
1376 {
1377 	const struct man_node *head, *body;
1378 	char		*start, *sv, *title;
1379 	size_t		 sz, titlesz;
1380 
1381 	if (NULL == n)
1382 		return(0);
1383 
1384 	/*
1385 	 * We're only searching for one thing: the first text child in
1386 	 * the BODY of a NAME section.  Since we don't keep track of
1387 	 * sections in -man, run some hoops to find out whether we're in
1388 	 * the correct section or not.
1389 	 */
1390 
1391 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1392 		body = n;
1393 		assert(body->parent);
1394 		if (NULL != (head = body->parent->head) &&
1395 				1 == head->nchild &&
1396 				NULL != (head = (head->child)) &&
1397 				MAN_TEXT == head->type &&
1398 				0 == strcmp(head->string, "NAME") &&
1399 				NULL != (body = body->child) &&
1400 				MAN_TEXT == body->type) {
1401 
1402 			title = NULL;
1403 			titlesz = 0;
1404 			/*
1405 			 * Suck the entire NAME section into memory.
1406 			 * Yes, we might run away.
1407 			 * But too many manuals have big, spread-out
1408 			 * NAME sections over many lines.
1409 			 */
1410 			for ( ; NULL != body; body = body->next) {
1411 				if (MAN_TEXT != body->type)
1412 					break;
1413 				if (0 == (sz = strlen(body->string)))
1414 					continue;
1415 				title = mandoc_realloc
1416 					(title, titlesz + sz + 1);
1417 				memcpy(title + titlesz, body->string, sz);
1418 				titlesz += sz + 1;
1419 				title[(int)titlesz - 1] = ' ';
1420 			}
1421 			if (NULL == title)
1422 				return(0);
1423 
1424 			title = mandoc_realloc(title, titlesz + 1);
1425 			title[(int)titlesz] = '\0';
1426 
1427 			/* Skip leading space.  */
1428 
1429 			sv = title;
1430 			while (isspace((unsigned char)*sv))
1431 				sv++;
1432 
1433 			if (0 == (sz = strlen(sv))) {
1434 				free(title);
1435 				return(0);
1436 			}
1437 
1438 			/* Erase trailing space. */
1439 
1440 			start = &sv[sz - 1];
1441 			while (start > sv && isspace((unsigned char)*start))
1442 				*start-- = '\0';
1443 
1444 			if (start == sv) {
1445 				free(title);
1446 				return(0);
1447 			}
1448 
1449 			start = sv;
1450 
1451 			/*
1452 			 * Go through a special heuristic dance here.
1453 			 * This is why -man manuals are great!
1454 			 * (I'm being sarcastic: my eyes are bleeding.)
1455 			 * Conventionally, one or more manual names are
1456 			 * comma-specified prior to a whitespace, then a
1457 			 * dash, then a description.  Try to puzzle out
1458 			 * the name parts here.
1459 			 */
1460 
1461 			for ( ;; ) {
1462 				sz = strcspn(start, " ,");
1463 				if ('\0' == start[(int)sz])
1464 					break;
1465 
1466 				buf->len = 0;
1467 				buf_appendb(buf, start, sz);
1468 				buf_appendb(buf, "", 1);
1469 
1470 				hash_put(hash, buf, TYPE_Nm);
1471 
1472 				if (' ' == start[(int)sz]) {
1473 					start += (int)sz + 1;
1474 					break;
1475 				}
1476 
1477 				assert(',' == start[(int)sz]);
1478 				start += (int)sz + 1;
1479 				while (' ' == *start)
1480 					start++;
1481 			}
1482 
1483 			buf->len = 0;
1484 
1485 			if (sv == start) {
1486 				buf_append(buf, start);
1487 				free(title);
1488 				return(1);
1489 			}
1490 
1491 			while (isspace((unsigned char)*start))
1492 				start++;
1493 
1494 			if (0 == strncmp(start, "-", 1))
1495 				start += 1;
1496 			else if (0 == strncmp(start, "\\-\\-", 4))
1497 				start += 4;
1498 			else if (0 == strncmp(start, "\\-", 2))
1499 				start += 2;
1500 			else if (0 == strncmp(start, "\\(en", 4))
1501 				start += 4;
1502 			else if (0 == strncmp(start, "\\(em", 4))
1503 				start += 4;
1504 
1505 			while (' ' == *start)
1506 				start++;
1507 
1508 			sz = strlen(start) + 1;
1509 			buf_appendb(dbuf, start, sz);
1510 			buf_appendb(buf, start, sz);
1511 
1512 			hash_put(hash, buf, TYPE_Nd);
1513 			free(title);
1514 		}
1515 	}
1516 
1517 	for (n = n->child; n; n = n->next)
1518 		if (pman_node(hash, buf, dbuf, n))
1519 			return(1);
1520 
1521 	return(0);
1522 }
1523 
1524 /*
1525  * Parse a formatted manual page.
1526  * By necessity, this involves rather crude guesswork.
1527  */
1528 static void
1529 pformatted(DB *hash, struct buf *buf,
1530 		struct buf *dbuf, const struct of *of)
1531 {
1532 	FILE		*stream;
1533 	char		*line, *p, *title;
1534 	size_t		 len, plen, titlesz;
1535 
1536 	if (NULL == (stream = fopen(of->fname, "r"))) {
1537 		if (warnings)
1538 			perror(of->fname);
1539 		return;
1540 	}
1541 
1542 	/*
1543 	 * Always use the title derived from the filename up front,
1544 	 * do not even try to find it in the file.  This also makes
1545 	 * sure we don't end up with an orphan index record, even if
1546 	 * the file content turns out to be completely unintelligible.
1547 	 */
1548 
1549 	buf->len = 0;
1550 	buf_append(buf, of->title);
1551 	hash_put(hash, buf, TYPE_Nm);
1552 
1553 	/* Skip to first blank line. */
1554 
1555 	while (NULL != (line = fgetln(stream, &len)))
1556 		if ('\n' == *line)
1557 			break;
1558 
1559 	/*
1560 	 * Assume the first line that is not indented
1561 	 * is the first section header.  Skip to it.
1562 	 */
1563 
1564 	while (NULL != (line = fgetln(stream, &len)))
1565 		if ('\n' != *line && ' ' != *line)
1566 			break;
1567 
1568 	/*
1569 	 * Read up until the next section into a buffer.
1570 	 * Strip the leading and trailing newline from each read line,
1571 	 * appending a trailing space.
1572 	 * Ignore empty (whitespace-only) lines.
1573 	 */
1574 
1575 	titlesz = 0;
1576 	title = NULL;
1577 
1578 	while (NULL != (line = fgetln(stream, &len))) {
1579 		if (' ' != *line || '\n' != line[(int)len - 1])
1580 			break;
1581 		while (len > 0 && isspace((unsigned char)*line)) {
1582 			line++;
1583 			len--;
1584 		}
1585 		if (1 == len)
1586 			continue;
1587 		title = mandoc_realloc(title, titlesz + len);
1588 		memcpy(title + titlesz, line, len);
1589 		titlesz += len;
1590 		title[(int)titlesz - 1] = ' ';
1591 	}
1592 
1593 
1594 	/*
1595 	 * If no page content can be found, or the input line
1596 	 * is already the next section header, or there is no
1597 	 * trailing newline, reuse the page title as the page
1598 	 * description.
1599 	 */
1600 
1601 	if (NULL == title || '\0' == *title) {
1602 		if (warnings)
1603 			fprintf(stderr, "%s: cannot find NAME section\n",
1604 					of->fname);
1605 		buf_appendb(dbuf, buf->cp, buf->size);
1606 		hash_put(hash, buf, TYPE_Nd);
1607 		fclose(stream);
1608 		free(title);
1609 		return;
1610 	}
1611 
1612 	title = mandoc_realloc(title, titlesz + 1);
1613 	title[(int)titlesz] = '\0';
1614 
1615 	/*
1616 	 * Skip to the first dash.
1617 	 * Use the remaining line as the description (no more than 70
1618 	 * bytes).
1619 	 */
1620 
1621 	if (NULL != (p = strstr(title, "- "))) {
1622 		for (p += 2; ' ' == *p || '\b' == *p; p++)
1623 			/* Skip to next word. */ ;
1624 	} else {
1625 		if (warnings)
1626 			fprintf(stderr, "%s: no dash in title line\n",
1627 					of->fname);
1628 		p = title;
1629 	}
1630 
1631 	plen = strlen(p);
1632 
1633 	/* Strip backspace-encoding from line. */
1634 
1635 	while (NULL != (line = memchr(p, '\b', plen))) {
1636 		len = line - p;
1637 		if (0 == len) {
1638 			memmove(line, line + 1, plen--);
1639 			continue;
1640 		}
1641 		memmove(line - 1, line + 1, plen - len);
1642 		plen -= 2;
1643 	}
1644 
1645 	buf_appendb(dbuf, p, plen + 1);
1646 	buf->len = 0;
1647 	buf_appendb(buf, p, plen + 1);
1648 	hash_put(hash, buf, TYPE_Nd);
1649 	fclose(stream);
1650 	free(title);
1651 }
1652 
1653 static void
1654 ofile_argbuild(int argc, char *argv[], struct of **of,
1655 		const char *basedir)
1656 {
1657 	char		 buf[PATH_MAX];
1658 	char		 pbuf[PATH_MAX];
1659 	const char	*sec, *arch, *title;
1660 	char		*relpath, *p;
1661 	int		 i, src_form;
1662 	struct of	*nof;
1663 
1664 	for (i = 0; i < argc; i++) {
1665 		if (NULL == (relpath = realpath(argv[i], pbuf))) {
1666 			perror(argv[i]);
1667 			continue;
1668 		}
1669 		if (NULL != basedir) {
1670 			if (strstr(pbuf, basedir) != pbuf) {
1671 				fprintf(stderr, "%s: file outside "
1672 				    "base directory %s\n",
1673 				    pbuf, basedir);
1674 				continue;
1675 			}
1676 			relpath = pbuf + strlen(basedir);
1677 		}
1678 
1679 		/*
1680 		 * Try to infer the manual section, architecture and
1681 		 * page title from the path, assuming it looks like
1682 		 *   man*[/<arch>]/<title>.<section>   or
1683 		 *   cat<section>[/<arch>]/<title>.0
1684 		 */
1685 
1686 		if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
1687 			fprintf(stderr, "%s: path too long\n", relpath);
1688 			continue;
1689 		}
1690 		sec = arch = title = "";
1691 		src_form = 0;
1692 		p = strrchr(buf, '\0');
1693 		while (p-- > buf) {
1694 			if ('\0' == *sec && '.' == *p) {
1695 				sec = p + 1;
1696 				*p = '\0';
1697 				if ('0' == *sec)
1698 					src_form |= MANDOC_FORM;
1699 				else if ('1' <= *sec && '9' >= *sec)
1700 					src_form |= MANDOC_SRC;
1701 				continue;
1702 			}
1703 			if ('/' != *p)
1704 				continue;
1705 			if ('\0' == *title) {
1706 				title = p + 1;
1707 				*p = '\0';
1708 				continue;
1709 			}
1710 			if (0 == strncmp("man", p + 1, 3))
1711 				src_form |= MANDOC_SRC;
1712 			else if (0 == strncmp("cat", p + 1, 3))
1713 				src_form |= MANDOC_FORM;
1714 			else
1715 				arch = p + 1;
1716 			break;
1717 		}
1718 		if ('\0' == *title) {
1719 			if (warnings)
1720 				fprintf(stderr,
1721 				    "%s: cannot deduce title "
1722 				    "from filename\n",
1723 				    relpath);
1724 			title = buf;
1725 		}
1726 
1727 		/*
1728 		 * Build the file structure.
1729 		 */
1730 
1731 		nof = mandoc_calloc(1, sizeof(struct of));
1732 		nof->fname = mandoc_strdup(relpath);
1733 		nof->sec = mandoc_strdup(sec);
1734 		nof->arch = mandoc_strdup(arch);
1735 		nof->title = mandoc_strdup(title);
1736 		nof->src_form = src_form;
1737 
1738 		/*
1739 		 * Add the structure to the list.
1740 		 */
1741 
1742 		if (NULL == *of) {
1743 			*of = nof;
1744 			(*of)->first = nof;
1745 		} else {
1746 			nof->first = (*of)->first;
1747 			(*of)->next = nof;
1748 			*of = nof;
1749 		}
1750 	}
1751 }
1752 
1753 /*
1754  * Recursively build up a list of files to parse.
1755  * We use this instead of ftw() and so on because I don't want global
1756  * variables hanging around.
1757  * This ignores the mandoc.db and mandoc.index files, but assumes that
1758  * everything else is a manual.
1759  * Pass in a pointer to a NULL structure for the first invocation.
1760  */
1761 static void
1762 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1763 		int p_src_form, struct of **of)
1764 {
1765 	char		 buf[PATH_MAX];
1766 #if defined(__sun)
1767 	struct stat	 sb;
1768 #endif
1769 	size_t		 sz;
1770 	DIR		*d;
1771 	const char	*fn, *sec, *arch;
1772 	char		*p, *q, *suffix;
1773 	struct of	*nof;
1774 	struct dirent	*dp;
1775 	int		 src_form;
1776 
1777 	if (NULL == (d = opendir(dir))) {
1778 		if (warnings)
1779 			perror(dir);
1780 		return;
1781 	}
1782 
1783 	while (NULL != (dp = readdir(d))) {
1784 		fn = dp->d_name;
1785 
1786 		if ('.' == *fn)
1787 			continue;
1788 
1789 		src_form = p_src_form;
1790 
1791 #if defined(__sun)
1792 		stat(dp->d_name, &sb);
1793 		if (S_IFDIR & sb.st_mode) {
1794 #else
1795 		if (DT_DIR == dp->d_type) {
1796 #endif
1797 			sec = psec;
1798 			arch = parch;
1799 
1800 			/*
1801 			 * By default, only use directories called:
1802 			 *   man<section>/[<arch>/]   or
1803 			 *   cat<section>/[<arch>/]
1804 			 */
1805 
1806 			if ('\0' == *sec) {
1807 				if(0 == strncmp("man", fn, 3)) {
1808 					src_form |= MANDOC_SRC;
1809 					sec = fn + 3;
1810 				} else if (0 == strncmp("cat", fn, 3)) {
1811 					src_form |= MANDOC_FORM;
1812 					sec = fn + 3;
1813 				} else {
1814 					if (warnings) fprintf(stderr,
1815 					    "%s/%s: bad section\n",
1816 					    dir, fn);
1817 					if (use_all)
1818 						sec = fn;
1819 					else
1820 						continue;
1821 				}
1822 			} else if ('\0' == *arch) {
1823 				if (NULL != strchr(fn, '.')) {
1824 					if (warnings) fprintf(stderr,
1825 					    "%s/%s: bad architecture\n",
1826 					    dir, fn);
1827 					if (0 == use_all)
1828 						continue;
1829 				}
1830 				arch = fn;
1831 			} else {
1832 				if (warnings) fprintf(stderr, "%s/%s: "
1833 				    "excessive subdirectory\n", dir, fn);
1834 				if (0 == use_all)
1835 					continue;
1836 			}
1837 
1838 			buf[0] = '\0';
1839 			strlcat(buf, dir, PATH_MAX);
1840 			strlcat(buf, "/", PATH_MAX);
1841 			sz = strlcat(buf, fn, PATH_MAX);
1842 
1843 			if (PATH_MAX <= sz) {
1844 				if (warnings) fprintf(stderr, "%s/%s: "
1845 				    "path too long\n", dir, fn);
1846 				continue;
1847 			}
1848 
1849 			ofile_dirbuild(buf, sec, arch, src_form, of);
1850 			continue;
1851 		}
1852 
1853 #if defined(__sun)
1854 		if (0 == S_IFREG & sb.st_mode) {
1855 #else
1856 		if (DT_REG != dp->d_type) {
1857 #endif
1858 			if (warnings)
1859 				fprintf(stderr,
1860 				    "%s/%s: not a regular file\n",
1861 				    dir, fn);
1862 			continue;
1863 		}
1864 		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1865 			continue;
1866 		if ('\0' == *psec) {
1867 			if (warnings)
1868 				fprintf(stderr,
1869 				    "%s/%s: file outside section\n",
1870 				    dir, fn);
1871 			if (0 == use_all)
1872 				continue;
1873 		}
1874 
1875 		/*
1876 		 * By default, skip files where the file name suffix
1877 		 * does not agree with the section directory
1878 		 * they are located in.
1879 		 */
1880 
1881 		suffix = strrchr(fn, '.');
1882 		if (NULL == suffix) {
1883 			if (warnings)
1884 				fprintf(stderr,
1885 				    "%s/%s: no filename suffix\n",
1886 				    dir, fn);
1887 			if (0 == use_all)
1888 				continue;
1889 		} else if ((MANDOC_SRC & src_form &&
1890 				strcmp(suffix + 1, psec)) ||
1891 			    (MANDOC_FORM & src_form &&
1892 				strcmp(suffix + 1, "0"))) {
1893 			if (warnings)
1894 				fprintf(stderr,
1895 				    "%s/%s: wrong filename suffix\n",
1896 				    dir, fn);
1897 			if (0 == use_all)
1898 				continue;
1899 			if ('0' == suffix[1])
1900 				src_form |= MANDOC_FORM;
1901 			else if ('1' <= suffix[1] && '9' >= suffix[1])
1902 				src_form |= MANDOC_SRC;
1903 		}
1904 
1905 		/*
1906 		 * Skip formatted manuals if a source version is
1907 		 * available.  Ignore the age: it is very unlikely
1908 		 * that people install newer formatted base manuals
1909 		 * when they used to have source manuals before,
1910 		 * and in ports, old manuals get removed on update.
1911 		 */
1912 		if (0 == use_all && MANDOC_FORM & src_form &&
1913 				'\0' != *psec) {
1914 			buf[0] = '\0';
1915 			strlcat(buf, dir, PATH_MAX);
1916 			p = strrchr(buf, '/');
1917 			if ('\0' != *parch && NULL != p)
1918 				for (p--; p > buf; p--)
1919 					if ('/' == *p)
1920 						break;
1921 			if (NULL == p)
1922 				p = buf;
1923 			else
1924 				p++;
1925 			if (0 == strncmp("cat", p, 3))
1926 				memcpy(p, "man", 3);
1927 			strlcat(buf, "/", PATH_MAX);
1928 			sz = strlcat(buf, fn, PATH_MAX);
1929 			if (sz >= PATH_MAX) {
1930 				if (warnings) fprintf(stderr,
1931 				    "%s/%s: path too long\n",
1932 				    dir, fn);
1933 				continue;
1934 			}
1935 			q = strrchr(buf, '.');
1936 			if (NULL != q && p < q++) {
1937 				*q = '\0';
1938 				sz = strlcat(buf, psec, PATH_MAX);
1939 				if (sz >= PATH_MAX) {
1940 					if (warnings) fprintf(stderr,
1941 					    "%s/%s: path too long\n",
1942 					    dir, fn);
1943 					continue;
1944 				}
1945 				if (0 == access(buf, R_OK))
1946 					continue;
1947 			}
1948 		}
1949 
1950 		buf[0] = '\0';
1951 		assert('.' == dir[0]);
1952 		if ('/' == dir[1]) {
1953 			strlcat(buf, dir + 2, PATH_MAX);
1954 			strlcat(buf, "/", PATH_MAX);
1955 		}
1956 		sz = strlcat(buf, fn, PATH_MAX);
1957 		if (sz >= PATH_MAX) {
1958 			if (warnings) fprintf(stderr,
1959 			    "%s/%s: path too long\n", dir, fn);
1960 			continue;
1961 		}
1962 
1963 		nof = mandoc_calloc(1, sizeof(struct of));
1964 		nof->fname = mandoc_strdup(buf);
1965 		nof->sec = mandoc_strdup(psec);
1966 		nof->arch = mandoc_strdup(parch);
1967 		nof->src_form = src_form;
1968 
1969 		/*
1970 		 * Remember the file name without the extension,
1971 		 * to be used as the page title in the database.
1972 		 */
1973 
1974 		if (NULL != suffix)
1975 			*suffix = '\0';
1976 		nof->title = mandoc_strdup(fn);
1977 
1978 		/*
1979 		 * Add the structure to the list.
1980 		 */
1981 
1982 		if (NULL == *of) {
1983 			*of = nof;
1984 			(*of)->first = nof;
1985 		} else {
1986 			nof->first = (*of)->first;
1987 			(*of)->next = nof;
1988 			*of = nof;
1989 		}
1990 	}
1991 
1992 	closedir(d);
1993 }
1994 
1995 static void
1996 ofile_free(struct of *of)
1997 {
1998 	struct of	*nof;
1999 
2000 	if (NULL != of)
2001 		of = of->first;
2002 
2003 	while (NULL != of) {
2004 		nof = of->next;
2005 		free(of->fname);
2006 		free(of->sec);
2007 		free(of->arch);
2008 		free(of->title);
2009 		free(of);
2010 		of = nof;
2011 	}
2012 }
2013