xref: /dragonfly/contrib/mdocml/mandocdb.c (revision 31c7ac8b)
1 /*	$Id: mandocdb.c,v 1.49.2.10 2013/11/21 01:53:48 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011, 2012 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <ctype.h>
26 #include <dirent.h>
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <getopt.h>
30 #include <limits.h>
31 #include <stdio.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #if defined(__APPLE__)
38 # include <libkern/OSByteOrder.h>
39 #elif defined(__linux__)
40 # include <endian.h>
41 #elif defined(__sun)
42 # include <sys/byteorder.h>
43 # include <sys/stat.h>
44 #else
45 # include <sys/endian.h>
46 #endif
47 
48 #if defined(__linux__) || defined(__sun)
49 # include <db_185.h>
50 #else
51 # include <db.h>
52 #endif
53 
54 #include "man.h"
55 #include "mdoc.h"
56 #include "mandoc.h"
57 #include "mandocdb.h"
58 #include "manpath.h"
59 
/*
 * MANDOC_BUFSZ is the initial size of the keyword and description
 * buffers allocated in main() and the overshoot added by buf_appendb()
 * whenever a buffer has to grow.
 * MANDOC_SLOP is the number of slots added to the struct recs stack
 * each time index_prune() runs out of room.
 */
#define	MANDOC_BUFSZ	  BUFSIZ
#define	MANDOC_SLOP	  1024

/* Bits tested in the src_form member of struct of by index_merge(). */
#define	MANDOC_SRC	  0x1
#define	MANDOC_FORM	  0x2
65 
/*
 * Access to the mandoc database on disk.
 * main() zeroes the structure at startup and sets both handles back
 * to NULL after closing them; index_merge() treats a NULL handle as
 * "test mode" and writes nothing.
 */

struct	mdb {
	char		  idxn[PATH_MAX]; /* index db filename */
	char		  dbn[PATH_MAX]; /* keyword db filename */
	DB		 *idx; /* index recno database */
	DB		 *db; /* keyword btree database */
};
74 
/*
 * Stack of temporarily unused index records.
 * index_prune() pushes the numbers of deleted (zero-sized) records;
 * index_merge() pops them again to reuse the slots, and falls back to
 * `last' once the stack is empty.
 */

struct	recs {
	recno_t		 *stack; /* pointer to a malloc'ed array */
	size_t		  size; /* number of allocated slots */
	size_t		  cur; /* current number of empty records */
	recno_t		  last; /* last record number in the index */
};
83 
/*
 * Tiny list for files.  No need to bring in QUEUE.
 * index_merge() compares sec, arch and title against the values found
 * inside the manual itself and asserts that all three are non-NULL.
 */

struct	of {
	char		 *fname; /* heap-allocated */
	char		 *sec; /* section the file is filed under */
	char		 *arch; /* architecture the file is filed under */
	char		 *title; /* title the file is filed under */
	int		  src_form; /* MANDOC_SRC and/or MANDOC_FORM bits */
	struct of	 *next; /* NULL for last one */
	struct of	 *first; /* first in list */
};
95 
/*
 * Buffer for storing growable data.
 * Grown by buf_appendb(); callers reset it by setting len to 0.
 */

struct	buf {
	char		 *cp; /* storage from mandoc_malloc/mandoc_realloc */
	size_t		  len; /* current length */
	size_t		  size; /* total buffer size */
};
103 
/*
 * Operation we're going to perform.
 * main() selects it from the command line: -C gives OP_CONFFILE,
 * -d gives OP_UPDATE, -u gives OP_DELETE and -t gives OP_TEST.
 * OP_DEFAULT must stay zero: main() uses "if (op)" to detect that an
 * operation has already been chosen.
 */

enum	op {
	OP_DEFAULT = 0, /* new dbs from dir list or default config */
	OP_CONFFILE, /* new databases from custom config file */
	OP_UPDATE, /* delete/add entries in existing database */
	OP_DELETE, /* delete entries from existing database */
	OP_TEST /* change no databases, report potential problems */
};
113 
/*
 * Shared parameter lists of the node handlers:
 * MAN_ARGS for pman_node(), MDOC_ARGS for pmdoc_node() and the
 * per-macro pmdoc_*() functions.  `hash' receives keyword/mask pairs,
 * `buf' is the keyword buffer and `dbuf' the description buffer.
 */
#define	MAN_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct man_node *n
#define	MDOC_ARGS	  DB *hash, \
			  struct buf *buf, \
			  struct buf *dbuf, \
			  const struct mdoc_node *n, \
			  const struct mdoc_meta *m
123 
124 static	void		  buf_appendmdoc(struct buf *,
125 				const struct mdoc_node *, int);
126 static	void		  buf_append(struct buf *, const char *);
127 static	void		  buf_appendb(struct buf *,
128 				const void *, size_t);
129 static	void		  dbt_put(DB *, const char *, DBT *, DBT *);
130 static	void		  hash_put(DB *, const struct buf *, uint64_t);
131 static	void		  hash_reset(DB **);
132 static	void		  index_merge(const struct of *, struct mparse *,
133 				struct buf *, struct buf *, DB *,
134 				struct mdb *, struct recs *);
135 static	void		  index_prune(const struct of *, struct mdb *,
136 				struct recs *);
137 static	void		  ofile_argbuild(int, char *[], struct of **,
138 				const char *);
139 static	void		  ofile_dirbuild(const char *, const char *,
140 				const char *, int, struct of **);
141 static	void		  ofile_free(struct of *);
142 static	void		  pformatted(DB *, struct buf *,
143 				struct buf *, const struct of *);
144 static	int		  pman_node(MAN_ARGS);
145 static	void		  pmdoc_node(MDOC_ARGS);
146 static	int		  pmdoc_head(MDOC_ARGS);
147 static	int		  pmdoc_body(MDOC_ARGS);
148 static	int		  pmdoc_Fd(MDOC_ARGS);
149 static	int		  pmdoc_In(MDOC_ARGS);
150 static	int		  pmdoc_Fn(MDOC_ARGS);
151 static	int		  pmdoc_Nd(MDOC_ARGS);
152 static	int		  pmdoc_Nm(MDOC_ARGS);
153 static	int		  pmdoc_Sh(MDOC_ARGS);
154 static	int		  pmdoc_St(MDOC_ARGS);
155 static	int		  pmdoc_Xr(MDOC_ARGS);
156 
#define	MDOCF_CHILD	  0x01  /* Automatically index child nodes. */

/*
 * Indexing rule for one mdoc(7) macro; see the mdocs[] table.
 * An entry of { NULL, 0, 0 } has no handler, no mask and no flags.
 */
struct	mdoc_handler {
	int		(*fp)(MDOC_ARGS);  /* Optional handler. */
	uint64_t	  mask;  /* Set unless handler returns 0. */
	int		  flags;  /* For use by pmdoc_node. */
};
164 
/*
 * Indexing rules for every mdoc(7) macro, one entry per macro; the
 * trailing comment on each line names the macro the entry belongs to.
 * NOTE(review): presumably indexed by the macro token in pmdoc_node(),
 * so the order must match the mdoc token enumeration -- the lookup is
 * not visible in this part of the file; confirm before reordering.
 */
static	const struct mdoc_handler mdocs[MDOC_MAX] = {
	{ NULL, 0, 0 },  /* Ap */
	{ NULL, 0, 0 },  /* Dd */
	{ NULL, 0, 0 },  /* Dt */
	{ NULL, 0, 0 },  /* Os */
	{ pmdoc_Sh, TYPE_Sh, MDOCF_CHILD }, /* Sh */
	{ pmdoc_head, TYPE_Ss, MDOCF_CHILD }, /* Ss */
	{ NULL, 0, 0 },  /* Pp */
	{ NULL, 0, 0 },  /* D1 */
	{ NULL, 0, 0 },  /* Dl */
	{ NULL, 0, 0 },  /* Bd */
	{ NULL, 0, 0 },  /* Ed */
	{ NULL, 0, 0 },  /* Bl */
	{ NULL, 0, 0 },  /* El */
	{ NULL, 0, 0 },  /* It */
	{ NULL, 0, 0 },  /* Ad */
	{ NULL, TYPE_An, MDOCF_CHILD },  /* An */
	{ NULL, TYPE_Ar, MDOCF_CHILD },  /* Ar */
	{ NULL, TYPE_Cd, MDOCF_CHILD },  /* Cd */
	{ NULL, TYPE_Cm, MDOCF_CHILD },  /* Cm */
	{ NULL, TYPE_Dv, MDOCF_CHILD },  /* Dv */
	{ NULL, TYPE_Er, MDOCF_CHILD },  /* Er */
	{ NULL, TYPE_Ev, MDOCF_CHILD },  /* Ev */
	{ NULL, 0, 0 },  /* Ex */
	{ NULL, TYPE_Fa, MDOCF_CHILD },  /* Fa */
	{ pmdoc_Fd, TYPE_In, 0 },  /* Fd */
	{ NULL, TYPE_Fl, MDOCF_CHILD },  /* Fl */
	{ pmdoc_Fn, 0, 0 },  /* Fn */
	{ NULL, TYPE_Ft, MDOCF_CHILD },  /* Ft */
	{ NULL, TYPE_Ic, MDOCF_CHILD },  /* Ic */
	{ pmdoc_In, TYPE_In, 0 },  /* In */
	{ NULL, TYPE_Li, MDOCF_CHILD },  /* Li */
	{ pmdoc_Nd, TYPE_Nd, MDOCF_CHILD },  /* Nd */
	{ pmdoc_Nm, TYPE_Nm, MDOCF_CHILD },  /* Nm */
	{ NULL, 0, 0 },  /* Op */
	{ NULL, 0, 0 },  /* Ot */
	{ NULL, TYPE_Pa, MDOCF_CHILD },  /* Pa */
	{ NULL, 0, 0 },  /* Rv */
	{ pmdoc_St, TYPE_St, 0 },  /* St */
	{ NULL, TYPE_Va, MDOCF_CHILD },  /* Va */
	{ pmdoc_body, TYPE_Va, MDOCF_CHILD },  /* Vt */
	{ pmdoc_Xr, TYPE_Xr, 0 },  /* Xr */
	{ NULL, 0, 0 },  /* %A */
	{ NULL, 0, 0 },  /* %B */
	{ NULL, 0, 0 },  /* %D */
	{ NULL, 0, 0 },  /* %I */
	{ NULL, 0, 0 },  /* %J */
	{ NULL, 0, 0 },  /* %N */
	{ NULL, 0, 0 },  /* %O */
	{ NULL, 0, 0 },  /* %P */
	{ NULL, 0, 0 },  /* %R */
	{ NULL, 0, 0 },  /* %T */
	{ NULL, 0, 0 },  /* %V */
	{ NULL, 0, 0 },  /* Ac */
	{ NULL, 0, 0 },  /* Ao */
	{ NULL, 0, 0 },  /* Aq */
	{ NULL, TYPE_At, MDOCF_CHILD },  /* At */
	{ NULL, 0, 0 },  /* Bc */
	{ NULL, 0, 0 },  /* Bf */
	{ NULL, 0, 0 },  /* Bo */
	{ NULL, 0, 0 },  /* Bq */
	{ NULL, TYPE_Bsx, MDOCF_CHILD },  /* Bsx */
	{ NULL, TYPE_Bx, MDOCF_CHILD },  /* Bx */
	{ NULL, 0, 0 },  /* Db */
	{ NULL, 0, 0 },  /* Dc */
	{ NULL, 0, 0 },  /* Do */
	{ NULL, 0, 0 },  /* Dq */
	{ NULL, 0, 0 },  /* Ec */
	{ NULL, 0, 0 },  /* Ef */
	{ NULL, TYPE_Em, MDOCF_CHILD },  /* Em */
	{ NULL, 0, 0 },  /* Eo */
	{ NULL, TYPE_Fx, MDOCF_CHILD },  /* Fx */
	{ NULL, TYPE_Ms, MDOCF_CHILD },  /* Ms */
	{ NULL, 0, 0 },  /* No */
	{ NULL, 0, 0 },  /* Ns */
	{ NULL, TYPE_Nx, MDOCF_CHILD },  /* Nx */
	{ NULL, TYPE_Ox, MDOCF_CHILD },  /* Ox */
	{ NULL, 0, 0 },  /* Pc */
	{ NULL, 0, 0 },  /* Pf */
	{ NULL, 0, 0 },  /* Po */
	{ NULL, 0, 0 },  /* Pq */
	{ NULL, 0, 0 },  /* Qc */
	{ NULL, 0, 0 },  /* Ql */
	{ NULL, 0, 0 },  /* Qo */
	{ NULL, 0, 0 },  /* Qq */
	{ NULL, 0, 0 },  /* Re */
	{ NULL, 0, 0 },  /* Rs */
	{ NULL, 0, 0 },  /* Sc */
	{ NULL, 0, 0 },  /* So */
	{ NULL, 0, 0 },  /* Sq */
	{ NULL, 0, 0 },  /* Sm */
	{ NULL, 0, 0 },  /* Sx */
	{ NULL, TYPE_Sy, MDOCF_CHILD },  /* Sy */
	{ NULL, TYPE_Tn, MDOCF_CHILD },  /* Tn */
	{ NULL, 0, 0 },  /* Ux */
	{ NULL, 0, 0 },  /* Xc */
	{ NULL, 0, 0 },  /* Xo */
	{ pmdoc_head, TYPE_Fn, 0 },  /* Fo */
	{ NULL, 0, 0 },  /* Fc */
	{ NULL, 0, 0 },  /* Oo */
	{ NULL, 0, 0 },  /* Oc */
	{ NULL, 0, 0 },  /* Bk */
	{ NULL, 0, 0 },  /* Ek */
	{ NULL, 0, 0 },  /* Bt */
	{ NULL, 0, 0 },  /* Hf */
	{ NULL, 0, 0 },  /* Fr */
	{ NULL, 0, 0 },  /* Ud */
	{ NULL, TYPE_Lb, MDOCF_CHILD },  /* Lb */
	{ NULL, 0, 0 },  /* Lp */
	{ NULL, TYPE_Lk, MDOCF_CHILD },  /* Lk */
	{ NULL, TYPE_Mt, MDOCF_CHILD },  /* Mt */
	{ NULL, 0, 0 },  /* Brq */
	{ NULL, 0, 0 },  /* Bro */
	{ NULL, 0, 0 },  /* Brc */
	{ NULL, 0, 0 },  /* %C */
	{ NULL, 0, 0 },  /* Es */
	{ NULL, 0, 0 },  /* En */
	{ NULL, TYPE_Dx, MDOCF_CHILD },  /* Dx */
	{ NULL, 0, 0 },  /* %Q */
	{ NULL, 0, 0 },  /* br */
	{ NULL, 0, 0 },  /* sp */
	{ NULL, 0, 0 },  /* %U */
	{ NULL, 0, 0 },  /* Ta */
};
289 
/* Option state shared by main() and the indexing functions. */
static	const char	 *progname; /* Basename of argv[0], for usage. */
static	int		  use_all;  /* Use all directories and files. */
static	int		  verb;  /* Output verbosity level. */
static	int		  warnings;  /* Potential problems in manuals. */
294 
295 int
296 main(int argc, char *argv[])
297 {
298 	struct mparse	*mp; /* parse sequence */
299 	struct manpaths	 dirs;
300 	struct mdb	 mdb;
301 	struct recs	 recs;
302 	enum op		 op; /* current operation */
303 	const char	*dir;
304 	char		*cp;
305 	char		 pbuf[PATH_MAX];
306 	int		 ch, i, flags;
307 	DB		*hash; /* temporary keyword hashtable */
308 	BTREEINFO	 info; /* btree configuration */
309 	size_t		 sz1, sz2, ipath;
310 	struct buf	 buf, /* keyword buffer */
311 			 dbuf; /* description buffer */
312 	struct of	*of; /* list of files for processing */
313 	extern int	 optind;
314 	extern char	*optarg;
315 
316 	progname = strrchr(argv[0], '/');
317 	if (progname == NULL)
318 		progname = argv[0];
319 	else
320 		++progname;
321 
322 	memset(&dirs, 0, sizeof(struct manpaths));
323 	memset(&mdb, 0, sizeof(struct mdb));
324 	memset(&recs, 0, sizeof(struct recs));
325 
326 	of = NULL;
327 	mp = NULL;
328 	hash = NULL;
329 	op = OP_DEFAULT;
330 	dir = NULL;
331 
332 	while (-1 != (ch = getopt(argc, argv, "aC:d:tu:vW")))
333 		switch (ch) {
334 		case ('a'):
335 			use_all = 1;
336 			break;
337 		case ('C'):
338 			if (op) {
339 				fprintf(stderr,
340 				    "-C: conflicting options\n");
341 				goto usage;
342 			}
343 			dir = optarg;
344 			op = OP_CONFFILE;
345 			break;
346 		case ('d'):
347 			if (op) {
348 				fprintf(stderr,
349 				    "-d: conflicting options\n");
350 				goto usage;
351 			}
352 			dir = optarg;
353 			op = OP_UPDATE;
354 			break;
355 		case ('t'):
356 			dup2(STDOUT_FILENO, STDERR_FILENO);
357 			if (op) {
358 				fprintf(stderr,
359 				    "-t: conflicting options\n");
360 				goto usage;
361 			}
362 			op = OP_TEST;
363 			use_all = 1;
364 			warnings = 1;
365 			break;
366 		case ('u'):
367 			if (op) {
368 				fprintf(stderr,
369 				    "-u: conflicting options\n");
370 				goto usage;
371 			}
372 			dir = optarg;
373 			op = OP_DELETE;
374 			break;
375 		case ('v'):
376 			verb++;
377 			break;
378 		case ('W'):
379 			warnings = 1;
380 			break;
381 		default:
382 			goto usage;
383 		}
384 
385 	argc -= optind;
386 	argv += optind;
387 
388 	if (OP_CONFFILE == op && argc > 0) {
389 		fprintf(stderr, "-C: too many arguments\n");
390 		goto usage;
391 	}
392 
393 	memset(&info, 0, sizeof(BTREEINFO));
394 	info.lorder = 4321;
395 	info.flags = R_DUP;
396 
397 	mp = mparse_alloc(MPARSE_AUTO, MANDOCLEVEL_FATAL, NULL, NULL, NULL);
398 
399 	memset(&buf, 0, sizeof(struct buf));
400 	memset(&dbuf, 0, sizeof(struct buf));
401 
402 	buf.size = dbuf.size = MANDOC_BUFSZ;
403 
404 	buf.cp = mandoc_malloc(buf.size);
405 	dbuf.cp = mandoc_malloc(dbuf.size);
406 
407 	if (OP_TEST == op) {
408 		ofile_argbuild(argc, argv, &of, NULL);
409 		if (NULL == of)
410 			goto out;
411 		index_merge(of, mp, &dbuf, &buf, hash, &mdb, &recs);
412 		goto out;
413 	}
414 
415 	if (OP_UPDATE == op || OP_DELETE == op) {
416 		if (NULL == realpath(dir, pbuf)) {
417 			perror(dir);
418 			exit((int)MANDOCLEVEL_BADARG);
419 		}
420 		if (strlcat(pbuf, "/", PATH_MAX) >= PATH_MAX) {
421 			fprintf(stderr, "%s: path too long\n", pbuf);
422 			exit((int)MANDOCLEVEL_BADARG);
423 		}
424 
425 		strlcat(mdb.dbn, pbuf, PATH_MAX);
426 		sz1 = strlcat(mdb.dbn, MANDOC_DB, PATH_MAX);
427 
428 		strlcat(mdb.idxn, pbuf, PATH_MAX);
429 		sz2 = strlcat(mdb.idxn, MANDOC_IDX, PATH_MAX);
430 
431 		if (sz1 >= PATH_MAX || sz2 >= PATH_MAX) {
432 			fprintf(stderr, "%s: path too long\n", mdb.idxn);
433 			exit((int)MANDOCLEVEL_BADARG);
434 		}
435 
436 		flags = O_CREAT | O_RDWR;
437 		mdb.db = dbopen(mdb.dbn, flags, 0644, DB_BTREE, &info);
438 		mdb.idx = dbopen(mdb.idxn, flags, 0644, DB_RECNO, NULL);
439 
440 		if (NULL == mdb.db) {
441 			perror(mdb.dbn);
442 			exit((int)MANDOCLEVEL_SYSERR);
443 		} else if (NULL == mdb.idx) {
444 			perror(mdb.idxn);
445 			exit((int)MANDOCLEVEL_SYSERR);
446 		}
447 
448 		ofile_argbuild(argc, argv, &of, pbuf);
449 
450 		if (NULL == of)
451 			goto out;
452 
453 		index_prune(of, &mdb, &recs);
454 
455 		/*
456 		 * Go to the root of the respective manual tree.
457 		 * This must work or no manuals may be found (they're
458 		 * indexed relative to the root).
459 		 */
460 
461 		if (OP_UPDATE == op) {
462 			if (-1 == chdir(dir)) {
463 				perror(dir);
464 				exit((int)MANDOCLEVEL_SYSERR);
465 			}
466 			index_merge(of, mp, &dbuf, &buf, hash,
467 					&mdb, &recs);
468 		}
469 
470 		goto out;
471 	}
472 
473 	/*
474 	 * Configure the directories we're going to scan.
475 	 * If we have command-line arguments, use them.
476 	 * If not, we use man(1)'s method (see mandocdb.8).
477 	 */
478 
479 	if (argc > 0) {
480 		dirs.paths = mandoc_calloc(argc, sizeof(char *));
481 		dirs.sz = argc;
482 		for (i = 0; i < argc; i++) {
483 			if (NULL == (cp = realpath(argv[i], pbuf))) {
484 				perror(argv[i]);
485 				goto out;
486 			}
487 			dirs.paths[i] = mandoc_strdup(cp);
488 		}
489 	} else
490 		manpath_parse(&dirs, dir, NULL, NULL);
491 
492 	for (ipath = 0; ipath < dirs.sz; ipath++) {
493 
494 		/*
495 		 * Go to the root of the respective manual tree.
496 		 * This must work or no manuals may be found:
497 		 * They are indexed relative to the root.
498 		 */
499 
500 		if (-1 == chdir(dirs.paths[ipath])) {
501 			perror(dirs.paths[ipath]);
502 			exit((int)MANDOCLEVEL_SYSERR);
503 		}
504 
505 		/* Create a new database in two temporary files. */
506 
507 		flags = O_CREAT | O_EXCL | O_RDWR;
508 		while (NULL == mdb.db) {
509 			strlcpy(mdb.dbn, MANDOC_DB, PATH_MAX);
510 			strlcat(mdb.dbn, ".XXXXXXXXXX", PATH_MAX);
511 			if (NULL == mktemp(mdb.dbn)) {
512 				perror(mdb.dbn);
513 				exit((int)MANDOCLEVEL_SYSERR);
514 			}
515 			mdb.db = dbopen(mdb.dbn, flags, 0644,
516 					DB_BTREE, &info);
517 			if (NULL == mdb.db && EEXIST != errno) {
518 				perror(mdb.dbn);
519 				exit((int)MANDOCLEVEL_SYSERR);
520 			}
521 		}
522 		while (NULL == mdb.idx) {
523 			strlcpy(mdb.idxn, MANDOC_IDX, PATH_MAX);
524 			strlcat(mdb.idxn, ".XXXXXXXXXX", PATH_MAX);
525 			if (NULL == mktemp(mdb.idxn)) {
526 				perror(mdb.idxn);
527 				unlink(mdb.dbn);
528 				exit((int)MANDOCLEVEL_SYSERR);
529 			}
530 			mdb.idx = dbopen(mdb.idxn, flags, 0644,
531 					DB_RECNO, NULL);
532 			if (NULL == mdb.idx && EEXIST != errno) {
533 				perror(mdb.idxn);
534 				unlink(mdb.dbn);
535 				exit((int)MANDOCLEVEL_SYSERR);
536 			}
537 		}
538 
539 		/*
540 		 * Search for manuals and fill the new database.
541 		 */
542 
543 	       	ofile_dirbuild(".", "", "", 0, &of);
544 
545 		if (NULL != of) {
546 			index_merge(of, mp, &dbuf, &buf, hash,
547 			     &mdb, &recs);
548 			ofile_free(of);
549 			of = NULL;
550 		}
551 
552 		(*mdb.db->close)(mdb.db);
553 		(*mdb.idx->close)(mdb.idx);
554 		mdb.db = NULL;
555 		mdb.idx = NULL;
556 
557 		/*
558 		 * Replace the old database with the new one.
559 		 * This is not perfectly atomic,
560 		 * but i cannot think of a better way.
561 		 */
562 
563 		if (-1 == rename(mdb.dbn, MANDOC_DB)) {
564 			perror(MANDOC_DB);
565 			unlink(mdb.dbn);
566 			unlink(mdb.idxn);
567 			exit((int)MANDOCLEVEL_SYSERR);
568 		}
569 		if (-1 == rename(mdb.idxn, MANDOC_IDX)) {
570 			perror(MANDOC_IDX);
571 			unlink(MANDOC_DB);
572 			unlink(MANDOC_IDX);
573 			unlink(mdb.idxn);
574 			exit((int)MANDOCLEVEL_SYSERR);
575 		}
576 	}
577 
578 out:
579 	if (mdb.db)
580 		(*mdb.db->close)(mdb.db);
581 	if (mdb.idx)
582 		(*mdb.idx->close)(mdb.idx);
583 	if (hash)
584 		(*hash->close)(hash);
585 	if (mp)
586 		mparse_free(mp);
587 
588 	manpath_free(&dirs);
589 	ofile_free(of);
590 	free(buf.cp);
591 	free(dbuf.cp);
592 	free(recs.stack);
593 
594 	return(MANDOCLEVEL_OK);
595 
596 usage:
597 	fprintf(stderr,
598 		"usage: %s [-avvv] [-C file] | dir ... | -t file ...\n"
599 		"                        -d dir [file ...] | "
600 		"-u dir [file ...]\n",
601 		progname);
602 
603 	return((int)MANDOCLEVEL_BADARG);
604 }
605 
606 void
607 index_merge(const struct of *of, struct mparse *mp,
608 		struct buf *dbuf, struct buf *buf, DB *hash,
609 		struct mdb *mdb, struct recs *recs)
610 {
611 	recno_t		 rec;
612 	int		 ch, skip;
613 	DBT		 key, val;
614 	DB		*files;  /* temporary file name table */
615 	struct mdoc	*mdoc;
616 	struct man	*man;
617 	const char	*fn, *msec, *march, *mtitle;
618 	char		*p;
619 	uint64_t	 mask;
620 	size_t		 sv;
621 	unsigned	 seq;
622 	uint64_t	 vbuf[2];
623 	char		 type;
624 
625 	static char	 emptystring[] = "";
626 
627 	if (warnings) {
628 		files = NULL;
629 		hash_reset(&files);
630 	}
631 
632 	rec = 0;
633 	for (of = of->first; of; of = of->next) {
634 		fn = of->fname;
635 
636 		/*
637 		 * Try interpreting the file as mdoc(7) or man(7)
638 		 * source code, unless it is already known to be
639 		 * formatted.  Fall back to formatted mode.
640 		 */
641 
642 		mparse_reset(mp);
643 		mdoc = NULL;
644 		man = NULL;
645 
646 		if ((MANDOC_SRC & of->src_form ||
647 		    ! (MANDOC_FORM & of->src_form)) &&
648 		    MANDOCLEVEL_FATAL > mparse_readfd(mp, -1, fn))
649 			mparse_result(mp, &mdoc, &man);
650 
651 		if (NULL != mdoc) {
652 			msec = mdoc_meta(mdoc)->msec;
653 			march = mdoc_meta(mdoc)->arch;
654 			if (NULL == march)
655 				march = "";
656 			mtitle = mdoc_meta(mdoc)->title;
657 		} else if (NULL != man) {
658 			msec = man_meta(man)->msec;
659 			march = "";
660 			mtitle = man_meta(man)->title;
661 		} else {
662 			msec = of->sec;
663 			march = of->arch;
664 			mtitle = of->title;
665 		}
666 
667 		/*
668 		 * Check whether the manual section given in a file
669 		 * agrees with the directory where the file is located.
670 		 * Some manuals have suffixes like (3p) on their
671 		 * section number either inside the file or in the
672 		 * directory name, some are linked into more than one
673 		 * section, like encrypt(1) = makekey(8).  Do not skip
674 		 * manuals for such reasons.
675 		 */
676 
677 		skip = 0;
678 		assert(of->sec);
679 		assert(msec);
680 		if (warnings)
681 			if (strcasecmp(msec, of->sec))
682 				fprintf(stderr, "%s: "
683 					"section \"%s\" manual "
684 					"in \"%s\" directory\n",
685 					fn, msec, of->sec);
686 
687 		/*
688 		 * Manual page directories exist for each kernel
689 		 * architecture as returned by machine(1).
690 		 * However, many manuals only depend on the
691 		 * application architecture as returned by arch(1).
692 		 * For example, some (2/ARM) manuals are shared
693 		 * across the "armish" and "zaurus" kernel
694 		 * architectures.
695 		 * A few manuals are even shared across completely
696 		 * different architectures, for example fdformat(1)
697 		 * on amd64, i386, sparc, and sparc64.
698 		 * Thus, warn about architecture mismatches,
699 		 * but don't skip manuals for this reason.
700 		 */
701 
702 		assert(of->arch);
703 		assert(march);
704 		if (warnings)
705 			if (strcasecmp(march, of->arch))
706 				fprintf(stderr, "%s: "
707 					"architecture \"%s\" manual "
708 					"in \"%s\" directory\n",
709 					fn, march, of->arch);
710 
711 		/*
712 		 * By default, skip a file if the title given
713 		 * in the file disagrees with the file name.
714 		 * Do not warn, this happens for all MLINKs.
715 		 */
716 
717 		assert(of->title);
718 		assert(mtitle);
719 		if (strcasecmp(mtitle, of->title))
720 			skip = 1;
721 
722 		/*
723 		 * Build a title string for the file.  If it matches
724 		 * the location of the file, remember the title as
725 		 * found; else, remember it as missing.
726 		 */
727 
728 		if (warnings) {
729 			buf->len = 0;
730 			buf_appendb(buf, mtitle, strlen(mtitle));
731 			buf_appendb(buf, "(", 1);
732 			buf_appendb(buf, msec, strlen(msec));
733 			if ('\0' != *march) {
734 				buf_appendb(buf, "/", 1);
735 				buf_appendb(buf, march, strlen(march));
736 			}
737 			buf_appendb(buf, ")", 2);
738 			for (p = buf->cp; '\0' != *p; p++)
739 				*p = tolower((unsigned char)*p);
740 			key.data = buf->cp;
741 			key.size = buf->len;
742 			val.data = NULL;
743 			val.size = 0;
744 			if (0 == skip)
745 				val.data = emptystring;
746 			else {
747 				ch = (*files->get)(files, &key, &val, 0);
748 				if (ch < 0) {
749 					perror("hash");
750 					exit((int)MANDOCLEVEL_SYSERR);
751 				} else if (ch > 0) {
752 					val.data = (void *)fn;
753 					val.size = strlen(fn) + 1;
754 				} else
755 					val.data = NULL;
756 			}
757 			if (NULL != val.data &&
758 			    (*files->put)(files, &key, &val, 0) < 0) {
759 				perror("hash");
760 				exit((int)MANDOCLEVEL_SYSERR);
761 			}
762 		}
763 
764 		if (skip && !use_all)
765 			continue;
766 
767 		/*
768 		 * The index record value consists of a nil-terminated
769 		 * filename, a nil-terminated manual section, and a
770 		 * nil-terminated description.  Use the actual
771 		 * location of the file, such that the user can find
772 		 * it with man(1).  Since the description may not be
773 		 * set, we set a sentinel to see if we're going to
774 		 * write a nil byte in its place.
775 		 */
776 
777 		dbuf->len = 0;
778 		type = mdoc ? 'd' : (man ? 'a' : 'c');
779 		buf_appendb(dbuf, &type, 1);
780 		buf_appendb(dbuf, fn, strlen(fn) + 1);
781 		buf_appendb(dbuf, of->sec, strlen(of->sec) + 1);
782 		buf_appendb(dbuf, of->title, strlen(of->title) + 1);
783 		buf_appendb(dbuf, of->arch, strlen(of->arch) + 1);
784 
785 		sv = dbuf->len;
786 
787 		/*
788 		 * Collect keyword/mask pairs.
789 		 * Each pair will become a new btree node.
790 		 */
791 
792 		hash_reset(&hash);
793 		if (mdoc)
794 			pmdoc_node(hash, buf, dbuf,
795 				mdoc_node(mdoc), mdoc_meta(mdoc));
796 		else if (man)
797 			pman_node(hash, buf, dbuf, man_node(man));
798 		else
799 			pformatted(hash, buf, dbuf, of);
800 
801 		/* Test mode, do not access any database. */
802 
803 		if (NULL == mdb->db || NULL == mdb->idx)
804 			continue;
805 
806 		/*
807 		 * Make sure the file name is always registered
808 		 * as an .Nm search key.
809 		 */
810 		buf->len = 0;
811 		buf_append(buf, of->title);
812 		hash_put(hash, buf, TYPE_Nm);
813 
814 		/*
815 		 * Reclaim an empty index record, if available.
816 		 * Use its record number for all new btree nodes.
817 		 */
818 
819 		if (recs->cur > 0) {
820 			recs->cur--;
821 			rec = recs->stack[(int)recs->cur];
822 		} else if (recs->last > 0) {
823 			rec = recs->last;
824 			recs->last = 0;
825 		} else
826 			rec++;
827 		vbuf[1] = htobe64(rec);
828 
829 		/*
830 		 * Copy from the in-memory hashtable of pending
831 		 * keyword/mask pairs into the database.
832 		 */
833 
834 		seq = R_FIRST;
835 		while (0 == (ch = (*hash->seq)(hash, &key, &val, seq))) {
836 			seq = R_NEXT;
837 			assert(sizeof(uint64_t) == val.size);
838 			memcpy(&mask, val.data, val.size);
839 			vbuf[0] = htobe64(mask);
840 			val.size = sizeof(vbuf);
841 			val.data = &vbuf;
842 			dbt_put(mdb->db, mdb->dbn, &key, &val);
843 		}
844 		if (ch < 0) {
845 			perror("hash");
846 			unlink(mdb->dbn);
847 			unlink(mdb->idxn);
848 			exit((int)MANDOCLEVEL_SYSERR);
849 		}
850 
851 		/*
852 		 * Apply to the index.  If we haven't had a description
853 		 * set, put an empty one in now.
854 		 */
855 
856 		if (dbuf->len == sv)
857 			buf_appendb(dbuf, "", 1);
858 
859 		key.data = &rec;
860 		key.size = sizeof(recno_t);
861 
862 		val.data = dbuf->cp;
863 		val.size = dbuf->len;
864 
865 		if (verb)
866 			printf("%s: adding to index\n", fn);
867 
868 		dbt_put(mdb->idx, mdb->idxn, &key, &val);
869 	}
870 
871 	/*
872 	 * Iterate the remembered file titles and check that
873 	 * all files can be found by their main title.
874 	 */
875 
876 	if (warnings) {
877 		seq = R_FIRST;
878 		while (0 == (*files->seq)(files, &key, &val, seq)) {
879 			seq = R_NEXT;
880 			if (val.size)
881 				fprintf(stderr, "%s: probably "
882 				    "unreachable, title is %s\n",
883 				    (char *)val.data, (char *)key.data);
884 		}
885 		(*files->close)(files);
886 	}
887 }
888 
/*
 * Scan through all entries in the index file `idx' and prune those
 * entries in `ofile'.
 * Pruning consists of removing from `db', then invalidating the entry
 * in `idx' (zeroing its value size).
 *
 * As a side effect, the record numbers of all empty index records --
 * those already empty and those just pruned -- are pushed onto
 * recs->stack, and recs->last is left one past the last record number
 * seen, for use by index_merge().
 * Any database error or inconsistency terminates the process.
 */
static void
index_prune(const struct of *ofile, struct mdb *mdb, struct recs *recs)
{
	const struct of	*of;
	const char	*fn;
	uint64_t	 vbuf[2];
	unsigned	 seq, sseq;
	DBT		 key, val;
	int		 ch;

	recs->cur = 0;
	seq = R_FIRST;
	while (0 == (ch = (*mdb->idx->seq)(mdb->idx, &key, &val, seq))) {
		seq = R_NEXT;
		assert(sizeof(recno_t) == key.size);
		memcpy(&recs->last, key.data, key.size);

		/* Deleted records are zero-sized.  Skip them. */

		if (0 == val.size)
			goto cont;

		/*
		 * Make sure we're sane.
		 * Read past our mdoc/man/cat type to the next string,
		 * then make sure it's bounded by a NUL.
		 * Failing any of these, we go into our error handler.
		 * (Leaving the loop with ch == 0 is what selects the
		 * "corrupt index" branch below.)
		 */

		fn = (char *)val.data + 1;
		if (NULL == memchr(fn, '\0', val.size - 1))
			break;

		/*
		 * Search for the file in those we care about.
		 * XXX: build this into a tree.  Too slow.
		 */

		for (of = ofile->first; of; of = of->next)
			if (0 == strcmp(fn, of->fname))
				break;

		/* Not one of ours: keep the record, do not stack it. */

		if (NULL == of)
			continue;

		/*
		 * Search through the keyword database, throwing out all
		 * references to our file.
		 * Note that this scan reuses `key' and `val', so the
		 * index record's key and value are overwritten here;
		 * `fn' still points at the data returned by the index.
		 */

		sseq = R_FIRST;
		while (0 == (ch = (*mdb->db->seq)(mdb->db,
					&key, &val, sseq))) {
			sseq = R_NEXT;
			/* Wrong value size: fails as "corrupt database". */
			if (sizeof(vbuf) != val.size)
				break;

			/* vbuf[1] is the big-endian record number. */
			memcpy(vbuf, val.data, val.size);
			if (recs->last != betoh64(vbuf[1]))
				continue;

			if ((ch = (*mdb->db->del)(mdb->db,
					&key, R_CURSOR)) < 0)
				break;
		}

		/* Only a clean end of the scan (ch == 1) is acceptable. */

		if (ch < 0) {
			perror(mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		} else if (1 != ch) {
			fprintf(stderr, "%s: corrupt database\n",
					mdb->dbn);
			exit((int)MANDOCLEVEL_SYSERR);
		}

		if (verb)
			printf("%s: deleting from index\n", fn);

		/*
		 * Invalidate the index record at the cursor.
		 * NOTE(review): `key' no longer holds the record number
		 * at this point; presumably the recno put ignores the
		 * key with R_CURSOR -- confirm against dbopen(3).
		 */

		val.size = 0;
		ch = (*mdb->idx->put)(mdb->idx, &key, &val, R_CURSOR);

		if (ch < 0)
			break;
cont:
		/* Remember the empty record, growing the stack as needed. */
		if (recs->cur >= recs->size) {
			recs->size += MANDOC_SLOP;
			recs->stack = mandoc_realloc(recs->stack,
					recs->size * sizeof(recno_t));
		}

		recs->stack[(int)recs->cur] = recs->last;
		recs->cur++;
	}

	/* Only a clean end of the index (ch == 1) is acceptable. */

	if (ch < 0) {
		perror(mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	} else if (1 != ch) {
		fprintf(stderr, "%s: corrupt index\n", mdb->idxn);
		exit((int)MANDOCLEVEL_SYSERR);
	}

	recs->last++;
}
999 
1000 /*
1001  * Grow the buffer (if necessary) and copy in a binary string.
1002  */
1003 static void
1004 buf_appendb(struct buf *buf, const void *cp, size_t sz)
1005 {
1006 
1007 	/* Overshoot by MANDOC_BUFSZ. */
1008 
1009 	while (buf->len + sz >= buf->size) {
1010 		buf->size = buf->len + sz + MANDOC_BUFSZ;
1011 		buf->cp = mandoc_realloc(buf->cp, buf->size);
1012 	}
1013 
1014 	memcpy(buf->cp + (int)buf->len, cp, sz);
1015 	buf->len += sz;
1016 }
1017 
1018 /*
1019  * Append a nil-terminated string to the buffer.
1020  * This can be invoked multiple times.
1021  * The buffer string will be nil-terminated.
1022  * If invoked multiple times, a space is put between strings.
1023  */
1024 static void
1025 buf_append(struct buf *buf, const char *cp)
1026 {
1027 	size_t		 sz;
1028 
1029 	if (0 == (sz = strlen(cp)))
1030 		return;
1031 
1032 	if (buf->len)
1033 		buf->cp[(int)buf->len - 1] = ' ';
1034 
1035 	buf_appendb(buf, cp, sz + 1);
1036 }
1037 
1038 /*
1039  * Recursively add all text from a given node.
1040  * This is optimised for general mdoc nodes in this context, which do
1041  * not consist of subexpressions and having a recursive call for n->next
1042  * would be wasteful.
1043  * The "f" variable should be 0 unless called from pmdoc_Nd for the
1044  * description buffer, which does not start at the beginning of the
1045  * buffer.
1046  */
1047 static void
1048 buf_appendmdoc(struct buf *buf, const struct mdoc_node *n, int f)
1049 {
1050 
1051 	for ( ; n; n = n->next) {
1052 		if (n->child)
1053 			buf_appendmdoc(buf, n->child, f);
1054 
1055 		if (MDOC_TEXT == n->type && f) {
1056 			f = 0;
1057 			buf_appendb(buf, n->string,
1058 					strlen(n->string) + 1);
1059 		} else if (MDOC_TEXT == n->type)
1060 			buf_append(buf, n->string);
1061 
1062 	}
1063 }
1064 
1065 static void
1066 hash_reset(DB **db)
1067 {
1068 	DB		*hash;
1069 
1070 	if (NULL != (hash = *db))
1071 		(*hash->close)(hash);
1072 
1073 	*db = dbopen(NULL, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
1074 	if (NULL == *db) {
1075 		perror("hash");
1076 		exit((int)MANDOCLEVEL_SYSERR);
1077 	}
1078 }
1079 
1080 /* ARGSUSED */
1081 static int
1082 pmdoc_head(MDOC_ARGS)
1083 {
1084 
1085 	return(MDOC_HEAD == n->type);
1086 }
1087 
1088 /* ARGSUSED */
1089 static int
1090 pmdoc_body(MDOC_ARGS)
1091 {
1092 
1093 	return(MDOC_BODY == n->type);
1094 }
1095 
1096 /* ARGSUSED */
1097 static int
1098 pmdoc_Fd(MDOC_ARGS)
1099 {
1100 	const char	*start, *end;
1101 	size_t		 sz;
1102 
1103 	if (SEC_SYNOPSIS != n->sec)
1104 		return(0);
1105 	if (NULL == (n = n->child) || MDOC_TEXT != n->type)
1106 		return(0);
1107 
1108 	/*
1109 	 * Only consider those `Fd' macro fields that begin with an
1110 	 * "inclusion" token (versus, e.g., #define).
1111 	 */
1112 	if (strcmp("#include", n->string))
1113 		return(0);
1114 
1115 	if (NULL == (n = n->next) || MDOC_TEXT != n->type)
1116 		return(0);
1117 
1118 	/*
1119 	 * Strip away the enclosing angle brackets and make sure we're
1120 	 * not zero-length.
1121 	 */
1122 
1123 	start = n->string;
1124 	if ('<' == *start || '"' == *start)
1125 		start++;
1126 
1127 	if (0 == (sz = strlen(start)))
1128 		return(0);
1129 
1130 	end = &start[(int)sz - 1];
1131 	if ('>' == *end || '"' == *end)
1132 		end--;
1133 
1134 	assert(end >= start);
1135 
1136 	buf_appendb(buf, start, (size_t)(end - start + 1));
1137 	buf_appendb(buf, "", 1);
1138 	return(1);
1139 }
1140 
1141 /* ARGSUSED */
1142 static int
1143 pmdoc_In(MDOC_ARGS)
1144 {
1145 
1146 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1147 		return(0);
1148 
1149 	buf_append(buf, n->child->string);
1150 	return(1);
1151 }
1152 
1153 /* ARGSUSED */
1154 static int
1155 pmdoc_Fn(MDOC_ARGS)
1156 {
1157 	struct mdoc_node *nn;
1158 	const char	*cp;
1159 
1160 	nn = n->child;
1161 
1162 	if (NULL == nn || MDOC_TEXT != nn->type)
1163 		return(0);
1164 
1165 	/* .Fn "struct type *name" "char *arg" */
1166 
1167 	cp = strrchr(nn->string, ' ');
1168 	if (NULL == cp)
1169 		cp = nn->string;
1170 
1171 	/* Strip away pointer symbol. */
1172 
1173 	while ('*' == *cp)
1174 		cp++;
1175 
1176 	/* Store the function name. */
1177 
1178 	buf_append(buf, cp);
1179 	hash_put(hash, buf, TYPE_Fn);
1180 
1181 	/* Store the function type. */
1182 
1183 	if (nn->string < cp) {
1184 		buf->len = 0;
1185 		buf_appendb(buf, nn->string, cp - nn->string);
1186 		buf_appendb(buf, "", 1);
1187 		hash_put(hash, buf, TYPE_Ft);
1188 	}
1189 
1190 	/* Store the arguments. */
1191 
1192 	for (nn = nn->next; nn; nn = nn->next) {
1193 		if (MDOC_TEXT != nn->type)
1194 			continue;
1195 		buf->len = 0;
1196 		buf_append(buf, nn->string);
1197 		hash_put(hash, buf, TYPE_Fa);
1198 	}
1199 
1200 	return(0);
1201 }
1202 
1203 /* ARGSUSED */
1204 static int
1205 pmdoc_St(MDOC_ARGS)
1206 {
1207 
1208 	if (NULL == n->child || MDOC_TEXT != n->child->type)
1209 		return(0);
1210 
1211 	buf_append(buf, n->child->string);
1212 	return(1);
1213 }
1214 
1215 /* ARGSUSED */
1216 static int
1217 pmdoc_Xr(MDOC_ARGS)
1218 {
1219 
1220 	if (NULL == (n = n->child))
1221 		return(0);
1222 
1223 	buf_appendb(buf, n->string, strlen(n->string));
1224 
1225 	if (NULL != (n = n->next)) {
1226 		buf_appendb(buf, ".", 1);
1227 		buf_appendb(buf, n->string, strlen(n->string) + 1);
1228 	} else
1229 		buf_appendb(buf, ".", 2);
1230 
1231 	return(1);
1232 }
1233 
1234 /* ARGSUSED */
1235 static int
1236 pmdoc_Nd(MDOC_ARGS)
1237 {
1238 
1239 	if (MDOC_BODY != n->type)
1240 		return(0);
1241 
1242 	buf_appendmdoc(dbuf, n->child, 1);
1243 	return(1);
1244 }
1245 
1246 /* ARGSUSED */
1247 static int
1248 pmdoc_Nm(MDOC_ARGS)
1249 {
1250 
1251 	if (SEC_NAME == n->sec)
1252 		return(1);
1253 	else if (SEC_SYNOPSIS != n->sec || MDOC_HEAD != n->type)
1254 		return(0);
1255 
1256 	if (NULL == n->child)
1257 		buf_append(buf, m->name);
1258 
1259 	return(1);
1260 }
1261 
1262 /* ARGSUSED */
1263 static int
1264 pmdoc_Sh(MDOC_ARGS)
1265 {
1266 
1267 	return(SEC_CUSTOM == n->sec && MDOC_HEAD == n->type);
1268 }
1269 
1270 static void
1271 hash_put(DB *db, const struct buf *buf, uint64_t mask)
1272 {
1273 	uint64_t	 oldmask;
1274 	DBT		 key, val;
1275 	int		 rc;
1276 
1277 	if (buf->len < 2)
1278 		return;
1279 
1280 	key.data = buf->cp;
1281 	key.size = buf->len;
1282 
1283 	if ((rc = (*db->get)(db, &key, &val, 0)) < 0) {
1284 		perror("hash");
1285 		exit((int)MANDOCLEVEL_SYSERR);
1286 	} else if (0 == rc) {
1287 		assert(sizeof(uint64_t) == val.size);
1288 		memcpy(&oldmask, val.data, val.size);
1289 		mask |= oldmask;
1290 	}
1291 
1292 	val.data = &mask;
1293 	val.size = sizeof(uint64_t);
1294 
1295 	if ((rc = (*db->put)(db, &key, &val, 0)) < 0) {
1296 		perror("hash");
1297 		exit((int)MANDOCLEVEL_SYSERR);
1298 	}
1299 }
1300 
1301 static void
1302 dbt_put(DB *db, const char *dbn, DBT *key, DBT *val)
1303 {
1304 
1305 	assert(key->size);
1306 	assert(val->size);
1307 
1308 	if (0 == (*db->put)(db, key, val, 0))
1309 		return;
1310 
1311 	perror(dbn);
1312 	exit((int)MANDOCLEVEL_SYSERR);
1313 	/* NOTREACHED */
1314 }
1315 
1316 /*
1317  * Call out to per-macro handlers after clearing the persistent database
1318  * key.  If the macro sets the database key, flush it to the database.
1319  */
1320 static void
1321 pmdoc_node(MDOC_ARGS)
1322 {
1323 
1324 	if (NULL == n)
1325 		return;
1326 
1327 	switch (n->type) {
1328 	case (MDOC_HEAD):
1329 		/* FALLTHROUGH */
1330 	case (MDOC_BODY):
1331 		/* FALLTHROUGH */
1332 	case (MDOC_TAIL):
1333 		/* FALLTHROUGH */
1334 	case (MDOC_BLOCK):
1335 		/* FALLTHROUGH */
1336 	case (MDOC_ELEM):
1337 		buf->len = 0;
1338 
1339 		/*
1340 		 * Both NULL handlers and handlers returning true
1341 		 * request using the data.  Only skip the element
1342 		 * when the handler returns false.
1343 		 */
1344 
1345 		if (NULL != mdocs[n->tok].fp &&
1346 		    0 == (*mdocs[n->tok].fp)(hash, buf, dbuf, n, m))
1347 			break;
1348 
1349 		/*
1350 		 * For many macros, use the text from all children.
1351 		 * Set zero flags for macros not needing this.
1352 		 * In that case, the handler must fill the buffer.
1353 		 */
1354 
1355 		if (MDOCF_CHILD & mdocs[n->tok].flags)
1356 			buf_appendmdoc(buf, n->child, 0);
1357 
1358 		/*
1359 		 * Cover the most common case:
1360 		 * Automatically stage one string per element.
1361 		 * Set a zero mask for macros not needing this.
1362 		 * Additional staging can be done in the handler.
1363 		 */
1364 
1365 		if (mdocs[n->tok].mask)
1366 			hash_put(hash, buf, mdocs[n->tok].mask);
1367 		break;
1368 	default:
1369 		break;
1370 	}
1371 
1372 	pmdoc_node(hash, buf, dbuf, n->child, m);
1373 	pmdoc_node(hash, buf, dbuf, n->next, m);
1374 }
1375 
1376 static int
1377 pman_node(MAN_ARGS)
1378 {
1379 	const struct man_node *head, *body;
1380 	char		*start, *sv, *title;
1381 	size_t		 sz, titlesz;
1382 
1383 	if (NULL == n)
1384 		return(0);
1385 
1386 	/*
1387 	 * We're only searching for one thing: the first text child in
1388 	 * the BODY of a NAME section.  Since we don't keep track of
1389 	 * sections in -man, run some hoops to find out whether we're in
1390 	 * the correct section or not.
1391 	 */
1392 
1393 	if (MAN_BODY == n->type && MAN_SH == n->tok) {
1394 		body = n;
1395 		assert(body->parent);
1396 		if (NULL != (head = body->parent->head) &&
1397 				1 == head->nchild &&
1398 				NULL != (head = (head->child)) &&
1399 				MAN_TEXT == head->type &&
1400 				0 == strcmp(head->string, "NAME") &&
1401 				NULL != (body = body->child) &&
1402 				MAN_TEXT == body->type) {
1403 
1404 			title = NULL;
1405 			titlesz = 0;
1406 			/*
1407 			 * Suck the entire NAME section into memory.
1408 			 * Yes, we might run away.
1409 			 * But too many manuals have big, spread-out
1410 			 * NAME sections over many lines.
1411 			 */
1412 			for ( ; NULL != body; body = body->next) {
1413 				if (MAN_TEXT != body->type)
1414 					break;
1415 				if (0 == (sz = strlen(body->string)))
1416 					continue;
1417 				title = mandoc_realloc
1418 					(title, titlesz + sz + 1);
1419 				memcpy(title + titlesz, body->string, sz);
1420 				titlesz += sz + 1;
1421 				title[(int)titlesz - 1] = ' ';
1422 			}
1423 			if (NULL == title)
1424 				return(0);
1425 
1426 			title = mandoc_realloc(title, titlesz + 1);
1427 			title[(int)titlesz] = '\0';
1428 
1429 			/* Skip leading space.  */
1430 
1431 			sv = title;
1432 			while (isspace((unsigned char)*sv))
1433 				sv++;
1434 
1435 			if (0 == (sz = strlen(sv))) {
1436 				free(title);
1437 				return(0);
1438 			}
1439 
1440 			/* Erase trailing space. */
1441 
1442 			start = &sv[sz - 1];
1443 			while (start > sv && isspace((unsigned char)*start))
1444 				*start-- = '\0';
1445 
1446 			if (start == sv) {
1447 				free(title);
1448 				return(0);
1449 			}
1450 
1451 			start = sv;
1452 
1453 			/*
1454 			 * Go through a special heuristic dance here.
1455 			 * This is why -man manuals are great!
1456 			 * (I'm being sarcastic: my eyes are bleeding.)
1457 			 * Conventionally, one or more manual names are
1458 			 * comma-specified prior to a whitespace, then a
1459 			 * dash, then a description.  Try to puzzle out
1460 			 * the name parts here.
1461 			 */
1462 
1463 			for ( ;; ) {
1464 				sz = strcspn(start, " ,");
1465 				if ('\0' == start[(int)sz])
1466 					break;
1467 
1468 				buf->len = 0;
1469 				buf_appendb(buf, start, sz);
1470 				buf_appendb(buf, "", 1);
1471 
1472 				hash_put(hash, buf, TYPE_Nm);
1473 
1474 				if (' ' == start[(int)sz]) {
1475 					start += (int)sz + 1;
1476 					break;
1477 				}
1478 
1479 				assert(',' == start[(int)sz]);
1480 				start += (int)sz + 1;
1481 				while (' ' == *start)
1482 					start++;
1483 			}
1484 
1485 			buf->len = 0;
1486 
1487 			if (sv == start) {
1488 				buf_append(buf, start);
1489 				free(title);
1490 				return(1);
1491 			}
1492 
1493 			while (isspace((unsigned char)*start))
1494 				start++;
1495 
1496 			if (0 == strncmp(start, "-", 1))
1497 				start += 1;
1498 			else if (0 == strncmp(start, "\\-\\-", 4))
1499 				start += 4;
1500 			else if (0 == strncmp(start, "\\-", 2))
1501 				start += 2;
1502 			else if (0 == strncmp(start, "\\(en", 4))
1503 				start += 4;
1504 			else if (0 == strncmp(start, "\\(em", 4))
1505 				start += 4;
1506 
1507 			while (' ' == *start)
1508 				start++;
1509 
1510 			sz = strlen(start) + 1;
1511 			buf_appendb(dbuf, start, sz);
1512 			buf_appendb(buf, start, sz);
1513 
1514 			hash_put(hash, buf, TYPE_Nd);
1515 			free(title);
1516 		}
1517 	}
1518 
1519 	for (n = n->child; n; n = n->next)
1520 		if (pman_node(hash, buf, dbuf, n))
1521 			return(1);
1522 
1523 	return(0);
1524 }
1525 
1526 /*
1527  * Parse a formatted manual page.
1528  * By necessity, this involves rather crude guesswork.
1529  */
1530 static void
1531 pformatted(DB *hash, struct buf *buf,
1532 		struct buf *dbuf, const struct of *of)
1533 {
1534 	FILE		*stream;
1535 	char		*line, *p, *title;
1536 	size_t		 len, plen, titlesz;
1537 
1538 	if (NULL == (stream = fopen(of->fname, "r"))) {
1539 		if (warnings)
1540 			perror(of->fname);
1541 		return;
1542 	}
1543 
1544 	/*
1545 	 * Always use the title derived from the filename up front,
1546 	 * do not even try to find it in the file.  This also makes
1547 	 * sure we don't end up with an orphan index record, even if
1548 	 * the file content turns out to be completely unintelligible.
1549 	 */
1550 
1551 	buf->len = 0;
1552 	buf_append(buf, of->title);
1553 	hash_put(hash, buf, TYPE_Nm);
1554 
1555 	/* Skip to first blank line. */
1556 
1557 	while (NULL != (line = fgetln(stream, &len)))
1558 		if ('\n' == *line)
1559 			break;
1560 
1561 	/*
1562 	 * Assume the first line that is not indented
1563 	 * is the first section header.  Skip to it.
1564 	 */
1565 
1566 	while (NULL != (line = fgetln(stream, &len)))
1567 		if ('\n' != *line && ' ' != *line)
1568 			break;
1569 
1570 	/*
1571 	 * Read up until the next section into a buffer.
1572 	 * Strip the leading and trailing newline from each read line,
1573 	 * appending a trailing space.
1574 	 * Ignore empty (whitespace-only) lines.
1575 	 */
1576 
1577 	titlesz = 0;
1578 	title = NULL;
1579 
1580 	while (NULL != (line = fgetln(stream, &len))) {
1581 		if (' ' != *line || '\n' != line[(int)len - 1])
1582 			break;
1583 		while (len > 0 && isspace((unsigned char)*line)) {
1584 			line++;
1585 			len--;
1586 		}
1587 		if (1 == len)
1588 			continue;
1589 		title = mandoc_realloc(title, titlesz + len);
1590 		memcpy(title + titlesz, line, len);
1591 		titlesz += len;
1592 		title[(int)titlesz - 1] = ' ';
1593 	}
1594 
1595 
1596 	/*
1597 	 * If no page content can be found, or the input line
1598 	 * is already the next section header, or there is no
1599 	 * trailing newline, reuse the page title as the page
1600 	 * description.
1601 	 */
1602 
1603 	if (NULL == title || '\0' == *title) {
1604 		if (warnings)
1605 			fprintf(stderr, "%s: cannot find NAME section\n",
1606 					of->fname);
1607 		buf_appendb(dbuf, buf->cp, buf->size);
1608 		hash_put(hash, buf, TYPE_Nd);
1609 		fclose(stream);
1610 		free(title);
1611 		return;
1612 	}
1613 
1614 	title = mandoc_realloc(title, titlesz + 1);
1615 	title[(int)titlesz] = '\0';
1616 
1617 	/*
1618 	 * Skip to the first dash.
1619 	 * Use the remaining line as the description (no more than 70
1620 	 * bytes).
1621 	 */
1622 
1623 	if (NULL != (p = strstr(title, "- "))) {
1624 		for (p += 2; ' ' == *p || '\b' == *p; p++)
1625 			/* Skip to next word. */ ;
1626 	} else {
1627 		if (warnings)
1628 			fprintf(stderr, "%s: no dash in title line\n",
1629 					of->fname);
1630 		p = title;
1631 	}
1632 
1633 	plen = strlen(p);
1634 
1635 	/* Strip backspace-encoding from line. */
1636 
1637 	while (NULL != (line = memchr(p, '\b', plen))) {
1638 		len = line - p;
1639 		if (0 == len) {
1640 			memmove(line, line + 1, plen--);
1641 			continue;
1642 		}
1643 		memmove(line - 1, line + 1, plen - len);
1644 		plen -= 2;
1645 	}
1646 
1647 	buf_appendb(dbuf, p, plen + 1);
1648 	buf->len = 0;
1649 	buf_appendb(buf, p, plen + 1);
1650 	hash_put(hash, buf, TYPE_Nd);
1651 	fclose(stream);
1652 	free(title);
1653 }
1654 
1655 static void
1656 ofile_argbuild(int argc, char *argv[], struct of **of,
1657 		const char *basedir)
1658 {
1659 	char		 buf[PATH_MAX];
1660 	char		 pbuf[PATH_MAX];
1661 	const char	*sec, *arch, *title;
1662 	char		*relpath, *p;
1663 	int		 i, src_form;
1664 	struct of	*nof;
1665 
1666 	for (i = 0; i < argc; i++) {
1667 		if (NULL == (relpath = realpath(argv[i], pbuf))) {
1668 			perror(argv[i]);
1669 			continue;
1670 		}
1671 		if (NULL != basedir) {
1672 			if (strstr(pbuf, basedir) != pbuf) {
1673 				fprintf(stderr, "%s: file outside "
1674 				    "base directory %s\n",
1675 				    pbuf, basedir);
1676 				continue;
1677 			}
1678 			relpath = pbuf + strlen(basedir);
1679 		}
1680 
1681 		/*
1682 		 * Try to infer the manual section, architecture and
1683 		 * page title from the path, assuming it looks like
1684 		 *   man*[/<arch>]/<title>.<section>   or
1685 		 *   cat<section>[/<arch>]/<title>.0
1686 		 */
1687 
1688 		if (strlcpy(buf, relpath, sizeof(buf)) >= sizeof(buf)) {
1689 			fprintf(stderr, "%s: path too long\n", relpath);
1690 			continue;
1691 		}
1692 		sec = arch = title = "";
1693 		src_form = 0;
1694 		p = strrchr(buf, '\0');
1695 		while (p-- > buf) {
1696 			if ('\0' == *sec && '.' == *p) {
1697 				sec = p + 1;
1698 				*p = '\0';
1699 				if ('0' == *sec)
1700 					src_form |= MANDOC_FORM;
1701 				else if ('1' <= *sec && '9' >= *sec)
1702 					src_form |= MANDOC_SRC;
1703 				continue;
1704 			}
1705 			if ('/' != *p)
1706 				continue;
1707 			if ('\0' == *title) {
1708 				title = p + 1;
1709 				*p = '\0';
1710 				continue;
1711 			}
1712 			if (0 == strncmp("man", p + 1, 3))
1713 				src_form |= MANDOC_SRC;
1714 			else if (0 == strncmp("cat", p + 1, 3))
1715 				src_form |= MANDOC_FORM;
1716 			else
1717 				arch = p + 1;
1718 			break;
1719 		}
1720 		if ('\0' == *title) {
1721 			if (warnings)
1722 				fprintf(stderr,
1723 				    "%s: cannot deduce title "
1724 				    "from filename\n",
1725 				    relpath);
1726 			title = buf;
1727 		}
1728 
1729 		/*
1730 		 * Build the file structure.
1731 		 */
1732 
1733 		nof = mandoc_calloc(1, sizeof(struct of));
1734 		nof->fname = mandoc_strdup(relpath);
1735 		nof->sec = mandoc_strdup(sec);
1736 		nof->arch = mandoc_strdup(arch);
1737 		nof->title = mandoc_strdup(title);
1738 		nof->src_form = src_form;
1739 
1740 		/*
1741 		 * Add the structure to the list.
1742 		 */
1743 
1744 		if (NULL == *of) {
1745 			*of = nof;
1746 			(*of)->first = nof;
1747 		} else {
1748 			nof->first = (*of)->first;
1749 			(*of)->next = nof;
1750 			*of = nof;
1751 		}
1752 	}
1753 }
1754 
1755 /*
1756  * Recursively build up a list of files to parse.
1757  * We use this instead of ftw() and so on because I don't want global
1758  * variables hanging around.
1759  * This ignores the mandoc.db and mandoc.index files, but assumes that
1760  * everything else is a manual.
1761  * Pass in a pointer to a NULL structure for the first invocation.
1762  */
1763 static void
1764 ofile_dirbuild(const char *dir, const char* psec, const char *parch,
1765 		int p_src_form, struct of **of)
1766 {
1767 	char		 buf[PATH_MAX];
1768 #if defined(__sun)
1769 	struct stat	 sb;
1770 #endif
1771 	size_t		 sz;
1772 	DIR		*d;
1773 	const char	*fn, *sec, *arch;
1774 	char		*p, *q, *suffix;
1775 	struct of	*nof;
1776 	struct dirent	*dp;
1777 	int		 src_form;
1778 
1779 	if (NULL == (d = opendir(dir))) {
1780 		if (warnings)
1781 			perror(dir);
1782 		return;
1783 	}
1784 
1785 	while (NULL != (dp = readdir(d))) {
1786 		fn = dp->d_name;
1787 
1788 		if ('.' == *fn)
1789 			continue;
1790 
1791 		src_form = p_src_form;
1792 
1793 #if defined(__sun)
1794 		stat(dp->d_name, &sb);
1795 		if (S_IFDIR & sb.st_mode) {
1796 #else
1797 		if (DT_DIR == dp->d_type) {
1798 #endif
1799 			sec = psec;
1800 			arch = parch;
1801 
1802 			/*
1803 			 * By default, only use directories called:
1804 			 *   man<section>/[<arch>/]   or
1805 			 *   cat<section>/[<arch>/]
1806 			 */
1807 
1808 			if ('\0' == *sec) {
1809 				if(0 == strncmp("man", fn, 3)) {
1810 					src_form |= MANDOC_SRC;
1811 					sec = fn + 3;
1812 				} else if (0 == strncmp("cat", fn, 3)) {
1813 					src_form |= MANDOC_FORM;
1814 					sec = fn + 3;
1815 				} else {
1816 					if (warnings) fprintf(stderr,
1817 					    "%s/%s: bad section\n",
1818 					    dir, fn);
1819 					if (use_all)
1820 						sec = fn;
1821 					else
1822 						continue;
1823 				}
1824 			} else if ('\0' == *arch) {
1825 				if (NULL != strchr(fn, '.')) {
1826 					if (warnings) fprintf(stderr,
1827 					    "%s/%s: bad architecture\n",
1828 					    dir, fn);
1829 					if (0 == use_all)
1830 						continue;
1831 				}
1832 				arch = fn;
1833 			} else {
1834 				if (warnings) fprintf(stderr, "%s/%s: "
1835 				    "excessive subdirectory\n", dir, fn);
1836 				if (0 == use_all)
1837 					continue;
1838 			}
1839 
1840 			buf[0] = '\0';
1841 			strlcat(buf, dir, PATH_MAX);
1842 			strlcat(buf, "/", PATH_MAX);
1843 			sz = strlcat(buf, fn, PATH_MAX);
1844 
1845 			if (PATH_MAX <= sz) {
1846 				if (warnings) fprintf(stderr, "%s/%s: "
1847 				    "path too long\n", dir, fn);
1848 				continue;
1849 			}
1850 
1851 			ofile_dirbuild(buf, sec, arch, src_form, of);
1852 			continue;
1853 		}
1854 
1855 #if defined(__sun)
1856 		if (0 == S_IFREG & sb.st_mode) {
1857 #else
1858 		if (DT_REG != dp->d_type) {
1859 #endif
1860 			if (warnings)
1861 				fprintf(stderr,
1862 				    "%s/%s: not a regular file\n",
1863 				    dir, fn);
1864 			continue;
1865 		}
1866 		if (!strcmp(MANDOC_DB, fn) || !strcmp(MANDOC_IDX, fn))
1867 			continue;
1868 		if ('\0' == *psec) {
1869 			if (warnings)
1870 				fprintf(stderr,
1871 				    "%s/%s: file outside section\n",
1872 				    dir, fn);
1873 			if (0 == use_all)
1874 				continue;
1875 		}
1876 
1877 		/*
1878 		 * By default, skip files where the file name suffix
1879 		 * does not agree with the section directory
1880 		 * they are located in.
1881 		 */
1882 
1883 		suffix = strrchr(fn, '.');
1884 		if (NULL == suffix) {
1885 			if (warnings)
1886 				fprintf(stderr,
1887 				    "%s/%s: no filename suffix\n",
1888 				    dir, fn);
1889 			if (0 == use_all)
1890 				continue;
1891 		} else if ((MANDOC_SRC & src_form &&
1892 				strcmp(suffix + 1, psec)) ||
1893 			    (MANDOC_FORM & src_form &&
1894 				strcmp(suffix + 1, "0"))) {
1895 			if (warnings)
1896 				fprintf(stderr,
1897 				    "%s/%s: wrong filename suffix\n",
1898 				    dir, fn);
1899 			if (0 == use_all)
1900 				continue;
1901 			if ('0' == suffix[1])
1902 				src_form |= MANDOC_FORM;
1903 			else if ('1' <= suffix[1] && '9' >= suffix[1])
1904 				src_form |= MANDOC_SRC;
1905 		}
1906 
1907 		/*
1908 		 * Skip formatted manuals if a source version is
1909 		 * available.  Ignore the age: it is very unlikely
1910 		 * that people install newer formatted base manuals
1911 		 * when they used to have source manuals before,
1912 		 * and in ports, old manuals get removed on update.
1913 		 */
1914 		if (0 == use_all && MANDOC_FORM & src_form &&
1915 				'\0' != *psec) {
1916 			buf[0] = '\0';
1917 			strlcat(buf, dir, PATH_MAX);
1918 			p = strrchr(buf, '/');
1919 			if ('\0' != *parch && NULL != p)
1920 				for (p--; p > buf; p--)
1921 					if ('/' == *p)
1922 						break;
1923 			if (NULL == p)
1924 				p = buf;
1925 			else
1926 				p++;
1927 			if (0 == strncmp("cat", p, 3))
1928 				memcpy(p, "man", 3);
1929 			strlcat(buf, "/", PATH_MAX);
1930 			sz = strlcat(buf, fn, PATH_MAX);
1931 			if (sz >= PATH_MAX) {
1932 				if (warnings) fprintf(stderr,
1933 				    "%s/%s: path too long\n",
1934 				    dir, fn);
1935 				continue;
1936 			}
1937 			q = strrchr(buf, '.');
1938 			if (NULL != q && p < q++) {
1939 				*q = '\0';
1940 				sz = strlcat(buf, psec, PATH_MAX);
1941 				if (sz >= PATH_MAX) {
1942 					if (warnings) fprintf(stderr,
1943 					    "%s/%s: path too long\n",
1944 					    dir, fn);
1945 					continue;
1946 				}
1947 				if (0 == access(buf, R_OK))
1948 					continue;
1949 			}
1950 		}
1951 
1952 		buf[0] = '\0';
1953 		assert('.' == dir[0]);
1954 		if ('/' == dir[1]) {
1955 			strlcat(buf, dir + 2, PATH_MAX);
1956 			strlcat(buf, "/", PATH_MAX);
1957 		}
1958 		sz = strlcat(buf, fn, PATH_MAX);
1959 		if (sz >= PATH_MAX) {
1960 			if (warnings) fprintf(stderr,
1961 			    "%s/%s: path too long\n", dir, fn);
1962 			continue;
1963 		}
1964 
1965 		nof = mandoc_calloc(1, sizeof(struct of));
1966 		nof->fname = mandoc_strdup(buf);
1967 		nof->sec = mandoc_strdup(psec);
1968 		nof->arch = mandoc_strdup(parch);
1969 		nof->src_form = src_form;
1970 
1971 		/*
1972 		 * Remember the file name without the extension,
1973 		 * to be used as the page title in the database.
1974 		 */
1975 
1976 		if (NULL != suffix)
1977 			*suffix = '\0';
1978 		nof->title = mandoc_strdup(fn);
1979 
1980 		/*
1981 		 * Add the structure to the list.
1982 		 */
1983 
1984 		if (NULL == *of) {
1985 			*of = nof;
1986 			(*of)->first = nof;
1987 		} else {
1988 			nof->first = (*of)->first;
1989 			(*of)->next = nof;
1990 			*of = nof;
1991 		}
1992 	}
1993 
1994 	closedir(d);
1995 }
1996 
1997 static void
1998 ofile_free(struct of *of)
1999 {
2000 	struct of	*nof;
2001 
2002 	if (NULL != of)
2003 		of = of->first;
2004 
2005 	while (NULL != of) {
2006 		nof = of->next;
2007 		free(of->fname);
2008 		free(of->sec);
2009 		free(of->arch);
2010 		free(of->title);
2011 		free(of);
2012 		of = nof;
2013 	}
2014 }
2015