xref: /openbsd/usr.bin/mandoc/dbm.c (revision c8dfc550)
1 /*	$OpenBSD: dbm.c,v 1.5 2019/07/01 22:43:03 schwarze Exp $ */
2 /*
3  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Map-based version of the mandoc database, for read-only access.
18  * The interface is defined in "dbm.h".
19  */
20 #include <assert.h>
21 #include <endian.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <regex.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #include "mansearch.h"
31 #include "dbm_map.h"
32 #include "dbm.h"
33 
/*
 * One entry of a macro table, overlaid directly on the mapped database.
 * Both members are stored in the file's own representation and are
 * only ever resolved by passing them to dbm_get().
 */
struct macro {
	int32_t	 value;		/* locates the string(s) for this value */
	int32_t	 pages;		/* locates the 0-terminated page list */
};
38 
/*
 * One entry of the pages array, overlaid directly on the mapped
 * database.  Each member is resolved by passing it to dbm_get().
 */
struct page {
	int32_t	 name;		/* list of names */
	int32_t	 sect;		/* list of sections */
	int32_t	 arch;		/* list of architectures; 0 if none */
	int32_t	 desc;		/* one-line description */
	int32_t	 file;		/* list of file names */
};
46 
/*
 * Kind of page iteration currently in progress;
 * consulted by dbm_page_next() to pick the right worker function.
 */
enum iter {
	ITER_NONE  = 0,		/* no iteration in progress */
	ITER_NAME  = 1,
	ITER_SECT  = 2,
	ITER_ARCH  = 3,
	ITER_DESC  = 4,
	ITER_MACRO = 5
};
55 
56 static struct macro	*macros[MACRO_MAX];
57 static int32_t		 nvals[MACRO_MAX];
58 static struct page	*pages;
59 static int32_t		 npages;
60 static enum iter	 iteration;
61 
/* Workers implementing the filtered iterations; defined further down. */
static struct dbm_res	 page_bytitle(enum iter, const struct dbm_match *);
static struct dbm_res	 page_byarch(const struct dbm_match *);
static struct dbm_res	 page_bymacro(int32_t, const struct dbm_match *);
static char		*macro_bypage(int32_t, int32_t);
66 
67 
68 /*** top level functions **********************************************/
69 
70 /*
71  * Open a disk-based mandoc database for read-only access.
72  * Map the pages and macros[] arrays.
73  * Return 0 on success.  Return -1 and set errno on failure.
74  */
75 int
dbm_open(const char * fname)76 dbm_open(const char *fname)
77 {
78 	const int32_t	*mp, *ep;
79 	int32_t		 im;
80 
81 	if (dbm_map(fname) == -1)
82 		return -1;
83 
84 	if ((npages = be32toh(*dbm_getint(4))) < 0) {
85 		warnx("dbm_open(%s): Invalid number of pages: %d",
86 		    fname, npages);
87 		goto fail;
88 	}
89 	pages = (struct page *)dbm_getint(5);
90 
91 	if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
92 		warnx("dbm_open(%s): Invalid offset of macros array", fname);
93 		goto fail;
94 	}
95 	if (be32toh(*mp) != MACRO_MAX) {
96 		warnx("dbm_open(%s): Invalid number of macros: %d",
97 		    fname, be32toh(*mp));
98 		goto fail;
99 	}
100 	for (im = 0; im < MACRO_MAX; im++) {
101 		if ((ep = dbm_get(*++mp)) == NULL) {
102 			warnx("dbm_open(%s): Invalid offset of macro %d",
103 			    fname, im);
104 			goto fail;
105 		}
106 		nvals[im] = be32toh(*ep);
107 		macros[im] = (struct macro *)++ep;
108 	}
109 	return 0;
110 
111 fail:
112 	dbm_unmap();
113 	errno = EFTYPE;
114 	return -1;
115 }
116 
/*
 * Close the database by releasing the mapping set up by dbm_open().
 */
void
dbm_close(void)
{
	dbm_unmap();
	return;
}
122 
123 
124 /*** functions for handling pages *************************************/
125 
126 int32_t
dbm_page_count(void)127 dbm_page_count(void)
128 {
129 	return npages;
130 }
131 
132 /*
133  * Give the caller pointers to the data for one manual page.
134  */
135 struct dbm_page *
dbm_page_get(int32_t ip)136 dbm_page_get(int32_t ip)
137 {
138 	static struct dbm_page	 res;
139 
140 	assert(ip >= 0);
141 	assert(ip < npages);
142 	res.name = dbm_get(pages[ip].name);
143 	if (res.name == NULL)
144 		res.name = "(NULL)\0";
145 	res.sect = dbm_get(pages[ip].sect);
146 	if (res.sect == NULL)
147 		res.sect = "(NULL)\0";
148 	res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
149 	res.desc = dbm_get(pages[ip].desc);
150 	if (res.desc == NULL)
151 		res.desc = "(NULL)";
152 	res.file = dbm_get(pages[ip].file);
153 	if (res.file == NULL)
154 		res.file = " (NULL)\0";
155 	res.addr = dbm_addr(pages + ip);
156 	return &res;
157 }
158 
159 /*
160  * Functions to start filtered iterations over manual pages.
161  */
162 void
dbm_page_byname(const struct dbm_match * match)163 dbm_page_byname(const struct dbm_match *match)
164 {
165 	assert(match != NULL);
166 	page_bytitle(ITER_NAME, match);
167 }
168 
169 void
dbm_page_bysect(const struct dbm_match * match)170 dbm_page_bysect(const struct dbm_match *match)
171 {
172 	assert(match != NULL);
173 	page_bytitle(ITER_SECT, match);
174 }
175 
/* Start iterating over the pages having an architecture accepted by match. */
void
dbm_page_byarch(const struct dbm_match *match)
{
	assert(match != NULL);
	(void)page_byarch(match);
}
182 
183 void
dbm_page_bydesc(const struct dbm_match * match)184 dbm_page_bydesc(const struct dbm_match *match)
185 {
186 	assert(match != NULL);
187 	page_bytitle(ITER_DESC, match);
188 }
189 
190 void
dbm_page_bymacro(int32_t im,const struct dbm_match * match)191 dbm_page_bymacro(int32_t im, const struct dbm_match *match)
192 {
193 	assert(im >= 0);
194 	assert(im < MACRO_MAX);
195 	assert(match != NULL);
196 	page_bymacro(im, match);
197 }
198 
199 /*
200  * Return the number of the next manual page in the current iteration.
201  */
202 struct dbm_res
dbm_page_next(void)203 dbm_page_next(void)
204 {
205 	struct dbm_res			 res = {-1, 0};
206 
207 	switch(iteration) {
208 	case ITER_NONE:
209 		return res;
210 	case ITER_ARCH:
211 		return page_byarch(NULL);
212 	case ITER_MACRO:
213 		return page_bymacro(0, NULL);
214 	default:
215 		return page_bytitle(iteration, NULL);
216 	}
217 }
218 
219 /*
220  * Functions implementing the iteration over manual pages.
221  */
222 static struct dbm_res
page_bytitle(enum iter arg_iter,const struct dbm_match * arg_match)223 page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
224 {
225 	static const struct dbm_match	*match;
226 	static const char		*cp;
227 	static int32_t			 ip;
228 	struct dbm_res			 res = {-1, 0};
229 
230 	assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
231 	    arg_iter == ITER_SECT);
232 
233 	/* Initialize for a new iteration. */
234 
235 	if (arg_match != NULL) {
236 		iteration = arg_iter;
237 		match = arg_match;
238 		switch (iteration) {
239 		case ITER_NAME:
240 			cp = dbm_get(pages[0].name);
241 			break;
242 		case ITER_SECT:
243 			cp = dbm_get(pages[0].sect);
244 			break;
245 		case ITER_DESC:
246 			cp = dbm_get(pages[0].desc);
247 			break;
248 		default:
249 			abort();
250 		}
251 		if (cp == NULL) {
252 			iteration = ITER_NONE;
253 			match = NULL;
254 			cp = NULL;
255 			ip = npages;
256 		} else
257 			ip = 0;
258 		return res;
259 	}
260 
261 	/* Search for a name. */
262 
263 	while (ip < npages) {
264 		if (iteration == ITER_NAME)
265 			cp++;
266 		if (dbm_match(match, cp))
267 			break;
268 		cp = strchr(cp, '\0') + 1;
269 		if (iteration == ITER_DESC)
270 			ip++;
271 		else if (*cp == '\0') {
272 			cp++;
273 			ip++;
274 		}
275 	}
276 
277 	/* Reached the end without a match. */
278 
279 	if (ip == npages) {
280 		iteration = ITER_NONE;
281 		match = NULL;
282 		cp = NULL;
283 		return res;
284 	}
285 
286 	/* Found a match; save the quality for later retrieval. */
287 
288 	res.page = ip;
289 	res.bits = iteration == ITER_NAME ? cp[-1] : 0;
290 
291 	/* Skip the remaining names of this page. */
292 
293 	if (++ip < npages) {
294 		do {
295 			cp++;
296 		} while (cp[-1] != '\0' ||
297 		    (iteration != ITER_DESC && cp[-2] != '\0'));
298 	}
299 	return res;
300 }
301 
302 static struct dbm_res
page_byarch(const struct dbm_match * arg_match)303 page_byarch(const struct dbm_match *arg_match)
304 {
305 	static const struct dbm_match	*match;
306 	struct dbm_res			 res = {-1, 0};
307 	static int32_t			 ip;
308 	const char			*cp;
309 
310 	/* Initialize for a new iteration. */
311 
312 	if (arg_match != NULL) {
313 		iteration = ITER_ARCH;
314 		match = arg_match;
315 		ip = 0;
316 		return res;
317 	}
318 
319 	/* Search for an architecture. */
320 
321 	for ( ; ip < npages; ip++)
322 		if (pages[ip].arch)
323 			for (cp = dbm_get(pages[ip].arch);
324 			    *cp != '\0';
325 			    cp = strchr(cp, '\0') + 1)
326 				if (dbm_match(match, cp)) {
327 					res.page = ip++;
328 					return res;
329 				}
330 
331 	/* Reached the end without a match. */
332 
333 	iteration = ITER_NONE;
334 	match = NULL;
335 	return res;
336 }
337 
338 static struct dbm_res
page_bymacro(int32_t arg_im,const struct dbm_match * arg_match)339 page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
340 {
341 	static const struct dbm_match	*match;
342 	static const int32_t		*pp;
343 	static const char		*cp;
344 	static int32_t			 im, iv;
345 	struct dbm_res			 res = {-1, 0};
346 
347 	assert(im >= 0);
348 	assert(im < MACRO_MAX);
349 
350 	/* Initialize for a new iteration. */
351 
352 	if (arg_match != NULL) {
353 		iteration = ITER_MACRO;
354 		match = arg_match;
355 		im = arg_im;
356 		cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
357 		pp = NULL;
358 		iv = -1;
359 		return res;
360 	}
361 	if (iteration != ITER_MACRO)
362 		return res;
363 
364 	/* Find the next matching macro value. */
365 
366 	while (pp == NULL || *pp == 0) {
367 		if (++iv == nvals[im]) {
368 			iteration = ITER_NONE;
369 			return res;
370 		}
371 		if (iv)
372 			cp = strchr(cp, '\0') + 1;
373 		if (dbm_match(match, cp))
374 			pp = dbm_get(macros[im][iv].pages);
375 	}
376 
377 	/* Found a matching page. */
378 
379 	res.page = (struct page *)dbm_get(*pp++) - pages;
380 	return res;
381 }
382 
383 
384 /*** functions for handling macros ************************************/
385 
386 int32_t
dbm_macro_count(int32_t im)387 dbm_macro_count(int32_t im)
388 {
389 	assert(im >= 0);
390 	assert(im < MACRO_MAX);
391 	return nvals[im];
392 }
393 
394 struct dbm_macro *
dbm_macro_get(int32_t im,int32_t iv)395 dbm_macro_get(int32_t im, int32_t iv)
396 {
397 	static struct dbm_macro macro;
398 
399 	assert(im >= 0);
400 	assert(im < MACRO_MAX);
401 	assert(iv >= 0);
402 	assert(iv < nvals[im]);
403 	macro.value = dbm_get(macros[im][iv].value);
404 	macro.pp = dbm_get(macros[im][iv].pages);
405 	return &macro;
406 }
407 
408 /*
409  * Filtered iteration over macro entries.
410  */
411 void
dbm_macro_bypage(int32_t im,int32_t ip)412 dbm_macro_bypage(int32_t im, int32_t ip)
413 {
414 	assert(im >= 0);
415 	assert(im < MACRO_MAX);
416 	assert(ip != 0);
417 	macro_bypage(im, ip);
418 }
419 
420 char *
dbm_macro_next(void)421 dbm_macro_next(void)
422 {
423 	return macro_bypage(MACRO_MAX, 0);
424 }
425 
426 static char *
macro_bypage(int32_t arg_im,int32_t arg_ip)427 macro_bypage(int32_t arg_im, int32_t arg_ip)
428 {
429 	static const int32_t	*pp;
430 	static int32_t		 im, ip, iv;
431 
432 	/* Initialize for a new iteration. */
433 
434 	if (arg_im < MACRO_MAX && arg_ip != 0) {
435 		im = arg_im;
436 		ip = arg_ip;
437 		pp = dbm_get(macros[im]->pages);
438 		iv = 0;
439 		return NULL;
440 	}
441 	if (im >= MACRO_MAX)
442 		return NULL;
443 
444 	/* Search for the next value. */
445 
446 	while (iv < nvals[im]) {
447 		if (*pp == ip)
448 			break;
449 		if (*pp == 0)
450 			iv++;
451 		pp++;
452 	}
453 
454 	/* Reached the end without a match. */
455 
456 	if (iv == nvals[im]) {
457 		im = MACRO_MAX;
458 		ip = 0;
459 		pp = NULL;
460 		return NULL;
461 	}
462 
463 	/* Found a match; skip the remaining pages of this entry. */
464 
465 	if (++iv < nvals[im])
466 		while (*pp++ != 0)
467 			continue;
468 
469 	return dbm_get(macros[im][iv - 1].value);
470 }
471