1 /* $OpenBSD: dbm.c,v 1.5 2019/07/01 22:43:03 schwarze Exp $ */
2 /*
3 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Map-based version of the mandoc database, for read-only access.
18 * The interface is defined in "dbm.h".
19 */
20 #include <assert.h>
21 #include <endian.h>
22 #include <err.h>
23 #include <errno.h>
24 #include <regex.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29
30 #include "mansearch.h"
31 #include "dbm_map.h"
32 #include "dbm.h"
33
/*
 * One entry of a macro table, overlaid directly on the mapped file.
 * Both members are handed to dbm_get() as stored in order to locate
 * the data they refer to.
 */
struct macro {
	int32_t	 value;	/* locates the value string of this entry */
	int32_t	 pages;	/* locates the 0-terminated page list of this entry */
};
38
/*
 * One entry of the page table, overlaid directly on the mapped file.
 * Each member is handed to dbm_get() as stored in order to locate
 * the corresponding data of the page.
 */
struct page {
	int32_t	 name;	/* names; list ends with an empty string */
	int32_t	 sect;	/* sections; list ends with an empty string */
	int32_t	 arch;	/* architectures; 0 means the page has none */
	int32_t	 desc;	/* description; a single string */
	int32_t	 file;	/* file names */
};
46
/*
 * Kind of page iteration currently in progress.
 * dbm_page_next() uses it to decide which helper continues the search.
 */
enum iter {
	ITER_NONE = 0,	/* no iteration active */
	ITER_NAME,	/* page_bytitle() matching names */
	ITER_SECT,	/* page_bytitle() matching sections */
	ITER_ARCH,	/* page_byarch() */
	ITER_DESC,	/* page_bytitle() matching descriptions */
	ITER_MACRO	/* page_bymacro() */
};
55
56 static struct macro *macros[MACRO_MAX];
57 static int32_t nvals[MACRO_MAX];
58 static struct page *pages;
59 static int32_t npages;
60 static enum iter iteration;
61
/* Iteration helpers; a NULL match argument continues an iteration. */
static struct dbm_res	 page_bytitle(enum iter arg_iter,
			    const struct dbm_match *arg_match);
static struct dbm_res	 page_byarch(const struct dbm_match *arg_match);
static struct dbm_res	 page_bymacro(int32_t arg_im,
			    const struct dbm_match *arg_match);
static char		*macro_bypage(int32_t arg_im, int32_t arg_ip);
66
67
68 /*** top level functions **********************************************/
69
70 /*
71 * Open a disk-based mandoc database for read-only access.
72 * Map the pages and macros[] arrays.
73 * Return 0 on success. Return -1 and set errno on failure.
74 */
75 int
dbm_open(const char * fname)76 dbm_open(const char *fname)
77 {
78 const int32_t *mp, *ep;
79 int32_t im;
80
81 if (dbm_map(fname) == -1)
82 return -1;
83
84 if ((npages = be32toh(*dbm_getint(4))) < 0) {
85 warnx("dbm_open(%s): Invalid number of pages: %d",
86 fname, npages);
87 goto fail;
88 }
89 pages = (struct page *)dbm_getint(5);
90
91 if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
92 warnx("dbm_open(%s): Invalid offset of macros array", fname);
93 goto fail;
94 }
95 if (be32toh(*mp) != MACRO_MAX) {
96 warnx("dbm_open(%s): Invalid number of macros: %d",
97 fname, be32toh(*mp));
98 goto fail;
99 }
100 for (im = 0; im < MACRO_MAX; im++) {
101 if ((ep = dbm_get(*++mp)) == NULL) {
102 warnx("dbm_open(%s): Invalid offset of macro %d",
103 fname, im);
104 goto fail;
105 }
106 nvals[im] = be32toh(*ep);
107 macros[im] = (struct macro *)++ep;
108 }
109 return 0;
110
111 fail:
112 dbm_unmap();
113 errno = EFTYPE;
114 return -1;
115 }
116
/*
 * Close the database by releasing the mapping made by dbm_open().
 * The file-scope lookup state is not reset here.
 */
void
dbm_close(void)
{
	dbm_unmap();
}
122
123
124 /*** functions for handling pages *************************************/
125
126 int32_t
dbm_page_count(void)127 dbm_page_count(void)
128 {
129 return npages;
130 }
131
132 /*
133 * Give the caller pointers to the data for one manual page.
134 */
135 struct dbm_page *
dbm_page_get(int32_t ip)136 dbm_page_get(int32_t ip)
137 {
138 static struct dbm_page res;
139
140 assert(ip >= 0);
141 assert(ip < npages);
142 res.name = dbm_get(pages[ip].name);
143 if (res.name == NULL)
144 res.name = "(NULL)\0";
145 res.sect = dbm_get(pages[ip].sect);
146 if (res.sect == NULL)
147 res.sect = "(NULL)\0";
148 res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
149 res.desc = dbm_get(pages[ip].desc);
150 if (res.desc == NULL)
151 res.desc = "(NULL)";
152 res.file = dbm_get(pages[ip].file);
153 if (res.file == NULL)
154 res.file = " (NULL)\0";
155 res.addr = dbm_addr(pages + ip);
156 return &res;
157 }
158
159 /*
160 * Functions to start filtered iterations over manual pages.
161 */
162 void
dbm_page_byname(const struct dbm_match * match)163 dbm_page_byname(const struct dbm_match *match)
164 {
165 assert(match != NULL);
166 page_bytitle(ITER_NAME, match);
167 }
168
169 void
dbm_page_bysect(const struct dbm_match * match)170 dbm_page_bysect(const struct dbm_match *match)
171 {
172 assert(match != NULL);
173 page_bytitle(ITER_SECT, match);
174 }
175
/* Start iterating over the pages having an architecture accepted by match. */
void
dbm_page_byarch(const struct dbm_match *match)
{
	assert(match != NULL);
	(void)page_byarch(match);
}
182
183 void
dbm_page_bydesc(const struct dbm_match * match)184 dbm_page_bydesc(const struct dbm_match *match)
185 {
186 assert(match != NULL);
187 page_bytitle(ITER_DESC, match);
188 }
189
190 void
dbm_page_bymacro(int32_t im,const struct dbm_match * match)191 dbm_page_bymacro(int32_t im, const struct dbm_match *match)
192 {
193 assert(im >= 0);
194 assert(im < MACRO_MAX);
195 assert(match != NULL);
196 page_bymacro(im, match);
197 }
198
199 /*
200 * Return the number of the next manual page in the current iteration.
201 */
202 struct dbm_res
dbm_page_next(void)203 dbm_page_next(void)
204 {
205 struct dbm_res res = {-1, 0};
206
207 switch(iteration) {
208 case ITER_NONE:
209 return res;
210 case ITER_ARCH:
211 return page_byarch(NULL);
212 case ITER_MACRO:
213 return page_bymacro(0, NULL);
214 default:
215 return page_bytitle(iteration, NULL);
216 }
217 }
218
219 /*
220 * Functions implementing the iteration over manual pages.
221 */
222 static struct dbm_res
page_bytitle(enum iter arg_iter,const struct dbm_match * arg_match)223 page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
224 {
225 static const struct dbm_match *match;
226 static const char *cp;
227 static int32_t ip;
228 struct dbm_res res = {-1, 0};
229
230 assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
231 arg_iter == ITER_SECT);
232
233 /* Initialize for a new iteration. */
234
235 if (arg_match != NULL) {
236 iteration = arg_iter;
237 match = arg_match;
238 switch (iteration) {
239 case ITER_NAME:
240 cp = dbm_get(pages[0].name);
241 break;
242 case ITER_SECT:
243 cp = dbm_get(pages[0].sect);
244 break;
245 case ITER_DESC:
246 cp = dbm_get(pages[0].desc);
247 break;
248 default:
249 abort();
250 }
251 if (cp == NULL) {
252 iteration = ITER_NONE;
253 match = NULL;
254 cp = NULL;
255 ip = npages;
256 } else
257 ip = 0;
258 return res;
259 }
260
261 /* Search for a name. */
262
263 while (ip < npages) {
264 if (iteration == ITER_NAME)
265 cp++;
266 if (dbm_match(match, cp))
267 break;
268 cp = strchr(cp, '\0') + 1;
269 if (iteration == ITER_DESC)
270 ip++;
271 else if (*cp == '\0') {
272 cp++;
273 ip++;
274 }
275 }
276
277 /* Reached the end without a match. */
278
279 if (ip == npages) {
280 iteration = ITER_NONE;
281 match = NULL;
282 cp = NULL;
283 return res;
284 }
285
286 /* Found a match; save the quality for later retrieval. */
287
288 res.page = ip;
289 res.bits = iteration == ITER_NAME ? cp[-1] : 0;
290
291 /* Skip the remaining names of this page. */
292
293 if (++ip < npages) {
294 do {
295 cp++;
296 } while (cp[-1] != '\0' ||
297 (iteration != ITER_DESC && cp[-2] != '\0'));
298 }
299 return res;
300 }
301
302 static struct dbm_res
page_byarch(const struct dbm_match * arg_match)303 page_byarch(const struct dbm_match *arg_match)
304 {
305 static const struct dbm_match *match;
306 struct dbm_res res = {-1, 0};
307 static int32_t ip;
308 const char *cp;
309
310 /* Initialize for a new iteration. */
311
312 if (arg_match != NULL) {
313 iteration = ITER_ARCH;
314 match = arg_match;
315 ip = 0;
316 return res;
317 }
318
319 /* Search for an architecture. */
320
321 for ( ; ip < npages; ip++)
322 if (pages[ip].arch)
323 for (cp = dbm_get(pages[ip].arch);
324 *cp != '\0';
325 cp = strchr(cp, '\0') + 1)
326 if (dbm_match(match, cp)) {
327 res.page = ip++;
328 return res;
329 }
330
331 /* Reached the end without a match. */
332
333 iteration = ITER_NONE;
334 match = NULL;
335 return res;
336 }
337
338 static struct dbm_res
page_bymacro(int32_t arg_im,const struct dbm_match * arg_match)339 page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
340 {
341 static const struct dbm_match *match;
342 static const int32_t *pp;
343 static const char *cp;
344 static int32_t im, iv;
345 struct dbm_res res = {-1, 0};
346
347 assert(im >= 0);
348 assert(im < MACRO_MAX);
349
350 /* Initialize for a new iteration. */
351
352 if (arg_match != NULL) {
353 iteration = ITER_MACRO;
354 match = arg_match;
355 im = arg_im;
356 cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
357 pp = NULL;
358 iv = -1;
359 return res;
360 }
361 if (iteration != ITER_MACRO)
362 return res;
363
364 /* Find the next matching macro value. */
365
366 while (pp == NULL || *pp == 0) {
367 if (++iv == nvals[im]) {
368 iteration = ITER_NONE;
369 return res;
370 }
371 if (iv)
372 cp = strchr(cp, '\0') + 1;
373 if (dbm_match(match, cp))
374 pp = dbm_get(macros[im][iv].pages);
375 }
376
377 /* Found a matching page. */
378
379 res.page = (struct page *)dbm_get(*pp++) - pages;
380 return res;
381 }
382
383
384 /*** functions for handling macros ************************************/
385
386 int32_t
dbm_macro_count(int32_t im)387 dbm_macro_count(int32_t im)
388 {
389 assert(im >= 0);
390 assert(im < MACRO_MAX);
391 return nvals[im];
392 }
393
394 struct dbm_macro *
dbm_macro_get(int32_t im,int32_t iv)395 dbm_macro_get(int32_t im, int32_t iv)
396 {
397 static struct dbm_macro macro;
398
399 assert(im >= 0);
400 assert(im < MACRO_MAX);
401 assert(iv >= 0);
402 assert(iv < nvals[im]);
403 macro.value = dbm_get(macros[im][iv].value);
404 macro.pp = dbm_get(macros[im][iv].pages);
405 return ¯o;
406 }
407
408 /*
409 * Filtered iteration over macro entries.
410 */
411 void
dbm_macro_bypage(int32_t im,int32_t ip)412 dbm_macro_bypage(int32_t im, int32_t ip)
413 {
414 assert(im >= 0);
415 assert(im < MACRO_MAX);
416 assert(ip != 0);
417 macro_bypage(im, ip);
418 }
419
420 char *
dbm_macro_next(void)421 dbm_macro_next(void)
422 {
423 return macro_bypage(MACRO_MAX, 0);
424 }
425
426 static char *
macro_bypage(int32_t arg_im,int32_t arg_ip)427 macro_bypage(int32_t arg_im, int32_t arg_ip)
428 {
429 static const int32_t *pp;
430 static int32_t im, ip, iv;
431
432 /* Initialize for a new iteration. */
433
434 if (arg_im < MACRO_MAX && arg_ip != 0) {
435 im = arg_im;
436 ip = arg_ip;
437 pp = dbm_get(macros[im]->pages);
438 iv = 0;
439 return NULL;
440 }
441 if (im >= MACRO_MAX)
442 return NULL;
443
444 /* Search for the next value. */
445
446 while (iv < nvals[im]) {
447 if (*pp == ip)
448 break;
449 if (*pp == 0)
450 iv++;
451 pp++;
452 }
453
454 /* Reached the end without a match. */
455
456 if (iv == nvals[im]) {
457 im = MACRO_MAX;
458 ip = 0;
459 pp = NULL;
460 return NULL;
461 }
462
463 /* Found a match; skip the remaining pages of this entry. */
464
465 if (++iv < nvals[im])
466 while (*pp++ != 0)
467 continue;
468
469 return dbm_get(macros[im][iv - 1].value);
470 }
471