xref: /openbsd/usr.bin/mandoc/dba.c (revision 79a81166)
1 /*	$OpenBSD: dba.c,v 1.7 2017/02/09 18:26:17 schwarze Exp $ */
2 /*
3  * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Allocation-based version of the mandoc database, for read-write access.
18  * The interface is defined in "dba.h".
19  */
20 #include <sys/types.h>
21 #include <endian.h>
22 #include <errno.h>
23 #include <stddef.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
28 
29 #include "mandoc_aux.h"
30 #include "mandoc_ohash.h"
31 #include "mansearch.h"
32 #include "dba_write.h"
33 #include "dba_array.h"
34 #include "dba.h"
35 
36 struct macro_entry {
37 	struct dba_array	*pages;
38 	char			 value[];
39 };
40 
41 static void	*prepend(const char *, char);
42 static void	 dba_pages_write(struct dba_array *);
43 static int	 compare_names(const void *, const void *);
44 static int	 compare_strings(const void *, const void *);
45 
46 static struct macro_entry
47 		*get_macro_entry(struct ohash *, const char *, int32_t);
48 static void	 dba_macros_write(struct dba_array *);
49 static void	 dba_macro_write(struct ohash *);
50 static int	 compare_entries(const void *, const void *);
51 
52 
53 /*** top-level functions **********************************************/
54 
55 struct dba *
dba_new(int32_t npages)56 dba_new(int32_t npages)
57 {
58 	struct dba	*dba;
59 	struct ohash	*macro;
60 	int32_t		 im;
61 
62 	dba = mandoc_malloc(sizeof(*dba));
63 	dba->pages = dba_array_new(npages, DBA_GROW);
64 	dba->macros = dba_array_new(MACRO_MAX, 0);
65 	for (im = 0; im < MACRO_MAX; im++) {
66 		macro = mandoc_malloc(sizeof(*macro));
67 		mandoc_ohash_init(macro, 4,
68 		    offsetof(struct macro_entry, value));
69 		dba_array_set(dba->macros, im, macro);
70 	}
71 	return dba;
72 }
73 
74 void
dba_free(struct dba * dba)75 dba_free(struct dba *dba)
76 {
77 	struct dba_array	*page;
78 	struct ohash		*macro;
79 	struct macro_entry	*entry;
80 	unsigned int		 slot;
81 
82 	dba_array_FOREACH(dba->macros, macro) {
83 		for (entry = ohash_first(macro, &slot); entry != NULL;
84 		     entry = ohash_next(macro, &slot)) {
85 			dba_array_free(entry->pages);
86 			free(entry);
87 		}
88 		ohash_delete(macro);
89 		free(macro);
90 	}
91 	dba_array_free(dba->macros);
92 
93 	dba_array_undel(dba->pages);
94 	dba_array_FOREACH(dba->pages, page) {
95 		dba_array_free(dba_array_get(page, DBP_NAME));
96 		dba_array_free(dba_array_get(page, DBP_SECT));
97 		dba_array_free(dba_array_get(page, DBP_ARCH));
98 		free(dba_array_get(page, DBP_DESC));
99 		dba_array_free(dba_array_get(page, DBP_FILE));
100 		dba_array_free(page);
101 	}
102 	dba_array_free(dba->pages);
103 
104 	free(dba);
105 }
106 
107 /*
108  * Write the complete mandoc database to disk; the format is:
109  * - One integer each for magic and version.
110  * - One pointer each to the macros table and to the final magic.
111  * - The pages table.
112  * - The macros table.
113  * - And at the very end, the magic integer again.
114  */
115 int
dba_write(const char * fname,struct dba * dba)116 dba_write(const char *fname, struct dba *dba)
117 {
118 	int	 save_errno;
119 	int32_t	 pos_end, pos_macros, pos_macros_ptr;
120 
121 	if (dba_open(fname) == -1)
122 		return -1;
123 	dba_int_write(MANDOCDB_MAGIC);
124 	dba_int_write(MANDOCDB_VERSION);
125 	pos_macros_ptr = dba_skip(1, 2);
126 	dba_pages_write(dba->pages);
127 	pos_macros = dba_tell();
128 	dba_macros_write(dba->macros);
129 	pos_end = dba_tell();
130 	dba_int_write(MANDOCDB_MAGIC);
131 	dba_seek(pos_macros_ptr);
132 	dba_int_write(pos_macros);
133 	dba_int_write(pos_end);
134 	if (dba_close() == -1) {
135 		save_errno = errno;
136 		unlink(fname);
137 		errno = save_errno;
138 		return -1;
139 	}
140 	return 0;
141 }
142 
143 
144 /*** functions for handling pages *************************************/
145 
146 /*
147  * Create a new page and append it to the pages table.
148  */
149 struct dba_array *
dba_page_new(struct dba_array * pages,const char * arch,const char * desc,const char * file,enum form form)150 dba_page_new(struct dba_array *pages, const char *arch,
151     const char *desc, const char *file, enum form form)
152 {
153 	struct dba_array *page, *entry;
154 
155 	page = dba_array_new(DBP_MAX, 0);
156 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
157 	dba_array_add(page, entry);
158 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
159 	dba_array_add(page, entry);
160 	if (arch != NULL && *arch != '\0') {
161 		entry = dba_array_new(1, DBA_STR | DBA_GROW);
162 		dba_array_add(entry, (void *)arch);
163 	} else
164 		entry = NULL;
165 	dba_array_add(page, entry);
166 	dba_array_add(page, mandoc_strdup(desc));
167 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
168 	dba_array_add(entry, prepend(file, form));
169 	dba_array_add(page, entry);
170 	dba_array_add(pages, page);
171 	return page;
172 }
173 
174 /*
175  * Add a section, architecture, or file name to an existing page.
176  * Passing the NULL pointer for the architecture makes the page MI.
177  * In that case, any earlier or later architectures are ignored.
178  */
179 void
dba_page_add(struct dba_array * page,int32_t ie,const char * str)180 dba_page_add(struct dba_array *page, int32_t ie, const char *str)
181 {
182 	struct dba_array	*entries;
183 	char			*entry;
184 
185 	entries = dba_array_get(page, ie);
186 	if (ie == DBP_ARCH) {
187 		if (entries == NULL)
188 			return;
189 		if (str == NULL || *str == '\0') {
190 			dba_array_free(entries);
191 			dba_array_set(page, DBP_ARCH, NULL);
192 			return;
193 		}
194 	}
195 	if (*str == '\0')
196 		return;
197 	dba_array_FOREACH(entries, entry) {
198 		if (ie == DBP_FILE && *entry < ' ')
199 			entry++;
200 		if (strcmp(entry, str) == 0)
201 			return;
202 	}
203 	dba_array_add(entries, (void *)str);
204 }
205 
206 /*
207  * Add an additional name to an existing page.
208  */
209 void
dba_page_alias(struct dba_array * page,const char * name,uint64_t mask)210 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
211 {
212 	struct dba_array	*entries;
213 	char			*entry;
214 	char			 maskbyte;
215 
216 	if (*name == '\0')
217 		return;
218 	maskbyte = mask & NAME_MASK;
219 	entries = dba_array_get(page, DBP_NAME);
220 	dba_array_FOREACH(entries, entry) {
221 		if (strcmp(entry + 1, name) == 0) {
222 			*entry |= maskbyte;
223 			return;
224 		}
225 	}
226 	dba_array_add(entries, prepend(name, maskbyte));
227 }
228 
229 /*
230  * Return a pointer to a temporary copy of instr with inbyte prepended.
231  */
232 static void *
prepend(const char * instr,char inbyte)233 prepend(const char *instr, char inbyte)
234 {
235 	static char	*outstr = NULL;
236 	static size_t	 outlen = 0;
237 	size_t		 newlen;
238 
239 	newlen = strlen(instr) + 1;
240 	if (newlen > outlen) {
241 		outstr = mandoc_realloc(outstr, newlen + 1);
242 		outlen = newlen;
243 	}
244 	*outstr = inbyte;
245 	memcpy(outstr + 1, instr, newlen);
246 	return outstr;
247 }
248 
249 /*
250  * Write the pages table to disk; the format is:
251  * - One integer containing the number of pages.
252  * - For each page, five pointers to the names, sections,
253  *   architectures, description, and file names of the page.
254  *   MI pages write 0 instead of the architecture pointer.
255  * - One list each for names, sections, architectures, descriptions and
256  *   file names.  The description for each page ends with a NUL byte.
257  *   For all the other lists, each string ends with a NUL byte,
258  *   and the last string for a page ends with two NUL bytes.
259  * - To assure alignment of following integers,
260  *   the end is padded with NUL bytes up to a multiple of four bytes.
261  */
262 static void
dba_pages_write(struct dba_array * pages)263 dba_pages_write(struct dba_array *pages)
264 {
265 	struct dba_array	*page, *entry;
266 	int32_t			 pos_pages, pos_end;
267 
268 	pos_pages = dba_array_writelen(pages, 5);
269 	dba_array_FOREACH(pages, page) {
270 		dba_array_setpos(page, DBP_NAME, dba_tell());
271 		entry = dba_array_get(page, DBP_NAME);
272 		dba_array_sort(entry, compare_names);
273 		dba_array_writelst(entry);
274 	}
275 	dba_array_FOREACH(pages, page) {
276 		dba_array_setpos(page, DBP_SECT, dba_tell());
277 		entry = dba_array_get(page, DBP_SECT);
278 		dba_array_sort(entry, compare_strings);
279 		dba_array_writelst(entry);
280 	}
281 	dba_array_FOREACH(pages, page) {
282 		if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
283 			dba_array_setpos(page, DBP_ARCH, dba_tell());
284 			dba_array_sort(entry, compare_strings);
285 			dba_array_writelst(entry);
286 		} else
287 			dba_array_setpos(page, DBP_ARCH, 0);
288 	}
289 	dba_array_FOREACH(pages, page) {
290 		dba_array_setpos(page, DBP_DESC, dba_tell());
291 		dba_str_write(dba_array_get(page, DBP_DESC));
292 	}
293 	dba_array_FOREACH(pages, page) {
294 		dba_array_setpos(page, DBP_FILE, dba_tell());
295 		dba_array_writelst(dba_array_get(page, DBP_FILE));
296 	}
297 	pos_end = dba_align();
298 	dba_seek(pos_pages);
299 	dba_array_FOREACH(pages, page)
300 		dba_array_writepos(page);
301 	dba_seek(pos_end);
302 }
303 
304 static int
compare_names(const void * vp1,const void * vp2)305 compare_names(const void *vp1, const void *vp2)
306 {
307 	const char	*cp1, *cp2;
308 	int		 diff;
309 
310 	cp1 = *(const char * const *)vp1;
311 	cp2 = *(const char * const *)vp2;
312 	return (diff = *cp2 - *cp1) ? diff :
313 	    strcasecmp(cp1 + 1, cp2 + 1);
314 }
315 
316 static int
compare_strings(const void * vp1,const void * vp2)317 compare_strings(const void *vp1, const void *vp2)
318 {
319 	const char	*cp1, *cp2;
320 
321 	cp1 = *(const char * const *)vp1;
322 	cp2 = *(const char * const *)vp2;
323 	return strcmp(cp1, cp2);
324 }
325 
326 /*** functions for handling macros ************************************/
327 
328 /*
329  * In the hash table for a single macro, look up an entry by
330  * the macro value or add an empty one if it doesn't exist yet.
331  */
332 static struct macro_entry *
get_macro_entry(struct ohash * macro,const char * value,int32_t np)333 get_macro_entry(struct ohash *macro, const char *value, int32_t np)
334 {
335 	struct macro_entry	*entry;
336 	size_t			 len;
337 	unsigned int		 slot;
338 
339 	slot = ohash_qlookup(macro, value);
340 	if ((entry = ohash_find(macro, slot)) == NULL) {
341 		len = strlen(value) + 1;
342 		entry = mandoc_malloc(sizeof(*entry) + len);
343 		memcpy(&entry->value, value, len);
344 		entry->pages = dba_array_new(np, DBA_GROW);
345 		ohash_insert(macro, slot, entry);
346 	}
347 	return entry;
348 }
349 
350 /*
351  * In addition to get_macro_entry(), add multiple page references,
352  * converting them from the on-disk format (byte offsets in the file)
353  * to page pointers in memory.
354  */
355 void
dba_macro_new(struct dba * dba,int32_t im,const char * value,const int32_t * pp)356 dba_macro_new(struct dba *dba, int32_t im, const char *value,
357     const int32_t *pp)
358 {
359 	struct macro_entry	*entry;
360 	const int32_t		*ip;
361 	int32_t			 np;
362 
363 	np = 0;
364 	for (ip = pp; *ip; ip++)
365 		np++;
366 
367 	entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
368 	for (ip = pp; *ip; ip++)
369 		dba_array_add(entry->pages, dba_array_get(dba->pages,
370 		    be32toh(*ip) / 5 / sizeof(*ip) - 1));
371 }
372 
373 /*
374  * In addition to get_macro_entry(), add one page reference,
375  * directly taking the in-memory page pointer as an argument.
376  */
377 void
dba_macro_add(struct dba_array * macros,int32_t im,const char * value,struct dba_array * page)378 dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
379     struct dba_array *page)
380 {
381 	struct macro_entry	*entry;
382 
383 	if (*value == '\0')
384 		return;
385 	entry = get_macro_entry(dba_array_get(macros, im), value, 1);
386 	dba_array_add(entry->pages, page);
387 }
388 
389 /*
390  * Write the macros table to disk; the format is:
391  * - The number of macro tables (actually, MACRO_MAX).
392  * - That number of pointers to the individual macro tables.
393  * - The individual macro tables.
394  */
395 static void
dba_macros_write(struct dba_array * macros)396 dba_macros_write(struct dba_array *macros)
397 {
398 	struct ohash		*macro;
399 	int32_t			 im, pos_macros, pos_end;
400 
401 	pos_macros = dba_array_writelen(macros, 1);
402 	im = 0;
403 	dba_array_FOREACH(macros, macro) {
404 		dba_array_setpos(macros, im++, dba_tell());
405 		dba_macro_write(macro);
406 	}
407 	pos_end = dba_tell();
408 	dba_seek(pos_macros);
409 	dba_array_writepos(macros);
410 	dba_seek(pos_end);
411 }
412 
413 /*
414  * Write one individual macro table to disk; the format is:
415  * - The number of entries in the table.
416  * - For each entry, two pointers, the first one to the value
417  *   and the second one to the list of pages.
418  * - A list of values, each ending in a NUL byte.
419  * - To assure alignment of following integers,
420  *   padding with NUL bytes up to a multiple of four bytes.
421  * - A list of pointers to pages, each list ending in a 0 integer.
422  */
423 static void
dba_macro_write(struct ohash * macro)424 dba_macro_write(struct ohash *macro)
425 {
426 	struct macro_entry	**entries, *entry;
427 	struct dba_array	 *page;
428 	int32_t			 *kpos, *dpos;
429 	unsigned int		  ie, ne, slot;
430 	int			  use;
431 	int32_t			  addr, pos_macro, pos_end;
432 
433 	/* Temporary storage for filtering and sorting. */
434 
435 	ne = ohash_entries(macro);
436 	entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
437 	kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
438 	dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
439 
440 	/* Build a list of non-empty entries and sort it. */
441 
442 	ne = 0;
443 	for (entry = ohash_first(macro, &slot); entry != NULL;
444 	     entry = ohash_next(macro, &slot)) {
445 		use = 0;
446 		dba_array_FOREACH(entry->pages, page)
447 			if (dba_array_getpos(page))
448 				use = 1;
449 		if (use)
450 			entries[ne++] = entry;
451 	}
452 	qsort(entries, ne, sizeof(*entries), compare_entries);
453 
454 	/* Number of entries, and space for the pointer pairs. */
455 
456 	dba_int_write(ne);
457 	pos_macro = dba_skip(2, ne);
458 
459 	/* String table. */
460 
461 	for (ie = 0; ie < ne; ie++) {
462 		kpos[ie] = dba_tell();
463 		dba_str_write(entries[ie]->value);
464 	}
465 	dba_align();
466 
467 	/* Pages table. */
468 
469 	for (ie = 0; ie < ne; ie++) {
470 		dpos[ie] = dba_tell();
471 		dba_array_FOREACH(entries[ie]->pages, page)
472 			if ((addr = dba_array_getpos(page)))
473 				dba_int_write(addr);
474 		dba_int_write(0);
475 	}
476 	pos_end = dba_tell();
477 
478 	/* Fill in the pointer pairs. */
479 
480 	dba_seek(pos_macro);
481 	for (ie = 0; ie < ne; ie++) {
482 		dba_int_write(kpos[ie]);
483 		dba_int_write(dpos[ie]);
484 	}
485 	dba_seek(pos_end);
486 
487 	free(entries);
488 	free(kpos);
489 	free(dpos);
490 }
491 
492 static int
compare_entries(const void * vp1,const void * vp2)493 compare_entries(const void *vp1, const void *vp2)
494 {
495 	const struct macro_entry *ep1, *ep2;
496 
497 	ep1 = *(const struct macro_entry * const *)vp1;
498 	ep2 = *(const struct macro_entry * const *)vp2;
499 	return strcmp(ep1->value, ep2->value);
500 }
501