1 /* $OpenBSD: dba.c,v 1.7 2017/02/09 18:26:17 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org> 4 * 5 * Permission to use, copy, modify, and distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 * 17 * Allocation-based version of the mandoc database, for read-write access. 18 * The interface is defined in "dba.h". 19 */ 20 #include <sys/types.h> 21 #include <endian.h> 22 #include <errno.h> 23 #include <stddef.h> 24 #include <stdint.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <unistd.h> 28 29 #include "mandoc_aux.h" 30 #include "mandoc_ohash.h" 31 #include "mansearch.h" 32 #include "dba_write.h" 33 #include "dba_array.h" 34 #include "dba.h" 35 36 struct macro_entry { 37 struct dba_array *pages; 38 char value[]; 39 }; 40 41 static void *prepend(const char *, char); 42 static void dba_pages_write(struct dba_array *); 43 static int compare_names(const void *, const void *); 44 static int compare_strings(const void *, const void *); 45 46 static struct macro_entry 47 *get_macro_entry(struct ohash *, const char *, int32_t); 48 static void dba_macros_write(struct dba_array *); 49 static void dba_macro_write(struct ohash *); 50 static int compare_entries(const void *, const void *); 51 52 53 /*** top-level functions **********************************************/ 54 55 struct dba * 56 dba_new(int32_t npages) 57 { 58 struct dba *dba; 59 struct ohash *macro; 60 int32_t im; 61 62 dba = mandoc_malloc(sizeof(*dba)); 63 dba->pages = dba_array_new(npages, DBA_GROW); 64 dba->macros = dba_array_new(MACRO_MAX, 0); 65 for (im = 0; im < MACRO_MAX; im++) { 66 macro = mandoc_malloc(sizeof(*macro)); 67 mandoc_ohash_init(macro, 4, 68 offsetof(struct macro_entry, value)); 69 dba_array_set(dba->macros, im, macro); 70 } 71 return dba; 72 } 73 74 void 75 dba_free(struct dba *dba) 76 { 77 struct dba_array *page; 78 struct ohash *macro; 79 struct macro_entry *entry; 80 unsigned int slot; 81 82 dba_array_FOREACH(dba->macros, macro) { 83 for (entry = ohash_first(macro, &slot); entry != NULL; 84 entry = ohash_next(macro, &slot)) { 85 dba_array_free(entry->pages); 86 free(entry); 87 } 88 ohash_delete(macro); 89 free(macro); 90 } 91 dba_array_free(dba->macros); 92 93 dba_array_undel(dba->pages); 94 dba_array_FOREACH(dba->pages, page) { 95 dba_array_free(dba_array_get(page, DBP_NAME)); 96 dba_array_free(dba_array_get(page, DBP_SECT)); 97 dba_array_free(dba_array_get(page, DBP_ARCH)); 98 free(dba_array_get(page, DBP_DESC)); 99 dba_array_free(dba_array_get(page, DBP_FILE)); 100 dba_array_free(page); 101 } 102 dba_array_free(dba->pages); 103 104 free(dba); 105 } 106 107 /* 108 * Write the complete mandoc database to disk; the format is: 109 * - One integer each for magic and version. 110 * - One pointer each to the macros table and to the final magic. 111 * - The pages table. 112 * - The macros table. 113 * - And at the very end, the magic integer again. 114 */ 115 int 116 dba_write(const char *fname, struct dba *dba) 117 { 118 int save_errno; 119 int32_t pos_end, pos_macros, pos_macros_ptr; 120 121 if (dba_open(fname) == -1) 122 return -1; 123 dba_int_write(MANDOCDB_MAGIC); 124 dba_int_write(MANDOCDB_VERSION); 125 pos_macros_ptr = dba_skip(1, 2); 126 dba_pages_write(dba->pages); 127 pos_macros = dba_tell(); 128 dba_macros_write(dba->macros); 129 pos_end = dba_tell(); 130 dba_int_write(MANDOCDB_MAGIC); 131 dba_seek(pos_macros_ptr); 132 dba_int_write(pos_macros); 133 dba_int_write(pos_end); 134 if (dba_close() == -1) { 135 save_errno = errno; 136 unlink(fname); 137 errno = save_errno; 138 return -1; 139 } 140 return 0; 141 } 142 143 144 /*** functions for handling pages *************************************/ 145 146 /* 147 * Create a new page and append it to the pages table. 148 */ 149 struct dba_array * 150 dba_page_new(struct dba_array *pages, const char *arch, 151 const char *desc, const char *file, enum form form) 152 { 153 struct dba_array *page, *entry; 154 155 page = dba_array_new(DBP_MAX, 0); 156 entry = dba_array_new(1, DBA_STR | DBA_GROW); 157 dba_array_add(page, entry); 158 entry = dba_array_new(1, DBA_STR | DBA_GROW); 159 dba_array_add(page, entry); 160 if (arch != NULL && *arch != '\0') { 161 entry = dba_array_new(1, DBA_STR | DBA_GROW); 162 dba_array_add(entry, (void *)arch); 163 } else 164 entry = NULL; 165 dba_array_add(page, entry); 166 dba_array_add(page, mandoc_strdup(desc)); 167 entry = dba_array_new(1, DBA_STR | DBA_GROW); 168 dba_array_add(entry, prepend(file, form)); 169 dba_array_add(page, entry); 170 dba_array_add(pages, page); 171 return page; 172 } 173 174 /* 175 * Add a section, architecture, or file name to an existing page. 176 * Passing the NULL pointer for the architecture makes the page MI. 177 * In that case, any earlier or later architectures are ignored. 178 */ 179 void 180 dba_page_add(struct dba_array *page, int32_t ie, const char *str) 181 { 182 struct dba_array *entries; 183 char *entry; 184 185 entries = dba_array_get(page, ie); 186 if (ie == DBP_ARCH) { 187 if (entries == NULL) 188 return; 189 if (str == NULL || *str == '\0') { 190 dba_array_free(entries); 191 dba_array_set(page, DBP_ARCH, NULL); 192 return; 193 } 194 } 195 if (*str == '\0') 196 return; 197 dba_array_FOREACH(entries, entry) { 198 if (ie == DBP_FILE && *entry < ' ') 199 entry++; 200 if (strcmp(entry, str) == 0) 201 return; 202 } 203 dba_array_add(entries, (void *)str); 204 } 205 206 /* 207 * Add an additional name to an existing page. 208 */ 209 void 210 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask) 211 { 212 struct dba_array *entries; 213 char *entry; 214 char maskbyte; 215 216 if (*name == '\0') 217 return; 218 maskbyte = mask & NAME_MASK; 219 entries = dba_array_get(page, DBP_NAME); 220 dba_array_FOREACH(entries, entry) { 221 if (strcmp(entry + 1, name) == 0) { 222 *entry |= maskbyte; 223 return; 224 } 225 } 226 dba_array_add(entries, prepend(name, maskbyte)); 227 } 228 229 /* 230 * Return a pointer to a temporary copy of instr with inbyte prepended. 231 */ 232 static void * 233 prepend(const char *instr, char inbyte) 234 { 235 static char *outstr = NULL; 236 static size_t outlen = 0; 237 size_t newlen; 238 239 newlen = strlen(instr) + 1; 240 if (newlen > outlen) { 241 outstr = mandoc_realloc(outstr, newlen + 1); 242 outlen = newlen; 243 } 244 *outstr = inbyte; 245 memcpy(outstr + 1, instr, newlen); 246 return outstr; 247 } 248 249 /* 250 * Write the pages table to disk; the format is: 251 * - One integer containing the number of pages. 252 * - For each page, five pointers to the names, sections, 253 * architectures, description, and file names of the page. 254 * MI pages write 0 instead of the architecture pointer. 255 * - One list each for names, sections, architectures, descriptions and 256 * file names. The description for each page ends with a NUL byte. 257 * For all the other lists, each string ends with a NUL byte, 258 * and the last string for a page ends with two NUL bytes. 259 * - To assure alignment of following integers, 260 * the end is padded with NUL bytes up to a multiple of four bytes. 261 */ 262 static void 263 dba_pages_write(struct dba_array *pages) 264 { 265 struct dba_array *page, *entry; 266 int32_t pos_pages, pos_end; 267 268 pos_pages = dba_array_writelen(pages, 5); 269 dba_array_FOREACH(pages, page) { 270 dba_array_setpos(page, DBP_NAME, dba_tell()); 271 entry = dba_array_get(page, DBP_NAME); 272 dba_array_sort(entry, compare_names); 273 dba_array_writelst(entry); 274 } 275 dba_array_FOREACH(pages, page) { 276 dba_array_setpos(page, DBP_SECT, dba_tell()); 277 entry = dba_array_get(page, DBP_SECT); 278 dba_array_sort(entry, compare_strings); 279 dba_array_writelst(entry); 280 } 281 dba_array_FOREACH(pages, page) { 282 if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) { 283 dba_array_setpos(page, DBP_ARCH, dba_tell()); 284 dba_array_sort(entry, compare_strings); 285 dba_array_writelst(entry); 286 } else 287 dba_array_setpos(page, DBP_ARCH, 0); 288 } 289 dba_array_FOREACH(pages, page) { 290 dba_array_setpos(page, DBP_DESC, dba_tell()); 291 dba_str_write(dba_array_get(page, DBP_DESC)); 292 } 293 dba_array_FOREACH(pages, page) { 294 dba_array_setpos(page, DBP_FILE, dba_tell()); 295 dba_array_writelst(dba_array_get(page, DBP_FILE)); 296 } 297 pos_end = dba_align(); 298 dba_seek(pos_pages); 299 dba_array_FOREACH(pages, page) 300 dba_array_writepos(page); 301 dba_seek(pos_end); 302 } 303 304 static int 305 compare_names(const void *vp1, const void *vp2) 306 { 307 const char *cp1, *cp2; 308 int diff; 309 310 cp1 = *(const char * const *)vp1; 311 cp2 = *(const char * const *)vp2; 312 return (diff = *cp2 - *cp1) ? diff : 313 strcasecmp(cp1 + 1, cp2 + 1); 314 } 315 316 static int 317 compare_strings(const void *vp1, const void *vp2) 318 { 319 const char *cp1, *cp2; 320 321 cp1 = *(const char * const *)vp1; 322 cp2 = *(const char * const *)vp2; 323 return strcmp(cp1, cp2); 324 } 325 326 /*** functions for handling macros ************************************/ 327 328 /* 329 * In the hash table for a single macro, look up an entry by 330 * the macro value or add an empty one if it doesn't exist yet. 331 */ 332 static struct macro_entry * 333 get_macro_entry(struct ohash *macro, const char *value, int32_t np) 334 { 335 struct macro_entry *entry; 336 size_t len; 337 unsigned int slot; 338 339 slot = ohash_qlookup(macro, value); 340 if ((entry = ohash_find(macro, slot)) == NULL) { 341 len = strlen(value) + 1; 342 entry = mandoc_malloc(sizeof(*entry) + len); 343 memcpy(&entry->value, value, len); 344 entry->pages = dba_array_new(np, DBA_GROW); 345 ohash_insert(macro, slot, entry); 346 } 347 return entry; 348 } 349 350 /* 351 * In addition to get_macro_entry(), add multiple page references, 352 * converting them from the on-disk format (byte offsets in the file) 353 * to page pointers in memory. 354 */ 355 void 356 dba_macro_new(struct dba *dba, int32_t im, const char *value, 357 const int32_t *pp) 358 { 359 struct macro_entry *entry; 360 const int32_t *ip; 361 int32_t np; 362 363 np = 0; 364 for (ip = pp; *ip; ip++) 365 np++; 366 367 entry = get_macro_entry(dba_array_get(dba->macros, im), value, np); 368 for (ip = pp; *ip; ip++) 369 dba_array_add(entry->pages, dba_array_get(dba->pages, 370 be32toh(*ip) / 5 / sizeof(*ip) - 1)); 371 } 372 373 /* 374 * In addition to get_macro_entry(), add one page reference, 375 * directly taking the in-memory page pointer as an argument. 376 */ 377 void 378 dba_macro_add(struct dba_array *macros, int32_t im, const char *value, 379 struct dba_array *page) 380 { 381 struct macro_entry *entry; 382 383 if (*value == '\0') 384 return; 385 entry = get_macro_entry(dba_array_get(macros, im), value, 1); 386 dba_array_add(entry->pages, page); 387 } 388 389 /* 390 * Write the macros table to disk; the format is: 391 * - The number of macro tables (actually, MACRO_MAX). 392 * - That number of pointers to the individual macro tables. 393 * - The individual macro tables. 394 */ 395 static void 396 dba_macros_write(struct dba_array *macros) 397 { 398 struct ohash *macro; 399 int32_t im, pos_macros, pos_end; 400 401 pos_macros = dba_array_writelen(macros, 1); 402 im = 0; 403 dba_array_FOREACH(macros, macro) { 404 dba_array_setpos(macros, im++, dba_tell()); 405 dba_macro_write(macro); 406 } 407 pos_end = dba_tell(); 408 dba_seek(pos_macros); 409 dba_array_writepos(macros); 410 dba_seek(pos_end); 411 } 412 413 /* 414 * Write one individual macro table to disk; the format is: 415 * - The number of entries in the table. 416 * - For each entry, two pointers, the first one to the value 417 * and the second one to the list of pages. 418 * - A list of values, each ending in a NUL byte. 419 * - To assure alignment of following integers, 420 * padding with NUL bytes up to a multiple of four bytes. 421 * - A list of pointers to pages, each list ending in a 0 integer. 422 */ 423 static void 424 dba_macro_write(struct ohash *macro) 425 { 426 struct macro_entry **entries, *entry; 427 struct dba_array *page; 428 int32_t *kpos, *dpos; 429 unsigned int ie, ne, slot; 430 int use; 431 int32_t addr, pos_macro, pos_end; 432 433 /* Temporary storage for filtering and sorting. */ 434 435 ne = ohash_entries(macro); 436 entries = mandoc_reallocarray(NULL, ne, sizeof(*entries)); 437 kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos)); 438 dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos)); 439 440 /* Build a list of non-empty entries and sort it. */ 441 442 ne = 0; 443 for (entry = ohash_first(macro, &slot); entry != NULL; 444 entry = ohash_next(macro, &slot)) { 445 use = 0; 446 dba_array_FOREACH(entry->pages, page) 447 if (dba_array_getpos(page)) 448 use = 1; 449 if (use) 450 entries[ne++] = entry; 451 } 452 qsort(entries, ne, sizeof(*entries), compare_entries); 453 454 /* Number of entries, and space for the pointer pairs. */ 455 456 dba_int_write(ne); 457 pos_macro = dba_skip(2, ne); 458 459 /* String table. */ 460 461 for (ie = 0; ie < ne; ie++) { 462 kpos[ie] = dba_tell(); 463 dba_str_write(entries[ie]->value); 464 } 465 dba_align(); 466 467 /* Pages table. */ 468 469 for (ie = 0; ie < ne; ie++) { 470 dpos[ie] = dba_tell(); 471 dba_array_FOREACH(entries[ie]->pages, page) 472 if ((addr = dba_array_getpos(page))) 473 dba_int_write(addr); 474 dba_int_write(0); 475 } 476 pos_end = dba_tell(); 477 478 /* Fill in the pointer pairs. */ 479 480 dba_seek(pos_macro); 481 for (ie = 0; ie < ne; ie++) { 482 dba_int_write(kpos[ie]); 483 dba_int_write(dpos[ie]); 484 } 485 dba_seek(pos_end); 486 487 free(entries); 488 free(kpos); 489 free(dpos); 490 } 491 492 static int 493 compare_entries(const void *vp1, const void *vp2) 494 { 495 const struct macro_entry *ep1, *ep2; 496 497 ep1 = *(const struct macro_entry * const *)vp1; 498 ep2 = *(const struct macro_entry * const *)vp2; 499 return strcmp(ep1->value, ep2->value); 500 } 501