1 /* $NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $ */ 2 /*- 3 * Copyright (c) 2009, 2010 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Joerg Sonnenberger. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #if HAVE_NBTOOL_CONFIG_H 35 #include "nbtool_config.h" 36 #endif 37 38 #include <sys/cdefs.h> 39 __RCSID("$NetBSD: cdbw.c,v 1.1 2010/04/25 00:54:46 joerg Exp $"); 40 41 #include "namespace.h" 42 43 #include <sys/endian.h> 44 #include <sys/queue.h> 45 #include <cdbw.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 50 #ifdef __weak_alias 51 __weak_alias(cdbw_close,_cdbw_close) 52 __weak_alias(cdbw_open,_cdbw_open) 53 __weak_alias(cdbw_output,_cdbw_output) 54 __weak_alias(cdbw_put,_cdbw_put) 55 __weak_alias(cdbw_put_data,_cdbw_put_data) 56 __weak_alias(cdbw_put_key,_cdbw_put_key) 57 #endif 58 59 struct key_hash { 60 SLIST_ENTRY(key_hash) link; 61 uint32_t hashes[3]; 62 uint32_t idx; 63 void *key; 64 size_t keylen; 65 }; 66 67 SLIST_HEAD(key_hash_head, key_hash); 68 69 struct cdbw { 70 size_t data_counter; 71 size_t data_allocated; 72 size_t data_size; 73 size_t *data_len; 74 void **data_ptr; 75 76 size_t hash_size; 77 struct key_hash_head *hash; 78 size_t key_counter; 79 }; 80 81 /* Max. data counter that allows the index size to be 32bit. */ 82 static const uint32_t max_data_counter = 0xccccccccU; 83 84 struct cdbw * 85 cdbw_open(void) 86 { 87 struct cdbw *cdbw; 88 size_t i; 89 90 cdbw = calloc(sizeof(*cdbw), 1); 91 if (cdbw == NULL) 92 return NULL; 93 94 cdbw->hash_size = 1024; 95 cdbw->hash = calloc(cdbw->hash_size, sizeof(*cdbw->hash)); 96 if (cdbw->hash == NULL) { 97 free(cdbw); 98 return NULL; 99 } 100 101 for (i = 0; i < cdbw->hash_size; ++i) 102 SLIST_INIT(cdbw->hash + i); 103 104 return cdbw; 105 } 106 107 int 108 cdbw_put(struct cdbw *cdbw, const void *key, size_t keylen, 109 const void *data, size_t datalen) 110 { 111 uint32_t idx; 112 int rv; 113 114 rv = cdbw_put_data(cdbw, data, datalen, &idx); 115 if (rv) 116 return rv; 117 rv = cdbw_put_key(cdbw, key, keylen, idx); 118 if (rv) { 119 --cdbw->data_counter; 120 free(cdbw->data_ptr[cdbw->data_counter]); 121 cdbw->data_size -= datalen; 122 return rv; 123 } 124 return 0; 125 } 126 127 int 128 cdbw_put_data(struct cdbw *cdbw, const void *data, size_t datalen, 129 uint32_t *idx) 130 { 131 132 if (cdbw->data_counter == max_data_counter) 133 return -1; 134 135 if (cdbw->data_size + datalen < cdbw->data_size || 136 cdbw->data_size + datalen > 0xffffffffU) 137 return -1; /* Overflow */ 138 139 if (cdbw->data_allocated == cdbw->data_counter) { 140 void **new_data_ptr; 141 size_t *new_data_len; 142 size_t new_allocated; 143 144 if (cdbw->data_allocated == 0) 145 new_allocated = 256; 146 else 147 new_allocated = cdbw->data_allocated * 2; 148 149 new_data_ptr = realloc(cdbw->data_ptr, 150 sizeof(*cdbw->data_ptr) * new_allocated); 151 if (new_data_ptr == NULL) 152 return -1; 153 cdbw->data_ptr = new_data_ptr; 154 155 new_data_len = realloc(cdbw->data_len, 156 sizeof(*cdbw->data_len) * new_allocated); 157 if (new_data_len == NULL) 158 return -1; 159 cdbw->data_len = new_data_len; 160 161 cdbw->data_allocated = new_allocated; 162 } 163 164 cdbw->data_ptr[cdbw->data_counter] = malloc(datalen); 165 if (cdbw->data_ptr[cdbw->data_counter] == NULL) 166 return -1; 167 memcpy(cdbw->data_ptr[cdbw->data_counter], data, datalen); 168 cdbw->data_len[cdbw->data_counter] = datalen; 169 cdbw->data_size += datalen; 170 *idx = cdbw->data_counter++; 171 return 0; 172 } 173 174 int 175 cdbw_put_key(struct cdbw *cdbw, const void *key, size_t keylen, uint32_t idx) 176 { 177 uint32_t hashes[3]; 178 struct key_hash_head *head, *head2, *new_head; 179 struct key_hash *key_hash; 180 size_t new_hash_size, i; 181 182 if (idx >= cdbw->data_counter || 183 cdbw->key_counter == max_data_counter) 184 return -1; 185 186 mi_vector_hash(key, keylen, 0, hashes); 187 188 head = cdbw->hash + (hashes[0] & (cdbw->hash_size - 1)); 189 SLIST_FOREACH(key_hash, head, link) { 190 if (key_hash->keylen != keylen) 191 continue; 192 if (key_hash->hashes[0] != hashes[0]) 193 continue; 194 if (key_hash->hashes[1] != hashes[1]) 195 continue; 196 if (key_hash->hashes[2] != hashes[2]) 197 continue; 198 if (memcmp(key, key_hash->key, keylen)) 199 continue; 200 return -1; 201 } 202 key_hash = malloc(sizeof(*key_hash)); 203 if (key_hash == NULL) 204 return -1; 205 key_hash->key = malloc(keylen); 206 if (key_hash->key == NULL) { 207 free(key_hash); 208 return -1; 209 } 210 memcpy(key_hash->key, key, keylen); 211 key_hash->hashes[0] = hashes[0]; 212 key_hash->hashes[1] = hashes[1]; 213 key_hash->hashes[2] = hashes[2]; 214 key_hash->keylen = keylen; 215 key_hash->idx = idx; 216 SLIST_INSERT_HEAD(head, key_hash, link); 217 ++cdbw->key_counter; 218 219 if (cdbw->key_counter <= cdbw->hash_size) 220 return 0; 221 222 /* Try to resize the hash table, but ignore errors. */ 223 new_hash_size = cdbw->hash_size * 2; 224 new_head = calloc(sizeof(*new_head), new_hash_size); 225 if (new_head == NULL) 226 return 0; 227 228 head = &cdbw->hash[hashes[0] & (cdbw->hash_size - 1)]; 229 for (i = 0; i < new_hash_size; ++i) 230 SLIST_INIT(new_head + i); 231 232 for (i = 0; i < cdbw->hash_size; ++i) { 233 head = cdbw->hash + i; 234 235 while ((key_hash = SLIST_FIRST(head)) != NULL) { 236 SLIST_REMOVE_HEAD(head, link); 237 head2 = new_head + 238 (key_hash->hashes[0] & (new_hash_size - 1)); 239 SLIST_INSERT_HEAD(head2, key_hash, link); 240 } 241 } 242 free(cdbw->hash); 243 cdbw->hash_size = new_hash_size; 244 cdbw->hash = new_head; 245 246 return 0; 247 } 248 249 void 250 cdbw_close(struct cdbw *cdbw) 251 { 252 struct key_hash_head *head; 253 struct key_hash *key_hash; 254 size_t i; 255 256 for (i = 0; i < cdbw->hash_size; ++i) { 257 head = cdbw->hash + i; 258 while ((key_hash = SLIST_FIRST(head)) != NULL) { 259 SLIST_REMOVE_HEAD(head, link); 260 free(key_hash->key); 261 free(key_hash); 262 } 263 } 264 265 for (i = 0; i < cdbw->data_counter; ++i) 266 free(cdbw->data_ptr[i]); 267 free(cdbw->data_ptr); 268 free(cdbw->data_len); 269 free(cdbw->hash); 270 free(cdbw); 271 } 272 273 #define unused 0xffffffffU 274 275 struct vertex { 276 uint32_t l_edge, m_edge, r_edge; 277 }; 278 279 struct edge { 280 uint32_t idx; 281 282 uint32_t left, middle, right; 283 uint32_t l_prev, m_prev, l_next; 284 uint32_t r_prev, m_next, r_next; 285 }; 286 287 struct state { 288 uint32_t data_entries; 289 uint32_t entries; 290 uint32_t keys; 291 uint32_t seed; 292 293 uint32_t *g; 294 char *visited; 295 296 struct vertex *verts; 297 struct edge *edges; 298 uint32_t output_index; 299 uint32_t *output_order; 300 }; 301 302 static void 303 remove_vertex(struct state *state, struct vertex *v) 304 { 305 struct edge *e; 306 struct vertex *vl, *vm, *vr; 307 308 if (v->l_edge != unused && v->m_edge != unused) 309 return; 310 if (v->l_edge != unused && v->r_edge != unused) 311 return; 312 if (v->m_edge != unused && v->r_edge != unused) 313 return; 314 if (v->l_edge == unused && v->m_edge == unused && v->r_edge == unused) 315 return; 316 317 if (v->l_edge != unused) { 318 e = &state->edges[v->l_edge]; 319 if (e->l_next != unused) 320 return; 321 } else if (v->m_edge != unused) { 322 e = &state->edges[v->m_edge]; 323 if (e->m_next != unused) 324 return; 325 } else { 326 if (v->r_edge == unused) 327 abort(); 328 e = &state->edges[v->r_edge]; 329 if (e->r_next != unused) 330 return; 331 } 332 333 state->output_order[--state->output_index] = e - state->edges; 334 335 vl = &state->verts[e->left]; 336 vm = &state->verts[e->middle]; 337 vr = &state->verts[e->right]; 338 339 if (e->l_prev == unused) 340 vl->l_edge = e->l_next; 341 else 342 state->edges[e->l_prev].l_next = e->l_next; 343 if (e->l_next != unused) 344 state->edges[e->l_next].l_prev = e->l_prev; 345 346 if (e->m_prev == unused) 347 vm->m_edge = e->m_next; 348 else 349 state->edges[e->m_prev].m_next = e->m_next; 350 if (e->m_next != unused) 351 state->edges[e->m_next].m_prev = e->m_prev; 352 353 if (e->r_prev == unused) 354 vr->r_edge = e->r_next; 355 else 356 state->edges[e->r_prev].r_next = e->r_next; 357 if (e->r_next != unused) 358 state->edges[e->r_next].r_prev = e->r_prev; 359 } 360 361 static int 362 build_graph(struct cdbw *cdbw, struct state *state) 363 { 364 struct key_hash_head *head; 365 struct key_hash *key_hash; 366 struct vertex *v; 367 struct edge *e; 368 uint32_t hashes[3]; 369 size_t i; 370 371 e = state->edges; 372 for (i = 0; i < cdbw->hash_size; ++i) { 373 head = &cdbw->hash[i]; 374 SLIST_FOREACH(key_hash, head, link) { 375 e->idx = key_hash->idx; 376 mi_vector_hash(key_hash->key, key_hash->keylen, 377 state->seed, hashes); 378 e->left = hashes[0] % state->entries; 379 e->middle = hashes[1] % state->entries; 380 e->right = hashes[2] % state->entries; 381 382 ++e; 383 } 384 } 385 386 for (i = 0; i < state->entries; ++i) { 387 v = state->verts + i; 388 v->l_edge = unused; 389 v->m_edge = unused; 390 v->r_edge = unused; 391 } 392 393 for (i = 0; i < state->keys; ++i) { 394 e = state->edges + i; 395 v = state->verts + e->left; 396 if (v->l_edge != unused) 397 state->edges[v->l_edge].l_prev = i; 398 e->l_next = v->l_edge; 399 e->l_prev = unused; 400 v->l_edge = i; 401 402 v = &state->verts[e->middle]; 403 if (v->m_edge != unused) 404 state->edges[v->m_edge].m_prev = i; 405 e->m_next = v->m_edge; 406 e->m_prev = unused; 407 v->m_edge = i; 408 409 v = &state->verts[e->right]; 410 if (v->r_edge != unused) 411 state->edges[v->r_edge].r_prev = i; 412 e->r_next = v->r_edge; 413 e->r_prev = unused; 414 v->r_edge = i; 415 } 416 417 state->output_index = state->keys; 418 for (i = 0; i < state->entries; ++i) 419 remove_vertex(state, state->verts + i); 420 421 i = state->keys; 422 while (i > 0 && i > state->output_index) { 423 --i; 424 e = state->edges + state->output_order[i]; 425 remove_vertex(state, state->verts + e->left); 426 remove_vertex(state, state->verts + e->middle); 427 remove_vertex(state, state->verts + e->right); 428 } 429 430 return state->output_index == 0 ? 0 : -1; 431 } 432 433 static void 434 assign_nodes(struct state *state) 435 { 436 struct edge *e; 437 size_t i; 438 439 for (i = 0; i < state->keys; ++i) { 440 e = state->edges + state->output_order[i]; 441 442 if (!state->visited[e->left]) { 443 state->g[e->left] = 444 (2 * state->data_entries + e->idx 445 - state->g[e->middle] - state->g[e->right]) 446 % state->data_entries; 447 } else if (!state->visited[e->middle]) { 448 state->g[e->middle] = 449 (2 * state->data_entries + e->idx 450 - state->g[e->left] - state->g[e->right]) 451 % state->data_entries; 452 } else { 453 state->g[e->right] = 454 (2 * state->data_entries + e->idx 455 - state->g[e->left] - state->g[e->middle]) 456 % state->data_entries; 457 } 458 state->visited[e->left] = 1; 459 state->visited[e->middle] = 1; 460 state->visited[e->right] = 1; 461 } 462 } 463 464 static size_t 465 compute_size(uint32_t size) 466 { 467 if (size < 0x100) 468 return 1; 469 else if (size < 0x10000) 470 return 2; 471 else 472 return 4; 473 } 474 475 #define COND_FLUSH_BUFFER(n) do { \ 476 if (__predict_false(cur_pos + (n) >= sizeof(buf))) { \ 477 ret = write(fd, buf, cur_pos); \ 478 if (ret == -1 || (size_t)ret != cur_pos) \ 479 return -1; \ 480 cur_pos = 0; \ 481 } \ 482 } while (/* CONSTCOND */ 0) 483 484 static int 485 print_hash(struct cdbw *cdbw, struct state *state, int fd, const char *descr) 486 { 487 uint32_t data_size; 488 uint8_t buf[90000]; 489 size_t i, size, size2, cur_pos; 490 ssize_t ret; 491 492 memcpy(buf, "NBCDB\n\0", 7); 493 buf[7] = 1; 494 strncpy((char *)buf + 8, descr, 16); 495 le32enc(buf + 24, cdbw->data_size); 496 le32enc(buf + 28, cdbw->data_counter); 497 le32enc(buf + 32, state->entries); 498 le32enc(buf + 36, state->seed); 499 cur_pos = 40; 500 501 size = compute_size(state->entries); 502 for (i = 0; i < state->entries; ++i) { 503 COND_FLUSH_BUFFER(4); 504 le32enc(buf + cur_pos, state->g[i]); 505 cur_pos += size; 506 } 507 size2 = compute_size(cdbw->data_size); 508 size = size * state->entries % size2; 509 if (size != 0) { 510 size = size2 - size; 511 COND_FLUSH_BUFFER(4); 512 le32enc(buf + cur_pos, 0); 513 cur_pos += size; 514 } 515 for (data_size = 0, i = 0; i < cdbw->data_counter; ++i) { 516 COND_FLUSH_BUFFER(4); 517 le32enc(buf + cur_pos, data_size); 518 cur_pos += size2; 519 data_size += cdbw->data_len[i]; 520 } 521 COND_FLUSH_BUFFER(4); 522 le32enc(buf + cur_pos, data_size); 523 cur_pos += size2; 524 525 for (i = 0; i < cdbw->data_counter; ++i) { 526 COND_FLUSH_BUFFER(cdbw->data_len[i]); 527 if (cdbw->data_len[i] < sizeof(buf)) { 528 memcpy(buf + cur_pos, cdbw->data_ptr[i], 529 cdbw->data_len[i]); 530 cur_pos += cdbw->data_len[i]; 531 } else { 532 ret = write(fd, cdbw->data_ptr[i], cdbw->data_len[i]); 533 if (ret == -1 || (size_t)ret != cdbw->data_len[i]) 534 return -1; 535 } 536 } 537 if (cur_pos != 0) { 538 ret = write(fd, buf, cur_pos); 539 if (ret == -1 || (size_t)ret != cur_pos) 540 return -1; 541 } 542 return 0; 543 } 544 545 int 546 cdbw_output(struct cdbw *cdbw, int fd, const char descr[16], 547 uint32_t (*seedgen)(void)) 548 { 549 struct state state; 550 int rv; 551 552 if (cdbw->data_counter == 0 || cdbw->key_counter == 0) { 553 state.entries = 0; 554 state.seed = 0; 555 print_hash(cdbw, &state, fd, descr); 556 return 0; 557 } 558 559 if (seedgen == NULL) 560 seedgen = arc4random; 561 562 rv = 0; 563 564 state.keys = cdbw->key_counter; 565 state.data_entries = cdbw->data_counter; 566 state.entries = state.keys + (state.keys + 3) / 4; 567 if (state.entries < 10) 568 state.entries = 10; 569 570 #define NALLOC(var, n) var = calloc(sizeof(*var), n) 571 NALLOC(state.g, state.entries); 572 NALLOC(state.visited, state.entries); 573 NALLOC(state.verts, state.entries); 574 NALLOC(state.edges, state.entries); 575 NALLOC(state.output_order, state.keys); 576 #undef NALLOC 577 578 if (state.g == NULL || state.visited == NULL || state.verts == NULL || 579 state.edges == NULL || state.output_order == NULL) { 580 rv = -1; 581 goto release; 582 } 583 584 do { 585 state.seed = (*seedgen)(); 586 } while (build_graph(cdbw, &state)); 587 588 assign_nodes(&state); 589 rv = print_hash(cdbw, &state, fd, descr); 590 591 release: 592 free(state.g); 593 free(state.visited); 594 free(state.verts); 595 free(state.edges); 596 free(state.output_order); 597 598 return rv; 599 } 600