1 /* NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp */
2
3 /*
4 * Database functions
5 * Copyright (C) Andrew Tridgell 1999
6 *
7 * Redistribution and use in source and binary forms are permitted
8 * provided that the above copyright notice and this paragraph are
9 * duplicated in all such forms AND provided that this software or
10 * any derived work is only used as part of the PPP daemon (pppd)
11 * and related utilities.
12 * The name of the author may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
16 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
17 *
18 * Note: this software is also available under the Gnu Public License
19 * version 2 or later.
20 */
21
22 #include <sys/cdefs.h>
23 #ifndef lint
24 __RCSID("NetBSD: tdb.c,v 1.7 2013/10/19 17:16:38 christos Exp ");
25 #endif
26
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <fcntl.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <errno.h>
33 #include <sys/mman.h>
34 #include <sys/stat.h>
35 #include "tdb.h"
36
37 #define TDB_VERSION (0x26011967 + 1)
38 #define TDB_MAGIC (0x26011999U)
39 #define TDB_FREE_MAGIC (~TDB_MAGIC)
40 #define TDB_ALIGN 4
41 #define MIN_REC_SIZE (2*sizeof(struct list_struct) + TDB_ALIGN)
42 #define DEFAULT_HASH_SIZE 128
43 #define TDB_PAGE_SIZE 0x2000
44 #define TDB_LEN_MULTIPLIER 10
45 #define FREELIST_TOP (sizeof(struct tdb_header))
46
47 #define LOCK_SET 1
48 #define LOCK_CLEAR 0
49
50 /* lock offsets */
51 #define GLOBAL_LOCK 0
52 #define ACTIVE_LOCK 4
53 #define LIST_LOCK_BASE 1024
54
55 #define BUCKET(hash) ((hash) % tdb->header.hash_size)
56
57 #ifndef MAP_FILE
58 #define MAP_FILE 0
59 #endif
60
61 /* the body of the database is made of one list_struct for the free space
62 plus a separate data list for each hash value */
63 struct list_struct {
64 tdb_len rec_len; /* total byte length of record */
65 tdb_off next; /* offset of the next record in the list */
66 tdb_len key_len; /* byte length of key */
67 tdb_len data_len; /* byte length of data */
68 unsigned full_hash; /* the full 32 bit hash of the key */
69 unsigned magic; /* try to catch errors */
70 /*
71 the following union is implied
72 union {
73 char record[rec_len];
74 struct {
75 char key[key_len];
76 char data[data_len];
77 }
78 }
79 */
80 };
81
82 /* a null data record - useful for error returns */
83 static TDB_DATA null_data;
84
85 static int tdb_update __P((TDB_CONTEXT *, TDB_DATA, TDB_DATA));
86 #ifdef notyet
87 static int tdb_lockchain __P((TDB_CONTEXT *, TDB_DATA));
88 static int tdb_unlockchain __P((TDB_CONTEXT *, TDB_DATA));
89 #endif
90
91 /* a byte range locking function - return 0 on success
92 this functions locks/unlocks 1 byte at the specified offset */
tdb_brlock(TDB_CONTEXT * tdb,tdb_off offset,int set,int rw_type,int lck_type)93 static int tdb_brlock(TDB_CONTEXT *tdb, tdb_off offset,
94 int set, int rw_type, int lck_type)
95 {
96 #if NOLOCK
97 return 0;
98 #else
99 struct flock fl;
100
101 if (tdb->fd == -1) return 0; /* for in memory tdb */
102
103 if (tdb->read_only) return -1;
104
105 fl.l_type = set==LOCK_SET?rw_type:F_UNLCK;
106 fl.l_whence = SEEK_SET;
107 fl.l_start = offset;
108 fl.l_len = 1;
109 fl.l_pid = 0;
110
111 if (fcntl(tdb->fd, lck_type, &fl) != 0) {
112 #if TDB_DEBUG
113 if (lck_type == F_SETLKW) {
114 printf("lock %d failed at %d (%s)\n",
115 set, offset, strerror(errno));
116 }
117 #endif
118 tdb->ecode = TDB_ERR_LOCK;
119 return -1;
120 }
121 return 0;
122 #endif
123 }
124
125 /* lock a list in the database. list -1 is the alloc list */
tdb_lock(TDB_CONTEXT * tdb,int list)126 static int tdb_lock(TDB_CONTEXT *tdb, int list)
127 {
128 if (list < -1 || list >= (int)tdb->header.hash_size) {
129 #if TDB_DEBUG
130 printf("bad list %d\n", list);
131 #endif
132 return -1;
133 }
134 if (tdb->locked[list+1] == 0) {
135 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_SET,
136 F_WRLCK, F_SETLKW) != 0) {
137 return -1;
138 }
139 }
140 tdb->locked[list+1]++;
141 return 0;
142 }
143
144 /* unlock the database. */
tdb_unlock(TDB_CONTEXT * tdb,int list)145 static int tdb_unlock(TDB_CONTEXT *tdb, int list)
146 {
147 if (list < -1 || list >= (int)tdb->header.hash_size) {
148 #if TDB_DEBUG
149 printf("bad unlock list %d\n", list);
150 #endif
151 return -1;
152 }
153
154 if (tdb->locked[list+1] == 0) {
155 #if TDB_DEBUG
156 printf("not locked %d\n", list);
157 #endif
158 tdb->ecode = TDB_ERR_LOCK;
159 return -1;
160 }
161 if (tdb->locked[list+1] == 1) {
162 if (tdb_brlock(tdb, LIST_LOCK_BASE + 4*list, LOCK_CLEAR,
163 F_WRLCK, F_SETLKW) != 0) {
164 return -1;
165 }
166 }
167 tdb->locked[list+1]--;
168 return 0;
169 }
170
171 /* the hash algorithm - turn a key into an integer
172 This is based on the hash agorithm from gdbm */
tdb_hash(TDB_DATA * key)173 static unsigned tdb_hash(TDB_DATA *key)
174 {
175 unsigned value; /* Used to compute the hash value. */
176 unsigned i; /* Used to cycle through random values. */
177
178 /* Set the initial value from the key size. */
179 value = 0x238F13AF * key->dsize;
180 for (i=0; i < key->dsize; i++) {
181 value = (value + (key->dptr[i] << (i*5 % 24)));
182 }
183
184 value = (1103515243 * value + 12345);
185
186 return value;
187 }
188
189 /* find the top of the hash chain for an open database */
tdb_hash_top(TDB_CONTEXT * tdb,unsigned hash)190 static tdb_off tdb_hash_top(TDB_CONTEXT *tdb, unsigned hash)
191 {
192 tdb_off ret;
193 hash = BUCKET(hash);
194 ret = FREELIST_TOP + (hash+1)*sizeof(tdb_off);
195 return ret;
196 }
197
198
199 /* check for an out of bounds access - if it is out of bounds then
200 see if the database has been expanded by someone else and expand
201 if necessary */
tdb_oob(TDB_CONTEXT * tdb,tdb_off offset)202 static int tdb_oob(TDB_CONTEXT *tdb, tdb_off offset)
203 {
204 struct stat st;
205 if ((offset <= tdb->map_size) || (tdb->fd == -1)) return 0;
206
207 fstat(tdb->fd, &st);
208 if (st.st_size <= (ssize_t)offset) {
209 tdb->ecode = TDB_ERR_IO;
210 return -1;
211 }
212
213 #if HAVE_MMAP
214 if (tdb->map_ptr) {
215 munmap(tdb->map_ptr, tdb->map_size);
216 tdb->map_ptr = NULL;
217 }
218 #endif
219
220 tdb->map_size = st.st_size;
221 #if HAVE_MMAP
222 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
223 tdb->read_only?PROT_READ:PROT_READ|PROT_WRITE,
224 MAP_SHARED | MAP_FILE, tdb->fd, 0);
225 if (tdb->map_ptr == MAP_FAILED)
226 tdb->map_ptr = NULL;
227 #endif
228 return 0;
229 }
230
231
232 /* write a lump of data at a specified offset */
tdb_write(TDB_CONTEXT * tdb,tdb_off offset,const char * buf,tdb_len len)233 static int tdb_write(TDB_CONTEXT *tdb, tdb_off offset, const char *buf, tdb_len len)
234 {
235 if (tdb_oob(tdb, offset + len) != 0) {
236 /* oops - trying to write beyond the end of the database! */
237 return -1;
238 }
239
240 if (tdb->map_ptr) {
241 memcpy(offset + (char *)tdb->map_ptr, buf, len);
242 } else {
243 if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
244 write(tdb->fd, buf, len) != (ssize_t)len) {
245 tdb->ecode = TDB_ERR_IO;
246 return -1;
247 }
248 }
249 return 0;
250 }
251
252 /* read a lump of data at a specified offset */
tdb_read(TDB_CONTEXT * tdb,tdb_off offset,char * buf,tdb_len len)253 static int tdb_read(TDB_CONTEXT *tdb, tdb_off offset, char *buf, tdb_len len)
254 {
255 if (tdb_oob(tdb, offset + len) != 0) {
256 /* oops - trying to read beyond the end of the database! */
257 return -1;
258 }
259
260 if (tdb->map_ptr) {
261 memcpy(buf, offset + (char *)tdb->map_ptr, len);
262 } else {
263 if (lseek(tdb->fd, offset, SEEK_SET) != offset ||
264 read(tdb->fd, buf, len) != (ssize_t)len) {
265 tdb->ecode = TDB_ERR_IO;
266 return -1;
267 }
268 }
269 return 0;
270 }
271
272
273 /* read a lump of data, allocating the space for it */
tdb_alloc_read(TDB_CONTEXT * tdb,tdb_off offset,tdb_len len)274 static char *tdb_alloc_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_len len)
275 {
276 char *buf;
277
278 buf = (char *)malloc(len);
279
280 if (!buf) {
281 tdb->ecode = TDB_ERR_OOM;
282 return NULL;
283 }
284
285 if (tdb_read(tdb, offset, buf, len) == -1) {
286 free(buf);
287 return NULL;
288 }
289
290 return buf;
291 }
292
293 /* convenience routine for writing a record */
rec_write(TDB_CONTEXT * tdb,tdb_off offset,struct list_struct * rec)294 static int rec_write(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
295 {
296 return tdb_write(tdb, offset, (char *)rec, sizeof(*rec));
297 }
298
299 /* convenience routine for writing a tdb_off */
ofs_write(TDB_CONTEXT * tdb,tdb_off offset,tdb_off * d)300 static int ofs_write(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
301 {
302 return tdb_write(tdb, offset, (char *)d, sizeof(*d));
303 }
304
305 /* read a tdb_off from the store */
ofs_read(TDB_CONTEXT * tdb,tdb_off offset,tdb_off * d)306 static int ofs_read(TDB_CONTEXT *tdb, tdb_off offset, tdb_off *d)
307 {
308 return tdb_read(tdb, offset, (char *)d, sizeof(*d));
309 }
310
311 /* read a record and check for simple errors */
rec_read(TDB_CONTEXT * tdb,tdb_off offset,struct list_struct * rec)312 static int rec_read(TDB_CONTEXT *tdb, tdb_off offset, struct list_struct *rec)
313 {
314 if (tdb_read(tdb, offset, (char *)rec, sizeof(*rec)) == -1) return -1;
315 if (rec->magic != TDB_MAGIC) {
316 #if TDB_DEBUG
317 printf("bad magic 0x%08x at offset %d\n",
318 rec->magic, offset);
319 #endif
320 tdb->ecode = TDB_ERR_CORRUPT;
321 return -1;
322 }
323 if (tdb_oob(tdb, rec->next) != 0) {
324 return -1;
325 }
326 return 0;
327 }
328
329 /* expand the database at least length bytes by expanding the
330 underlying file and doing the mmap again if necessary */
tdb_expand(TDB_CONTEXT * tdb,tdb_off length)331 static int tdb_expand(TDB_CONTEXT *tdb, tdb_off length)
332 {
333 struct list_struct rec;
334 tdb_off offset, ptr;
335 char b = 0;
336
337 tdb_lock(tdb,-1);
338
339 /* make sure we know about any previous expansions by another
340 process */
341 tdb_oob(tdb,tdb->map_size + 1);
342
343 /* always make room for at least 10 more records */
344 length *= TDB_LEN_MULTIPLIER;
345
346 /* and round the database up to a multiple of TDB_PAGE_SIZE */
347 length = ((tdb->map_size + length + TDB_PAGE_SIZE) & ~(TDB_PAGE_SIZE - 1)) - tdb->map_size;
348
349 /* expand the file itself */
350 if (tdb->fd != -1) {
351 lseek(tdb->fd, tdb->map_size + length - 1, SEEK_SET);
352 if (write(tdb->fd, &b, 1) != 1) goto fail;
353 }
354
355 /* form a new freelist record */
356 offset = FREELIST_TOP;
357 rec.rec_len = length - sizeof(rec);
358 rec.magic = TDB_FREE_MAGIC;
359 if (ofs_read(tdb, offset, &rec.next) == -1) {
360 goto fail;
361 }
362
363 #if HAVE_MMAP
364 if (tdb->fd != -1 && tdb->map_ptr) {
365 munmap(tdb->map_ptr, tdb->map_size);
366 tdb->map_ptr = NULL;
367 }
368 #endif
369
370 tdb->map_size += length;
371
372 if (tdb->fd == -1) {
373 void *n;
374 n = realloc(tdb->map_ptr, tdb->map_size);
375 if (!n)
376 goto fail;
377 tdb->map_ptr = n;
378 }
379
380 /* write it out */
381 if (rec_write(tdb, tdb->map_size - length, &rec) == -1) {
382 goto fail;
383 }
384
385 /* link it into the free list */
386 ptr = tdb->map_size - length;
387 if (ofs_write(tdb, offset, &ptr) == -1) goto fail;
388
389 #if HAVE_MMAP
390 if (tdb->fd != -1) {
391 tdb->map_ptr = (void *)mmap(NULL, tdb->map_size,
392 PROT_READ|PROT_WRITE,
393 MAP_SHARED | MAP_FILE, tdb->fd, 0);
394 if (tdb->map_ptr == MAP_FAILED)
395 tdb->map_ptr = NULL;
396 }
397 #endif
398
399 tdb_unlock(tdb, -1);
400 return 0;
401
402 fail:
403 tdb_unlock(tdb,-1);
404 return -1;
405 }
406
407 /* allocate some space from the free list. The offset returned points
408 to a unconnected list_struct within the database with room for at
409 least length bytes of total data
410
411 0 is returned if the space could not be allocated
412 */
tdb_allocate(TDB_CONTEXT * tdb,tdb_len length)413 static tdb_off tdb_allocate(TDB_CONTEXT *tdb, tdb_len length)
414 {
415 tdb_off offset, rec_ptr, last_ptr;
416 struct list_struct rec, lastrec, newrec;
417
418 tdb_lock(tdb, -1);
419
420 again:
421 last_ptr = 0;
422 offset = FREELIST_TOP;
423
424 /* read in the freelist top */
425 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
426 goto fail;
427 }
428
429 /* keep looking until we find a freelist record that is big
430 enough */
431 while (rec_ptr) {
432 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
433 goto fail;
434 }
435
436 if (rec.magic != TDB_FREE_MAGIC) {
437 #if TDB_DEBUG
438 printf("bad magic 0x%08x in free list\n", rec.magic);
439 #endif
440 goto fail;
441 }
442
443 if (rec.rec_len >= length) {
444 /* found it - now possibly split it up */
445 if (rec.rec_len > length + MIN_REC_SIZE) {
446 length = (length + TDB_ALIGN) & ~(TDB_ALIGN-1);
447
448 newrec.rec_len = rec.rec_len - (sizeof(rec) + length);
449 newrec.next = rec.next;
450 newrec.magic = TDB_FREE_MAGIC;
451
452 rec.rec_len = length;
453 rec.next = rec_ptr + sizeof(rec) + rec.rec_len;
454
455 if (rec_write(tdb, rec.next, &newrec) == -1) {
456 goto fail;
457 }
458
459 if (rec_write(tdb, rec_ptr, &rec) == -1) {
460 goto fail;
461 }
462 }
463
464 /* remove it from the list */
465 if (last_ptr == 0) {
466 offset = FREELIST_TOP;
467
468 if (ofs_write(tdb, offset, &rec.next) == -1) {
469 goto fail;
470 }
471 } else {
472 lastrec.next = rec.next;
473 if (rec_write(tdb, last_ptr, &lastrec) == -1) {
474 goto fail;
475 }
476 }
477
478 /* all done - return the new record offset */
479 tdb_unlock(tdb, -1);
480 return rec_ptr;
481 }
482
483 /* move to the next record */
484 lastrec = rec;
485 last_ptr = rec_ptr;
486 rec_ptr = rec.next;
487 }
488
489 /* we didn't find enough space. See if we can expand the
490 database and if we can then try again */
491 if (tdb_expand(tdb, length + sizeof(rec)) == 0) goto again;
492
493 fail:
494 #if TDB_DEBUG
495 printf("tdb_allocate failed for size %u\n", length);
496 #endif
497 tdb_unlock(tdb, -1);
498 return 0;
499 }
500
501 /* initialise a new database with a specified hash size */
tdb_new_database(TDB_CONTEXT * tdb,int hash_size)502 static int tdb_new_database(TDB_CONTEXT *tdb, int hash_size)
503 {
504 struct tdb_header header;
505 int i, size = 0;
506 tdb_off buf[16];
507
508 /* create the header */
509 memset(&header, 0, sizeof(header));
510 memcpy(header.magic_food, TDB_MAGIC_FOOD, strlen(TDB_MAGIC_FOOD)+1);
511 header.version = TDB_VERSION;
512 header.hash_size = hash_size;
513 lseek(tdb->fd, 0, SEEK_SET);
514 ftruncate(tdb->fd, 0);
515
516 if (tdb->fd != -1 && write(tdb->fd, &header, sizeof(header)) !=
517 sizeof(header)) {
518 tdb->ecode = TDB_ERR_IO;
519 return -1;
520 } else size += sizeof(header);
521
522 /* the freelist and hash pointers */
523 memset(buf, 0, sizeof(buf));
524
525 for (i=0;(hash_size+1)-i >= 16; i += 16) {
526 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(buf)) !=
527 sizeof(buf)) {
528 tdb->ecode = TDB_ERR_IO;
529 return -1;
530 } else size += sizeof(buf);
531 }
532
533 for (;i<hash_size+1; i++) {
534 if (tdb->fd != -1 && write(tdb->fd, buf, sizeof(tdb_off)) !=
535 sizeof(tdb_off)) {
536 tdb->ecode = TDB_ERR_IO;
537 return -1;
538 } else size += sizeof(tdb_off);
539 }
540
541 if (tdb->fd == -1) {
542 tdb->map_ptr = calloc(size, 1);
543 tdb->map_size = size;
544 if (tdb->map_ptr == NULL) {
545 tdb->ecode = TDB_ERR_IO;
546 return -1;
547 }
548 memcpy(&tdb->header, &header, sizeof(header));
549 }
550
551 #if TDB_DEBUG
552 printf("initialised database of hash_size %u\n",
553 hash_size);
554 #endif
555 return 0;
556 }
557
558 /* Returns 0 on fail. On success, return offset of record, and fills
559 in rec */
tdb_find(TDB_CONTEXT * tdb,TDB_DATA key,unsigned int hash,struct list_struct * rec)560 static tdb_off tdb_find(TDB_CONTEXT *tdb, TDB_DATA key, unsigned int hash,
561 struct list_struct *rec)
562 {
563 tdb_off offset, rec_ptr;
564
565 /* find the top of the hash chain */
566 offset = tdb_hash_top(tdb, hash);
567
568 /* read in the hash top */
569 if (ofs_read(tdb, offset, &rec_ptr) == -1)
570 return 0;
571
572 /* keep looking until we find the right record */
573 while (rec_ptr) {
574 if (rec_read(tdb, rec_ptr, rec) == -1)
575 return 0;
576
577 if (hash == rec->full_hash && key.dsize == rec->key_len) {
578 char *k;
579 /* a very likely hit - read the key */
580 k = tdb_alloc_read(tdb, rec_ptr + sizeof(*rec),
581 rec->key_len);
582
583 if (!k)
584 return 0;
585
586 if (memcmp(key.dptr, k, key.dsize) == 0) {
587 free(k);
588 return rec_ptr;
589 }
590 free(k);
591 }
592
593 /* move to the next record */
594 rec_ptr = rec->next;
595 }
596 return 0;
597 }
598
599 /*
600 return an error string for the last tdb error
601 */
tdb_errorstr(TDB_CONTEXT * tdb)602 char *tdb_errorstr(TDB_CONTEXT *tdb)
603 {
604 int i;
605 static struct {
606 enum TDB_ERROR ecode;
607 char *estring;
608 } emap[] = {
609 {TDB_SUCCESS, "Success"},
610 {TDB_ERR_CORRUPT, "Corrupt database"},
611 {TDB_ERR_IO, "IO Error"},
612 {TDB_ERR_LOCK, "Locking error"},
613 {TDB_ERR_OOM, "Out of memory"},
614 {TDB_ERR_EXISTS, "Record exists"},
615 {-1, NULL}};
616 if (tdb != NULL) {
617 for (i=0;emap[i].estring;i++) {
618 if (tdb->ecode == emap[i].ecode) return emap[i].estring;
619 }
620 } else {
621 return "Invalid tdb context";
622 }
623 return "Invalid error code";
624 }
625
626
627 /* update an entry in place - this only works if the new data size
628 is <= the old data size and the key exists.
629 on failure return -1
630 */
tdb_update(TDB_CONTEXT * tdb,TDB_DATA key,TDB_DATA dbuf)631 static int tdb_update(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf)
632 {
633 unsigned hash;
634 struct list_struct rec;
635 tdb_off rec_ptr;
636 int ret = -1;
637
638 if (tdb == NULL) {
639 #ifdef TDB_DEBUG
640 printf("tdb_update() called with null context\n");
641 #endif
642 return -1;
643 }
644
645 /* find which hash bucket it is in */
646 hash = tdb_hash(&key);
647
648 tdb_lock(tdb, BUCKET(hash));
649 rec_ptr = tdb_find(tdb, key, hash, &rec);
650
651 if (!rec_ptr)
652 goto out;
653
654 /* must be long enough */
655 if (rec.rec_len < key.dsize + dbuf.dsize)
656 goto out;
657
658 if (tdb_write(tdb, rec_ptr + sizeof(rec) + rec.key_len,
659 dbuf.dptr, dbuf.dsize) == -1)
660 goto out;
661
662 if (dbuf.dsize != rec.data_len) {
663 /* update size */
664 rec.data_len = dbuf.dsize;
665 ret = rec_write(tdb, rec_ptr, &rec);
666 } else
667 ret = 0;
668
669 out:
670 tdb_unlock(tdb, BUCKET(hash));
671 return ret;
672 }
673
674 /* find an entry in the database given a key */
tdb_fetch(TDB_CONTEXT * tdb,TDB_DATA key)675 TDB_DATA tdb_fetch(TDB_CONTEXT *tdb, TDB_DATA key)
676 {
677 unsigned hash;
678 tdb_off rec_ptr;
679 struct list_struct rec;
680 TDB_DATA ret = null_data;
681
682 if (tdb == NULL) {
683 #ifdef TDB_DEBUG
684 printf("tdb_fetch() called with null context\n");
685 #endif
686 return null_data;
687 }
688
689 /* find which hash bucket it is in */
690 hash = tdb_hash(&key);
691
692 tdb_lock(tdb, BUCKET(hash));
693 rec_ptr = tdb_find(tdb, key, hash, &rec);
694
695 if (rec_ptr) {
696 ret.dptr = tdb_alloc_read(tdb,
697 rec_ptr + sizeof(rec) + rec.key_len,
698 rec.data_len);
699 ret.dsize = rec.data_len;
700 }
701
702 tdb_unlock(tdb, BUCKET(hash));
703 return ret;
704 }
705
706 /* check if an entry in the database exists
707
708 note that 1 is returned if the key is found and 0 is returned if not found
709 this doesn't match the conventions in the rest of this module, but is
710 compatible with gdbm
711 */
tdb_exists(TDB_CONTEXT * tdb,TDB_DATA key)712 int tdb_exists(TDB_CONTEXT *tdb, TDB_DATA key)
713 {
714 unsigned hash;
715 tdb_off rec_ptr;
716 struct list_struct rec;
717
718 if (tdb == NULL) {
719 #ifdef TDB_DEBUG
720 printf("tdb_exists() called with null context\n");
721 #endif
722 return 0;
723 }
724
725 /* find which hash bucket it is in */
726 hash = tdb_hash(&key);
727
728 tdb_lock(tdb, BUCKET(hash));
729 rec_ptr = tdb_find(tdb, key, hash, &rec);
730 tdb_unlock(tdb, BUCKET(hash));
731
732 return rec_ptr != 0;
733 }
734
735 /* traverse the entire database - calling fn(tdb, key, data) on each element.
736 return -1 on error or the record count traversed
737 if fn is NULL then it is not called
738 a non-zero return value from fn() indicates that the traversal should stop
739 */
tdb_traverse(TDB_CONTEXT * tdb,int (* fn)(TDB_CONTEXT * tdb,TDB_DATA key,TDB_DATA dbuf,void * state),void * state)740 int tdb_traverse(TDB_CONTEXT *tdb, int (*fn)(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, void* state), void* state)
741 {
742 int count = 0;
743 unsigned h;
744 tdb_off offset, rec_ptr;
745 struct list_struct rec;
746 char *data;
747 TDB_DATA key, dbuf;
748
749 if (tdb == NULL) {
750 #ifdef TDB_DEBUG
751 printf("tdb_traverse() called with null context\n");
752 #endif
753 return -1;
754 }
755
756 /* loop over all hash chains */
757 for (h = 0; h < tdb->header.hash_size; h++) {
758 tdb_lock(tdb, BUCKET(h));
759
760 /* read in the hash top */
761 offset = tdb_hash_top(tdb, h);
762 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
763 goto fail;
764 }
765
766 /* traverse all records for this hash */
767 while (rec_ptr) {
768 if (rec_read(tdb, rec_ptr, &rec) == -1) {
769 goto fail;
770 }
771
772 /* now read the full record */
773 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
774 rec.key_len + rec.data_len);
775 if (!data) {
776 goto fail;
777 }
778
779 key.dptr = data;
780 key.dsize = rec.key_len;
781 dbuf.dptr = data + rec.key_len;
782 dbuf.dsize = rec.data_len;
783 count++;
784
785 if (fn && fn(tdb, key, dbuf, state) != 0) {
786 /* they want us to stop traversing */
787 free(data);
788 tdb_unlock(tdb, BUCKET(h));
789 return count;
790 }
791
792 /* a miss - drat */
793 free(data);
794
795 /* move to the next record */
796 rec_ptr = rec.next;
797 }
798 tdb_unlock(tdb, BUCKET(h));
799 }
800
801 /* return the number traversed */
802 return count;
803
804 fail:
805 tdb_unlock(tdb, BUCKET(h));
806 return -1;
807 }
808
809
810 /* find the first entry in the database and return its key */
tdb_firstkey(TDB_CONTEXT * tdb)811 TDB_DATA tdb_firstkey(TDB_CONTEXT *tdb)
812 {
813 tdb_off offset, rec_ptr;
814 struct list_struct rec;
815 unsigned hash;
816 TDB_DATA ret;
817
818 if (tdb == NULL) {
819 #ifdef TDB_DEBUG
820 printf("tdb_firstkey() called with null context\n");
821 #endif
822 return null_data;
823 }
824
825 /* look for a non-empty hash chain */
826 for (hash = 0, rec_ptr = 0;
827 hash < tdb->header.hash_size;
828 hash++) {
829 /* find the top of the hash chain */
830 offset = tdb_hash_top(tdb, hash);
831
832 tdb_lock(tdb, BUCKET(hash));
833
834 /* read in the hash top */
835 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
836 goto fail;
837 }
838
839 if (rec_ptr) break;
840
841 tdb_unlock(tdb, BUCKET(hash));
842 }
843
844 if (rec_ptr == 0) return null_data;
845
846 /* we've found a non-empty chain, now read the record */
847 if (rec_read(tdb, rec_ptr, &rec) == -1) {
848 goto fail;
849 }
850
851 /* allocate and read the key space */
852 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
853 ret.dsize = rec.key_len;
854 tdb_unlock(tdb, BUCKET(hash));
855 return ret;
856
857 fail:
858 tdb_unlock(tdb, BUCKET(hash));
859 return null_data;
860 }
861
862 /* find the next entry in the database, returning its key */
tdb_nextkey(TDB_CONTEXT * tdb,TDB_DATA key)863 TDB_DATA tdb_nextkey(TDB_CONTEXT *tdb, TDB_DATA key)
864 {
865 unsigned hash, hbucket;
866 tdb_off rec_ptr, offset;
867 struct list_struct rec;
868 TDB_DATA ret;
869
870 if (tdb == NULL) {
871 #ifdef TDB_DEBUG
872 printf("tdb_nextkey() called with null context\n");
873 #endif
874 return null_data;
875 }
876
877 /* find which hash bucket it is in */
878 hash = tdb_hash(&key);
879 hbucket = BUCKET(hash);
880
881 tdb_lock(tdb, hbucket);
882 rec_ptr = tdb_find(tdb, key, hash, &rec);
883 if (rec_ptr) {
884 /* we want the next record after this one */
885 rec_ptr = rec.next;
886 }
887
888 /* not found or last in hash: look for next non-empty hash chain */
889 while (rec_ptr == 0) {
890 tdb_unlock(tdb, hbucket);
891
892 if (++hbucket >= tdb->header.hash_size - 1)
893 return null_data;
894
895 offset = tdb_hash_top(tdb, hbucket);
896 tdb_lock(tdb, hbucket);
897 /* read in the hash top */
898 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
899 tdb_unlock(tdb, hbucket);
900 return null_data;
901 }
902 }
903
904 /* Read the record. */
905 if (rec_read(tdb, rec_ptr, &rec) == -1) {
906 tdb_unlock(tdb, hbucket);
907 return null_data;
908 }
909 /* allocate and read the key */
910 ret.dptr = tdb_alloc_read(tdb, rec_ptr + sizeof(rec), rec.key_len);
911 ret.dsize = rec.key_len;
912 tdb_unlock(tdb, hbucket);
913
914 return ret;
915 }
916
917 /* delete an entry in the database given a key */
tdb_delete(TDB_CONTEXT * tdb,TDB_DATA key)918 int tdb_delete(TDB_CONTEXT *tdb, TDB_DATA key)
919 {
920 unsigned hash;
921 tdb_off offset, rec_ptr, last_ptr;
922 struct list_struct rec, lastrec;
923 char *data = NULL;
924
925 if (tdb == NULL) {
926 #ifdef TDB_DEBUG
927 printf("tdb_delete() called with null context\n");
928 #endif
929 return -1;
930 }
931
932 /* find which hash bucket it is in */
933 hash = tdb_hash(&key);
934
935 tdb_lock(tdb, BUCKET(hash));
936
937 /* find the top of the hash chain */
938 offset = tdb_hash_top(tdb, hash);
939
940 /* read in the hash top */
941 if (ofs_read(tdb, offset, &rec_ptr) == -1) {
942 goto fail;
943 }
944
945 last_ptr = 0;
946
947 /* keep looking until we find the right record */
948 while (rec_ptr) {
949 if (rec_read(tdb, rec_ptr, &rec) == -1) {
950 goto fail;
951 }
952
953 if (hash == rec.full_hash && key.dsize == rec.key_len) {
954 /* a very likely hit - read the record and full key */
955 data = tdb_alloc_read(tdb, rec_ptr + sizeof(rec),
956 rec.key_len);
957 if (!data) {
958 goto fail;
959 }
960
961 if (memcmp(key.dptr, data, key.dsize) == 0) {
962 /* a definite match - delete it */
963 if (last_ptr == 0) {
964 offset = tdb_hash_top(tdb, hash);
965 if (ofs_write(tdb, offset, &rec.next) == -1) {
966 goto fail;
967 }
968 } else {
969 lastrec.next = rec.next;
970 if (rec_write(tdb, last_ptr, &lastrec) == -1) {
971 goto fail;
972 }
973 }
974 tdb_unlock(tdb, BUCKET(hash));
975 tdb_lock(tdb, -1);
976 /* and recover the space */
977 offset = FREELIST_TOP;
978 if (ofs_read(tdb, offset, &rec.next) == -1) {
979 goto fail2;
980 }
981 rec.magic = TDB_FREE_MAGIC;
982 if (rec_write(tdb, rec_ptr, &rec) == -1) {
983 goto fail2;
984 }
985 if (ofs_write(tdb, offset, &rec_ptr) == -1) {
986 goto fail2;
987 }
988
989 /* yipee - all done */
990 free(data);
991 tdb_unlock(tdb, -1);
992 return 0;
993 }
994
995 /* a miss - drat */
996 free(data);
997 data = NULL;
998 }
999
1000 /* move to the next record */
1001 last_ptr = rec_ptr;
1002 lastrec = rec;
1003 rec_ptr = rec.next;
1004 }
1005
1006 fail:
1007 if (data) free(data);
1008 tdb_unlock(tdb, BUCKET(hash));
1009 return -1;
1010
1011 fail2:
1012 if (data) free(data);
1013 tdb_unlock(tdb, -1);
1014 return -1;
1015 }
1016
1017
1018 /* store an element in the database, replacing any existing element
1019 with the same key
1020
1021 return 0 on success, -1 on failure
1022 */
tdb_store(TDB_CONTEXT * tdb,TDB_DATA key,TDB_DATA dbuf,int flag)1023 int tdb_store(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA dbuf, int flag)
1024 {
1025 struct list_struct rec;
1026 unsigned hash;
1027 tdb_off rec_ptr, offset;
1028 char *p = NULL;
1029
1030 if (tdb == NULL) {
1031 #ifdef TDB_DEBUG
1032 printf("tdb_store() called with null context\n");
1033 #endif
1034 return -1;
1035 }
1036
1037 /* find which hash bucket it is in */
1038 hash = tdb_hash(&key);
1039
1040 /* check for it existing */
1041 if (flag == TDB_INSERT && tdb_exists(tdb, key)) {
1042 tdb->ecode = TDB_ERR_EXISTS;
1043 return -1;
1044 }
1045
1046 /* first try in-place update */
1047 if (flag != TDB_INSERT && tdb_update(tdb, key, dbuf) == 0) {
1048 return 0;
1049 }
1050
1051 rec_ptr = tdb_allocate(tdb, key.dsize + dbuf.dsize);
1052 if (rec_ptr == 0) {
1053 return -1;
1054 }
1055
1056 tdb_lock(tdb, BUCKET(hash));
1057
1058 /* delete any existing record - if it doesn't exist we don't care */
1059 if (flag != TDB_INSERT) {
1060 tdb_delete(tdb, key);
1061 }
1062
1063 /* read the newly created record */
1064 if (tdb_read(tdb, rec_ptr, (char *)&rec, sizeof(rec)) == -1) {
1065 goto fail;
1066 }
1067
1068 if (rec.magic != TDB_FREE_MAGIC) goto fail;
1069
1070 /* find the top of the hash chain */
1071 offset = tdb_hash_top(tdb, hash);
1072
1073 /* read in the hash top diretcly into our next pointer */
1074 if (ofs_read(tdb, offset, &rec.next) == -1) {
1075 goto fail;
1076 }
1077
1078 rec.key_len = key.dsize;
1079 rec.data_len = dbuf.dsize;
1080 rec.full_hash = hash;
1081 rec.magic = TDB_MAGIC;
1082
1083 p = (char *)malloc(sizeof(rec) + key.dsize + dbuf.dsize);
1084 if (!p) {
1085 tdb->ecode = TDB_ERR_OOM;
1086 goto fail;
1087 }
1088
1089 memcpy(p, &rec, sizeof(rec));
1090 memcpy(p+sizeof(rec), key.dptr, key.dsize);
1091 memcpy(p+sizeof(rec)+key.dsize, dbuf.dptr, dbuf.dsize);
1092
1093 if (tdb_write(tdb, rec_ptr, p, sizeof(rec)+key.dsize+dbuf.dsize) == -1)
1094 goto fail;
1095
1096 free(p);
1097 p = NULL;
1098
1099 /* and point the top of the hash chain at it */
1100 if (ofs_write(tdb, offset, &rec_ptr) == -1) goto fail;
1101
1102 tdb_unlock(tdb, BUCKET(hash));
1103 return 0;
1104
1105 fail:
1106 #if TDB_DEBUG
1107 printf("store failed for hash 0x%08x in bucket %u\n", hash, BUCKET(hash));
1108 #endif
1109 if (p) free(p);
1110 tdb_unlock(tdb, BUCKET(hash));
1111 return -1;
1112 }
1113
1114
1115 /* open the database, creating it if necessary
1116
1117 The open_flags and mode are passed straight to the open call on the database
1118 file. A flags value of O_WRONLY is invalid
1119
1120 The hash size is advisory, use zero for a default value.
1121
1122 return is NULL on error
1123 */
tdb_open(char * name,int hash_size,int tdb_flags,int open_flags,mode_t mode)1124 TDB_CONTEXT *tdb_open(char *name, int hash_size, int tdb_flags,
1125 int open_flags, mode_t mode)
1126 {
1127 TDB_CONTEXT tdb, *ret;
1128 struct stat st;
1129
1130 memset(&tdb, 0, sizeof(tdb));
1131
1132 tdb.fd = -1;
1133 tdb.name = NULL;
1134 tdb.map_ptr = NULL;
1135
1136 if ((open_flags & O_ACCMODE) == O_WRONLY) {
1137 goto fail;
1138 }
1139
1140 if (hash_size == 0) hash_size = DEFAULT_HASH_SIZE;
1141
1142 tdb.read_only = ((open_flags & O_ACCMODE) == O_RDONLY);
1143
1144 if (name != NULL) {
1145 tdb.fd = open(name, open_flags, mode);
1146 if (tdb.fd == -1) {
1147 goto fail;
1148 }
1149 }
1150
1151 /* ensure there is only one process initialising at once */
1152 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_SET, F_WRLCK, F_SETLKW);
1153
1154 if (tdb_flags & TDB_CLEAR_IF_FIRST) {
1155 /* we need to zero the database if we are the only
1156 one with it open */
1157 if (tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_WRLCK, F_SETLK) == 0) {
1158 ftruncate(tdb.fd, 0);
1159 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLK);
1160 }
1161 }
1162
1163 /* leave this lock in place */
1164 tdb_brlock(&tdb, ACTIVE_LOCK, LOCK_SET, F_RDLCK, F_SETLKW);
1165
1166 if (read(tdb.fd, &tdb.header, sizeof(tdb.header)) != sizeof(tdb.header) ||
1167 strcmp(tdb.header.magic_food, TDB_MAGIC_FOOD) != 0 ||
1168 tdb.header.version != TDB_VERSION) {
1169 /* its not a valid database - possibly initialise it */
1170 if (!(open_flags & O_CREAT)) {
1171 goto fail;
1172 }
1173 if (tdb_new_database(&tdb, hash_size) == -1) goto fail;
1174
1175 lseek(tdb.fd, 0, SEEK_SET);
1176 if (tdb.fd != -1 && read(tdb.fd, &tdb.header,
1177 sizeof(tdb.header)) !=
1178 sizeof(tdb.header))
1179 goto fail;
1180 }
1181
1182 if (tdb.fd != -1) {
1183 fstat(tdb.fd, &st);
1184
1185 /* map the database and fill in the return structure */
1186 tdb.name = name ? strdup(name) : NULL;
1187 tdb.map_size = st.st_size;
1188 }
1189
1190 tdb.locked = (int *)calloc(tdb.header.hash_size+1,
1191 sizeof(tdb.locked[0]));
1192 if (!tdb.locked) {
1193 goto fail;
1194 }
1195
1196 #if HAVE_MMAP
1197 if (tdb.fd != -1) {
1198 tdb.map_ptr = (void *)mmap(NULL, st.st_size,
1199 tdb.read_only? PROT_READ : PROT_READ|PROT_WRITE,
1200 MAP_SHARED | MAP_FILE, tdb.fd, 0);
1201 if (tdb->map_ptr == MAP_FAILED)
1202 tdb->map_ptr = NULL;
1203 }
1204 #endif
1205
1206 ret = (TDB_CONTEXT *)malloc(sizeof(tdb));
1207 if (!ret) goto fail;
1208
1209 *ret = tdb;
1210
1211 #if TDB_DEBUG
1212 printf("mapped database of hash_size %u map_size=%u\n",
1213 hash_size, tdb.map_size);
1214 #endif
1215
1216 tdb_brlock(&tdb, GLOBAL_LOCK, LOCK_CLEAR, F_WRLCK, F_SETLKW);
1217 return ret;
1218
1219 fail:
1220 if (tdb.name) free(tdb.name);
1221 if (tdb.fd != -1) close(tdb.fd);
1222 if (tdb.map_ptr) munmap(tdb.map_ptr, tdb.map_size);
1223
1224 return NULL;
1225 }
1226
1227 /* close a database */
tdb_close(TDB_CONTEXT * tdb)1228 int tdb_close(TDB_CONTEXT *tdb)
1229 {
1230 if (!tdb) return -1;
1231
1232 if (tdb->name) free(tdb->name);
1233 if (tdb->fd != -1) close(tdb->fd);
1234 if (tdb->locked) free(tdb->locked);
1235
1236 if (tdb->map_ptr) {
1237 if (tdb->fd != -1) {
1238 munmap(tdb->map_ptr, tdb->map_size);
1239 } else {
1240 free(tdb->map_ptr);
1241 }
1242 }
1243
1244 memset(tdb, 0, sizeof(*tdb));
1245 free(tdb);
1246
1247 return 0;
1248 }
1249
1250 /* lock the database. If we already have it locked then don't do anything */
tdb_writelock(TDB_CONTEXT * tdb)1251 int tdb_writelock(TDB_CONTEXT *tdb)
1252 {
1253 if (tdb == NULL) {
1254 #ifdef TDB_DEBUG
1255 printf("tdb_writelock() called with null context\n");
1256 #endif
1257 return -1;
1258 }
1259
1260 return tdb_lock(tdb, -1);
1261 }
1262
1263 /* unlock the database. */
tdb_writeunlock(TDB_CONTEXT * tdb)1264 int tdb_writeunlock(TDB_CONTEXT *tdb)
1265 {
1266 if (tdb == NULL) {
1267 #ifdef TDB_DEBUG
1268 printf("tdb_writeunlock() called with null context\n");
1269 #endif
1270 return -1;
1271 }
1272
1273 return tdb_unlock(tdb, -1);
1274 }
1275
tdb_chainlock(TDB_CONTEXT * tdb,TDB_DATA key)1276 int tdb_chainlock(TDB_CONTEXT *tdb, TDB_DATA key)
1277 {
1278 if (tdb == NULL) {
1279 #ifdef TDB_DEBUG
1280 printf("tdb_lockchain() called with null context\n");
1281 #endif
1282 return -1;
1283 }
1284
1285 return tdb_lock(tdb, BUCKET(tdb_hash(&key)));
1286 }
1287
1288
tdb_chainunlock(TDB_CONTEXT * tdb,TDB_DATA key)1289 int tdb_chainunlock(TDB_CONTEXT *tdb, TDB_DATA key)
1290 {
1291 if (tdb == NULL) {
1292 #ifdef TDB_DEBUG
1293 printf("tdb_unlockchain() called with null context\n");
1294 #endif
1295 return -1;
1296 }
1297
1298 return tdb_unlock(tdb, BUCKET(tdb_hash(&key)));
1299 }
1300