1 /*
2  * Binary tree database backend.
3  *
4  * Copyright 2007 Andrew Wood, distributed under the Artistic License.
5  */
6 
7 #include "config.h"
8 #include "database.h"
9 #include "log.h"
10 
11 #ifdef USING_BTREE
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <errno.h>
17 #include <signal.h>
18 #include <unistd.h>
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #ifdef HAVE_FCNTL
22 #include <fcntl.h>
23 #endif
24 #ifdef HAVE_MMAP
25 #include <sys/mman.h>
26 #endif
27 #ifdef HAVE_UTIME
28 #include <time.h>
29 #include <utime.h>
30 #endif
31 
32 #define BT_RECORD_SIZE sizeof(struct bt_record)
33 #define BT_TOKEN_MAX   36
34 
35 typedef unsigned long bt_ul;
36 
37 struct bt_record {			 /* single binary tree record */
38 	bt_ul lower;			 /* offset of "lower" token */
39 	bt_ul higher;			 /* offset of "higher" token */
40 	unsigned char token[BT_TOKEN_MAX];	/* RATS: ignore - 0-term. token */
41 	long data[3];			 /* token data (i.e. the counts) */
42 };
43 
44 typedef struct bt_record *bt_record_t;
45 
46 struct qdbint_s {			 /* database state */
47 	int fd;				 /* file descriptor of database file */
48 	bt_ul size;			 /* total size of database */
49 #ifdef HAVE_FCNTL
50 	int lockcount;			 /* number of times lock asked for */
51 	int locktype;			 /* type of lock to use (read or write) */
52 #endif
53 	int gotheadoffs;		 /* flag, set once head offset read */
54 	bt_ul head_offset;		 /* offset of head record of tree */
55 #ifdef HAVE_MMAP
56 	unsigned char *data;		 /* mmap()ed database data */
57 	bt_ul filepos;			 /* current file pointer */
58 #endif
59 	char *filename;			 /* filename of database */
60 	int modified;			 /* flag, set if db modified at all */
61 };
62 
63 static char *bt_lasterror = "";
64 
65 
66 #ifdef HAVE_FCNTL
67 /*
68  * Obtain / release a read or write lock on the database. Returns nonzero on
69  * error, and blocks until a lock can be obtained.
70  */
dbbt_lock(qdbint_t db,int lock_type)71 static int dbbt_lock(qdbint_t db, int lock_type)
72 {
73 	int ret;
74 	ret = qdb_int__lock(db->fd, lock_type, &(db->lockcount));
75 	if (ret != 0) {
76 		bt_lasterror = strerror(errno);
77 		return 1;
78 	}
79 	return 0;
80 }
81 #endif				/* HAVE_FCNTL */
82 
83 
dbbt_lseek(qdbint_t db,off_t offset,int whence)84 static off_t dbbt_lseek(qdbint_t db, off_t offset, int whence)
85 {
86 #ifdef HAVE_MMAP
87 	if ((db->data == 0) || (db->filepos < 0) || (whence != SEEK_SET)) {
88 		db->filepos = lseek(db->fd, offset, whence);
89 	} else {
90 		db->filepos = offset;
91 	}
92 	return db->filepos;
93 #else
94 	return lseek(db->fd, offset, whence);
95 #endif
96 }
97 
98 
99 /*
100  * Analogue of fread().
101  */
dbbt_chunkread_raw(void * ptr,int size,int nmemb,int fd)102 static int dbbt_chunkread_raw(void *ptr, int size, int nmemb, int fd)
103 {
104 	int numread, togo, got;
105 
106 	for (numread = 0; nmemb > 0; nmemb--, numread++) {
107 		for (togo = size; togo > 0;) {
108 			got = read(fd, ptr, togo);	/* RATS: ignore (OK) */
109 			if (got <= 0)
110 				return numread;
111 			togo -= got;
112 			ptr = (void *) (((char *) ptr) + got);
113 		}
114 	}
115 
116 	return numread;
117 }
118 
119 
120 /*
121  * Analogue of fwrite().
122  */
dbbt_chunkwrite_raw(void * ptr,int size,int nmemb,int fd)123 static int dbbt_chunkwrite_raw(void *ptr, int size, int nmemb, int fd)
124 {
125 	int numwritten, togo, written;
126 
127 	for (numwritten = 0; nmemb > 0; nmemb--, numwritten++) {
128 		for (togo = size; togo > 0;) {
129 			written = write(fd, ptr, togo);
130 			if (written <= 0)
131 				return numwritten;
132 			togo -= written;
133 			ptr = (void *) (((char *) ptr) + written);
134 		}
135 	}
136 
137 	return numwritten;
138 }
139 
140 
141 /*
142  * Analogue of fread(), using memory mapped data if possible.
143  */
dbbt_chunkread(void * ptr,int size,int nmemb,qdbint_t db)144 static int dbbt_chunkread(void *ptr, int size, int nmemb, qdbint_t db)
145 {
146 #ifdef HAVE_MMAP
147 	long available, copyable, tocopy;
148 
149 	if ((db->data == 0) || (db->filepos < 0)) {
150 		return dbbt_chunkread_raw(ptr, size, nmemb, db->fd);
151 	}
152 
153 	if (size < 1)
154 		return 0;
155 
156 	available = db->size - db->filepos;
157 
158 	if (available <= 0)
159 		return 0;
160 
161 	copyable = available / size;
162 	if (copyable < nmemb)
163 		nmemb = copyable;
164 
165 	tocopy = nmemb * size;
166 	memcpy(ptr, db->data + db->filepos, tocopy);
167 	db->filepos += tocopy;
168 
169 	return nmemb;
170 #else				/* !HAVE_MMAP */
171 	return dbbt_chunkread_raw(ptr, size, nmemb, db->fd);
172 #endif				/* HAVE_MMAP */
173 }
174 
175 
176 /*
177  * Analogue of fwrite(), using memory mapped data if possible, remapping
178  * after extending the file if not.
179  */
dbbt_chunkwrite(void * ptr,int size,int nmemb,qdbint_t db)180 static int dbbt_chunkwrite(void *ptr, int size, int nmemb, qdbint_t db)
181 {
182 #ifdef HAVE_MMAP
183 	long available, copyable, tocopy;
184 
185 	db->modified = 1;
186 
187 	if ((db->data == 0) || (db->filepos < 0))
188 		return dbbt_chunkwrite_raw(ptr, size, nmemb, db->fd);
189 
190 	if (size < 1)
191 		return 0;
192 
193 	available = db->size - db->filepos;
194 	copyable = available / size;
195 	if (copyable < nmemb) {
196 		int retval;
197 
198 		munmap(db->data, db->size);
199 		db->data = 0;
200 
201 		lseek(db->fd, db->filepos, SEEK_SET);
202 		retval = dbbt_chunkwrite_raw(ptr, size, nmemb, db->fd);
203 
204 		db->size = lseek(db->fd, 0, SEEK_END);
205 		db->data =
206 		    mmap(NULL, db->size, PROT_READ | PROT_WRITE,
207 			 MAP_SHARED, db->fd, 0);
208 		if (db->data == MAP_FAILED)
209 			db->data = 0;
210 		db->filepos = db->size;
211 
212 		return retval;
213 	} else {
214 		tocopy = nmemb * size;
215 		memcpy(db->data + db->filepos, ptr, tocopy);
216 		db->filepos += tocopy;
217 
218 		return nmemb;
219 	}
220 #else				/* !HAVE_MMAP */
221 	return dbbt_chunkwrite_raw(ptr, size, nmemb, db->fd);
222 #endif				/* HAVE_MMAP */
223 }
224 
225 
226 /*
227  * Read a record from the database at the given offset into the given record
228  * structure, returning nonzero on failure.
229  */
dbbt_read_record(qdbint_t db,bt_ul offset,bt_record_t record)230 static int dbbt_read_record(qdbint_t db, bt_ul offset, bt_record_t record)
231 {
232 	int got;
233 
234 	if (dbbt_lseek(db, offset, SEEK_SET) == (off_t) - 1) {
235 		bt_lasterror = strerror(errno);
236 		return 1;
237 	}
238 
239 	got = dbbt_chunkread(record, BT_RECORD_SIZE, 1, db);
240 	if (got < 1) {
241 		bt_lasterror = strerror(errno);
242 		return 1;
243 	}
244 
245 	record->token[BT_TOKEN_MAX - 1] = 0;
246 
247 	return 0;
248 }
249 
250 
251 /*
252  * Write a record to the database at the given offset, returning nonzero on
253  * failure.
254  */
dbbt_write_record(qdbint_t db,bt_ul offset,bt_record_t record)255 static int dbbt_write_record(qdbint_t db, bt_ul offset, bt_record_t record)
256 {
257 	if (dbbt_lseek(db, offset, SEEK_SET) == (off_t) - 1) {
258 		bt_lasterror = strerror(errno);
259 		return 1;
260 	}
261 
262 	if (dbbt_chunkwrite(record, BT_RECORD_SIZE, 1, db) < 1) {
263 		bt_lasterror = strerror(errno);
264 		return 1;
265 	}
266 
267 	return 0;
268 }
269 
270 
271 /*
272  * Find the given token in the database and fill in the given record
273  * structure if found, also filling in the offset of the record (or 0 if not
274  * found) and the offset of the parent record (0 if none).
275  *
276  * Returns -1 if the token looked for was "lower" than its parent or +1 if
277  * "higher", or 0 if there was no parent record (i.e. this is the first
278  * record).
279  */
dbbt_find_token(qdbint_t db,qdb_datum key,bt_record_t record,bt_ul * offset,bt_ul * parent)280 static int dbbt_find_token(qdbint_t db, qdb_datum key, bt_record_t record,
281 			   bt_ul * offset, bt_ul * parent)
282 {
283 	int hilow = 0;
284 	int x;
285 
286 	*offset = 0;
287 	*parent = 0;
288 
289 	if (db == NULL)
290 		return 0;
291 
292 	if (db->size < 2 * sizeof(long))
293 		return 0;
294 
295 	if (!db->gotheadoffs) {
296 		dbbt_lseek(db, 4, SEEK_SET);
297 		dbbt_chunkread(offset, sizeof(*offset), 1, db);
298 		db->head_offset = *offset;
299 		db->gotheadoffs = 1;
300 	} else {
301 		*offset = db->head_offset;
302 	}
303 
304 	while (*offset > 0) {
305 		int len;
306 
307 		if (dbbt_read_record(db, *offset, record)) {
308 			*offset = 0;
309 			break;
310 		}
311 
312 		x = strncmp((char *) (record->token), (char *) (key.data),
313 			    key.size);
314 		len = strlen((char *) (record->token));
315 		if (len < key.size) {
316 			x = -1;
317 		} else if (len > key.size) {
318 			x = 1;
319 		}
320 
321 		if (x == 0) {
322 			return hilow;
323 		} else if (x < 0) {
324 			*parent = *offset;
325 			hilow = -1;
326 			*offset = record->lower;
327 		} else {
328 			*parent = *offset;
329 			hilow = 1;
330 			*offset = record->higher;
331 		}
332 	}
333 
334 	return hilow;
335 }
336 
337 
338 /*
339  * Mark the block at the given offset as free space by adding it to the head
340  * of the free space list. Returns nonzero on error.
341  */
dbbt_markfree(qdbint_t db,bt_ul offset)342 static int dbbt_markfree(qdbint_t db, bt_ul offset)
343 {
344 	bt_ul nextfree, storeoffs;
345 
346 	/*
347 	 * Read the "next free" offset from the free space head block
348 	 */
349 	if (dbbt_lseek(db, 4 + sizeof(offset), SEEK_SET) == (off_t) - 1) {
350 		bt_lasterror = strerror(errno);
351 		return 1;
352 	}
353 	nextfree = 0x80000000;
354 	if (dbbt_chunkread(&nextfree, sizeof(nextfree), 1, db) < 0) {
355 		bt_lasterror = strerror(errno);
356 		return 1;
357 	}
358 
359 	/*
360 	 * Write the given offset as the new "next free" offset
361 	 */
362 	if (dbbt_lseek(db, 4 + sizeof(offset), SEEK_SET) == (off_t) - 1) {
363 		bt_lasterror = strerror(errno);
364 		return 1;
365 	}
366 	storeoffs = offset;
367 	storeoffs |= 0x80000000;
368 	if (dbbt_chunkwrite(&storeoffs, sizeof(offset), 1, db) < 0) {
369 		bt_lasterror = strerror(errno);
370 		return 1;
371 	}
372 
373 	/*
374 	 * Write the old "next free" offset into the block being freed
375 	 */
376 	if (dbbt_lseek(db, offset, SEEK_SET) == (off_t) - 1) {
377 		bt_lasterror = strerror(errno);
378 		return 1;
379 	}
380 	if (dbbt_chunkwrite(&nextfree, sizeof(nextfree), 1, db) < 1) {
381 		bt_lasterror = strerror(errno);
382 		return 1;
383 	}
384 
385 	return 0;
386 }
387 
388 
389 /*
390  * Return nonzero if the given file is of this database type.
391  */
qdb_btree_identify(const char * file)392 int qdb_btree_identify(const char *file)
393 {
394 	FILE *fptr;
395 	unsigned char buf[8];		 /* RATS: ignore (checked) */
396 
397 	if (file == NULL)
398 		return 0;
399 	if (strncasecmp(file, "btree:", 6) == 0)
400 		return 1;
401 
402 	fptr = fopen(file, "rb");
403 	if (fptr == NULL)
404 		return 0;
405 	if (fread(buf, 4, 1, fptr) < 1) {
406 		fclose(fptr);
407 		return 0;
408 	}
409 
410 	fclose(fptr);
411 
412 	if (strncmp((char *) buf, "QSF1", 4) == 0)
413 		return 1;
414 
415 	return 0;
416 }
417 
418 
419 /*
420  * Open the given database in the given way (new database, read-only, or
421  * read-write); return a qdbint_t or NULL on error.
422  */
qdb_btree_open(const char * file,qdb_open_t method)423 qdbint_t qdb_btree_open(const char *file, qdb_open_t method)
424 {
425 	qdbint_t db;
426 	int fd = -1;
427 #ifdef HAVE_FCNTL
428 	int locktype = F_RDLCK;
429 #endif
430 
431 	if (strncasecmp(file, "btree:", 6) == 0)
432 		file += 6;
433 
434 	switch (method) {
435 	case QDB_NEW:
436 		fd = open(file,		    /* RATS: ignore (no race) */
437 			  O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
438 		if (fd < 0)
439 			bt_lasterror = strerror(errno);
440 #ifdef HAVE_FCNTL
441 		locktype = F_WRLCK;
442 #endif
443 		break;
444 	case QDB_READONLY:
445 		fd = open(file,		    /* RATS: ignore (no race) */
446 			  O_RDONLY);
447 		if (fd < 0)
448 			bt_lasterror = strerror(errno);
449 		break;
450 	case QDB_READWRITE:
451 		fd = open(file,		    /* RATS: ignore (no race) */
452 			  O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
453 		if (fd < 0)
454 			bt_lasterror = strerror(errno);
455 #ifdef HAVE_FCNTL
456 		locktype = F_WRLCK;
457 #endif
458 		break;
459 	default:
460 		break;
461 	}
462 
463 	if (fd < 0)
464 		return NULL;
465 
466 	db = calloc(1, sizeof(*db));
467 	if (db == NULL) {
468 		bt_lasterror = strerror(errno);
469 		close(fd);
470 		return NULL;
471 	}
472 
473 	db->filename = strdup(file);
474 	db->fd = fd;
475 	db->modified = 0;
476 
477 #ifdef HAVE_FCNTL
478 	db->locktype = locktype;
479 
480 	if (dbbt_lock(db, locktype)) {
481 		close(fd);
482 		if (db->filename)
483 			free(db->filename);
484 		free(db);
485 		return NULL;
486 	}
487 #endif
488 
489 	db->size = dbbt_lseek(db, 0, SEEK_END);
490 
491 	/*
492 	 * If this is a new database, and it's not readonly, write our
493 	 * header to it and return.
494 	 */
495 	if ((db->size < 5) && (method != QDB_READONLY)) {
496 		dbbt_lseek(db, 0, SEEK_SET);
497 		dbbt_chunkwrite_raw("QSF1", 4, 1, db->fd);
498 		db->size = 4;
499 		db->modified = 1;
500 #ifdef HAVE_MMAP
501 		db->filepos = 0;
502 		db->data =
503 		    mmap(NULL, db->size,
504 			 PROT_READ | (method ==
505 				      QDB_READONLY ? 0 : PROT_WRITE),
506 			 MAP_SHARED, db->fd, 0);
507 		if (db->data == MAP_FAILED)
508 			db->data = 0;
509 #endif
510 		return db;
511 	}
512 
513 	/*
514 	 * We now do some simple checks to make sure that the file is a
515 	 * database of the format we're expecting.
516 	 */
517 
518 	/*
519 	 * If it's shorter than 4 bytes plus 2 long ints, it's not a valid
520 	 * database.
521 	 */
522 	if (db->size < 4 + 2 * sizeof(long)) {
523 		bt_lasterror = _("invalid database (too small)");
524 		close(fd);
525 		if (db->filename)
526 			free(db->filename);
527 		free(db);
528 		return NULL;
529 	}
530 
531 	/*
532 	 * If its size, discounting the two longs at the start, isn't a
533 	 * multiple of our record size, it's not a valid database.
534 	 */
535 	if (((db->size - (4 + 2 * sizeof(long))) % BT_RECORD_SIZE) != 0) {
536 		bt_lasterror = _("invalid database (irregular size)");
537 		close(fd);
538 		if (db->filename)
539 			free(db->filename);
540 		free(db);
541 		return NULL;
542 	}
543 
544 	/*
545 	 * If the first long int (the "head offset") is larger than the size
546 	 * of the file, this isn't a valid database.
547 	 */
548 	{
549 		bt_ul offset = db->size + 1;
550 		dbbt_lseek(db, 4, SEEK_SET);
551 		dbbt_chunkread(&offset, sizeof(offset), 1, db);
552 		if (offset > db->size) {
553 			bt_lasterror =
554 			    _("invalid database (bad head offset)");
555 			close(fd);
556 			if (db->filename)
557 				free(db->filename);
558 			free(db);
559 			return NULL;
560 		}
561 		dbbt_lseek(db, 4, SEEK_SET);
562 	}
563 
564 #ifdef HAVE_MMAP
565 	db->data =
566 	    mmap(NULL, db->size,
567 		 PROT_READ | (method == QDB_READONLY ? 0 : PROT_WRITE),
568 		 MAP_SHARED, db->fd, 0);
569 	if (db->data == MAP_FAILED)
570 		db->data = 0;
571 #endif
572 
573 	return db;
574 }
575 
576 
577 /*
578  * Close the given database.
579  */
qdb_btree_close(qdbint_t db)580 void qdb_btree_close(qdbint_t db)
581 {
582 	if (db == NULL)
583 		return;
584 
585 #ifdef HAVE_MMAP
586 	if (db->data)
587 		munmap(db->data, db->size);
588 #endif
589 
590 #ifdef HAVE_FCNTL
591 	while (db->lockcount > 0)
592 		dbbt_lock(db, F_UNLCK);
593 #endif
594 
595 	close(db->fd);
596 
597 	if (db->modified && db->filename) {
598 #ifdef HAVE_UTIME
599 		struct utimbuf utb;
600 
601 		utb.actime = time(NULL);
602 		utb.modtime = time(NULL);
603 		utime(db->filename, &utb);
604 #endif
605 	}
606 
607 	if (db->filename)
608 		free(db->filename);
609 
610 	free(db);
611 }
612 
613 
614 /*
615  * Fetch a value from the database. The datum returned needs its val.data
616  * free()ing after use. If val.data is NULL, no value was found for the
617  * given key.
618  */
qdb_btree_fetch(qdbint_t db,qdb_datum key)619 qdb_datum qdb_btree_fetch(qdbint_t db, qdb_datum key)
620 {
621 	struct bt_record record;
622 	unsigned long offset, parent;
623 	qdb_datum val;
624 
625 	val.data = NULL;
626 	val.size = 0;
627 
628 	if (db == NULL)
629 		return val;
630 
631 	if (key.size >= BT_TOKEN_MAX)
632 		key.size = BT_TOKEN_MAX - 1;
633 
634 	dbbt_find_token(db, key, &record, &offset, &parent);
635 	if (offset == 0)
636 		return val;
637 
638 	val.size = 3 * sizeof(long);
639 	val.data = calloc(1, val.size);
640 	if (val.data == NULL) {
641 		val.size = 0;
642 		return val;
643 	}
644 
645 	((long *) val.data)[0] = record.data[0];
646 	((long *) val.data)[1] = record.data[1];
647 	((long *) val.data)[2] = record.data[2];
648 
649 	return val;
650 }
651 
652 
653 /*
654  * Return a file descriptor for the given database, or -1 on error.
655  */
qdb_btree_fd(qdbint_t db)656 int qdb_btree_fd(qdbint_t db)
657 {
658 	if (db == NULL)
659 		return -1;
660 	return db->fd;
661 }
662 
663 
664 /*
665  * Store the given key with the given value into the database, replacing any
666  * existing value for that key. Returns nonzero on error.
667  */
qdb_btree_store(qdbint_t db,qdb_datum key,qdb_datum val)668 int qdb_btree_store(qdbint_t db, qdb_datum key, qdb_datum val)
669 {
670 	struct bt_record record, head;
671 	unsigned long offset, parent, nextfree;
672 	int x;
673 
674 	if (db == NULL)
675 		return 1;
676 
677 	memset(&head, 0, BT_RECORD_SIZE);
678 	memset(&record, 0, BT_RECORD_SIZE);
679 
680 	if (key.size >= BT_TOKEN_MAX)
681 		key.size = BT_TOKEN_MAX - 1;
682 
683 	x = dbbt_find_token(db, key, &record, &offset, &parent);
684 
685 	memcpy(record.token, key.data, key.size);
686 	record.token[key.size] = 0;
687 	record.data[0] = ((long *) val.data)[0];
688 	record.data[1] = ((long *) val.data)[1];
689 	if (val.size > 2 * sizeof(long))
690 		record.data[2] = ((long *) val.data)[2];
691 
692 	qdb_int__sig_block();
693 
694 	/*
695 	 * Record exists - overwrite it.
696 	 */
697 	if (offset > 0) {
698 		if (dbbt_write_record(db, offset, &record)) {
699 			qdb_int__sig_unblock();
700 			return 1;
701 		}
702 		qdb_int__sig_unblock();
703 		return 0;
704 	}
705 
706 	record.lower = 0;
707 	record.higher = 0;
708 
709 	/*
710 	 * Database has just been created, so fill in the header and write
711 	 * this record as the first one.
712 	 */
713 	if (db->size <= 4 + sizeof(offset)) {
714 
715 		if (dbbt_lseek(db, 4, SEEK_SET) == (off_t) - 1) {
716 			bt_lasterror = strerror(errno);
717 			qdb_int__sig_unblock();
718 			return 1;
719 		}
720 		offset = 4 + 2 * sizeof(offset);
721 		if (dbbt_chunkwrite(&offset, sizeof(offset), 1, db) < 1) {
722 			bt_lasterror = strerror(errno);
723 			qdb_int__sig_unblock();
724 			return 1;
725 		}
726 
727 		db->gotheadoffs = 0;
728 
729 		offset = 0;
730 		if (dbbt_chunkwrite(&offset, sizeof(offset), 1, db) < 1) {
731 			bt_lasterror = strerror(errno);
732 			qdb_int__sig_unblock();
733 			return 1;
734 		}
735 
736 		head.lower = (4 + 2 * sizeof(offset)) + BT_RECORD_SIZE;
737 		if (dbbt_chunkwrite(&head, BT_RECORD_SIZE, 1, db) < 1) {
738 			bt_lasterror = strerror(errno);
739 			qdb_int__sig_unblock();
740 			return 1;
741 		}
742 
743 		if (dbbt_chunkwrite(&record, BT_RECORD_SIZE, 1, db) < 1) {
744 			bt_lasterror = strerror(errno);
745 			qdb_int__sig_unblock();
746 			return 1;
747 		}
748 
749 		db->size = dbbt_lseek(db, 0, SEEK_CUR);
750 		qdb_int__sig_unblock();
751 		return 0;
752 	}
753 
754 	/*
755 	 * Get offset of next free space block.
756 	 */
757 	if (dbbt_lseek(db, 4 + sizeof(offset), SEEK_SET) == (off_t) - 1) {
758 		bt_lasterror = strerror(errno);
759 		qdb_int__sig_unblock();
760 		return 1;
761 	}
762 	if (dbbt_chunkread(&offset, sizeof(offset), 1, db) < 1) {
763 		bt_lasterror = strerror(errno);
764 		qdb_int__sig_unblock();
765 		return 1;
766 	}
767 
768 	offset &= 0x7FFFFFFF;
769 
770 	/*
771 	 * If offset is 0 or we can't read from that offset, it's a new
772 	 * block at the end of the file, otherwise we take the next free
773 	 * offset from there and store it in the core free pointer, and then
774 	 * use that free offset.
775 	 */
776 	if (dbbt_lseek(db, offset, SEEK_SET) == (off_t) - 1) {
777 		bt_lasterror = strerror(errno);
778 		qdb_int__sig_unblock();
779 		return 1;
780 	}
781 	if ((offset < 5)
782 	    || (dbbt_chunkread(&nextfree, sizeof(nextfree), 1, db) < 1)
783 	    ) {
784 		offset = dbbt_lseek(db, 0, SEEK_END);
785 		nextfree = 0x80000000;
786 	}
787 	if (dbbt_lseek(db, 4 + sizeof(offset), SEEK_SET) == (off_t) - 1) {
788 		bt_lasterror = strerror(errno);
789 		qdb_int__sig_unblock();
790 		return 1;
791 	}
792 	if (dbbt_chunkwrite(&nextfree, sizeof(nextfree), 1, db) < 0) {
793 		bt_lasterror = strerror(errno);
794 		qdb_int__sig_unblock();
795 		return 1;
796 	}
797 
798 	if (dbbt_write_record(db, offset, &record)) {
799 		qdb_int__sig_unblock();
800 		return 1;
801 	}
802 
803 	/*
804 	 * Now attach the new record to its parent, if applicable.
805 	 */
806 	if (parent > 0) {
807 		if (dbbt_read_record(db, parent, &record)) {
808 			qdb_int__sig_unblock();
809 			return 1;
810 		}
811 		if (x < 0) {
812 			record.lower = offset;
813 		} else {
814 			record.higher = offset;
815 		}
816 		if (dbbt_write_record(db, parent, &record)) {
817 			qdb_int__sig_unblock();
818 			return 1;
819 		}
820 	}
821 
822 	qdb_int__sig_unblock();
823 
824 	return 0;
825 }
826 
827 
828 /*
829  * Return the "first" key in the database, suitable for using with repeated
830  * calls to qdb_nextkey() to walk through every key in the database.
831  */
qdb_btree_firstkey(qdbint_t db)832 qdb_datum qdb_btree_firstkey(qdbint_t db)
833 {
834 	struct bt_record record;
835 	qdb_datum newkey;
836 
837 	newkey.data = NULL;
838 	newkey.size = 0;
839 
840 	if (dbbt_lseek(db, 4 + 2 * sizeof(unsigned long) + BT_RECORD_SIZE,
841 		       SEEK_SET) == (off_t) - 1)
842 		return newkey;
843 
844 	do {
845 		if (dbbt_chunkread(&record, BT_RECORD_SIZE, 1, db) < 1) {
846 			return newkey;
847 		}
848 	} while (record.lower & 0x80000000);
849 
850 	record.token[BT_TOKEN_MAX - 1] = 0;
851 
852 	newkey.data = (unsigned char *) strdup((char *) (record.token));
853 	newkey.size = strlen((char *) (record.token));
854 
855 	return newkey;
856 }
857 
858 
859 /*
860  * Return the "next" key in the database, or key.data=NULL when all keys
861  * have been returned.
862  */
qdb_btree_nextkey(qdbint_t db,qdb_datum key)863 qdb_datum qdb_btree_nextkey(qdbint_t db, qdb_datum key)
864 {
865 	struct bt_record record;
866 	unsigned long offset, parent;
867 	qdb_datum newkey;
868 
869 	newkey.data = NULL;
870 	newkey.size = 0;
871 
872 	if (key.data == NULL) {
873 		return newkey;
874 	}
875 
876 	dbbt_find_token(db, key, &record, &offset, &parent);
877 
878 	if (offset < 1) {
879 		return newkey;
880 	}
881 
882 	if (dbbt_lseek(db, offset + BT_RECORD_SIZE, SEEK_SET) ==
883 	    (off_t) - 1) {
884 		return newkey;
885 	}
886 
887 	do {
888 		if (dbbt_chunkread(&record, BT_RECORD_SIZE, 1, db) < 1) {
889 			return newkey;
890 		}
891 	} while (record.lower & 0x80000000);
892 
893 	record.token[BT_TOKEN_MAX - 1] = 0;
894 
895 	newkey.data = (unsigned char *) strdup((char *) (record.token));
896 	newkey.size = strlen((char *) (record.token));
897 
898 	return newkey;
899 }
900 
901 
902 /*
903  * Put the given record back into the binary tree, by storing it over again.
904  * Returns nonzero on error.
905  */
qdb_btree_delete__rewrite(qdbint_t db,bt_record_t record)906 static int qdb_btree_delete__rewrite(qdbint_t db, bt_record_t record)
907 {
908 	qdb_datum key, val;
909 	struct bt_record newrec;
910 	unsigned long offset, parent;
911 	int x;
912 
913 	key.data = record->token;
914 	key.size = strlen((char *) (record->token));
915 	val.data = (void *) (record->data);
916 	val.size = 3 * sizeof(long);
917 
918 	if (qdb_btree_store(db, key, val))
919 		return 1;
920 
921 	x = dbbt_find_token(db, key, &newrec, &offset, &parent);
922 	if (offset < 4)
923 		return 1;
924 
925 	newrec.lower = record->lower;
926 	newrec.higher = record->higher;
927 
928 	qdb_int__sig_block();
929 	if (dbbt_write_record(db, offset, &newrec)) {
930 		qdb_int__sig_unblock();
931 		return 1;
932 	}
933 
934 	qdb_int__sig_unblock();
935 
936 	return 0;
937 }
938 
939 
940 /*
941  * Delete the given key from the database. Returns nonzero on error.
942  */
qdb_btree_delete(qdbint_t db,qdb_datum key)943 int qdb_btree_delete(qdbint_t db, qdb_datum key)
944 {
945 	struct bt_record record, recparent, reclower, rechigher;
946 	unsigned long offset, parent;
947 
948 	int x;
949 
950 	if (db == NULL)
951 		return 1;
952 
953 	if (key.size < 1)
954 		return 1;
955 
956 	x = dbbt_find_token(db, key, &record, &offset, &parent);
957 	if (offset < 5)
958 		return 1;
959 
960 	/*
961 	 * Get a copy of the lower and higher records, if any.
962 	 */
963 	if (record.lower > 0) {
964 		if (dbbt_read_record(db, record.lower, &reclower))
965 			return 1;
966 	}
967 
968 	if (record.higher > 0) {
969 		if (dbbt_read_record(db, record.higher, &rechigher))
970 			return 1;
971 	}
972 
973 	qdb_int__sig_block();
974 
975 	/*
976 	 * Remove the link to this record from its parent.
977 	 */
978 	if (parent > 0) {
979 		if (dbbt_read_record(db, parent, &recparent)) {
980 			qdb_int__sig_unblock();
981 			return 1;
982 		}
983 		if (x < 0) {
984 			recparent.lower = 0;
985 		} else {
986 			recparent.higher = 0;
987 		}
988 		if (dbbt_write_record(db, parent, &recparent)) {
989 			qdb_int__sig_unblock();
990 			return 1;
991 		}
992 	}
993 
994 	/*
995 	 * Now we add this record's offset to the head of the "free records"
996 	 * linked list.
997 	 */
998 	if (dbbt_markfree(db, offset)) {
999 		qdb_int__sig_unblock();
1000 		return 1;
1001 	}
1002 
1003 	/*
1004 	 * If there were any direct children, mark them as free as well.
1005 	 */
1006 	if (record.lower > 0) {
1007 		if (dbbt_markfree(db, record.lower)) {
1008 			qdb_int__sig_unblock();
1009 			return 1;
1010 		}
1011 	}
1012 	if (record.higher > 0) {
1013 		if (dbbt_markfree(db, record.higher)) {
1014 			qdb_int__sig_unblock();
1015 			return 1;
1016 		}
1017 	}
1018 
1019 	qdb_int__sig_unblock();
1020 
1021 	/*
1022 	 * Re-attach any children of this record to the database.
1023 	 */
1024 	if (record.lower > 0) {
1025 		if (qdb_btree_delete__rewrite(db, &reclower)) {
1026 			return 1;
1027 		}
1028 	}
1029 	if (record.higher > 0) {
1030 		if (qdb_btree_delete__rewrite(db, &rechigher)) {
1031 			return 1;
1032 		}
1033 	}
1034 
1035 	return 0;
1036 }
1037 
1038 
1039 /*
1040  * Temporarily release the lock on the database.
1041  */
qdb_btree_unlock(qdbint_t db)1042 void qdb_btree_unlock(qdbint_t db)
1043 {
1044 #ifdef HAVE_FCNTL
1045 	dbbt_lock(db, F_UNLCK);
1046 #endif
1047 }
1048 
1049 
1050 /*
1051  * Reassert the lock on the database.
1052  */
qdb_btree_relock(qdbint_t db)1053 void qdb_btree_relock(qdbint_t db)
1054 {
1055 #ifdef HAVE_FCNTL
1056 	dbbt_lock(db, db->locktype);
1057 #endif
1058 	db->gotheadoffs = 0;
1059 }
1060 
1061 
1062 /*
1063  * Reorganise the database for better efficiency.
1064  *
1065  * This is done by dumping all the tokens in this database into a temporary
1066  * new one, then copying this temporary database over the top of the current
1067  * database.
1068  */
qdb_btree_optimise(qdbint_t db)1069 void qdb_btree_optimise(qdbint_t db)
1070 {
1071 	char filename[1024];		 /* RATS: ignore (checked all) */
1072 	qdbint_t newdb;
1073 	qdb_datum key, val, nextkey;
1074 	int newdbfd, got;
1075 
1076 #ifdef P_tmpdir
1077 #ifdef HAVE_SNPRINTF
1078 	snprintf(filename, sizeof(filename),
1079 #else
1080 	sprintf(filename,		    /* RATS: ignore (checked) */
1081 #endif
1082 		"%.*s", (int) (sizeof(filename) - 1),
1083 		P_tmpdir "/qsfXXXXXX");
1084 #else
1085 #ifdef HAVE_SNPRINTF
1086 	snprintf(filename, sizeof(filename),
1087 #else
1088 	sprintf(filename,		    /* RATS: ignore (checked) */
1089 #endif
1090 		"%.*s", (int) (sizeof(filename) - 1), "/tmp/qsfXXXXXX");
1091 #endif
1092 
1093 #ifdef HAVE_MKSTEMP
1094 	newdbfd = mkstemp(filename);
1095 #else
1096 	newdbfd = -1;
1097 	if (tmpnam(filename) != NULL) {	    /* RATS: ignore (OK) */
1098 		newdbfd = open(filename,    /* RATS: ignore (OK) */
1099 			       O_RDWR | O_CREAT | O_EXCL,
1100 			       S_IRUSR | S_IWUSR);
1101 	}
1102 #endif
1103 
1104 	if (newdbfd < 0) {
1105 		fprintf(stderr, "%s: %s: %s\n",
1106 			filename,
1107 			_("failed to create temporary file"),
1108 			strerror(errno));
1109 		return;
1110 	}
1111 
1112 	newdb = qdb_btree_open(filename, QDB_NEW);
1113 	if (newdb == NULL) {
1114 		close(newdbfd);
1115 		return;
1116 	}
1117 
1118 	key = qdb_btree_firstkey(db);
1119 	while (key.data != NULL) {
1120 		val = qdb_btree_fetch(db, key);
1121 		if (val.data != NULL) {
1122 			qdb_btree_store(newdb, key, val);
1123 			free(val.data);
1124 		}
1125 		nextkey = qdb_btree_nextkey(db, key);
1126 		free(key.data);
1127 		key = nextkey;
1128 	}
1129 
1130 	qdb_btree_close(newdb);
1131 
1132 #ifdef HAVE_MMAP
1133 	munmap(db->data, db->size);
1134 	db->data = 0;
1135 #endif
1136 
1137 	qdb_int__sig_block();
1138 
1139 	dbbt_lseek(db, 0, SEEK_SET);
1140 	lseek(newdbfd, 0, SEEK_SET);
1141 
1142 	do {
1143 		unsigned char buf[4096]; /* RATS: ignore (checked) */
1144 
1145 		got = dbbt_chunkread_raw(buf, 1, sizeof(buf), newdbfd);
1146 		if (got > 0)
1147 			dbbt_chunkwrite_raw(buf, 1, got, db->fd);
1148 
1149 	} while (got > 0);
1150 
1151 	if (ftruncate(db->fd, lseek(db->fd, 0, SEEK_CUR)) != 0) {
1152 		log_add(0, "%s: failed to truncate database: %s",
1153 			db->filename, strerror(errno));
1154 	}
1155 
1156 	close(newdbfd);
1157 	remove(filename);
1158 
1159 	qdb_int__sig_unblock();
1160 
1161 	db->size = dbbt_lseek(db, 0, SEEK_END);
1162 	db->gotheadoffs = 0;
1163 
1164 #ifdef HAVE_MMAP
1165 	db->data =
1166 	    mmap(NULL, db->size, PROT_READ | PROT_WRITE, MAP_SHARED,
1167 		 db->fd, 0);
1168 	if (db->data == MAP_FAILED)
1169 		db->data = 0;
1170 	db->filepos = db->size;
1171 #endif
1172 }
1173 
1174 
1175 /*
1176  * Return a string describing the last database error to occur.
1177  */
qdb_btree_error(void)1178 char *qdb_btree_error(void)
1179 {
1180 	return bt_lasterror;
1181 }
1182 
1183 #endif				/* USING_BTREE */
1184 
1185 /* EOF */
1186