xref: /openbsd/gnu/usr.bin/perl/ext/SDBM_File/sdbm.c (revision 76d0caae)
1 /*
2  * sdbm - ndbm work-alike hashed database library
3  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4  * author: oz@nexus.yorku.ca
5  * status: public domain.
6  *
7  * core routines
8  */
9 
10 #include "config.h"
11 #ifdef WIN32
12 #include "io.h"
13 #endif
14 #include "sdbm.h"
15 #include "tune.h"
16 #include "pair.h"
17 
18 #ifdef I_FCNTL
19 # include <fcntl.h>
20 #endif
21 #ifdef I_SYS_FILE
22 # include <sys/file.h>
23 #endif
24 
25 #include <string.h>
26 
27 /*
28  * externals
29  */
30 
31 #include <errno.h> /* See notes in perl.h about avoiding
32 			extern int errno; */
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 extern Malloc_t malloc(MEM_SIZE);
38 extern Free_t free(Malloc_t);
39 
40 #ifdef __cplusplus
41 }
42 #endif
43 
44 const datum nullitem = {0, 0};
45 
46 /*
47  * forward
48  */
49 static int getdbit(DBM *, long);
50 static int setdbit(DBM *, long);
51 static int getpage(DBM *, long);
52 static datum getnext(DBM *);
53 static int makroom(DBM *, long, int);
54 
55 /*
56  * useful macros
57  */
58 #define bad(x)		((x).dptr == NULL || (x).dsize < 0)
59 #define exhash(item)	sdbm_hash((item).dptr, (item).dsize)
60 #define ioerr(db)	((db)->flags |= DBM_IOERR)
61 
62 #define OFF_PAG(off)	(long) (off) * PBLKSIZ
63 #define OFF_DIR(off)	(long) (off) * DBLKSIZ
64 
65 static const long masks[] = {
66 	000000000000, 000000000001, 000000000003, 000000000007,
67 	000000000017, 000000000037, 000000000077, 000000000177,
68 	000000000377, 000000000777, 000000001777, 000000003777,
69 	000000007777, 000000017777, 000000037777, 000000077777,
70 	000000177777, 000000377777, 000000777777, 000001777777,
71 	000003777777, 000007777777, 000017777777, 000037777777,
72 	000077777777, 000177777777, 000377777777, 000777777777,
73 	001777777777, 003777777777, 007777777777, 017777777777
74 };
75 
76 DBM *
77 sdbm_open(char *file, int flags, int mode)
78 {
79 	DBM *db;
80 	char *dirname;
81 	char *pagname;
82 	size_t filelen;
83 	const size_t dirfext_size = sizeof(DIRFEXT "");
84 	const size_t pagfext_size = sizeof(PAGFEXT "");
85 
86 	if (file == NULL || !*file)
87 		return errno = EINVAL, (DBM *) NULL;
88 /*
89  * need space for two separate filenames
90  */
91 	filelen = strlen(file);
92 
93 	if ((dirname = (char *) malloc(filelen + dirfext_size
94 				       + filelen + pagfext_size)) == NULL)
95 		return errno = ENOMEM, (DBM *) NULL;
96 /*
97  * build the file names
98  */
99 	memcpy(dirname, file, filelen);
100 	memcpy(dirname + filelen, DIRFEXT, dirfext_size);
101 	pagname = dirname + filelen + dirfext_size;
102 	memcpy(pagname, file, filelen);
103 	memcpy(pagname + filelen, PAGFEXT, pagfext_size);
104 
105 	db = sdbm_prep(dirname, pagname, flags, mode);
106 	free((char *) dirname);
107 	return db;
108 }
109 
110 DBM *
111 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
112 {
113 	DBM *db;
114 	struct stat dstat;
115 
116 	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
117 		return errno = ENOMEM, (DBM *) NULL;
118 
119         db->flags = 0;
120         db->hmask = 0;
121         db->blkptr = 0;
122         db->keyptr = 0;
123 /*
124  * adjust user flags so that WRONLY becomes RDWR,
125  * as required by this package. Also set our internal
126  * flag for RDONLY if needed.
127  */
128 	if (flags & O_WRONLY)
129 		flags = (flags & ~O_WRONLY) | O_RDWR;
130 
131 	else if ((flags & 03) == O_RDONLY)
132 		db->flags = DBM_RDONLY;
133 /*
134  * open the files in sequence, and stat the dirfile.
135  * If we fail anywhere, undo everything, return NULL.
136  */
137 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__)
138 	flags |= O_BINARY;
139 #	endif
140 	if ((db->pagf = open(pagname, flags, mode)) > -1) {
141 		if ((db->dirf = open(dirname, flags, mode)) > -1) {
142 /*
143  * need the dirfile size to establish max bit number.
144  */
145 			if (fstat(db->dirf, &dstat) == 0) {
146 /*
147  * zero size: either a fresh database, or one with a single,
148  * unsplit data page: dirpage is all zeros.
149  */
150 				db->dirbno = (!dstat.st_size) ? 0 : -1;
151 				db->pagbno = -1;
152 				db->maxbno = dstat.st_size * BYTESIZ;
153 
154 				(void) memset(db->pagbuf, 0, PBLKSIZ);
155 				(void) memset(db->dirbuf, 0, DBLKSIZ);
156 			/*
157 			 * success
158 			 */
159 				return db;
160 			}
161 			(void) close(db->dirf);
162 		}
163 		(void) close(db->pagf);
164 	}
165 	free((char *) db);
166 	return (DBM *) NULL;
167 }
168 
169 void
170 sdbm_close(DBM *db)
171 {
172 	if (db == NULL)
173 		errno = EINVAL;
174 	else {
175 		(void) close(db->dirf);
176 		(void) close(db->pagf);
177 		free((char *) db);
178 	}
179 }
180 
181 datum
182 sdbm_fetch(DBM *db, datum key)
183 {
184 	if (db == NULL || bad(key))
185 		return errno = EINVAL, nullitem;
186 
187 	if (getpage(db, exhash(key)))
188 		return getpair(db->pagbuf, key);
189 
190 	return ioerr(db), nullitem;
191 }
192 
193 int
194 sdbm_exists(DBM *db, datum key)
195 {
196 	if (db == NULL || bad(key))
197 		return errno = EINVAL, -1;
198 
199 	if (getpage(db, exhash(key)))
200 		return exipair(db->pagbuf, key);
201 
202 	return ioerr(db), -1;
203 }
204 
205 int
206 sdbm_delete(DBM *db, datum key)
207 {
208 	if (db == NULL || bad(key))
209 		return errno = EINVAL, -1;
210 	if (sdbm_rdonly(db))
211 		return errno = EPERM, -1;
212 
213 	if (getpage(db, exhash(key))) {
214 		if (!delpair(db->pagbuf, key))
215 			return -1;
216 /*
217  * update the page file
218  */
219 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
220 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
221 			return ioerr(db), -1;
222 
223 		return 0;
224 	}
225 
226 	return ioerr(db), -1;
227 }
228 
229 int
230 sdbm_store(DBM *db, datum key, datum val, int flags)
231 {
232 	int need;
233 	long hash;
234 
235 	if (db == NULL || bad(key))
236 		return errno = EINVAL, -1;
237 	if (sdbm_rdonly(db))
238 		return errno = EPERM, -1;
239 
240 	need = key.dsize + val.dsize;
241 /*
242  * is the pair too big (or too small) for this database ??
243  */
244 	if (need < 0 || need > PAIRMAX)
245 		return errno = EINVAL, -1;
246 
247 	if (getpage(db, (hash = exhash(key)))) {
248 /*
249  * if we need to replace, delete the key/data pair
250  * first. If it is not there, ignore.
251  */
252 		if (flags == DBM_REPLACE)
253 			(void) delpair(db->pagbuf, key);
254 #ifdef SEEDUPS
255 		else if (duppair(db->pagbuf, key))
256 			return 1;
257 #endif
258 /*
259  * if we do not have enough room, we have to split.
260  */
261 		if (!fitpair(db->pagbuf, need))
262 			if (!makroom(db, hash, need))
263 				return ioerr(db), -1;
264 /*
265  * we have enough room or split is successful. insert the key,
266  * and update the page file.
267  */
268 		(void) putpair(db->pagbuf, key, val);
269 
270 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
271 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
272 			return ioerr(db), -1;
273 	/*
274 	 * success
275 	 */
276 		return 0;
277 	}
278 
279 	return ioerr(db), -1;
280 }
281 
282 /*
283  * makroom - make room by splitting the overfull page
284  * this routine will attempt to make room for SPLTMAX times before
285  * giving up.
286  */
287 static int
288 makroom(DBM *db, long int hash, int need)
289 {
290 	long newp;
291 	char twin[PBLKSIZ];
292 #if defined(DOSISH) || defined(WIN32)
293 	char zer[PBLKSIZ];
294 	long oldtail;
295 #endif
296 	char *pag = db->pagbuf;
297 	char *New = twin;
298 	int smax = SPLTMAX;
299 #ifdef BADMESS
300 	int rc;
301 #endif
302 
303 	do {
304 /*
305  * split the current page
306  */
307 		(void) splpage(pag, New, db->hmask + 1);
308 /*
309  * address of the new page
310  */
311 		newp = (hash & db->hmask) | (db->hmask + 1);
312 
313 /*
314  * write delay, read avoidance/cache shuffle:
315  * select the page for incoming pair: if key is to go to the new page,
316  * write out the previous one, and copy the new one over, thus making
317  * it the current page. If not, simply write the new page, and we are
318  * still looking at the page of interest. current page is not updated
319  * here, as sdbm_store will do so, after it inserts the incoming pair.
320  */
321 
322 #if defined(DOSISH) || defined(WIN32)
323 		/*
324 		 * Fill hole with 0 if made it.
325 		 * (hole is NOT read as 0)
326 		 */
327 		oldtail = lseek(db->pagf, 0L, SEEK_END);
328 		memset(zer, 0, PBLKSIZ);
329 		while (OFF_PAG(newp) > oldtail) {
330 			if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
331 			    write(db->pagf, zer, PBLKSIZ) < 0) {
332 
333 				return 0;
334 			}
335 			oldtail += PBLKSIZ;
336 		}
337 #endif
338 		if (hash & (db->hmask + 1)) {
339 			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
340 			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
341 				return 0;
342 			db->pagbno = newp;
343 			(void) memcpy(pag, New, PBLKSIZ);
344 		}
345 		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
346 			 || write(db->pagf, New, PBLKSIZ) < 0)
347 			return 0;
348 
349 		if (!setdbit(db, db->curbit))
350 			return 0;
351 /*
352  * see if we have enough room now
353  */
354 		if (fitpair(pag, need))
355 			return 1;
356 /*
357  * try again... update curbit and hmask as getpage would have
358  * done. because of our update of the current page, we do not
359  * need to read in anything. BUT we have to write the current
360  * [deferred] page out, as the window of failure is too great.
361  */
362 		db->curbit = 2 * db->curbit +
363 			((hash & (db->hmask + 1)) ? 2 : 1);
364 		db->hmask |= db->hmask + 1;
365 
366 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
367 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
368 			return 0;
369 
370 	} while (--smax);
371 /*
372  * if we are here, this is real bad news. After SPLTMAX splits,
373  * we still cannot fit the key. say goodnight.
374  */
375 #ifdef BADMESS
376 	rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
377 	/* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
378 	 * useful here but that would mean pulling in perl.h */
379 	(void)rc;
380 #endif
381 	return 0;
382 
383 }
384 
385 /*
386  * the following two routines will break if
387  * deletions aren't taken into account. (ndbm bug)
388  */
389 datum
390 sdbm_firstkey(DBM *db)
391 {
392 	if (db == NULL)
393 		return errno = EINVAL, nullitem;
394 /*
395  * start at page 0
396  */
397 	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
398 	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
399 		return ioerr(db), nullitem;
400         if (!chkpage(db->pagbuf)) {
401             errno = EINVAL;
402             ioerr(db);
403             db->pagbno = -1;
404             return nullitem;
405         }
406 	db->pagbno = 0;
407 	db->blkptr = 0;
408 	db->keyptr = 0;
409 
410 	return getnext(db);
411 }
412 
413 datum
414 sdbm_nextkey(DBM *db)
415 {
416 	if (db == NULL)
417 		return errno = EINVAL, nullitem;
418 	return getnext(db);
419 }
420 
421 /*
422  * all important binary trie traversal
423  */
424 static int
425 getpage(DBM *db, long int hash)
426 {
427 	int hbit;
428 	long dbit;
429 	long pagb;
430 
431 	dbit = 0;
432 	hbit = 0;
433 	while (dbit < db->maxbno && getdbit(db, dbit))
434 		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
435 
436 	debug(("dbit: %d...", dbit));
437 
438 	db->curbit = dbit;
439 	db->hmask = masks[hbit];
440 
441 	pagb = hash & db->hmask;
442 /*
443  * see if the block we need is already in memory.
444  * note: this lookaside cache has about 10% hit rate.
445  */
446 	if (pagb != db->pagbno) {
447 /*
448  * note: here, we assume a "hole" is read as 0s.
449  * if not, must zero pagbuf first.
450  */
451 		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
452 		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
453 			return 0;
454 		if (!chkpage(db->pagbuf)) {
455                     errno = EINVAL;
456                     db->pagbno = -1;
457                     ioerr(db);
458                     return 0;
459                 }
460 		db->pagbno = pagb;
461 
462 		debug(("pag read: %d\n", pagb));
463 	}
464 	return 1;
465 }
466 
467 static int
468 getdbit(DBM *db, long int dbit)
469 {
470 	long c;
471 	long dirb;
472 
473 	c = dbit / BYTESIZ;
474 	dirb = c / DBLKSIZ;
475 
476 	if (dirb != db->dirbno) {
477 		int got;
478 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
479 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
480 			return 0;
481 		if (got==0)
482 			memset(db->dirbuf,0,DBLKSIZ);
483 		db->dirbno = dirb;
484 
485 		debug(("dir read: %d\n", dirb));
486 	}
487 
488 	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
489 }
490 
491 static int
492 setdbit(DBM *db, long int dbit)
493 {
494 	long c;
495 	long dirb;
496 
497 	c = dbit / BYTESIZ;
498 	dirb = c / DBLKSIZ;
499 
500 	if (dirb != db->dirbno) {
501 		int got;
502 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
503 		    || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
504 			return 0;
505 		if (got==0)
506 			memset(db->dirbuf,0,DBLKSIZ);
507 		db->dirbno = dirb;
508 
509 		debug(("dir read: %d\n", dirb));
510 	}
511 
512 	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
513 
514 #if 0
515 	if (dbit >= db->maxbno)
516 		db->maxbno += DBLKSIZ * BYTESIZ;
517 #else
518 	if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
519 		db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
520 #endif
521 
522 	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
523 	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
524 		return 0;
525 
526 	return 1;
527 }
528 
529 /*
530  * getnext - get the next key in the page, and if done with
531  * the page, try the next page in sequence
532  */
533 static datum
534 getnext(DBM *db)
535 {
536 	datum key;
537 
538 	for (;;) {
539 		db->keyptr++;
540 		key = getnkey(db->pagbuf, db->keyptr);
541 		if (key.dptr != NULL)
542 			return key;
543 /*
544  * we either run out, or there is nothing on this page..
545  * try the next one... If we lost our position on the
546  * file, we will have to seek.
547  */
548 		db->keyptr = 0;
549 		if (db->pagbno != db->blkptr++)
550 			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
551 				break;
552 		db->pagbno = db->blkptr;
553 		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
554 			break;
555 		if (!chkpage(db->pagbuf)) {
556                     errno = EINVAL;
557                     db->pagbno = -1;
558                     ioerr(db);
559                     break;
560                 }
561 	}
562 
563 	return ioerr(db), nullitem;
564 }
565 
566