1 /*
2 * sdbm - ndbm work-alike hashed database library
3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4 * author: oz@nexus.yorku.ca
5 * status: public domain.
6 *
7 * core routines
8 */
9
10 #include "config.h"
11 #ifdef WIN32
12 #include "io.h"
13 #endif
14 #include "sdbm.h"
15 #include "tune.h"
16 #include "pair.h"
17
18 #ifdef I_FCNTL
19 # include <fcntl.h>
20 #endif
21 #ifdef I_SYS_FILE
22 # include <sys/file.h>
23 #endif
24
25 #include <string.h>
26
27 /*
28 * externals
29 */
30
31 #include <errno.h> /* See notes in perl.h about avoiding
32 extern int errno; */
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36
37 extern Malloc_t malloc(MEM_SIZE);
38 extern Free_t free(Malloc_t);
39
40 #ifdef __cplusplus
41 }
42 #endif
43
44 const datum nullitem = {0, 0};
45
46 #ifdef WIN32
47 # undef lseek
48 # define lseek _lseeki64
49 #endif
50
51 /*
52 * forward
53 */
54 static int getdbit(DBM *, long);
55 static int setdbit(DBM *, long);
56 static int getpage(DBM *, long);
57 static datum getnext(DBM *);
58 static int makroom(DBM *, long, int);
59
60 /*
61 * useful macros
62 */
63 #define bad(x) ((x).dptr == NULL || (x).dsize < 0)
64 #define exhash(item) sdbm_hash((item).dptr, (item).dsize)
65 #define ioerr(db) ((db)->flags |= DBM_IOERR)
66
67 #define OFF_PAG(off) (Off_t) (off) * PBLKSIZ
68 #define OFF_DIR(off) (Off_t) (off) * DBLKSIZ
69
70 static const long masks[] = {
71 000000000000, 000000000001, 000000000003, 000000000007,
72 000000000017, 000000000037, 000000000077, 000000000177,
73 000000000377, 000000000777, 000000001777, 000000003777,
74 000000007777, 000000017777, 000000037777, 000000077777,
75 000000177777, 000000377777, 000000777777, 000001777777,
76 000003777777, 000007777777, 000017777777, 000037777777,
77 000077777777, 000177777777, 000377777777, 000777777777,
78 001777777777, 003777777777, 007777777777, 017777777777
79 };
80
81 DBM *
sdbm_open(char * file,int flags,int mode)82 sdbm_open(char *file, int flags, int mode)
83 {
84 DBM *db;
85 char *dirname;
86 char *pagname;
87 size_t filelen;
88 const size_t dirfext_size = sizeof(DIRFEXT "");
89 const size_t pagfext_size = sizeof(PAGFEXT "");
90
91 if (file == NULL || !*file)
92 return errno = EINVAL, (DBM *) NULL;
93 /*
94 * need space for two separate filenames
95 */
96 filelen = strlen(file);
97
98 if ((dirname = (char *) malloc(filelen + dirfext_size
99 + filelen + pagfext_size)) == NULL)
100 return errno = ENOMEM, (DBM *) NULL;
101 /*
102 * build the file names
103 */
104 memcpy(dirname, file, filelen);
105 memcpy(dirname + filelen, DIRFEXT, dirfext_size);
106 pagname = dirname + filelen + dirfext_size;
107 memcpy(pagname, file, filelen);
108 memcpy(pagname + filelen, PAGFEXT, pagfext_size);
109
110 db = sdbm_prep(dirname, pagname, flags, mode);
111 free((char *) dirname);
112 return db;
113 }
114
115 DBM *
sdbm_prep(char * dirname,char * pagname,int flags,int mode)116 sdbm_prep(char *dirname, char *pagname, int flags, int mode)
117 {
118 DBM *db;
119 struct stat dstat;
120
121 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
122 return errno = ENOMEM, (DBM *) NULL;
123
124 db->flags = 0;
125 db->hmask = 0;
126 db->blkptr = 0;
127 db->keyptr = 0;
128 /*
129 * adjust user flags so that WRONLY becomes RDWR,
130 * as required by this package. Also set our internal
131 * flag for RDONLY if needed.
132 */
133 if (flags & O_WRONLY)
134 flags = (flags & ~O_WRONLY) | O_RDWR;
135
136 else if ((flags & 03) == O_RDONLY)
137 db->flags = DBM_RDONLY;
138 /*
139 * open the files in sequence, and stat the dirfile.
140 * If we fail anywhere, undo everything, return NULL.
141 */
142 #if defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
143 flags |= O_BINARY;
144 # endif
145 if ((db->pagf = open(pagname, flags, mode)) > -1) {
146 if ((db->dirf = open(dirname, flags, mode)) > -1) {
147 /*
148 * need the dirfile size to establish max bit number.
149 */
150 if (fstat(db->dirf, &dstat) == 0) {
151 /*
152 * zero size: either a fresh database, or one with a single,
153 * unsplit data page: dirpage is all zeros.
154 */
155 db->dirbno = (!dstat.st_size) ? 0 : -1;
156 db->pagbno = -1;
157 db->maxbno = dstat.st_size * BYTESIZ;
158
159 (void) memset(db->pagbuf, 0, PBLKSIZ);
160 (void) memset(db->dirbuf, 0, DBLKSIZ);
161 /*
162 * success
163 */
164 return db;
165 }
166 (void) close(db->dirf);
167 }
168 (void) close(db->pagf);
169 }
170 free((char *) db);
171 return (DBM *) NULL;
172 }
173
174 void
sdbm_close(DBM * db)175 sdbm_close(DBM *db)
176 {
177 if (db == NULL)
178 errno = EINVAL;
179 else {
180 (void) close(db->dirf);
181 (void) close(db->pagf);
182 free((char *) db);
183 }
184 }
185
186 datum
sdbm_fetch(DBM * db,datum key)187 sdbm_fetch(DBM *db, datum key)
188 {
189 if (db == NULL || bad(key))
190 return errno = EINVAL, nullitem;
191
192 if (getpage(db, exhash(key)))
193 return getpair(db->pagbuf, key);
194
195 return ioerr(db), nullitem;
196 }
197
198 int
sdbm_exists(DBM * db,datum key)199 sdbm_exists(DBM *db, datum key)
200 {
201 if (db == NULL || bad(key))
202 return errno = EINVAL, -1;
203
204 if (getpage(db, exhash(key)))
205 return exipair(db->pagbuf, key);
206
207 return ioerr(db), -1;
208 }
209
210 int
sdbm_delete(DBM * db,datum key)211 sdbm_delete(DBM *db, datum key)
212 {
213 if (db == NULL || bad(key))
214 return errno = EINVAL, -1;
215 if (sdbm_rdonly(db))
216 return errno = EPERM, -1;
217
218 if (getpage(db, exhash(key))) {
219 if (!delpair(db->pagbuf, key))
220 return -1;
221 /*
222 * update the page file
223 */
224 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
225 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
226 return ioerr(db), -1;
227
228 return 0;
229 }
230
231 return ioerr(db), -1;
232 }
233
234 int
sdbm_store(DBM * db,datum key,datum val,int flags)235 sdbm_store(DBM *db, datum key, datum val, int flags)
236 {
237 int need;
238 long hash;
239
240 if (db == NULL || bad(key))
241 return errno = EINVAL, -1;
242 if (sdbm_rdonly(db))
243 return errno = EPERM, -1;
244
245 need = key.dsize + val.dsize;
246 /*
247 * is the pair too big (or too small) for this database ??
248 */
249 if (need < 0 || need > PAIRMAX)
250 return errno = EINVAL, -1;
251
252 if (getpage(db, (hash = exhash(key)))) {
253 /*
254 * if we need to replace, delete the key/data pair
255 * first. If it is not there, ignore.
256 */
257 if (flags == DBM_REPLACE)
258 (void) delpair(db->pagbuf, key);
259 #ifdef SEEDUPS
260 else if (duppair(db->pagbuf, key))
261 return 1;
262 #endif
263 /*
264 * if we do not have enough room, we have to split.
265 */
266 if (!fitpair(db->pagbuf, need))
267 if (!makroom(db, hash, need))
268 return ioerr(db), -1;
269 /*
270 * we have enough room or split is successful. insert the key,
271 * and update the page file.
272 */
273 (void) putpair(db->pagbuf, key, val);
274
275 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
276 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
277 return ioerr(db), -1;
278 /*
279 * success
280 */
281 return 0;
282 }
283
284 return ioerr(db), -1;
285 }
286
287 /*
288 * makroom - make room by splitting the overfull page
289 * this routine will attempt to make room for SPLTMAX times before
290 * giving up.
291 */
292 static int
makroom(DBM * db,long int hash,int need)293 makroom(DBM *db, long int hash, int need)
294 {
295 long newp;
296 char twin[PBLKSIZ];
297 #if defined(DOSISH) || defined(WIN32)
298 char zer[PBLKSIZ];
299 Off_t oldtail;
300 #endif
301 char *pag = db->pagbuf;
302 char *New = twin;
303 int smax = SPLTMAX;
304 #ifdef BADMESS
305 int rc;
306 #endif
307
308 do {
309 /*
310 * split the current page
311 */
312 (void) splpage(pag, New, db->hmask + 1);
313 /*
314 * address of the new page
315 */
316 newp = (hash & db->hmask) | (db->hmask + 1);
317
318 /*
319 * write delay, read avoidance/cache shuffle:
320 * select the page for incoming pair: if key is to go to the new page,
321 * write out the previous one, and copy the new one over, thus making
322 * it the current page. If not, simply write the new page, and we are
323 * still looking at the page of interest. current page is not updated
324 * here, as sdbm_store will do so, after it inserts the incoming pair.
325 */
326
327 #if defined(DOSISH) || defined(WIN32)
328 /*
329 * Fill hole with 0 if made it.
330 * (hole is NOT read as 0)
331 */
332 oldtail = lseek(db->pagf, 0L, SEEK_END);
333 memset(zer, 0, PBLKSIZ);
334 while (OFF_PAG(newp) > oldtail) {
335 if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
336 write(db->pagf, zer, PBLKSIZ) < 0) {
337
338 return 0;
339 }
340 oldtail += PBLKSIZ;
341 }
342 #endif
343 if (hash & (db->hmask + 1)) {
344 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
345 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
346 return 0;
347 db->pagbno = newp;
348 (void) memcpy(pag, New, PBLKSIZ);
349 }
350 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
351 || write(db->pagf, New, PBLKSIZ) < 0)
352 return 0;
353
354 if (!setdbit(db, db->curbit))
355 return 0;
356 /*
357 * see if we have enough room now
358 */
359 if (fitpair(pag, need))
360 return 1;
361 /*
362 * try again... update curbit and hmask as getpage would have
363 * done. because of our update of the current page, we do not
364 * need to read in anything. BUT we have to write the current
365 * [deferred] page out, as the window of failure is too great.
366 */
367 db->curbit = 2 * db->curbit +
368 ((hash & (db->hmask + 1)) ? 2 : 1);
369 db->hmask |= db->hmask + 1;
370
371 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
372 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
373 return 0;
374
375 } while (--smax);
376 /*
377 * if we are here, this is real bad news. After SPLTMAX splits,
378 * we still cannot fit the key. say goodnight.
379 */
380 #ifdef BADMESS
381 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
382 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be
383 * useful here but that would mean pulling in perl.h */
384 (void)rc;
385 #endif
386 return 0;
387
388 }
389
390 /*
391 * the following two routines will break if
392 * deletions aren't taken into account. (ndbm bug)
393 */
394 datum
sdbm_firstkey(DBM * db)395 sdbm_firstkey(DBM *db)
396 {
397 if (db == NULL)
398 return errno = EINVAL, nullitem;
399 /*
400 * start at page 0
401 */
402 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
403 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
404 return ioerr(db), nullitem;
405 if (!chkpage(db->pagbuf)) {
406 errno = EINVAL;
407 ioerr(db);
408 db->pagbno = -1;
409 return nullitem;
410 }
411 db->pagbno = 0;
412 db->blkptr = 0;
413 db->keyptr = 0;
414
415 return getnext(db);
416 }
417
418 datum
sdbm_nextkey(DBM * db)419 sdbm_nextkey(DBM *db)
420 {
421 if (db == NULL)
422 return errno = EINVAL, nullitem;
423 return getnext(db);
424 }
425
426 /*
427 * all important binary trie traversal
428 */
429 static int
getpage(DBM * db,long int hash)430 getpage(DBM *db, long int hash)
431 {
432 int hbit;
433 long dbit;
434 long pagb;
435
436 dbit = 0;
437 hbit = 0;
438 while (dbit < db->maxbno && getdbit(db, dbit))
439 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
440
441 debug(("dbit: %d...", dbit));
442
443 db->curbit = dbit;
444 db->hmask = masks[hbit];
445
446 pagb = hash & db->hmask;
447 /*
448 * see if the block we need is already in memory.
449 * note: this lookaside cache has about 10% hit rate.
450 */
451 if (pagb != db->pagbno) {
452 /*
453 * note: here, we assume a "hole" is read as 0s.
454 * if not, must zero pagbuf first.
455 */
456 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
457 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
458 return 0;
459 if (!chkpage(db->pagbuf)) {
460 errno = EINVAL;
461 db->pagbno = -1;
462 ioerr(db);
463 return 0;
464 }
465 db->pagbno = pagb;
466
467 debug(("pag read: %d\n", pagb));
468 }
469 return 1;
470 }
471
472 static int
getdbit(DBM * db,long int dbit)473 getdbit(DBM *db, long int dbit)
474 {
475 long c;
476 long dirb;
477
478 c = dbit / BYTESIZ;
479 dirb = c / DBLKSIZ;
480
481 if (dirb != db->dirbno) {
482 int got;
483 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
484 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
485 return 0;
486 if (got==0)
487 memset(db->dirbuf,0,DBLKSIZ);
488 db->dirbno = dirb;
489
490 debug(("dir read: %d\n", dirb));
491 }
492
493 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
494 }
495
496 static int
setdbit(DBM * db,long int dbit)497 setdbit(DBM *db, long int dbit)
498 {
499 long c;
500 long dirb;
501
502 c = dbit / BYTESIZ;
503 dirb = c / DBLKSIZ;
504
505 if (dirb != db->dirbno) {
506 int got;
507 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
508 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0)
509 return 0;
510 if (got==0)
511 memset(db->dirbuf,0,DBLKSIZ);
512 db->dirbno = dirb;
513
514 debug(("dir read: %d\n", dirb));
515 }
516
517 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
518
519 #if 0
520 if (dbit >= db->maxbno)
521 db->maxbno += DBLKSIZ * BYTESIZ;
522 #else
523 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno)
524 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ;
525 #endif
526
527 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
528 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
529 return 0;
530
531 return 1;
532 }
533
534 /*
535 * getnext - get the next key in the page, and if done with
536 * the page, try the next page in sequence
537 */
538 static datum
getnext(DBM * db)539 getnext(DBM *db)
540 {
541 datum key;
542
543 for (;;) {
544 db->keyptr++;
545 key = getnkey(db->pagbuf, db->keyptr);
546 if (key.dptr != NULL)
547 return key;
548 /*
549 * we either run out, or there is nothing on this page..
550 * try the next one... If we lost our position on the
551 * file, we will have to seek.
552 */
553 db->keyptr = 0;
554 if (db->pagbno != db->blkptr++)
555 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
556 break;
557 db->pagbno = db->blkptr;
558 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
559 break;
560 if (!chkpage(db->pagbuf)) {
561 errno = EINVAL;
562 db->pagbno = -1;
563 ioerr(db);
564 break;
565 }
566 }
567
568 return ioerr(db), nullitem;
569 }
570
571