1 /* 2 * sdbm - ndbm work-alike hashed database library 3 * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 * author: oz@nexus.yorku.ca 5 * status: public domain. 6 * 7 * core routines 8 */ 9 10 #include "config.h" 11 #ifdef WIN32 12 #include "io.h" 13 #endif 14 #include "sdbm.h" 15 #include "tune.h" 16 #include "pair.h" 17 18 #ifdef I_FCNTL 19 # include <fcntl.h> 20 #endif 21 #ifdef I_SYS_FILE 22 # include <sys/file.h> 23 #endif 24 25 #include <string.h> 26 27 /* 28 * externals 29 */ 30 31 #include <errno.h> /* See notes in perl.h about avoiding 32 extern int errno; */ 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 extern Malloc_t malloc(MEM_SIZE); 38 extern Free_t free(Malloc_t); 39 40 #ifdef __cplusplus 41 } 42 #endif 43 44 const datum nullitem = {0, 0}; 45 46 /* 47 * forward 48 */ 49 static int getdbit(DBM *, long); 50 static int setdbit(DBM *, long); 51 static int getpage(DBM *, long); 52 static datum getnext(DBM *); 53 static int makroom(DBM *, long, int); 54 55 /* 56 * useful macros 57 */ 58 #define bad(x) ((x).dptr == NULL || (x).dsize < 0) 59 #define exhash(item) sdbm_hash((item).dptr, (item).dsize) 60 #define ioerr(db) ((db)->flags |= DBM_IOERR) 61 62 #define OFF_PAG(off) (long) (off) * PBLKSIZ 63 #define OFF_DIR(off) (long) (off) * DBLKSIZ 64 65 static const long masks[] = { 66 000000000000, 000000000001, 000000000003, 000000000007, 67 000000000017, 000000000037, 000000000077, 000000000177, 68 000000000377, 000000000777, 000000001777, 000000003777, 69 000000007777, 000000017777, 000000037777, 000000077777, 70 000000177777, 000000377777, 000000777777, 000001777777, 71 000003777777, 000007777777, 000017777777, 000037777777, 72 000077777777, 000177777777, 000377777777, 000777777777, 73 001777777777, 003777777777, 007777777777, 017777777777 74 }; 75 76 DBM * 77 sdbm_open(char *file, int flags, int mode) 78 { 79 DBM *db; 80 char *dirname; 81 char *pagname; 82 size_t filelen; 83 const size_t dirfext_size = sizeof(DIRFEXT ""); 84 const size_t pagfext_size = sizeof(PAGFEXT ""); 85 86 if (file == NULL || !*file) 87 return errno = EINVAL, (DBM *) NULL; 88 /* 89 * need space for two separate filenames 90 */ 91 filelen = strlen(file); 92 93 if ((dirname = (char *) malloc(filelen + dirfext_size 94 + filelen + pagfext_size)) == NULL) 95 return errno = ENOMEM, (DBM *) NULL; 96 /* 97 * build the file names 98 */ 99 memcpy(dirname, file, filelen); 100 memcpy(dirname + filelen, DIRFEXT, dirfext_size); 101 pagname = dirname + filelen + dirfext_size; 102 memcpy(pagname, file, filelen); 103 memcpy(pagname + filelen, PAGFEXT, pagfext_size); 104 105 db = sdbm_prep(dirname, pagname, flags, mode); 106 free((char *) dirname); 107 return db; 108 } 109 110 DBM * 111 sdbm_prep(char *dirname, char *pagname, int flags, int mode) 112 { 113 DBM *db; 114 struct stat dstat; 115 116 if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) 117 return errno = ENOMEM, (DBM *) NULL; 118 119 db->flags = 0; 120 db->hmask = 0; 121 db->blkptr = 0; 122 db->keyptr = 0; 123 /* 124 * adjust user flags so that WRONLY becomes RDWR, 125 * as required by this package. Also set our internal 126 * flag for RDONLY if needed. 127 */ 128 if (flags & O_WRONLY) 129 flags = (flags & ~O_WRONLY) | O_RDWR; 130 131 else if ((flags & 03) == O_RDONLY) 132 db->flags = DBM_RDONLY; 133 /* 134 * open the files in sequence, and stat the dirfile. 135 * If we fail anywhere, undo everything, return NULL. 136 */ 137 #if defined(OS2) || defined(MSDOS) || defined(WIN32) || defined(__CYGWIN__) 138 flags |= O_BINARY; 139 # endif 140 if ((db->pagf = open(pagname, flags, mode)) > -1) { 141 if ((db->dirf = open(dirname, flags, mode)) > -1) { 142 /* 143 * need the dirfile size to establish max bit number. 144 */ 145 if (fstat(db->dirf, &dstat) == 0) { 146 /* 147 * zero size: either a fresh database, or one with a single, 148 * unsplit data page: dirpage is all zeros. 149 */ 150 db->dirbno = (!dstat.st_size) ? 0 : -1; 151 db->pagbno = -1; 152 db->maxbno = dstat.st_size * BYTESIZ; 153 154 (void) memset(db->pagbuf, 0, PBLKSIZ); 155 (void) memset(db->dirbuf, 0, DBLKSIZ); 156 /* 157 * success 158 */ 159 return db; 160 } 161 (void) close(db->dirf); 162 } 163 (void) close(db->pagf); 164 } 165 free((char *) db); 166 return (DBM *) NULL; 167 } 168 169 void 170 sdbm_close(DBM *db) 171 { 172 if (db == NULL) 173 errno = EINVAL; 174 else { 175 (void) close(db->dirf); 176 (void) close(db->pagf); 177 free((char *) db); 178 } 179 } 180 181 datum 182 sdbm_fetch(DBM *db, datum key) 183 { 184 if (db == NULL || bad(key)) 185 return errno = EINVAL, nullitem; 186 187 if (getpage(db, exhash(key))) 188 return getpair(db->pagbuf, key); 189 190 return ioerr(db), nullitem; 191 } 192 193 int 194 sdbm_exists(DBM *db, datum key) 195 { 196 if (db == NULL || bad(key)) 197 return errno = EINVAL, -1; 198 199 if (getpage(db, exhash(key))) 200 return exipair(db->pagbuf, key); 201 202 return ioerr(db), -1; 203 } 204 205 int 206 sdbm_delete(DBM *db, datum key) 207 { 208 if (db == NULL || bad(key)) 209 return errno = EINVAL, -1; 210 if (sdbm_rdonly(db)) 211 return errno = EPERM, -1; 212 213 if (getpage(db, exhash(key))) { 214 if (!delpair(db->pagbuf, key)) 215 return -1; 216 /* 217 * update the page file 218 */ 219 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 220 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 221 return ioerr(db), -1; 222 223 return 0; 224 } 225 226 return ioerr(db), -1; 227 } 228 229 int 230 sdbm_store(DBM *db, datum key, datum val, int flags) 231 { 232 int need; 233 long hash; 234 235 if (db == NULL || bad(key)) 236 return errno = EINVAL, -1; 237 if (sdbm_rdonly(db)) 238 return errno = EPERM, -1; 239 240 need = key.dsize + val.dsize; 241 /* 242 * is the pair too big (or too small) for this database ?? 243 */ 244 if (need < 0 || need > PAIRMAX) 245 return errno = EINVAL, -1; 246 247 if (getpage(db, (hash = exhash(key)))) { 248 /* 249 * if we need to replace, delete the key/data pair 250 * first. If it is not there, ignore. 251 */ 252 if (flags == DBM_REPLACE) 253 (void) delpair(db->pagbuf, key); 254 #ifdef SEEDUPS 255 else if (duppair(db->pagbuf, key)) 256 return 1; 257 #endif 258 /* 259 * if we do not have enough room, we have to split. 260 */ 261 if (!fitpair(db->pagbuf, need)) 262 if (!makroom(db, hash, need)) 263 return ioerr(db), -1; 264 /* 265 * we have enough room or split is successful. insert the key, 266 * and update the page file. 267 */ 268 (void) putpair(db->pagbuf, key, val); 269 270 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 271 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 272 return ioerr(db), -1; 273 /* 274 * success 275 */ 276 return 0; 277 } 278 279 return ioerr(db), -1; 280 } 281 282 /* 283 * makroom - make room by splitting the overfull page 284 * this routine will attempt to make room for SPLTMAX times before 285 * giving up. 286 */ 287 static int 288 makroom(DBM *db, long int hash, int need) 289 { 290 long newp; 291 char twin[PBLKSIZ]; 292 #if defined(DOSISH) || defined(WIN32) 293 char zer[PBLKSIZ]; 294 long oldtail; 295 #endif 296 char *pag = db->pagbuf; 297 char *New = twin; 298 int smax = SPLTMAX; 299 #ifdef BADMESS 300 int rc; 301 #endif 302 303 do { 304 /* 305 * split the current page 306 */ 307 (void) splpage(pag, New, db->hmask + 1); 308 /* 309 * address of the new page 310 */ 311 newp = (hash & db->hmask) | (db->hmask + 1); 312 313 /* 314 * write delay, read avoidance/cache shuffle: 315 * select the page for incoming pair: if key is to go to the new page, 316 * write out the previous one, and copy the new one over, thus making 317 * it the current page. If not, simply write the new page, and we are 318 * still looking at the page of interest. current page is not updated 319 * here, as sdbm_store will do so, after it inserts the incoming pair. 320 */ 321 322 #if defined(DOSISH) || defined(WIN32) 323 /* 324 * Fill hole with 0 if made it. 325 * (hole is NOT read as 0) 326 */ 327 oldtail = lseek(db->pagf, 0L, SEEK_END); 328 memset(zer, 0, PBLKSIZ); 329 while (OFF_PAG(newp) > oldtail) { 330 if (lseek(db->pagf, 0L, SEEK_END) < 0 || 331 write(db->pagf, zer, PBLKSIZ) < 0) { 332 333 return 0; 334 } 335 oldtail += PBLKSIZ; 336 } 337 #endif 338 if (hash & (db->hmask + 1)) { 339 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 340 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 341 return 0; 342 db->pagbno = newp; 343 (void) memcpy(pag, New, PBLKSIZ); 344 } 345 else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 346 || write(db->pagf, New, PBLKSIZ) < 0) 347 return 0; 348 349 if (!setdbit(db, db->curbit)) 350 return 0; 351 /* 352 * see if we have enough room now 353 */ 354 if (fitpair(pag, need)) 355 return 1; 356 /* 357 * try again... update curbit and hmask as getpage would have 358 * done. because of our update of the current page, we do not 359 * need to read in anything. BUT we have to write the current 360 * [deferred] page out, as the window of failure is too great. 361 */ 362 db->curbit = 2 * db->curbit + 363 ((hash & (db->hmask + 1)) ? 2 : 1); 364 db->hmask |= db->hmask + 1; 365 366 if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 367 || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 368 return 0; 369 370 } while (--smax); 371 /* 372 * if we are here, this is real bad news. After SPLTMAX splits, 373 * we still cannot fit the key. say goodnight. 374 */ 375 #ifdef BADMESS 376 rc = write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); 377 /* PERL_UNUSED_VAR() or PERL_UNUSED_RESULT() would be 378 * useful here but that would mean pulling in perl.h */ 379 (void)rc; 380 #endif 381 return 0; 382 383 } 384 385 /* 386 * the following two routines will break if 387 * deletions aren't taken into account. (ndbm bug) 388 */ 389 datum 390 sdbm_firstkey(DBM *db) 391 { 392 if (db == NULL) 393 return errno = EINVAL, nullitem; 394 /* 395 * start at page 0 396 */ 397 if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 398 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 399 return ioerr(db), nullitem; 400 if (!chkpage(db->pagbuf)) { 401 errno = EINVAL; 402 ioerr(db); 403 db->pagbno = -1; 404 return nullitem; 405 } 406 db->pagbno = 0; 407 db->blkptr = 0; 408 db->keyptr = 0; 409 410 return getnext(db); 411 } 412 413 datum 414 sdbm_nextkey(DBM *db) 415 { 416 if (db == NULL) 417 return errno = EINVAL, nullitem; 418 return getnext(db); 419 } 420 421 /* 422 * all important binary trie traversal 423 */ 424 static int 425 getpage(DBM *db, long int hash) 426 { 427 int hbit; 428 long dbit; 429 long pagb; 430 431 dbit = 0; 432 hbit = 0; 433 while (dbit < db->maxbno && getdbit(db, dbit)) 434 dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); 435 436 debug(("dbit: %d...", dbit)); 437 438 db->curbit = dbit; 439 db->hmask = masks[hbit]; 440 441 pagb = hash & db->hmask; 442 /* 443 * see if the block we need is already in memory. 444 * note: this lookaside cache has about 10% hit rate. 445 */ 446 if (pagb != db->pagbno) { 447 /* 448 * note: here, we assume a "hole" is read as 0s. 449 * if not, must zero pagbuf first. 450 */ 451 if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 452 || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 453 return 0; 454 if (!chkpage(db->pagbuf)) { 455 errno = EINVAL; 456 db->pagbno = -1; 457 ioerr(db); 458 return 0; 459 } 460 db->pagbno = pagb; 461 462 debug(("pag read: %d\n", pagb)); 463 } 464 return 1; 465 } 466 467 static int 468 getdbit(DBM *db, long int dbit) 469 { 470 long c; 471 long dirb; 472 473 c = dbit / BYTESIZ; 474 dirb = c / DBLKSIZ; 475 476 if (dirb != db->dirbno) { 477 int got; 478 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 479 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 480 return 0; 481 if (got==0) 482 memset(db->dirbuf,0,DBLKSIZ); 483 db->dirbno = dirb; 484 485 debug(("dir read: %d\n", dirb)); 486 } 487 488 return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); 489 } 490 491 static int 492 setdbit(DBM *db, long int dbit) 493 { 494 long c; 495 long dirb; 496 497 c = dbit / BYTESIZ; 498 dirb = c / DBLKSIZ; 499 500 if (dirb != db->dirbno) { 501 int got; 502 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 503 || (got=read(db->dirf, db->dirbuf, DBLKSIZ)) < 0) 504 return 0; 505 if (got==0) 506 memset(db->dirbuf,0,DBLKSIZ); 507 db->dirbno = dirb; 508 509 debug(("dir read: %d\n", dirb)); 510 } 511 512 db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); 513 514 #if 0 515 if (dbit >= db->maxbno) 516 db->maxbno += DBLKSIZ * BYTESIZ; 517 #else 518 if (OFF_DIR((dirb+1))*BYTESIZ > db->maxbno) 519 db->maxbno=OFF_DIR((dirb+1))*BYTESIZ; 520 #endif 521 522 if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 523 || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) 524 return 0; 525 526 return 1; 527 } 528 529 /* 530 * getnext - get the next key in the page, and if done with 531 * the page, try the next page in sequence 532 */ 533 static datum 534 getnext(DBM *db) 535 { 536 datum key; 537 538 for (;;) { 539 db->keyptr++; 540 key = getnkey(db->pagbuf, db->keyptr); 541 if (key.dptr != NULL) 542 return key; 543 /* 544 * we either run out, or there is nothing on this page.. 545 * try the next one... If we lost our position on the 546 * file, we will have to seek. 547 */ 548 db->keyptr = 0; 549 if (db->pagbno != db->blkptr++) 550 if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) 551 break; 552 db->pagbno = db->blkptr; 553 if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) 554 break; 555 if (!chkpage(db->pagbuf)) { 556 errno = EINVAL; 557 db->pagbno = -1; 558 ioerr(db); 559 break; 560 } 561 } 562 563 return ioerr(db), nullitem; 564 } 565 566