1# DB_File.pm -- Perl 5 interface to Berkeley DB 2# 3# written by Paul Marquess (pmqs@cpan.org) 4# last modified 28th October 2007 5# version 1.818 6# 7# Copyright (c) 1995-2009 Paul Marquess. All rights reserved. 8# This program is free software; you can redistribute it and/or 9# modify it under the same terms as Perl itself. 10 11 12package DB_File::HASHINFO ; 13 14require 5.00404; 15 16use warnings; 17use strict; 18use Carp; 19require Tie::Hash; 20@DB_File::HASHINFO::ISA = qw(Tie::Hash); 21 22sub new 23{ 24 my $pkg = shift ; 25 my %x ; 26 tie %x, $pkg ; 27 bless \%x, $pkg ; 28} 29 30 31sub TIEHASH 32{ 33 my $pkg = shift ; 34 35 bless { VALID => { 36 bsize => 1, 37 ffactor => 1, 38 nelem => 1, 39 cachesize => 1, 40 hash => 2, 41 lorder => 1, 42 }, 43 GOT => {} 44 }, $pkg ; 45} 46 47 48sub FETCH 49{ 50 my $self = shift ; 51 my $key = shift ; 52 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; 54 55 my $pkg = ref $self ; 56 croak "${pkg}::FETCH - Unknown element '$key'" ; 57} 58 59 60sub STORE 61{ 62 my $self = shift ; 63 my $key = shift ; 64 my $value = shift ; 65 66 my $type = $self->{VALID}{$key}; 67 68 if ( $type ) 69 { 70 croak "Key '$key' not associated with a code reference" 71 if $type == 2 && !ref $value && ref $value ne 'CODE'; 72 $self->{GOT}{$key} = $value ; 73 return ; 74 } 75 76 my $pkg = ref $self ; 77 croak "${pkg}::STORE - Unknown element '$key'" ; 78} 79 80sub DELETE 81{ 82 my $self = shift ; 83 my $key = shift ; 84 85 if ( exists $self->{VALID}{$key} ) 86 { 87 delete $self->{GOT}{$key} ; 88 return ; 89 } 90 91 my $pkg = ref $self ; 92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; 93} 94 95sub EXISTS 96{ 97 my $self = shift ; 98 my $key = shift ; 99 100 exists $self->{VALID}{$key} ; 101} 102 103sub NotHere 104{ 105 my $self = shift ; 106 my $method = shift ; 107 108 croak ref($self) . " does not define the method ${method}" ; 109} 110 111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } 112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } 113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } 114 115package DB_File::RECNOINFO ; 116 117use warnings; 118use strict ; 119 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; 121 122sub TIEHASH 123{ 124 my $pkg = shift ; 125 126 bless { VALID => { map {$_, 1} 127 qw( bval cachesize psize flags lorder reclen bfname ) 128 }, 129 GOT => {}, 130 }, $pkg ; 131} 132 133package DB_File::BTREEINFO ; 134 135use warnings; 136use strict ; 137 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; 139 140sub TIEHASH 141{ 142 my $pkg = shift ; 143 144 bless { VALID => { 145 flags => 1, 146 cachesize => 1, 147 maxkeypage => 1, 148 minkeypage => 1, 149 psize => 1, 150 compare => 2, 151 prefix => 2, 152 lorder => 1, 153 }, 154 GOT => {}, 155 }, $pkg ; 156} 157 158 159package DB_File ; 160 161use warnings; 162use strict; 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO); 164our ($db_version, $use_XSLoader, $splice_end_array, $Error); 165use Carp; 166 167 168$VERSION = "1.820" ; 169$VERSION = eval $VERSION; # needed for dev releases 170 171{ 172 local $SIG{__WARN__} = sub {$splice_end_array = "@_";}; 173 my @a =(1); splice(@a, 3); 174 $splice_end_array = 175 ($splice_end_array =~ /^splice\(\) offset past end of array at /); 176} 177 178#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; 179$DB_BTREE = new DB_File::BTREEINFO ; 180$DB_HASH = new DB_File::HASHINFO ; 181$DB_RECNO = new DB_File::RECNOINFO ; 182 183require Tie::Hash; 184require Exporter; 185use AutoLoader; 186BEGIN { 187 $use_XSLoader = 1 ; 188 { local $SIG{__DIE__} ; eval { require XSLoader } ; } 189 190 if ($@) { 191 $use_XSLoader = 0 ; 192 require DynaLoader; 193 @ISA = qw(DynaLoader); 194 } 195} 196 197push @ISA, qw(Tie::Hash Exporter); 198@EXPORT = qw( 199 $DB_BTREE $DB_HASH $DB_RECNO 200 201 BTREEMAGIC 202 BTREEVERSION 203 DB_LOCK 204 DB_SHMEM 205 DB_TXN 206 HASHMAGIC 207 HASHVERSION 208 MAX_PAGE_NUMBER 209 MAX_PAGE_OFFSET 210 MAX_REC_NUMBER 211 RET_ERROR 212 RET_SPECIAL 213 RET_SUCCESS 214 R_CURSOR 215 R_DUP 216 R_FIRST 217 R_FIXEDLEN 218 R_IAFTER 219 R_IBEFORE 220 R_LAST 221 R_NEXT 222 R_NOKEY 223 R_NOOVERWRITE 224 R_PREV 225 R_RECNOSYNC 226 R_SETCURSOR 227 R_SNAPSHOT 228 __R_UNUSED 229 230); 231 232sub AUTOLOAD { 233 my($constname); 234 ($constname = $AUTOLOAD) =~ s/.*:://; 235 my ($error, $val) = constant($constname); 236 Carp::croak $error if $error; 237 no strict 'refs'; 238 *{$AUTOLOAD} = sub { $val }; 239 goto &{$AUTOLOAD}; 240} 241 242 243eval { 244 # Make all Fcntl O_XXX constants available for importing 245 require Fcntl; 246 my @O = grep /^O_/, @Fcntl::EXPORT; 247 Fcntl->import(@O); # first we import what we want to export 248 push(@EXPORT, @O); 249}; 250 251if ($use_XSLoader) 252 { XSLoader::load("DB_File", $VERSION)} 253else 254 { bootstrap DB_File $VERSION } 255 256# Preloaded methods go here. Autoload methods go after __END__, and are 257# processed by the autosplit program. 258 259sub tie_hash_or_array 260{ 261 my (@arg) = @_ ; 262 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; 263 264 use File::Spec; 265 $arg[1] = File::Spec->rel2abs($arg[1]) 266 if defined $arg[1] ; 267 268 $arg[4] = tied %{ $arg[4] } 269 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; 270 271 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2]; 272 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3]; 273 274 # make recno in Berkeley DB version 2 (or better) work like 275 # recno in version 1. 276 if ($db_version >= 4 and ! $tieHASH) { 277 $arg[2] |= O_CREAT(); 278 } 279 280 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and 281 $arg[1] and ! -e $arg[1]) { 282 open(FH, ">$arg[1]") or return undef ; 283 close FH ; 284 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; 285 } 286 287 DoTie_($tieHASH, @arg) ; 288} 289 290sub TIEHASH 291{ 292 tie_hash_or_array(@_) ; 293} 294 295sub TIEARRAY 296{ 297 tie_hash_or_array(@_) ; 298} 299 300sub CLEAR 301{ 302 my $self = shift; 303 my $key = 0 ; 304 my $value = "" ; 305 my $status = $self->seq($key, $value, R_FIRST()); 306 my @keys; 307 308 while ($status == 0) { 309 push @keys, $key; 310 $status = $self->seq($key, $value, R_NEXT()); 311 } 312 foreach $key (reverse @keys) { 313 my $s = $self->del($key); 314 } 315} 316 317sub EXTEND { } 318 319sub STORESIZE 320{ 321 my $self = shift; 322 my $length = shift ; 323 my $current_length = $self->length() ; 324 325 if ($length < $current_length) { 326 my $key ; 327 for ($key = $current_length - 1 ; $key >= $length ; -- $key) 328 { $self->del($key) } 329 } 330 elsif ($length > $current_length) { 331 $self->put($length-1, "") ; 332 } 333} 334 335 336sub SPLICE 337{ 338 my $self = shift; 339 my $offset = shift; 340 if (not defined $offset) { 341 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 342 $offset = 0; 343 } 344 345 my $length = @_ ? shift : 0; 346 # Carping about definedness comes _after_ the OFFSET sanity check. 347 # This is so we get the same error messages as Perl's splice(). 348 # 349 350 my @list = @_; 351 352 my $size = $self->FETCHSIZE(); 353 354 # 'If OFFSET is negative then it start that far from the end of 355 # the array.' 356 # 357 if ($offset < 0) { 358 my $new_offset = $size + $offset; 359 if ($new_offset < 0) { 360 die "Modification of non-creatable array value attempted, " 361 . "subscript $offset"; 362 } 363 $offset = $new_offset; 364 } 365 366 if (not defined $length) { 367 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 368 $length = 0; 369 } 370 371 if ($offset > $size) { 372 $offset = $size; 373 warnings::warnif('misc', 'splice() offset past end of array') 374 if $splice_end_array; 375 } 376 377 # 'If LENGTH is omitted, removes everything from OFFSET onward.' 378 if (not defined $length) { 379 $length = $size - $offset; 380 } 381 382 # 'If LENGTH is negative, leave that many elements off the end of 383 # the array.' 384 # 385 if ($length < 0) { 386 $length = $size - $offset + $length; 387 388 if ($length < 0) { 389 # The user must have specified a length bigger than the 390 # length of the array passed in. But perl's splice() 391 # doesn't catch this, it just behaves as for length=0. 392 # 393 $length = 0; 394 } 395 } 396 397 if ($length > $size - $offset) { 398 $length = $size - $offset; 399 } 400 401 # $num_elems holds the current number of elements in the database. 402 my $num_elems = $size; 403 404 # 'Removes the elements designated by OFFSET and LENGTH from an 405 # array,'... 406 # 407 my @removed = (); 408 foreach (0 .. $length - 1) { 409 my $old; 410 my $status = $self->get($offset, $old); 411 if ($status != 0) { 412 my $msg = "error from Berkeley DB on get($offset, \$old)"; 413 if ($status == 1) { 414 $msg .= ' (no such element?)'; 415 } 416 else { 417 $msg .= ": error status $status"; 418 if (defined $! and $! ne '') { 419 $msg .= ", message $!"; 420 } 421 } 422 die $msg; 423 } 424 push @removed, $old; 425 426 $status = $self->del($offset); 427 if ($status != 0) { 428 my $msg = "error from Berkeley DB on del($offset)"; 429 if ($status == 1) { 430 $msg .= ' (no such element?)'; 431 } 432 else { 433 $msg .= ": error status $status"; 434 if (defined $! and $! ne '') { 435 $msg .= ", message $!"; 436 } 437 } 438 die $msg; 439 } 440 441 -- $num_elems; 442 } 443 444 # ...'and replaces them with the elements of LIST, if any.' 445 my $pos = $offset; 446 while (defined (my $elem = shift @list)) { 447 my $old_pos = $pos; 448 my $status; 449 if ($pos >= $num_elems) { 450 $status = $self->put($pos, $elem); 451 } 452 else { 453 $status = $self->put($pos, $elem, $self->R_IBEFORE); 454 } 455 456 if ($status != 0) { 457 my $msg = "error from Berkeley DB on put($pos, $elem, ...)"; 458 if ($status == 1) { 459 $msg .= ' (no such element?)'; 460 } 461 else { 462 $msg .= ", error status $status"; 463 if (defined $! and $! ne '') { 464 $msg .= ", message $!"; 465 } 466 } 467 die $msg; 468 } 469 470 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE" 471 if $old_pos != $pos; 472 473 ++ $pos; 474 ++ $num_elems; 475 } 476 477 if (wantarray) { 478 # 'In list context, returns the elements removed from the 479 # array.' 480 # 481 return @removed; 482 } 483 elsif (defined wantarray and not wantarray) { 484 # 'In scalar context, returns the last element removed, or 485 # undef if no elements are removed.' 486 # 487 if (@removed) { 488 my $last = pop @removed; 489 return "$last"; 490 } 491 else { 492 return undef; 493 } 494 } 495 elsif (not defined wantarray) { 496 # Void context 497 } 498 else { die } 499} 500sub ::DB_File::splice { &SPLICE } 501 502sub find_dup 503{ 504 croak "Usage: \$db->find_dup(key,value)\n" 505 unless @_ == 3 ; 506 507 my $db = shift ; 508 my ($origkey, $value_wanted) = @_ ; 509 my ($key, $value) = ($origkey, 0); 510 my ($status) = 0 ; 511 512 for ($status = $db->seq($key, $value, R_CURSOR() ) ; 513 $status == 0 ; 514 $status = $db->seq($key, $value, R_NEXT() ) ) { 515 516 return 0 if $key eq $origkey and $value eq $value_wanted ; 517 } 518 519 return $status ; 520} 521 522sub del_dup 523{ 524 croak "Usage: \$db->del_dup(key,value)\n" 525 unless @_ == 3 ; 526 527 my $db = shift ; 528 my ($key, $value) = @_ ; 529 my ($status) = $db->find_dup($key, $value) ; 530 return $status if $status != 0 ; 531 532 $status = $db->del($key, R_CURSOR() ) ; 533 return $status ; 534} 535 536sub get_dup 537{ 538 croak "Usage: \$db->get_dup(key [,flag])\n" 539 unless @_ == 2 or @_ == 3 ; 540 541 my $db = shift ; 542 my $key = shift ; 543 my $flag = shift ; 544 my $value = 0 ; 545 my $origkey = $key ; 546 my $wantarray = wantarray ; 547 my %values = () ; 548 my @values = () ; 549 my $counter = 0 ; 550 my $status = 0 ; 551 552 # iterate through the database until either EOF ($status == 0) 553 # or a different key is encountered ($key ne $origkey). 554 for ($status = $db->seq($key, $value, R_CURSOR()) ; 555 $status == 0 and $key eq $origkey ; 556 $status = $db->seq($key, $value, R_NEXT()) ) { 557 558 # save the value or count number of matches 559 if ($wantarray) { 560 if ($flag) 561 { ++ $values{$value} } 562 else 563 { push (@values, $value) } 564 } 565 else 566 { ++ $counter } 567 568 } 569 570 return ($wantarray ? ($flag ? %values : @values) : $counter) ; 571} 572 573 5741; 575__END__ 576 577=head1 NAME 578 579DB_File - Perl5 access to Berkeley DB version 1.x 580 581=head1 SYNOPSIS 582 583 use DB_File; 584 585 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; 586 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; 587 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; 588 589 $status = $X->del($key [, $flags]) ; 590 $status = $X->put($key, $value [, $flags]) ; 591 $status = $X->get($key, $value [, $flags]) ; 592 $status = $X->seq($key, $value, $flags) ; 593 $status = $X->sync([$flags]) ; 594 $status = $X->fd ; 595 596 # BTREE only 597 $count = $X->get_dup($key) ; 598 @list = $X->get_dup($key) ; 599 %list = $X->get_dup($key, 1) ; 600 $status = $X->find_dup($key, $value) ; 601 $status = $X->del_dup($key, $value) ; 602 603 # RECNO only 604 $a = $X->length; 605 $a = $X->pop ; 606 $X->push(list); 607 $a = $X->shift; 608 $X->unshift(list); 609 @r = $X->splice(offset, length, elements); 610 611 # DBM Filters 612 $old_filter = $db->filter_store_key ( sub { ... } ) ; 613 $old_filter = $db->filter_store_value( sub { ... } ) ; 614 $old_filter = $db->filter_fetch_key ( sub { ... } ) ; 615 $old_filter = $db->filter_fetch_value( sub { ... } ) ; 616 617 untie %hash ; 618 untie @array ; 619 620=head1 DESCRIPTION 621 622B<DB_File> is a module which allows Perl programs to make use of the 623facilities provided by Berkeley DB version 1.x (if you have a newer 624version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>). 625It is assumed that you have a copy of the Berkeley DB manual pages at 626hand when reading this documentation. The interface defined here 627mirrors the Berkeley DB interface closely. 628 629Berkeley DB is a C library which provides a consistent interface to a 630number of database formats. B<DB_File> provides an interface to all 631three of the database types currently supported by Berkeley DB. 632 633The file types are: 634 635=over 5 636 637=item B<DB_HASH> 638 639This database type allows arbitrary key/value pairs to be stored in data 640files. This is equivalent to the functionality provided by other 641hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, 642the files created using DB_HASH are not compatible with any of the 643other packages mentioned. 644 645A default hashing algorithm, which will be adequate for most 646applications, is built into Berkeley DB. If you do need to use your own 647hashing algorithm it is possible to write your own in Perl and have 648B<DB_File> use it instead. 649 650=item B<DB_BTREE> 651 652The btree format allows arbitrary key/value pairs to be stored in a 653sorted, balanced binary tree. 654 655As with the DB_HASH format, it is possible to provide a user defined 656Perl routine to perform the comparison of keys. By default, though, the 657keys are stored in lexical order. 658 659=item B<DB_RECNO> 660 661DB_RECNO allows both fixed-length and variable-length flat text files 662to be manipulated using the same key/value pair interface as in DB_HASH 663and DB_BTREE. In this case the key will consist of a record (line) 664number. 665 666=back 667 668=head2 Using DB_File with Berkeley DB version 2 or greater 669 670Although B<DB_File> is intended to be used with Berkeley DB version 1, 671it can also be used with version 2, 3 or 4. In this case the interface is 672limited to the functionality provided by Berkeley DB 1.x. Anywhere the 673version 2 or greater interface differs, B<DB_File> arranges for it to work 674like version 1. This feature allows B<DB_File> scripts that were built 675with version 1 to be migrated to version 2 or greater without any changes. 676 677If you want to make use of the new features available in Berkeley DB 6782.x or greater, use the Perl module B<BerkeleyDB> instead. 679 680B<Note:> The database file format has changed multiple times in Berkeley 681DB version 2, 3 and 4. If you cannot recreate your databases, you 682must dump any existing databases with either the C<db_dump> or the 683C<db_dump185> utility that comes with Berkeley DB. 684Once you have rebuilt DB_File to use Berkeley DB version 2 or greater, 685your databases can be recreated using C<db_load>. Refer to the Berkeley DB 686documentation for further details. 687 688Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley 689DB with DB_File. 690 691=head2 Interface to Berkeley DB 692 693B<DB_File> allows access to Berkeley DB files using the tie() mechanism 694in Perl 5 (for full details, see L<perlfunc/tie()>). This facility 695allows B<DB_File> to access Berkeley DB files using either an 696associative array (for DB_HASH & DB_BTREE file types) or an ordinary 697array (for the DB_RECNO file type). 698 699In addition to the tie() interface, it is also possible to access most 700of the functions provided in the Berkeley DB API directly. 701See L<THE API INTERFACE>. 702 703=head2 Opening a Berkeley DB Database File 704 705Berkeley DB uses the function dbopen() to open or create a database. 706Here is the C prototype for dbopen(): 707 708 DB* 709 dbopen (const char * file, int flags, int mode, 710 DBTYPE type, const void * openinfo) 711 712The parameter C<type> is an enumeration which specifies which of the 3 713interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. 714Depending on which of these is actually chosen, the final parameter, 715I<openinfo> points to a data structure which allows tailoring of the 716specific interface method. 717 718This interface is handled slightly differently in B<DB_File>. Here is 719an equivalent call using B<DB_File>: 720 721 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; 722 723The C<filename>, C<flags> and C<mode> parameters are the direct 724equivalent of their dbopen() counterparts. The final parameter $DB_HASH 725performs the function of both the C<type> and C<openinfo> parameters in 726dbopen(). 727 728In the example above $DB_HASH is actually a pre-defined reference to a 729hash object. B<DB_File> has three of these pre-defined references. 730Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. 731 732The keys allowed in each of these pre-defined references is limited to 733the names used in the equivalent C structure. So, for example, the 734$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, 735C<ffactor>, C<hash>, C<lorder> and C<nelem>. 736 737To change one of these elements, just assign to it like this: 738 739 $DB_HASH->{'cachesize'} = 10000 ; 740 741The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are 742usually adequate for most applications. If you do need to create extra 743instances of these objects, constructors are available for each file 744type. 745 746Here are examples of the constructors and the valid options available 747for DB_HASH, DB_BTREE and DB_RECNO respectively. 748 749 $a = new DB_File::HASHINFO ; 750 $a->{'bsize'} ; 751 $a->{'cachesize'} ; 752 $a->{'ffactor'}; 753 $a->{'hash'} ; 754 $a->{'lorder'} ; 755 $a->{'nelem'} ; 756 757 $b = new DB_File::BTREEINFO ; 758 $b->{'flags'} ; 759 $b->{'cachesize'} ; 760 $b->{'maxkeypage'} ; 761 $b->{'minkeypage'} ; 762 $b->{'psize'} ; 763 $b->{'compare'} ; 764 $b->{'prefix'} ; 765 $b->{'lorder'} ; 766 767 $c = new DB_File::RECNOINFO ; 768 $c->{'bval'} ; 769 $c->{'cachesize'} ; 770 $c->{'psize'} ; 771 $c->{'flags'} ; 772 $c->{'lorder'} ; 773 $c->{'reclen'} ; 774 $c->{'bfname'} ; 775 776The values stored in the hashes above are mostly the direct equivalent 777of their C counterpart. Like their C counterparts, all are set to a 778default values - that means you don't have to set I<all> of the 779values when you only want to change one. Here is an example: 780 781 $a = new DB_File::HASHINFO ; 782 $a->{'cachesize'} = 12345 ; 783 tie %y, 'DB_File', "filename", $flags, 0777, $a ; 784 785A few of the options need extra discussion here. When used, the C 786equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers 787to C functions. In B<DB_File> these keys are used to store references 788to Perl subs. Below are templates for each of the subs: 789 790 sub hash 791 { 792 my ($data) = @_ ; 793 ... 794 # return the hash value for $data 795 return $hash ; 796 } 797 798 sub compare 799 { 800 my ($key, $key2) = @_ ; 801 ... 802 # return 0 if $key1 eq $key2 803 # -1 if $key1 lt $key2 804 # 1 if $key1 gt $key2 805 return (-1 , 0 or 1) ; 806 } 807 808 sub prefix 809 { 810 my ($key, $key2) = @_ ; 811 ... 812 # return number of bytes of $key2 which are 813 # necessary to determine that it is greater than $key1 814 return $bytes ; 815 } 816 817See L<Changing the BTREE sort order> for an example of using the 818C<compare> template. 819 820If you are using the DB_RECNO interface and you intend making use of 821C<bval>, you should check out L<The 'bval' Option>. 822 823=head2 Default Parameters 824 825It is possible to omit some or all of the final 4 parameters in the 826call to C<tie> and let them take default values. As DB_HASH is the most 827common file format used, the call: 828 829 tie %A, "DB_File", "filename" ; 830 831is equivalent to: 832 833 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; 834 835It is also possible to omit the filename parameter as well, so the 836call: 837 838 tie %A, "DB_File" ; 839 840is equivalent to: 841 842 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; 843 844See L<In Memory Databases> for a discussion on the use of C<undef> 845in place of a filename. 846 847=head2 In Memory Databases 848 849Berkeley DB allows the creation of in-memory databases by using NULL 850(that is, a C<(char *)0> in C) in place of the filename. B<DB_File> 851uses C<undef> instead of NULL to provide this functionality. 852 853=head1 DB_HASH 854 855The DB_HASH file format is probably the most commonly used of the three 856file formats that B<DB_File> supports. It is also very straightforward 857to use. 858 859=head2 A Simple Example 860 861This example shows how to create a database, add key/value pairs to the 862database, delete keys/value pairs and finally how to enumerate the 863contents of the database. 864 865 use warnings ; 866 use strict ; 867 use DB_File ; 868 our (%h, $k, $v) ; 869 870 unlink "fruit" ; 871 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH 872 or die "Cannot open file 'fruit': $!\n"; 873 874 # Add a few key/value pairs to the file 875 $h{"apple"} = "red" ; 876 $h{"orange"} = "orange" ; 877 $h{"banana"} = "yellow" ; 878 $h{"tomato"} = "red" ; 879 880 # Check for existence of a key 881 print "Banana Exists\n\n" if $h{"banana"} ; 882 883 # Delete a key/value pair. 884 delete $h{"apple"} ; 885 886 # print the contents of the file 887 while (($k, $v) = each %h) 888 { print "$k -> $v\n" } 889 890 untie %h ; 891 892here is the output: 893 894 Banana Exists 895 896 orange -> orange 897 tomato -> red 898 banana -> yellow 899 900Note that the like ordinary associative arrays, the order of the keys 901retrieved is in an apparently random order. 902 903=head1 DB_BTREE 904 905The DB_BTREE format is useful when you want to store data in a given 906order. By default the keys will be stored in lexical order, but as you 907will see from the example shown in the next section, it is very easy to 908define your own sorting function. 909 910=head2 Changing the BTREE sort order 911 912This script shows how to override the default sorting algorithm that 913BTREE uses. Instead of using the normal lexical ordering, a case 914insensitive compare function will be used. 915 916 use warnings ; 917 use strict ; 918 use DB_File ; 919 920 my %h ; 921 922 sub Compare 923 { 924 my ($key1, $key2) = @_ ; 925 "\L$key1" cmp "\L$key2" ; 926 } 927 928 # specify the Perl sub that will do the comparison 929 $DB_BTREE->{'compare'} = \&Compare ; 930 931 unlink "tree" ; 932 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE 933 or die "Cannot open file 'tree': $!\n" ; 934 935 # Add a key/value pair to the file 936 $h{'Wall'} = 'Larry' ; 937 $h{'Smith'} = 'John' ; 938 $h{'mouse'} = 'mickey' ; 939 $h{'duck'} = 'donald' ; 940 941 # Delete 942 delete $h{"duck"} ; 943 944 # Cycle through the keys printing them in order. 945 # Note it is not necessary to sort the keys as 946 # the btree will have kept them in order automatically. 947 foreach (keys %h) 948 { print "$_\n" } 949 950 untie %h ; 951 952Here is the output from the code above. 953 954 mouse 955 Smith 956 Wall 957 958There are a few point to bear in mind if you want to change the 959ordering in a BTREE database: 960 961=over 5 962 963=item 1. 964 965The new compare function must be specified when you create the database. 966 967=item 2. 968 969You cannot change the ordering once the database has been created. Thus 970you must use the same compare function every time you access the 971database. 972 973=item 3 974 975Duplicate keys are entirely defined by the comparison function. 976In the case-insensitive example above, the keys: 'KEY' and 'key' 977would be considered duplicates, and assigning to the second one 978would overwrite the first. If duplicates are allowed for (with the 979R_DUP flag discussed below), only a single copy of duplicate keys 980is stored in the database --- so (again with example above) assigning 981three values to the keys: 'KEY', 'Key', and 'key' would leave just 982the first key: 'KEY' in the database with three values. For some 983situations this results in information loss, so care should be taken 984to provide fully qualified comparison functions when necessary. 985For example, the above comparison routine could be modified to 986additionally compare case-sensitively if two keys are equal in the 987case insensitive comparison: 988 989 sub compare { 990 my($key1, $key2) = @_; 991 lc $key1 cmp lc $key2 || 992 $key1 cmp $key2; 993 } 994 995And now you will only have duplicates when the keys themselves 996are truly the same. (note: in versions of the db library prior to 997about November 1996, such duplicate keys were retained so it was 998possible to recover the original keys in sets of keys that 999compared as equal). 1000 1001 1002=back 1003 1004=head2 Handling Duplicate Keys 1005 1006The BTREE file type optionally allows a single key to be associated 1007with an arbitrary number of values. This option is enabled by setting 1008the flags element of C<$DB_BTREE> to R_DUP when creating the database. 1009 1010There are some difficulties in using the tied hash interface if you 1011want to manipulate a BTREE database with duplicate keys. Consider this 1012code: 1013 1014 use warnings ; 1015 use strict ; 1016 use DB_File ; 1017 1018 my ($filename, %h) ; 1019 1020 $filename = "tree" ; 1021 unlink $filename ; 1022 1023 # Enable duplicate records 1024 $DB_BTREE->{'flags'} = R_DUP ; 1025 1026 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1027 or die "Cannot open $filename: $!\n"; 1028 1029 # Add some key/value pairs to the file 1030 $h{'Wall'} = 'Larry' ; 1031 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1032 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1033 $h{'Smith'} = 'John' ; 1034 $h{'mouse'} = 'mickey' ; 1035 1036 # iterate through the associative array 1037 # and print each key/value pair. 1038 foreach (sort keys %h) 1039 { print "$_ -> $h{$_}\n" } 1040 1041 untie %h ; 1042 1043Here is the output: 1044 1045 Smith -> John 1046 Wall -> Larry 1047 Wall -> Larry 1048 Wall -> Larry 1049 mouse -> mickey 1050 1051As you can see 3 records have been successfully created with key C<Wall> 1052- the only thing is, when they are retrieved from the database they 1053I<seem> to have the same value, namely C<Larry>. The problem is caused 1054by the way that the associative array interface works. Basically, when 1055the associative array interface is used to fetch the value associated 1056with a given key, it will only ever retrieve the first value. 1057 1058Although it may not be immediately obvious from the code above, the 1059associative array interface can be used to write values with duplicate 1060keys, but it cannot be used to read them back from the database. 1061 1062The way to get around this problem is to use the Berkeley DB API method 1063called C<seq>. This method allows sequential access to key/value 1064pairs. See L<THE API INTERFACE> for details of both the C<seq> method 1065and the API in general. 1066 1067Here is the script above rewritten using the C<seq> API method. 1068 1069 use warnings ; 1070 use strict ; 1071 use DB_File ; 1072 1073 my ($filename, $x, %h, $status, $key, $value) ; 1074 1075 $filename = "tree" ; 1076 unlink $filename ; 1077 1078 # Enable duplicate records 1079 $DB_BTREE->{'flags'} = R_DUP ; 1080 1081 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1082 or die "Cannot open $filename: $!\n"; 1083 1084 # Add some key/value pairs to the file 1085 $h{'Wall'} = 'Larry' ; 1086 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1087 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1088 $h{'Smith'} = 'John' ; 1089 $h{'mouse'} = 'mickey' ; 1090 1091 # iterate through the btree using seq 1092 # and print each key/value pair. 1093 $key = $value = 0 ; 1094 for ($status = $x->seq($key, $value, R_FIRST) ; 1095 $status == 0 ; 1096 $status = $x->seq($key, $value, R_NEXT) ) 1097 { print "$key -> $value\n" } 1098 1099 undef $x ; 1100 untie %h ; 1101 1102that prints: 1103 1104 Smith -> John 1105 Wall -> Brick 1106 Wall -> Brick 1107 Wall -> Larry 1108 mouse -> mickey 1109 1110This time we have got all the key/value pairs, including the multiple 1111values associated with the key C<Wall>. 1112 1113To make life easier when dealing with duplicate keys, B<DB_File> comes with 1114a few utility methods. 1115 1116=head2 The get_dup() Method 1117 1118The C<get_dup> method assists in 1119reading duplicate values from BTREE databases. The method can take the 1120following forms: 1121 1122 $count = $x->get_dup($key) ; 1123 @list = $x->get_dup($key) ; 1124 %list = $x->get_dup($key, 1) ; 1125 1126In a scalar context the method returns the number of values associated 1127with the key, C<$key>. 1128 1129In list context, it returns all the values which match C<$key>. Note 1130that the values will be returned in an apparently random order. 1131 1132In list context, if the second parameter is present and evaluates 1133TRUE, the method returns an associative array. The keys of the 1134associative array correspond to the values that matched in the BTREE 1135and the values of the array are a count of the number of times that 1136particular value occurred in the BTREE. 1137 1138So assuming the database created above, we can use C<get_dup> like 1139this: 1140 1141 use warnings ; 1142 use strict ; 1143 use DB_File ; 1144 1145 my ($filename, $x, %h) ; 1146 1147 $filename = "tree" ; 1148 1149 # Enable duplicate records 1150 $DB_BTREE->{'flags'} = R_DUP ; 1151 1152 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1153 or die "Cannot open $filename: $!\n"; 1154 1155 my $cnt = $x->get_dup("Wall") ; 1156 print "Wall occurred $cnt times\n" ; 1157 1158 my %hash = $x->get_dup("Wall", 1) ; 1159 print "Larry is there\n" if $hash{'Larry'} ; 1160 print "There are $hash{'Brick'} Brick Walls\n" ; 1161 1162 my @list = sort $x->get_dup("Wall") ; 1163 print "Wall => [@list]\n" ; 1164 1165 @list = $x->get_dup("Smith") ; 1166 print "Smith => [@list]\n" ; 1167 1168 @list = $x->get_dup("Dog") ; 1169 print "Dog => [@list]\n" ; 1170 1171 1172and it will print: 1173 1174 Wall occurred 3 times 1175 Larry is there 1176 There are 2 Brick Walls 1177 Wall => [Brick Brick Larry] 1178 Smith => [John] 1179 Dog => [] 1180 1181=head2 The find_dup() Method 1182 1183 $status = $X->find_dup($key, $value) ; 1184 1185This method checks for the existence of a specific key/value pair. If the 1186pair exists, the cursor is left pointing to the pair and the method 1187returns 0. Otherwise the method returns a non-zero value. 1188 1189Assuming the database from the previous example: 1190 1191 use warnings ; 1192 use strict ; 1193 use DB_File ; 1194 1195 my ($filename, $x, %h, $found) ; 1196 1197 $filename = "tree" ; 1198 1199 # Enable duplicate records 1200 $DB_BTREE->{'flags'} = R_DUP ; 1201 1202 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1203 or die "Cannot open $filename: $!\n"; 1204 1205 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1206 print "Larry Wall is $found there\n" ; 1207 1208 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; 1209 print "Harry Wall is $found there\n" ; 1210 1211 undef $x ; 1212 untie %h ; 1213 1214prints this 1215 1216 Larry Wall is there 1217 Harry Wall is not there 1218 1219 1220=head2 The del_dup() Method 1221 1222 $status = $X->del_dup($key, $value) ; 1223 1224This method deletes a specific key/value pair. It returns 12250 if they exist and have been deleted successfully. 1226Otherwise the method returns a non-zero value. 1227 1228Again assuming the existence of the C<tree> database 1229 1230 use warnings ; 1231 use strict ; 1232 use DB_File ; 1233 1234 my ($filename, $x, %h, $found) ; 1235 1236 $filename = "tree" ; 1237 1238 # Enable duplicate records 1239 $DB_BTREE->{'flags'} = R_DUP ; 1240 1241 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1242 or die "Cannot open $filename: $!\n"; 1243 1244 $x->del_dup("Wall", "Larry") ; 1245 1246 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1247 print "Larry Wall is $found there\n" ; 1248 1249 undef $x ; 1250 untie %h ; 1251 1252prints this 1253 1254 Larry Wall is not there 1255 1256=head2 Matching Partial Keys 1257 1258The BTREE interface has a feature which allows partial keys to be 1259matched. This functionality is I<only> available when the C<seq> method 1260is used along with the R_CURSOR flag. 1261 1262 $x->seq($key, $value, R_CURSOR) ; 1263 1264Here is the relevant quote from the dbopen man page where it defines 1265the use of the R_CURSOR flag with seq: 1266 1267 Note, for the DB_BTREE access method, the returned key is not 1268 necessarily an exact match for the specified key. The returned key 1269 is the smallest key greater than or equal to the specified key, 1270 permitting partial key matches and range searches. 1271 1272In the example script below, the C<match> sub uses this feature to find 1273and print the first matching key/value pair given a partial key. 1274 1275 use warnings ; 1276 use strict ; 1277 use DB_File ; 1278 use Fcntl ; 1279 1280 my ($filename, $x, %h, $st, $key, $value) ; 1281 1282 sub match 1283 { 1284 my $key = shift ; 1285 my $value = 0; 1286 my $orig_key = $key ; 1287 $x->seq($key, $value, R_CURSOR) ; 1288 print "$orig_key\t-> $key\t-> $value\n" ; 1289 } 1290 1291 $filename = "tree" ; 1292 unlink $filename ; 1293 1294 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1295 or die "Cannot open $filename: $!\n"; 1296 1297 # Add some key/value pairs to the file 1298 $h{'mouse'} = 'mickey' ; 1299 $h{'Wall'} = 'Larry' ; 1300 $h{'Walls'} = 'Brick' ; 1301 $h{'Smith'} = 'John' ; 1302 1303 1304 $key = $value = 0 ; 1305 print "IN ORDER\n" ; 1306 for ($st = $x->seq($key, $value, R_FIRST) ; 1307 $st == 0 ; 1308 $st = $x->seq($key, $value, R_NEXT) ) 1309 1310 { print "$key -> $value\n" } 1311 1312 print "\nPARTIAL MATCH\n" ; 1313 1314 match "Wa" ; 1315 match "A" ; 1316 match "a" ; 1317 1318 undef $x ; 1319 untie %h ; 1320 1321Here is the output: 1322 1323 IN ORDER 1324 Smith -> John 1325 Wall -> Larry 1326 Walls -> Brick 1327 mouse -> mickey 1328 1329 PARTIAL MATCH 1330 Wa -> Wall -> Larry 1331 A -> Smith -> John 1332 a -> mouse -> mickey 1333 1334=head1 DB_RECNO 1335 1336DB_RECNO provides an interface to flat text files. Both variable and 1337fixed length records are supported. 1338 1339In order to make RECNO more compatible with Perl, the array offset for 1340all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. 1341 1342As with normal Perl arrays, a RECNO array can be accessed using 1343negative indexes. The index -1 refers to the last element of the array, 1344-2 the second last, and so on. Attempting to access an element before 1345the start of the array will raise a fatal run-time error. 1346 1347=head2 The 'bval' Option 1348 1349The operation of the bval option warrants some discussion. Here is the 1350definition of bval from the Berkeley DB 1.85 recno manual page: 1351 1352 The delimiting byte to be used to mark the end of a 1353 record for variable-length records, and the pad charac- 1354 ter for fixed-length records. If no value is speci- 1355 fied, newlines (``\n'') are used to mark the end of 1356 variable-length records and fixed-length records are 1357 padded with spaces. 1358 1359The second sentence is wrong. In actual fact bval will only default to 1360C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL 1361openinfo parameter is used at all, the value that happens to be in bval 1362will be used. That means you always have to specify bval when making 1363use of any of the options in the openinfo parameter. This documentation 1364error will be fixed in the next release of Berkeley DB. 1365 1366That clarifies the situation with regards Berkeley DB itself. What 1367about B<DB_File>? Well, the behavior defined in the quote above is 1368quite useful, so B<DB_File> conforms to it. 1369 1370That means that you can specify other options (e.g. cachesize) and 1371still have bval default to C<"\n"> for variable length records, and 1372space for fixed length records. 1373 1374Also note that the bval option only allows you to specify a single byte 1375as a delimiter. 1376 1377=head2 A Simple Example 1378 1379Here is a simple example that uses RECNO (if you are using a version 1380of Perl earlier than 5.004_57 this example won't work -- see 1381L<Extra RECNO Methods> for a workaround). 1382 1383 use warnings ; 1384 use strict ; 1385 use DB_File ; 1386 1387 my $filename = "text" ; 1388 unlink $filename ; 1389 1390 my @h ; 1391 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO 1392 or die "Cannot open file 'text': $!\n" ; 1393 1394 # Add a few key/value pairs to the file 1395 $h[0] = "orange" ; 1396 $h[1] = "blue" ; 1397 $h[2] = "yellow" ; 1398 1399 push @h, "green", "black" ; 1400 1401 my $elements = scalar @h ; 1402 print "The array contains $elements entries\n" ; 1403 1404 my $last = pop @h ; 1405 print "popped $last\n" ; 1406 1407 unshift @h, "white" ; 1408 my $first = shift @h ; 1409 print "shifted $first\n" ; 1410 1411 # Check for existence of a key 1412 print "Element 1 Exists with value $h[1]\n" if $h[1] ; 1413 1414 # use a negative index 1415 print "The last element is $h[-1]\n" ; 1416 print "The 2nd last element is $h[-2]\n" ; 1417 1418 untie @h ; 1419 1420Here is the output from the script: 1421 1422 The array contains 5 entries 1423 popped black 1424 shifted white 1425 Element 1 Exists with value blue 1426 The last element is green 1427 The 2nd last element is yellow 1428 1429=head2 Extra RECNO Methods 1430 1431If you are using a version of Perl earlier than 5.004_57, the tied 1432array interface is quite limited. In the example script above 1433C<push>, C<pop>, C<shift>, C<unshift> 1434or determining the array length will not work with a tied array. 1435 1436To make the interface more useful for older versions of Perl, a number 1437of methods are supplied with B<DB_File> to simulate the missing array 1438operations. All these methods are accessed via the object returned from 1439the tie call. 1440 1441Here are the methods: 1442 1443=over 5 1444 1445=item B<$X-E<gt>push(list) ;> 1446 1447Pushes the elements of C<list> to the end of the array. 1448 1449=item B<$value = $X-E<gt>pop ;> 1450 1451Removes and returns the last element of the array. 1452 1453=item B<$X-E<gt>shift> 1454 1455Removes and returns the first element of the array. 1456 1457=item B<$X-E<gt>unshift(list) ;> 1458 1459Pushes the elements of C<list> to the start of the array. 1460 1461=item B<$X-E<gt>length> 1462 1463Returns the number of elements in the array. 1464 1465=item B<$X-E<gt>splice(offset, length, elements);> 1466 1467Returns a splice of the array. 1468 1469=back 1470 1471=head2 Another Example 1472 1473Here is a more complete example that makes use of some of the methods 1474described above. It also makes use of the API interface directly (see 1475L<THE API INTERFACE>). 1476 1477 use warnings ; 1478 use strict ; 1479 my (@h, $H, $file, $i) ; 1480 use DB_File ; 1481 use Fcntl ; 1482 1483 $file = "text" ; 1484 1485 unlink $file ; 1486 1487 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO 1488 or die "Cannot open file $file: $!\n" ; 1489 1490 # first create a text file to play with 1491 $h[0] = "zero" ; 1492 $h[1] = "one" ; 1493 $h[2] = "two" ; 1494 $h[3] = "three" ; 1495 $h[4] = "four" ; 1496 1497 1498 # Print the records in order. 1499 # 1500 # The length method is needed here because evaluating a tied 1501 # array in a scalar context does not return the number of 1502 # elements in the array. 1503 1504 print "\nORIGINAL\n" ; 1505 foreach $i (0 .. $H->length - 1) { 1506 print "$i: $h[$i]\n" ; 1507 } 1508 1509 # use the push & pop methods 1510 $a = $H->pop ; 1511 $H->push("last") ; 1512 print "\nThe last record was [$a]\n" ; 1513 1514 # and the shift & unshift methods 1515 $a = $H->shift ; 1516 $H->unshift("first") ; 1517 print "The first record was [$a]\n" ; 1518 1519 # Use the API to add a new record after record 2. 1520 $i = 2 ; 1521 $H->put($i, "Newbie", R_IAFTER) ; 1522 1523 # and a new record before record 1. 1524 $i = 1 ; 1525 $H->put($i, "New One", R_IBEFORE) ; 1526 1527 # delete record 3 1528 $H->del(3) ; 1529 1530 # now print the records in reverse order 1531 print "\nREVERSE\n" ; 1532 for ($i = $H->length - 1 ; $i >= 0 ; -- $i) 1533 { print "$i: $h[$i]\n" } 1534 1535 # same again, but use the API functions instead 1536 print "\nREVERSE again\n" ; 1537 my ($s, $k, $v) = (0, 0, 0) ; 1538 for ($s = $H->seq($k, $v, R_LAST) ; 1539 $s == 0 ; 1540 $s = $H->seq($k, $v, R_PREV)) 1541 { print "$k: $v\n" } 1542 1543 undef $H ; 1544 untie @h ; 1545 1546and this is what it outputs: 1547 1548 ORIGINAL 1549 0: zero 1550 1: one 1551 2: two 1552 3: three 1553 4: four 1554 1555 The last record was [four] 1556 The first record was [zero] 1557 1558 REVERSE 1559 5: last 1560 4: three 1561 3: Newbie 1562 2: one 1563 1: New One 1564 0: first 1565 1566 REVERSE again 1567 5: last 1568 4: three 1569 3: Newbie 1570 2: one 1571 1: New One 1572 0: first 1573 1574Notes: 1575 1576=over 5 1577 1578=item 1. 1579 1580Rather than iterating through the array, C<@h> like this: 1581 1582 foreach $i (@h) 1583 1584it is necessary to use either this: 1585 1586 foreach $i (0 .. $H->length - 1) 1587 1588or this: 1589 1590 for ($a = $H->get($k, $v, R_FIRST) ; 1591 $a == 0 ; 1592 $a = $H->get($k, $v, R_NEXT) ) 1593 1594=item 2. 1595 1596Notice that both times the C<put> method was used the record index was 1597specified using a variable, C<$i>, rather than the literal value 1598itself. This is because C<put> will return the record number of the 1599inserted line via that parameter. 1600 1601=back 1602 1603=head1 THE API INTERFACE 1604 1605As well as accessing Berkeley DB using a tied hash or array, it is also 1606possible to make direct use of most of the API functions defined in the 1607Berkeley DB documentation. 1608 1609To do this you need to store a copy of the object returned from the tie. 1610 1611 $db = tie %hash, "DB_File", "filename" ; 1612 1613Once you have done that, you can access the Berkeley DB API functions 1614as B<DB_File> methods directly like this: 1615 1616 $db->put($key, $value, R_NOOVERWRITE) ; 1617 1618B<Important:> If you have saved a copy of the object returned from 1619C<tie>, the underlying database file will I<not> be closed until both 1620the tied variable is untied and all copies of the saved object are 1621destroyed. 1622 1623 use DB_File ; 1624 $db = tie %hash, "DB_File", "filename" 1625 or die "Cannot tie filename: $!" ; 1626 ... 1627 undef $db ; 1628 untie %hash ; 1629 1630See L<The untie() Gotcha> for more details. 1631 1632All the functions defined in L<dbopen> are available except for 1633close() and dbopen() itself. The B<DB_File> method interface to the 1634supported functions have been implemented to mirror the way Berkeley DB 1635works whenever possible. In particular note that: 1636 1637=over 5 1638 1639=item * 1640 1641The methods return a status value. All return 0 on success. 1642All return -1 to signify an error and set C<$!> to the exact 1643error code. The return code 1 generally (but not always) means that the 1644key specified did not exist in the database. 1645 1646Other return codes are defined. See below and in the Berkeley DB 1647documentation for details. The Berkeley DB documentation should be used 1648as the definitive source. 1649 1650=item * 1651 1652Whenever a Berkeley DB function returns data via one of its parameters, 1653the equivalent B<DB_File> method does exactly the same. 1654 1655=item * 1656 1657If you are careful, it is possible to mix API calls with the tied 1658hash/array interface in the same piece of code. Although only a few of 1659the methods used to implement the tied interface currently make use of 1660the cursor, you should always assume that the cursor has been changed 1661any time the tied hash/array interface is used. As an example, this 1662code will probably not do what you expect: 1663 1664 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1665 or die "Cannot tie $filename: $!" ; 1666 1667 # Get the first key/value pair and set the cursor 1668 $X->seq($key, $value, R_FIRST) ; 1669 1670 # this line will modify the cursor 1671 $count = scalar keys %x ; 1672 1673 # Get the second key/value pair. 1674 # oops, it didn't, it got the last key/value pair! 1675 $X->seq($key, $value, R_NEXT) ; 1676 1677The code above can be rearranged to get around the problem, like this: 1678 1679 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1680 or die "Cannot tie $filename: $!" ; 1681 1682 # this line will modify the cursor 1683 $count = scalar keys %x ; 1684 1685 # Get the first key/value pair and set the cursor 1686 $X->seq($key, $value, R_FIRST) ; 1687 1688 # Get the second key/value pair. 1689 # worked this time. 1690 $X->seq($key, $value, R_NEXT) ; 1691 1692=back 1693 1694All the constants defined in L<dbopen> for use in the flags parameters 1695in the methods defined below are also available. Refer to the Berkeley 1696DB documentation for the precise meaning of the flags values. 1697 1698Below is a list of the methods available. 1699 1700=over 5 1701 1702=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> 1703 1704Given a key (C<$key>) this method reads the value associated with it 1705from the database. The value read from the database is returned in the 1706C<$value> parameter. 1707 1708If the key does not exist the method returns 1. 1709 1710No flags are currently defined for this method. 1711 1712=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> 1713 1714Stores the key/value pair in the database. 1715 1716If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter 1717will have the record number of the inserted key/value pair set. 1718 1719Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and 1720R_SETCURSOR. 1721 1722=item B<$status = $X-E<gt>del($key [, $flags]) ;> 1723 1724Removes all key/value pairs with key C<$key> from the database. 1725 1726A return code of 1 means that the requested key was not in the 1727database. 1728 1729R_CURSOR is the only valid flag at present. 1730 1731=item B<$status = $X-E<gt>fd ;> 1732 1733Returns the file descriptor for the underlying database. 1734 1735See L<Locking: The Trouble with fd> for an explanation for why you should 1736not use C<fd> to lock your database. 1737 1738=item B<$status = $X-E<gt>seq($key, $value, $flags) ;> 1739 1740This interface allows sequential retrieval from the database. See 1741L<dbopen> for full details. 1742 1743Both the C<$key> and C<$value> parameters will be set to the key/value 1744pair read from the database. 1745 1746The flags parameter is mandatory. The valid flag values are R_CURSOR, 1747R_FIRST, R_LAST, R_NEXT and R_PREV. 1748 1749=item B<$status = $X-E<gt>sync([$flags]) ;> 1750 1751Flushes any cached buffers to disk. 1752 1753R_RECNOSYNC is the only valid flag at present. 1754 1755=back 1756 1757=head1 DBM FILTERS 1758 1759A DBM Filter is a piece of code that is be used when you I<always> 1760want to make the same transformation to all keys and/or values in a 1761DBM database. 1762 1763There are four methods associated with DBM Filters. All work identically, 1764and each is used to install (or uninstall) a single DBM Filter. Each 1765expects a single parameter, namely a reference to a sub. The only 1766difference between them is the place that the filter is installed. 1767 1768To summarise: 1769 1770=over 5 1771 1772=item B<filter_store_key> 1773 1774If a filter has been installed with this method, it will be invoked 1775every time you write a key to a DBM database. 1776 1777=item B<filter_store_value> 1778 1779If a filter has been installed with this method, it will be invoked 1780every time you write a value to a DBM database. 1781 1782 1783=item B<filter_fetch_key> 1784 1785If a filter has been installed with this method, it will be invoked 1786every time you read a key from a DBM database. 1787 1788=item B<filter_fetch_value> 1789 1790If a filter has been installed with this method, it will be invoked 1791every time you read a value from a DBM database. 1792 1793=back 1794 1795You can use any combination of the methods, from none, to all four. 1796 1797All filter methods return the existing filter, if present, or C<undef> 1798in not. 1799 1800To delete a filter pass C<undef> to it. 1801 1802=head2 The Filter 1803 1804When each filter is called by Perl, a local copy of C<$_> will contain 1805the key or value to be filtered. Filtering is achieved by modifying 1806the contents of C<$_>. The return code from the filter is ignored. 1807 1808=head2 An Example -- the NULL termination problem. 1809 1810Consider the following scenario. You have a DBM database 1811that you need to share with a third-party C application. The C application 1812assumes that I<all> keys and values are NULL terminated. Unfortunately 1813when Perl writes to DBM databases it doesn't use NULL termination, so 1814your Perl application will have to manage NULL termination itself. When 1815you write to the database you will have to use something like this: 1816 1817 $hash{"$key\0"} = "$value\0" ; 1818 1819Similarly the NULL needs to be taken into account when you are considering 1820the length of existing keys/values. 1821 1822It would be much better if you could ignore the NULL terminations issue 1823in the main application code and have a mechanism that automatically 1824added the terminating NULL to all keys and values whenever you write to 1825the database and have them removed when you read from the database. As I'm 1826sure you have already guessed, this is a problem that DBM Filters can 1827fix very easily. 1828 1829 use warnings ; 1830 use strict ; 1831 use DB_File ; 1832 1833 my %hash ; 1834 my $filename = "filt" ; 1835 unlink $filename ; 1836 1837 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1838 or die "Cannot open $filename: $!\n" ; 1839 1840 # Install DBM Filters 1841 $db->filter_fetch_key ( sub { s/\0$// } ) ; 1842 $db->filter_store_key ( sub { $_ .= "\0" } ) ; 1843 $db->filter_fetch_value( sub { s/\0$// } ) ; 1844 $db->filter_store_value( sub { $_ .= "\0" } ) ; 1845 1846 $hash{"abc"} = "def" ; 1847 my $a = $hash{"ABC"} ; 1848 # ... 1849 undef $db ; 1850 untie %hash ; 1851 1852Hopefully the contents of each of the filters should be 1853self-explanatory. Both "fetch" filters remove the terminating NULL, 1854and both "store" filters add a terminating NULL. 1855 1856 1857=head2 Another Example -- Key is a C int. 1858 1859Here is another real-life example. By default, whenever Perl writes to 1860a DBM database it always writes the key and value as strings. So when 1861you use this: 1862 1863 $hash{12345} = "something" ; 1864 1865the key 12345 will get stored in the DBM database as the 5 byte string 1866"12345". If you actually want the key to be stored in the DBM database 1867as a C int, you will have to use C<pack> when writing, and C<unpack> 1868when reading. 1869 1870Here is a DBM Filter that does it: 1871 1872 use warnings ; 1873 use strict ; 1874 use DB_File ; 1875 my %hash ; 1876 my $filename = "filt" ; 1877 unlink $filename ; 1878 1879 1880 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1881 or die "Cannot open $filename: $!\n" ; 1882 1883 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; 1884 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; 1885 $hash{123} = "def" ; 1886 # ... 1887 undef $db ; 1888 untie %hash ; 1889 1890This time only two filters have been used -- we only need to manipulate 1891the contents of the key, so it wasn't necessary to install any value 1892filters. 1893 1894=head1 HINTS AND TIPS 1895 1896 1897=head2 Locking: The Trouble with fd 1898 1899Until version 1.72 of this module, the recommended technique for locking 1900B<DB_File> databases was to flock the filehandle returned from the "fd" 1901function. Unfortunately this technique has been shown to be fundamentally 1902flawed (Kudos to David Harris for tracking this down). Use it at your own 1903peril! 1904 1905The locking technique went like this. 1906 1907 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644) 1908 || die "dbcreat foo.db $!"; 1909 $fd = $db->fd; 1910 open(DB_FH, "+<&=$fd") || die "dup $!"; 1911 flock (DB_FH, LOCK_EX) || die "flock: $!"; 1912 ... 1913 $db{"Tom"} = "Jerry" ; 1914 ... 1915 flock(DB_FH, LOCK_UN); 1916 undef $db; 1917 untie %db; 1918 close(DB_FH); 1919 1920In simple terms, this is what happens: 1921 1922=over 5 1923 1924=item 1. 1925 1926Use "tie" to open the database. 1927 1928=item 2. 1929 1930Lock the database with fd & flock. 1931 1932=item 3. 1933 1934Read & Write to the database. 1935 1936=item 4. 1937 1938Unlock and close the database. 1939 1940=back 1941 1942Here is the crux of the problem. A side-effect of opening the B<DB_File> 1943database in step 2 is that an initial block from the database will get 1944read from disk and cached in memory. 1945 1946To see why this is a problem, consider what can happen when two processes, 1947say "A" and "B", both want to update the same B<DB_File> database 1948using the locking steps outlined above. Assume process "A" has already 1949opened the database and has a write lock, but it hasn't actually updated 1950the database yet (it has finished step 2, but not started step 3 yet). Now 1951process "B" tries to open the same database - step 1 will succeed, 1952but it will block on step 2 until process "A" releases the lock. The 1953important thing to notice here is that at this point in time both 1954processes will have cached identical initial blocks from the database. 1955 1956Now process "A" updates the database and happens to change some of the 1957data held in the initial buffer. Process "A" terminates, flushing 1958all cached data to disk and releasing the database lock. At this point 1959the database on disk will correctly reflect the changes made by process 1960"A". 1961 1962With the lock released, process "B" can now continue. It also updates the 1963database and unfortunately it too modifies the data that was in its 1964initial buffer. Once that data gets flushed to disk it will overwrite 1965some/all of the changes process "A" made to the database. 1966 1967The result of this scenario is at best a database that doesn't contain 1968what you expect. At worst the database will corrupt. 1969 1970The above won't happen every time competing process update the same 1971B<DB_File> database, but it does illustrate why the technique should 1972not be used. 1973 1974=head2 Safe ways to lock a database 1975 1976Starting with version 2.x, Berkeley DB has internal support for locking. 1977The companion module to this one, B<BerkeleyDB>, provides an interface 1978to this locking functionality. If you are serious about locking 1979Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. 1980 1981If using B<BerkeleyDB> isn't an option, there are a number of modules 1982available on CPAN that can be used to implement locking. Each one 1983implements locking differently and has different goals in mind. It is 1984therefore worth knowing the difference, so that you can pick the right 1985one for your application. Here are the three locking wrappers: 1986 1987=over 5 1988 1989=item B<Tie::DB_Lock> 1990 1991A B<DB_File> wrapper which creates copies of the database file for 1992read access, so that you have a kind of a multiversioning concurrent read 1993system. However, updates are still serial. Use for databases where reads 1994may be lengthy and consistency problems may occur. 1995 1996=item B<Tie::DB_LockFile> 1997 1998A B<DB_File> wrapper that has the ability to lock and unlock the database 1999while it is being used. Avoids the tie-before-flock problem by simply 2000re-tie-ing the database when you get or drop a lock. Because of the 2001flexibility in dropping and re-acquiring the lock in the middle of a 2002session, this can be massaged into a system that will work with long 2003updates and/or reads if the application follows the hints in the POD 2004documentation. 2005 2006=item B<DB_File::Lock> 2007 2008An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile 2009before tie-ing the database and drops the lock after the untie. Allows 2010one to use the same lockfile for multiple databases to avoid deadlock 2011problems, if desired. Use for databases where updates are reads are 2012quick and simple flock locking semantics are enough. 2013 2014=back 2015 2016=head2 Sharing Databases With C Applications 2017 2018There is no technical reason why a Berkeley DB database cannot be 2019shared by both a Perl and a C application. 2020 2021The vast majority of problems that are reported in this area boil down 2022to the fact that C strings are NULL terminated, whilst Perl strings are 2023not. See L<DBM FILTERS> for a generic way to work around this problem. 2024 2025Here is a real example. Netscape 2.0 keeps a record of the locations you 2026visit along with the time you last visited them in a DB_HASH database. 2027This is usually stored in the file F<~/.netscape/history.db>. The key 2028field in the database is the location string and the value field is the 2029time the location was last visited stored as a 4 byte binary value. 2030 2031If you haven't already guessed, the location string is stored with a 2032terminating NULL. This means you need to be careful when accessing the 2033database. 2034 2035Here is a snippet of code that is loosely based on Tom Christiansen's 2036I<ggh> script (available from your nearest CPAN archive in 2037F<authors/id/TOMC/scripts/nshist.gz>). 2038 2039 use warnings ; 2040 use strict ; 2041 use DB_File ; 2042 use Fcntl ; 2043 2044 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ; 2045 $dotdir = $ENV{HOME} || $ENV{LOGNAME}; 2046 2047 $HISTORY = "$dotdir/.netscape/history.db"; 2048 2049 tie %hist_db, 'DB_File', $HISTORY 2050 or die "Cannot open $HISTORY: $!\n" ;; 2051 2052 # Dump the complete database 2053 while ( ($href, $binary_time) = each %hist_db ) { 2054 2055 # remove the terminating NULL 2056 $href =~ s/\x00$// ; 2057 2058 # convert the binary time into a user friendly string 2059 $date = localtime unpack("V", $binary_time); 2060 print "$date $href\n" ; 2061 } 2062 2063 # check for the existence of a specific key 2064 # remember to add the NULL 2065 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { 2066 $date = localtime unpack("V", $binary_time) ; 2067 print "Last visited mox.perl.com on $date\n" ; 2068 } 2069 else { 2070 print "Never visited mox.perl.com\n" 2071 } 2072 2073 untie %hist_db ; 2074 2075=head2 The untie() Gotcha 2076 2077If you make use of the Berkeley DB API, it is I<very> strongly 2078recommended that you read L<perltie/The untie Gotcha>. 2079 2080Even if you don't currently make use of the API interface, it is still 2081worth reading it. 2082 2083Here is an example which illustrates the problem from a B<DB_File> 2084perspective: 2085 2086 use DB_File ; 2087 use Fcntl ; 2088 2089 my %x ; 2090 my $X ; 2091 2092 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC 2093 or die "Cannot tie first time: $!" ; 2094 2095 $x{123} = 456 ; 2096 2097 untie %x ; 2098 2099 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2100 or die "Cannot tie second time: $!" ; 2101 2102 untie %x ; 2103 2104When run, the script will produce this error message: 2105 2106 Cannot tie second time: Invalid argument at bad.file line 14. 2107 2108Although the error message above refers to the second tie() statement 2109in the script, the source of the problem is really with the untie() 2110statement that precedes it. 2111 2112Having read L<perltie> you will probably have already guessed that the 2113error is caused by the extra copy of the tied object stored in C<$X>. 2114If you haven't, then the problem boils down to the fact that the 2115B<DB_File> destructor, DESTROY, will not be called until I<all> 2116references to the tied object are destroyed. Both the tied variable, 2117C<%x>, and C<$X> above hold a reference to the object. The call to 2118untie() will destroy the first, but C<$X> still holds a valid 2119reference, so the destructor will not get called and the database file 2120F<tst.fil> will remain open. The fact that Berkeley DB then reports the 2121attempt to open a database that is already open via the catch-all 2122"Invalid argument" doesn't help. 2123 2124If you run the script with the C<-w> flag the error message becomes: 2125 2126 untie attempted while 1 inner references still exist at bad.file line 12. 2127 Cannot tie second time: Invalid argument at bad.file line 14. 2128 2129which pinpoints the real problem. Finally the script can now be 2130modified to fix the original problem by destroying the API object 2131before the untie: 2132 2133 ... 2134 $x{123} = 456 ; 2135 2136 undef $X ; 2137 untie %x ; 2138 2139 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2140 ... 2141 2142 2143=head1 COMMON QUESTIONS 2144 2145=head2 Why is there Perl source in my database? 2146 2147If you look at the contents of a database file created by DB_File, 2148there can sometimes be part of a Perl script included in it. 2149 2150This happens because Berkeley DB uses dynamic memory to allocate 2151buffers which will subsequently be written to the database file. Being 2152dynamic, the memory could have been used for anything before DB 2153malloced it. As Berkeley DB doesn't clear the memory once it has been 2154allocated, the unused portions will contain random junk. In the case 2155where a Perl script gets written to the database, the random junk will 2156correspond to an area of dynamic memory that happened to be used during 2157the compilation of the script. 2158 2159Unless you don't like the possibility of there being part of your Perl 2160scripts embedded in a database file, this is nothing to worry about. 2161 2162=head2 How do I store complex data structures with DB_File? 2163 2164Although B<DB_File> cannot do this directly, there is a module which 2165can layer transparently over B<DB_File> to accomplish this feat. 2166 2167Check out the MLDBM module, available on CPAN in the directory 2168F<modules/by-module/MLDBM>. 2169 2170=head2 What does "Invalid Argument" mean? 2171 2172You will get this error message when one of the parameters in the 2173C<tie> call is wrong. Unfortunately there are quite a few parameters to 2174get wrong, so it can be difficult to figure out which one it is. 2175 2176Here are a couple of possibilities: 2177 2178=over 5 2179 2180=item 1. 2181 2182Attempting to reopen a database without closing it. 2183 2184=item 2. 2185 2186Using the O_WRONLY flag. 2187 2188=back 2189 2190=head2 What does "Bareword 'DB_File' not allowed" mean? 2191 2192You will encounter this particular error message when you have the 2193C<strict 'subs'> pragma (or the full strict pragma) in your script. 2194Consider this script: 2195 2196 use warnings ; 2197 use strict ; 2198 use DB_File ; 2199 my %x ; 2200 tie %x, DB_File, "filename" ; 2201 2202Running it produces the error in question: 2203 2204 Bareword "DB_File" not allowed while "strict subs" in use 2205 2206To get around the error, place the word C<DB_File> in either single or 2207double quotes, like this: 2208 2209 tie %x, "DB_File", "filename" ; 2210 2211Although it might seem like a real pain, it is really worth the effort 2212of having a C<use strict> in all your scripts. 2213 2214=head1 REFERENCES 2215 2216Articles that are either about B<DB_File> or make use of it. 2217 2218=over 5 2219 2220=item 1. 2221 2222I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), 2223Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 2224 2225=back 2226 2227=head1 HISTORY 2228 2229Moved to the Changes file. 2230 2231=head1 BUGS 2232 2233Some older versions of Berkeley DB had problems with fixed length 2234records using the RECNO file format. This problem has been fixed since 2235version 1.85 of Berkeley DB. 2236 2237I am sure there are bugs in the code. If you do find any, or can 2238suggest any enhancements, I would welcome your comments. 2239 2240=head1 AVAILABILITY 2241 2242B<DB_File> comes with the standard Perl source distribution. Look in 2243the directory F<ext/DB_File>. Given the amount of time between releases 2244of Perl the version that ships with Perl is quite likely to be out of 2245date, so the most recent version can always be found on CPAN (see 2246L<perlmodlib/CPAN> for details), in the directory 2247F<modules/by-module/DB_File>. 2248 2249This version of B<DB_File> will work with either version 1.x, 2.x or 22503.x of Berkeley DB, but is limited to the functionality provided by 2251version 1. 2252 2253The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>. 2254All versions of Berkeley DB are available there. 2255 2256Alternatively, Berkeley DB version 1 is available at your nearest CPAN 2257archive in F<src/misc/db.1.85.tar.gz>. 2258 2259If you are running IRIX, then get Berkeley DB version 1 from 2260F<http://reality.sgi.com/ariel>. It has the patches necessary to 2261compile properly on IRIX 5.3. 2262 2263=head1 COPYRIGHT 2264 2265Copyright (c) 1995-2007 Paul Marquess. All rights reserved. This program 2266is free software; you can redistribute it and/or modify it under the 2267same terms as Perl itself. 2268 2269Although B<DB_File> is covered by the Perl license, the library it 2270makes use of, namely Berkeley DB, is not. Berkeley DB has its own 2271copyright and its own license. Please take the time to read it. 2272 2273Here are are few words taken from the Berkeley DB FAQ (at 2274F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license: 2275 2276 Do I have to license DB to use it in Perl scripts? 2277 2278 No. The Berkeley DB license requires that software that uses 2279 Berkeley DB be freely redistributable. In the case of Perl, that 2280 software is Perl, and not your scripts. Any Perl scripts that you 2281 write are your property, including scripts that make use of 2282 Berkeley DB. Neither the Perl license nor the Berkeley DB license 2283 place any restriction on what you may do with them. 2284 2285If you are in any doubt about the license situation, contact either the 2286Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. 2287 2288 2289=head1 SEE ALSO 2290 2291L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, 2292L<perldbmfilter> 2293 2294=head1 AUTHOR 2295 2296The DB_File interface was written by Paul Marquess 2297E<lt>pmqs@cpan.orgE<gt>. 2298 2299=cut 2300