1# DB_File.pm -- Perl 5 interface to Berkeley DB 2# 3# Written by Paul Marquess (pmqs@cpan.org) 4# 5# Copyright (c) 1995-2013 Paul Marquess. All rights reserved. 6# This program is free software; you can redistribute it and/or 7# modify it under the same terms as Perl itself. 8 9 10package DB_File::HASHINFO ; 11 12require 5.00504; 13 14use warnings; 15use strict; 16use Carp; 17require Tie::Hash; 18@DB_File::HASHINFO::ISA = qw(Tie::Hash); 19 20sub new 21{ 22 my $pkg = shift ; 23 my %x ; 24 tie %x, $pkg ; 25 bless \%x, $pkg ; 26} 27 28 29sub TIEHASH 30{ 31 my $pkg = shift ; 32 33 bless { VALID => { 34 bsize => 1, 35 ffactor => 1, 36 nelem => 1, 37 cachesize => 1, 38 hash => 2, 39 lorder => 1, 40 }, 41 GOT => {} 42 }, $pkg ; 43} 44 45 46sub FETCH 47{ 48 my $self = shift ; 49 my $key = shift ; 50 51 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; 52 53 my $pkg = ref $self ; 54 croak "${pkg}::FETCH - Unknown element '$key'" ; 55} 56 57 58sub STORE 59{ 60 my $self = shift ; 61 my $key = shift ; 62 my $value = shift ; 63 64 my $type = $self->{VALID}{$key}; 65 66 if ( $type ) 67 { 68 croak "Key '$key' not associated with a code reference" 69 if $type == 2 && !ref $value && ref $value ne 'CODE'; 70 $self->{GOT}{$key} = $value ; 71 return ; 72 } 73 74 my $pkg = ref $self ; 75 croak "${pkg}::STORE - Unknown element '$key'" ; 76} 77 78sub DELETE 79{ 80 my $self = shift ; 81 my $key = shift ; 82 83 if ( exists $self->{VALID}{$key} ) 84 { 85 delete $self->{GOT}{$key} ; 86 return ; 87 } 88 89 my $pkg = ref $self ; 90 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; 91} 92 93sub EXISTS 94{ 95 my $self = shift ; 96 my $key = shift ; 97 98 exists $self->{VALID}{$key} ; 99} 100 101sub NotHere 102{ 103 my $self = shift ; 104 my $method = shift ; 105 106 croak ref($self) . " does not define the method ${method}" ; 107} 108 109sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } 110sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } 111sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } 112 113package DB_File::RECNOINFO ; 114 115use warnings; 116use strict ; 117 118@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; 119 120sub TIEHASH 121{ 122 my $pkg = shift ; 123 124 bless { VALID => { map {$_, 1} 125 qw( bval cachesize psize flags lorder reclen bfname ) 126 }, 127 GOT => {}, 128 }, $pkg ; 129} 130 131package DB_File::BTREEINFO ; 132 133use warnings; 134use strict ; 135 136@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; 137 138sub TIEHASH 139{ 140 my $pkg = shift ; 141 142 bless { VALID => { 143 flags => 1, 144 cachesize => 1, 145 maxkeypage => 1, 146 minkeypage => 1, 147 psize => 1, 148 compare => 2, 149 prefix => 2, 150 lorder => 1, 151 }, 152 GOT => {}, 153 }, $pkg ; 154} 155 156 157package DB_File ; 158 159use warnings; 160use strict; 161our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO); 162our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error); 163use Carp; 164 165 166$VERSION = "1.831" ; 167$VERSION = eval $VERSION; # needed for dev releases 168 169{ 170 local $SIG{__WARN__} = sub {$splice_end_array_no_length = join(" ",@_);}; 171 my @a =(1); splice(@a, 3); 172 $splice_end_array_no_length = 173 ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /); 174} 175{ 176 local $SIG{__WARN__} = sub {$splice_end_array = join(" ", @_);}; 177 my @a =(1); splice(@a, 3, 1); 178 $splice_end_array = 179 ($splice_end_array =~ /^splice\(\) offset past end of array at /); 180} 181 182#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; 183$DB_BTREE = new DB_File::BTREEINFO ; 184$DB_HASH = new DB_File::HASHINFO ; 185$DB_RECNO = new DB_File::RECNOINFO ; 186 187require Tie::Hash; 188require Exporter; 189BEGIN { 190 $use_XSLoader = 1 ; 191 { local $SIG{__DIE__} ; eval { require XSLoader } ; } 192 193 if ($@) { 194 $use_XSLoader = 0 ; 195 require DynaLoader; 196 @ISA = qw(DynaLoader); 197 } 198} 199 200push @ISA, qw(Tie::Hash Exporter); 201@EXPORT = qw( 202 $DB_BTREE $DB_HASH $DB_RECNO 203 204 BTREEMAGIC 205 BTREEVERSION 206 DB_LOCK 207 DB_SHMEM 208 DB_TXN 209 HASHMAGIC 210 HASHVERSION 211 MAX_PAGE_NUMBER 212 MAX_PAGE_OFFSET 213 MAX_REC_NUMBER 214 RET_ERROR 215 RET_SPECIAL 216 RET_SUCCESS 217 R_CURSOR 218 R_DUP 219 R_FIRST 220 R_FIXEDLEN 221 R_IAFTER 222 R_IBEFORE 223 R_LAST 224 R_NEXT 225 R_NOKEY 226 R_NOOVERWRITE 227 R_PREV 228 R_RECNOSYNC 229 R_SETCURSOR 230 R_SNAPSHOT 231 __R_UNUSED 232 233); 234 235sub AUTOLOAD { 236 my($constname); 237 ($constname = $AUTOLOAD) =~ s/.*:://; 238 my ($error, $val) = constant($constname); 239 Carp::croak $error if $error; 240 no strict 'refs'; 241 *{$AUTOLOAD} = sub { $val }; 242 goto &{$AUTOLOAD}; 243} 244 245 246eval { 247 # Make all Fcntl O_XXX constants available for importing 248 require Fcntl; 249 my @O = grep /^O_/, @Fcntl::EXPORT; 250 Fcntl->import(@O); # first we import what we want to export 251 push(@EXPORT, @O); 252}; 253 254if ($use_XSLoader) 255 { XSLoader::load("DB_File", $VERSION)} 256else 257 { bootstrap DB_File $VERSION } 258 259sub tie_hash_or_array 260{ 261 my (@arg) = @_ ; 262 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; 263 264 use File::Spec; 265 $arg[1] = File::Spec->rel2abs($arg[1]) 266 if defined $arg[1] ; 267 268 $arg[4] = tied %{ $arg[4] } 269 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; 270 271 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2]; 272 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3]; 273 274 # make recno in Berkeley DB version 2 (or better) work like 275 # recno in version 1. 276 if ($db_version >= 4 and ! $tieHASH) { 277 $arg[2] |= O_CREAT(); 278 } 279 280 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and 281 $arg[1] and ! -e $arg[1]) { 282 open(FH, ">$arg[1]") or return undef ; 283 close FH ; 284 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; 285 } 286 287 DoTie_($tieHASH, @arg) ; 288} 289 290sub TIEHASH 291{ 292 tie_hash_or_array(@_) ; 293} 294 295sub TIEARRAY 296{ 297 tie_hash_or_array(@_) ; 298} 299 300sub CLEAR 301{ 302 my $self = shift; 303 my $key = 0 ; 304 my $value = "" ; 305 my $status = $self->seq($key, $value, R_FIRST()); 306 my @keys; 307 308 while ($status == 0) { 309 push @keys, $key; 310 $status = $self->seq($key, $value, R_NEXT()); 311 } 312 foreach $key (reverse @keys) { 313 my $s = $self->del($key); 314 } 315} 316 317sub EXTEND { } 318 319sub STORESIZE 320{ 321 my $self = shift; 322 my $length = shift ; 323 my $current_length = $self->length() ; 324 325 if ($length < $current_length) { 326 my $key ; 327 for ($key = $current_length - 1 ; $key >= $length ; -- $key) 328 { $self->del($key) } 329 } 330 elsif ($length > $current_length) { 331 $self->put($length-1, "") ; 332 } 333} 334 335 336sub SPLICE 337{ 338 my $self = shift; 339 my $offset = shift; 340 if (not defined $offset) { 341 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 342 $offset = 0; 343 } 344 345 my $has_length = @_; 346 my $length = @_ ? shift : 0; 347 # Carping about definedness comes _after_ the OFFSET sanity check. 348 # This is so we get the same error messages as Perl's splice(). 349 # 350 351 my @list = @_; 352 353 my $size = $self->FETCHSIZE(); 354 355 # 'If OFFSET is negative then it start that far from the end of 356 # the array.' 357 # 358 if ($offset < 0) { 359 my $new_offset = $size + $offset; 360 if ($new_offset < 0) { 361 die "Modification of non-creatable array value attempted, " 362 . "subscript $offset"; 363 } 364 $offset = $new_offset; 365 } 366 367 if (not defined $length) { 368 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 369 $length = 0; 370 } 371 372 if ($offset > $size) { 373 $offset = $size; 374 warnings::warnif('misc', 'splice() offset past end of array') 375 if $has_length ? $splice_end_array : $splice_end_array_no_length; 376 } 377 378 # 'If LENGTH is omitted, removes everything from OFFSET onward.' 379 if (not defined $length) { 380 $length = $size - $offset; 381 } 382 383 # 'If LENGTH is negative, leave that many elements off the end of 384 # the array.' 385 # 386 if ($length < 0) { 387 $length = $size - $offset + $length; 388 389 if ($length < 0) { 390 # The user must have specified a length bigger than the 391 # length of the array passed in. But perl's splice() 392 # doesn't catch this, it just behaves as for length=0. 393 # 394 $length = 0; 395 } 396 } 397 398 if ($length > $size - $offset) { 399 $length = $size - $offset; 400 } 401 402 # $num_elems holds the current number of elements in the database. 403 my $num_elems = $size; 404 405 # 'Removes the elements designated by OFFSET and LENGTH from an 406 # array,'... 407 # 408 my @removed = (); 409 foreach (0 .. $length - 1) { 410 my $old; 411 my $status = $self->get($offset, $old); 412 if ($status != 0) { 413 my $msg = "error from Berkeley DB on get($offset, \$old)"; 414 if ($status == 1) { 415 $msg .= ' (no such element?)'; 416 } 417 else { 418 $msg .= ": error status $status"; 419 if (defined $! and $! ne '') { 420 $msg .= ", message $!"; 421 } 422 } 423 die $msg; 424 } 425 push @removed, $old; 426 427 $status = $self->del($offset); 428 if ($status != 0) { 429 my $msg = "error from Berkeley DB on del($offset)"; 430 if ($status == 1) { 431 $msg .= ' (no such element?)'; 432 } 433 else { 434 $msg .= ": error status $status"; 435 if (defined $! and $! ne '') { 436 $msg .= ", message $!"; 437 } 438 } 439 die $msg; 440 } 441 442 -- $num_elems; 443 } 444 445 # ...'and replaces them with the elements of LIST, if any.' 446 my $pos = $offset; 447 while (defined (my $elem = shift @list)) { 448 my $old_pos = $pos; 449 my $status; 450 if ($pos >= $num_elems) { 451 $status = $self->put($pos, $elem); 452 } 453 else { 454 $status = $self->put($pos, $elem, $self->R_IBEFORE); 455 } 456 457 if ($status != 0) { 458 my $msg = "error from Berkeley DB on put($pos, $elem, ...)"; 459 if ($status == 1) { 460 $msg .= ' (no such element?)'; 461 } 462 else { 463 $msg .= ", error status $status"; 464 if (defined $! and $! ne '') { 465 $msg .= ", message $!"; 466 } 467 } 468 die $msg; 469 } 470 471 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE" 472 if $old_pos != $pos; 473 474 ++ $pos; 475 ++ $num_elems; 476 } 477 478 if (wantarray) { 479 # 'In list context, returns the elements removed from the 480 # array.' 481 # 482 return @removed; 483 } 484 elsif (defined wantarray and not wantarray) { 485 # 'In scalar context, returns the last element removed, or 486 # undef if no elements are removed.' 487 # 488 if (@removed) { 489 my $last = pop @removed; 490 return "$last"; 491 } 492 else { 493 return undef; 494 } 495 } 496 elsif (not defined wantarray) { 497 # Void context 498 } 499 else { die } 500} 501sub ::DB_File::splice { &SPLICE } 502 503sub find_dup 504{ 505 croak "Usage: \$db->find_dup(key,value)\n" 506 unless @_ == 3 ; 507 508 my $db = shift ; 509 my ($origkey, $value_wanted) = @_ ; 510 my ($key, $value) = ($origkey, 0); 511 my ($status) = 0 ; 512 513 for ($status = $db->seq($key, $value, R_CURSOR() ) ; 514 $status == 0 ; 515 $status = $db->seq($key, $value, R_NEXT() ) ) { 516 517 return 0 if $key eq $origkey and $value eq $value_wanted ; 518 } 519 520 return $status ; 521} 522 523sub del_dup 524{ 525 croak "Usage: \$db->del_dup(key,value)\n" 526 unless @_ == 3 ; 527 528 my $db = shift ; 529 my ($key, $value) = @_ ; 530 my ($status) = $db->find_dup($key, $value) ; 531 return $status if $status != 0 ; 532 533 $status = $db->del($key, R_CURSOR() ) ; 534 return $status ; 535} 536 537sub get_dup 538{ 539 croak "Usage: \$db->get_dup(key [,flag])\n" 540 unless @_ == 2 or @_ == 3 ; 541 542 my $db = shift ; 543 my $key = shift ; 544 my $flag = shift ; 545 my $value = 0 ; 546 my $origkey = $key ; 547 my $wantarray = wantarray ; 548 my %values = () ; 549 my @values = () ; 550 my $counter = 0 ; 551 my $status = 0 ; 552 553 # iterate through the database until either EOF ($status == 0) 554 # or a different key is encountered ($key ne $origkey). 555 for ($status = $db->seq($key, $value, R_CURSOR()) ; 556 $status == 0 and $key eq $origkey ; 557 $status = $db->seq($key, $value, R_NEXT()) ) { 558 559 # save the value or count number of matches 560 if ($wantarray) { 561 if ($flag) 562 { ++ $values{$value} } 563 else 564 { push (@values, $value) } 565 } 566 else 567 { ++ $counter } 568 569 } 570 571 return ($wantarray ? ($flag ? %values : @values) : $counter) ; 572} 573 574 575sub STORABLE_freeze 576{ 577 my $type = ref shift; 578 croak "Cannot freeze $type object\n"; 579} 580 581sub STORABLE_thaw 582{ 583 my $type = ref shift; 584 croak "Cannot thaw $type object\n"; 585} 586 587 588 5891; 590__END__ 591 592=head1 NAME 593 594DB_File - Perl5 access to Berkeley DB version 1.x 595 596=head1 SYNOPSIS 597 598 use DB_File; 599 600 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; 601 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; 602 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; 603 604 $status = $X->del($key [, $flags]) ; 605 $status = $X->put($key, $value [, $flags]) ; 606 $status = $X->get($key, $value [, $flags]) ; 607 $status = $X->seq($key, $value, $flags) ; 608 $status = $X->sync([$flags]) ; 609 $status = $X->fd ; 610 611 # BTREE only 612 $count = $X->get_dup($key) ; 613 @list = $X->get_dup($key) ; 614 %list = $X->get_dup($key, 1) ; 615 $status = $X->find_dup($key, $value) ; 616 $status = $X->del_dup($key, $value) ; 617 618 # RECNO only 619 $a = $X->length; 620 $a = $X->pop ; 621 $X->push(list); 622 $a = $X->shift; 623 $X->unshift(list); 624 @r = $X->splice(offset, length, elements); 625 626 # DBM Filters 627 $old_filter = $db->filter_store_key ( sub { ... } ) ; 628 $old_filter = $db->filter_store_value( sub { ... } ) ; 629 $old_filter = $db->filter_fetch_key ( sub { ... } ) ; 630 $old_filter = $db->filter_fetch_value( sub { ... } ) ; 631 632 untie %hash ; 633 untie @array ; 634 635=head1 DESCRIPTION 636 637B<DB_File> is a module which allows Perl programs to make use of the 638facilities provided by Berkeley DB version 1.x (if you have a newer 639version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>). 640It is assumed that you have a copy of the Berkeley DB manual pages at 641hand when reading this documentation. The interface defined here 642mirrors the Berkeley DB interface closely. 643 644Berkeley DB is a C library which provides a consistent interface to a 645number of database formats. B<DB_File> provides an interface to all 646three of the database types currently supported by Berkeley DB. 647 648The file types are: 649 650=over 5 651 652=item B<DB_HASH> 653 654This database type allows arbitrary key/value pairs to be stored in data 655files. This is equivalent to the functionality provided by other 656hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, 657the files created using DB_HASH are not compatible with any of the 658other packages mentioned. 659 660A default hashing algorithm, which will be adequate for most 661applications, is built into Berkeley DB. If you do need to use your own 662hashing algorithm it is possible to write your own in Perl and have 663B<DB_File> use it instead. 664 665=item B<DB_BTREE> 666 667The btree format allows arbitrary key/value pairs to be stored in a 668sorted, balanced binary tree. 669 670As with the DB_HASH format, it is possible to provide a user defined 671Perl routine to perform the comparison of keys. By default, though, the 672keys are stored in lexical order. 673 674=item B<DB_RECNO> 675 676DB_RECNO allows both fixed-length and variable-length flat text files 677to be manipulated using the same key/value pair interface as in DB_HASH 678and DB_BTREE. In this case the key will consist of a record (line) 679number. 680 681=back 682 683=head2 Using DB_File with Berkeley DB version 2 or greater 684 685Although B<DB_File> is intended to be used with Berkeley DB version 1, 686it can also be used with version 2, 3 or 4. In this case the interface is 687limited to the functionality provided by Berkeley DB 1.x. Anywhere the 688version 2 or greater interface differs, B<DB_File> arranges for it to work 689like version 1. This feature allows B<DB_File> scripts that were built 690with version 1 to be migrated to version 2 or greater without any changes. 691 692If you want to make use of the new features available in Berkeley DB 6932.x or greater, use the Perl module B<BerkeleyDB> instead. 694 695B<Note:> The database file format has changed multiple times in Berkeley 696DB version 2, 3 and 4. If you cannot recreate your databases, you 697must dump any existing databases with either the C<db_dump> or the 698C<db_dump185> utility that comes with Berkeley DB. 699Once you have rebuilt DB_File to use Berkeley DB version 2 or greater, 700your databases can be recreated using C<db_load>. Refer to the Berkeley DB 701documentation for further details. 702 703Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley 704DB with DB_File. 705 706=head2 Interface to Berkeley DB 707 708B<DB_File> allows access to Berkeley DB files using the tie() mechanism 709in Perl 5 (for full details, see L<perlfunc/tie()>). This facility 710allows B<DB_File> to access Berkeley DB files using either an 711associative array (for DB_HASH & DB_BTREE file types) or an ordinary 712array (for the DB_RECNO file type). 713 714In addition to the tie() interface, it is also possible to access most 715of the functions provided in the Berkeley DB API directly. 716See L<THE API INTERFACE>. 717 718=head2 Opening a Berkeley DB Database File 719 720Berkeley DB uses the function dbopen() to open or create a database. 721Here is the C prototype for dbopen(): 722 723 DB* 724 dbopen (const char * file, int flags, int mode, 725 DBTYPE type, const void * openinfo) 726 727The parameter C<type> is an enumeration which specifies which of the 3 728interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. 729Depending on which of these is actually chosen, the final parameter, 730I<openinfo> points to a data structure which allows tailoring of the 731specific interface method. 732 733This interface is handled slightly differently in B<DB_File>. Here is 734an equivalent call using B<DB_File>: 735 736 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; 737 738The C<filename>, C<flags> and C<mode> parameters are the direct 739equivalent of their dbopen() counterparts. The final parameter $DB_HASH 740performs the function of both the C<type> and C<openinfo> parameters in 741dbopen(). 742 743In the example above $DB_HASH is actually a pre-defined reference to a 744hash object. B<DB_File> has three of these pre-defined references. 745Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. 746 747The keys allowed in each of these pre-defined references is limited to 748the names used in the equivalent C structure. So, for example, the 749$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, 750C<ffactor>, C<hash>, C<lorder> and C<nelem>. 751 752To change one of these elements, just assign to it like this: 753 754 $DB_HASH->{'cachesize'} = 10000 ; 755 756The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are 757usually adequate for most applications. If you do need to create extra 758instances of these objects, constructors are available for each file 759type. 760 761Here are examples of the constructors and the valid options available 762for DB_HASH, DB_BTREE and DB_RECNO respectively. 763 764 $a = new DB_File::HASHINFO ; 765 $a->{'bsize'} ; 766 $a->{'cachesize'} ; 767 $a->{'ffactor'}; 768 $a->{'hash'} ; 769 $a->{'lorder'} ; 770 $a->{'nelem'} ; 771 772 $b = new DB_File::BTREEINFO ; 773 $b->{'flags'} ; 774 $b->{'cachesize'} ; 775 $b->{'maxkeypage'} ; 776 $b->{'minkeypage'} ; 777 $b->{'psize'} ; 778 $b->{'compare'} ; 779 $b->{'prefix'} ; 780 $b->{'lorder'} ; 781 782 $c = new DB_File::RECNOINFO ; 783 $c->{'bval'} ; 784 $c->{'cachesize'} ; 785 $c->{'psize'} ; 786 $c->{'flags'} ; 787 $c->{'lorder'} ; 788 $c->{'reclen'} ; 789 $c->{'bfname'} ; 790 791The values stored in the hashes above are mostly the direct equivalent 792of their C counterpart. Like their C counterparts, all are set to a 793default values - that means you don't have to set I<all> of the 794values when you only want to change one. Here is an example: 795 796 $a = new DB_File::HASHINFO ; 797 $a->{'cachesize'} = 12345 ; 798 tie %y, 'DB_File', "filename", $flags, 0777, $a ; 799 800A few of the options need extra discussion here. When used, the C 801equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers 802to C functions. In B<DB_File> these keys are used to store references 803to Perl subs. Below are templates for each of the subs: 804 805 sub hash 806 { 807 my ($data) = @_ ; 808 ... 809 # return the hash value for $data 810 return $hash ; 811 } 812 813 sub compare 814 { 815 my ($key, $key2) = @_ ; 816 ... 817 # return 0 if $key1 eq $key2 818 # -1 if $key1 lt $key2 819 # 1 if $key1 gt $key2 820 return (-1 , 0 or 1) ; 821 } 822 823 sub prefix 824 { 825 my ($key, $key2) = @_ ; 826 ... 827 # return number of bytes of $key2 which are 828 # necessary to determine that it is greater than $key1 829 return $bytes ; 830 } 831 832See L<Changing the BTREE sort order> for an example of using the 833C<compare> template. 834 835If you are using the DB_RECNO interface and you intend making use of 836C<bval>, you should check out L<The 'bval' Option>. 837 838=head2 Default Parameters 839 840It is possible to omit some or all of the final 4 parameters in the 841call to C<tie> and let them take default values. As DB_HASH is the most 842common file format used, the call: 843 844 tie %A, "DB_File", "filename" ; 845 846is equivalent to: 847 848 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; 849 850It is also possible to omit the filename parameter as well, so the 851call: 852 853 tie %A, "DB_File" ; 854 855is equivalent to: 856 857 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; 858 859See L<In Memory Databases> for a discussion on the use of C<undef> 860in place of a filename. 861 862=head2 In Memory Databases 863 864Berkeley DB allows the creation of in-memory databases by using NULL 865(that is, a C<(char *)0> in C) in place of the filename. B<DB_File> 866uses C<undef> instead of NULL to provide this functionality. 867 868=head1 DB_HASH 869 870The DB_HASH file format is probably the most commonly used of the three 871file formats that B<DB_File> supports. It is also very straightforward 872to use. 873 874=head2 A Simple Example 875 876This example shows how to create a database, add key/value pairs to the 877database, delete keys/value pairs and finally how to enumerate the 878contents of the database. 879 880 use warnings ; 881 use strict ; 882 use DB_File ; 883 our (%h, $k, $v) ; 884 885 unlink "fruit" ; 886 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH 887 or die "Cannot open file 'fruit': $!\n"; 888 889 # Add a few key/value pairs to the file 890 $h{"apple"} = "red" ; 891 $h{"orange"} = "orange" ; 892 $h{"banana"} = "yellow" ; 893 $h{"tomato"} = "red" ; 894 895 # Check for existence of a key 896 print "Banana Exists\n\n" if $h{"banana"} ; 897 898 # Delete a key/value pair. 899 delete $h{"apple"} ; 900 901 # print the contents of the file 902 while (($k, $v) = each %h) 903 { print "$k -> $v\n" } 904 905 untie %h ; 906 907here is the output: 908 909 Banana Exists 910 911 orange -> orange 912 tomato -> red 913 banana -> yellow 914 915Note that the like ordinary associative arrays, the order of the keys 916retrieved is in an apparently random order. 917 918=head1 DB_BTREE 919 920The DB_BTREE format is useful when you want to store data in a given 921order. By default the keys will be stored in lexical order, but as you 922will see from the example shown in the next section, it is very easy to 923define your own sorting function. 924 925=head2 Changing the BTREE sort order 926 927This script shows how to override the default sorting algorithm that 928BTREE uses. Instead of using the normal lexical ordering, a case 929insensitive compare function will be used. 930 931 use warnings ; 932 use strict ; 933 use DB_File ; 934 935 my %h ; 936 937 sub Compare 938 { 939 my ($key1, $key2) = @_ ; 940 "\L$key1" cmp "\L$key2" ; 941 } 942 943 # specify the Perl sub that will do the comparison 944 $DB_BTREE->{'compare'} = \&Compare ; 945 946 unlink "tree" ; 947 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE 948 or die "Cannot open file 'tree': $!\n" ; 949 950 # Add a key/value pair to the file 951 $h{'Wall'} = 'Larry' ; 952 $h{'Smith'} = 'John' ; 953 $h{'mouse'} = 'mickey' ; 954 $h{'duck'} = 'donald' ; 955 956 # Delete 957 delete $h{"duck"} ; 958 959 # Cycle through the keys printing them in order. 960 # Note it is not necessary to sort the keys as 961 # the btree will have kept them in order automatically. 962 foreach (keys %h) 963 { print "$_\n" } 964 965 untie %h ; 966 967Here is the output from the code above. 968 969 mouse 970 Smith 971 Wall 972 973There are a few point to bear in mind if you want to change the 974ordering in a BTREE database: 975 976=over 5 977 978=item 1. 979 980The new compare function must be specified when you create the database. 981 982=item 2. 983 984You cannot change the ordering once the database has been created. Thus 985you must use the same compare function every time you access the 986database. 987 988=item 3 989 990Duplicate keys are entirely defined by the comparison function. 991In the case-insensitive example above, the keys: 'KEY' and 'key' 992would be considered duplicates, and assigning to the second one 993would overwrite the first. If duplicates are allowed for (with the 994R_DUP flag discussed below), only a single copy of duplicate keys 995is stored in the database --- so (again with example above) assigning 996three values to the keys: 'KEY', 'Key', and 'key' would leave just 997the first key: 'KEY' in the database with three values. For some 998situations this results in information loss, so care should be taken 999to provide fully qualified comparison functions when necessary. 1000For example, the above comparison routine could be modified to 1001additionally compare case-sensitively if two keys are equal in the 1002case insensitive comparison: 1003 1004 sub compare { 1005 my($key1, $key2) = @_; 1006 lc $key1 cmp lc $key2 || 1007 $key1 cmp $key2; 1008 } 1009 1010And now you will only have duplicates when the keys themselves 1011are truly the same. (note: in versions of the db library prior to 1012about November 1996, such duplicate keys were retained so it was 1013possible to recover the original keys in sets of keys that 1014compared as equal). 1015 1016 1017=back 1018 1019=head2 Handling Duplicate Keys 1020 1021The BTREE file type optionally allows a single key to be associated 1022with an arbitrary number of values. This option is enabled by setting 1023the flags element of C<$DB_BTREE> to R_DUP when creating the database. 1024 1025There are some difficulties in using the tied hash interface if you 1026want to manipulate a BTREE database with duplicate keys. Consider this 1027code: 1028 1029 use warnings ; 1030 use strict ; 1031 use DB_File ; 1032 1033 my ($filename, %h) ; 1034 1035 $filename = "tree" ; 1036 unlink $filename ; 1037 1038 # Enable duplicate records 1039 $DB_BTREE->{'flags'} = R_DUP ; 1040 1041 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1042 or die "Cannot open $filename: $!\n"; 1043 1044 # Add some key/value pairs to the file 1045 $h{'Wall'} = 'Larry' ; 1046 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1047 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1048 $h{'Smith'} = 'John' ; 1049 $h{'mouse'} = 'mickey' ; 1050 1051 # iterate through the associative array 1052 # and print each key/value pair. 1053 foreach (sort keys %h) 1054 { print "$_ -> $h{$_}\n" } 1055 1056 untie %h ; 1057 1058Here is the output: 1059 1060 Smith -> John 1061 Wall -> Larry 1062 Wall -> Larry 1063 Wall -> Larry 1064 mouse -> mickey 1065 1066As you can see 3 records have been successfully created with key C<Wall> 1067- the only thing is, when they are retrieved from the database they 1068I<seem> to have the same value, namely C<Larry>. The problem is caused 1069by the way that the associative array interface works. Basically, when 1070the associative array interface is used to fetch the value associated 1071with a given key, it will only ever retrieve the first value. 1072 1073Although it may not be immediately obvious from the code above, the 1074associative array interface can be used to write values with duplicate 1075keys, but it cannot be used to read them back from the database. 1076 1077The way to get around this problem is to use the Berkeley DB API method 1078called C<seq>. This method allows sequential access to key/value 1079pairs. See L<THE API INTERFACE> for details of both the C<seq> method 1080and the API in general. 1081 1082Here is the script above rewritten using the C<seq> API method. 1083 1084 use warnings ; 1085 use strict ; 1086 use DB_File ; 1087 1088 my ($filename, $x, %h, $status, $key, $value) ; 1089 1090 $filename = "tree" ; 1091 unlink $filename ; 1092 1093 # Enable duplicate records 1094 $DB_BTREE->{'flags'} = R_DUP ; 1095 1096 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1097 or die "Cannot open $filename: $!\n"; 1098 1099 # Add some key/value pairs to the file 1100 $h{'Wall'} = 'Larry' ; 1101 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1102 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1103 $h{'Smith'} = 'John' ; 1104 $h{'mouse'} = 'mickey' ; 1105 1106 # iterate through the btree using seq 1107 # and print each key/value pair. 1108 $key = $value = 0 ; 1109 for ($status = $x->seq($key, $value, R_FIRST) ; 1110 $status == 0 ; 1111 $status = $x->seq($key, $value, R_NEXT) ) 1112 { print "$key -> $value\n" } 1113 1114 undef $x ; 1115 untie %h ; 1116 1117that prints: 1118 1119 Smith -> John 1120 Wall -> Brick 1121 Wall -> Brick 1122 Wall -> Larry 1123 mouse -> mickey 1124 1125This time we have got all the key/value pairs, including the multiple 1126values associated with the key C<Wall>. 1127 1128To make life easier when dealing with duplicate keys, B<DB_File> comes with 1129a few utility methods. 1130 1131=head2 The get_dup() Method 1132 1133The C<get_dup> method assists in 1134reading duplicate values from BTREE databases. The method can take the 1135following forms: 1136 1137 $count = $x->get_dup($key) ; 1138 @list = $x->get_dup($key) ; 1139 %list = $x->get_dup($key, 1) ; 1140 1141In a scalar context the method returns the number of values associated 1142with the key, C<$key>. 1143 1144In list context, it returns all the values which match C<$key>. Note 1145that the values will be returned in an apparently random order. 1146 1147In list context, if the second parameter is present and evaluates 1148TRUE, the method returns an associative array. The keys of the 1149associative array correspond to the values that matched in the BTREE 1150and the values of the array are a count of the number of times that 1151particular value occurred in the BTREE. 1152 1153So assuming the database created above, we can use C<get_dup> like 1154this: 1155 1156 use warnings ; 1157 use strict ; 1158 use DB_File ; 1159 1160 my ($filename, $x, %h) ; 1161 1162 $filename = "tree" ; 1163 1164 # Enable duplicate records 1165 $DB_BTREE->{'flags'} = R_DUP ; 1166 1167 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1168 or die "Cannot open $filename: $!\n"; 1169 1170 my $cnt = $x->get_dup("Wall") ; 1171 print "Wall occurred $cnt times\n" ; 1172 1173 my %hash = $x->get_dup("Wall", 1) ; 1174 print "Larry is there\n" if $hash{'Larry'} ; 1175 print "There are $hash{'Brick'} Brick Walls\n" ; 1176 1177 my @list = sort $x->get_dup("Wall") ; 1178 print "Wall => [@list]\n" ; 1179 1180 @list = $x->get_dup("Smith") ; 1181 print "Smith => [@list]\n" ; 1182 1183 @list = $x->get_dup("Dog") ; 1184 print "Dog => [@list]\n" ; 1185 1186 1187and it will print: 1188 1189 Wall occurred 3 times 1190 Larry is there 1191 There are 2 Brick Walls 1192 Wall => [Brick Brick Larry] 1193 Smith => [John] 1194 Dog => [] 1195 1196=head2 The find_dup() Method 1197 1198 $status = $X->find_dup($key, $value) ; 1199 1200This method checks for the existence of a specific key/value pair. If the 1201pair exists, the cursor is left pointing to the pair and the method 1202returns 0. Otherwise the method returns a non-zero value. 1203 1204Assuming the database from the previous example: 1205 1206 use warnings ; 1207 use strict ; 1208 use DB_File ; 1209 1210 my ($filename, $x, %h, $found) ; 1211 1212 $filename = "tree" ; 1213 1214 # Enable duplicate records 1215 $DB_BTREE->{'flags'} = R_DUP ; 1216 1217 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1218 or die "Cannot open $filename: $!\n"; 1219 1220 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1221 print "Larry Wall is $found there\n" ; 1222 1223 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; 1224 print "Harry Wall is $found there\n" ; 1225 1226 undef $x ; 1227 untie %h ; 1228 1229prints this 1230 1231 Larry Wall is there 1232 Harry Wall is not there 1233 1234 1235=head2 The del_dup() Method 1236 1237 $status = $X->del_dup($key, $value) ; 1238 1239This method deletes a specific key/value pair. It returns 12400 if they exist and have been deleted successfully. 1241Otherwise the method returns a non-zero value. 1242 1243Again assuming the existence of the C<tree> database 1244 1245 use warnings ; 1246 use strict ; 1247 use DB_File ; 1248 1249 my ($filename, $x, %h, $found) ; 1250 1251 $filename = "tree" ; 1252 1253 # Enable duplicate records 1254 $DB_BTREE->{'flags'} = R_DUP ; 1255 1256 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1257 or die "Cannot open $filename: $!\n"; 1258 1259 $x->del_dup("Wall", "Larry") ; 1260 1261 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1262 print "Larry Wall is $found there\n" ; 1263 1264 undef $x ; 1265 untie %h ; 1266 1267prints this 1268 1269 Larry Wall is not there 1270 1271=head2 Matching Partial Keys 1272 1273The BTREE interface has a feature which allows partial keys to be 1274matched. This functionality is I<only> available when the C<seq> method 1275is used along with the R_CURSOR flag. 1276 1277 $x->seq($key, $value, R_CURSOR) ; 1278 1279Here is the relevant quote from the dbopen man page where it defines 1280the use of the R_CURSOR flag with seq: 1281 1282 Note, for the DB_BTREE access method, the returned key is not 1283 necessarily an exact match for the specified key. The returned key 1284 is the smallest key greater than or equal to the specified key, 1285 permitting partial key matches and range searches. 1286 1287In the example script below, the C<match> sub uses this feature to find 1288and print the first matching key/value pair given a partial key. 1289 1290 use warnings ; 1291 use strict ; 1292 use DB_File ; 1293 use Fcntl ; 1294 1295 my ($filename, $x, %h, $st, $key, $value) ; 1296 1297 sub match 1298 { 1299 my $key = shift ; 1300 my $value = 0; 1301 my $orig_key = $key ; 1302 $x->seq($key, $value, R_CURSOR) ; 1303 print "$orig_key\t-> $key\t-> $value\n" ; 1304 } 1305 1306 $filename = "tree" ; 1307 unlink $filename ; 1308 1309 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1310 or die "Cannot open $filename: $!\n"; 1311 1312 # Add some key/value pairs to the file 1313 $h{'mouse'} = 'mickey' ; 1314 $h{'Wall'} = 'Larry' ; 1315 $h{'Walls'} = 'Brick' ; 1316 $h{'Smith'} = 'John' ; 1317 1318 1319 $key = $value = 0 ; 1320 print "IN ORDER\n" ; 1321 for ($st = $x->seq($key, $value, R_FIRST) ; 1322 $st == 0 ; 1323 $st = $x->seq($key, $value, R_NEXT) ) 1324 1325 { print "$key -> $value\n" } 1326 1327 print "\nPARTIAL MATCH\n" ; 1328 1329 match "Wa" ; 1330 match "A" ; 1331 match "a" ; 1332 1333 undef $x ; 1334 untie %h ; 1335 1336Here is the output: 1337 1338 IN ORDER 1339 Smith -> John 1340 Wall -> Larry 1341 Walls -> Brick 1342 mouse -> mickey 1343 1344 PARTIAL MATCH 1345 Wa -> Wall -> Larry 1346 A -> Smith -> John 1347 a -> mouse -> mickey 1348 1349=head1 DB_RECNO 1350 1351DB_RECNO provides an interface to flat text files. Both variable and 1352fixed length records are supported. 1353 1354In order to make RECNO more compatible with Perl, the array offset for 1355all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. 1356 1357As with normal Perl arrays, a RECNO array can be accessed using 1358negative indexes. The index -1 refers to the last element of the array, 1359-2 the second last, and so on. Attempting to access an element before 1360the start of the array will raise a fatal run-time error. 1361 1362=head2 The 'bval' Option 1363 1364The operation of the bval option warrants some discussion. Here is the 1365definition of bval from the Berkeley DB 1.85 recno manual page: 1366 1367 The delimiting byte to be used to mark the end of a 1368 record for variable-length records, and the pad charac- 1369 ter for fixed-length records. If no value is speci- 1370 fied, newlines (``\n'') are used to mark the end of 1371 variable-length records and fixed-length records are 1372 padded with spaces. 1373 1374The second sentence is wrong. In actual fact bval will only default to 1375C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL 1376openinfo parameter is used at all, the value that happens to be in bval 1377will be used. That means you always have to specify bval when making 1378use of any of the options in the openinfo parameter. This documentation 1379error will be fixed in the next release of Berkeley DB. 1380 1381That clarifies the situation with regards Berkeley DB itself. What 1382about B<DB_File>? Well, the behavior defined in the quote above is 1383quite useful, so B<DB_File> conforms to it. 1384 1385That means that you can specify other options (e.g. cachesize) and 1386still have bval default to C<"\n"> for variable length records, and 1387space for fixed length records. 1388 1389Also note that the bval option only allows you to specify a single byte 1390as a delimiter. 1391 1392=head2 A Simple Example 1393 1394Here is a simple example that uses RECNO (if you are using a version 1395of Perl earlier than 5.004_57 this example won't work -- see 1396L<Extra RECNO Methods> for a workaround). 1397 1398 use warnings ; 1399 use strict ; 1400 use DB_File ; 1401 1402 my $filename = "text" ; 1403 unlink $filename ; 1404 1405 my @h ; 1406 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO 1407 or die "Cannot open file 'text': $!\n" ; 1408 1409 # Add a few key/value pairs to the file 1410 $h[0] = "orange" ; 1411 $h[1] = "blue" ; 1412 $h[2] = "yellow" ; 1413 1414 push @h, "green", "black" ; 1415 1416 my $elements = scalar @h ; 1417 print "The array contains $elements entries\n" ; 1418 1419 my $last = pop @h ; 1420 print "popped $last\n" ; 1421 1422 unshift @h, "white" ; 1423 my $first = shift @h ; 1424 print "shifted $first\n" ; 1425 1426 # Check for existence of a key 1427 print "Element 1 Exists with value $h[1]\n" if $h[1] ; 1428 1429 # use a negative index 1430 print "The last element is $h[-1]\n" ; 1431 print "The 2nd last element is $h[-2]\n" ; 1432 1433 untie @h ; 1434 1435Here is the output from the script: 1436 1437 The array contains 5 entries 1438 popped black 1439 shifted white 1440 Element 1 Exists with value blue 1441 The last element is green 1442 The 2nd last element is yellow 1443 1444=head2 Extra RECNO Methods 1445 1446If you are using a version of Perl earlier than 5.004_57, the tied 1447array interface is quite limited. In the example script above 1448C<push>, C<pop>, C<shift>, C<unshift> 1449or determining the array length will not work with a tied array. 1450 1451To make the interface more useful for older versions of Perl, a number 1452of methods are supplied with B<DB_File> to simulate the missing array 1453operations. All these methods are accessed via the object returned from 1454the tie call. 1455 1456Here are the methods: 1457 1458=over 5 1459 1460=item B<$X-E<gt>push(list) ;> 1461 1462Pushes the elements of C<list> to the end of the array. 1463 1464=item B<$value = $X-E<gt>pop ;> 1465 1466Removes and returns the last element of the array. 1467 1468=item B<$X-E<gt>shift> 1469 1470Removes and returns the first element of the array. 1471 1472=item B<$X-E<gt>unshift(list) ;> 1473 1474Pushes the elements of C<list> to the start of the array. 1475 1476=item B<$X-E<gt>length> 1477 1478Returns the number of elements in the array. 1479 1480=item B<$X-E<gt>splice(offset, length, elements);> 1481 1482Returns a splice of the array. 1483 1484=back 1485 1486=head2 Another Example 1487 1488Here is a more complete example that makes use of some of the methods 1489described above. It also makes use of the API interface directly (see 1490L<THE API INTERFACE>). 1491 1492 use warnings ; 1493 use strict ; 1494 my (@h, $H, $file, $i) ; 1495 use DB_File ; 1496 use Fcntl ; 1497 1498 $file = "text" ; 1499 1500 unlink $file ; 1501 1502 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO 1503 or die "Cannot open file $file: $!\n" ; 1504 1505 # first create a text file to play with 1506 $h[0] = "zero" ; 1507 $h[1] = "one" ; 1508 $h[2] = "two" ; 1509 $h[3] = "three" ; 1510 $h[4] = "four" ; 1511 1512 1513 # Print the records in order. 1514 # 1515 # The length method is needed here because evaluating a tied 1516 # array in a scalar context does not return the number of 1517 # elements in the array. 1518 1519 print "\nORIGINAL\n" ; 1520 foreach $i (0 .. $H->length - 1) { 1521 print "$i: $h[$i]\n" ; 1522 } 1523 1524 # use the push & pop methods 1525 $a = $H->pop ; 1526 $H->push("last") ; 1527 print "\nThe last record was [$a]\n" ; 1528 1529 # and the shift & unshift methods 1530 $a = $H->shift ; 1531 $H->unshift("first") ; 1532 print "The first record was [$a]\n" ; 1533 1534 # Use the API to add a new record after record 2. 1535 $i = 2 ; 1536 $H->put($i, "Newbie", R_IAFTER) ; 1537 1538 # and a new record before record 1. 1539 $i = 1 ; 1540 $H->put($i, "New One", R_IBEFORE) ; 1541 1542 # delete record 3 1543 $H->del(3) ; 1544 1545 # now print the records in reverse order 1546 print "\nREVERSE\n" ; 1547 for ($i = $H->length - 1 ; $i >= 0 ; -- $i) 1548 { print "$i: $h[$i]\n" } 1549 1550 # same again, but use the API functions instead 1551 print "\nREVERSE again\n" ; 1552 my ($s, $k, $v) = (0, 0, 0) ; 1553 for ($s = $H->seq($k, $v, R_LAST) ; 1554 $s == 0 ; 1555 $s = $H->seq($k, $v, R_PREV)) 1556 { print "$k: $v\n" } 1557 1558 undef $H ; 1559 untie @h ; 1560 1561and this is what it outputs: 1562 1563 ORIGINAL 1564 0: zero 1565 1: one 1566 2: two 1567 3: three 1568 4: four 1569 1570 The last record was [four] 1571 The first record was [zero] 1572 1573 REVERSE 1574 5: last 1575 4: three 1576 3: Newbie 1577 2: one 1578 1: New One 1579 0: first 1580 1581 REVERSE again 1582 5: last 1583 4: three 1584 3: Newbie 1585 2: one 1586 1: New One 1587 0: first 1588 1589Notes: 1590 1591=over 5 1592 1593=item 1. 1594 1595Rather than iterating through the array, C<@h> like this: 1596 1597 foreach $i (@h) 1598 1599it is necessary to use either this: 1600 1601 foreach $i (0 .. $H->length - 1) 1602 1603or this: 1604 1605 for ($a = $H->get($k, $v, R_FIRST) ; 1606 $a == 0 ; 1607 $a = $H->get($k, $v, R_NEXT) ) 1608 1609=item 2. 1610 1611Notice that both times the C<put> method was used the record index was 1612specified using a variable, C<$i>, rather than the literal value 1613itself. This is because C<put> will return the record number of the 1614inserted line via that parameter. 1615 1616=back 1617 1618=head1 THE API INTERFACE 1619 1620As well as accessing Berkeley DB using a tied hash or array, it is also 1621possible to make direct use of most of the API functions defined in the 1622Berkeley DB documentation. 1623 1624To do this you need to store a copy of the object returned from the tie. 1625 1626 $db = tie %hash, "DB_File", "filename" ; 1627 1628Once you have done that, you can access the Berkeley DB API functions 1629as B<DB_File> methods directly like this: 1630 1631 $db->put($key, $value, R_NOOVERWRITE) ; 1632 1633B<Important:> If you have saved a copy of the object returned from 1634C<tie>, the underlying database file will I<not> be closed until both 1635the tied variable is untied and all copies of the saved object are 1636destroyed. 1637 1638 use DB_File ; 1639 $db = tie %hash, "DB_File", "filename" 1640 or die "Cannot tie filename: $!" ; 1641 ... 1642 undef $db ; 1643 untie %hash ; 1644 1645See L<The untie() Gotcha> for more details. 1646 1647All the functions defined in L<dbopen> are available except for 1648close() and dbopen() itself. The B<DB_File> method interface to the 1649supported functions have been implemented to mirror the way Berkeley DB 1650works whenever possible. In particular note that: 1651 1652=over 5 1653 1654=item * 1655 1656The methods return a status value. All return 0 on success. 1657All return -1 to signify an error and set C<$!> to the exact 1658error code. The return code 1 generally (but not always) means that the 1659key specified did not exist in the database. 1660 1661Other return codes are defined. See below and in the Berkeley DB 1662documentation for details. The Berkeley DB documentation should be used 1663as the definitive source. 1664 1665=item * 1666 1667Whenever a Berkeley DB function returns data via one of its parameters, 1668the equivalent B<DB_File> method does exactly the same. 1669 1670=item * 1671 1672If you are careful, it is possible to mix API calls with the tied 1673hash/array interface in the same piece of code. Although only a few of 1674the methods used to implement the tied interface currently make use of 1675the cursor, you should always assume that the cursor has been changed 1676any time the tied hash/array interface is used. As an example, this 1677code will probably not do what you expect: 1678 1679 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1680 or die "Cannot tie $filename: $!" ; 1681 1682 # Get the first key/value pair and set the cursor 1683 $X->seq($key, $value, R_FIRST) ; 1684 1685 # this line will modify the cursor 1686 $count = scalar keys %x ; 1687 1688 # Get the second key/value pair. 1689 # oops, it didn't, it got the last key/value pair! 1690 $X->seq($key, $value, R_NEXT) ; 1691 1692The code above can be rearranged to get around the problem, like this: 1693 1694 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1695 or die "Cannot tie $filename: $!" ; 1696 1697 # this line will modify the cursor 1698 $count = scalar keys %x ; 1699 1700 # Get the first key/value pair and set the cursor 1701 $X->seq($key, $value, R_FIRST) ; 1702 1703 # Get the second key/value pair. 1704 # worked this time. 1705 $X->seq($key, $value, R_NEXT) ; 1706 1707=back 1708 1709All the constants defined in L<dbopen> for use in the flags parameters 1710in the methods defined below are also available. Refer to the Berkeley 1711DB documentation for the precise meaning of the flags values. 1712 1713Below is a list of the methods available. 1714 1715=over 5 1716 1717=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> 1718 1719Given a key (C<$key>) this method reads the value associated with it 1720from the database. The value read from the database is returned in the 1721C<$value> parameter. 1722 1723If the key does not exist the method returns 1. 1724 1725No flags are currently defined for this method. 1726 1727=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> 1728 1729Stores the key/value pair in the database. 1730 1731If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter 1732will have the record number of the inserted key/value pair set. 1733 1734Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and 1735R_SETCURSOR. 1736 1737=item B<$status = $X-E<gt>del($key [, $flags]) ;> 1738 1739Removes all key/value pairs with key C<$key> from the database. 1740 1741A return code of 1 means that the requested key was not in the 1742database. 1743 1744R_CURSOR is the only valid flag at present. 1745 1746=item B<$status = $X-E<gt>fd ;> 1747 1748Returns the file descriptor for the underlying database. 1749 1750See L<Locking: The Trouble with fd> for an explanation for why you should 1751not use C<fd> to lock your database. 1752 1753=item B<$status = $X-E<gt>seq($key, $value, $flags) ;> 1754 1755This interface allows sequential retrieval from the database. See 1756L<dbopen> for full details. 1757 1758Both the C<$key> and C<$value> parameters will be set to the key/value 1759pair read from the database. 1760 1761The flags parameter is mandatory. The valid flag values are R_CURSOR, 1762R_FIRST, R_LAST, R_NEXT and R_PREV. 1763 1764=item B<$status = $X-E<gt>sync([$flags]) ;> 1765 1766Flushes any cached buffers to disk. 1767 1768R_RECNOSYNC is the only valid flag at present. 1769 1770=back 1771 1772=head1 DBM FILTERS 1773 1774A DBM Filter is a piece of code that is be used when you I<always> 1775want to make the same transformation to all keys and/or values in a 1776DBM database. 1777 1778There are four methods associated with DBM Filters. All work identically, 1779and each is used to install (or uninstall) a single DBM Filter. Each 1780expects a single parameter, namely a reference to a sub. The only 1781difference between them is the place that the filter is installed. 1782 1783To summarise: 1784 1785=over 5 1786 1787=item B<filter_store_key> 1788 1789If a filter has been installed with this method, it will be invoked 1790every time you write a key to a DBM database. 1791 1792=item B<filter_store_value> 1793 1794If a filter has been installed with this method, it will be invoked 1795every time you write a value to a DBM database. 1796 1797 1798=item B<filter_fetch_key> 1799 1800If a filter has been installed with this method, it will be invoked 1801every time you read a key from a DBM database. 1802 1803=item B<filter_fetch_value> 1804 1805If a filter has been installed with this method, it will be invoked 1806every time you read a value from a DBM database. 1807 1808=back 1809 1810You can use any combination of the methods, from none, to all four. 1811 1812All filter methods return the existing filter, if present, or C<undef> 1813in not. 1814 1815To delete a filter pass C<undef> to it. 1816 1817=head2 The Filter 1818 1819When each filter is called by Perl, a local copy of C<$_> will contain 1820the key or value to be filtered. Filtering is achieved by modifying 1821the contents of C<$_>. The return code from the filter is ignored. 1822 1823=head2 An Example -- the NULL termination problem. 1824 1825Consider the following scenario. You have a DBM database 1826that you need to share with a third-party C application. The C application 1827assumes that I<all> keys and values are NULL terminated. Unfortunately 1828when Perl writes to DBM databases it doesn't use NULL termination, so 1829your Perl application will have to manage NULL termination itself. When 1830you write to the database you will have to use something like this: 1831 1832 $hash{"$key\0"} = "$value\0" ; 1833 1834Similarly the NULL needs to be taken into account when you are considering 1835the length of existing keys/values. 1836 1837It would be much better if you could ignore the NULL terminations issue 1838in the main application code and have a mechanism that automatically 1839added the terminating NULL to all keys and values whenever you write to 1840the database and have them removed when you read from the database. As I'm 1841sure you have already guessed, this is a problem that DBM Filters can 1842fix very easily. 1843 1844 use warnings ; 1845 use strict ; 1846 use DB_File ; 1847 1848 my %hash ; 1849 my $filename = "filt" ; 1850 unlink $filename ; 1851 1852 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1853 or die "Cannot open $filename: $!\n" ; 1854 1855 # Install DBM Filters 1856 $db->filter_fetch_key ( sub { s/\0$// } ) ; 1857 $db->filter_store_key ( sub { $_ .= "\0" } ) ; 1858 $db->filter_fetch_value( sub { s/\0$// } ) ; 1859 $db->filter_store_value( sub { $_ .= "\0" } ) ; 1860 1861 $hash{"abc"} = "def" ; 1862 my $a = $hash{"ABC"} ; 1863 # ... 1864 undef $db ; 1865 untie %hash ; 1866 1867Hopefully the contents of each of the filters should be 1868self-explanatory. Both "fetch" filters remove the terminating NULL, 1869and both "store" filters add a terminating NULL. 1870 1871 1872=head2 Another Example -- Key is a C int. 1873 1874Here is another real-life example. By default, whenever Perl writes to 1875a DBM database it always writes the key and value as strings. So when 1876you use this: 1877 1878 $hash{12345} = "something" ; 1879 1880the key 12345 will get stored in the DBM database as the 5 byte string 1881"12345". If you actually want the key to be stored in the DBM database 1882as a C int, you will have to use C<pack> when writing, and C<unpack> 1883when reading. 1884 1885Here is a DBM Filter that does it: 1886 1887 use warnings ; 1888 use strict ; 1889 use DB_File ; 1890 my %hash ; 1891 my $filename = "filt" ; 1892 unlink $filename ; 1893 1894 1895 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1896 or die "Cannot open $filename: $!\n" ; 1897 1898 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; 1899 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; 1900 $hash{123} = "def" ; 1901 # ... 1902 undef $db ; 1903 untie %hash ; 1904 1905This time only two filters have been used -- we only need to manipulate 1906the contents of the key, so it wasn't necessary to install any value 1907filters. 1908 1909=head1 HINTS AND TIPS 1910 1911 1912=head2 Locking: The Trouble with fd 1913 1914Until version 1.72 of this module, the recommended technique for locking 1915B<DB_File> databases was to flock the filehandle returned from the "fd" 1916function. Unfortunately this technique has been shown to be fundamentally 1917flawed (Kudos to David Harris for tracking this down). Use it at your own 1918peril! 1919 1920The locking technique went like this. 1921 1922 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644) 1923 || die "dbcreat foo.db $!"; 1924 $fd = $db->fd; 1925 open(DB_FH, "+<&=$fd") || die "dup $!"; 1926 flock (DB_FH, LOCK_EX) || die "flock: $!"; 1927 ... 1928 $db{"Tom"} = "Jerry" ; 1929 ... 1930 flock(DB_FH, LOCK_UN); 1931 undef $db; 1932 untie %db; 1933 close(DB_FH); 1934 1935In simple terms, this is what happens: 1936 1937=over 5 1938 1939=item 1. 1940 1941Use "tie" to open the database. 1942 1943=item 2. 1944 1945Lock the database with fd & flock. 1946 1947=item 3. 1948 1949Read & Write to the database. 1950 1951=item 4. 1952 1953Unlock and close the database. 1954 1955=back 1956 1957Here is the crux of the problem. A side-effect of opening the B<DB_File> 1958database in step 2 is that an initial block from the database will get 1959read from disk and cached in memory. 1960 1961To see why this is a problem, consider what can happen when two processes, 1962say "A" and "B", both want to update the same B<DB_File> database 1963using the locking steps outlined above. Assume process "A" has already 1964opened the database and has a write lock, but it hasn't actually updated 1965the database yet (it has finished step 2, but not started step 3 yet). Now 1966process "B" tries to open the same database - step 1 will succeed, 1967but it will block on step 2 until process "A" releases the lock. The 1968important thing to notice here is that at this point in time both 1969processes will have cached identical initial blocks from the database. 1970 1971Now process "A" updates the database and happens to change some of the 1972data held in the initial buffer. Process "A" terminates, flushing 1973all cached data to disk and releasing the database lock. At this point 1974the database on disk will correctly reflect the changes made by process 1975"A". 1976 1977With the lock released, process "B" can now continue. It also updates the 1978database and unfortunately it too modifies the data that was in its 1979initial buffer. Once that data gets flushed to disk it will overwrite 1980some/all of the changes process "A" made to the database. 1981 1982The result of this scenario is at best a database that doesn't contain 1983what you expect. At worst the database will corrupt. 1984 1985The above won't happen every time competing process update the same 1986B<DB_File> database, but it does illustrate why the technique should 1987not be used. 1988 1989=head2 Safe ways to lock a database 1990 1991Starting with version 2.x, Berkeley DB has internal support for locking. 1992The companion module to this one, B<BerkeleyDB>, provides an interface 1993to this locking functionality. If you are serious about locking 1994Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. 1995 1996If using B<BerkeleyDB> isn't an option, there are a number of modules 1997available on CPAN that can be used to implement locking. Each one 1998implements locking differently and has different goals in mind. It is 1999therefore worth knowing the difference, so that you can pick the right 2000one for your application. Here are the three locking wrappers: 2001 2002=over 5 2003 2004=item B<Tie::DB_Lock> 2005 2006A B<DB_File> wrapper which creates copies of the database file for 2007read access, so that you have a kind of a multiversioning concurrent read 2008system. However, updates are still serial. Use for databases where reads 2009may be lengthy and consistency problems may occur. 2010 2011=item B<Tie::DB_LockFile> 2012 2013A B<DB_File> wrapper that has the ability to lock and unlock the database 2014while it is being used. Avoids the tie-before-flock problem by simply 2015re-tie-ing the database when you get or drop a lock. Because of the 2016flexibility in dropping and re-acquiring the lock in the middle of a 2017session, this can be massaged into a system that will work with long 2018updates and/or reads if the application follows the hints in the POD 2019documentation. 2020 2021=item B<DB_File::Lock> 2022 2023An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile 2024before tie-ing the database and drops the lock after the untie. Allows 2025one to use the same lockfile for multiple databases to avoid deadlock 2026problems, if desired. Use for databases where updates are reads are 2027quick and simple flock locking semantics are enough. 2028 2029=back 2030 2031=head2 Sharing Databases With C Applications 2032 2033There is no technical reason why a Berkeley DB database cannot be 2034shared by both a Perl and a C application. 2035 2036The vast majority of problems that are reported in this area boil down 2037to the fact that C strings are NULL terminated, whilst Perl strings are 2038not. See L<DBM FILTERS> for a generic way to work around this problem. 2039 2040Here is a real example. Netscape 2.0 keeps a record of the locations you 2041visit along with the time you last visited them in a DB_HASH database. 2042This is usually stored in the file F<~/.netscape/history.db>. The key 2043field in the database is the location string and the value field is the 2044time the location was last visited stored as a 4 byte binary value. 2045 2046If you haven't already guessed, the location string is stored with a 2047terminating NULL. This means you need to be careful when accessing the 2048database. 2049 2050Here is a snippet of code that is loosely based on Tom Christiansen's 2051I<ggh> script (available from your nearest CPAN archive in 2052F<authors/id/TOMC/scripts/nshist.gz>). 2053 2054 use warnings ; 2055 use strict ; 2056 use DB_File ; 2057 use Fcntl ; 2058 2059 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ; 2060 $dotdir = $ENV{HOME} || $ENV{LOGNAME}; 2061 2062 $HISTORY = "$dotdir/.netscape/history.db"; 2063 2064 tie %hist_db, 'DB_File', $HISTORY 2065 or die "Cannot open $HISTORY: $!\n" ;; 2066 2067 # Dump the complete database 2068 while ( ($href, $binary_time) = each %hist_db ) { 2069 2070 # remove the terminating NULL 2071 $href =~ s/\x00$// ; 2072 2073 # convert the binary time into a user friendly string 2074 $date = localtime unpack("V", $binary_time); 2075 print "$date $href\n" ; 2076 } 2077 2078 # check for the existence of a specific key 2079 # remember to add the NULL 2080 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { 2081 $date = localtime unpack("V", $binary_time) ; 2082 print "Last visited mox.perl.com on $date\n" ; 2083 } 2084 else { 2085 print "Never visited mox.perl.com\n" 2086 } 2087 2088 untie %hist_db ; 2089 2090=head2 The untie() Gotcha 2091 2092If you make use of the Berkeley DB API, it is I<very> strongly 2093recommended that you read L<perltie/The untie Gotcha>. 2094 2095Even if you don't currently make use of the API interface, it is still 2096worth reading it. 2097 2098Here is an example which illustrates the problem from a B<DB_File> 2099perspective: 2100 2101 use DB_File ; 2102 use Fcntl ; 2103 2104 my %x ; 2105 my $X ; 2106 2107 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC 2108 or die "Cannot tie first time: $!" ; 2109 2110 $x{123} = 456 ; 2111 2112 untie %x ; 2113 2114 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2115 or die "Cannot tie second time: $!" ; 2116 2117 untie %x ; 2118 2119When run, the script will produce this error message: 2120 2121 Cannot tie second time: Invalid argument at bad.file line 14. 2122 2123Although the error message above refers to the second tie() statement 2124in the script, the source of the problem is really with the untie() 2125statement that precedes it. 2126 2127Having read L<perltie> you will probably have already guessed that the 2128error is caused by the extra copy of the tied object stored in C<$X>. 2129If you haven't, then the problem boils down to the fact that the 2130B<DB_File> destructor, DESTROY, will not be called until I<all> 2131references to the tied object are destroyed. Both the tied variable, 2132C<%x>, and C<$X> above hold a reference to the object. The call to 2133untie() will destroy the first, but C<$X> still holds a valid 2134reference, so the destructor will not get called and the database file 2135F<tst.fil> will remain open. The fact that Berkeley DB then reports the 2136attempt to open a database that is already open via the catch-all 2137"Invalid argument" doesn't help. 2138 2139If you run the script with the C<-w> flag the error message becomes: 2140 2141 untie attempted while 1 inner references still exist at bad.file line 12. 2142 Cannot tie second time: Invalid argument at bad.file line 14. 2143 2144which pinpoints the real problem. Finally the script can now be 2145modified to fix the original problem by destroying the API object 2146before the untie: 2147 2148 ... 2149 $x{123} = 456 ; 2150 2151 undef $X ; 2152 untie %x ; 2153 2154 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2155 ... 2156 2157 2158=head1 COMMON QUESTIONS 2159 2160=head2 Why is there Perl source in my database? 2161 2162If you look at the contents of a database file created by DB_File, 2163there can sometimes be part of a Perl script included in it. 2164 2165This happens because Berkeley DB uses dynamic memory to allocate 2166buffers which will subsequently be written to the database file. Being 2167dynamic, the memory could have been used for anything before DB 2168malloced it. As Berkeley DB doesn't clear the memory once it has been 2169allocated, the unused portions will contain random junk. In the case 2170where a Perl script gets written to the database, the random junk will 2171correspond to an area of dynamic memory that happened to be used during 2172the compilation of the script. 2173 2174Unless you don't like the possibility of there being part of your Perl 2175scripts embedded in a database file, this is nothing to worry about. 2176 2177=head2 How do I store complex data structures with DB_File? 2178 2179Although B<DB_File> cannot do this directly, there is a module which 2180can layer transparently over B<DB_File> to accomplish this feat. 2181 2182Check out the MLDBM module, available on CPAN in the directory 2183F<modules/by-module/MLDBM>. 2184 2185=head2 What does "Invalid Argument" mean? 2186 2187You will get this error message when one of the parameters in the 2188C<tie> call is wrong. Unfortunately there are quite a few parameters to 2189get wrong, so it can be difficult to figure out which one it is. 2190 2191Here are a couple of possibilities: 2192 2193=over 5 2194 2195=item 1. 2196 2197Attempting to reopen a database without closing it. 2198 2199=item 2. 2200 2201Using the O_WRONLY flag. 2202 2203=back 2204 2205=head2 What does "Bareword 'DB_File' not allowed" mean? 2206 2207You will encounter this particular error message when you have the 2208C<strict 'subs'> pragma (or the full strict pragma) in your script. 2209Consider this script: 2210 2211 use warnings ; 2212 use strict ; 2213 use DB_File ; 2214 my %x ; 2215 tie %x, DB_File, "filename" ; 2216 2217Running it produces the error in question: 2218 2219 Bareword "DB_File" not allowed while "strict subs" in use 2220 2221To get around the error, place the word C<DB_File> in either single or 2222double quotes, like this: 2223 2224 tie %x, "DB_File", "filename" ; 2225 2226Although it might seem like a real pain, it is really worth the effort 2227of having a C<use strict> in all your scripts. 2228 2229=head1 REFERENCES 2230 2231Articles that are either about B<DB_File> or make use of it. 2232 2233=over 5 2234 2235=item 1. 2236 2237I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), 2238Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 2239 2240=back 2241 2242=head1 HISTORY 2243 2244Moved to the Changes file. 2245 2246=head1 BUGS 2247 2248Some older versions of Berkeley DB had problems with fixed length 2249records using the RECNO file format. This problem has been fixed since 2250version 1.85 of Berkeley DB. 2251 2252I am sure there are bugs in the code. If you do find any, or can 2253suggest any enhancements, I would welcome your comments. 2254 2255=head1 AVAILABILITY 2256 2257B<DB_File> comes with the standard Perl source distribution. Look in 2258the directory F<ext/DB_File>. Given the amount of time between releases 2259of Perl the version that ships with Perl is quite likely to be out of 2260date, so the most recent version can always be found on CPAN (see 2261L<perlmodlib/CPAN> for details), in the directory 2262F<modules/by-module/DB_File>. 2263 2264This version of B<DB_File> will work with either version 1.x, 2.x or 22653.x of Berkeley DB, but is limited to the functionality provided by 2266version 1. 2267 2268The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>. 2269All versions of Berkeley DB are available there. 2270 2271Alternatively, Berkeley DB version 1 is available at your nearest CPAN 2272archive in F<src/misc/db.1.85.tar.gz>. 2273 2274=head1 COPYRIGHT 2275 2276Copyright (c) 1995-2012 Paul Marquess. All rights reserved. This program 2277is free software; you can redistribute it and/or modify it under the 2278same terms as Perl itself. 2279 2280Although B<DB_File> is covered by the Perl license, the library it 2281makes use of, namely Berkeley DB, is not. Berkeley DB has its own 2282copyright and its own license. Please take the time to read it. 2283 2284Here are a few words taken from the Berkeley DB FAQ (at 2285F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license: 2286 2287 Do I have to license DB to use it in Perl scripts? 2288 2289 No. The Berkeley DB license requires that software that uses 2290 Berkeley DB be freely redistributable. In the case of Perl, that 2291 software is Perl, and not your scripts. Any Perl scripts that you 2292 write are your property, including scripts that make use of 2293 Berkeley DB. Neither the Perl license nor the Berkeley DB license 2294 place any restriction on what you may do with them. 2295 2296If you are in any doubt about the license situation, contact either the 2297Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. 2298 2299 2300=head1 SEE ALSO 2301 2302L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, 2303L<perldbmfilter> 2304 2305=head1 AUTHOR 2306 2307The DB_File interface was written by Paul Marquess 2308E<lt>pmqs@cpan.orgE<gt>. 2309 2310=cut 2311