1# DB_File.pm -- Perl 5 interface to Berkeley DB 2# 3# written by Paul Marquess (pmqs@cpan.org) 4# last modified 28th October 2007 5# version 1.818 6# 7# Copyright (c) 1995-2009 Paul Marquess. All rights reserved. 8# This program is free software; you can redistribute it and/or 9# modify it under the same terms as Perl itself. 10 11 12package DB_File::HASHINFO ; 13 14require 5.00404; 15 16use warnings; 17use strict; 18use Carp; 19require Tie::Hash; 20@DB_File::HASHINFO::ISA = qw(Tie::Hash); 21 22sub new 23{ 24 my $pkg = shift ; 25 my %x ; 26 tie %x, $pkg ; 27 bless \%x, $pkg ; 28} 29 30 31sub TIEHASH 32{ 33 my $pkg = shift ; 34 35 bless { VALID => { 36 bsize => 1, 37 ffactor => 1, 38 nelem => 1, 39 cachesize => 1, 40 hash => 2, 41 lorder => 1, 42 }, 43 GOT => {} 44 }, $pkg ; 45} 46 47 48sub FETCH 49{ 50 my $self = shift ; 51 my $key = shift ; 52 53 return $self->{GOT}{$key} if exists $self->{VALID}{$key} ; 54 55 my $pkg = ref $self ; 56 croak "${pkg}::FETCH - Unknown element '$key'" ; 57} 58 59 60sub STORE 61{ 62 my $self = shift ; 63 my $key = shift ; 64 my $value = shift ; 65 66 my $type = $self->{VALID}{$key}; 67 68 if ( $type ) 69 { 70 croak "Key '$key' not associated with a code reference" 71 if $type == 2 && !ref $value && ref $value ne 'CODE'; 72 $self->{GOT}{$key} = $value ; 73 return ; 74 } 75 76 my $pkg = ref $self ; 77 croak "${pkg}::STORE - Unknown element '$key'" ; 78} 79 80sub DELETE 81{ 82 my $self = shift ; 83 my $key = shift ; 84 85 if ( exists $self->{VALID}{$key} ) 86 { 87 delete $self->{GOT}{$key} ; 88 return ; 89 } 90 91 my $pkg = ref $self ; 92 croak "DB_File::HASHINFO::DELETE - Unknown element '$key'" ; 93} 94 95sub EXISTS 96{ 97 my $self = shift ; 98 my $key = shift ; 99 100 exists $self->{VALID}{$key} ; 101} 102 103sub NotHere 104{ 105 my $self = shift ; 106 my $method = shift ; 107 108 croak ref($self) . " does not define the method ${method}" ; 109} 110 111sub FIRSTKEY { my $self = shift ; $self->NotHere("FIRSTKEY") } 112sub NEXTKEY { my $self = shift ; $self->NotHere("NEXTKEY") } 113sub CLEAR { my $self = shift ; $self->NotHere("CLEAR") } 114 115package DB_File::RECNOINFO ; 116 117use warnings; 118use strict ; 119 120@DB_File::RECNOINFO::ISA = qw(DB_File::HASHINFO) ; 121 122sub TIEHASH 123{ 124 my $pkg = shift ; 125 126 bless { VALID => { map {$_, 1} 127 qw( bval cachesize psize flags lorder reclen bfname ) 128 }, 129 GOT => {}, 130 }, $pkg ; 131} 132 133package DB_File::BTREEINFO ; 134 135use warnings; 136use strict ; 137 138@DB_File::BTREEINFO::ISA = qw(DB_File::HASHINFO) ; 139 140sub TIEHASH 141{ 142 my $pkg = shift ; 143 144 bless { VALID => { 145 flags => 1, 146 cachesize => 1, 147 maxkeypage => 1, 148 minkeypage => 1, 149 psize => 1, 150 compare => 2, 151 prefix => 2, 152 lorder => 1, 153 }, 154 GOT => {}, 155 }, $pkg ; 156} 157 158 159package DB_File ; 160 161use warnings; 162use strict; 163our ($VERSION, @ISA, @EXPORT, $AUTOLOAD, $DB_BTREE, $DB_HASH, $DB_RECNO); 164our ($db_version, $use_XSLoader, $splice_end_array_no_length, $splice_end_array, $Error); 165use Carp; 166 167 168$VERSION = "1.827" ; 169$VERSION = eval $VERSION; # needed for dev releases 170 171{ 172 local $SIG{__WARN__} = sub {$splice_end_array_no_length = join(" ",@_);}; 173 my @a =(1); splice(@a, 3); 174 $splice_end_array_no_length = 175 ($splice_end_array_no_length =~ /^splice\(\) offset past end of array at /); 176} 177{ 178 local $SIG{__WARN__} = sub {$splice_end_array = join(" ", @_);}; 179 my @a =(1); splice(@a, 3, 1); 180 $splice_end_array = 181 ($splice_end_array =~ /^splice\(\) offset past end of array at /); 182} 183 184#typedef enum { DB_BTREE, DB_HASH, DB_RECNO } DBTYPE; 185$DB_BTREE = new DB_File::BTREEINFO ; 186$DB_HASH = new DB_File::HASHINFO ; 187$DB_RECNO = new DB_File::RECNOINFO ; 188 189require Tie::Hash; 190require Exporter; 191use AutoLoader; 192BEGIN { 193 $use_XSLoader = 1 ; 194 { local $SIG{__DIE__} ; eval { require XSLoader } ; } 195 196 if ($@) { 197 $use_XSLoader = 0 ; 198 require DynaLoader; 199 @ISA = qw(DynaLoader); 200 } 201} 202 203push @ISA, qw(Tie::Hash Exporter); 204@EXPORT = qw( 205 $DB_BTREE $DB_HASH $DB_RECNO 206 207 BTREEMAGIC 208 BTREEVERSION 209 DB_LOCK 210 DB_SHMEM 211 DB_TXN 212 HASHMAGIC 213 HASHVERSION 214 MAX_PAGE_NUMBER 215 MAX_PAGE_OFFSET 216 MAX_REC_NUMBER 217 RET_ERROR 218 RET_SPECIAL 219 RET_SUCCESS 220 R_CURSOR 221 R_DUP 222 R_FIRST 223 R_FIXEDLEN 224 R_IAFTER 225 R_IBEFORE 226 R_LAST 227 R_NEXT 228 R_NOKEY 229 R_NOOVERWRITE 230 R_PREV 231 R_RECNOSYNC 232 R_SETCURSOR 233 R_SNAPSHOT 234 __R_UNUSED 235 236); 237 238sub AUTOLOAD { 239 my($constname); 240 ($constname = $AUTOLOAD) =~ s/.*:://; 241 my ($error, $val) = constant($constname); 242 Carp::croak $error if $error; 243 no strict 'refs'; 244 *{$AUTOLOAD} = sub { $val }; 245 goto &{$AUTOLOAD}; 246} 247 248 249eval { 250 # Make all Fcntl O_XXX constants available for importing 251 require Fcntl; 252 my @O = grep /^O_/, @Fcntl::EXPORT; 253 Fcntl->import(@O); # first we import what we want to export 254 push(@EXPORT, @O); 255}; 256 257if ($use_XSLoader) 258 { XSLoader::load("DB_File", $VERSION)} 259else 260 { bootstrap DB_File $VERSION } 261 262# Preloaded methods go here. Autoload methods go after __END__, and are 263# processed by the autosplit program. 264 265sub tie_hash_or_array 266{ 267 my (@arg) = @_ ; 268 my $tieHASH = ( (caller(1))[3] =~ /TIEHASH/ ) ; 269 270 use File::Spec; 271 $arg[1] = File::Spec->rel2abs($arg[1]) 272 if defined $arg[1] ; 273 274 $arg[4] = tied %{ $arg[4] } 275 if @arg >= 5 && ref $arg[4] && $arg[4] =~ /=HASH/ && tied %{ $arg[4] } ; 276 277 $arg[2] = O_CREAT()|O_RDWR() if @arg >=3 && ! defined $arg[2]; 278 $arg[3] = 0666 if @arg >=4 && ! defined $arg[3]; 279 280 # make recno in Berkeley DB version 2 (or better) work like 281 # recno in version 1. 282 if ($db_version >= 4 and ! $tieHASH) { 283 $arg[2] |= O_CREAT(); 284 } 285 286 if ($db_version > 1 and defined $arg[4] and $arg[4] =~ /RECNO/ and 287 $arg[1] and ! -e $arg[1]) { 288 open(FH, ">$arg[1]") or return undef ; 289 close FH ; 290 chmod $arg[3] ? $arg[3] : 0666 , $arg[1] ; 291 } 292 293 DoTie_($tieHASH, @arg) ; 294} 295 296sub TIEHASH 297{ 298 tie_hash_or_array(@_) ; 299} 300 301sub TIEARRAY 302{ 303 tie_hash_or_array(@_) ; 304} 305 306sub CLEAR 307{ 308 my $self = shift; 309 my $key = 0 ; 310 my $value = "" ; 311 my $status = $self->seq($key, $value, R_FIRST()); 312 my @keys; 313 314 while ($status == 0) { 315 push @keys, $key; 316 $status = $self->seq($key, $value, R_NEXT()); 317 } 318 foreach $key (reverse @keys) { 319 my $s = $self->del($key); 320 } 321} 322 323sub EXTEND { } 324 325sub STORESIZE 326{ 327 my $self = shift; 328 my $length = shift ; 329 my $current_length = $self->length() ; 330 331 if ($length < $current_length) { 332 my $key ; 333 for ($key = $current_length - 1 ; $key >= $length ; -- $key) 334 { $self->del($key) } 335 } 336 elsif ($length > $current_length) { 337 $self->put($length-1, "") ; 338 } 339} 340 341 342sub SPLICE 343{ 344 my $self = shift; 345 my $offset = shift; 346 if (not defined $offset) { 347 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 348 $offset = 0; 349 } 350 351 my $has_length = @_; 352 my $length = @_ ? shift : 0; 353 # Carping about definedness comes _after_ the OFFSET sanity check. 354 # This is so we get the same error messages as Perl's splice(). 355 # 356 357 my @list = @_; 358 359 my $size = $self->FETCHSIZE(); 360 361 # 'If OFFSET is negative then it start that far from the end of 362 # the array.' 363 # 364 if ($offset < 0) { 365 my $new_offset = $size + $offset; 366 if ($new_offset < 0) { 367 die "Modification of non-creatable array value attempted, " 368 . "subscript $offset"; 369 } 370 $offset = $new_offset; 371 } 372 373 if (not defined $length) { 374 warnings::warnif('uninitialized', 'Use of uninitialized value in splice'); 375 $length = 0; 376 } 377 378 if ($offset > $size) { 379 $offset = $size; 380 warnings::warnif('misc', 'splice() offset past end of array') 381 if $has_length ? $splice_end_array : $splice_end_array_no_length; 382 } 383 384 # 'If LENGTH is omitted, removes everything from OFFSET onward.' 385 if (not defined $length) { 386 $length = $size - $offset; 387 } 388 389 # 'If LENGTH is negative, leave that many elements off the end of 390 # the array.' 391 # 392 if ($length < 0) { 393 $length = $size - $offset + $length; 394 395 if ($length < 0) { 396 # The user must have specified a length bigger than the 397 # length of the array passed in. But perl's splice() 398 # doesn't catch this, it just behaves as for length=0. 399 # 400 $length = 0; 401 } 402 } 403 404 if ($length > $size - $offset) { 405 $length = $size - $offset; 406 } 407 408 # $num_elems holds the current number of elements in the database. 409 my $num_elems = $size; 410 411 # 'Removes the elements designated by OFFSET and LENGTH from an 412 # array,'... 413 # 414 my @removed = (); 415 foreach (0 .. $length - 1) { 416 my $old; 417 my $status = $self->get($offset, $old); 418 if ($status != 0) { 419 my $msg = "error from Berkeley DB on get($offset, \$old)"; 420 if ($status == 1) { 421 $msg .= ' (no such element?)'; 422 } 423 else { 424 $msg .= ": error status $status"; 425 if (defined $! and $! ne '') { 426 $msg .= ", message $!"; 427 } 428 } 429 die $msg; 430 } 431 push @removed, $old; 432 433 $status = $self->del($offset); 434 if ($status != 0) { 435 my $msg = "error from Berkeley DB on del($offset)"; 436 if ($status == 1) { 437 $msg .= ' (no such element?)'; 438 } 439 else { 440 $msg .= ": error status $status"; 441 if (defined $! and $! ne '') { 442 $msg .= ", message $!"; 443 } 444 } 445 die $msg; 446 } 447 448 -- $num_elems; 449 } 450 451 # ...'and replaces them with the elements of LIST, if any.' 452 my $pos = $offset; 453 while (defined (my $elem = shift @list)) { 454 my $old_pos = $pos; 455 my $status; 456 if ($pos >= $num_elems) { 457 $status = $self->put($pos, $elem); 458 } 459 else { 460 $status = $self->put($pos, $elem, $self->R_IBEFORE); 461 } 462 463 if ($status != 0) { 464 my $msg = "error from Berkeley DB on put($pos, $elem, ...)"; 465 if ($status == 1) { 466 $msg .= ' (no such element?)'; 467 } 468 else { 469 $msg .= ", error status $status"; 470 if (defined $! and $! ne '') { 471 $msg .= ", message $!"; 472 } 473 } 474 die $msg; 475 } 476 477 die "pos unexpectedly changed from $old_pos to $pos with R_IBEFORE" 478 if $old_pos != $pos; 479 480 ++ $pos; 481 ++ $num_elems; 482 } 483 484 if (wantarray) { 485 # 'In list context, returns the elements removed from the 486 # array.' 487 # 488 return @removed; 489 } 490 elsif (defined wantarray and not wantarray) { 491 # 'In scalar context, returns the last element removed, or 492 # undef if no elements are removed.' 493 # 494 if (@removed) { 495 my $last = pop @removed; 496 return "$last"; 497 } 498 else { 499 return undef; 500 } 501 } 502 elsif (not defined wantarray) { 503 # Void context 504 } 505 else { die } 506} 507sub ::DB_File::splice { &SPLICE } 508 509sub find_dup 510{ 511 croak "Usage: \$db->find_dup(key,value)\n" 512 unless @_ == 3 ; 513 514 my $db = shift ; 515 my ($origkey, $value_wanted) = @_ ; 516 my ($key, $value) = ($origkey, 0); 517 my ($status) = 0 ; 518 519 for ($status = $db->seq($key, $value, R_CURSOR() ) ; 520 $status == 0 ; 521 $status = $db->seq($key, $value, R_NEXT() ) ) { 522 523 return 0 if $key eq $origkey and $value eq $value_wanted ; 524 } 525 526 return $status ; 527} 528 529sub del_dup 530{ 531 croak "Usage: \$db->del_dup(key,value)\n" 532 unless @_ == 3 ; 533 534 my $db = shift ; 535 my ($key, $value) = @_ ; 536 my ($status) = $db->find_dup($key, $value) ; 537 return $status if $status != 0 ; 538 539 $status = $db->del($key, R_CURSOR() ) ; 540 return $status ; 541} 542 543sub get_dup 544{ 545 croak "Usage: \$db->get_dup(key [,flag])\n" 546 unless @_ == 2 or @_ == 3 ; 547 548 my $db = shift ; 549 my $key = shift ; 550 my $flag = shift ; 551 my $value = 0 ; 552 my $origkey = $key ; 553 my $wantarray = wantarray ; 554 my %values = () ; 555 my @values = () ; 556 my $counter = 0 ; 557 my $status = 0 ; 558 559 # iterate through the database until either EOF ($status == 0) 560 # or a different key is encountered ($key ne $origkey). 561 for ($status = $db->seq($key, $value, R_CURSOR()) ; 562 $status == 0 and $key eq $origkey ; 563 $status = $db->seq($key, $value, R_NEXT()) ) { 564 565 # save the value or count number of matches 566 if ($wantarray) { 567 if ($flag) 568 { ++ $values{$value} } 569 else 570 { push (@values, $value) } 571 } 572 else 573 { ++ $counter } 574 575 } 576 577 return ($wantarray ? ($flag ? %values : @values) : $counter) ; 578} 579 580 581sub STORABLE_freeze 582{ 583 my $type = ref shift; 584 croak "Cannot freeze $type object\n"; 585} 586 587sub STORABLE_thaw 588{ 589 my $type = ref shift; 590 croak "Cannot thaw $type object\n"; 591} 592 593 594 5951; 596__END__ 597 598=head1 NAME 599 600DB_File - Perl5 access to Berkeley DB version 1.x 601 602=head1 SYNOPSIS 603 604 use DB_File; 605 606 [$X =] tie %hash, 'DB_File', [$filename, $flags, $mode, $DB_HASH] ; 607 [$X =] tie %hash, 'DB_File', $filename, $flags, $mode, $DB_BTREE ; 608 [$X =] tie @array, 'DB_File', $filename, $flags, $mode, $DB_RECNO ; 609 610 $status = $X->del($key [, $flags]) ; 611 $status = $X->put($key, $value [, $flags]) ; 612 $status = $X->get($key, $value [, $flags]) ; 613 $status = $X->seq($key, $value, $flags) ; 614 $status = $X->sync([$flags]) ; 615 $status = $X->fd ; 616 617 # BTREE only 618 $count = $X->get_dup($key) ; 619 @list = $X->get_dup($key) ; 620 %list = $X->get_dup($key, 1) ; 621 $status = $X->find_dup($key, $value) ; 622 $status = $X->del_dup($key, $value) ; 623 624 # RECNO only 625 $a = $X->length; 626 $a = $X->pop ; 627 $X->push(list); 628 $a = $X->shift; 629 $X->unshift(list); 630 @r = $X->splice(offset, length, elements); 631 632 # DBM Filters 633 $old_filter = $db->filter_store_key ( sub { ... } ) ; 634 $old_filter = $db->filter_store_value( sub { ... } ) ; 635 $old_filter = $db->filter_fetch_key ( sub { ... } ) ; 636 $old_filter = $db->filter_fetch_value( sub { ... } ) ; 637 638 untie %hash ; 639 untie @array ; 640 641=head1 DESCRIPTION 642 643B<DB_File> is a module which allows Perl programs to make use of the 644facilities provided by Berkeley DB version 1.x (if you have a newer 645version of DB, see L<Using DB_File with Berkeley DB version 2 or greater>). 646It is assumed that you have a copy of the Berkeley DB manual pages at 647hand when reading this documentation. The interface defined here 648mirrors the Berkeley DB interface closely. 649 650Berkeley DB is a C library which provides a consistent interface to a 651number of database formats. B<DB_File> provides an interface to all 652three of the database types currently supported by Berkeley DB. 653 654The file types are: 655 656=over 5 657 658=item B<DB_HASH> 659 660This database type allows arbitrary key/value pairs to be stored in data 661files. This is equivalent to the functionality provided by other 662hashing packages like DBM, NDBM, ODBM, GDBM, and SDBM. Remember though, 663the files created using DB_HASH are not compatible with any of the 664other packages mentioned. 665 666A default hashing algorithm, which will be adequate for most 667applications, is built into Berkeley DB. If you do need to use your own 668hashing algorithm it is possible to write your own in Perl and have 669B<DB_File> use it instead. 670 671=item B<DB_BTREE> 672 673The btree format allows arbitrary key/value pairs to be stored in a 674sorted, balanced binary tree. 675 676As with the DB_HASH format, it is possible to provide a user defined 677Perl routine to perform the comparison of keys. By default, though, the 678keys are stored in lexical order. 679 680=item B<DB_RECNO> 681 682DB_RECNO allows both fixed-length and variable-length flat text files 683to be manipulated using the same key/value pair interface as in DB_HASH 684and DB_BTREE. In this case the key will consist of a record (line) 685number. 686 687=back 688 689=head2 Using DB_File with Berkeley DB version 2 or greater 690 691Although B<DB_File> is intended to be used with Berkeley DB version 1, 692it can also be used with version 2, 3 or 4. In this case the interface is 693limited to the functionality provided by Berkeley DB 1.x. Anywhere the 694version 2 or greater interface differs, B<DB_File> arranges for it to work 695like version 1. This feature allows B<DB_File> scripts that were built 696with version 1 to be migrated to version 2 or greater without any changes. 697 698If you want to make use of the new features available in Berkeley DB 6992.x or greater, use the Perl module B<BerkeleyDB> instead. 700 701B<Note:> The database file format has changed multiple times in Berkeley 702DB version 2, 3 and 4. If you cannot recreate your databases, you 703must dump any existing databases with either the C<db_dump> or the 704C<db_dump185> utility that comes with Berkeley DB. 705Once you have rebuilt DB_File to use Berkeley DB version 2 or greater, 706your databases can be recreated using C<db_load>. Refer to the Berkeley DB 707documentation for further details. 708 709Please read L<"COPYRIGHT"> before using version 2.x or greater of Berkeley 710DB with DB_File. 711 712=head2 Interface to Berkeley DB 713 714B<DB_File> allows access to Berkeley DB files using the tie() mechanism 715in Perl 5 (for full details, see L<perlfunc/tie()>). This facility 716allows B<DB_File> to access Berkeley DB files using either an 717associative array (for DB_HASH & DB_BTREE file types) or an ordinary 718array (for the DB_RECNO file type). 719 720In addition to the tie() interface, it is also possible to access most 721of the functions provided in the Berkeley DB API directly. 722See L<THE API INTERFACE>. 723 724=head2 Opening a Berkeley DB Database File 725 726Berkeley DB uses the function dbopen() to open or create a database. 727Here is the C prototype for dbopen(): 728 729 DB* 730 dbopen (const char * file, int flags, int mode, 731 DBTYPE type, const void * openinfo) 732 733The parameter C<type> is an enumeration which specifies which of the 3 734interface methods (DB_HASH, DB_BTREE or DB_RECNO) is to be used. 735Depending on which of these is actually chosen, the final parameter, 736I<openinfo> points to a data structure which allows tailoring of the 737specific interface method. 738 739This interface is handled slightly differently in B<DB_File>. Here is 740an equivalent call using B<DB_File>: 741 742 tie %array, 'DB_File', $filename, $flags, $mode, $DB_HASH ; 743 744The C<filename>, C<flags> and C<mode> parameters are the direct 745equivalent of their dbopen() counterparts. The final parameter $DB_HASH 746performs the function of both the C<type> and C<openinfo> parameters in 747dbopen(). 748 749In the example above $DB_HASH is actually a pre-defined reference to a 750hash object. B<DB_File> has three of these pre-defined references. 751Apart from $DB_HASH, there is also $DB_BTREE and $DB_RECNO. 752 753The keys allowed in each of these pre-defined references is limited to 754the names used in the equivalent C structure. So, for example, the 755$DB_HASH reference will only allow keys called C<bsize>, C<cachesize>, 756C<ffactor>, C<hash>, C<lorder> and C<nelem>. 757 758To change one of these elements, just assign to it like this: 759 760 $DB_HASH->{'cachesize'} = 10000 ; 761 762The three predefined variables $DB_HASH, $DB_BTREE and $DB_RECNO are 763usually adequate for most applications. If you do need to create extra 764instances of these objects, constructors are available for each file 765type. 766 767Here are examples of the constructors and the valid options available 768for DB_HASH, DB_BTREE and DB_RECNO respectively. 769 770 $a = new DB_File::HASHINFO ; 771 $a->{'bsize'} ; 772 $a->{'cachesize'} ; 773 $a->{'ffactor'}; 774 $a->{'hash'} ; 775 $a->{'lorder'} ; 776 $a->{'nelem'} ; 777 778 $b = new DB_File::BTREEINFO ; 779 $b->{'flags'} ; 780 $b->{'cachesize'} ; 781 $b->{'maxkeypage'} ; 782 $b->{'minkeypage'} ; 783 $b->{'psize'} ; 784 $b->{'compare'} ; 785 $b->{'prefix'} ; 786 $b->{'lorder'} ; 787 788 $c = new DB_File::RECNOINFO ; 789 $c->{'bval'} ; 790 $c->{'cachesize'} ; 791 $c->{'psize'} ; 792 $c->{'flags'} ; 793 $c->{'lorder'} ; 794 $c->{'reclen'} ; 795 $c->{'bfname'} ; 796 797The values stored in the hashes above are mostly the direct equivalent 798of their C counterpart. Like their C counterparts, all are set to a 799default values - that means you don't have to set I<all> of the 800values when you only want to change one. Here is an example: 801 802 $a = new DB_File::HASHINFO ; 803 $a->{'cachesize'} = 12345 ; 804 tie %y, 'DB_File', "filename", $flags, 0777, $a ; 805 806A few of the options need extra discussion here. When used, the C 807equivalent of the keys C<hash>, C<compare> and C<prefix> store pointers 808to C functions. In B<DB_File> these keys are used to store references 809to Perl subs. Below are templates for each of the subs: 810 811 sub hash 812 { 813 my ($data) = @_ ; 814 ... 815 # return the hash value for $data 816 return $hash ; 817 } 818 819 sub compare 820 { 821 my ($key, $key2) = @_ ; 822 ... 823 # return 0 if $key1 eq $key2 824 # -1 if $key1 lt $key2 825 # 1 if $key1 gt $key2 826 return (-1 , 0 or 1) ; 827 } 828 829 sub prefix 830 { 831 my ($key, $key2) = @_ ; 832 ... 833 # return number of bytes of $key2 which are 834 # necessary to determine that it is greater than $key1 835 return $bytes ; 836 } 837 838See L<Changing the BTREE sort order> for an example of using the 839C<compare> template. 840 841If you are using the DB_RECNO interface and you intend making use of 842C<bval>, you should check out L<The 'bval' Option>. 843 844=head2 Default Parameters 845 846It is possible to omit some or all of the final 4 parameters in the 847call to C<tie> and let them take default values. As DB_HASH is the most 848common file format used, the call: 849 850 tie %A, "DB_File", "filename" ; 851 852is equivalent to: 853 854 tie %A, "DB_File", "filename", O_CREAT|O_RDWR, 0666, $DB_HASH ; 855 856It is also possible to omit the filename parameter as well, so the 857call: 858 859 tie %A, "DB_File" ; 860 861is equivalent to: 862 863 tie %A, "DB_File", undef, O_CREAT|O_RDWR, 0666, $DB_HASH ; 864 865See L<In Memory Databases> for a discussion on the use of C<undef> 866in place of a filename. 867 868=head2 In Memory Databases 869 870Berkeley DB allows the creation of in-memory databases by using NULL 871(that is, a C<(char *)0> in C) in place of the filename. B<DB_File> 872uses C<undef> instead of NULL to provide this functionality. 873 874=head1 DB_HASH 875 876The DB_HASH file format is probably the most commonly used of the three 877file formats that B<DB_File> supports. It is also very straightforward 878to use. 879 880=head2 A Simple Example 881 882This example shows how to create a database, add key/value pairs to the 883database, delete keys/value pairs and finally how to enumerate the 884contents of the database. 885 886 use warnings ; 887 use strict ; 888 use DB_File ; 889 our (%h, $k, $v) ; 890 891 unlink "fruit" ; 892 tie %h, "DB_File", "fruit", O_RDWR|O_CREAT, 0666, $DB_HASH 893 or die "Cannot open file 'fruit': $!\n"; 894 895 # Add a few key/value pairs to the file 896 $h{"apple"} = "red" ; 897 $h{"orange"} = "orange" ; 898 $h{"banana"} = "yellow" ; 899 $h{"tomato"} = "red" ; 900 901 # Check for existence of a key 902 print "Banana Exists\n\n" if $h{"banana"} ; 903 904 # Delete a key/value pair. 905 delete $h{"apple"} ; 906 907 # print the contents of the file 908 while (($k, $v) = each %h) 909 { print "$k -> $v\n" } 910 911 untie %h ; 912 913here is the output: 914 915 Banana Exists 916 917 orange -> orange 918 tomato -> red 919 banana -> yellow 920 921Note that the like ordinary associative arrays, the order of the keys 922retrieved is in an apparently random order. 923 924=head1 DB_BTREE 925 926The DB_BTREE format is useful when you want to store data in a given 927order. By default the keys will be stored in lexical order, but as you 928will see from the example shown in the next section, it is very easy to 929define your own sorting function. 930 931=head2 Changing the BTREE sort order 932 933This script shows how to override the default sorting algorithm that 934BTREE uses. Instead of using the normal lexical ordering, a case 935insensitive compare function will be used. 936 937 use warnings ; 938 use strict ; 939 use DB_File ; 940 941 my %h ; 942 943 sub Compare 944 { 945 my ($key1, $key2) = @_ ; 946 "\L$key1" cmp "\L$key2" ; 947 } 948 949 # specify the Perl sub that will do the comparison 950 $DB_BTREE->{'compare'} = \&Compare ; 951 952 unlink "tree" ; 953 tie %h, "DB_File", "tree", O_RDWR|O_CREAT, 0666, $DB_BTREE 954 or die "Cannot open file 'tree': $!\n" ; 955 956 # Add a key/value pair to the file 957 $h{'Wall'} = 'Larry' ; 958 $h{'Smith'} = 'John' ; 959 $h{'mouse'} = 'mickey' ; 960 $h{'duck'} = 'donald' ; 961 962 # Delete 963 delete $h{"duck"} ; 964 965 # Cycle through the keys printing them in order. 966 # Note it is not necessary to sort the keys as 967 # the btree will have kept them in order automatically. 968 foreach (keys %h) 969 { print "$_\n" } 970 971 untie %h ; 972 973Here is the output from the code above. 974 975 mouse 976 Smith 977 Wall 978 979There are a few point to bear in mind if you want to change the 980ordering in a BTREE database: 981 982=over 5 983 984=item 1. 985 986The new compare function must be specified when you create the database. 987 988=item 2. 989 990You cannot change the ordering once the database has been created. Thus 991you must use the same compare function every time you access the 992database. 993 994=item 3 995 996Duplicate keys are entirely defined by the comparison function. 997In the case-insensitive example above, the keys: 'KEY' and 'key' 998would be considered duplicates, and assigning to the second one 999would overwrite the first. If duplicates are allowed for (with the 1000R_DUP flag discussed below), only a single copy of duplicate keys 1001is stored in the database --- so (again with example above) assigning 1002three values to the keys: 'KEY', 'Key', and 'key' would leave just 1003the first key: 'KEY' in the database with three values. For some 1004situations this results in information loss, so care should be taken 1005to provide fully qualified comparison functions when necessary. 1006For example, the above comparison routine could be modified to 1007additionally compare case-sensitively if two keys are equal in the 1008case insensitive comparison: 1009 1010 sub compare { 1011 my($key1, $key2) = @_; 1012 lc $key1 cmp lc $key2 || 1013 $key1 cmp $key2; 1014 } 1015 1016And now you will only have duplicates when the keys themselves 1017are truly the same. (note: in versions of the db library prior to 1018about November 1996, such duplicate keys were retained so it was 1019possible to recover the original keys in sets of keys that 1020compared as equal). 1021 1022 1023=back 1024 1025=head2 Handling Duplicate Keys 1026 1027The BTREE file type optionally allows a single key to be associated 1028with an arbitrary number of values. This option is enabled by setting 1029the flags element of C<$DB_BTREE> to R_DUP when creating the database. 1030 1031There are some difficulties in using the tied hash interface if you 1032want to manipulate a BTREE database with duplicate keys. Consider this 1033code: 1034 1035 use warnings ; 1036 use strict ; 1037 use DB_File ; 1038 1039 my ($filename, %h) ; 1040 1041 $filename = "tree" ; 1042 unlink $filename ; 1043 1044 # Enable duplicate records 1045 $DB_BTREE->{'flags'} = R_DUP ; 1046 1047 tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1048 or die "Cannot open $filename: $!\n"; 1049 1050 # Add some key/value pairs to the file 1051 $h{'Wall'} = 'Larry' ; 1052 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1053 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1054 $h{'Smith'} = 'John' ; 1055 $h{'mouse'} = 'mickey' ; 1056 1057 # iterate through the associative array 1058 # and print each key/value pair. 1059 foreach (sort keys %h) 1060 { print "$_ -> $h{$_}\n" } 1061 1062 untie %h ; 1063 1064Here is the output: 1065 1066 Smith -> John 1067 Wall -> Larry 1068 Wall -> Larry 1069 Wall -> Larry 1070 mouse -> mickey 1071 1072As you can see 3 records have been successfully created with key C<Wall> 1073- the only thing is, when they are retrieved from the database they 1074I<seem> to have the same value, namely C<Larry>. The problem is caused 1075by the way that the associative array interface works. Basically, when 1076the associative array interface is used to fetch the value associated 1077with a given key, it will only ever retrieve the first value. 1078 1079Although it may not be immediately obvious from the code above, the 1080associative array interface can be used to write values with duplicate 1081keys, but it cannot be used to read them back from the database. 1082 1083The way to get around this problem is to use the Berkeley DB API method 1084called C<seq>. This method allows sequential access to key/value 1085pairs. See L<THE API INTERFACE> for details of both the C<seq> method 1086and the API in general. 1087 1088Here is the script above rewritten using the C<seq> API method. 1089 1090 use warnings ; 1091 use strict ; 1092 use DB_File ; 1093 1094 my ($filename, $x, %h, $status, $key, $value) ; 1095 1096 $filename = "tree" ; 1097 unlink $filename ; 1098 1099 # Enable duplicate records 1100 $DB_BTREE->{'flags'} = R_DUP ; 1101 1102 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1103 or die "Cannot open $filename: $!\n"; 1104 1105 # Add some key/value pairs to the file 1106 $h{'Wall'} = 'Larry' ; 1107 $h{'Wall'} = 'Brick' ; # Note the duplicate key 1108 $h{'Wall'} = 'Brick' ; # Note the duplicate key and value 1109 $h{'Smith'} = 'John' ; 1110 $h{'mouse'} = 'mickey' ; 1111 1112 # iterate through the btree using seq 1113 # and print each key/value pair. 1114 $key = $value = 0 ; 1115 for ($status = $x->seq($key, $value, R_FIRST) ; 1116 $status == 0 ; 1117 $status = $x->seq($key, $value, R_NEXT) ) 1118 { print "$key -> $value\n" } 1119 1120 undef $x ; 1121 untie %h ; 1122 1123that prints: 1124 1125 Smith -> John 1126 Wall -> Brick 1127 Wall -> Brick 1128 Wall -> Larry 1129 mouse -> mickey 1130 1131This time we have got all the key/value pairs, including the multiple 1132values associated with the key C<Wall>. 1133 1134To make life easier when dealing with duplicate keys, B<DB_File> comes with 1135a few utility methods. 1136 1137=head2 The get_dup() Method 1138 1139The C<get_dup> method assists in 1140reading duplicate values from BTREE databases. The method can take the 1141following forms: 1142 1143 $count = $x->get_dup($key) ; 1144 @list = $x->get_dup($key) ; 1145 %list = $x->get_dup($key, 1) ; 1146 1147In a scalar context the method returns the number of values associated 1148with the key, C<$key>. 1149 1150In list context, it returns all the values which match C<$key>. Note 1151that the values will be returned in an apparently random order. 1152 1153In list context, if the second parameter is present and evaluates 1154TRUE, the method returns an associative array. The keys of the 1155associative array correspond to the values that matched in the BTREE 1156and the values of the array are a count of the number of times that 1157particular value occurred in the BTREE. 1158 1159So assuming the database created above, we can use C<get_dup> like 1160this: 1161 1162 use warnings ; 1163 use strict ; 1164 use DB_File ; 1165 1166 my ($filename, $x, %h) ; 1167 1168 $filename = "tree" ; 1169 1170 # Enable duplicate records 1171 $DB_BTREE->{'flags'} = R_DUP ; 1172 1173 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1174 or die "Cannot open $filename: $!\n"; 1175 1176 my $cnt = $x->get_dup("Wall") ; 1177 print "Wall occurred $cnt times\n" ; 1178 1179 my %hash = $x->get_dup("Wall", 1) ; 1180 print "Larry is there\n" if $hash{'Larry'} ; 1181 print "There are $hash{'Brick'} Brick Walls\n" ; 1182 1183 my @list = sort $x->get_dup("Wall") ; 1184 print "Wall => [@list]\n" ; 1185 1186 @list = $x->get_dup("Smith") ; 1187 print "Smith => [@list]\n" ; 1188 1189 @list = $x->get_dup("Dog") ; 1190 print "Dog => [@list]\n" ; 1191 1192 1193and it will print: 1194 1195 Wall occurred 3 times 1196 Larry is there 1197 There are 2 Brick Walls 1198 Wall => [Brick Brick Larry] 1199 Smith => [John] 1200 Dog => [] 1201 1202=head2 The find_dup() Method 1203 1204 $status = $X->find_dup($key, $value) ; 1205 1206This method checks for the existence of a specific key/value pair. If the 1207pair exists, the cursor is left pointing to the pair and the method 1208returns 0. Otherwise the method returns a non-zero value. 1209 1210Assuming the database from the previous example: 1211 1212 use warnings ; 1213 use strict ; 1214 use DB_File ; 1215 1216 my ($filename, $x, %h, $found) ; 1217 1218 $filename = "tree" ; 1219 1220 # Enable duplicate records 1221 $DB_BTREE->{'flags'} = R_DUP ; 1222 1223 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1224 or die "Cannot open $filename: $!\n"; 1225 1226 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1227 print "Larry Wall is $found there\n" ; 1228 1229 $found = ( $x->find_dup("Wall", "Harry") == 0 ? "" : "not") ; 1230 print "Harry Wall is $found there\n" ; 1231 1232 undef $x ; 1233 untie %h ; 1234 1235prints this 1236 1237 Larry Wall is there 1238 Harry Wall is not there 1239 1240 1241=head2 The del_dup() Method 1242 1243 $status = $X->del_dup($key, $value) ; 1244 1245This method deletes a specific key/value pair. It returns 12460 if they exist and have been deleted successfully. 1247Otherwise the method returns a non-zero value. 1248 1249Again assuming the existence of the C<tree> database 1250 1251 use warnings ; 1252 use strict ; 1253 use DB_File ; 1254 1255 my ($filename, $x, %h, $found) ; 1256 1257 $filename = "tree" ; 1258 1259 # Enable duplicate records 1260 $DB_BTREE->{'flags'} = R_DUP ; 1261 1262 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1263 or die "Cannot open $filename: $!\n"; 1264 1265 $x->del_dup("Wall", "Larry") ; 1266 1267 $found = ( $x->find_dup("Wall", "Larry") == 0 ? "" : "not") ; 1268 print "Larry Wall is $found there\n" ; 1269 1270 undef $x ; 1271 untie %h ; 1272 1273prints this 1274 1275 Larry Wall is not there 1276 1277=head2 Matching Partial Keys 1278 1279The BTREE interface has a feature which allows partial keys to be 1280matched. This functionality is I<only> available when the C<seq> method 1281is used along with the R_CURSOR flag. 1282 1283 $x->seq($key, $value, R_CURSOR) ; 1284 1285Here is the relevant quote from the dbopen man page where it defines 1286the use of the R_CURSOR flag with seq: 1287 1288 Note, for the DB_BTREE access method, the returned key is not 1289 necessarily an exact match for the specified key. The returned key 1290 is the smallest key greater than or equal to the specified key, 1291 permitting partial key matches and range searches. 1292 1293In the example script below, the C<match> sub uses this feature to find 1294and print the first matching key/value pair given a partial key. 1295 1296 use warnings ; 1297 use strict ; 1298 use DB_File ; 1299 use Fcntl ; 1300 1301 my ($filename, $x, %h, $st, $key, $value) ; 1302 1303 sub match 1304 { 1305 my $key = shift ; 1306 my $value = 0; 1307 my $orig_key = $key ; 1308 $x->seq($key, $value, R_CURSOR) ; 1309 print "$orig_key\t-> $key\t-> $value\n" ; 1310 } 1311 1312 $filename = "tree" ; 1313 unlink $filename ; 1314 1315 $x = tie %h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_BTREE 1316 or die "Cannot open $filename: $!\n"; 1317 1318 # Add some key/value pairs to the file 1319 $h{'mouse'} = 'mickey' ; 1320 $h{'Wall'} = 'Larry' ; 1321 $h{'Walls'} = 'Brick' ; 1322 $h{'Smith'} = 'John' ; 1323 1324 1325 $key = $value = 0 ; 1326 print "IN ORDER\n" ; 1327 for ($st = $x->seq($key, $value, R_FIRST) ; 1328 $st == 0 ; 1329 $st = $x->seq($key, $value, R_NEXT) ) 1330 1331 { print "$key -> $value\n" } 1332 1333 print "\nPARTIAL MATCH\n" ; 1334 1335 match "Wa" ; 1336 match "A" ; 1337 match "a" ; 1338 1339 undef $x ; 1340 untie %h ; 1341 1342Here is the output: 1343 1344 IN ORDER 1345 Smith -> John 1346 Wall -> Larry 1347 Walls -> Brick 1348 mouse -> mickey 1349 1350 PARTIAL MATCH 1351 Wa -> Wall -> Larry 1352 A -> Smith -> John 1353 a -> mouse -> mickey 1354 1355=head1 DB_RECNO 1356 1357DB_RECNO provides an interface to flat text files. Both variable and 1358fixed length records are supported. 1359 1360In order to make RECNO more compatible with Perl, the array offset for 1361all RECNO arrays begins at 0 rather than 1 as in Berkeley DB. 1362 1363As with normal Perl arrays, a RECNO array can be accessed using 1364negative indexes. The index -1 refers to the last element of the array, 1365-2 the second last, and so on. Attempting to access an element before 1366the start of the array will raise a fatal run-time error. 1367 1368=head2 The 'bval' Option 1369 1370The operation of the bval option warrants some discussion. Here is the 1371definition of bval from the Berkeley DB 1.85 recno manual page: 1372 1373 The delimiting byte to be used to mark the end of a 1374 record for variable-length records, and the pad charac- 1375 ter for fixed-length records. If no value is speci- 1376 fied, newlines (``\n'') are used to mark the end of 1377 variable-length records and fixed-length records are 1378 padded with spaces. 1379 1380The second sentence is wrong. In actual fact bval will only default to 1381C<"\n"> when the openinfo parameter in dbopen is NULL. If a non-NULL 1382openinfo parameter is used at all, the value that happens to be in bval 1383will be used. That means you always have to specify bval when making 1384use of any of the options in the openinfo parameter. This documentation 1385error will be fixed in the next release of Berkeley DB. 1386 1387That clarifies the situation with regards Berkeley DB itself. What 1388about B<DB_File>? Well, the behavior defined in the quote above is 1389quite useful, so B<DB_File> conforms to it. 1390 1391That means that you can specify other options (e.g. cachesize) and 1392still have bval default to C<"\n"> for variable length records, and 1393space for fixed length records. 1394 1395Also note that the bval option only allows you to specify a single byte 1396as a delimiter. 1397 1398=head2 A Simple Example 1399 1400Here is a simple example that uses RECNO (if you are using a version 1401of Perl earlier than 5.004_57 this example won't work -- see 1402L<Extra RECNO Methods> for a workaround). 1403 1404 use warnings ; 1405 use strict ; 1406 use DB_File ; 1407 1408 my $filename = "text" ; 1409 unlink $filename ; 1410 1411 my @h ; 1412 tie @h, "DB_File", $filename, O_RDWR|O_CREAT, 0666, $DB_RECNO 1413 or die "Cannot open file 'text': $!\n" ; 1414 1415 # Add a few key/value pairs to the file 1416 $h[0] = "orange" ; 1417 $h[1] = "blue" ; 1418 $h[2] = "yellow" ; 1419 1420 push @h, "green", "black" ; 1421 1422 my $elements = scalar @h ; 1423 print "The array contains $elements entries\n" ; 1424 1425 my $last = pop @h ; 1426 print "popped $last\n" ; 1427 1428 unshift @h, "white" ; 1429 my $first = shift @h ; 1430 print "shifted $first\n" ; 1431 1432 # Check for existence of a key 1433 print "Element 1 Exists with value $h[1]\n" if $h[1] ; 1434 1435 # use a negative index 1436 print "The last element is $h[-1]\n" ; 1437 print "The 2nd last element is $h[-2]\n" ; 1438 1439 untie @h ; 1440 1441Here is the output from the script: 1442 1443 The array contains 5 entries 1444 popped black 1445 shifted white 1446 Element 1 Exists with value blue 1447 The last element is green 1448 The 2nd last element is yellow 1449 1450=head2 Extra RECNO Methods 1451 1452If you are using a version of Perl earlier than 5.004_57, the tied 1453array interface is quite limited. In the example script above 1454C<push>, C<pop>, C<shift>, C<unshift> 1455or determining the array length will not work with a tied array. 1456 1457To make the interface more useful for older versions of Perl, a number 1458of methods are supplied with B<DB_File> to simulate the missing array 1459operations. All these methods are accessed via the object returned from 1460the tie call. 1461 1462Here are the methods: 1463 1464=over 5 1465 1466=item B<$X-E<gt>push(list) ;> 1467 1468Pushes the elements of C<list> to the end of the array. 1469 1470=item B<$value = $X-E<gt>pop ;> 1471 1472Removes and returns the last element of the array. 1473 1474=item B<$X-E<gt>shift> 1475 1476Removes and returns the first element of the array. 1477 1478=item B<$X-E<gt>unshift(list) ;> 1479 1480Pushes the elements of C<list> to the start of the array. 1481 1482=item B<$X-E<gt>length> 1483 1484Returns the number of elements in the array. 1485 1486=item B<$X-E<gt>splice(offset, length, elements);> 1487 1488Returns a splice of the array. 1489 1490=back 1491 1492=head2 Another Example 1493 1494Here is a more complete example that makes use of some of the methods 1495described above. It also makes use of the API interface directly (see 1496L<THE API INTERFACE>). 1497 1498 use warnings ; 1499 use strict ; 1500 my (@h, $H, $file, $i) ; 1501 use DB_File ; 1502 use Fcntl ; 1503 1504 $file = "text" ; 1505 1506 unlink $file ; 1507 1508 $H = tie @h, "DB_File", $file, O_RDWR|O_CREAT, 0666, $DB_RECNO 1509 or die "Cannot open file $file: $!\n" ; 1510 1511 # first create a text file to play with 1512 $h[0] = "zero" ; 1513 $h[1] = "one" ; 1514 $h[2] = "two" ; 1515 $h[3] = "three" ; 1516 $h[4] = "four" ; 1517 1518 1519 # Print the records in order. 1520 # 1521 # The length method is needed here because evaluating a tied 1522 # array in a scalar context does not return the number of 1523 # elements in the array. 1524 1525 print "\nORIGINAL\n" ; 1526 foreach $i (0 .. $H->length - 1) { 1527 print "$i: $h[$i]\n" ; 1528 } 1529 1530 # use the push & pop methods 1531 $a = $H->pop ; 1532 $H->push("last") ; 1533 print "\nThe last record was [$a]\n" ; 1534 1535 # and the shift & unshift methods 1536 $a = $H->shift ; 1537 $H->unshift("first") ; 1538 print "The first record was [$a]\n" ; 1539 1540 # Use the API to add a new record after record 2. 1541 $i = 2 ; 1542 $H->put($i, "Newbie", R_IAFTER) ; 1543 1544 # and a new record before record 1. 1545 $i = 1 ; 1546 $H->put($i, "New One", R_IBEFORE) ; 1547 1548 # delete record 3 1549 $H->del(3) ; 1550 1551 # now print the records in reverse order 1552 print "\nREVERSE\n" ; 1553 for ($i = $H->length - 1 ; $i >= 0 ; -- $i) 1554 { print "$i: $h[$i]\n" } 1555 1556 # same again, but use the API functions instead 1557 print "\nREVERSE again\n" ; 1558 my ($s, $k, $v) = (0, 0, 0) ; 1559 for ($s = $H->seq($k, $v, R_LAST) ; 1560 $s == 0 ; 1561 $s = $H->seq($k, $v, R_PREV)) 1562 { print "$k: $v\n" } 1563 1564 undef $H ; 1565 untie @h ; 1566 1567and this is what it outputs: 1568 1569 ORIGINAL 1570 0: zero 1571 1: one 1572 2: two 1573 3: three 1574 4: four 1575 1576 The last record was [four] 1577 The first record was [zero] 1578 1579 REVERSE 1580 5: last 1581 4: three 1582 3: Newbie 1583 2: one 1584 1: New One 1585 0: first 1586 1587 REVERSE again 1588 5: last 1589 4: three 1590 3: Newbie 1591 2: one 1592 1: New One 1593 0: first 1594 1595Notes: 1596 1597=over 5 1598 1599=item 1. 1600 1601Rather than iterating through the array, C<@h> like this: 1602 1603 foreach $i (@h) 1604 1605it is necessary to use either this: 1606 1607 foreach $i (0 .. $H->length - 1) 1608 1609or this: 1610 1611 for ($a = $H->get($k, $v, R_FIRST) ; 1612 $a == 0 ; 1613 $a = $H->get($k, $v, R_NEXT) ) 1614 1615=item 2. 1616 1617Notice that both times the C<put> method was used the record index was 1618specified using a variable, C<$i>, rather than the literal value 1619itself. This is because C<put> will return the record number of the 1620inserted line via that parameter. 1621 1622=back 1623 1624=head1 THE API INTERFACE 1625 1626As well as accessing Berkeley DB using a tied hash or array, it is also 1627possible to make direct use of most of the API functions defined in the 1628Berkeley DB documentation. 1629 1630To do this you need to store a copy of the object returned from the tie. 1631 1632 $db = tie %hash, "DB_File", "filename" ; 1633 1634Once you have done that, you can access the Berkeley DB API functions 1635as B<DB_File> methods directly like this: 1636 1637 $db->put($key, $value, R_NOOVERWRITE) ; 1638 1639B<Important:> If you have saved a copy of the object returned from 1640C<tie>, the underlying database file will I<not> be closed until both 1641the tied variable is untied and all copies of the saved object are 1642destroyed. 1643 1644 use DB_File ; 1645 $db = tie %hash, "DB_File", "filename" 1646 or die "Cannot tie filename: $!" ; 1647 ... 1648 undef $db ; 1649 untie %hash ; 1650 1651See L<The untie() Gotcha> for more details. 1652 1653All the functions defined in L<dbopen> are available except for 1654close() and dbopen() itself. The B<DB_File> method interface to the 1655supported functions have been implemented to mirror the way Berkeley DB 1656works whenever possible. In particular note that: 1657 1658=over 5 1659 1660=item * 1661 1662The methods return a status value. All return 0 on success. 1663All return -1 to signify an error and set C<$!> to the exact 1664error code. The return code 1 generally (but not always) means that the 1665key specified did not exist in the database. 1666 1667Other return codes are defined. See below and in the Berkeley DB 1668documentation for details. The Berkeley DB documentation should be used 1669as the definitive source. 1670 1671=item * 1672 1673Whenever a Berkeley DB function returns data via one of its parameters, 1674the equivalent B<DB_File> method does exactly the same. 1675 1676=item * 1677 1678If you are careful, it is possible to mix API calls with the tied 1679hash/array interface in the same piece of code. Although only a few of 1680the methods used to implement the tied interface currently make use of 1681the cursor, you should always assume that the cursor has been changed 1682any time the tied hash/array interface is used. As an example, this 1683code will probably not do what you expect: 1684 1685 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1686 or die "Cannot tie $filename: $!" ; 1687 1688 # Get the first key/value pair and set the cursor 1689 $X->seq($key, $value, R_FIRST) ; 1690 1691 # this line will modify the cursor 1692 $count = scalar keys %x ; 1693 1694 # Get the second key/value pair. 1695 # oops, it didn't, it got the last key/value pair! 1696 $X->seq($key, $value, R_NEXT) ; 1697 1698The code above can be rearranged to get around the problem, like this: 1699 1700 $X = tie %x, 'DB_File', $filename, O_RDWR|O_CREAT, 0777, $DB_BTREE 1701 or die "Cannot tie $filename: $!" ; 1702 1703 # this line will modify the cursor 1704 $count = scalar keys %x ; 1705 1706 # Get the first key/value pair and set the cursor 1707 $X->seq($key, $value, R_FIRST) ; 1708 1709 # Get the second key/value pair. 1710 # worked this time. 1711 $X->seq($key, $value, R_NEXT) ; 1712 1713=back 1714 1715All the constants defined in L<dbopen> for use in the flags parameters 1716in the methods defined below are also available. Refer to the Berkeley 1717DB documentation for the precise meaning of the flags values. 1718 1719Below is a list of the methods available. 1720 1721=over 5 1722 1723=item B<$status = $X-E<gt>get($key, $value [, $flags]) ;> 1724 1725Given a key (C<$key>) this method reads the value associated with it 1726from the database. The value read from the database is returned in the 1727C<$value> parameter. 1728 1729If the key does not exist the method returns 1. 1730 1731No flags are currently defined for this method. 1732 1733=item B<$status = $X-E<gt>put($key, $value [, $flags]) ;> 1734 1735Stores the key/value pair in the database. 1736 1737If you use either the R_IAFTER or R_IBEFORE flags, the C<$key> parameter 1738will have the record number of the inserted key/value pair set. 1739 1740Valid flags are R_CURSOR, R_IAFTER, R_IBEFORE, R_NOOVERWRITE and 1741R_SETCURSOR. 1742 1743=item B<$status = $X-E<gt>del($key [, $flags]) ;> 1744 1745Removes all key/value pairs with key C<$key> from the database. 1746 1747A return code of 1 means that the requested key was not in the 1748database. 1749 1750R_CURSOR is the only valid flag at present. 1751 1752=item B<$status = $X-E<gt>fd ;> 1753 1754Returns the file descriptor for the underlying database. 1755 1756See L<Locking: The Trouble with fd> for an explanation for why you should 1757not use C<fd> to lock your database. 1758 1759=item B<$status = $X-E<gt>seq($key, $value, $flags) ;> 1760 1761This interface allows sequential retrieval from the database. See 1762L<dbopen> for full details. 1763 1764Both the C<$key> and C<$value> parameters will be set to the key/value 1765pair read from the database. 1766 1767The flags parameter is mandatory. The valid flag values are R_CURSOR, 1768R_FIRST, R_LAST, R_NEXT and R_PREV. 1769 1770=item B<$status = $X-E<gt>sync([$flags]) ;> 1771 1772Flushes any cached buffers to disk. 1773 1774R_RECNOSYNC is the only valid flag at present. 1775 1776=back 1777 1778=head1 DBM FILTERS 1779 1780A DBM Filter is a piece of code that is be used when you I<always> 1781want to make the same transformation to all keys and/or values in a 1782DBM database. 1783 1784There are four methods associated with DBM Filters. All work identically, 1785and each is used to install (or uninstall) a single DBM Filter. Each 1786expects a single parameter, namely a reference to a sub. The only 1787difference between them is the place that the filter is installed. 1788 1789To summarise: 1790 1791=over 5 1792 1793=item B<filter_store_key> 1794 1795If a filter has been installed with this method, it will be invoked 1796every time you write a key to a DBM database. 1797 1798=item B<filter_store_value> 1799 1800If a filter has been installed with this method, it will be invoked 1801every time you write a value to a DBM database. 1802 1803 1804=item B<filter_fetch_key> 1805 1806If a filter has been installed with this method, it will be invoked 1807every time you read a key from a DBM database. 1808 1809=item B<filter_fetch_value> 1810 1811If a filter has been installed with this method, it will be invoked 1812every time you read a value from a DBM database. 1813 1814=back 1815 1816You can use any combination of the methods, from none, to all four. 1817 1818All filter methods return the existing filter, if present, or C<undef> 1819in not. 1820 1821To delete a filter pass C<undef> to it. 1822 1823=head2 The Filter 1824 1825When each filter is called by Perl, a local copy of C<$_> will contain 1826the key or value to be filtered. Filtering is achieved by modifying 1827the contents of C<$_>. The return code from the filter is ignored. 1828 1829=head2 An Example -- the NULL termination problem. 1830 1831Consider the following scenario. You have a DBM database 1832that you need to share with a third-party C application. The C application 1833assumes that I<all> keys and values are NULL terminated. Unfortunately 1834when Perl writes to DBM databases it doesn't use NULL termination, so 1835your Perl application will have to manage NULL termination itself. When 1836you write to the database you will have to use something like this: 1837 1838 $hash{"$key\0"} = "$value\0" ; 1839 1840Similarly the NULL needs to be taken into account when you are considering 1841the length of existing keys/values. 1842 1843It would be much better if you could ignore the NULL terminations issue 1844in the main application code and have a mechanism that automatically 1845added the terminating NULL to all keys and values whenever you write to 1846the database and have them removed when you read from the database. As I'm 1847sure you have already guessed, this is a problem that DBM Filters can 1848fix very easily. 1849 1850 use warnings ; 1851 use strict ; 1852 use DB_File ; 1853 1854 my %hash ; 1855 my $filename = "filt" ; 1856 unlink $filename ; 1857 1858 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1859 or die "Cannot open $filename: $!\n" ; 1860 1861 # Install DBM Filters 1862 $db->filter_fetch_key ( sub { s/\0$// } ) ; 1863 $db->filter_store_key ( sub { $_ .= "\0" } ) ; 1864 $db->filter_fetch_value( sub { s/\0$// } ) ; 1865 $db->filter_store_value( sub { $_ .= "\0" } ) ; 1866 1867 $hash{"abc"} = "def" ; 1868 my $a = $hash{"ABC"} ; 1869 # ... 1870 undef $db ; 1871 untie %hash ; 1872 1873Hopefully the contents of each of the filters should be 1874self-explanatory. Both "fetch" filters remove the terminating NULL, 1875and both "store" filters add a terminating NULL. 1876 1877 1878=head2 Another Example -- Key is a C int. 1879 1880Here is another real-life example. By default, whenever Perl writes to 1881a DBM database it always writes the key and value as strings. So when 1882you use this: 1883 1884 $hash{12345} = "something" ; 1885 1886the key 12345 will get stored in the DBM database as the 5 byte string 1887"12345". If you actually want the key to be stored in the DBM database 1888as a C int, you will have to use C<pack> when writing, and C<unpack> 1889when reading. 1890 1891Here is a DBM Filter that does it: 1892 1893 use warnings ; 1894 use strict ; 1895 use DB_File ; 1896 my %hash ; 1897 my $filename = "filt" ; 1898 unlink $filename ; 1899 1900 1901 my $db = tie %hash, 'DB_File', $filename, O_CREAT|O_RDWR, 0666, $DB_HASH 1902 or die "Cannot open $filename: $!\n" ; 1903 1904 $db->filter_fetch_key ( sub { $_ = unpack("i", $_) } ) ; 1905 $db->filter_store_key ( sub { $_ = pack ("i", $_) } ) ; 1906 $hash{123} = "def" ; 1907 # ... 1908 undef $db ; 1909 untie %hash ; 1910 1911This time only two filters have been used -- we only need to manipulate 1912the contents of the key, so it wasn't necessary to install any value 1913filters. 1914 1915=head1 HINTS AND TIPS 1916 1917 1918=head2 Locking: The Trouble with fd 1919 1920Until version 1.72 of this module, the recommended technique for locking 1921B<DB_File> databases was to flock the filehandle returned from the "fd" 1922function. Unfortunately this technique has been shown to be fundamentally 1923flawed (Kudos to David Harris for tracking this down). Use it at your own 1924peril! 1925 1926The locking technique went like this. 1927 1928 $db = tie(%db, 'DB_File', 'foo.db', O_CREAT|O_RDWR, 0644) 1929 || die "dbcreat foo.db $!"; 1930 $fd = $db->fd; 1931 open(DB_FH, "+<&=$fd") || die "dup $!"; 1932 flock (DB_FH, LOCK_EX) || die "flock: $!"; 1933 ... 1934 $db{"Tom"} = "Jerry" ; 1935 ... 1936 flock(DB_FH, LOCK_UN); 1937 undef $db; 1938 untie %db; 1939 close(DB_FH); 1940 1941In simple terms, this is what happens: 1942 1943=over 5 1944 1945=item 1. 1946 1947Use "tie" to open the database. 1948 1949=item 2. 1950 1951Lock the database with fd & flock. 1952 1953=item 3. 1954 1955Read & Write to the database. 1956 1957=item 4. 1958 1959Unlock and close the database. 1960 1961=back 1962 1963Here is the crux of the problem. A side-effect of opening the B<DB_File> 1964database in step 2 is that an initial block from the database will get 1965read from disk and cached in memory. 1966 1967To see why this is a problem, consider what can happen when two processes, 1968say "A" and "B", both want to update the same B<DB_File> database 1969using the locking steps outlined above. Assume process "A" has already 1970opened the database and has a write lock, but it hasn't actually updated 1971the database yet (it has finished step 2, but not started step 3 yet). Now 1972process "B" tries to open the same database - step 1 will succeed, 1973but it will block on step 2 until process "A" releases the lock. The 1974important thing to notice here is that at this point in time both 1975processes will have cached identical initial blocks from the database. 1976 1977Now process "A" updates the database and happens to change some of the 1978data held in the initial buffer. Process "A" terminates, flushing 1979all cached data to disk and releasing the database lock. At this point 1980the database on disk will correctly reflect the changes made by process 1981"A". 1982 1983With the lock released, process "B" can now continue. It also updates the 1984database and unfortunately it too modifies the data that was in its 1985initial buffer. Once that data gets flushed to disk it will overwrite 1986some/all of the changes process "A" made to the database. 1987 1988The result of this scenario is at best a database that doesn't contain 1989what you expect. At worst the database will corrupt. 1990 1991The above won't happen every time competing process update the same 1992B<DB_File> database, but it does illustrate why the technique should 1993not be used. 1994 1995=head2 Safe ways to lock a database 1996 1997Starting with version 2.x, Berkeley DB has internal support for locking. 1998The companion module to this one, B<BerkeleyDB>, provides an interface 1999to this locking functionality. If you are serious about locking 2000Berkeley DB databases, I strongly recommend using B<BerkeleyDB>. 2001 2002If using B<BerkeleyDB> isn't an option, there are a number of modules 2003available on CPAN that can be used to implement locking. Each one 2004implements locking differently and has different goals in mind. It is 2005therefore worth knowing the difference, so that you can pick the right 2006one for your application. Here are the three locking wrappers: 2007 2008=over 5 2009 2010=item B<Tie::DB_Lock> 2011 2012A B<DB_File> wrapper which creates copies of the database file for 2013read access, so that you have a kind of a multiversioning concurrent read 2014system. However, updates are still serial. Use for databases where reads 2015may be lengthy and consistency problems may occur. 2016 2017=item B<Tie::DB_LockFile> 2018 2019A B<DB_File> wrapper that has the ability to lock and unlock the database 2020while it is being used. Avoids the tie-before-flock problem by simply 2021re-tie-ing the database when you get or drop a lock. Because of the 2022flexibility in dropping and re-acquiring the lock in the middle of a 2023session, this can be massaged into a system that will work with long 2024updates and/or reads if the application follows the hints in the POD 2025documentation. 2026 2027=item B<DB_File::Lock> 2028 2029An extremely lightweight B<DB_File> wrapper that simply flocks a lockfile 2030before tie-ing the database and drops the lock after the untie. Allows 2031one to use the same lockfile for multiple databases to avoid deadlock 2032problems, if desired. Use for databases where updates are reads are 2033quick and simple flock locking semantics are enough. 2034 2035=back 2036 2037=head2 Sharing Databases With C Applications 2038 2039There is no technical reason why a Berkeley DB database cannot be 2040shared by both a Perl and a C application. 2041 2042The vast majority of problems that are reported in this area boil down 2043to the fact that C strings are NULL terminated, whilst Perl strings are 2044not. See L<DBM FILTERS> for a generic way to work around this problem. 2045 2046Here is a real example. Netscape 2.0 keeps a record of the locations you 2047visit along with the time you last visited them in a DB_HASH database. 2048This is usually stored in the file F<~/.netscape/history.db>. The key 2049field in the database is the location string and the value field is the 2050time the location was last visited stored as a 4 byte binary value. 2051 2052If you haven't already guessed, the location string is stored with a 2053terminating NULL. This means you need to be careful when accessing the 2054database. 2055 2056Here is a snippet of code that is loosely based on Tom Christiansen's 2057I<ggh> script (available from your nearest CPAN archive in 2058F<authors/id/TOMC/scripts/nshist.gz>). 2059 2060 use warnings ; 2061 use strict ; 2062 use DB_File ; 2063 use Fcntl ; 2064 2065 my ($dotdir, $HISTORY, %hist_db, $href, $binary_time, $date) ; 2066 $dotdir = $ENV{HOME} || $ENV{LOGNAME}; 2067 2068 $HISTORY = "$dotdir/.netscape/history.db"; 2069 2070 tie %hist_db, 'DB_File', $HISTORY 2071 or die "Cannot open $HISTORY: $!\n" ;; 2072 2073 # Dump the complete database 2074 while ( ($href, $binary_time) = each %hist_db ) { 2075 2076 # remove the terminating NULL 2077 $href =~ s/\x00$// ; 2078 2079 # convert the binary time into a user friendly string 2080 $date = localtime unpack("V", $binary_time); 2081 print "$date $href\n" ; 2082 } 2083 2084 # check for the existence of a specific key 2085 # remember to add the NULL 2086 if ( $binary_time = $hist_db{"http://mox.perl.com/\x00"} ) { 2087 $date = localtime unpack("V", $binary_time) ; 2088 print "Last visited mox.perl.com on $date\n" ; 2089 } 2090 else { 2091 print "Never visited mox.perl.com\n" 2092 } 2093 2094 untie %hist_db ; 2095 2096=head2 The untie() Gotcha 2097 2098If you make use of the Berkeley DB API, it is I<very> strongly 2099recommended that you read L<perltie/The untie Gotcha>. 2100 2101Even if you don't currently make use of the API interface, it is still 2102worth reading it. 2103 2104Here is an example which illustrates the problem from a B<DB_File> 2105perspective: 2106 2107 use DB_File ; 2108 use Fcntl ; 2109 2110 my %x ; 2111 my $X ; 2112 2113 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_TRUNC 2114 or die "Cannot tie first time: $!" ; 2115 2116 $x{123} = 456 ; 2117 2118 untie %x ; 2119 2120 tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2121 or die "Cannot tie second time: $!" ; 2122 2123 untie %x ; 2124 2125When run, the script will produce this error message: 2126 2127 Cannot tie second time: Invalid argument at bad.file line 14. 2128 2129Although the error message above refers to the second tie() statement 2130in the script, the source of the problem is really with the untie() 2131statement that precedes it. 2132 2133Having read L<perltie> you will probably have already guessed that the 2134error is caused by the extra copy of the tied object stored in C<$X>. 2135If you haven't, then the problem boils down to the fact that the 2136B<DB_File> destructor, DESTROY, will not be called until I<all> 2137references to the tied object are destroyed. Both the tied variable, 2138C<%x>, and C<$X> above hold a reference to the object. The call to 2139untie() will destroy the first, but C<$X> still holds a valid 2140reference, so the destructor will not get called and the database file 2141F<tst.fil> will remain open. The fact that Berkeley DB then reports the 2142attempt to open a database that is already open via the catch-all 2143"Invalid argument" doesn't help. 2144 2145If you run the script with the C<-w> flag the error message becomes: 2146 2147 untie attempted while 1 inner references still exist at bad.file line 12. 2148 Cannot tie second time: Invalid argument at bad.file line 14. 2149 2150which pinpoints the real problem. Finally the script can now be 2151modified to fix the original problem by destroying the API object 2152before the untie: 2153 2154 ... 2155 $x{123} = 456 ; 2156 2157 undef $X ; 2158 untie %x ; 2159 2160 $X = tie %x, 'DB_File', 'tst.fil' , O_RDWR|O_CREAT 2161 ... 2162 2163 2164=head1 COMMON QUESTIONS 2165 2166=head2 Why is there Perl source in my database? 2167 2168If you look at the contents of a database file created by DB_File, 2169there can sometimes be part of a Perl script included in it. 2170 2171This happens because Berkeley DB uses dynamic memory to allocate 2172buffers which will subsequently be written to the database file. Being 2173dynamic, the memory could have been used for anything before DB 2174malloced it. As Berkeley DB doesn't clear the memory once it has been 2175allocated, the unused portions will contain random junk. In the case 2176where a Perl script gets written to the database, the random junk will 2177correspond to an area of dynamic memory that happened to be used during 2178the compilation of the script. 2179 2180Unless you don't like the possibility of there being part of your Perl 2181scripts embedded in a database file, this is nothing to worry about. 2182 2183=head2 How do I store complex data structures with DB_File? 2184 2185Although B<DB_File> cannot do this directly, there is a module which 2186can layer transparently over B<DB_File> to accomplish this feat. 2187 2188Check out the MLDBM module, available on CPAN in the directory 2189F<modules/by-module/MLDBM>. 2190 2191=head2 What does "Invalid Argument" mean? 2192 2193You will get this error message when one of the parameters in the 2194C<tie> call is wrong. Unfortunately there are quite a few parameters to 2195get wrong, so it can be difficult to figure out which one it is. 2196 2197Here are a couple of possibilities: 2198 2199=over 5 2200 2201=item 1. 2202 2203Attempting to reopen a database without closing it. 2204 2205=item 2. 2206 2207Using the O_WRONLY flag. 2208 2209=back 2210 2211=head2 What does "Bareword 'DB_File' not allowed" mean? 2212 2213You will encounter this particular error message when you have the 2214C<strict 'subs'> pragma (or the full strict pragma) in your script. 2215Consider this script: 2216 2217 use warnings ; 2218 use strict ; 2219 use DB_File ; 2220 my %x ; 2221 tie %x, DB_File, "filename" ; 2222 2223Running it produces the error in question: 2224 2225 Bareword "DB_File" not allowed while "strict subs" in use 2226 2227To get around the error, place the word C<DB_File> in either single or 2228double quotes, like this: 2229 2230 tie %x, "DB_File", "filename" ; 2231 2232Although it might seem like a real pain, it is really worth the effort 2233of having a C<use strict> in all your scripts. 2234 2235=head1 REFERENCES 2236 2237Articles that are either about B<DB_File> or make use of it. 2238 2239=over 5 2240 2241=item 1. 2242 2243I<Full-Text Searching in Perl>, Tim Kientzle (tkientzle@ddj.com), 2244Dr. Dobb's Journal, Issue 295, January 1999, pp 34-41 2245 2246=back 2247 2248=head1 HISTORY 2249 2250Moved to the Changes file. 2251 2252=head1 BUGS 2253 2254Some older versions of Berkeley DB had problems with fixed length 2255records using the RECNO file format. This problem has been fixed since 2256version 1.85 of Berkeley DB. 2257 2258I am sure there are bugs in the code. If you do find any, or can 2259suggest any enhancements, I would welcome your comments. 2260 2261=head1 AVAILABILITY 2262 2263B<DB_File> comes with the standard Perl source distribution. Look in 2264the directory F<ext/DB_File>. Given the amount of time between releases 2265of Perl the version that ships with Perl is quite likely to be out of 2266date, so the most recent version can always be found on CPAN (see 2267L<perlmodlib/CPAN> for details), in the directory 2268F<modules/by-module/DB_File>. 2269 2270This version of B<DB_File> will work with either version 1.x, 2.x or 22713.x of Berkeley DB, but is limited to the functionality provided by 2272version 1. 2273 2274The official web site for Berkeley DB is F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>. 2275All versions of Berkeley DB are available there. 2276 2277Alternatively, Berkeley DB version 1 is available at your nearest CPAN 2278archive in F<src/misc/db.1.85.tar.gz>. 2279 2280=head1 COPYRIGHT 2281 2282Copyright (c) 1995-2012 Paul Marquess. All rights reserved. This program 2283is free software; you can redistribute it and/or modify it under the 2284same terms as Perl itself. 2285 2286Although B<DB_File> is covered by the Perl license, the library it 2287makes use of, namely Berkeley DB, is not. Berkeley DB has its own 2288copyright and its own license. Please take the time to read it. 2289 2290Here are are few words taken from the Berkeley DB FAQ (at 2291F<http://www.oracle.com/technology/products/berkeley-db/db/index.html>) regarding the license: 2292 2293 Do I have to license DB to use it in Perl scripts? 2294 2295 No. The Berkeley DB license requires that software that uses 2296 Berkeley DB be freely redistributable. In the case of Perl, that 2297 software is Perl, and not your scripts. Any Perl scripts that you 2298 write are your property, including scripts that make use of 2299 Berkeley DB. Neither the Perl license nor the Berkeley DB license 2300 place any restriction on what you may do with them. 2301 2302If you are in any doubt about the license situation, contact either the 2303Berkeley DB authors or the author of DB_File. See L<"AUTHOR"> for details. 2304 2305 2306=head1 SEE ALSO 2307 2308L<perl>, L<dbopen(3)>, L<hash(3)>, L<recno(3)>, L<btree(3)>, 2309L<perldbmfilter> 2310 2311=head1 AUTHOR 2312 2313The DB_File interface was written by Paul Marquess 2314E<lt>pmqs@cpan.orgE<gt>. 2315 2316=cut 2317