1# ---------------------------------------------------------------------- 2# NAME : BibTeX/Entry.pm 3# CLASSES : Text::BibTeX::Entry 4# RELATIONS : base class for Text::BibTeX::StructuredEntry, and 5# ultimately for all user-supplied structured entry classes 6# DESCRIPTION: Provides an object-oriented interface to BibTeX entries. 7# CREATED : March 1997, Greg Ward 8# MODIFIED : 9# VERSION : $Id$ 10# COPYRIGHT : Copyright (c) 1997-2000 by Gregory P. Ward. All rights 11# reserved. 12# 13# This file is part of the Text::BibTeX library. This 14# library is free software; you may redistribute it and/or 15# modify it under the same terms as Perl itself. 16# ---------------------------------------------------------------------- 17package Text::BibTeX::Entry; 18 19require 5.004; # for isa, and delete on a slice 20 21use strict; 22use vars qw'$VERSION'; 23use Carp; 24use Text::BibTeX qw(:metatypes :nodetypes); 25 26$VERSION = 0.88; 27 28=head1 NAME 29 30Text::BibTeX::Entry - read and parse BibTeX files 31 32=head1 SYNOPSIS 33 34 use Text::BibTeX::Entry; 35 36 # ...assuming that $bibfile and $newbib are both objects of class 37 # Text::BibTeX::File, opened for reading and writing (respectively): 38 39 # Entry creation/parsing methods: 40 $entry = Text::BibTeX::Entry->new(); 41 $entry->read ($bibfile); 42 $entry->parse ($filename, $filehandle); 43 $entry->parse_s ($entry_text); 44 45 # or: 46 $entry = Text::BibTeX::Entry->new( $bibfile ); 47 $entry = Text::BibTeX::Entry->new( $filename, $filehandle ); 48 $entry = Text::BibTeX::Entry->new( $entry_text ); 49 50 # Entry query methods 51 warn "error in input" unless $entry->parse_ok; 52 $metatype = $entry->metatype; 53 $type = $entry->type; 54 55 # if metatype is BTE_REGULAR or BTE_MACRODEF: 56 $key = $entry->key; # only for BTE_REGULAR metatype 57 $num_fields = $entry->num_fields; 58 @fieldlist = $entry->fieldlist; 59 $has_title = $entry->exists ('title'); 60 $title = $entry->get ('title'); 61 # or: 62 ($val1,$val2,...$valn) = 
$entry->get ($field1, $field2, ..., $fieldn); 63 64 # if metatype is BTE_COMMENT or BTE_PREAMBLE: 65 $value = $entry->value; 66 67 # Author name methods 68 @authors = $entry->split ('author'); 69 ($first_author) = $entry->names ('author'); 70 71 # Entry modification methods 72 $entry->set_type ($new_type); 73 $entry->set_key ($new_key); 74 $entry->set ('title', $new_title); 75 # or: 76 $entry->set ($field1, $val1, $field2, $val2, ..., $fieldn, $valn); 77 $entry->delete (@fields); 78 $entry->set_fieldlist (\@fieldlist); 79 80 # Entry output methods 81 $entry->write ($newbib); 82 $entry->print ($filehandle); 83 $entry_text = $entry->print_s; 84 85 # Reset internal parser state: 86 $entry = Text::BibTeX::Entry->new(); 87 $entry->parse ($filename, undef); 88 $entry->parse_s (undef); 89 90 # or: 91 $entry = Text::BibTeX::Entry->new( $filename, undef ); 92 $entry = Text::BibTeX::Entry->new( undef ); 93 94 # Miscellaneous methods 95 $entry->warn ($entry_warning); 96 # or: 97 $entry->warn ($field_warning, $field); 98 $entry->clone; 99 100=head1 DESCRIPTION 101 102C<Text::BibTeX::Entry> does all the real work of reading and parsing 103BibTeX files. (Well, actually it just provides an object-oriented Perl 104front-end to a C library that does all that. But that's not important 105right now.) 106 107BibTeX entries can be read either from C<Text::BibTeX::File> objects (using 108the C<read> method), or directly from a filehandle (using the C<parse> 109method), or from a string (using C<parse_s>). The first is preferable, 110since you don't have to worry about supplying the filename, and because of 111the extra functionality provided by the C<Text::BibTeX::File> class. 112Currently, this means that you may specify the I<database structure> to 113which entries are expected to conform via the C<File> class. 
This lets you 114ensure that entries follow the rules for required fields and mutually 115constrained fields for a particular type of database, and also gives you 116access to all the methods of the I<structured entry class> for this 117database structure. See L<Text::BibTeX::Structure> for details on database 118structures. 119 120Once you have the entry, you can query it or change it in a variety of 121ways. The query methods are C<parse_ok>, C<type>, C<key>, C<num_fields>, 122C<fieldlist>, C<exists>, and C<get>. Methods for changing the entry are 123C<set_type>, C<set_key>, C<set_fieldlist>, C<delete>, and C<set>. 124 125Finally, you can output BibTeX entries, again either to an open 126C<Text::BibTeX::File> object, a filehandle or a string. (A filehandle or 127C<File> object must, of course, have been opened in write mode.) Output to 128a C<File> object is done with the C<write> method, to a filehandle via 129C<print>, and to a string with C<print_s>. Using the C<File> class is 130recommended for future extensibility, although it currently doesn't offer 131anything extra. 132 133=head1 METHODS 134 135=head2 Entry creation/parsing methods 136 137=over 4 138 139=item new ([OPTS ,] [SOURCE]) 140 141Creates a new C<Text::BibTeX::Entry> object. If the SOURCE parameter is 142supplied, it must be one of the following: a C<Text::BibTeX::File> (or 143descendant class) object, a filename/filehandle pair, or a string. Calls 144C<read> to read from a C<Text::BibTeX::File> object, C<parse> to read from 145a filehandle, and C<parse_s> to read from a string. 146 147A filehandle can be specified as a GLOB reference, or as an 148C<IO::Handle> (or descendants) object, or as a C<FileHandle> (or 149descendants) object. (But there's really no point in using 150C<FileHandle> objects, since C<Text::BibTeX> requires Perl 5.004, which 151always includes the C<IO> modules.) 
You can I<not> pass in the name of
a filehandle as a string, though, because C<Text::BibTeX::Entry>
conforms to the C<use strict> pragma (which disallows such symbolic
references).

The corresponding filename should be supplied in order to allow for
accurate error messages; if you simply don't have the filename, you can
pass C<undef> and you'll get error messages without a filename. (It's
probably better to rearrange your code so that the filename is
available, though.)

Thus, the following are equivalent to read from a file named by
C<$filename> (error handling ignored):

   # good ol' fashioned filehandle and GLOB ref
   open (BIBFILE, $filename);
   $entry = Text::BibTeX::Entry->new($filename, \*BIBFILE);

   # newfangled IO::File thingy
   $file = IO::File->new($filename);
   $entry = Text::BibTeX::Entry->new($filename, $file);

But using a C<Text::BibTeX::File> object is simpler and preferred:

   $file = Text::BibTeX::File->new($filename);
   $entry = Text::BibTeX::Entry->new($file);

Returns the new object, unless SOURCE is supplied and reading/parsing
the entry fails (e.g., due to end of file) -- then it returns false.

You may supply a reference to an option hash as first argument.
Supported options are:

=over 4

=item BINMODE

Set the way Text::BibTeX deals with strings. By default it manages
strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
UTF-8 strings and you can customise the normalization with the
NORMALIZATION option.

   Text::BibTeX::Entry->new(
      { binmode => 'utf-8', normalization => 'NFD' },
      $file );

=item NORMALIZATION

Selects the normalization form applied to UTF-8 strings when BINMODE is
'utf-8' (for example 'NFD', as in the example above); NFC is used when
this option is not given.

=back


=cut

# new ([OPTS ,] [SOURCE]) -- constructor.
#
# OPTS, when present, must be the first argument and must be a plain hash
# reference.  Option names are matched case-insensitively; only `binmode'
# and `normalization' are consulted here.  The caller's hash is never
# modified.
#
# SOURCE is one of:
#   * a Text::BibTeX::File (or descendant) object  -> read()
#   * a (filename, filehandle-or-undef) pair       -> parse()
#   * a single non-reference scalar (or undef)     -> parse_s()
# Anything else croaks.
#
# Returns the new object; if a SOURCE was given and the read/parse call
# returned a false value, that false value is returned instead.
sub new
{
    my ($class, @source) = @_;

    $class = ref ($class) || $class;

    my $self = bless {
        'file'     => undef,
        'type'     => undef,
        'key'      => undef,
        'status'   => undef,
        'metatype' => undef,
        'fields'   => [],
        'values'   => {},
    }, $class;

    # Fold the option names to lower case in a private copy; writing the
    # folded keys back into the caller's hash would be a surprising side
    # effect of calling a constructor.
    my %opts;
    if (@source && ref $source[0] eq 'HASH')
    {
        %opts = %{ shift @source };
        $opts{ lc $_ } = $opts{$_} for keys %opts;
    }

    $self->{binmode} = 'utf-8'
        if defined $opts{binmode} && $opts{binmode} =~ /utf-?8/i;
    $self->{normalization} = $opts{normalization}
        if exists $opts{normalization};

    return $self unless @source;

    my $status;

    # blessed() keeps an unblessed reference (e.g. a lone GLOB ref) from
    # blowing up inside ->isa, so that it reaches the croak below instead.
    require Scalar::Util;

    if (@source == 1
        && Scalar::Util::blessed ($source[0])
        && $source[0]->isa ('Text::BibTeX::File'))
    {
        my $file = $source[0];
        $status = $self->read ($file);

        # A file with a database structure attached turns the entry into
        # an instance of that structure's entry class.
        if (my $structure = $file->structure)
        {
            $self->{structure} = $structure;
            bless $self, $structure->entry_class;
        }
    }
    elsif (@source == 2
           && defined $source[0] && ! ref $source[0]
           && (! defined $source[1] || fileno ($source[1]) >= 0))
    {
        $status = $self->parse ($source[0], $source[1]);
    }
    elsif (@source == 1 && ! ref $source[0])
    {
        $status = $self->parse_s ($source[0]);
    }
    else
    {
        croak "new: source argument must be either a Text::BibTeX::File " .
              "(or descendant) object, filename/filehandle pair, or " .
              "a string";
    }

    return $status unless $status;      # parse failed -- tell our caller
    return $self;
}

=item clone

Clone a Text::BibTeX::Entry object, returning the clone. This re-uses the reference to any
Text::BibTeX::Structure or Text::BibTeX::File but copies everything else,
so that the clone can be modified apart from the original.

=cut

# clone () -- return a copy of this entry, blessed into the same class.
#
# The `structure' and `file' objects are shared with the original.  The
# scalar slots, the field list, the field values and the line table are
# copied, and so is the `value' slot that value() and print_s() read for
# comment/preamble entries.  The copies are one level deep: a field value
# that is itself a reference is shared between clone and original.
sub clone
{
    my $self = shift;
    my $clone = {};

    # Shared by reference -- these objects are not changed through an entry.
    foreach my $shared (qw(structure file))
    {
        $clone->{$shared} = $self->{$shared} if $self->{$shared};
    }

    # Plain scalar slots.
    foreach my $slot (qw(binmode normalization type key status metatype))
    {
        $clone->{$slot} = $self->{$slot};
    }

    # Only present on entries that carry a single value rather than fields.
    $clone->{value} = $self->{value} if exists $self->{value};

    # Copy the containers wholesale instead of walking them with each():
    # that does not depend on the state of the hash iterator, and the clone
    # always ends up with a `values' hash even when there are no fields.
    $clone->{fields} = [ @{ $self->{fields} || [] } ];
    $clone->{values} = { %{ $self->{values} || {} } };
    $clone->{lines}  = { %{ $self->{lines} } } if $self->{lines};

    return bless $clone, ref ($self);
}

=item read (BIBFILE)

Reads and parses an entry from BIBFILE, which must be a
C<Text::BibTeX::File> object (or descendant). The next entry will be read
from the file associated with that object.

Returns the same as C<parse> (or C<parse_s>): false if no entry found
(e.g., at end-of-file), true otherwise. To see if the parse itself failed
(due to errors in the input), call the C<parse_ok> method.

=cut

# read (BIBFILE [, PRESERVE]) -- parse the next entry from a File object.
#
# Croaks unless BIBFILE is a Text::BibTeX::File (or descendant) object.
# Remembers the File object in $self->{file}, inherits its `binmode' and
# `normalization' settings for any of the two this entry does not have a
# slot for yet, and hands over to parse().
sub read
{
    my ($self, $source, $preserve) = @_;

    # Check blessedness first so that undef, strings and plain references
    # get the message below rather than a "Can't call method" error.
    require Scalar::Util;
    croak "`source' argument must be ref to open Text::BibTeX::File " .
          "(or descendant) object"
        unless Scalar::Util::blessed ($source)
            && $source->isa ('Text::BibTeX::File');

    my $fn = $source->{'filename'};
    my $fh = $source->{'handle'};
    $self->{'file'} = $source;          # store File object for later use

    # Propagate flags
    foreach my $flag (qw(binmode normalization))
    {
        $self->{$flag} = $source->{$flag} unless exists $self->{$flag};
    }

    return $self->parse ($fn, $fh, $preserve);
}


=item parse (FILENAME, FILEHANDLE)

Reads and parses the next entry from FILEHANDLE.
(That is, it scans the 326input until an '@' sign is seen, and then slurps up to the next '@' 327sign. Everything between the two '@' signs [including the first one, 328but not the second one -- it's pushed back onto the input stream for the 329next entry] is parsed as a BibTeX entry, with the simultaneous 330construction of an abstract syntax tree [AST]. The AST is traversed to 331ferret out the most interesting information, and this is stuffed into a 332Perl hash, which coincidentally is the C<Text::BibTeX::Entry> object 333you've been tossing around. But you don't need to know any of that -- I 334just figured if you've read this far, you might want to know something 335about the inner workings of this module.) 336 337The success of the parse is stored internally so that you can later 338query it with the C<parse_ok> method. Even in the presence of syntax 339errors, you'll usually get something resembling your input, but it's 340usually not wise to try to do anything with it. Just call C<parse_ok>, 341and if it returns false then silently skip to the next entry. (The 342error messages printed out by the parser should be quite adequate for 343the user to figure out what's wrong. And no, there's currently no way 344for you to capture or redirect those error messages -- they're always 345printed to C<stderr> by the underlying C code. That should change in 346future releases.) 347 348If no '@' signs are seen on the input before reaching end-of-file, then 349we've exhausted all the entries in the file, and C<parse> returns a 350false value. Otherwise, it returns a true value -- even if there were 351syntax errors. Hence, it's important to check C<parse_ok>. 352 353The FILENAME parameter is only used for generating error messages, but 354anybody using your program will certainly appreciate your setting it 355correctly! 356 357Passing C<undef> to FILEHANDLE will reset the state of the underlying 358C parser, which is required in order to parse multiple files. 

=item parse_s (TEXT)

Parses a BibTeX entry (using the above rules) from the string TEXT. The
string is not modified; repeatedly calling C<parse_s> with the same string
will give you the same results each time. Thus, there's no point in
putting multiple entries in one string.

Passing C<undef> to TEXT will reset the state of the underlying
C parser, which may be required in order to parse multiple strings.

=back

=cut

# _preserve (PRESERVE) -- settle the "preserve values" flag for a parse.
#
# An explicitly defined PRESERVE is used as given.  Otherwise, if this
# entry remembers a Text::BibTeX::File object, that object's
# preserve_values setting is used.  Text::BibTeX::Value is loaded whenever
# the resulting flag is true.  Returns the flag.
sub _preserve
{
    my ($self, $preserve) = @_;

    unless (defined $preserve)
    {
        my $file = $self->{'file'};
        $preserve = $file->preserve_values
            if defined $file && $file->isa ('Text::BibTeX::File');
    }

    require Text::BibTeX::Value if $preserve;
    return $preserve;
}

# parse (FILENAME, FILEHANDLE [, PRESERVE])
#
# With a defined FILEHANDLE, hands over to _parse; with an undefined one,
# calls _reset_parse instead.  Returns whatever the routine called returns.
sub parse
{
    my ($self, $filename, $filehandle, $preserve) = @_;

    $preserve = $self->_preserve ($preserve);

    return _reset_parse () unless defined $filehandle;
    return _parse ($self, $filename, $filehandle, $preserve);
}


# parse_s (TEXT [, PRESERVE])
#
# String counterpart of parse(): a defined TEXT goes to _parse_s, an
# undefined one triggers _reset_parse_s.
sub parse_s
{
    my ($self, $text, $preserve) = @_;

    $preserve = $self->_preserve ($preserve);

    return _reset_parse_s () unless defined $text;
    return _parse_s ($self, $text, $preserve);
}


=head2 Entry query methods

=over 4

=item parse_ok ()

Returns false if there were any serious errors encountered while parsing
the entry. (A "serious" error is a lexical or syntax error; currently,
warnings such as "undefined macro" result in an error message being
printed to C<stderr> for the user's edification, but no notice is
available to the calling code.)

=item type ()

Returns the type of the entry. (The `type' is the word that follows the
'@' sign; e.g. `article', `book', `inproceedings', etc. for the standard
BibTeX styles.)

=item metatype ()

Returns the metatype of the entry.
(The `metatype' is a numeric value used
to classify entry types into four groups: comment, preamble, macro
definition (C<@string> entries), and regular (all other entry types).
C<Text::BibTeX> exports four constants for these metatypes: C<BTE_COMMENT>,
C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>.)

=item key ()

Returns the key of the entry. (The key is the token immediately
following the opening `{' or `(' in "regular" entries. Returns C<undef>
for entries that don't have a key, such as macro definition (C<@string>)
entries.)

=item num_fields ()

Returns the number of fields in the entry. (Note that, currently, this is
I<not> equivalent to putting C<scalar> in front of a call to C<fieldlist>.
See below for the consequences of calling C<fieldlist> in a scalar
context.)

=item fieldlist ()

Returns the list of fields in the entry.

B<WARNING> In scalar context, it no longer returns a
reference to the object's own list of fields.

=cut

# parse_ok () -- the status recorded for this entry (the `status' slot).
sub parse_ok
{
    my ($self) = @_;

    return $self->{'status'};
}

# metatype () -- the `metatype' slot, passed through _process_result.
sub metatype
{
    my ($self) = @_;

    return Text::BibTeX->_process_result ($self->{'metatype'},
                                          $self->{binmode},
                                          $self->{normalization});
}

# type () -- the `type' slot, passed through _process_result.
sub type
{
    my ($self) = @_;

    return Text::BibTeX->_process_result ($self->{'type'},
                                          $self->{binmode},
                                          $self->{normalization});
}

# key () -- the entry key, passed through _process_result, or undef when
# the entry has no key.
#
# The test is on definedness, not existence: new() always creates the
# `key' slot (holding undef), so an existence test would never take the
# undef branch.  This mirrors how get() treats undefined field values.
sub key
{
    my ($self) = @_;

    # explicit undef (not a bare return) keeps the list-context result a
    # one-element list, as it has always been
    return undef unless defined $self->{key};

    return Text::BibTeX->_process_result ($self->{key},
                                          $self->{binmode},
                                          $self->{normalization});
}

# num_fields () -- number of names in the entry's field list.
sub num_fields
{
    my ($self) = @_;

    return scalar @{$self->{'fields'}};
}

# fieldlist () -- the field names, each passed through _process_result.
# In scalar context the map yields the number of fields.
sub fieldlist
{
    my ($self) = @_;
    my ($binmode, $normalization) = @{$self}{qw(binmode normalization)};

    return map { Text::BibTeX->_process_result ($_, $binmode, $normalization) }
               @{$self->{'fields'}};
}

=item exists (FIELD)

Returns true if a field named FIELD is present in the entry, false
otherwise.

=item get (FIELD, ...)

Returns the value of one or more FIELDs, as a list of values.
For example: 495 496 $author = $entry->get ('author'); 497 ($author, $editor) = $entry->get ('author', 'editor'); 498 499If a FIELD is not present in the entry, C<undef> will be returned at its 500place in the return list. However, you can't completely trust this as a 501test for presence or absence of a field; it is possible for a field to be 502present but undefined. Currently this can only happen due to certain 503syntax errors in the input, or if you pass an undefined value to C<set>, or 504if you create a new field with C<set_fieldlist> (the new field's value is 505implicitly set to C<undef>). 506 507Normally, the field value is what the input looks like after "maximal 508processing"--quote characters are removed, whitespace is collapsed (the 509same way that BibTeX itself does it), macros are expanded, and multiple 510tokens are pasted together. (See L<bt_postprocess> for details on the 511post-processing performed by B<btparse>.) 512 513For example, if your input file has the following: 514 515 @string{of = "of"} 516 @string{foobars = "Foobars"} 517 518 @article{foobar, 519 title = { The Mating Habits } # of # " Adult " # foobars 520 } 521 522then using C<get> to query the value of the C<title> field from the 523C<foobar> entry would give the string "The Mating Habits of Adult Foobars". 524 525However, in certain circumstances you may wish to preserve the values as 526they appear in the input. This is done by setting a C<preserve_values> 527flag at some point; then, C<get> will return not strings but 528C<Text::BibTeX::Value> objects. Each C<Value> object is a list of 529C<Text::BibTeX::SimpleValue> objects, which in turn consists of a simple 530value type (string, macro, or number) and the text of the simple value. 531Various ways to set the C<preserve_values> flag and the interface to 532both C<Value> and C<SimpleValue> objects are described in 533L<Text::BibTeX::Value>. 

=item value ()

Returns the single string associated with C<@comment> and C<@preamble>
entries. For instance, the entry

   @preamble{" This is a preamble" #
             {---the concatenation of several strings}}

would return a value of "This is a preamble---the concatenation of
several strings".

If this entry was parsed in "value preservation" mode, then C<value>
acts like C<get>, and returns a C<Value> object rather than a simple
string.

=back

=cut

# exists (FIELD) -- true if the entry's `values' hash has a slot for FIELD
# (after the name has been run through _process_argument), even when the
# stored value is undef.
sub exists
{
    my ($self, $field) = @_;

    my $name = Text::BibTeX->_process_argument ($field,
                                                $self->{binmode},
                                                $self->{normalization});
    return exists $self->{values}{$name};
}

# get (FIELD, ...) -- look up one or more field values.
#
# Field names go through _process_argument on the way in; defined values
# go through _process_result on the way out, undefined ones stay undef.
# With at most one result the single value is returned; otherwise the
# whole list is.
sub get
{
    my ($self, @fields) = @_;
    my ($binmode, $normalization) = @{$self}{qw(binmode normalization)};

    my @names = map { Text::BibTeX->_process_argument ($_, $binmode, $normalization) }
                    @fields;

    # Take a copy of the slice: looking fields up must never create them.
    my @raw = @{$self->{'values'}}{@names};

    my @found = map { defined ($_)
                         ? Text::BibTeX->_process_result ($_, $binmode, $normalization)
                         : undef }
                    @raw;

    return $found[0] unless @found > 1;
    return @found;
}

# value () -- the `value' slot, passed through _process_result.
sub value
{
    my ($self) = @_;

    return Text::BibTeX->_process_result ($self->{value},
                                          $self->{binmode},
                                          $self->{normalization});
}


=head2 Author name methods

This is the only part of the module that makes any assumption about the
nature of the data, namely that certain fields are lists delimited by a
simple word such as "and", and that the delimited sub-strings are human
names of the "First von Last" or "von Last, Jr., First" style used by
BibTeX. If you are using this module for anything other than
bibliographic data, you can most likely forget about these two methods.
However, if you are in fact hacking on BibTeX-style bibliographic data,
these could come in very handy -- the name-parsing done by BibTeX is not
trivial, and the list-splitting would also be a pain to implement in
Perl because you have to pay attention to brace-depth.
(Not that it 590wasn't a pain to implement in C -- it's just a lot more efficient than a 591Perl implementation would be.) 592 593Incidentally, both of these methods assume that the strings being split 594have already been "collapsed" in the BibTeX way, i.e. all leading and 595trailing whitespace removed and internal whitespace reduced to single 596spaces. This should always be the case when using these two methods on 597a C<Text::BibTeX::Entry> object, but these are actually just front ends 598to more general functions in C<Text::BibTeX>. (More general in that you 599supply the string to be parsed, rather than supplying the name of an 600entry field.) Should you ever use those more general functions 601directly, you might have to worry about collapsing whitespace; see 602L<Text::BibTeX> (the C<split_list> and C<split_name> functions in 603particular) for more information. 604 605Please note that the interface to author name parsing is experimental, 606subject to change, and open to discussion. Please let me know if you 607have problems with it, think it's just perfect, or whatever. 608 609=over 4 610 611=item split (FIELD [, DELIM [, DESC]]) 612 613Splits the value of FIELD on DELIM (default: `and'). Don't assume that 614this works the same as Perl's builtin C<split> just because the names are 615the same: in particular, DELIM must be a simple string (no regexps), and 616delimiters that are at the beginning or end of the string, or at non-zero 617brace depth, or not surrounded by whitespace, are ignored. Some examples 618might illuminate matters: 619 620 if field F is... then split (F) returns... 
621 'Name1 and Name2' ('Name1', 'Name2') 622 'Name1 and and Name2' ('Name1', undef, 'Name2') 623 'Name1 and' ('Name1 and') 624 'and Name2' ('and Name2') 625 'Name1 {and} Name2 and Name3' ('Name1 {and} Name2', 'Name3') 626 '{Name1 and Name2} and Name3' ('{Name1 and Name2}', 'Name3') 627 628Note that a warning will be issued for empty names (as in the second 629example above). A warning ought to be issued for delimiters at the 630beginning or end of a string, but currently this isn't done. (Hmmm.) 631 632DESC is a one-word description of the substrings; it defaults to 'name'. 633It is only used for generating warning messages. 634 635=item names (FIELD) 636 637Splits FIELD as described above, and further splits each name into four 638components: first, von, last, and jr. 639 640Returns a list of C<Text::BibTeX::Name> objects, each of which represents 641one name. Use the C<part> method to query these objects; see 642L<Text::BibTeX::Name> for details on the interface to name objects (and on 643name-parsing as well). 644 645For example if this entry: 646 647 @article{foo, 648 author = {John Smith and 649 Hacker, J. Random and 650 Ludwig van Beethoven and 651 {Foo, Bar and Company}}} 652 653has been parsed into a C<Text::BibTeX::Entry> object C<$entry>, then 654 655 @names = $entry->names ('author'); 656 657will put a list of C<Text::BibTeX::Name> objects in C<@names>. These can 658be queried individually as described in L<Text::BibTeX::Name>; for instance, 659 660 @last = $names[0]->part ('last'); 661 662would put the list of tokens comprising the last name of the first author 663into the C<@last> array: C<('Smith')>. 

=cut

# split (FIELD [, DELIM [, DESC]]) -- split a field value into a list.
#
# Returns nothing when the entry has no such field.  Otherwise the stored
# value is handed to Text::BibTeX::split_list together with the delimiter
# (default 'and'), the filename of the remembered File object (if any),
# the field's recorded line number, the description (default 'name') and
# this entry's binmode/normalization settings; whatever split_list returns
# is returned.
#
# (The original author left a disabled "local $^W = 0" here, meant to
# silence a spurious warning when the filename is undefined.)
sub split
{
    my ($self, $field, $delim, $desc) = @_;

    return unless $self->exists($field);

    my $file = $self->{file};
    my %options = (binmode       => $self->{binmode},
                   normalization => $self->{normalization});

    return Text::BibTeX::split_list ($self->{values}{$field},
                                     $delim || 'and',
                                     ($file && $file->{filename}),
                                     $self->{lines}{$field},
                                     $desc || 'name',
                                     \%options);
}

# names (FIELD) -- split FIELD with split() and wrap every piece in a
# Text::BibTeX::Name object.
#
# Each Name is constructed with this entry's binmode/normalization
# settings, the piece itself, the filename and line number of the field,
# and the zero-based position of the piece in the list.
sub names
{
    require Text::BibTeX::Name;

    my ($self, $field) = @_;

    my $filename = ($self->{'file'} && $self->{'file'}{'filename'});
    my $line = $self->{'lines'}{$field};

    my @names = $self->split ($field);
    my $position = 0;
    foreach my $name (@names)           # $name aliases the array element
    {
        $name = Text::BibTeX::Name->new (
            {binmode => $self->{binmode}, normalization => $self->{normalization}},
            $name, $filename, $line, $position++);
    }
    return @names;
}

=back

=head2 Entry modification methods

=over 4

=item set_type (TYPE)

Sets the entry's type.

=item set_metatype (METATYPE)

Sets the entry's metatype (must be one of the four constants
C<BTE_COMMENT>, C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>, which
are all optionally exported from C<Text::BibTeX>).

=item set_key (KEY)

Sets the entry's key.

=item set (FIELD, VALUE, ...)

Sets the value of field FIELD. (VALUE might be C<undef> or unsupplied,
in which case FIELD will simply be set to C<undef> -- this is where the
difference between the C<exists> method and testing the definedness of
field values becomes clear.)

Multiple (FIELD, VALUE) pairs may be supplied; they will be processed in
order (i.e. the input is treated like a list, not a hash).
For example:

   $entry->set ('author', $author);
   $entry->set ('author', $author, 'editor', $editor);

VALUE can be either a simple string or a C<Text::BibTeX::Value> object;
it doesn't matter if the entry was parsed in "full post-processing" or
"preserve input values" mode.

=item delete (FIELD)

Deletes field FIELD from an entry.

=item set_fieldlist (FIELDLIST)

Sets the entry's list of fields to FIELDLIST, which must be a list
reference. If any of the field names supplied in FIELDLIST are not
currently present in the entry, they are created with the value C<undef>
and a warning is printed. Conversely, if any of the fields currently
present in the entry are not named in the list of fields supplied to
C<set_fieldlist>, they are deleted from the entry and another warning is
printed.

=back

=cut

# set_type (TYPE) -- store TYPE as the entry type; returns it.
sub set_type
{
    my ($self, $type) = @_;

    return $self->{'type'} = $type;
}

# set_metatype (METATYPE) -- store METATYPE as the entry metatype; returns it.
sub set_metatype
{
    my ($self, $metatype) = @_;

    return $self->{'metatype'} = $metatype;
}

# set_key (KEY) -- store KEY (after _process_argument) as the entry key.
sub set_key
{
    my ($self, $key) = @_;

    return $self->{'key'} =
        Text::BibTeX->_process_argument ($key, $self->{binmode}, $self->{normalization});
}

# set (FIELD, VALUE, ...) -- assign one or more fields, in argument order.
#
# Croaks on an odd number of arguments.  Values are run through
# _process_argument; field names are used as given.  A field that has no
# slot in the `values' hash yet is appended to the field list.
sub set
{
    my $self = shift;
    croak "set: must supply an even number of arguments"
        unless (@_ % 2 == 0);

    while (@_)
    {
        my $field   = shift;
        my ($value) = Text::BibTeX->_process_argument (shift,
                                                       $self->{binmode},
                                                       $self->{normalization});

        push (@{$self->{'fields'}}, $field)
            unless exists $self->{'values'}{$field};
        $self->{'values'}{$field} = $value;
    }
}

# delete (FIELD, ...) -- remove the named fields from both the field list
# and the `values' hash.  Returns what the hash-slice delete returns, i.e.
# the removed values.
sub delete
{
    my ($self, @fields) = @_;

    my %gone = map { $_ => 1 } @fields;
    @{$self->{'fields'}} = grep { ! $gone{$_} } @{$self->{'fields'}};

    return delete @{$self->{'values'}}{@fields};
}

# set_fieldlist (FIELDLIST) -- install a new field list (an array ref).
#
# Names in FIELDLIST without a slot in `values' get one holding undef;
# slots in `values' that are not named in FIELDLIST are removed.  Both
# cases carp.  The entry keeps its own copy of the list.
sub set_fieldlist
{
    my ($self, $fields) = @_;

    # Warn if any of the caller's fields aren't already present in the entry
    my %in_list;
    foreach my $field (@$fields)
    {
        $in_list{$field} = 1;
        next if exists $self->{'values'}{$field};

        carp "Implicitly adding undefined field \"$field\"";
        $self->{'values'}{$field} = undef;
    }

    # And see if there are any fields in the entry that aren't in the user's
    # list; delete them from the entry if so
    foreach my $field (keys %{$self->{'values'}})
    {
        next if $in_list{$field};

        carp "Implicitly deleting field \"$field\"";
        delete $self->{'values'}{$field};
    }

    # Now we can install (a copy of) the caller's desired field list
    return $self->{'fields'} = [@$fields];
}


=head2 Entry output methods

=over 4

=item write (BIBFILE)

Prints a BibTeX entry on the filehandle associated with BIBFILE (which
should be a C<Text::BibTeX::File> object, opened for output). Currently
the printout is not particularly human-friendly; a highly configurable
pretty-printer will be developed eventually.

=item print (FILEHANDLE)

Prints a BibTeX entry on FILEHANDLE.

=item print_s ()

Prints a BibTeX entry to a string, which is the return value.

=cut

# write (BIBFILE) -- print the entry on BIBFILE's `handle'.
sub write
{
    my ($self, $bibfile) = @_;

    my $fh = $bibfile->{'handle'};
    $self->print ($fh);
}

# print ([FILEHANDLE]) -- print the entry on FILEHANDLE; STDOUT is used
# when no (true) handle is given.
sub print
{
    my ($self, $handle) = @_;

    $handle ||= \*STDOUT;
    print {$handle} $self->print_s;
}

# value_to_string (VALUE) -- helper for print_s (a plain function, not a
# method).
#
# A non-reference is wrapped in braces.  A reference must be a
# Text::BibTeX::Value object (anything else is a confess); its simple
# values are joined with ' # ', those of type BTAST_STRING being wrapped
# in braces and all others emitted as their bare text.
sub value_to_string
{
    my $value = shift;

    return '{' . $value . '}' unless ref $value;    # just a string

    confess "value is a reference, but not to Text::BibTeX::Value object"
        unless $value->isa ('Text::BibTeX::Value');

    my @values = $value->values;
    foreach (@values)
    {
        $_ = $_->type == BTAST_STRING() ? '{' . $_->text . '}' : $_->text;
    }
    return join (' # ', @values);
}

# print_s () -- format the entry as BibTeX source and return the string.
#
# Regular entries are opened with type and key, macro definitions with
# the type only; both are followed by one "field = value," line per field
# and a closing brace, and the result goes through _process_result.
# Entries of any other metatype (comment, preamble) are returned at once
# as a single "@type{value}" line.
#
# Carps about an undefined type, metatype, key (regular entries) or field
# value; undefined pieces are printed as empty strings.
sub print_s
{
    my $self = shift;

    carp "entry type undefined" unless defined $self->{'type'};
    carp "entry metatype undefined" unless defined $self->{'metatype'};

    # Substitute '' only for *undefined* type/key: a plain truth test
    # would also wipe out a perfectly good "0".
    my $type = defined $self->{'type'} ? $self->{'type'} : '';
    my $output;

    # Regular and macro-def entries have to be treated differently when
    # printing the first line, because the former have keys and the latter
    # do not.
    if ($self->{'metatype'} == BTE_REGULAR())
    {
        carp "entry key undefined" unless defined $self->{'key'};
        my $key = defined $self->{'key'} ? $self->{'key'} : '';
        $output = sprintf ("\@%s{%s,\n", $type, $key);
    }
    elsif ($self->{'metatype'} == BTE_MACRODEF())
    {
        $output = sprintf ("\@%s{\n", $type);
    }

    # Comment and preamble entries are treated the same -- we print out
    # the entire entry, on one line, right here.
    else
    {
        return sprintf ("\@%s{%s}\n\n",
                        $type,
                        value_to_string ($self->{'value'}));
    }

    # Here we print out all the fields/values of a regular or macro-def
    # entry.  Iterate over the list itself: a shift-until-false loop would
    # stop early at a field whose name happens to be false.
    foreach my $field (@{$self->{'fields'}})
    {
        my $value = $self->{'values'}{$field};
        if (! defined $value)
        {
            carp "field \"$field\" has undefined value\n";
            $value = '';
        }

        $output .= " $field = ";
        $output .= value_to_string ($value);
        $output .= ",\n";
    }

    # Tack on the last line, and we're done!
    $output .= "}\n\n";

    return Text::BibTeX->_process_result ($output, $self->{binmode}, $self->{normalization});
}

=back

=head2 Miscellaneous methods

=over 4

=item warn (WARNING [, FIELD])

Prepends a bit of location information (filename and line number(s)) to
WARNING, appends a newline, and passes it to Perl's C<warn>. If FIELD is
supplied, the line number given is just that of the field; otherwise, the
range of lines for the whole entry is given. (Well, almost -- currently,
the line number of the last field is used as the last line of the whole
entry. This is a bug.)

For example, if lines 10-15 of file F<foo.bib> look like this:

   @article{homer97,
     author = {Homer Simpson and Ned Flanders},
     title = {Territorial Imperatives in Modern Suburbia},
     journal = {Journal of Suburban Studies},
     year = 1997
   }

then, after parsing this entry to C<$entry>, the calls

   $entry->warn ('what a silly entry');
   $entry->warn ('what a silly journal', 'journal');

would result in the following warnings being issued:

   foo.bib, lines 10-14: what a silly entry
   foo.bib, line 13: what a silly journal

=cut

# warn (WARNING [, FIELD]) -- emit WARNING through Perl's warn, prefixed
# with "<filename>, " (when the entry remembers a File object) and line
# information taken from the entry's `lines' table: the field's own line
# when FIELD is given and known, otherwise the START/STOP range of the
# entry (with a note when FIELD is unknown).
sub warn
{
    my ($self, $warning, $field) = @_;

    my $location = $self->{'file'}
        ? $self->{'file'}{'filename'} . ", "
        : '';

    my $lines = $self->{'lines'};
    my ($start, $stop) = ($lines->{'START'}, $lines->{'STOP'});
    my $entry_range = ($start == $stop)
        ? "line $start"
        : "lines $start-$stop";

    if (! defined $field)
    {
        $location .= "$entry_range: ";
    }
    elsif (exists $lines->{$field})
    {
        $location .= "line $lines->{$field}: ";
    }
    else
    {
        $location .= "$entry_range (unknown field \"$field\"): ";
    }

    warn "$location$warning\n";
}


=item line ([FIELD])

Returns the line number of FIELD.
If the entry was parsed from a string,
this still works--it's just the line number relative to the start of the
string. If the entry was parsed from a file, this works just as you'd
expect it to: it returns the absolute line number with respect to the
whole file. Line numbers are one-based.

If FIELD is not supplied, returns a two-element list containing the line
numbers of the beginning and end of the whole entry. (Actually, the
"end" line number is currently inaccurate: it's really the line
number of the last field in the entry. But it's better than nothing.)

=cut

# line ([FIELD]) -- line number(s) recorded in the entry's `lines' table.
#
# With FIELD: the line recorded for that field (undef if none).
# Without:    the (START, STOP) pair of the whole entry.
# An entry that has no line table at all is treated as having an empty
# one, and the entry itself is left unmodified.
sub line
{
    my ($self, $field) = @_;

    my $lines = $self->{'lines'} || {};

    return $lines->{$field} if defined $field;
    return @{$lines}{'START','STOP'};
}

=item filename ()

Returns the name of the file from which the entry was parsed. Only
works if the file is represented by a C<Text::BibTeX::File> object---if
you just passed a filename/filehandle pair to C<parse>, you can't get
the filename back. (Sorry.)

=cut

# filename () -- the `filename' slot of the remembered File object, or
# undef when the entry does not remember one.
#
# The `file' slot is tested before it is dereferenced: a chained
# $self->{file}{filename} lookup would autovivify an unblessed hash into
# the slot, which the rest of the class (see _preserve) would then try to
# call methods on.
sub filename
{
    my $self = shift;

    my $file = $self->{'file'};         # ooh yuck -- poking into File object
    return $file ? $file->{'filename'} : undef;
}

1;

=back

=head1 SEE ALSO

L<Text::BibTeX>, L<Text::BibTeX::File>, L<Text::BibTeX::Structure>

=head1 AUTHOR

Greg Ward <gward@python.net>

=head1 COPYRIGHT

Copyright (c) 1997-2000 by Gregory P. Ward. All rights reserved. This file
is part of the Text::BibTeX library. This library is free software; you
may redistribute it and/or modify it under the same terms as Perl itself.

=cut