1# ----------------------------------------------------------------------
2# NAME       : BibTeX/Entry.pm
3# CLASSES    : Text::BibTeX::Entry
4# RELATIONS  : base class for Text::BibTeX::StructuredEntry, and
5#              ultimately for all user-supplied structured entry classes
6# DESCRIPTION: Provides an object-oriented interface to BibTeX entries.
7# CREATED    : March 1997, Greg Ward
8# MODIFIED   :
9# VERSION    : $Id$
10# COPYRIGHT  : Copyright (c) 1997-2000 by Gregory P. Ward.  All rights
11#              reserved.
12#
13#              This file is part of the Text::BibTeX library.  This
14#              library is free software; you may redistribute it and/or
15#              modify it under the same terms as Perl itself.
16# ----------------------------------------------------------------------
17package Text::BibTeX::Entry;
18
19require 5.004;                          # for isa, and delete on a slice
20
21use strict;
22use vars qw'$VERSION';
23use Carp;
24use Text::BibTeX qw(:metatypes :nodetypes);
25
26$VERSION = 0.88;
27
28=head1 NAME
29
30Text::BibTeX::Entry - read and parse BibTeX files
31
32=head1 SYNOPSIS
33
34   use Text::BibTeX::Entry;
35
36   # ...assuming that $bibfile and $newbib are both objects of class
37   # Text::BibTeX::File, opened for reading and writing (respectively):
38
39   # Entry creation/parsing methods:
40   $entry = Text::BibTeX::Entry->new();
41   $entry->read ($bibfile);
42   $entry->parse ($filename, $filehandle);
43   $entry->parse_s ($entry_text);
44
45   # or:
46   $entry = Text::BibTeX::Entry->new( $bibfile );
47   $entry = Text::BibTeX::Entry->new( $filename, $filehandle );
48   $entry = Text::BibTeX::Entry->new( $entry_text );
49
50   # Entry query methods
51   warn "error in input" unless $entry->parse_ok;
52   $metatype = $entry->metatype;
53   $type = $entry->type;
54
55   # if metatype is BTE_REGULAR or BTE_MACRODEF:
56   $key = $entry->key;                  # only for BTE_REGULAR metatype
57   $num_fields = $entry->num_fields;
58   @fieldlist = $entry->fieldlist;
59   $has_title = $entry->exists ('title');
60   $title = $entry->get ('title');
61   # or:
62   ($val1,$val2,...$valn) = $entry->get ($field1, $field2, ..., $fieldn);
63
64   # if metatype is BTE_COMMENT or BTE_PREAMBLE:
65   $value = $entry->value;
66
67   # Author name methods
68   @authors = $entry->split ('author');
69   ($first_author) = $entry->names ('author');
70
71   # Entry modification methods
72   $entry->set_type ($new_type);
73   $entry->set_key ($new_key);
74   $entry->set ('title', $new_title);
75   # or:
76   $entry->set ($field1, $val1, $field2, $val2, ..., $fieldn, $valn);
77   $entry->delete (@fields);
78   $entry->set_fieldlist (\@fieldlist);
79
80   # Entry output methods
81   $entry->write ($newbib);
82   $entry->print ($filehandle);
83   $entry_text = $entry->print_s;
84
85   # Reset internal parser state:
86   $entry = Text::BibTeX::Entry->new();
87   $entry->parse ($filename, undef);
88   $entry->parse_s (undef);
89
90   # or:
91   $entry = Text::BibTeX::Entry->new( $filename, undef );
92   $entry = Text::BibTeX::Entry->new( undef );
93
94   # Miscellaneous methods
95   $entry->warn ($entry_warning);
96   # or:
97   $entry->warn ($field_warning, $field);
98   $entry->clone;
99
100=head1 DESCRIPTION
101
102C<Text::BibTeX::Entry> does all the real work of reading and parsing
103BibTeX files.  (Well, actually it just provides an object-oriented Perl
104front-end to a C library that does all that.  But that's not important
105right now.)
106
107BibTeX entries can be read either from C<Text::BibTeX::File> objects (using
108the C<read> method), or directly from a filehandle (using the C<parse>
109method), or from a string (using C<parse_s>).  The first is preferable,
110since you don't have to worry about supplying the filename, and because of
111the extra functionality provided by the C<Text::BibTeX::File> class.
112Currently, this means that you may specify the I<database structure> to
113which entries are expected to conform via the C<File> class.  This lets you
114ensure that entries follow the rules for required fields and mutually
115constrained fields for a particular type of database, and also gives you
116access to all the methods of the I<structured entry class> for this
117database structure.  See L<Text::BibTeX::Structure> for details on database
118structures.
119
120Once you have the entry, you can query it or change it in a variety of
121ways.  The query methods are C<parse_ok>, C<type>, C<key>, C<num_fields>,
122C<fieldlist>, C<exists>, and C<get>.  Methods for changing the entry are
123C<set_type>, C<set_key>, C<set_fieldlist>, C<delete>, and C<set>.
124
125Finally, you can output BibTeX entries, again either to an open
126C<Text::BibTeX::File> object, a filehandle or a string.  (A filehandle or
127C<File> object must, of course, have been opened in write mode.)  Output to
128a C<File> object is done with the C<write> method, to a filehandle via
129C<print>, and to a string with C<print_s>.  Using the C<File> class is
130recommended for future extensibility, although it currently doesn't offer
131anything extra.
132
133=head1 METHODS
134
135=head2 Entry creation/parsing methods
136
137=over 4
138
139=item new ([OPTS ,] [SOURCE])
140
141Creates a new C<Text::BibTeX::Entry> object.  If the SOURCE parameter is
142supplied, it must be one of the following: a C<Text::BibTeX::File> (or
143descendant class) object, a filename/filehandle pair, or a string.  Calls
144C<read> to read from a C<Text::BibTeX::File> object, C<parse> to read from
145a filehandle, and C<parse_s> to read from a string.
146
147A filehandle can be specified as a GLOB reference, or as an
148C<IO::Handle> (or descendants) object, or as a C<FileHandle> (or
149descendants) object.  (But there's really no point in using
150C<FileHandle> objects, since C<Text::BibTeX> requires Perl 5.004, which
151always includes the C<IO> modules.)  You can I<not> pass in the name of
152a filehandle as a string, though, because C<Text::BibTeX::Entry>
153conforms to the C<use strict> pragma (which disallows such symbolic
154references).
155
156The corresponding filename should be supplied in order to allow for
157accurate error messages; if you simply don't have the filename, you can
158pass C<undef> and you'll get error messages without a filename.  (It's
159probably better to rearrange your code so that the filename is
160available, though.)
161
162Thus, the following are equivalent to read from a file named by
163C<$filename> (error handling ignored):
164
165   # good ol' fashioned filehandle and GLOB ref
166   open (BIBFILE, $filename);
167   $entry = Text::BibTeX::Entry->new($filename, \*BIBFILE);
168
169   # newfangled IO::File thingy
170   $file = IO::File->new($filename);
171   $entry = Text::BibTeX::Entry->new($filename, $file);
172
173But using a C<Text::BibTeX::File> object is simpler and preferred:
174
175   $file  = Text::BibTeX::File->new($filename);
176   $entry = Text::BibTeX::Entry->new($file);
177
178Returns the new object, unless SOURCE is supplied and reading/parsing
179the entry fails (e.g., due to end of file) -- then it returns false.
180
181You may supply a reference to an option hash as first argument.
182Supported options are:
183
184=over 4
185
=item BINMODE

Set the way Text::BibTeX deals with strings. By default it manages
strings as bytes. You can set BINMODE to 'utf-8' to get NFC normalized
UTF-8 strings, and you can customise the normalization with the
NORMALIZATION option:

   Text::BibTeX::Entry->new(
         { binmode => 'utf-8', normalization => 'NFD' },
         $file );

=item NORMALIZATION

Selects the normalization form (for example 'NFD') to use for UTF-8
strings instead of the default NFC.
199
200=back
201
202
203=cut
204
# Constructor: CLASS->new ([OPTS,] [SOURCE]).
#
# OPTS is an optional hash reference whose keys are matched
# case-insensitively; `binmode' and `normalization' are recognised.  The
# caller's hash is never modified.
#
# SOURCE, if given, is one of: a Text::BibTeX::File (or descendant) object,
# a (FILENAME, FILEHANDLE) pair, or a string holding the entry text.
#
# Returns the new object.  If SOURCE was supplied and the read/parse
# returned a false status (e.g. end of file), that false status is returned
# instead.  Croaks if SOURCE is none of the supported forms.
sub new
{
   my ($class, @source) = @_;

   $class = ref ($class) || $class;

   my $self = {'file'     => undef,
               'type'     => undef,
               'key'      => undef,
               'status'   => undef,
               'metatype' => undef,
               'fields'   => [],
               'values'   => {}};
   bless $self, $class;

   # Lower-case the option names into a private copy rather than writing
   # the lower-cased keys back into the caller's hash.  Keys are visited in
   # sorted order so that an exact lower-case key wins over a mixed-case
   # duplicate deterministically.
   my %opts;
   if (@source && ref $source[0] eq 'HASH')
   {
      my $given = shift @source;
      $opts{lc $_} = $given->{$_} foreach sort keys %$given;
   }
   $self->{binmode} = 'utf-8'
      if defined $opts{binmode} && $opts{binmode} =~ /utf-?8/i;
   $self->{normalization} = $opts{normalization}
      if exists $opts{normalization};

   if (@source)
   {
      my $status;
      my ($first, $second) = @source;

      # isa() is wrapped in eval so that an unblessed reference (a lone
      # GLOB or ARRAY ref, say) reaches the croak below instead of dying
      # with "Can't call method ... on unblessed reference".
      my $is_file = @source == 1 && ref ($first)
         && do { local $@; eval { $first->isa ('Text::BibTeX::File') } };

      # fileno() yields undef for a handle that is not open; comparing that
      # numerically would treat it as descriptor 0, so definedness is
      # checked explicitly.  The eval keeps a non-handle second argument
      # from dying inside fileno() itself.
      my $fd = (@source == 2 && defined $second)
         ? do { local $@; eval { fileno ($second) } }
         : undef;

      if ($is_file)
      {
         $status = $self->read ($first);
         if (my $structure = $first->structure)
         {
            $self->{structure} = $structure;
            bless $self, $structure->entry_class;
         }
      }
      elsif (@source == 2 && defined ($first) && ! ref ($first) &&
             (! defined ($second) || (defined ($fd) && $fd >= 0)))
      {
         # An undefined filehandle is passed through: parse() documents
         # that as a request to reset the parser.
         $status = $self->parse ($first, $second);
      }
      elsif (@source == 1 && ! ref ($first))
      {
         $status = $self->parse_s ($first);
      }
      else
      {
         croak "new: source argument must be either a Text::BibTeX::File " .
               "(or descendant) object, filename/filehandle pair, or " .
               "a string";
      }

      return $status unless $status;    # parse failed -- tell our caller
   }
   $self;
}
254
255=item clone
256
257Clone a Text::BibTeX::Entry object, returning the clone. This re-uses the reference to any
258Text::BibTeX::Structure or Text::BibTeX::File but copies everything else,
259so that the clone can be modified apart from the original.
260
261=cut
262
# Returns a copy of this entry, blessed into the same class.
#
# The `structure' and `file' objects are shared with the original.  The
# field list, the field-value hash and the line-number hash are copied one
# level deep, so adding, removing or replacing fields on the clone leaves
# the original alone.  Values that are themselves references are still
# shared between the two entries.
sub clone
{
   my $self = shift;
   my $clone = {};

   # Shared, never modified through the entry.
   foreach my $shared (qw(structure file))
   {
      $clone->{$shared} = $self->{$shared} if $self->{$shared};
   }

   # Copied only when present: read() uses exists() on the binmode and
   # normalization keys to decide whether to inherit them from the file,
   # and `value' is only meaningful for comment/preamble entries.
   foreach my $optional (qw(binmode normalization value))
   {
      $clone->{$optional} = $self->{$optional} if exists $self->{$optional};
   }

   foreach my $attr (qw(type key status metatype))
   {
      $clone->{$attr} = $self->{$attr};
   }

   $clone->{fields} = [ @{$self->{fields} || []} ];
   $clone->{values} = { %{$self->{values} || {}} };

   # Guarded so that a missing `lines' hash is not created on the original
   # as a side effect of cloning.
   $clone->{lines} = { %{$self->{lines}} } if $self->{lines};

   bless $clone, ref ($self);
   return $clone;
}
292
293=item read (BIBFILE)
294
295Reads and parses an entry from BIBFILE, which must be a
296C<Text::BibTeX::File> object (or descendant).  The next entry will be read
297from the file associated with that object.
298
299Returns the same as C<parse> (or C<parse_s>): false if no entry found
300(e.g., at end-of-file), true otherwise.  To see if the parse itself failed
301(due to errors in the input), call the C<parse_ok> method.
302
303=cut
304
# Reads and parses the next entry from SOURCE, a Text::BibTeX::File (or
# descendant) object.  The File object is remembered in $self->{file}, and
# its `binmode' and `normalization' settings are inherited unless the entry
# already has its own.  PRESERVE is passed on to parse().
#
# Returns whatever parse() returns.  Croaks if SOURCE is not a
# Text::BibTeX::File object.
sub read
{
   my ($self, $source, $preserve) = @_;

   # isa() is called inside eval so that undef or an unblessed value gets
   # the explanatory croak below rather than a "Can't call method" death.
   my $is_file = do { local $@; eval { $source->isa ('Text::BibTeX::File') } };
   croak "`source' argument must be ref to open Text::BibTeX::File " .
         "(or descendant) object"
      unless $is_file;

   my $fn = $source->{'filename'};
   my $fh = $source->{'handle'};
   $self->{'file'} = $source;        # store File object for later use

   ## Propagate flags
   foreach my $flag (qw(binmode normalization))
   {
      $self->{$flag} = $source->{$flag} unless exists $self->{$flag};
   }
   return $self->parse ($fn, $fh, $preserve);
}
321
322
323=item parse (FILENAME, FILEHANDLE)
324
325Reads and parses the next entry from FILEHANDLE.  (That is, it scans the
326input until an '@' sign is seen, and then slurps up to the next '@'
327sign.  Everything between the two '@' signs [including the first one,
328but not the second one -- it's pushed back onto the input stream for the
329next entry] is parsed as a BibTeX entry, with the simultaneous
330construction of an abstract syntax tree [AST].  The AST is traversed to
331ferret out the most interesting information, and this is stuffed into a
332Perl hash, which coincidentally is the C<Text::BibTeX::Entry> object
333you've been tossing around.  But you don't need to know any of that -- I
334just figured if you've read this far, you might want to know something
335about the inner workings of this module.)
336
337The success of the parse is stored internally so that you can later
338query it with the C<parse_ok> method.  Even in the presence of syntax
339errors, you'll usually get something resembling your input, but it's
340usually not wise to try to do anything with it.  Just call C<parse_ok>,
341and if it returns false then silently skip to the next entry.  (The
342error messages printed out by the parser should be quite adequate for
343the user to figure out what's wrong.  And no, there's currently no way
344for you to capture or redirect those error messages -- they're always
345printed to C<stderr> by the underlying C code.  That should change in
346future releases.)
347
348If no '@' signs are seen on the input before reaching end-of-file, then
349we've exhausted all the entries in the file, and C<parse> returns a
350false value.  Otherwise, it returns a true value -- even if there were
351syntax errors.  Hence, it's important to check C<parse_ok>.
352
353The FILENAME parameter is only used for generating error messages, but
354anybody using your program will certainly appreciate your setting it
355correctly!
356
357Passing C<undef> to FILEHANDLE will reset the state of the underlying
358C parser, which is required in order to parse multiple files.
359
360=item parse_s (TEXT)
361
362Parses a BibTeX entry (using the above rules) from the string TEXT.  The
363string is not modified; repeatedly calling C<parse_s> with the same string
364will give you the same results each time.  Thus, there's no point in
365putting multiple entries in one string.
366
367Passing C<undef> to TEXT will reset the state of the underlying
368C parser, which may be required in order to parse multiple strings.
369
370=back
371
372=cut
373
# Works out the effective "preserve values" flag.  An explicitly defined
# PRESERVE wins; otherwise the flag is taken from the entry's
# Text::BibTeX::File object, if it has one.  Text::BibTeX::Value is loaded
# whenever the resulting flag is true.  Returns the flag.
sub _preserve {
    my ($self, $preserve) = @_;

    if (!defined $preserve) {
        my $file = $self->{'file'};
        $preserve = $file->preserve_values
            if defined $file && $file->isa('Text::BibTeX::File');
    }

    require Text::BibTeX::Value if $preserve;
    return $preserve;
}
385
# Parses the next entry from FILEHANDLE (FILENAME is passed along with it).
# An undefined FILEHANDLE resets the parser instead.  Returns the result of
# whichever low-level routine was called.
sub parse {
    my ($self, $filename, $filehandle, $preserve) = @_;

    $preserve = $self->_preserve($preserve);

    return _reset_parse() unless defined $filehandle;
    return _parse($self, $filename, $filehandle, $preserve);
}
397
398
# Parses an entry from the string TEXT.  An undefined TEXT resets the
# string parser instead.  Returns the result of whichever low-level routine
# was called.
sub parse_s {
    my ($self, $text, $preserve) = @_;

    $preserve = $self->_preserve($preserve);

    return _reset_parse_s() unless defined $text;
    return _parse_s($self, $text, $preserve);
}
410
411
412=head2 Entry query methods
413
414=over 4
415
416=item parse_ok ()
417
418Returns false if there were any serious errors encountered while parsing
419the entry.  (A "serious" error is a lexical or syntax error; currently,
420warnings such as "undefined macro" result in an error message being
421printed to C<stderr> for the user's edification, but no notice is
422available to the calling code.)
423
424=item type ()
425
426Returns the type of the entry.  (The `type' is the word that follows the
427'@' sign; e.g. `article', `book', `inproceedings', etc. for the standard
428BibTeX styles.)
429
430=item metatype ()
431
432Returns the metatype of the entry.  (The `metatype' is a numeric value used
433to classify entry types into four groups: comment, preamble, macro
434definition (C<@string> entries), and regular (all other entry types).
435C<Text::BibTeX> exports four constants for these metatypes: C<BTE_COMMENT>,
436C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>.)
437
438=item key ()
439
440Returns the key of the entry.  (The key is the token immediately
441following the opening `{' or `(' in "regular" entries.  Returns C<undef>
442for entries that don't have a key, such as macro definition (C<@string>)
443entries.)
444
445=item num_fields ()
446
447Returns the number of fields in the entry.  (Note that, currently, this is
448I<not> equivalent to putting C<scalar> in front of a call to C<fieldlist>.
449See below for the consequences of calling C<fieldlist> in a scalar
450context.)
451
452=item fieldlist ()
453
454Returns the list of fields in the entry.
455
456B<WARNING> In scalar context, it no longer returns a
457reference to the object's own list of fields.
458
459=cut
460
# Returns the stored parse status of the entry.
sub parse_ok {
    my ($self) = @_;
    return $self->{'status'};
}
462
# Returns the entry's metatype, passed through Text::BibTeX's result
# processing with the entry's binmode/normalization settings.
sub metatype {
    my ($self) = @_;
    my ($mode, $form) = @{$self}{qw(binmode normalization)};
    return Text::BibTeX->_process_result($self->{'metatype'}, $mode, $form);
}
467
# Returns the entry's type, passed through Text::BibTeX's result processing
# with the entry's binmode/normalization settings.
sub type {
    my ($self) = @_;
    my ($mode, $form) = @{$self}{qw(binmode normalization)};
    return Text::BibTeX->_process_result($self->{'type'}, $mode, $form);
}
472
# Returns the entry's key after result processing, or undef when the entry
# has no `key' slot at all.
sub key {
    my ($self) = @_;

    # A single undef is returned even in list context, as before.
    return undef unless exists $self->{key};

    my ($mode, $form) = @{$self}{qw(binmode normalization)};
    return Text::BibTeX->_process_result($self->{key}, $mode, $form);
}
479
# Returns how many field names are in the entry's field list.
sub num_fields {
    my ($self) = @_;
    return scalar @{ $self->{'fields'} };
}
481
# Returns the entry's field names, in order, each passed through
# Text::BibTeX's result processing.
sub fieldlist {
    my ($self) = @_;
    my ($mode, $form) = @{$self}{qw(binmode normalization)};

    return map { Text::BibTeX->_process_result($_, $mode, $form) }
               @{ $self->{'fields'} };
}
486
487=item exists (FIELD)
488
489Returns true if a field named FIELD is present in the entry, false
490otherwise.
491
492=item get (FIELD, ...)
493
494Returns the value of one or more FIELDs, as a list of values.  For example:
495
496   $author = $entry->get ('author');
497   ($author, $editor) = $entry->get ('author', 'editor');
498
499If a FIELD is not present in the entry, C<undef> will be returned at its
500place in the return list.  However, you can't completely trust this as a
501test for presence or absence of a field; it is possible for a field to be
502present but undefined.  Currently this can only happen due to certain
503syntax errors in the input, or if you pass an undefined value to C<set>, or
504if you create a new field with C<set_fieldlist> (the new field's value is
505implicitly set to C<undef>).
506
507Normally, the field value is what the input looks like after "maximal
508processing"--quote characters are removed, whitespace is collapsed (the
509same way that BibTeX itself does it), macros are expanded, and multiple
510tokens are pasted together.  (See L<bt_postprocess> for details on the
511post-processing performed by B<btparse>.)
512
513For example, if your input file has the following:
514
515   @string{of = "of"}
516   @string{foobars = "Foobars"}
517
518   @article{foobar,
519     title = {   The Mating Habits      } # of # " Adult   " # foobars
520   }
521
522then using C<get> to query the value of the C<title> field from the
523C<foobar> entry would give the string "The Mating Habits of Adult Foobars".
524
525However, in certain circumstances you may wish to preserve the values as
526they appear in the input.  This is done by setting a C<preserve_values>
527flag at some point; then, C<get> will return not strings but
528C<Text::BibTeX::Value> objects.  Each C<Value> object is a list of
529C<Text::BibTeX::SimpleValue> objects, which in turn consists of a simple
530value type (string, macro, or number) and the text of the simple value.
531Various ways to set the C<preserve_values> flag and the interface to
532both C<Value> and C<SimpleValue> objects are described in
533L<Text::BibTeX::Value>.
534
535=item value ()
536
537Returns the single string associated with C<@comment> and C<@preamble>
538entries.  For instance, the entry
539
540   @preamble{" This is   a preamble" #
541             {---the concatenation of several strings}}
542
543would return a value of "This is a preamble---the concatenation of
544several strings".
545
546If this entry was parsed in "value preservation" mode, then C<value>
547acts like C<get>, and returns a C<Value> object rather than a simple
548string.
549
550=back
551
552=cut
553
# True if the entry has a value slot for FIELD (even one holding undef).
# The field name goes through Text::BibTeX's argument processing before the
# lookup.
sub exists {
    my ($self, $field) = @_;

    my $lookup = Text::BibTeX->_process_argument($field, $self->{binmode},
                                                 $self->{normalization});
    return exists $self->{values}{$lookup};
}
560
# Returns the values of the named fields.  Field names go through
# Text::BibTeX's argument processing before lookup, and every defined value
# goes through its result processing; missing or undefined values come back
# as undef.  With more than one result the whole list is returned,
# otherwise just the first (possibly undef) element.
sub get {
    my ($self, @fields) = @_;
    my ($mode, $form) = @{$self}{qw(binmode normalization)};

    my @lookup = map { Text::BibTeX->_process_argument($_, $mode, $form) }
                     @fields;
    my @stored = @{ $self->{'values'} }{@lookup};
    my @result = map {
        defined($_) ? Text::BibTeX->_process_result($_, $mode, $form) : undef
    } @stored;

    return (@result > 1) ? @result : $result[0];
}
571
# Returns the entry's single `value' slot, passed through Text::BibTeX's
# result processing.
sub value {
    my ($self) = @_;
    my ($mode, $form) = @{$self}{qw(binmode normalization)};
    return Text::BibTeX->_process_result($self->{value}, $mode, $form);
}
576
577
578=head2 Author name methods
579
580This is the only part of the module that makes any assumption about the
581nature of the data, namely that certain fields are lists delimited by a
582simple word such as "and", and that the delimited sub-strings are human
583names of the "First von Last" or "von Last, Jr., First" style used by
584BibTeX.  If you are using this module for anything other than
585bibliographic data, you can most likely forget about these two methods.
586However, if you are in fact hacking on BibTeX-style bibliographic data,
587these could come in very handy -- the name-parsing done by BibTeX is not
588trivial, and the list-splitting would also be a pain to implement in
589Perl because you have to pay attention to brace-depth.  (Not that it
590wasn't a pain to implement in C -- it's just a lot more efficient than a
591Perl implementation would be.)
592
593Incidentally, both of these methods assume that the strings being split
594have already been "collapsed" in the BibTeX way, i.e. all leading and
595trailing whitespace removed and internal whitespace reduced to single
596spaces.  This should always be the case when using these two methods on
597a C<Text::BibTeX::Entry> object, but these are actually just front ends
598to more general functions in C<Text::BibTeX>.  (More general in that you
599supply the string to be parsed, rather than supplying the name of an
600entry field.)  Should you ever use those more general functions
601directly, you might have to worry about collapsing whitespace; see
602L<Text::BibTeX> (the C<split_list> and C<split_name> functions in
603particular) for more information.
604
605Please note that the interface to author name parsing is experimental,
606subject to change, and open to discussion.  Please let me know if you
607have problems with it, think it's just perfect, or whatever.
608
609=over 4
610
611=item split (FIELD [, DELIM [, DESC]])
612
613Splits the value of FIELD on DELIM (default: `and').  Don't assume that
614this works the same as Perl's builtin C<split> just because the names are
615the same: in particular, DELIM must be a simple string (no regexps), and
616delimiters that are at the beginning or end of the string, or at non-zero
617brace depth, or not surrounded by whitespace, are ignored.  Some examples
618might illuminate matters:
619
620   if field F is...                then split (F) returns...
621   'Name1 and Name2'               ('Name1', 'Name2')
622   'Name1 and and Name2'           ('Name1', undef, 'Name2')
623   'Name1 and'                     ('Name1 and')
624   'and Name2'                     ('and Name2')
625   'Name1 {and} Name2 and Name3'   ('Name1 {and} Name2', 'Name3')
626   '{Name1 and Name2} and Name3'   ('{Name1 and Name2}', 'Name3')
627
628Note that a warning will be issued for empty names (as in the second
629example above).  A warning ought to be issued for delimiters at the
630beginning or end of a string, but currently this isn't done.  (Hmmm.)
631
632DESC is a one-word description of the substrings; it defaults to 'name'.
633It is only used for generating warning messages.
634
635=item names (FIELD)
636
637Splits FIELD as described above, and further splits each name into four
638components: first, von, last, and jr.
639
640Returns a list of C<Text::BibTeX::Name> objects, each of which represents
641one name.  Use the C<part> method to query these objects; see
642L<Text::BibTeX::Name> for details on the interface to name objects (and on
643name-parsing as well).
644
645For example if this entry:
646
647   @article{foo,
648            author = {John Smith and
649                      Hacker, J. Random and
650                      Ludwig van Beethoven and
651                      {Foo, Bar and Company}}}
652
653has been parsed into a C<Text::BibTeX::Entry> object C<$entry>, then
654
655   @names = $entry->names ('author');
656
657will put a list of C<Text::BibTeX::Name> objects in C<@names>.  These can
658be queried individually as described in L<Text::BibTeX::Name>; for instance,
659
660   @last = $names[0]->part ('last');
661
662would put the list of tokens comprising the last name of the first author
663into the C<@last> array: C<('Smith')>.
664
665=cut
666
# Splits the value of FIELD with Text::BibTeX::split_list.  DELIM defaults
# to 'and' and DESC to 'name' when false.  Returns nothing if the entry has
# no such field; otherwise returns whatever split_list returns.
sub split {
    my ($self, $field, $delim, $desc) = @_;

    return unless $self->exists($field);

    $delim = 'and'  unless $delim;
    $desc  = 'name' unless $desc;

    # Filename of the originating file object, when the entry has one.
    my $filename = $self->{file} && $self->{file}{filename};

    return Text::BibTeX::split_list(
        $self->{values}{$field},
        $delim,
        $filename,
        $self->{lines}{$field},
        $desc,
        {
            binmode       => $self->{binmode},
            normalization => $self->{normalization},
        },
    );
}
685
# Splits FIELD via the split method and turns each piece into a
# Text::BibTeX::Name object.  Each constructor call is given the entry's
# binmode/normalization options, the piece, the filename (if the entry has
# a file object), the field's recorded line number and the piece's
# zero-based index.  Returns the list of objects.
sub names {
    require Text::BibTeX::Name;

    my ($self, $field) = @_;

    my $filename = $self->{'file'} && $self->{'file'}{'filename'};
    my $line     = $self->{'lines'}{$field};

    my @names;
    my $index = 0;
    foreach my $piece ($self->split($field)) {
        # A fresh options hash per object, as each constructor call had before.
        my $name = Text::BibTeX::Name->new(
            { binmode => $self->{binmode}, normalization => $self->{normalization} },
            $piece, $filename, $line, $index,
        );
        push @names, $name;
        $index++;
    }
    return @names;
}
706
707=back
708
709=head2 Entry modification methods
710
711=over 4
712
713=item set_type (TYPE)
714
715Sets the entry's type.
716
717=item set_metatype (METATYPE)
718
719Sets the entry's metatype (must be one of the four constants
720C<BTE_COMMENT>, C<BTE_PREAMBLE>, C<BTE_MACRODEF>, and C<BTE_REGULAR>, which
721are all optionally exported from C<Text::BibTeX>).
722
723=item set_key (KEY)
724
725Sets the entry's key.
726
727=item set (FIELD, VALUE, ...)
728
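Sets the value of field FIELD.  (VALUE might be C<undef>, in which case
FIELD will simply be set to C<undef> -- this is where the difference
between the C<exists> method and testing the definedness of field values
becomes clear.)  Arguments must come in (FIELD, VALUE) pairs: C<set>
croaks if it is given an odd number of arguments, so an undefined VALUE
has to be passed explicitly.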
733
734Multiple (FIELD, VALUE) pairs may be supplied; they will be processed in
735order (i.e. the input is treated like a list, not a hash).  For example:
736
737   $entry->set ('author', $author);
738   $entry->set ('author', $author, 'editor', $editor);
739
740VALUE can be either a simple string or a C<Text::BibTeX::Value> object;
741it doesn't matter if the entry was parsed in "full post-processing" or
742"preserve input values" mode.
743
=item delete (FIELD, ...)

Deletes one or more FIELDs from an entry.
747
748=item set_fieldlist (FIELDLIST)
749
Sets the entry's list of fields to FIELDLIST, which must be a list
reference.  If any of the field names supplied in FIELDLIST are not
currently present in the entry, they are created with the value C<undef>
and a warning is printed.  Conversely, if any of the fields currently
present in the entry are not named in the list of fields supplied to
C<set_fieldlist>, they are deleted from the entry and another warning is
printed.
757
758=back
759
760=cut
761
# Stores TYPE as the entry's type and returns the stored value.
sub set_type {
    my ($self, $type) = @_;
    return $self->{'type'} = $type;
}
768
# Stores METATYPE as the entry's metatype and returns the stored value.
sub set_metatype {
    my ($self, $metatype) = @_;
    return $self->{'metatype'} = $metatype;
}
775
# Stores KEY, after Text::BibTeX's argument processing, as the entry's key
# and returns the stored value.
sub set_key {
    my ($self, $key) = @_;

    my $stored = Text::BibTeX->_process_argument($key, $self->{binmode},
                                                 $self->{normalization});
    return $self->{'key'} = $stored;
}
782
# Assigns one or more (FIELD, VALUE) pairs, in order.  Each VALUE goes
# through Text::BibTeX's argument processing before being stored; a FIELD
# that has no value slot yet is appended to the field list.  Croaks when
# given an odd number of arguments.
sub set {
    my $self = shift;

    croak "set: must supply an even number of arguments"
        if @_ % 2;

    while (@_) {
        my $field   = shift;
        my ($value) = Text::BibTeX->_process_argument(
            shift, $self->{binmode}, $self->{normalization},
        );

        push @{ $self->{'fields'} }, $field
            unless exists $self->{'values'}{$field};
        $self->{'values'}{$field} = $value;
    }
}
798
# Removes the named fields from both the ordered field list and the value
# hash.  The existing field-list array is updated in place.  Returns the
# result of deleting the value-hash slice.
sub delete {
    my ($self, @fields) = @_;

    my %doomed;
    @doomed{@fields} = (1) x @fields;

    my @survivors = grep { !$doomed{$_} } @{ $self->{'fields'} };
    @{ $self->{'fields'} } = @survivors;

    return delete @{ $self->{'values'} }{@fields};
}
808
# Replaces the entry's field list with a copy of the list FIELDS refers to.
# Listed fields without a value slot are created as undef (with a warning),
# and value slots for fields missing from the list are deleted (with a
# warning).  Returns the newly installed array reference.
sub set_fieldlist {
    my ($self, $fields) = @_;

    # Pass 1: note every requested name, creating the ones we lack.
    my %requested;
    for my $name (@$fields) {
        $requested{$name} = 1;
        next if exists $self->{'values'}{$name};

        carp "Implicitly adding undefined field \"$name\"";
        $self->{'values'}{$name} = undef;
    }

    # Pass 2: drop every stored value the caller did not ask to keep.
    for my $name (keys %{ $self->{'values'} }) {
        next if $requested{$name};

        carp "Implicitly deleting field \"$name\"";
        delete $self->{'values'}{$name};
    }

    # Install a copy so later changes to the caller's array don't leak in.
    return $self->{'fields'} = [@$fields];
}
842
843
844=head2 Entry output methods
845
846=over 4
847
848=item write (BIBFILE)
849
850Prints a BibTeX entry on the filehandle associated with BIBFILE (which
851should be a C<Text::BibTeX::File> object, opened for output).  Currently
852the printout is not particularly human-friendly; a highly configurable
853pretty-printer will be developed eventually.
854
855=item print (FILEHANDLE)
856
857Prints a BibTeX entry on FILEHANDLE.
858
859=item print_s ()
860
861Prints a BibTeX entry to a string, which is the return value.
862
863=cut
864
# Prints the entry on the handle stored in BIBFILE's `handle' slot, by
# delegating to the print method.
sub write {
    my ($self, $bibfile) = @_;
    return $self->print($bibfile->{'handle'});
}
872
# Prints the text produced by print_s on HANDLE, falling back to STDOUT when
# HANDLE is false.  Returns the result of the underlying print.
sub print {
    my ($self, $handle) = @_;

    $handle = \*STDOUT unless $handle;
    return print {$handle} $self->print_s;
}
880
# Helper for print_s: renders one field value as BibTeX source text.
# A plain string is wrapped in braces.  A Text::BibTeX::Value object is
# rendered as its simple values joined with ' # ', with string-typed simple
# values wrapped in braces and all others emitted as-is.  Confesses if given
# any other kind of reference.
#
# (This used to be declared inside print_s.  A named sub is package-scoped
# wherever it is written, so the name and visibility are unchanged.)
sub value_to_string
{
   my $value = shift;

   return "{$value}" unless ref $value;         # just a string

   # isa() inside eval so that an unblessed reference also reaches confess.
   confess "value is a reference, but not to Text::BibTeX::Value object"
      unless do { local $@; eval { $value->isa ('Text::BibTeX::Value') } };

   my @pieces;
   foreach my $simple ($value->values)
   {
      my $piece = $simple->type == BTAST_STRING()
         ? '{' . $simple->text . '}'
         : $simple->text;
      push (@pieces, $piece);
   }
   return join (' # ', @pieces);
}

# Returns the entry formatted as BibTeX source text.
#
# Regular entries are printed as "@type{key," followed by one
# "  field = value," line per field and a closing brace; macro definitions
# are the same without the key; every other metatype (comment, preamble) is
# printed on one line from the entry's single `value'.  The text goes
# through Text::BibTeX's result processing in every case.
#
# Warnings are issued (via carp) for an undefined type, metatype, key (for
# regular entries) or field value; each is printed as an empty string.
sub print_s
{
   my $self = shift;

   carp "entry type undefined" unless defined $self->{'type'};
   carp "entry metatype undefined" unless defined $self->{'metatype'};

   # Definedness, not truth, decides what gets printed, so that a type or
   # key of "0" is kept.  An undefined metatype compares as 0, as it always
   # did, but without a second "uninitialized" warning.
   my $type     = defined $self->{'type'} ? $self->{'type'} : '';
   my $metatype = defined $self->{'metatype'} ? $self->{'metatype'} : 0;
   my $output;

   if ($metatype == BTE_REGULAR() || $metatype == BTE_MACRODEF())
   {
      # Regular and macro-def entries differ only in the first line: the
      # former have keys and the latter do not.
      if ($metatype == BTE_REGULAR())
      {
         carp "entry key undefined" unless defined $self->{'key'};
         my $key = defined $self->{'key'} ? $self->{'key'} : '';
         $output = '@' . $type . '{' . $key . ",\n";
      }
      else
      {
         $output = '@' . $type . "{\n";
      }

      # foreach rather than "while ($field = shift ...)", which stopped at
      # the first field whose name is false (such as "0").
      foreach my $field (@{$self->{'fields'}})
      {
         my $value = $self->{'values'}{$field};
         if (! defined $value)
         {
            carp "field \"$field\" has undefined value\n";
            $value = '';
         }
         $output .= "  $field = " . value_to_string ($value) . ",\n";
      }

      # Tack on the last line, and we're done!
      $output .= "}\n\n";
   }
   else                                 # comment or preamble
   {
      # The whole entry goes on one line.
      my $value = defined $self->{'value'} ? $self->{'value'} : '';
      $output = '@' . $type . '{' . value_to_string ($value) . "}\n\n";
   }

   # Applied to every metatype, so comment/preamble output gets the same
   # binmode/normalization treatment as regular entries and as value().
   return Text::BibTeX->_process_result($output, $self->{binmode}, $self->{normalization});
}
957
958=back
959
960=head2 Miscellaneous methods
961
962=over 4
963
964=item warn (WARNING [, FIELD])
965
966Prepends a bit of location information (filename and line number(s)) to
967WARNING, appends a newline, and passes it to Perl's C<warn>.  If FIELD is
968supplied, the line number given is just that of the field; otherwise, the
969range of lines for the whole entry is given.  (Well, almost -- currently,
970the line number of the last field is used as the last line of the whole
971entry.  This is a bug.)
972
973For example, if lines 10-15 of file F<foo.bib> look like this:
974
975   @article{homer97,
976     author = {Homer Simpson and Ned Flanders},
977     title = {Territorial Imperatives in Modern Suburbia},
978     journal = {Journal of Suburban Studies},
979     year = 1997
980   }
981
982then, after parsing this entry to C<$entry>, the calls
983
984   $entry->warn ('what a silly entry');
985   $entry->warn ('what a silly journal', 'journal');
986
987would result in the following warnings being issued:
988
989   foo.bib, lines 10-14: what a silly entry
990   foo.bib, line 13: what a silly journal
991
992=cut
993
# warn (WARNING [, FIELD])
#
# Issues WARNING through Perl's built-in warn, prefixed with whatever
# location information the entry has and followed by a newline.
#
#   WARNING  the message text
#   FIELD    optional field name; if the entry has a line number recorded
#            for it, that line is reported instead of the entry's range.
#            An unrecognised FIELD is reported alongside the entry range.
#
# The prefix is built from the filename of the entry's file object (if
# any) and the line information in $self->{'lines'} (if any).  Pieces
# that are not available are simply left out, so an entry with no
# location information at all produces just "WARNING\n" rather than a
# garbled "line : WARNING".
sub warn
{
   my ($self, $warning, $field) = @_;

   # Fall back to an empty table so nothing below dereferences undef.
   my $lines = $self->{'lines'} || {};
   my ($start, $stop) = @{$lines}{'START','STOP'};

   # Line or line range of the whole entry, or '' when unknown.
   my $entry_range = '';
   if (defined $start)
   {
      $entry_range = (defined $stop && $stop != $start)
         ? "lines $start-$stop"
         : "line $start";
   }

   my $where;
   if (! defined $field)
   {
      $where = $entry_range;
   }
   elsif (exists $lines->{$field})
   {
      $where = "line $lines->{$field}";
   }
   else
   {
      $where = join (' ',
                     grep { length ($_) }
                        $entry_range, "(unknown field \"$field\")");
   }

   # Only look inside the file object if there is one; a plain
   # $self->{'file'}{'filename'} would autovivify $self->{'file'}.
   my $filename = $self->{'file'} ? $self->{'file'}{'filename'} : undef;

   my $location = join (', ',
                        grep { defined ($_) && length ($_) }
                           $filename, $where);
   $location .= ': ' if length ($location);

   # This method shares its name with the built-in, so say explicitly
   # that the built-in is the one wanted here.
   CORE::warn ("$location$warning\n");
}
1022
1023
1024=item line ([FIELD])
1025
1026Returns the line number of FIELD.  If the entry was parsed from a string,
1027this still works--it's just the line number relative to the start of the
1028string.  If the entry was parsed from a file, this works just as you'd
1029expect it to: it returns the absolute line number with respect to the
1030whole file.  Line numbers are one-based.
1031
1032If FIELD is not supplied, returns a two-element list containing the line
numbers of the beginning and end of the whole entry.  (Actually, the
"end" line number is currently inaccurate: it's really the line
number of the last field in the entry.  But it's better than nothing.)
1036
1037=cut
1038
# line ([FIELD])
#
# With FIELD defined, returns the line number stored for that field in
# $self->{'lines'}.  Without it, returns the ('START', 'STOP') slice of
# the same table, i.e. the first and last line of the entry as a
# two-element list (in scalar context a slice yields its last element).
sub line
{
   my $self  = shift;
   my $field = shift;

   return $self->{'lines'}{$field} if defined $field;
   return @{ $self->{'lines'} }{ qw(START STOP) };
}
1052
1053=item filename ()
1054
1055Returns the name of the file from which the entry was parsed.  Only
1056works if the file is represented by a C<Text::BibTeX::File> object---if
1057you just passed a filename/filehandle pair to C<parse>, you can't get
1058the filename back.  (Sorry.)
1059
1060=cut
1061
# filename ()
#
# Returns the 'filename' stored in the file object this entry is
# associated with, or undef if the entry has no file object.
#
# The file object is checked first: dereferencing
# $self->{'file'}{'filename'} directly would autovivify $self->{'file'}
# as an empty hash on file-less entries, and that empty hash then passes
# for a real file object anywhere the entry is tested with
# "if ($self->{'file'})".
sub filename
{
   my $self = shift;

   my $file = $self->{'file'};

   # Explicit undef (not a bare return) so that list-context callers
   # still receive exactly one value, as they always have.
   return $file ? $file->{'filename'} : undef;   # ooh yuck -- poking into File object
}
1068
10691;
1070
1071=back
1072
1073=head1 SEE ALSO
1074
1075L<Text::BibTeX>, L<Text::BibTeX::File>, L<Text::BibTeX::Structure>
1076
1077=head1 AUTHOR
1078
1079Greg Ward <gward@python.net>
1080
1081=head1 COPYRIGHT
1082
1083Copyright (c) 1997-2000 by Gregory P. Ward.  All rights reserved.  This file
1084is part of the Text::BibTeX library.  This library is free software; you
1085may redistribute it and/or modify it under the same terms as Perl itself.
1086
1087=cut
1088