1package Biber;
2use v5.16;
3use strict;
4use warnings;
5use base 'Biber::Internals';
6
7use constant {
8  EXIT_OK => 0,
9  EXIT_ERROR => 2
10};
11
12use Carp;
13use Encode;
14use File::Copy;
15use File::Spec;
16use File::Temp;
17use IO::File;
18use POSIX qw( locale_h ); # for sorting with built-in "sort"
19use Biber::Config;
20use Biber::Constants;
21use List::AllUtils qw( first uniq max );
22use Biber::DataModel;
23use Biber::Internals;
24use Biber::Entries;
25use Biber::Entry;
26use Biber::Entry::Name;
27use Biber::Sections;
28use Biber::Section;
29use Biber::LaTeX::Recode;
30use Biber::SortLists;
31use Biber::SortList;
32use Biber::Utils;
33use Log::Log4perl qw( :no_extra_logdie_message );
34use Data::Dump;
35use Data::Compare;
36use Text::BibTeX qw(:macrosubs);
37use Unicode::Normalize;
38
39=encoding utf-8
40
41=head1 NAME
42
43Biber - main module for biber, a bibtex replacement for users of biblatex
44
45=cut
46
47my $logger = Log::Log4perl::get_logger('main');
48
49
50=head1 SYNOPSIS
51
52    use Biber;
53
54    my $biber = Biber->new();
55    $biber->parse_ctrlfile("example.bcf");
56    $biber->prepare;
57
58=cut
59
60our $MASTER; # reference to biber object. Needed all over the place
61
62=head1 METHODS
63
64=head2 new
65
66    Initialize the Biber object, optionally passing named options as arguments.
67
68=cut
69
sub new {
  my $class = shift;
  my %opts  = @_;

  # Options must be initialised before anything below queries Biber::Config
  Biber::Config->_initopts(\%opts);

  # The object carries a reference to a global temp dir we might use for
  # various things
  my $self = bless { TEMPDIR => File::Temp->newdir() }, $class;

  # Initialise recoding schemes from the decoding set and the output safechars set
  my @charsets = map { Biber::Config->getoption($_) } qw(decodecharsset output_safecharsset);
  Biber::LaTeX::Recode->init_sets(@charsets);

  # Publish the object before validating: validation reports problems through
  # biber_error() which needs $MASTER to be defined
  $MASTER = $self;

  # Validate the config file if asked to. This can only happen here, after
  # the config file has been read and the options parsed. Validating after
  # reading looks odd but it still catches some semantic errors.
  validate_biber_xml($opts{configfile}, 'config', '')
    if Biber::Config->getoption('validate_config') and $opts{configfile};

  return $self;
}
96
97
98=head2 display_problems
99
100   Output summary of warnings/errors before exit
101
102=cut
103
sub display_problems {
  my ($self) = @_;

  # Values of the 'warnings' and 'errors' slots of the object
  my ($warnings, $errors) = @{$self}{qw(warnings errors)};

  $logger->info('WARNINGS: ' . $warnings) if $warnings;

  # Any recorded error ends the program with the error exit status
  if ($errors) {
    $logger->info('ERRORS: ' . $errors);
    exit EXIT_ERROR;
  }
}
114
115=head2 biber_tempdir
116
    my $tempdir = $biber->biber_tempdir
118
119    Returns a File::Temp directory object for use in various things
120
121=cut
122
sub biber_tempdir {
  my ($self) = @_;
  # The temp dir object is stored in the TEMPDIR slot by new()
  return $self->{TEMPDIR};
}
127
128
129=head2 sections
130
131    my $sections= $biber->sections
132
133    Returns a Biber::Sections object describing the bibliography sections
134
135=cut
136
sub sections {
  my ($self) = @_;
  # Whatever was stored in the 'sections' slot, undef if nothing was
  return $self->{sections};
}
141
142=head2 add_sections
143
144    Adds a Biber::Sections object. Used externally from, e.g. biber
145
146=cut
147
sub add_sections {
  my $self = shift;
  # Overwrites whatever was stored in the 'sections' slot before
  $self->{sections} = shift;
  return;
}
153
154=head2 sortlists
155
156    my $sortlists= $biber->sortlists
157
158    Returns a Biber::SortLists object describing the bibliography sorting lists
159
160=cut
161
sub sortlists {
  my ($self) = @_;
  # Whatever was stored in the 'sortlists' slot, undef if nothing was
  return $self->{sortlists};
}
166
167
168
169=head2 set_output_obj
170
171    Sets the object used to output final results
172    Must be a subclass of Biber::Output::base
173
174=cut
175
sub set_output_obj {
  my ($self, $obj) = @_;

  # Ask the argument itself whether it is a Biber::Output::base. The eval
  # traps the fatal error perl raises when $obj is undef or an unblessed
  # reference so that such arguments are rejected with the same message as
  # any other wrong argument. $@ is localised so the caller's value survives.
  my $is_output_obj = do {
    local $@;
    eval { $obj->isa('Biber::Output::base') };
  };
  croak('Output object must be subclass of Biber::Output::base!') unless $is_output_obj;

  $self->{output_obj} = $obj;
  return;
}
183
184
185=head2 get_preamble
186
187    Returns the current preamble as an array ref
188
189=cut
190
sub get_preamble {
  my ($self) = @_;
  # Whatever was stored in the 'preamble' slot, undef if nothing was
  return $self->{preamble};
}
195
196
197=head2 get_output_obj
198
199    Returns the object used to output final results
200
201=cut
202
sub get_output_obj {
  my ($self) = @_;
  # The object stored by set_output_obj(), undef if none was set
  return $self->{output_obj};
}
207
208=head2 set_current_section
209
    Sets the number of the section that we are currently working on
211
212=cut
213
sub set_current_section {
  my ($self, $secnum) = @_;
  # Remember the section number for later get_current_section() calls
  $self->{current_section} = $secnum;
  return;
}
220
221=head2 get_current_section
222
223    Gets the current section number that we are working on
224
225=cut
226
sub get_current_section {
  my ($self) = @_;
  # The number stored by set_current_section(), undef if none was set
  return $self->{current_section};
}
231
232=head2 tool_mode_setup
233
234  Fakes parts of the control file for tool mode
235
236=cut
237
sub tool_mode_setup {
  my $self = shift;

  # Number of the single pseudo-section which stands in for real sections
  my $secnum = 99999;

  # Fetch option values into scalars first so that they cannot disturb the
  # key/value pairing of the named argument lists below
  my $input_format = Biber::Config->getoption('input_format');
  my $sortscheme   = Biber::Config->getblxoption('sortscheme');

  # There are no sections in tool mode so create a pseudo-section which reads
  # the file given as first command line argument and uses all of its keys
  my $bib_sections = Biber::Sections->new;
  my $bib_section  = Biber::Section->new('number' => $secnum);
  $bib_section->set_datasources([{type     => 'file',
                                  name     => $ARGV[0],
                                  datatype => $input_format}]);
  $bib_section->set_allkeys(1);
  $bib_sections->add_section($bib_section);

  # Add the Biber::Sections object to the Biber object
  $self->add_sections($bib_sections);

  # One 'entry' list for the pseudo-section, using the global sorting
  my $sortlists = Biber::SortLists->new;
  my $seclist = Biber::SortList->new(section        => $secnum,
                                     sortschemename => $sortscheme,
                                     name           => $sortscheme);
  $seclist->set_type('entry');
  $seclist->set_sortscheme(Biber::Config->getblxoption('sorting'));
  # Locale just needs a default here - there is no biblatex option to take it from
  Biber::Config->setblxoption('sortlocale', 'en_US');
  $logger->debug("Adding 'entry' list 'tool' for pseudo-section 99999");
  $sortlists->add_list($seclist);
  $self->{sortlists} = $sortlists;

  # User maps are set in config file and need some massaging which normally
  # happens in parse_ctrlfile
  if (my $usms = Biber::Config->getoption('sourcemap')) {
    # Force "user" level for the maps, modifying them in place
    $_->{level} = 'user' for @$usms;
  }
  return;
}
270
271=head2 parse_ctrlfile
272
273    This method reads the control file
274    generated by biblatex to work out the various biblatex options.
275    See Constants.pm for defaults and example of the data structure being built here.
276
277=cut
278
sub parse_ctrlfile {
  # Reads the biblatex control file (.bcf) named by $ctrl_file and transfers
  # its contents into Biber::Config and into this object:
  #   * $self->{sections}  - a Biber::Sections object built from the
  #                          <section> elements of the .bcf
  #   * $self->{sortlists} - a Biber::SortLists object built from the
  #                          <sortlist> elements of the .bcf
  # A missing control file is reported with biber_error(). When the .bcf
  # defines no data sources at all, a warning is issued and the program exits
  # with EXIT_OK. Returns nothing.
  my ($self, $ctrl_file) = @_;

  my $ctrl_file_path = locate_biber_file($ctrl_file);
  Biber::Config->set_ctrlfile_path($ctrl_file_path);

  biber_error("Cannot find control file '$ctrl_file'! - did you pass the \"backend=biber\" option to BibLaTeX?") unless ($ctrl_file_path and -e $ctrl_file_path);

  # Validate if asked to
  if (Biber::Config->getoption('validate_control')) {
    validate_biber_xml($ctrl_file_path, 'bcf', 'https://sourceforge.net/projects/biblatex');
  }

  # Convert .bcf to .html using XSLT transform if asked to
  if (Biber::Config->getoption('convert_control')) {

    require XML::LibXSLT;
    require XML::LibXML;

    my $xslt = XML::LibXSLT->new();

    # we assume that the schema files are in the same dir as Biber.pm:
    my ($vol, $biber_path) = File::Spec->splitpath( $INC{"Biber.pm"} );

    # Deal with the strange world of Par::Packer paths
    # We might be running inside a PAR executable and @INC is a bit odd in this case
    # Specifically, "Biber.pm" in @INC might resolve to an internal jumbled name
    # nowhere near to these files. You know what I mean if you've dealt with pp
    my $bcf_xsl;
    if ($biber_path =~ m|/par\-| and $biber_path !~ m|/inc|) { # a mangled PAR @INC path
      $bcf_xsl = File::Spec->catpath($vol, "$biber_path/inc/lib/Biber", 'bcf.xsl');
    }
    else {
      $bcf_xsl = File::Spec->catpath($vol, "$biber_path/Biber", 'bcf.xsl');
    }

    # Conversion is optional: without the stylesheet we only warn and carry
    # on with reading the control file
    if (-e $bcf_xsl) {
      my $CFstyle = XML::LibXML->load_xml( location => $bcf_xsl, no_cdata=>1 );
      my $CF = XML::LibXML->load_xml(location => $ctrl_file_path);
      my $stylesheet = $xslt->parse_stylesheet($CFstyle);
      my $CFhtml = $stylesheet->transform($CF);
      $stylesheet->output_file($CFhtml, $ctrl_file_path . '.html');
      $logger->info("Converted BibLaTeX control file '$ctrl_file_path' to '$ctrl_file_path.html'");
    }
    else {
      biber_warn("Cannot find XML::LibXSLT stylesheet. Skipping conversion : $!");
    }
  }

  # Open control file
  $logger->info("Reading '$ctrl_file_path'");
  require File::Slurp;
  # NOTE(review): read_file() presumably croaks by itself on I/O failure with
  # its default err_mode, in which case this "or" would mainly trigger for an
  # empty control file - confirm against the File::Slurp documentation
  my $buf = File::Slurp::read_file($ctrl_file_path) or biber_error("Cannot open $ctrl_file_path: $!");
  $buf = NFD(decode('UTF-8', $buf));# Unicode NFD boundary

  # Read control file
  require XML::LibXML::Simple;

  my $bcfxml = XML::LibXML::Simple::XMLin($buf,
                                          'ForceContent' => 1,
                                          'ForceArray' => [
                                                           qr/\Acitekey\z/,
                                                           qr/\Aoption\z/,
                                                           qr/\Aoptions\z/,
                                                           qr/\Avalue\z/,
                                                           qr/\Asortitem\z/,
                                                           qr/\Abibdata\z/,
                                                           qr/\Adatasource\z/,
                                                           qr/\Asection\z/,
                                                           qr/\Asortexclusion\z/,
                                                           qr/\Aexclusion\z/,
                                                           qr/\Asort\z/,
                                                           qr/\Amode\z/,
                                                           qr/\Amaps\z/,
                                                           qr/\Amap\z/,
                                                           qr/\Amap_step\z/,
                                                           qr/\Aper_type\z/,
                                                           qr/\Aper_nottype\z/,
                                                           qr/\Aper_datasource\z/,
                                                           qr/\Anosort\z/,
                                                           qr/\Anoinit\z/,
                                                           qr/\Apresort\z/,
                                                           qr/\Atype_pair\z/,
                                                           qr/\Ainherit\z/,
                                                           qr/\Afieldor\z/,
                                                           qr/\Afieldxor\z/,
                                                           qr/\Afield\z/,
                                                           qr/\Aalias\z/,
                                                           qr/\Aalsoset\z/,
                                                           qr/\Aconstraints\z/,
                                                           qr/\Aconstraint\z/,
                                                           qr/\Aentrytype\z/,
                                                           qr/\Adatetype\z/,
                                                           qr/\Asortlist\z/,
                                                           qr/\Alabel(?:part|element|alphatemplate)\z/,
                                                           qr/\Acondition\z/,
                                                           qr/\A(?:or)?filter\z/,
                                                           qr/\Aoptionscope\z/,
                                                          ],
                                          'NsStrip' => 1,
                                          'KeyAttr' => []);
#  use Data::Dump;dd($bcfxml);exit 0;
  my $controlversion = $bcfxml->{version};
  Biber::Config->setblxoption('controlversion', $controlversion);
  unless ($controlversion eq $BCF_VERSION) {
    biber_warn("Warning: Found biblatex control file version $controlversion, expected version $BCF_VERSION");
  }

  # Look at control file and populate our main data structure with its information

  # Option scope
  foreach my $bcfscopeopts (@{$bcfxml->{optionscope}}) {
    my $type = $bcfscopeopts->{type};
    foreach my $bcfscopeopt (@{$bcfscopeopts->{option}}) {
      $CONFIG_SCOPE_BIBLATEX{$bcfscopeopt->{content}}{$type} = 1;
    }
  }

  # OPTIONS
  foreach my $bcfopts (@{$bcfxml->{options}}) {

    # Biber options
    if ($bcfopts->{component} eq 'biber') {

      # Global options
      if ($bcfopts->{type} eq 'global') {
        foreach my $bcfopt (@{$bcfopts->{option}}) {
          # unless already explicitly set from cmdline/config file
          unless (Biber::Config->isexplicitoption($bcfopt->{key}{content})) {
            if ($bcfopt->{type} eq 'singlevalued') {
              Biber::Config->setoption($bcfopt->{key}{content}, $bcfopt->{value}[0]{content});
            }
            elsif ($bcfopt->{type} eq 'multivalued') {
              Biber::Config->setoption($bcfopt->{key}{content},
                [ map {$_->{content}} sort {$a->{order} <=> $b->{order}} @{$bcfopt->{value}} ]);
            }
          }
        }
      }
    }

    # BibLaTeX options
    if ($bcfopts->{component} eq 'biblatex') {

      # Global options are set without any scope arguments. Every other type
      # is the name of an entrytype and its options are set with 'PER_TYPE'
      # scope for that entrytype
      my @scope = $bcfopts->{type} eq 'global' ? () : ('PER_TYPE', $bcfopts->{type});

      foreach my $bcfopt (@{$bcfopts->{option}}) {
        if ($bcfopt->{type} eq 'singlevalued') {
          Biber::Config->setblxoption($bcfopt->{key}{content}, $bcfopt->{value}[0]{content}, @scope);
        }
        elsif ($bcfopt->{type} eq 'multivalued') {
          # sort on order attribute and then remove it
          Biber::Config->setblxoption($bcfopt->{key}{content},
            [ map {delete($_->{order}); $_} sort {$a->{order} <=> $b->{order}} @{$bcfopt->{value}} ],
            @scope);
        }
      }
    }
  }

  # DATASOURCE MAPPING
  # This is special as it's both a biblatex option and a biber option
  # We merge into the biber option
  # In biblatex you can set driver mappings but not in biber
  # Order of application of maps is decided by the level and within 'user' level,
  # which can come from two places (biber.conf and \DeclareSourcemap), order is
  # \DeclareSourcemap, then biber.conf
  if (exists($bcfxml->{sourcemap})) {
    # User maps are set in config file
    if (my $usms = Biber::Config->getoption('sourcemap')) {
      # Force "user" level for the maps, modifying them in place
      $_->{level} = 'user' for @$usms;

      # Merge any user maps from the document set by \DeclareSourcemap into user
      # maps set in the biber config file. These document user maps take precedence so go
      # at the front of any other user maps
      unshift(@$usms, grep {$_->{level} eq 'user'} @{$bcfxml->{sourcemap}{maps}});

      # Merge the driver/style maps with the user maps from the config file
      if (my @m = grep {$_->{level} eq 'driver' or
                        $_->{level} eq 'style'} @{$bcfxml->{sourcemap}{maps}} ) {
        Biber::Config->setoption('sourcemap', [@$usms, @m]);
      }
      else { # no driver defaults, just override the config file user map settings
        Biber::Config->setoption('sourcemap', $bcfxml->{sourcemap}{maps});
      }
    }
    else { # just write the option as there are no config file settings at all
      Biber::Config->setoption('sourcemap', $bcfxml->{sourcemap}{maps});
    }
  }

  # LABELALPHA TEMPLATE
  foreach my $t (@{$bcfxml->{labelalphatemplate}}) {
    my $latype = $t->{type};
    if ($latype eq 'global') {
      Biber::Config->setblxoption('labelalphatemplate', $t);
    }
    else {
      Biber::Config->setblxoption('labelalphatemplate',
                                  $t,
                                  'PER_TYPE',
                                  $latype);
    }
  }

  # INHERITANCE schemes for crossreferences (always global)
  Biber::Config->setblxoption('inheritance', $bcfxml->{inheritance});

  # NOINIT
  # Make the data structure look like the biber config file structure
  # "value" is forced to arrays for other elements so we extract
  # the first element here as they will always be only length=1
  my $noinit;
  foreach my $ni (@{$bcfxml->{noinits}{noinit}}) {
    push @$noinit, { value => $ni->{value}[0]};
  }
  # There is a default so don't set this option if nothing is in the .bcf
  Biber::Config->setoption('noinit', $noinit) if $noinit;

  # NOSORT
  # Make the data structure look like the biber config file structure
  # "field" and "value" are forced to arrays for other elements so we extract
  # the first element here as they will always be only length=1
  my $nosort;
  foreach my $ns (@{$bcfxml->{nosorts}{nosort}}) {
    push @$nosort, { name => $ns->{field}[0], value => $ns->{value}[0]};
  }
  # There is a default so don't set this option if nothing is in the .bcf
  Biber::Config->setoption('nosort', $nosort) if $nosort;

  # SORTING

  # sorting excludes
  foreach my $sex (@{$bcfxml->{sorting}{sortexclusion}}) {
    my $excludes;
    foreach my $ex (@{$sex->{exclusion}}) {
      $excludes->{$ex->{content}} = 1;
    }
    Biber::Config->setblxoption('sortexclusion',
                                $excludes,
                                'PER_TYPE',
                                $sex->{type});
  }

  # presort defaults
  foreach my $presort (@{$bcfxml->{sorting}{presort}}) {
    # Per-type default
    if (exists($presort->{type})) {
      Biber::Config->setblxoption('presort',
                                  $presort->{content},
                                  'PER_TYPE',
                                  $presort->{type});
    }
    # Global presort default
    else {
      Biber::Config->setblxoption('presort', $presort->{content});
    }
  }

  my $sorting = _parse_sort($bcfxml->{sorting});

  Biber::Config->setblxoption('sorting', $sorting);

  # DATAMODEL schema (always global)
  Biber::Config->setblxoption('datamodel', $bcfxml->{datamodel});

  # SECTIONS
  # This is also where we set data files as these are associated with a bib section

  # Data sources, keyed by section number. A datasource which repeats the
  # type, datatype and name of one already recorded for the section is skipped
  my %bibdatasources = ();
  foreach my $data (@{$bcfxml->{bibdata}}) {
    foreach my $datasource (@{$data->{datasource}}) {
      unless (first {$_->{type} eq $datasource->{type} and
             $_->{datatype} eq $datasource->{datatype} and
               $_->{name} eq $datasource->{content}} @{$bibdatasources{$data->{section}[0]}}) {
        push @{$bibdatasources{$data->{section}[0]}}, { type     => $datasource->{type},
                                                        name     => $datasource->{content},
                                                        datatype => $datasource->{datatype} };
      }
    }
  }

  # Be friendly to latexmk etc.
  unless (%bibdatasources) {
    biber_warn("No data sources defined!");
    exit EXIT_OK;
  }

  my $key_flag = 0;
  my $bib_sections = Biber::Sections->new;

SECTION: foreach my $section (@{$bcfxml->{section}}) {
    my $bib_section;
    my $secnum = $section->{number};
    # Can be multiple section 0 entries and so re-use that section object if it exists
    if (my $existing_section = $bib_sections->get_section($secnum)) {
      $bib_section = $existing_section;
    }
    else {
      $bib_section = Biber::Section->new('number' => $secnum);
    }

    # Set the data files for the section unless we've already done so
    # (for example, for multiple section 0 entries)
    $bib_section->set_datasources($bibdatasources{$secnum}) unless
      $bib_section->get_datasources;

    my @keys = ();
    foreach my $keyc (@{$section->{citekey}}) {
      my $key = NFD($keyc->{content});# Key is already UTF-8 - it comes from UTF-8 XML
      # Stop reading citekeys if we encounter "*" as a citation as this means
      # "all keys"
      if ($key eq '*') {
        $bib_section->set_allkeys(1);
        $key_flag = 1; # There is at least one key, used for error reporting below
      }
      elsif (not Biber::Config->get_seenkey($key, $secnum)) {
        # Dynamic set definition
        # Save dynamic key -> member keys mapping for set entry auto creation later
        # We still need to find these even if allkeys is set
        if (exists($keyc->{type}) and $keyc->{type} eq 'set') {
          $bib_section->set_dynamic_set($key, split /\s*,\s*/, $keyc->{members});
          push @keys, $key;
          $key_flag = 1; # There is at least one key, used for error reporting below
        }
        else {
          next if $bib_section->is_allkeys; # Skip if we have already encountered '*'
          # Set order information - there is no order on dynamic key defs above
          # as they are a definition, not a cite
          Biber::Config->set_keyorder($secnum, $key, $keyc->{order});
          push @keys, $key;
          $key_flag = 1; # There is at least one key, used for error reporting below
          Biber::Config->incr_seenkey($key, $secnum);
        }
      }
    }

    if ($bib_section->is_allkeys) {
      # Normalise - when allkeys is true don't need citekeys - just in case someone
      # lists "*" and also some other citekeys
      $bib_section->del_citekeys;
      $logger->info("Using all citekeys in bib section " . $secnum);
    }
    else {
      $logger->info('Found ', scalar(@keys), " citekeys in bib section $secnum");
    }

    if (Biber::Config->getoption('debug')) {
      unless ($bib_section->is_allkeys) {
        $logger->debug("The citekeys for section $secnum are: ", join(', ', sort @keys), "\n");
      }
    }

    $bib_section->add_citekeys(@keys) unless $bib_section->is_allkeys;
    $bib_sections->add_section($bib_section);
  }

  # Add the Biber::Sections object to the Biber object
  $self->{sections} = $bib_sections;

  # Read sortlists
  my $sortlists = Biber::SortLists->new;

  foreach my $list (@{$bcfxml->{sortlist}}) {
    my $ltype  = $list->{type};
    my $lssn = $list->{sortscheme};
    my $lname = $list->{name};

    my $lsection = $list->{section}[0]; # because "section" needs to be a list elsewhere in XML

    # The type and name the list actually ends up with. These are also used
    # for the log messages so that a missing attribute is not interpolated
    # as an undefined value
    my $list_type = $ltype || 'entry'; # lists are entry lists by default
    my $list_name = $lname || $lssn; # name is only relevant for "list" type, default to ss

    if ($sortlists->get_list($lsection, $lname, $ltype, $lssn)) {
      $logger->debug("Section sortlist '$list_name' of type '$list_type' with sortscheme '$lssn' is repeated for section $lsection - ignoring");
      next;
    }

    my $seclist = Biber::SortList->new(section => $lsection, sortschemename => $lssn, name => $lname);
    $seclist->set_type($list_type);
    $seclist->set_name($list_name);
    foreach my $filter (@{$list->{filter}}) {
      $seclist->add_filter($filter->{type}, $filter->{content});
    }
    # disjunctive filters
    foreach my $orfilter (@{$list->{orfilter}}) {
      $seclist->add_filter('orfilter', { map {$_->{type} => [$_->{content}]} @{$orfilter->{filter}} });
    }

    if (my $sorting = $list->{sorting}) { # can be undef for fallback to global sorting
      $seclist->set_sortscheme(_parse_sort($sorting));
    }
    else {
      $seclist->set_sortscheme(Biber::Config->getblxoption('sorting'));
    }
    $logger->debug("Adding sortlist of type '$list_type' with sortscheme '$lssn' and name '$list_name' for section $lsection");
    $sortlists->add_list($seclist);
  }

  # Check to make sure that each section has an entry sortlist for global sorting
  # We have to make sure in case sortcites is used which uses the global order.
  my $globalss = Biber::Config->getblxoption('sortscheme');
  foreach my $section (@{$bcfxml->{section}}) {
    my $secnum = $section->{number};
    unless ($sortlists->get_list($secnum, $globalss, 'entry', $globalss)) {
      my $seclist = Biber::SortList->new(section => $secnum, type => 'entry', sortschemename => $globalss, name => $globalss);
      $seclist->set_sortscheme(Biber::Config->getblxoption('sorting'));
      $sortlists->add_list($seclist);
    }
  }

  # Add the Biber::SortLists object to the Biber object
  $self->{sortlists} = $sortlists;

  # Warn if there are no citations in any section
  unless ($key_flag) {
    biber_warn("The file '$ctrl_file_path' does not contain any citations!");
  }

  # Normalise any UTF-8 encoding string immediately to exactly what we want
  # We want the strict perl utf8 "UTF-8"
  normalise_utf8();

  # bibtex output when not in tool mode, is essentially entering tool mode but
  # without allkeys. We are not in tool mode if we are here. We fake tool mode
  # and then add a special section which contains all cited keys from all sections
  if (Biber::Config->getoption('output_format') eq 'bibtex') {
    Biber::Config->setoption('tool' ,1);
    Biber::Config->setoption('pseudo_tool' ,1);

    my $bib_section = Biber::Section->new('number' => 99999);

    foreach my $section (@{$self->sections->get_sections}) {
      if ($section->is_allkeys) {
        $bib_section->set_allkeys(1);
      }
      else {
        $bib_section->add_citekeys($section->get_citekeys);
      }
      foreach my $ds (@{$section->get_datasources}) {
        $bib_section->add_datasource($ds);
      }
    }

    $self->sections->add_section($bib_section);

    # Global sorting in non tool mode bibtex output is citeorder so override the .bcf here
    Biber::Config->setblxoption('sortscheme', 'none');
    # Global locale in non tool mode bibtex output is default
    Biber::Config->setblxoption('sortlocale', 'english');

    # Read the scheme name back into a scalar so that it cannot disturb the
    # key/value pairing of the named arguments
    my $sortscheme = Biber::Config->getblxoption('sortscheme');
    my $seclist = Biber::SortList->new(section => 99999, sortschemename => $sortscheme, name => $sortscheme);
    $seclist->set_type('entry');
    # bibtex output in non-tool mode is just citeorder
    $seclist->set_sortscheme({locale => locale2bcp47(Biber::Config->getblxoption('sortlocale')),
                              spec   =>
                             [
                              [
                               {},
                               {'citeorder'    => {}}
                              ]
                             ]});
    $logger->debug("Adding 'entry' list 'none' for pseudo-section 99999");
    $self->{sortlists}->add_list($seclist);
  }

  return;
}
765
766
767=head2 process_setup
768
769   Place to put misc pre-processing things needed later
770
771=cut
772
sub process_setup {
  my ($self) = @_;

  # Every refsection must end up with at least one 'entry' list. Someone may
  # cite entries which are included in no bibliography, which results in no
  # entry list in the .bcf, so a list with the global sorting is added then.
  foreach my $section (@{$self->sections->get_sections}) {
    my $secnum = $section->number;
    next if $self->sortlists->has_lists_of_type_for_section($secnum, 'entry');

    my $dlist = Biber::SortList->new(sortschemename => Biber::Config->getblxoption('sortscheme'),
                                     name           => Biber::Config->getblxoption('sortscheme'));
    $dlist->set_sortscheme(Biber::Config->getblxoption('sorting'));
    $dlist->set_type('entry');
    $dlist->set_section($secnum);
    $self->sortlists->add_list($dlist);
  }

  # Turn the data model information into a Biber::DataModel object for use in
  # verification checks later. This is done here and not in parse_ctrlfile()
  # so that user config dm settings are picked up.
  Biber::Config->set_dm(
    Biber::DataModel->new(Biber::Config->getblxoption('datamodel'))
  );

  # ASCII output of non-ASCII input forces the output_safechars flag
  my $ascii = qr/(?:x-)?ascii/xmsi;
  if (Biber::Config->getoption('output_encoding') =~ $ascii and
      Biber::Config->getoption('input_encoding') !~ $ascii) {
    Biber::Config->setoption('output_safechars', 1);
  }
}
802
803=head2 process_setup_tool
804
805   Place to put misc pre-processing things needed later for tool mode
806
807=cut
808
sub process_setup_tool {
  my ($self) = @_;

  # Turn the data model information into a Biber::DataModel object
  Biber::Config->set_dm(
    Biber::DataModel->new(Biber::Config->getblxoption('datamodel'))
  );

  # ASCII output of non-ASCII input forces the output_safechars flag
  my $ascii = qr/(?:x-)?ascii/xmsi;
  if (Biber::Config->getoption('output_encoding') =~ $ascii and
      Biber::Config->getoption('input_encoding') !~ $ascii) {
    Biber::Config->setoption('output_safechars', 1);
  }
}
820
821
822=head2 resolve_alias_refs
823
824  Resolve aliases in xref/crossref/xdata which take keys as values to their real keys
825
826  We use set_datafield as we are overriding the alias in the datasource
827
828=cut
829
sub resolve_alias_refs {
  my ($self) = @_;
  my $section = $self->sections->get_section($self->get_current_section);

  foreach my $citekey ($section->get_citekeys) {
    my $entry = $section->bibentry($citekey);

    # XREF and CROSSREF each hold a single key: replace it only when it is
    # an alias for another key
    foreach my $field (qw(xref crossref)) {
      my $refkey  = $entry->get_field($field) or next;
      my $realkey = $section->get_citekey_alias($refkey) or next;
      $entry->set_datafield($field, $realkey);
    }

    # XDATA holds a list of keys: every alias in it is replaced by its real
    # key, in the fetched list itself as well as in the list written back
    my $xdata = $entry->get_field('xdata') or next;
    my $resolved_keys;
    foreach my $idx (0 .. $#{$xdata}) {
      $xdata->[$idx] = $section->get_citekey_alias($xdata->[$idx]) // $xdata->[$idx];
      push @{$resolved_keys}, $xdata->[$idx];
    }
    $entry->set_datafield('xdata', $resolved_keys);
  }
}
860
861=head2 process_citekey_aliases
862
863 Remove citekey aliases from citekeys as they don't point to real
864 entries.
865
866=cut
867
sub process_citekey_aliases {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  # A citekey for which the section knows a real key is only an alias, so it
  # is dropped from the list of citekeys
  foreach my $citekey ($section->get_citekeys) {
    next unless $section->get_citekey_alias($citekey);
    $logger->debug("Pruning citekey alias '$citekey' from citekeys");
    $section->del_citekey($citekey);
  }
}
879
880=head2 nullable_check
881
882  Check entries for nullable fields
883
884=cut
885
sub nullable_check {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $dm = Biber::Config->get_dm;

  # For every cited entry, warn about and delete each datafield whose value
  # is null unless the data model allows that field to be null.
  foreach my $citekey ($section->get_citekeys) {
    my $be = $section->bibentry($citekey);
    foreach my $f ($be->datafields) {
      next unless is_null($be->get_datafield($f));
      next if $dm->field_is_nullok($f);
      biber_warn("The field '$f' in entry '$citekey' cannot be null, deleting it");
      $be->del_field($f);
    }
  }
}
904
905
906=head2 instantiate_dynamic
907
    This instantiates any dynamic entries so that they are available
    for processing later on. This has to be done before almost all other
    processing so that when we call $section->bibentry($key), as we
    do many times in the code, we don't die because there is a key but
    no Entry object.
913
914=cut
915
sub instantiate_dynamic {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  # Creates an entry object for every dynamic set of the current section and
  # then asks every cited entry to create its related entry clones.

  $logger->debug("Creating dynamic entries (sets/related) for section $secnum");

  # Instantiate any dynamic set entries before we do anything else
  foreach my $dset (@{$section->dynamic_set_keys}) {
    # Resolve any aliases in the members and store the real keys back in the
    # section so that the set only refers to real entries
    my @members = map { $section->get_citekey_alias($_) // $_ } $section->get_dynamic_set($dset);
    $section->set_dynamic_set($dset, @members);

    # Direct class method call rather than the indirect object form
    # "new Biber::Entry", which is parsed heuristically
    my $be = Biber::Entry->new;
    $be->set_field('entrytype', 'set');
    $be->set_field('entryset', [ @members ]);
    $be->set_field('citekey', $dset);
    $be->set_field('datatype', 'dynamic');
    $section->bibentries->add_entry($dset, $be);
    $logger->debug("Created dynamic set entry '$dset' in section $secnum");

    # Save graph information if requested
    if (Biber::Config->getoption('output_format') eq 'dot') {
      foreach my $m (@members) {
        Biber::Config->set_graph('set', $dset, $m);
      }
    }
    # Setting dataonly for members is handled by process_sets()
  }

  # Instantiate any related entry clones we need
  foreach my $citekey ($section->get_citekeys) {
    my $be = $section->bibentry($citekey);
    $be->relclone;
  }
  return;
}
959
960=head2 resolve_xdata
961
962    Resolve xdata entries
963
964=cut
965
sub resolve_xdata {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  $logger->debug("Resolving XDATA entries for section $secnum");

  # All entries of the section are visited rather than just the citekeys:
  # XDATA entries are not cited but may still have been added as entries.
  foreach my $entry ($section->bibentries->entries) {
    # Entries of type XDATA are never resolved directly here. They are
    # resolved recursively by the Entry method when something references
    # them; otherwise we would die on loops etc. in XDATA entries which no
    # cited entry ever uses.
    unless ($entry->get_field('entrytype') eq 'xdata') {
      if (my $xdata = $entry->get_field('xdata')) {
        $entry->resolve_xdata($xdata);
      }
    }
  }
}
983
984
985=head2 cite_setmembers
986
987    Promotes set member to cited status
988
989=cut
990
sub cite_setmembers {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  $logger->debug("Adding set members to citekeys for section $secnum");

  foreach my $citekey ($section->get_citekeys) {
    my $be = $section->bibentry($citekey);

    # Only set entries which carry an entryset have members to promote
    next unless $be->get_field('entrytype') eq 'set';
    my $inset_keys = $be->get_field('entryset');
    next unless $inset_keys;

    # Swap aliases among the members for their real keys and store the
    # resolved list back in the set entry
    my $resolved;
    foreach my $mem (@$inset_keys) {
      my $real = $section->get_citekey_alias($mem);
      push @$resolved, defined($real) ? $real : $mem;
    }
    $inset_keys = $resolved;
    $be->set_datafield('entryset', $inset_keys);

    # Every member becomes a fully cited entry
    foreach my $inset_key (@$inset_keys) {
      $logger->debug("Adding set member '$inset_key' to the citekeys (section $secnum)");
      $section->add_citekeys($inset_key);

      # Record the set -> member edge when graph output was requested
      if (Biber::Config->getoption('output_format') eq 'dot') {
        Biber::Config->set_graph('set', $citekey, $inset_key);
      }
    }

    # The set inherits from its first member using plain set inheritance
    my $first_member = $section->bibentry($inset_keys->[0]);
    $be->set_inherit_from($first_member, $section);

    # An explicit crossref in a set is the old pre-Biber way of doing this;
    # warn and drop it
    if ($be->get_field('crossref')) {
      biber_warn("Field 'crossref' is no longer needed in set entries in Biber - ignoring in entry '$citekey'", $be);
      $be->del_field('crossref');
    }
  }
}
1032
1033=head2 process_interentry
1034
1035    $biber->process_interentry
1036
1037    This does several things:
1038    1. Records the set information for use later
1039    2. Ensures proper inheritance of data from cross-references.
1040    3. Ensures that crossrefs/xrefs that are directly cited or cross-referenced
1041       at least mincrossrefs times are included in the bibliography.
1042
1043=cut
1044
sub process_interentry {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  $logger->debug("Processing explicit and implicit crossrefs for section $secnum");

  foreach my $citekey ($section->get_citekeys) {
    my $be = $section->bibentry($citekey);

    # Record the set relations in both directions (set -> member and
    # member -> set). This is done in this loop because process_sets()
    # needs the information from all other entries for every entry.
    if ($be->get_field('entrytype') eq 'set') {
      my $entrysetkeys = $be->get_field('entryset');
      foreach my $member (@$entrysetkeys) {
        Biber::Config->set_set_pc($citekey, $member);
        Biber::Config->set_set_cp($member, $citekey);
      }
    }

    # Count the reference made by this cited entry, preferring xref over
    # crossref. Counting while parsing entries is not possible as that would
    # also count references made by potentially uncited children.
    my $refkey = $be->get_field('xref') || $be->get_field('crossref');
    if ($refkey) {
      $logger->debug("Incrementing cross/xrefkey count for entry '$refkey' via entry '$citekey'");
      Biber::Config->incr_crossrefkey($refkey);
    }

    # Record the xref relation when graph output was requested
    if (Biber::Config->getoption('output_format') eq 'dot') {
      if (my $xref = $be->get_field('xref')) {
        Biber::Config->set_graph('xref', $citekey, $xref);
      }
    }

    # Crossref inheritance: nothing to do without a crossref or when the
    # inheritance from this parent has already been recorded
    my $cr = $be->get_field('crossref') or next;
    next if Biber::Config->get_inheritance('crossref', $cr, $be->get_field('citekey'));

    my $parent = $section->bibentry($cr);
    $logger->debug("Entry $citekey inheriting fields from parent $cr");
    if ($parent) {
      $be->inherit_from($parent);
    }
    else {
      biber_warn("Cannot inherit from crossref key '$cr' - does it exist?", $be);
    }
  }

  # A key which has been cross-referenced at least mincrossrefs times is
  # added to the citekeys so that it is included in the bibliography
  foreach my $k ( @{Biber::Config->get_crossrefkeys} ) {
    next unless Biber::Config->get_crossrefkey($k) >= Biber::Config->getoption('mincrossrefs');
    $logger->debug("cross/xref key '$k' is cross/xref'ed >= mincrossrefs, adding to citekeys");
    $section->add_citekeys($k);
  }
}
1107
1108=head2 validate_datamodel
1109
1110  Validate bib data according to a datamodel
1111  Note that we are validating the internal Biber::Entries
1112  after they have been created from the datasources so this is
1113  datasource neutral, as it should be. It is here to enforce
1114  adherence to what biblatex expects.
1115
1116=cut
1117
sub validate_datamodel {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $dm = Biber::Config->get_dm;

  # Validation only happens when the 'validate_datamodel' option is set
  return unless Biber::Config->getoption('validate_datamodel');

  # The constraint checks of the data model, in the order they are applied.
  # Each returns a list of warning messages for the entry.
  my @constraint_checks = (
    'check_mandatory_constraints',
    'check_conditional_constraints',
    'check_data_constraints',
  );

  foreach my $key ($section->get_citekeys) {
    my $be = $section->bibentry($key);
    # Messages and the datasource lookup use the key stored in the entry
    my $citekey = $be->get_field('citekey');
    my $et = $be->get_field('entrytype');
    my $ds = $section->get_keytods($citekey);

    # An entrytype unknown to the data model is replaced by 'misc'
    unless ($dm->is_entrytype($et)) {
      biber_warn("Datamodel: Entry '$citekey' ($ds): Invalid entry type '" . $et . "' - defaulting to 'misc'", $be);
      $et = 'misc';
      $be->set_field('entrytype', $et);
    }

    # Warn about every datafield which the data model does not accept for
    # this entrytype
    foreach my $ef ($be->datafields) {
      next if $dm->is_field_for_entrytype($et, $ef);
      biber_warn("Datamodel: Entry '$citekey' ($ds): Invalid field '$ef' for entrytype '$et'", $be);
    }

    # Run each constraint check and report whatever it complains about
    foreach my $check (@constraint_checks) {
      foreach my $warning ($dm->$check($be)) {
        biber_warn($warning, $be);
      }
    }
  }
}
1166
1167=head2 process_entries_pre
1168
    Main processing operations, to generate metadata and entry information.
    This method is automatically called by C<prepare>.
    Here we process sets and generate the "labelname", "labeldate",
    "labeltitle", "fullhash" and presort information for each cited entry.
    Runs prior to uniqueness processing.
1174
1175=cut
1176
sub process_entries_pre {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  # Per-entry processing methods, listed in the order they are applied
  my @steps = (
    'process_sets',        # set entries
    'process_labelname',   # labelname
    'process_labeldate',   # labeldate
    'process_labeltitle',  # labeltitle
    'process_fullhash',    # fullhash
    'process_presort',     # entry-specific presort fields
  );

  foreach my $citekey ( $section->get_citekeys ) {
    $logger->debug("Postprocessing entry '$citekey' from section $secnum (before uniqueness)");

    foreach my $step (@steps) {
      $self->$step($citekey);
    }
  }

  $logger->debug("Finished processing entries in section $secnum (before uniqueness)");

  return;
}
1208
1209=head2 process_entries_post
1210
1211    More processing operations, to generate things which require uniqueness
1212    information like namehash
1213    Runs after uniqueness processing
1214
1215=cut
1216
sub process_entries_post {
  my ($self) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  # Per-entry processing methods, listed in the order they are applied
  my @steps = (
    'process_labelalpha',      # labelalpha information
    'process_extraalpha',      # tracking information for extraalpha
    'process_extrayear',       # tracking information for extrayear
    'process_extratitle',      # tracking information for extratitle
    'process_extratitleyear',  # tracking information for extratitleyear
    'process_singletitle',     # tracking information for singletitle
    'process_namehash',        # namehash
    'process_pername_hashes',  # per-name hashes
  );

  foreach my $citekey ( $section->get_citekeys ) {
    $logger->debug("Postprocessing entry '$citekey' from section $secnum (after uniqueness)");

    foreach my $step (@steps) {
      $self->$step($citekey);
    }
  }

  $logger->debug("Finished processing entries in section $secnum (after uniqueness)");

  return;
}
1254
1255
1256=head2 process_singletitle
1257
1258    Track seen work combination for generation of singletitle
1259
1260=cut
1261
sub process_singletitle {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');
  $logger->trace("Creating singletitle information for '$citekey'");

  # The work is identified by a hash of the labelname if the entry has one
  # and by the content of the labeltitle field otherwise
  my $identifier;
  my $lni = $be->get_labelname_info;
  if ($lni) {
    $identifier = $self->_getnamehash_u($citekey, $be->get_field($lni));
  }
  else {
    my $lti = $be->get_labeltitle_info;
    $identifier = $be->get_field($lti) if $lti;
  }

  # Without a labelname or labeltitle there is nothing sensible to track.
  # The biblatex 'singletitle' option must also be set for the entrytype.
  return unless $identifier;
  return unless Biber::Config->getblxoption('singletitle', $bee);

  Biber::Config->incr_seenwork($identifier);
  $logger->trace("Setting seenwork for '$citekey' to '$identifier'");
  $be->set_field('seenwork', $identifier);
  return;
}
1290
1291
1292
1293=head2 process_extrayear
1294
1295    Track labelname/year combination for generation of extrayear
1296
1297=cut
1298
sub process_extrayear {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  # Tracks the labelname/year combination which extrayear is generated from:
  # * Nothing is tracked unless 'labeldate' is enabled for the entrytype
  # * Nothing is tracked if 'skiplab' is set for the entry
  # * The name part is the empty string if there is no labelname
  # * The year part is labelyear, else year, else the empty string
  # Both parts are handed to incr_seen_nameyear even when empty.
  return unless Biber::Config->getblxoption('labeldate', $bee);
  return if Biber::Config->getblxoption('skiplab', $bee, $citekey);

  $logger->trace("Creating extrayear information for '$citekey'");

  my $lni = $be->get_labelname_info;
  my $name_string = $lni ? $self->_getnamehash_u($citekey, $be->get_field($lni)) : '';

  # labelyear is preferred as it takes date ranges into account
  my $year_string = $be->get_field('labelyear');
  $year_string ||= $be->get_field('year');
  $year_string ||= '';

  my $nameyear_string = "$name_string,$year_string";
  $logger->trace("Setting nameyear to '$nameyear_string' for entry '$citekey'");
  $be->set_field('nameyear', $nameyear_string);
  $logger->trace("Incrementing nameyear for '$name_string'");
  Biber::Config->incr_seen_nameyear($name_string, $year_string);

  return;
}
1339
1340=head2 process_extratitle
1341
1342    Track labelname/labeltitle combination for generation of extratitle
1343
1344=cut
1345
sub process_extratitle {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  # Generate labelname/labeltitle combination for tracking extratitle
  # * Nothing is tracked unless 'labeltitle' is enabled for the entrytype
  # * If there is no labelname to use, use empty string
  # * If there is no labeltitle to use, use empty string
  # * Don't increment if skiplab is set

  # This is different from extrayear in that we do track the information
  # if the labelname is empty as titles are much more unique than years

  if (Biber::Config->getblxoption('labeltitle', $bee)) {
    if (Biber::Config->getblxoption('skiplab', $bee, $citekey)) {
      return;
    }

    $logger->trace("Creating extratitle information for '$citekey'");

    my $name_string = '';
    if (my $lni = $be->get_labelname_info) {
      $name_string = $self->_getnamehash_u($citekey, $be->get_field($lni));
    }

    # The entry may have no labeltitle source at all. Only look the field up
    # when there is one so that get_field is never called with an undefined
    # field name.
    my $title_string = '';
    if (my $lti = $be->get_labeltitle_info) {
      $title_string = $be->get_field($lti) // '';
    }

    my $nametitle_string = "$name_string,$title_string";
    $logger->trace("Setting nametitle to '$nametitle_string' for entry '$citekey'");
    $be->set_field('nametitle', $nametitle_string);
    $logger->trace("Incrementing nametitle for '$name_string'");
    Biber::Config->incr_seen_nametitle($name_string, $title_string);
  }

  return;
}
1386
1387=head2 process_extratitleyear
1388
1389    Track labeltitle/labelyear combination for generation of extratitleyear
1390
1391=cut
1392
sub process_extratitleyear {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  # Generate labeltitle/labelyear combination for tracking extratitleyear
  # * Nothing is tracked unless 'labeltitleyear' is enabled for the entrytype
  # * If there is no labeltitle to use, use empty string
  # * If there is no labelyear or year to use, use empty string
  # * Don't increment if skiplab is set

  if (Biber::Config->getblxoption('labeltitleyear', $bee)) {
    if (Biber::Config->getblxoption('skiplab', $bee, $citekey)) {
      return;
    }

    $logger->trace("Creating extratitleyear information for '$citekey'");

    # The entry may have no labeltitle source at all. Only look the field up
    # when there is one so that get_field is never called with an undefined
    # field name.
    my $title_string = '';
    if (my $lti = $be->get_labeltitle_info) {
      $title_string = $be->get_field($lti) // '';
    }

    # Takes into account the labelyear which can be a range
    my $year_string = $be->get_field('labelyear') || $be->get_field('year') || '';

    my $titleyear_string = "$title_string,$year_string";
    $logger->trace("Setting titleyear to '$titleyear_string' for entry '$citekey'");
    $be->set_field('titleyear', $titleyear_string);
    $logger->trace("Incrementing titleyear for '$title_string'");
    Biber::Config->incr_seen_titleyear($title_string, $year_string);
  }

  return;
}
1430
1431
1432=head2 process_sets
1433
1434    Postprocess set entries
1435
1436    Checks for common set errors and enforces 'dataonly' for set members
1437
1438=cut
1439
sub process_sets {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);

  # Entry options which make up the Biber parts of the virtual "dataonly"
  # applied to set members
  my @member_opts = ('skiplab', 'skipbiblist', 'uniquename=0', 'uniquelist=0');

  if (my @entrysetkeys = Biber::Config->get_set_children($citekey)) {
    # $citekey is a set with recorded members: enforce "dataonly" for the
    # members and point each member back at the set
    foreach my $member (@entrysetkeys) {
      process_entry_options($member, [ @member_opts ]);

      my $me = $section->bibentry($member);
      if ($me->get_field('entryset')) {
        biber_warn("Field 'entryset' is no longer needed in set member entries in Biber - ignoring in entry '$member'", $me);
        $me->del_field('entryset');
      }
      # This ends up setting \inset{} in the bbl
      $me->set_field('entryset', [ $citekey ]);
    }
  }
  else {
    # No members were recorded for this key. For an entry of type 'set' that
    # means there is no usable entryset. This warning used to sit inside the
    # branch above, which only runs when there are members, so it could
    # never be issued.
    if ($be->get_field('entrytype') eq 'set') {
      biber_warn("No entryset found for entry $citekey of type 'set'", $be);
    }

    # Also set the skips here for any non-set keys which are in a set and
    # which haven't had them set by being seen as a member of that set yet
    if (Biber::Config->get_set_parents($citekey)) {
      process_entry_options($citekey, [ @member_opts ]);
    }
  }
}
1473
1474=head2 process_labelname
1475
1476    Generate labelname information.
1477
1478=cut
1479
sub process_labelname {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');
  my $lnamespec = Biber::Config->getblxoption('labelnamespec', $bee);
  my $dm = Biber::Config->get_dm;

  # The name list fields of the data model are the same for every candidate,
  # so they are looked up once instead of once per candidate in each loop
  my %is_name_field = map { $_ => 1 } @{$dm->get_fields_of_type('list', 'name')};

  # First we set the normal labelname name: the first candidate of the
  # labelnamespec which is a name field, is not disabled by a biblatex
  # "use<name>" option and is present in the entry
  foreach my $h_ln ( @$lnamespec ) {
    my $lnameopt;
    my $ln = $h_ln->{content};
    # A "short<name>" candidate is controlled by the option of its "<name>"
    if ( $ln =~ /\Ashort(\X+)\z/xms ) {
      $lnameopt = $1;
    }
    else {
      $lnameopt = $ln;
    }

    unless ($is_name_field{$ln}) {
      biber_warn("Labelname candidate '$ln' is not a name field - skipping");
      next;
    }

    # If there is a biblatex option which controls the use of this labelname info, check it
    if ($CONFIG_SCOPE_BIBLATEX{"use$lnameopt"} and
       not Biber::Config->getblxoption("use$lnameopt", $bee, $citekey)) {
      next;
    }

    if ($be->get_field($ln)) {
      $be->set_labelname_info($ln);
      last;
    }
  }

  # Then we loop again to set the labelname name for the fullhash generation code
  # This is because fullhash generation ignores SHORT* fields (section 4.2.4.1, BibLaTeX
  # manual)
  foreach my $h_ln ( @$lnamespec ) {
    my $ln = $h_ln->{content};
    if ( $ln =~ /\Ashort(.+)\z/xms ) {
      next;
    }

    # We have already warned about this above
    next unless $is_name_field{$ln};

    # If there is a biblatex option which controls the use of this labelname info, check it
    if ($CONFIG_SCOPE_BIBLATEX{"use$ln"} and
       not Biber::Config->getblxoption("use$ln", $bee, $citekey)) {
      next;
    }

    if ($be->get_field($ln)) {
      $be->set_labelnamefh_info($ln);
      last;
    }
  }

  unless ($be->get_labelname_info) {
    $logger->debug("Could not determine the labelname source of entry $citekey");
  }
}
1548
1549=head2 process_labeldate
1550
1551    Generate labeldate information
1552
1553=cut
1554
sub process_labeldate {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');
  my $dm = Biber::Config->get_dm;

  # Picks the first usable candidate of the labeldatespec for the entry and
  # derives the labelyear, labelmonth, labelday and datelabelsource fields
  # from it. Nothing is done unless 'labeldate' is enabled for the entrytype
  # or when 'skiplab' is set for the entry.
  if (Biber::Config->getblxoption('labeldate', $bee)) {
    if (Biber::Config->getblxoption('skiplab', $bee, $citekey)) {
      return;
    }

    # True when the candidate being looked at is not a date field and is
    # therefore only usable as a pseudo-year
    my $pseudodate;
    my $ldatespec = Biber::Config->getblxoption('labeldatespec', $bee);
    foreach my $h_ly (@$ldatespec) {
      my $ly = $h_ly->{content};
      if ($h_ly->{'type'} eq 'field') { # labeldate field
        my $ldy;
        my $ldm;
        my $ldd;
        my $datetype;

        # The flag describes the current candidate only. Without this reset,
        # a non-date candidate which is absent from the entry would leave
        # the flag set for a real date field selected later in the list,
        # giving that date the wrong source and suppressing its month and
        # day ranges.
        $pseudodate = 0;

        if ($dm->field_is_datatype('date', $ly)) { # resolve dates
          $datetype = $ly =~ s/date\z//xmsr;
          $ldy = $datetype . 'year';
          $ldm = $datetype . 'month';
          $ldd = $datetype . 'day';
        }
        else {
          $ldy = $ly; # labelyear can be a non-date field so make a pseudo-year
          $pseudodate = 1;
        }
        if ($be->get_field($ldy)) { # did we find a labeldate?
          # set source to field or date field prefix for a real date field
          $be->set_labeldate_info({'field' => { 'year'  => $ldy,
                                                'month' => $ldm,
                                                'day'   => $ldd,
                                                'source' => $pseudodate ? $ldy : $datetype }});
          last;
        }
      }
      elsif ($h_ly->{'type'} eq 'string') { # labelyear fallback string
        $be->set_labeldate_info({'string' => $ly});
        last;
      }
    }

    # Construct labelyear, labelmonth, labelday
    # Might not have been set due to skiplab/dataonly
    if (my $ldi = $be->get_labeldate_info) {
      if (my $df = $ldi->{field}) { # set labelyear to a field value
        $be->set_field('labelyear', $be->get_field($df->{year}));
        $be->set_field('labelmonth', $be->get_field($df->{month})) if $df->{month};
        $be->set_field('labelday', $be->get_field($df->{day})) if $df->{day};
        $be->set_field('datelabelsource', $df->{source});
        # ignore endyear if it's the same as year
        my ($ytype) = $df->{year} =~ /\A(\X*)year\z/xms;
        $ytype = $ytype // ''; # Avoid undef warnings since no match above can make it undef
        # endyear can be null which makes labelyear different to plain year
        if ($be->field_exists($ytype . 'endyear')
            and ($be->get_field($df->{year}) ne $be->get_field($ytype . 'endyear'))) {
          $be->set_field('labelyear',
                         $be->get_field('labelyear') . '\bibdatedash ' . $be->get_field($ytype . 'endyear'));
        }
        # pseudodates (field which are not really dates per se) are just years
        if (not $pseudodate and
            $be->get_field($ytype . 'endmonth')
            and ($be->get_field($df->{month}) ne $be->get_field($ytype . 'endmonth'))) {
          $be->set_field('labelmonth',
                         $be->get_field('labelmonth') . '\bibdatedash ' . $be->get_field($ytype . 'endmonth'));
        }
        # pseudodates (field which are not really dates per se) are just years
        if (not $pseudodate and
            $be->get_field($ytype . 'endday')
            and ($be->get_field($df->{day}) ne $be->get_field($ytype . 'endday'))) {
          $be->set_field('labelday',
                         $be->get_field('labelday') . '\bibdatedash ' . $be->get_field($ytype . 'endday'));
        }
      }
      elsif (my $ys = $ldi->{string}) { # set labelyear to a fallback string
        $be->set_field('labelyear', $ys);
      }
    }
    else {
      $logger->debug("labeldate information of entry $citekey is unset");
    }
  }
}
1644
1645=head2 process_labeltitle
1646
1647  Generate labeltitle
1648
1649  Note that this is not conditionalised on the biblatex "labeltitle"
1650  as labeltitle should always be output since all standard styles need it.
1651  Only extratitle is conditionalised on the biblatex "labeltitle" option.
1652
1653=cut
1654
1655
sub process_labeltitle {
  my $self = shift;
  my $citekey = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  my $ltitlespec = Biber::Config->getblxoption('labeltitlespec', $bee);

  # The first candidate field of the labeltitlespec which has a value in the
  # entry becomes the labeltitle source and its value the labeltitle
  foreach my $h_ltn (@$ltitlespec) {
    my $ltn = $h_ltn->{content};
    if (my $lt = $be->get_field($ltn)) {
      $be->set_labeltitle_info($ltn);
      $be->set_field('labeltitle', $lt);
      last;
    }
  }

  # Report a missing labeltitle once, after all candidates have been tried.
  # Logging inside the loop reported "unset" for every candidate skipped
  # before a match and never for an empty labeltitlespec.
  unless ($be->get_labeltitle_info) {
    $logger->debug("labeltitle information of entry $citekey is unset");
  }
}
1676
1677=head2 process_fullhash
1678
1679    Generate fullhash
1680
1681=cut
1682
sub process_fullhash {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);

  # fullhash is built from the labelname source recorded for fullhash
  # generation, which ignores SHORT* fields and max/mincitenames settings.
  # Without such a source, or without a value in it, no fullhash is set.
  my $lnfhi = $be->get_labelnamefh_info;
  return unless $lnfhi;

  my $lnfh = $be->get_field($lnfhi);
  return unless $lnfh;

  $be->set_field('fullhash', $self->_getfullhash($citekey, $lnfh));
  return;
}
1700
1701=head2 process_namehash
1702
1703    Generate namehash
1704
1705=cut
1706
1707
sub process_namehash {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);

  # namehash is built from the labelname. Without a labelname source, or
  # without a value in it, no namehash is set.
  my $lni = $be->get_labelname_info;
  return unless $lni;

  my $ln = $be->get_field($lni);
  return unless $ln;

  $be->set_field('namehash', $self->_getnamehash($citekey, $ln));
  return;
}
1724
1725
1726=head2 process_pername_hashes
1727
1728    Generate per_name_hashes
1729
1730=cut
1731
sub process_pername_hashes {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $dm = Biber::Config->get_dm;

  # Give every name in every name list field of the entry its own hash.
  # Name list fields which the entry does not have are skipped.
  foreach my $pn (@{$dm->get_fields_of_type('list', 'name')}) {
    next unless my $names = $be->get_field($pn);
    foreach my $n (@{$names->names}) {
      $n->set_hash($self->_genpnhash($citekey, $n));
    }
  }
  return;
}
1749
1750
1751=head2 process_visible_names
1752
1753    Generate the visible name information.
1754    This is used in various places and it is useful to have it generated in one place.
1755
1756=cut
1757
sub process_visible_names {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $dm = Biber::Config->get_dm;

  # The set of name-list fields does not depend on the entry
  my $namefields = $dm->get_fields_of_type('list', 'name');

  foreach my $citekey ( $section->get_citekeys ) {
    $logger->debug("Postprocessing visible names for key '$citekey'");
    my $be = $section->bibentry($citekey);
    my $bee = $be->get_field('entrytype');

    my $maxcn = Biber::Config->getblxoption('maxcitenames', $bee, $citekey);
    my $mincn = Biber::Config->getblxoption('mincitenames', $bee, $citekey);
    my $maxbn = Biber::Config->getblxoption('maxbibnames', $bee, $citekey);
    my $minbn = Biber::Config->getblxoption('minbibnames', $bee, $citekey);
    my $maxan = Biber::Config->getblxoption('maxalphanames', $bee, $citekey);
    my $minan = Biber::Config->getblxoption('minalphanames', $bee, $citekey);

    foreach my $n (@$namefields) {
      next unless my $names = $be->get_field($n);

      my $count = $names->count_names;

      # Cap min*names for this entry at $count. Why? Because imagine we have this:
      #
      # John Smith and Bill Jones
      #
      # and mincitenames=3. Then visibility will be set to 3 but there aren't 3 names to
      # get information from so looping over the visibility count would cause name methods
      # to operate on undef at index 3 and die
      my $l_mincn = $count < $mincn ? $count : $mincn;
      my $l_minbn = $count < $minbn ? $count : $minbn;
      my $l_minan = $count < $minan ? $count : $minan;

      # If name list was truncated in bib with "and others", this overrides max*names
      my $morenames = $names->get_morenames ? 1 : 0;

      # max/minalphanames doesn't care about uniquelist - labels are just labels
      my $visible_names_alpha = ($morenames or $count > $maxan) ? $l_minan : $count;

      # max/mincitenames
      # When truncating, visibility extends to the uniquelist point if uniquelist
      # is set. We know at this stage that if uniquelist is set, there are more than
      # maxcitenames names. We also know that uniquelist > mincitenames because it is a
      # further disambiguation on top of mincitenames so can't be less as you can't
      # disambiguate by losing information.
      # When not truncating, visibility is simply the full list.
      my $visible_names_cite = ($morenames or $count > $maxcn)
        ? ($names->get_uniquelist // $l_mincn)
        : $count;

      # max/minbibnames
      # Same scheme as for citations: the uniquelist point if set, otherwise the
      # (capped) minbibnames when truncating, otherwise the full list.
      my $visible_names_bib = ($morenames or $count > $maxbn)
        ? ($names->get_uniquelist // $l_minbn)
        : $count;

      $logger->trace("Setting visible names (cite) for key '$citekey' to '$visible_names_cite'");
      $logger->trace("Setting visible names (bib) for key '$citekey' to '$visible_names_bib'");
      $logger->trace("Setting visible names (alpha) for key '$citekey' to '$visible_names_alpha'");

      # Record the results on the name list object itself
      $names->set_visible_cite($visible_names_cite);
      $names->set_visible_bib($visible_names_bib);
      $names->set_visible_alpha($visible_names_alpha);
    }
  }
  return;
}
1841
1842
1843=head2 process_labelalpha
1844
1845    Generate the labelalpha and also the variant for sorting
1846
1847=cut
1848
sub process_labelalpha {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  # Don't add a label if skiplab is set for entry
  return if Biber::Config->getblxoption('skiplab', $bee, $citekey);

  # Only generate labels when labelalpha is enabled for this entrytype
  return unless Biber::Config->getblxoption('labelalpha', $bee);

  # _genlabel hands back an array ref holding the label and its sorting variant
  my ($label, $sortlabel) = @{ $self->_genlabel($citekey) };
  $be->set_field('labelalpha', $label);
  $be->set_field('sortlabelalpha', $sortlabel);
  return;
}
1868
1869=head2 process_extraalpha
1870
1871    Generate the extraalpha information
1872
1873=cut
1874
sub process_extraalpha {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);
  my $bee = $be->get_field('entrytype');

  # Only relevant when labelalpha is enabled for this entrytype
  return unless Biber::Config->getblxoption('labelalpha', $bee);

  # Count this entry's labelalpha so that identical labels can be told apart later
  if (my $la = $be->get_field('labelalpha')) {
    Biber::Config->incr_la_disambiguation($la);
  }
  return;
}
1888
1889
1890
1891=head2 process_presort
1892
1893    Put presort fields for an entry into the main Biber bltx state
1894    so that it is all available in the same place since this can be
1895    set per-type and globally too.
1896
1897=cut
1898
sub process_presort {
  my ($self, $citekey) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $be = $section->bibentry($citekey);

  # We are treating presort as an option as it can be set per-type and globally too,
  # so an entry-level presort field is stored as a per-entry option value.
  if (my $ps = $be->get_field('presort')) {
    Biber::Config->setblxoption('presort', $ps, 'PER_ENTRY', $citekey);
  }
  return;
}
1910
1911=head2 process_lists
1912
1913    Sort and filter lists for a section
1914
1915=cut
1916
sub process_lists {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  foreach my $list (@{$self->sortlists->get_lists_for_section($secnum)}) {
    my $lssn = $list->get_sortschemename;
    my $ltype = $list->get_type;
    my $lname = $list->get_name;

    # Last-ditch fallback in case we still don't have a sorting spec
    unless ($list->get_sortscheme) {
      $list->set_sortscheme(Biber::Config->getblxoption('sorting'));
    }

    $list->set_keys([ $section->get_citekeys ]);
    $logger->debug("Populated sortlist '$lname' of type '$ltype' with sortscheme '$lssn' in section $secnum with keys: " . join(', ', $list->get_keys));

    # Sorting is computationally expensive, so look in the section's sorting
    # cache for a previous result computed with an identical scheme. The keys
    # are known to be the same (we just set them to a copy of the section
    # citekeys), so an identical scheme must give identical results and we can
    # simply inherit what sorting would normally set:
    #
    # * sorted keys
    # * sortinit data
    # * extra* data
    $logger->debug("Checking sorting cache for scheme '$lssn'");
    my $cached = first { Compare($list->get_sortscheme, $_->[0]) } @{$section->get_sort_cache};

    if ($cached) {
      $logger->debug("Found sorting cache entry for scheme '$lssn'");
      $logger->trace("Sorting list cache for scheme '$lssn':\n-------------------\n" . Data::Dump::pp($list->get_sortscheme) . "\n-------------------\n");
      $list->set_keys($cached->[1]);
      $list->set_sortinitdata($cached->[2]);
      $list->set_extrayeardata($cached->[3]);
      $list->set_extraalphadata($cached->[4]);
    }
    else {
      $logger->debug("No sorting cache entry for scheme '$lssn'");
      # Sorting
      $self->generate_sortinfo($list);       # generate the sort information
      $self->sort_list($list);               # sort the list
      $self->generate_extra($list) unless Biber::Config->getoption('tool'); # generate the extra* fields

      # Cache the results
      $logger->debug("Adding sorting cache entry for scheme '$lssn'");
      $section->add_sort_cache($list->get_listdata);
    }

    # Filtering
    # This is not really used - filtering is more efficient to do on the biblatex
    # side since we are filtering after sorting anyway. It is used to provide
    # a field=shorthand filter for type=shorthand lists though.
    my $filters = $list->get_filters or next;

    my @kept;
  KEY:
    foreach my $k ($list->get_keys) {
      my $be = $section->bibentry($k);

      # Filter out skipbiblist entries as a special case in lists of type 'list'
      if ($ltype eq 'list') {
        next KEY if Biber::Config->getblxoption('skipbiblist', $be->get_field('entrytype'), $k);
      }

      $logger->debug("Checking key '$k' in list '$lname' against list filters");
      foreach my $t (keys %$filters) {
        my $fs = $filters->{$t};
        # A filter disjunction passes if any of its member checks passes,
        # hence counting the successes with grep(). Every other filter type
        # is a single check.
        my $passed = $t eq 'orfilter'
          ? scalar(grep {check_list_filter($k, $_, $fs->{$_}, $be)} keys %$fs)
          : check_list_filter($k, $t, $fs, $be);
        next KEY unless $passed;
      }
      push @kept, $k;
    }
    $logger->debug("Keys after filtering list '$lname' in section $secnum: " . join(', ', @kept));
    $list->set_keys(\@kept); # Now save the sorted, filtered keys in the list object
  }
  return;
}
2001
2002
2003=head2 check_list_filter
2004
2005    Run an entry through a list filter. Returns a boolean.
2006
2007=cut
2008
sub check_list_filter {
  my ($k, $t, $fs, $be) = @_;
  $logger->debug("Checking key '$k' against filter '$t=" . join(',', @$fs) . "'");

  # How a single filter value is tested against the entry, per kind of test
  my %test_for = (
    type    => sub { $be->get_field('entrytype') eq $_[0] },
    subtype => sub { $be->field_exists('entrysubtype') and
                       $be->get_field('entrysubtype') eq $_[0] },
    keyword => sub { $be->has_keyword($_[0]) },
    field   => sub { $be->field_exists($_[0]) },
  );

  # Filter name => [ kind of test, whether a match is wanted (1) or forbidden (0) ]
  my %rule_for = (
    type       => ['type',    1],
    nottype    => ['type',    0],
    subtype    => ['subtype', 1],
    notsubtype => ['subtype', 0],
    keyword    => ['keyword', 1],
    notkeyword => ['keyword', 0],
    field      => ['field',   1],
    notfield   => ['field',   0],
  );

  # Filters we don't know about never reject an entry
  my $rule = $rule_for{$t} or return 1;
  my ($kind, $want_match) = @$rule;
  my $test = $test_for{$kind};

  # The filter values are alternatives: one matching value is a match
  my $matched = (grep { $test->($_) } @$fs) ? 1 : 0;

  return $matched == $want_match ? 1 : 0;
}
2040
2041=head2 generate_sortinfo
2042
2043    Generate information for sorting
2044
2045=cut
2046
sub generate_sortinfo {
  my ($self, $list) = @_;

  # The scheme is the same for every key in the list so fetch it only once
  my $sortscheme = $list->get_sortscheme;

  foreach my $key ($list->get_keys) {
    $self->_generatesortinfo($key, $list, $sortscheme);
  }
  return;
}
2059
2060=head2 uniqueness
2061
2062    Generate the uniqueness information needed when creating .bbl
2063
2064=cut
2065
sub uniqueness {
  my $self = shift;
  # Generate uniqueness information according to this algorithm:
  # 1. Generate uniquename if uniquename option is set
  # 2. if (uniquelist has never run before OR step 1 changed any uniquename values) {
  #      goto step 3
  #    } else { return }
  # 3. Completely regenerate uniquelist if uniquelist option is set
  # 4. if (step 3 changed any uniquelist values) {
  #      goto step 1
  #    } else { return }

  # uniquelist can never shorten to a list shorter than maxcitenames because:
  # * Shortening a list can't make it unique
  # * You can't lengthen it if the list is shorter than maxcitenames because there
  #   is no more information to add that you don't already have.
  # uniquelist cannot be less than mincitenames as the list is either unambiguous
  # at mincitenames or it isn't and uniquelist needs more information by adding items

  # Set a flag for first uniquelist pass. This is a special case as we always want to run
  # at least one uniquelist pass if requested, regardless of unul_done global flag.
  my $first_ul_pass = 1;

  # Keep alternating between the two passes until one of them changes nothing,
  # at which point uniquename/uniquelist disambiguation is finished
  until (Biber::Config->get_unul_done) {
    # Generate uniquename information, if requested
    Biber::Config->set_unul_changed(0); # reset state for global unul changed flag
    $self->create_uniquename_info;
    $self->generate_uniquename;

    # Generate uniquelist information, if requested
    # Always run uniquelist at least once, if requested
    last if not $first_ul_pass and Biber::Config->get_unul_done;

    Biber::Config->set_unul_changed(0); # reset state for global unul changed flag
    $first_ul_pass = 0; # Ignore special case when uniquelist has run once
    $self->create_uniquelist_info;
    $self->generate_uniquelist;
  }
  return;
}
2115
2116
2117=head2 create_uniquename_info
2118
2119    Gather the uniquename information as we look through the names
2120
2121    What is happening in here is the following:
    We are registering the number of occurrences of each name, name+init and fullname
    within a specific context. For example, the context is "global" with uniquename < 5
    and "name list" for uniquename=5 or 6. The keys we store to count this are the most specific
    information for the context, so, for uniquename < 5, this is the full name and for
    uniquename=5 or 6, this is the complete list of full names. These keys have values in a hash
    which are ignored. They serve only to accumulate repeated occurrences within the context
    and we don't care about this and so the values are a useful sinkhole for such repetition.
2129
2130    For example, if we find in the global context a lastname "Smith" in two different entries
2131    under the same form "Alan Smith", the data structure will look like:
2132
2133    {Smith}->{global}->{Alan Smith} = 2
2134
2135    We don't care about the value as this means that there are 2 "Alan Smith"s in the global
2136    context which need disambiguating identically anyway. So, we just count the keys for the
2137    lastname "Smith" in the global context to see how ambiguous the lastname itself is. This
2138    would be "1" and so "Alan Smith" would get uniquename=0 because it's unambiguous as just
2139    "Smith".
2140
2141    The same goes for "minimal" list context disambiguation for uniquename=5 or 6.
2142    For example, if we had the lastname "Smith" to disambiguate in two entries with labelname
2143    "John Smith and Alan Jones", the data structure would look like:
2144
2145    {Smith}->{Smith+Jones}->{John Smith+Alan Jones} = 2
2146
2147    Again, counting the keys of the context for the lastname gives us "1" which means we
2148    have uniquename=0 for "John Smith" in both entries because it's the same list. This also works
2149    for repeated names in the same list "John Smith and Bert Smith". Disambiguating "Smith" in this:
2150
2151    {Smith}->{Smith+Smith}->{John Smith+Bert Smith} = 2
2152
2153    So both "John Smith" and "Bert Smith" in this entry get uniquename=0 (of course, as long as
2154    there are no other "X Smith and Y Smith" entries where X != "John" or Y != "Bert").
2155
2156=cut
2157
sub create_uniquename_info {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $bibentries = $section->bibentries;

  # Reset uniquename information as we have to generate it
  # again because uniquelist information might have changed
  Biber::Config->reset_uniquenamecount;

  foreach my $citekey ( $section->get_citekeys ) {
    my $be = $bibentries->entry($citekey);
    my $bee = $be->get_field('entrytype');

    next unless my $un = Biber::Config->getblxoption('uniquename', $bee, $citekey);

    $logger->trace("Generating uniquename information for '$citekey'");

    next unless my $lni = $be->get_labelname_info;

    my $nl = $be->get_field($lni);

    # Set the index limit beyond which we don't look for disambiguating information
    # Undefined when uniquelist is not set. If defined, $ul will always be >1,
    # see comment in set_uniquelist() in Names.pm
    my $ul = $nl->get_uniquelist;

    my $maxcn = Biber::Config->getblxoption('maxcitenames', $bee, $citekey);
    my $mincn = Biber::Config->getblxoption('mincitenames', $bee, $citekey);

    # These options are the same for every name in the entry
    my $useprefix  = Biber::Config->getblxoption('useprefix', $bee, $citekey);
    my $uniquelist = Biber::Config->getblxoption('uniquelist', $bee, $citekey);

    # Note that we don't determine if a name is unique here -
    # we can't, we're still processing entries at this point.
    # Here we are just recording seen combinations of:
    #
    # lastname and how many name context keys contain this (uniquename = 0)
    # lastnames+initials and how many name context keys contain this (uniquename = 1)
    # Full name and how many name context keys contain this (uniquename = 2)
    #
    # A name context can be either a complete single name or a list of names
    # depending on whether uniquename=min* or not
    #
    # Anything which has more than one combination for both of these would
    # be uniquename = 2 unless even the full name doesn't disambiguate
    # and then it is left at uniquename = 0

    my $num_names = $nl->count_names;
    my $names = $nl->names;
    # If name list was truncated in bib with "and others", this overrides maxcitenames
    my $morenames = $nl->get_morenames ? 1 : 0;

    my @truncnames;
    my @lastnames;
    my @fullnames;
    my @initnames;

    foreach my $name (@$names) {
      # We need to track two types of uniquename disambiguation here:
      #
      # 1. Information to disambiguate visible names from visible names
      #    where "visibility" is governed by uniquelist/max/mincitenames.
      #    This is the actual "uniquename" feature information.
      # 2. Information to disambiguate all names, regardless of visibility
      #    This is needed for uniquelist because it needs to construct
      #    hypothetical ambiguity information for every list position.
      #
      # Here we collect the visible names, for type 1.
      next unless _un_name_is_visible($name, $un, $ul, $morenames, $num_names, $maxcn, $mincn);

      push @truncnames, $name;
      if ($un == 5 or $un == 6) {
        push @lastnames, $name->get_lastname;
        push @fullnames, $name->get_namestring;
        push @initnames, $name->get_nameinitstring;
      }
    }

    # Information for mininit ($un=5) or minfull ($un=6): the name context is the
    # list of visible lastnames and the key is the corresponding list of
    # lastname+initials (mininit) or full names (minfull)
    my $minimal_context;
    my $minimal_key;
    if ($un == 5 or $un == 6) {
      $minimal_context = join("\x{10FFFD}", @lastnames);
      $minimal_key     = join("\x{10FFFD}", $un == 5 ? @initnames : @fullnames);
      if (scalar(@lastnames) < $num_names or $morenames) {
        $minimal_context .= "\x{10FFFD}et al"; # if truncated, record this
        $minimal_key     .= "\x{10FFFD}et al"; # if truncated, record this
      }
    }

    foreach my $name (@$names) {
      my $lastname       = _un_lastname_key($name, $useprefix);
      my $nameinitstring = $name->get_nameinitstring;
      my $namestring     = $name->get_namestring;
      my $namecontext;
      my $key;

      # Context and key depend on the uniquename setting
      if ($un == 1 or $un == 3) {
        $namecontext = 'global';
        $key = $nameinitstring;
      }
      elsif ($un == 2 or $un == 4) {
        $namecontext = 'global';
        $key = $namestring;
      }
      elsif ($un == 5 or $un == 6) {
        $namecontext = $minimal_context;
        $key = $minimal_key;
        $name->set_minimal_info($minimal_context);
      }

      if (first {Compare($_, $name)} @truncnames) {
        # Record a uniqueness information entry for the lastname showing that
        # this lastname has been seen in this name context
        Biber::Config->add_uniquenamecount($lastname, $namecontext, $key);

        # Record a uniqueness information entry for the lastname+initials showing that
        # this lastname_initials has been seen in this name context
        Biber::Config->add_uniquenamecount($nameinitstring, $namecontext, $key);

        # Record a uniqueness information entry for the fullname
        # showing that this fullname has been seen in this name context
        Biber::Config->add_uniquenamecount($namestring, $namecontext, $key);
      }

      # As above but here we are collecting (separate) information for all
      # names, regardless of visibility (needed to track uniquelist)
      if ($uniquelist) {
        Biber::Config->add_uniquenamecount_all($lastname, $namecontext, $key);
        Biber::Config->add_uniquenamecount_all($nameinitstring, $namecontext, $key);
        Biber::Config->add_uniquenamecount_all($namestring, $namecontext, $key);
      }
    }
  }

  return;
}

=head2 generate_uniquename

   Generate the per-name uniquename values using the information
   harvested by create_uniquename_info()

=cut

sub generate_uniquename {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $bibentries = $section->bibentries;

  # Now use the information to set the actual uniquename information
  foreach my $citekey ( $section->get_citekeys ) {
    my $be = $bibentries->entry($citekey);
    my $bee = $be->get_field('entrytype');

    next unless my $un = Biber::Config->getblxoption('uniquename', $bee, $citekey);

    $logger->trace("Setting uniquename for '$citekey'");

    next unless my $lni = $be->get_labelname_info;

    my $nl = $be->get_field($lni);

    # Set the index limit beyond which we don't look for disambiguating information
    # If defined, $ul will always be >1, see comment in set_uniquelist() in Names.pm
    my $ul = $nl->get_uniquelist;

    my $maxcn = Biber::Config->getblxoption('maxcitenames', $bee, $citekey);
    my $mincn = Biber::Config->getblxoption('mincitenames', $bee, $citekey);

    # These options are the same for every name in the entry
    my $useprefix  = Biber::Config->getblxoption('useprefix', $bee, $citekey);
    my $uniquelist = Biber::Config->getblxoption('uniquelist', $bee, $citekey);

    my $num_names = $nl->count_names;
    my $names = $nl->names;
    # If name list was truncated in bib with "and others", this overrides maxcitenames
    my $morenames = $nl->get_morenames ? 1 : 0;

    my @truncnames;

    foreach my $name (@$names) {
      if (_un_name_is_visible($name, $un, $ul, $morenames, $num_names, $maxcn, $mincn)) {
        push @truncnames, $name;
      }
      else {
        # Set anything now not visible due to uniquelist back to 0
        $name->reset_uniquename;
      }
    }

    foreach my $name (@$names) {
      # This must be built exactly as in create_uniquename_info() because it
      # is used to look up the counts recorded there
      my $lastname       = _un_lastname_key($name, $useprefix);
      my $nameinitstring = $name->get_nameinitstring;
      my $namestring     = $name->get_namestring;
      my $namecontext    = 'global'; # default
      if ($un == 5 or $un == 6) {
        $namecontext = $name->get_minimal_info; # $un=5 and 6
      }

      # Uniquename proper: only for visible names
      if (first {Compare($_, $name)} @truncnames) {
        $name->set_uniquename(_un_level('get_numofuniquenames', $un, $namecontext,
                                        $lastname, $nameinitstring, $namestring));
      }

      # As above but not just for visible names (needed for uniquelist)
      if ($uniquelist) {
        $name->set_uniquename_all(_un_level('get_numofuniquenames_all', $un, $namecontext,
                                            $lastname, $nameinitstring, $namestring));
      }
    }
  }
  return;
}

# Private helper for create_uniquename_info() and generate_uniquename().
# Returns 1 if a name takes part in "visible" uniquename disambiguation, else 0.
# We want to record disambiguation information for visible names when:
# * uniquename = 3 (allinit) or 4 (allfull)
# * Uniquelist is set and a name appears before the uniquelist truncation
# * Uniquelist is not set and the entry has an explicit "and others" at the end
#   since this means that every name is less than maxcitenames by definition
# * Uniquelist is not set and a name list is shorter than the maxcitenames truncation
# * Uniquelist is not set, a name list is longer than the maxcitenames truncation
#   and the name appears before the mincitenames truncation
sub _un_name_is_visible {
  my ($name, $un, $ul, $morenames, $num_names, $maxcn, $mincn) = @_;
  return 1 if $un == 3 or $un == 4;
  return 1 if $ul and $name->get_index <= $ul;
  return 1 if $morenames;
  return 1 if $num_names <= $maxcn;
  return 1 if $name->get_index <= $mincn; # implicitly, $num_names > $maxcn here
  return 0;
}

# Private helper for create_uniquename_info() and generate_uniquename().
# Returns the last name string used as a uniquename counting key.
# We have to differentiate here between last names with and without
# prefices otherwise we end up falsely trying to disambiguate
# "X" and "von X" using initials/first names when there is no need.
# The prefix is only prepended when useprefix is true AND the name has one.
# The condition is parenthesised on purpose: "?:" binds tighter than "and", so
# without the parentheses a false useprefix value (e.g. 0) would itself end up
# prepended to the last name.
sub _un_lastname_key {
  my ($name, $useprefix) = @_;
  my $prefix = ($useprefix and $name->get_prefix) ? $name->get_prefix : '';
  return $prefix . $name->get_lastname;
}

# Private helper for generate_uniquename().
# Works out the uniquename value for one name from the recorded counts.
# $counter is the name of the Biber::Config method used to fetch the counts
# (get_numofuniquenames or get_numofuniquenames_all).
sub _un_level {
  my ($counter, $un, $namecontext, $lastname, $nameinitstring, $namestring) = @_;

  # If there is one key for the lastname, then it's unique using just lastname
  # because either:
  # * There are no other identical lastnames
  # * All identical lastnames have a lastname+init ($un=5) or fullname ($un=6)
  #   which is identical and therefore can't be disambiguated any further anyway
  return 0 if Biber::Config->$counter($lastname, $namecontext) == 1;

  # Otherwise, if there is one key for the lastname+inits, then it's unique
  # using initials because either:
  # * There are no other identical lastname+inits
  # * All identical lastname+inits have a fullname ($un=6) which is identical
  #   and therefore can't be disambiguated any further anyway
  return 1 if Biber::Config->$counter($nameinitstring, $namecontext) == 1;

  # Otherwise if there is one key for the fullname, then it's unique using
  # the fullname because:
  # * There are no other identical full names
  #
  # But restrict to uniquename biblatex option maximum
  if (Biber::Config->$counter($namestring, $namecontext) == 1) {
    my %max_level = (1 => 1,   # init
                     2 => 2,   # full
                     3 => 1,   # allinit
                     4 => 2,   # allfull
                     5 => 1,   # mininit
                     6 => 2);  # minfull
    return $max_level{$un};
  }

  # Otherwise, there must be more than one key for the full name,
  # so set to 0 since nothing will uniqueify this name and it's just
  # misleading to expand it
  return 0;
}
2462
2463=head2 create_uniquelist_info
2464
    Gather the uniquelist information as we look through the names
2466
2467=cut
2468
sub create_uniquelist_info {
  my $self = shift;
  my $current = $self->get_current_section;
  my $section = $self->sections->get_section($current);
  my $entries = $section->bibentries;

  # The counts depend on uniquename data which may have changed since the last
  # pass, so they are always rebuilt from scratch
  Biber::Config->reset_uniquelistcount;

  ENTRY: foreach my $citekey ( $section->get_citekeys ) {
    my $be = $entries->entry($citekey);
    my $bee = $be->get_field('entrytype');
    my $maxcn = Biber::Config->getblxoption('maxcitenames', $bee, $citekey);
    my $mincn = Biber::Config->getblxoption('mincitenames', $bee, $citekey);

    # Nothing to gather when uniquelist is switched off for this entry
    my $ul = Biber::Config->getblxoption('uniquelist', $bee, $citekey);
    next ENTRY unless $ul;

    $logger->trace("Generating uniquelist information for '$citekey'");

    # Without a labelname there is no name list to count
    my $lni = $be->get_labelname_info;
    next ENTRY unless $lni;

    my $nl = $be->get_field($lni);
    my $num_names = $nl->count_names;
    my $names_so_far = [];         # grows by one name form per iteration
    my $minyear_names_so_far = []; # only the names relevant to uniquelist=minyear

    foreach my $name (@{$nl->names}) {
      my $lastname = $name->get_lastname;
      my $nameinitstring = $name->get_nameinitstring;
      my $namestring = $name->get_namestring;

      # uniquelist=minyear (value 2) only looks at the names which stay visible
      # once the list has been truncated by max/mincitenames
      my $counts_for_minyear = ($ul == 2 and
                                $num_names > $maxcn and
                                $name->get_index <= $mincn) ? 1 : 0;

      # Choose the name form required by the uniquename setting of this name:
      #   unset or 0 - lastname is enough
      #   1          - lastname with initials
      #   2          - full name
      # Any other value contributes nothing to the lists
      my $un_all = $name->get_uniquename_all;
      my @form = (not defined($un_all) or $un_all == 0) ? ($lastname)
               : ($un_all == 1)                         ? ($nameinitstring)
               : ($un_all == 2)                         ? ($namestring)
               :                                          ();

      push @$names_so_far, @form;
      push @$minyear_names_so_far, @form if $counts_for_minyear;

      # Count every leading sub-list, not only the complete list
      Biber::Config->add_uniquelistcount($names_so_far);
    }

    # The complete list is counted separately. Without this, identical name
    # lists would be falsely "disambiguated" from each other by expanding them
    # in full, because every leading part of each list has a count above one.
    # When the count for the final, complete list is more than one (two or more
    # identical name lists) expanding is pointless and is not done at all.
    Biber::Config->add_uniquelistcount_final($names_so_far);

    # Add count for uniquelist=minyear
    if (@$minyear_names_so_far) {
      Biber::Config->add_uniquelistcount_minyear($minyear_names_so_far, $be->get_field('labelyear'), $names_so_far);
    }
  }
  return;
}
2550
2551
2552=head2 generate_uniquelist
2553
2554   Generate the per-namelist uniquelist values using the information
2555   harvested by create_uniquelist_info()
2556
2557=cut
2558
sub generate_uniquelist {
  my $self = shift;
  my $current = $self->get_current_section;
  my $section = $self->sections->get_section($current);
  my $entries = $section->bibentries;

  ENTRY: foreach my $citekey ( $section->get_citekeys ) {
    my $be = $entries->entry($citekey);
    my $bee = $be->get_field('entrytype');
    my $maxcn = Biber::Config->getblxoption('maxcitenames', $bee, $citekey);
    my $mincn = Biber::Config->getblxoption('mincitenames', $bee, $citekey);

    # Entries with uniquelist switched off are left alone
    my $ul = Biber::Config->getblxoption('uniquelist', $bee, $citekey);
    next ENTRY unless $ul;

    $logger->trace("Creating uniquelist for '$citekey'");

    # Without a labelname there is no name list to work on
    my $lni = $be->get_labelname_info;
    next ENTRY unless $lni;

    my $nl = $be->get_field($lni);
    my $unique_prefix = [];
    my $num_names = $nl->count_names;

    NAME: foreach my $name (@{$nl->names}) {
      my $lastname = $name->get_lastname;
      my $nameinitstring = $name->get_nameinitstring;
      my $namestring = $name->get_namestring;

      # Extend the list with the name form required by uniquename:
      #   unset or 0 - lastname is enough
      #   1          - lastname with initials
      #   2          - full name
      # Any other value adds nothing
      my $un_all = $name->get_uniquename_all;
      my @form = (not defined($un_all) or $un_all == 0) ? ($lastname)
               : ($un_all == 1)                         ? ($nameinitstring)
               : ($un_all == 2)                         ? ($namestring)
               :                                          ();
      push @$unique_prefix, @form;

      # With uniquelist=minyear, uniquelist is not set at all when no other
      # entry shares the same max/mincitenames visible list with a different
      # year, as there is then nothing to disambiguate from
      if ($ul == 2 and
          $num_names > $maxcn and
          $name->get_index <= $mincn and
          Biber::Config->get_uniquelistcount_minyear($unique_prefix, $be->get_field('labelyear')) == 1) {
        $logger->trace("Not setting uniquelist=minyear for '$citekey'");
        next ENTRY;
      }

      # Stop as soon as the list built so far is unique. This point is reached
      # for uniquelist=minyear too, whenever the key was not skipped above
      last NAME if Biber::Config->get_uniquelistcount($unique_prefix) == 1;
    }

    $logger->trace("Setting uniquelist for '$citekey' using " . join(',', @$unique_prefix));
    $logger->trace("Uniquelist count for '$citekey' is '" . Biber::Config->get_uniquelistcount_final($unique_prefix) . "'");
    $nl->set_uniquelist($unique_prefix, $maxcn, $mincn);
  }
  return;
}
2629
2630
2631=head2 generate_extra
2632
2633    Generate information for:
2634
2635      * extraalpha
2636      * extrayear
2637      * extratitle
2638      * extratitleyear
2639
2640=cut
2641
sub generate_extra {
  my $self = shift;
  my $list = shift;
  my $current = $self->get_current_section;
  my $section = $self->sections->get_section($current);

  # One row per extra* field. For every key the rows are applied in this order.
  #   option - biblatex option which enables the field for an entrytype
  #   field  - entry field holding the string whose repetitions are counted
  #   label  - how that string is introduced in the trace message
  #   seen   - Biber::Config method returning how often the string was seen
  #   incr   - Biber::Config method returning the next extra* value for it
  #   store  - list method recording the extra* value for the key
  my @extras = (
    { option => 'labeldate',                 # extrayear
      field  => 'nameyear',
      label  => 'nameyear',
      seen   => 'get_seen_nameyear',
      incr   => 'incr_seen_extrayear',
      store  => 'set_extrayeardata_for_key' },
    { option => 'labeltitle',                # extratitle
      field  => 'nametitle',
      label  => 'nametitle',
      seen   => 'get_seen_nametitle',
      incr   => 'incr_seen_extratitle',
      store  => 'set_extratitledata_for_key' },
    { option => 'labeltitleyear',            # extratitleyear
      field  => 'titleyear',
      label  => 'titleyear',
      seen   => 'get_seen_titleyear',
      incr   => 'incr_seen_extratitleyear',
      store  => 'set_extratitleyeardata_for_key' },
    { option => 'labelalpha',                # extraalpha
      field  => 'labelalpha',
      label  => 'labelalpha disambiguation',
      seen   => 'get_la_disambiguation',
      incr   => 'incr_seen_extraalpha',
      store  => 'set_extraalphadata_for_key' },
  );

  # This sub runs once per list, so the extra* counters start afresh each time
  Biber::Config->reset_seen_extra();

  # The values handed out depend on the order of the keys, which is why the
  # list has to be sorted before this sub is called
  KEY: foreach my $key ($list->get_keys) {
    my $be = $section->bibentry($key);
    my $bee = $be->get_field('entrytype');

    # No extra* information at all when skiplab is set.
    # Don't forget that skiplab is implied for set members
    next KEY if Biber::Config->getblxoption('skiplab', $bee, $key);

    EXTRA: foreach my $extra (@extras) {
      next EXTRA unless Biber::Config->getblxoption($extra->{option}, $bee);

      my ($seen, $incr, $store) = @{$extra}{qw(seen incr store)};
      my $value = $be->get_field($extra->{field});

      # Only strings seen more than once need disambiguating
      my $times_seen = Biber::Config->$seen($value);
      next EXTRA unless $times_seen > 1;

      $logger->trace("$extra->{label} for '$value': " . $times_seen);
      my $v = Biber::Config->$incr($value);
      $list->$store($key, $v);
    }
  }
  return;
}
2698
2699=head2 generate_singletitle
2700
2701    Generate the singletitle field, if requested. The information for generating
2702    this is gathered in process_singletitle()
2703
2704=cut
2705
sub generate_singletitle {
  my $self = shift;
  my $current = $self->get_current_section;
  my $section = $self->sections->get_section($current);
  my $entries = $section->bibentries;

  ENTRY: foreach my $citekey ( $section->get_citekeys ) {
    my $be = $entries->entry($citekey);

    # Entrytypes without the singletitle option are not touched (or logged)
    next ENTRY unless Biber::Config->getblxoption('singletitle', $be->get_field('entrytype'));

    # singletitle is set when the entry has a 'seenwork' value and that value
    # has been counted fewer than two times
    my $work = $be->get_field('seenwork');
    if ($work and Biber::Config->get_seenwork($work) < 2) {
      $logger->trace("Setting singletitle for '$citekey'");
      $be->set_field('singletitle', 1);
      next ENTRY;
    }

    $logger->trace("Not setting singletitle for '$citekey'");
  }
  return;
}
2727
2728=head2 sort_list
2729
2730    Sort a list using information in entries according to a certain sorting scheme.
2731    Use a flag to skip info messages on first pass
2732
2733=cut
2734
# Sorts the keys of $list in place: reads them with get_keys, orders them by the
# per-key sort data (get_sortdata) according to the list's sorting scheme and
# writes the result back with set_keys. Two implementations exist, selected by
# the 'fastsort' option: Perl's built-in sort under "use locale", or
# Biber::UCollate. Both build the comparison as source text, one "cmp" term per
# entry of the scheme's {spec}, and run it through string eval.
# Args:    $list - the sort list object to sort
# Returns: nothing
sub sort_list {
  my $self = shift;
  my $list = shift;
  my $sortscheme = $list->get_sortscheme;
  my @keys = $list->get_keys;
  my $lssn = $list->get_sortschemename;
  my $ltype = $list->get_type;
  my $lname = $list->get_name;
  # Locale from the sorting scheme, falling back to the biblatex 'sortlocale' option
  my $llocale = locale2bcp47($sortscheme->{locale} || Biber::Config->getblxoption('sortlocale'));
  # NOTE(review): $secnum and $section are not referenced again in this sub
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  if (Biber::Config->getoption('sortcase')) {
    $logger->debug("Sorting is by default case-SENSITIVE");
  }
  else {
    $logger->debug("Sorting is by default case-INSENSITIVE");
  }
  $logger->debug("Keys before sort:\n");
  foreach my $k (@keys) {
    $logger->debug("$k => " . $list->get_sortdata($k)->[0]);
  }

  $logger->trace("Sorting sortlist '$lname' of type '$ltype' with sortscheme '$lssn'. Scheme is\n-------------------\n" . Data::Dump::pp($sortscheme) . "\n-------------------\n");

  # Set up locale. Order of priority is:
  # 1. locale value passed to Unicode::Collate::Locale->new() (Unicode::Collate sorts only)
  # 2. Biber sortlocale option
  # 3. Sorting 'locale' option
  # 4. Global biblatex 'sortlocale' option

  my $thislocale = Biber::Config->getoption('sortlocale') || $llocale;
  $logger->debug("Locale for sorting is '$thislocale'");

  if ( Biber::Config->getoption('fastsort') ) {
    # "use locale" is a lexically scoped pragma, so it only affects this branch
    use locale;
    $logger->info("Sorting list '$lname' of type '$ltype' with scheme '$lssn'");
    $logger->debug("Sorting with fastsort (locale $thislocale)");
    # An unavailable locale is only warned about; sorting still goes ahead
    unless (setlocale(LC_ALL, $thislocale)) {
      biber_warn("Unavailable locale $thislocale");
    }

    # Construct a multi-field Schwartzian Transform with the right number of
    # extractions into a string representing an array ref as we mustn't eval this yet
    my $num_sorts = 0;
    my $data_extractor = '[';
    my $sorter;
    my $sort_extractor;
    # Global lowercase setting
    my $glc = Biber::Config->getoption('sortcase') ? '' : 'lc ';

    foreach my $sortset (@{$sortscheme->{spec}}) {
      # The extractor and the sorter are source text which refers to the lexical
      # $list and to sort's $a/$b by name; they are only resolved when eval'd below
      $data_extractor .= '$list->get_sortdata($_)->[1][' . $num_sorts . '],';
      $sorter .= ' || ' if $num_sorts; # don't add separator before first field
      my $lc = $glc; # Casing defaults to global default ...
      my $sc = $sortset->[0]{sortcase};
      # but is overridden by field setting if it exists
      if (defined($sc) and $sc != Biber::Config->getoption('sortcase')) {
        unless ($sc) {
          $lc = 'lc ';
        }
        else {
          $lc = '';
        }
      }

      my $sd = $sortset->[0]{sort_direction};
      if (defined($sd) and $sd eq 'descending') {
        # descending field: compare with $b and $a swapped
        $sorter .= $lc
          . '$b->['
            . $num_sorts
              . '] cmp '
                . $lc
                  . '$a->['
                    . $num_sorts
                      . ']';
      }
      else {
        # ascending field
        $sorter .= $lc
          . '$a->['
            . $num_sorts
              . '] cmp '
                . $lc
                  . '$b->['
                    . $num_sorts
                      . ']';
      }
      $num_sorts++;
    }
    # The key itself goes into the last slot of the extracted array
    $data_extractor .= '$_]';
    # Handily, $num_sorts now equals the number of sort fields, which is exactly
    # the index of the key (the last slot) in the extracted array
    $sort_extractor = '$_->[' . $num_sorts . ']';
    $logger->trace("Sorting structure is: $sorter");

    # Schwartzian transform multi-field sort
    # NOTE(review): the strings are eval'd once per element and, for $sorter,
    # once per comparison; the result of a failed eval is not checked.
    @keys = map  { eval $sort_extractor }
            sort { eval $sorter }
            map  { eval $data_extractor } @keys;
  }
  else {
    require Biber::UCollate;
    # NOTE(review): $collopts is whatever getoption returns and is modified in
    # place below; if that is a reference to the stored option, the changes
    # persist in the configuration - confirm this is intended
    my $collopts = Biber::Config->getoption('collate_options');

    # UCA level 2 if case insensitive sorting is requested
    unless (Biber::Config->getoption('sortcase')) {
      $collopts->{level} = 2;
    }

    # Add upper_before_lower option
    $collopts->{upper_before_lower} = Biber::Config->getoption('sortupper');

    # Create collation object

    my $Collator = Biber::UCollate->new($thislocale, %$collopts);

    my $UCAversion = $Collator->version();
    $logger->info("Sorting list '$lname' of type '$ltype' with scheme '$lssn' and locale '$thislocale'");
    $logger->debug("Sorting with Unicode::Collate (" . stringify_hash($collopts) . ", UCA version: $UCAversion, Locale: " . $Collator->getlocale . ")");

    # Log if U::C::L currently has no tailoring for used locale
    if ($Collator->getlocale eq 'default') {
      $logger->info("No sort tailoring available for locale '$thislocale'");
    }

    # Construct a multi-field Schwartzian Transform with the right number of
    # extractions into a string representing an array ref as we mustn't eval this yet
    my $num_sorts = 0;
    my $data_extractor = '[';
    my $sorter;
    my $sort_extractor;
    foreach my $sortset (@{$sortscheme->{spec}}) {
      my $fc = '';
      my @fc;

      # Re-instantiate collation object if a different locale is required for this sort item.
      # This can't be done in a ->change() method, has to be a new object.
      # NOTE(review): $sl and the collation options are pasted into source text
      # which is eval'd below, so the constructor call is part of every
      # comparison and a quote character in any of these values would alter the
      # generated code - confirm where these values come from
      my $cobj;
      my $sl = locale2bcp47($sortset->[0]{locale});
      if (defined($sl) and $sl ne $thislocale) {
        $cobj = 'Biber::UCollate->new(' . "'$sl'" . ",'" . join("','", %$collopts) . "')";
      }
      else {
        # Refers by name to the lexical $Collator created above
        $cobj = '$Collator';
      }

      # If the case or upper option on a field is not the global default
      # set it locally on the $Collator by constructing a change() method call
      my $sc = $sortset->[0]{sortcase};
      if (defined($sc) and $sc != Biber::Config->getoption('sortcase')) {
        push @fc, $sc ? 'level => 4' : 'level => 2';
      }
      my $su = $sortset->[0]{sortupper};
      if (defined($su) and $su != Biber::Config->getoption('sortupper')) {
        push @fc, $su ? 'upper_before_lower => 1' : 'upper_before_lower => 0';
      }

      if (@fc) {
        # This field has custom collation options
        $fc = '->change(' . join(',', @fc) . ')';
      }
      else {
        # Reset collation options to global defaults if there are no field options
        # We have to do this as ->change modifies the Collation object
        # NOTE(review): assumes $collopts->{level} is defined at this point; it
        # is only set in this sub when 'sortcase' is off - confirm the option
        # always carries a level
        $fc = '->change(level => '
          . $collopts->{level}
            . ' ,upper_before_lower => '
              . $collopts->{upper_before_lower}
                . ')';
      }

      $data_extractor .= '$list->get_sortdata($_)->[1][' . $num_sorts . '],';
      $sorter .= ' || ' if $num_sorts; # don't add separator before first field

      my $sd = $sortset->[0]{sort_direction};
      if (defined($sd) and $sd eq 'descending') {
        # descending field: compare with $b and $a swapped
        $sorter .= $cobj
          . $fc
            . '->cmp($b->['
              . $num_sorts
                . '],$a->['
                  . $num_sorts
                    . '])';
      }
      else {
        # ascending field
        $sorter .= $cobj
          . $fc
            . '->cmp($a->['
              . $num_sorts
                . '],$b->['
                  . $num_sorts
                    . '])';
      }
      $num_sorts++;
    }
    # The key itself goes into the last slot of the extracted array
    $data_extractor .= '$_]';
    # Handily, $num_sorts now equals the number of sort fields, which is exactly
    # the index of the key (the last slot) in the extracted array
    $sort_extractor = '$_->[' . $num_sorts . ']';
    $logger->trace("Sorting extractor is: $sort_extractor");
    $logger->trace("Sorting structure is: $sorter");
    $logger->trace("Data extractor is: $data_extractor");

    # Schwartzian transform multi-field sort
    # NOTE(review): as in the fastsort branch, $sorter is eval'd once per
    # comparison and the result of a failed eval is not checked.
    @keys = map  { eval $sort_extractor }
            sort { eval $sorter }
            map  { eval $data_extractor } @keys;
  }

  $logger->debug("Keys after sort:\n");
  foreach my $k (@keys) {
    $logger->debug("$k => " . $list->get_sortdata($k)->[0]);
  }
  # Store a copy of the sorted keys back into the list
  $list->set_keys([ @keys ]);

  return;
}
2956
2957=head2 prepare
2958
2959    Do the main work.
2960    Process and sort all entries before writing the output.
2961
2962=cut
2963
sub prepare {
  my $self = shift;
  my $out = $self->get_output_obj; # Biber::Output object

  # Global pre-processing which is not tied to any one section
  $self->process_setup;

  # The processing steps applied to every section, strictly in this order
  my @section_steps = (
    'fetch_data',              # Fetch cited key and dependent data from sources
    'process_citekey_aliases', # Remove citekey aliases from citekeys
    'instantiate_dynamic',     # Instantiate any dynamic entries (sets, related)
    'resolve_alias_refs',      # Resolve xref/crossref/xdata aliases to real keys
    'resolve_xdata',           # Resolve xdata entries
    'cite_setmembers',         # Cite set members
    'process_interentry',      # Process crossrefs/sets etc.
    'nullable_check',          # Check entries for nullable fields
    'validate_datamodel',      # Check against data model
    'process_entries_pre',     # Main entry processing loop, part 1
    'uniqueness',              # Generate uniqueness information
    'process_visible_names',   # Generate visible names information for all entries
    'process_entries_post',    # Main entry processing loop, part 2
    'process_lists',           # Process the output lists (sort and filtering)
    'generate_singletitle',    # Generate singletitle field if requested
  );

  SECTION: foreach my $section (@{$self->sections->get_sections}) {
    # A section with no keys which is not allkeys either has nothing to process
    unless ($section->get_citekeys or $section->is_allkeys) {
      next SECTION;
    }

    my $secnum = $section->number;
    $logger->info("Processing section $secnum");

    # Start from a clean state before any step runs
    $section->reset_caches;              # Reset the section caches (sorting, label etc.)
    Biber::Config->_init;                # (Re)initialise Config object
    $self->set_current_section($secnum); # Set the section number we are working on

    foreach my $step (@section_steps) {
      $self->$step;
    }

    # Generate the section output and push it into the output object ready
    # for writing
    $out->create_output_section;
  }

  # Generate the final misc bits of output and push them into the output object
  # ready for writing
  $out->create_output_misc;
  return;
}
3004
3005=head2 prepare_tool
3006
3007    Do the main work for tool mode
3008
3009=cut
3010
sub prepare_tool {
  my $self = shift;
  my $out = $self->get_output_obj; # Biber::Output object

  # Global pre-processing for tool mode
  $self->process_setup_tool;

  # Tool mode works on one section only, numbered '99999'
  my $tool_secnum = 99999;
  my $section = $self->sections->get_section($tool_secnum);

  # Start from a clean state before fetching anything
  $section->reset_caches;                   # Reset the section caches (sorting, label etc.)
  Biber::Config->_init;                     # (Re)initialise Config object
  $self->set_current_section($tool_secnum); # Set the section number we are working on
  $self->fetch_data;                        # Fetch cited key and dependent data from sources

  # Inter-entry data is only resolved when the user asked for it
  if (Biber::Config->getoption('output_resolve')) {
    my @resolve_steps = (
      'resolve_alias_refs', # Resolve xref/crossref/xdata aliases to real keys
      'resolve_xdata',      # Resolve xdata entries
      'process_interentry', # Process crossrefs/sets etc.
    );
    foreach my $step (@resolve_steps) {
      $self->$step;
    }
  }

  $self->validate_datamodel; # Check against data model
  $self->process_lists;      # Process the output lists (sort and filtering)

  # Generate the section output and push it into the output object ready
  # for writing
  $out->create_output_section;
  return;
}
3039
3040
3041=head2 fetch_data
3042
3043    Fetch citekey and dependents data from section datasources
3044    Expects to find datasource packages named:
3045
3046    Biber::Input::<type>::<datatype>
3047
3048    and one defined subroutine called:
3049
3050    Biber::Input::<type>::<datatype>::extract_entries
3051
    which is called with args:

    1: Datasource name
    2: Reference to an array of cite keys to look for
3057
3058    and returns an array of the cite keys it did not find in the datasource
3059
3060=cut
3061
sub fetch_data {
  my $self = shift;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  # Dynamic key entries are not in any datasource, so only static keys are looked for
  my @citekeys = $section->get_static_citekeys;

  # T::B never clears its macro definitions itself, so do it between sections
  # when asked to
  if (Biber::Config->getoption('clrmacros')) {
    $logger->debug('Clearing Text::BibTeX macros definitions');
    Text::BibTeX::delete_all_macros();
  }

  # (Re-)define the old BibTeX month macros to what biblatex wants (two-digit
  # month numbers) unless the user stops this
  unless (Biber::Config->getoption('nostdmacros')) {
    my @month_macros = qw(jan feb mar apr may jun jul aug sep oct nov dec);
    foreach my $idx (0 .. $#month_macros) {
      my $mon = $month_macros[$idx];
      Text::BibTeX::delete_macro($mon);
      Text::BibTeX::add_macro_text($mon, sprintf('%02d', $idx + 1));
    }
  }

  # First pass: the directly cited keys. Every datasource is handed the keys
  # which are still outstanding and returns the ones it could not find either
  my @remaining_keys = @citekeys;
  $logger->debug('Looking for directly cited keys: ' . join(', ', @remaining_keys));
  SOURCE: foreach my $datasource (@{$section->get_datasources}) {
    # Stop early once every key has been found, except for allkeys sections
    last SOURCE unless (@remaining_keys or $section->is_allkeys);

    my ($type, $name, $datatype) = @{$datasource}{qw(type name datatype)};
    my $package = "Biber::Input::${type}::${datatype}";
    eval "require $package" or
      biber_error("Error loading data source package '$package': $@");

    # Tool mode gets the same message without the section number
    my $for_section = Biber::Config->getoption('tool') ? '' : " for section $secnum";
    $logger->info("Looking for $datatype format $type '$name'$for_section");

    my $extract_entries = do {
      no strict 'refs'; # the sub is looked up by its fully qualified name
      \&{"${package}::extract_entries"};
    };
    @remaining_keys = $extract_entries->($name, \@remaining_keys);
  }

  # Whatever is left was found nowhere: warn and move the key from the cited
  # keys to the undefined keys
  $logger->debug("Directly cited keys not found for section '$secnum': " . join(',', @remaining_keys));
  foreach my $citekey (@remaining_keys) {
    biber_warn("I didn't find a database entry for '$citekey' (section $secnum)");
    $section->del_citekey($citekey);
    $section->add_undef_citekey($citekey);
  }

  # No dependent detection in (real) tool mode: that is always allkeys=1 and we
  # don't care about missing dependents which get_dependents() might prune.
  # pseudo_tool mode is bibtex output when not in tool mode. Internally it is
  # essentially the same but without allkeys, so dependents are still needed.
  return if Biber::Config->getoption('tool') and
            not Biber::Config->getoption('pseudo_tool');

  $logger->debug('Building dependents for keys: ' . join(',', $section->get_citekeys));

  # Dependent key list generation is a sub of its own because it recurses to
  # catch nested crossrefs, xdata etc.
  get_dependents($self, [$section->get_citekeys]);
  $logger->debug("Citekeys for section '$secnum' after fetching data: " . join(', ', $section->get_citekeys));
  return;
}
3146
3147=head2 get_dependents
3148
3149  Get dependents of the entries for a given list of citekeys. Is called recursively
3150  until there are no more dependents to look for.
3151
3152=cut
3153
# Finds the dependents (alias targets, dynamic and static set members, xdata,
# xref/crossref targets and related entries) of the given keys, fetches them
# and recurses on the ones which were found until nothing new turns up.
# Args:    $self - the Biber object (this is called as a plain sub)
#          $keys - reference to an array of keys whose dependents are wanted
# Returns: nothing
sub get_dependents {
  my ($self, $keys) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);
  my $dep_map = {};  # Flags the keys which have some deps so we can shortcut deletions
  my $new_deps = []; # Dependents discovered at this level of the recursion

  foreach my $citekey (@$keys) {
    # aliases need resolving here and are treated as dependents
    if (my $real = $section->get_citekey_alias($citekey)) {
      $logger->debug("Alias '$citekey' requires real key '$real'");
      push @$new_deps, $real;
      $dep_map->{$real} = 1;
    }
    # Dynamic sets don't exist yet but their members do
    elsif (my @dmems = $section->get_dynamic_set($citekey)) {
      # skip looking for dependent if it's already there
      foreach my $dm (@dmems) {
        unless ($section->bibentry($dm)) {
          push @$new_deps, $dm;
          $dep_map->{$citekey} = 1;
        }
      }
      $logger->debug("Dynamic set entry '$citekey' has members: " . join(', ', @dmems));
    }
    else {
      # This must exist for all but dynamic sets
      my $be = $section->bibentry($citekey);

      # xdata
      if (my $xdata = $be->get_field('xdata')) {
        foreach my $xdatum (@$xdata) {
          # skip looking for dependent if it's already there (loop suppression)
          push @$new_deps, $xdatum unless $section->bibentry($xdatum);
          $logger->debug("Entry '$citekey' has xdata '$xdatum'");
          $dep_map->{$citekey} = 1;
        }
      }

      # crossrefs/xrefs - an xref takes precedence over a crossref
      my $refkey;
      if ($refkey = $be->get_field('xref') or
          $refkey = $be->get_field('crossref')) {
        # skip looking for dependent if it's already there (loop suppression)
        push @$new_deps, $refkey unless $section->bibentry($refkey);
        $logger->debug("Entry '$citekey' has cross/xref '$refkey'");
        $dep_map->{$citekey} = 1;
      }

      # static sets
      if ($be->get_field('entrytype') eq 'set') {
        my $smems = $be->get_field('entryset');
        # skip looking for dependent if it's already there (loop suppression)
        foreach my $sm (@$smems) {
          unless ($section->has_citekey($sm)) {
            push @$new_deps, $sm;
            $dep_map->{$citekey} = 1;
          }
        }
        $logger->debug("Static set entry '$citekey' has members: " . join(', ', @$smems));
      }

      # Related entries
      if (my $relkeys = $be->get_field('related')) {
        # skip looking for dependent if it's already there (loop suppression)
        foreach my $rm (@$relkeys) {
          unless ($section->has_citekey($rm) or $section->is_related($rm)) {
            # record that $rm is used as a related entry key
            $section->add_related($rm);
            push @$new_deps, $rm;
            $dep_map->{$citekey} = 1;
          }
        }
        $logger->debug("Entry '$citekey' has related entries: " . join(', ', @$relkeys));
      }
    }
  }

  # Remove repeated keys which are dependents of more than one entry
  @$new_deps = uniq @$new_deps;
  my @missing;

  if (@$new_deps) {
    # Now look for the dependents of the directly cited keys
    $logger->debug('Looking for dependent keys: ' . join(', ', @$new_deps));

    # No need to go back to the datasource if allkeys, just see if the keys
    # are in section
    if ($section->is_allkeys) {
      foreach my $dk (@$new_deps) {
        push @missing, $dk unless $section->has_citekey($dk);
      }
    }
    else {
      @missing = @$new_deps;
      foreach my $datasource (@{$section->get_datasources}) {
        # shortcut if we have found all the keys now
        last unless @missing;
        my $type = $datasource->{type};
        my $name = $datasource->{name};
        my $datatype = $datasource->{datatype};
        my $package = 'Biber::Input::' . $type . '::' . $datatype;
        eval "require $package" or
          biber_error("Error loading data source package '$package': $@");
        my $extract_entries = do {
          no strict 'refs'; # the sub is looked up by its fully qualified name
          \&{"${package}::extract_entries"};
        };
        # Each datasource returns the keys it could not find
        @missing = $extract_entries->($name, \@missing);
      }
    }

    # error reporting
    $logger->debug("Dependent keys not found for section '$secnum': " . join(', ', @missing));
    if (@missing) {
      # Drop the references to the missing keys from the cited entries which
      # have dependents
      foreach my $citekey ($section->get_citekeys) {
        next unless $dep_map->{$citekey}; # only if we have some missing deps to delete
        foreach my $missing_key (@missing) {
          $self->remove_undef_dependent($citekey, $missing_key);
        }
      }

      # Remove the missing keys from the list to recurse with. This must not
      # depend on the loop above: when the entry with the missing dependent is
      # itself not a cited key, that loop does nothing, and recursing on a key
      # which has no entry would fail on the undefined entry.
      my %is_missing = map { $_ => 1 } @missing;
      @$new_deps = grep { not $is_missing{$_} } @$new_deps;
    }
  }

  $logger->trace('Recursing in get_dependents with: ' . join(', ', @$new_deps));
  get_dependents($self, $new_deps) if @$new_deps; # recurse if there are more things to find
  return; # bottom of recursion
}
3279
3280
3281=head2 remove_undef_dependent
3282
    Remove all references to a missing dependent key from the given
    entry or dynamic set
3285
3286=cut
3287
sub remove_undef_dependent {
  # Arguments:
  #   $citekey     - key of the entry (or dynamic set) which refers to the
  #                  missing key
  #   $missing_key - the dependent key which should no longer be referenced
  # Returns nothing; the section/entry objects are modified in place.
  my $self = shift;
  my ($citekey, $missing_key) = @_;
  my $secnum = $self->get_current_section;
  my $section = $self->sections->get_section($secnum);

  # Membership test. A grep count is used rather than first() because
  # first() returns the matching element itself, which is false for keys
  # like '0' even though they were found.
  my $has_missing = sub { scalar(grep {$_ eq $missing_key} @{$_[0]}) };

  # remove from any dynamic keys
  if (my @dmems = $section->get_dynamic_set($citekey)){
    if ($has_missing->(\@dmems)) {
      $section->set_dynamic_set($citekey, grep {$_ ne $missing_key} @dmems);
    }
    else {
      # NOTE(review): this warning is only issued when the missing key is
      # NOT a member of the dynamic set - looks possibly inverted, confirm.
      biber_warn("I didn't find a database entry for dynamic set member '$missing_key' - ignoring (section $secnum)");
    }
  }
  else {
    my $be = $section->bibentry($citekey);
    # remove any xrefs
    if ($be->get_field('xref') and ($be->get_field('xref') eq $missing_key)) {
      $be->del_field('xref');
      biber_warn("I didn't find a database entry for xref '$missing_key' in entry '$citekey' - ignoring (section $secnum)");
    }

    # remove any crossrefs
    if ($be->get_field('crossref') and ($be->get_field('crossref') eq $missing_key)) {
      $be->del_field('crossref');
      biber_warn("I didn't find a database entry for crossref '$missing_key' in entry '$citekey' - ignoring (section $secnum)");
    }

    # remove xdata
    if (my $xdata = $be->get_field('xdata')) {
      if ($has_missing->($xdata)) {
        $be->set_datafield('xdata', [ grep {$_ ne $missing_key} @$xdata ]) ;
        biber_warn("I didn't find a database entry for xdata entry '$missing_key' in entry '$citekey' - ignoring (section $secnum)");
      }
    }

    # remove static sets
    if ($be->get_field('entrytype') eq 'set') {
      # An entry without an entryset field is treated as having no members
      my $smems = $be->get_field('entryset') // [];
      if ($has_missing->($smems)) {
        $be->set_datafield('entryset', [ grep {$_ ne $missing_key} @$smems ]);
        biber_warn("I didn't find a database entry for static set member '$missing_key' in entry '$citekey' - ignoring (section $secnum)");
      }
    }

    # remove related entries
    if (my $relkeys = $be->get_field('related')) {
      if ($has_missing->($relkeys)) {
        my @remaining = grep {$_ ne $missing_key} @$relkeys;
        $be->set_datafield('related', [ @remaining ]);
        # If no more related entries, remove the other related fields.
        # The list itself is tested here: an array reference is always
        # true, even when the array it points to is empty.
        unless (@remaining) {
          $be->del_field('relatedtype');
          $be->del_field('relatedstring');
        }
        biber_warn("I didn't find a database entry for related entry '$missing_key' in entry '$citekey' - ignoring (section $secnum)");
      }
    }
  }
  return;
}
3349
3350=head2 _parse_sort
3351
   Convenience sub to parse a .bcf sorting section and return a nice
   sorting object
3354
3355=cut
3356
sub _parse_sort {
  # Takes the parsed sorting section ($root_obj) and returns a hash ref
  # with two keys:
  #   locale - result of locale2bcp47() on the section locale, falling back
  #            to the 'sortlocale' biblatex option
  #   spec   - array ref with one array ref per <sort> element, each holding
  #            the sort level options followed by one single-key hash per
  #            sortitem (content => attributes). Stays undef when no <sort>
  #            element has any sortitems.
  my $root_obj = shift;

  # Attributes which are copied across as-is, but only when defined
  my @item_attrs = qw(substring_side substring_width pad_width pad_char pad_side);
  my @sort_attrs = qw(final sort_direction sortcase sortupper locale);

  my $sorting;
  foreach my $sort (sort {$a->{order} <=> $b->{order}} @{$root_obj->{sort}}) {
    my $sortingitems;

    # Generate sorting pass structures, in "order" sequence
    foreach my $sortitem (sort {$a->{order} <=> $b->{order}} @{$sort->{sortitem}}) {
      my %attributes;
      foreach my $attr (@item_attrs) {
        $attributes{$attr} = $sortitem->{$attr} if defined($sortitem->{$attr});
      }
      push @{$sortingitems}, +{ $sortitem->{content} => \%attributes };
    }

    # Without any sortitems there is nothing to record for this sort,
    # otherwise we would end up with a blank sort
    next unless defined($sortingitems);

    # Sort level options go in front of the sortitems. This is left undef
    # when none of the options are defined.
    my $sopts;
    foreach my $attr (@sort_attrs) {
      $sopts->{$attr} = $sort->{$attr} if defined($sort->{$attr});
    }
    unshift @{$sortingitems}, $sopts;
    push @{$sorting}, $sortingitems;
  }

  return {locale => locale2bcp47($root_obj->{locale} || Biber::Config->getblxoption('sortlocale')),
          spec   => $sorting};
}
3403
3404=head2 _filedump and _stringdump
3405
3406    Dump the biber object with Data::Dump for debugging
3407
3408=cut
3409
sub _filedump {
  # Write a Data::Dump representation of the biber object to $file.
  # Croaks if the file cannot be opened, written to or closed. Returns
  # nothing.
  my ($self, $file) = @_;
  my $fh = IO::File->new($file, '>')
    or croak "Can't open file $file for writing: $!";
  print {$fh} Data::Dump::pp($self)
    or croak "Can't write to file $file: $!";
  # Output is buffered, so a failed write may only show up on close
  close($fh)
    or croak "Can't close file $file: $!";
  return;
}
3417
sub _stringdump {
  # Hand the biber object to Data::Dump::pp and return whatever it returns.
  # The call is returned directly so that it runs in the caller's context.
  my ($self) = @_;
  return Data::Dump::pp($self);
}
3422
34231;
3424
3425__END__
3426
3427=head1 AUTHORS
3428
3429François Charette, C<< <firmicus at ankabut.net> >>
3430Philip Kime C<< <philip at kime.org.uk> >>
3431
3432=head1 BUGS
3433
3434Please report any bugs or feature requests on our Github tracker at
3435L<https://github.com/plk/biber/issues>.
3436
3437=head1 COPYRIGHT & LICENSE
3438
3439Copyright 2009-2015 François Charette and Philip Kime, all rights reserved.
3440
3441This module is free software.  You can redistribute it and/or
3442modify it under the terms of the Artistic License 2.0.
3443
3444This program is distributed in the hope that it will be useful,
3445but without any warranty; without even the implied warranty of
3446merchantability or fitness for a particular purpose.
3447
3448=cut
3449