1package Biber::Input::file::endnotexml;
2use v5.16;
3use strict;
4use warnings;
5use base 'Exporter';
6
7use Carp;
8use Biber::Constants;
9use Biber::Entries;
10use Biber::Entry;
11use Biber::Entry::Names;
12use Biber::Entry::Name;
13use Biber::Sections;
14use Biber::Section;
15use Biber::Utils;
16use Biber::Config;
17use Digest::MD5 qw( md5_hex );
18use Encode;
19use File::Spec;
20use File::Slurp;
21use File::Temp;
22use Log::Log4perl qw(:no_extra_logdie_message);
23use List::AllUtils qw( :all );
24use XML::LibXML;
25use XML::LibXML::Simple;
26use Data::Dump qw(dump);
27use Text::BibTeX qw(:nameparts :joinmethods :metatypes);
28use Text::BibTeX::Name;
29use Text::BibTeX::NameFormat;
30use Unicode::Normalize;
31use Unicode::GCString;
32use URI;
33
34##### This is based on Endnote X4 #####
35
36my $logger = Log::Log4perl::get_logger('main');
37my $orig_key_order = {};
38
39# Determine handlers from data model
40my $dm = Biber::Config->get_dm;
41my $handlers = {
42                'field' => {
43                            'default' => {
44                                          'code'     => \&_literal,
45                                          'date'     => \&_date,
46                                          'entrykey' => \&_literal,
47                                          'integer'  => \&_literal,
48                                          'key'      => \&_literal,
49                                          'literal'  => \&_literal,
50                                          'range'    => \&_range,
51                                          'verbatim' => \&_verbatim,
52                                          'uri'      => \&_uri
53                                         },
54                            'xsv'     => {
55                                          'entrykey' => \&_xsv,
56                                          'keyword'  => \&_xsv,
57                                          'option'   => \&_xsv,
58                                         }
59                           },
60                'list' => {
61                           'default' => {
62                                         'entrykey' => \&_literal,
63                                         'key'      => \&_list,
64                                         'literal'  => \&_list,
65                                         'name'     => \&_name
66                                        }
67                          }
68};
69
70
71=head2 extract_entries
72
73   Main data extraction routine.
74   Accepts a data source identifier (filename in this case),
75   preprocesses the file and then looks for the passed keys,
76   creating entries when it finds them and passes out an
77   array of keys it didn't find.
78
79=cut
80
81sub extract_entries {
82  my ($source, $keys) = @_;
83  my $secnum = $Biber::MASTER->get_current_section;
84  my $section = $Biber::MASTER->sections->get_section($secnum);
85  my $filename;
86  my @rkeys = @$keys;
87  my $tf; # Up here so that the temp file has enough scope to survive until we've
88          # used it
89  $logger->trace("Entering extract_entries() in driver 'endnotexml'");
90
91  # Get a reference to the correct sourcemap sections, if they exist
92  my $smaps = [];
93  # Maps are applied in order USER->STYLE->DRIVER
94  if (defined(Biber::Config->getoption('sourcemap'))) {
95    # User maps
96    if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'user' } @{Biber::Config->getoption('sourcemap')} ) {
97      push @$smaps, $m;
98    }
99    # Style maps
100    if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'style' } @{Biber::Config->getoption('sourcemap')} ) {
101      push @$smaps, $m;
102    }
103    # Driver default maps
104    if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'driver'} @{Biber::Config->getoption('sourcemap')} ) {
105      push @$smaps, $m;
106    }
107  }
108
109  # If it's a remote data file, fetch it first
110  if ($source =~ m/\A(?:http|ftp)(s?):\/\//xms) {
111    $logger->info("Data source '$source' is a remote EndNote XML datasource - fetching ...");
112    if ($1) { # HTTPS
113      # use IO::Socket::SSL qw(debug99); # useful for debugging SSL issues
114      # We have to explicitly set the cert path because otherwise the https module
115      # can't find the .pem when PAR::Packer'ed
116      # Have to explicitly try to require Mozilla::CA here to get it into %INC below
117      # It may, however, have been removed by some biber unpacked dists
118      if (not exists($ENV{PERL_LWP_SSL_CA_FILE}) and
119          not exists($ENV{PERL_LWP_SSL_CA_PATH}) and
120          not defined(Biber::Config->getoption('ssl-nointernalca')) and
121          eval {require Mozilla::CA}) {
122        # we assume that the default CA file is in .../Mozilla/CA/cacert.pem
123        (my $vol, my $dir, undef) = File::Spec->splitpath( $INC{"Mozilla/CA.pm"} );
124        $dir =~ s/\/$//; # splitpath sometimes leaves a trailing '/'
125        $ENV{PERL_LWP_SSL_CA_FILE} = File::Spec->catpath($vol, "$dir/CA", 'cacert.pem');
126      }
127
128      # fallbacks for, e.g., linux
129      unless (exists($ENV{PERL_LWP_SSL_CA_FILE})) {
130        foreach my $ca_bundle (qw{
131                                   /etc/ssl/certs/ca-certificates.crt
132                                   /etc/pki/tls/certs/ca-bundle.crt
133                                   /etc/ssl/ca-bundle.pem
134                               }) {
135          next if ! -e $ca_bundle;
136          $ENV{PERL_LWP_SSL_CA_FILE} = $ca_bundle;
137          last;
138        }
139        foreach my $ca_path (qw{
140                                 /etc/ssl/certs/
141                                 /etc/pki/tls/
142                             }) {
143          next if ! -d $ca_path;
144          $ENV{PERL_LWP_SSL_CA_PATH} = $ca_path;
145          last;
146        }
147      }
148
149      if (defined(Biber::Config->getoption('ssl-noverify-host'))) {
150          $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
151      }
152      require LWP::Protocol::https;
153    }
154    require LWP::Simple;
155    $tf = File::Temp->new(TEMPLATE => 'biber_remote_data_source_XXXXX',
156                          DIR => $Biber::MASTER->biber_tempdir,
157                          SUFFIX => '.xml');
158    unless (LWP::Simple::is_success(LWP::Simple::getstore($source, $tf->filename))) {
159      biber_error("Could not fetch '$source'");
160    }
161    $filename = $tf->filename;
162  }
163  else {
164    # Need to get the filename so we increment
165    # the filename count for preambles at the bottom of this sub
166    unless ($filename = locate_biber_file($source)) {
167      biber_error("Cannot find '$source'!")
168    }
169  }
170
171  # Log that we found a data file
172  $logger->info("Found EndNote XML data source '$filename'");
173
174  # Set up XML parser and namespaces
175  my $parser = XML::LibXML->new();
176  my $xml = File::Slurp::read_file($filename) or biber_error("Can't read file $filename");
177  $xml = NFD(decode('UTF-8', $xml));# Unicode NFD boundary
178  my $enxml = $parser->parse_string($xml);
179  my $xpc = XML::LibXML::XPathContext->new($enxml);
180
181  if ($section->is_allkeys) {
182    $logger->debug("All citekeys will be used for section '$secnum'");
183    # Loop over all entries, creating objects
184    foreach my $entry ($xpc->findnodes("/xml/records/record")) {
185      $logger->debug('Parsing Endnote XML entry object ' . $entry->nodePath);
186
187      # If an entry has no key, ignore it and warn
188      unless ($entry->findvalue('./rec-number')) {
189        biber_warn("Invalid or undefined entry ID in file '$filename', skipping ...");
190        next;
191      }
192
193      my $ek = $entry->findvalue('./rec-number');
194      # If we've already seen a case variant, warn
195      if (my $okey = $section->has_badcasekey($ek)) {
196        biber_warn("Possible typo (case mismatch): '$ek' and '$okey' in file '$filename', skipping '$ek' ...");
197      }
198
199      # If we've already seen this key, ignore it and warn
200      if ($section->has_everykey($ek)) {
201        biber_warn("Duplicate entry key: '$ek' in file '$filename', skipping ...");
202        next;
203      }
204      else {
205        $section->add_everykey($ek);
206      }
207
208      my $dbdid = $entry->findvalue('./foreign-keys/key/@db-id');
209      my $key = $ek;
210
211      # Record a key->datasource name mapping for error reporting
212      $section->set_keytods("$dbdid:$key", $filename);
213
214      # We do this as otherwise we have no way of determining the origing .bib entry order
215      # We need this in order to do sorting=none + allkeys because in this case, there is no
216      # "citeorder" because nothing is explicitly cited and so "citeorder" means .bib order
217      push @{$orig_key_order->{$filename}}, "$dbdid:$key";
218      create_entry("$dbdid:$key", $entry, $source, $smaps);
219    }
220
221    # if allkeys, push all bibdata keys into citekeys (if they are not already there)
222    # We are using the special "orig_key_order" array which is used to deal with the
223    # situation when sorting=non and allkeys is set. We need an array rather than the
224    # keys from the bibentries hash because we need to preserver the original order of
225    # the .bib as in this case the sorting sub "citeorder" means "bib order" as there are
226    # no explicitly cited keys
227    $section->add_citekeys(@{$orig_key_order->{$filename}});
228    $logger->debug("Added all citekeys to section '$secnum': " . join(', ', $section->get_citekeys));
229  }
230  else {
231    # loop over all keys we're looking for and create objects
232    $logger->debug('Wanted keys: ' . join(', ', @$keys));
233    foreach my $wanted_key (@$keys) {
234      $logger->debug("Looking for key '$wanted_key' in Endnote XML file '$filename'");
235      # Split key into parts
236      my ($wdbid, $wnum) = split(/:/, $wanted_key);
237
238      if (my @entries = $xpc->findnodes("/xml/records/record[rec-number[text()='$wnum']][foreign-keys/key[\@db-id='$wdbid']]")) {
239        # Check to see if there is more than one entry with this key and warn if so
240        if ($#entries > 0) {
241          biber_warn("Found more than one entry for key '$wanted_key' in '$wdbid:$wnum' - Skipping duplicates ...");
242        }
243        my $entry = $entries[0];
244
245        $logger->debug("Found key '$wanted_key' in Endnote XML file '$filename'");
246        $logger->debug('Parsing Endnote XML entry object ' . $entry->nodePath);
247
248        # Record a key->datasource name mapping for error reporting
249        $section->set_keytods($wanted_key, $filename);
250
251        # See comment above about the importance of the case of the key
252        # passed to create_entry()
253        create_entry($wanted_key, $entry, $source, $smaps);
254        # found a key, remove it from the list of keys we want
255        @rkeys = grep {$wanted_key ne $_} @rkeys;
256      }
257      $logger->debug('Wanted keys now: ' . join(', ', @rkeys));
258    }
259  }
260
261  return @rkeys;
262}
263
264
265=head2 create_entry
266
267   Create a Biber::Entry object from an entry found in a Endnote
268   XML data source
269
270=cut
271
272sub create_entry {
273  my ($key, $entry, $source, $smaps) = @_;
274  my $secnum = $Biber::MASTER->get_current_section;
275  my $section = $Biber::MASTER->sections->get_section($secnum);
276  my $bibentries = $section->bibentries;
277  my $bibentry = new Biber::Entry;
278
279  $bibentry->set_field('citekey', $key);
280
281  # Datasource mapping applied in $smap order (USER->STYLE->DRIVER)
282  foreach my $smap (@$smaps) {
283    my $level = $smap->{level};
284
285    # DATASOURCE MAPPING DEFINED BY USER IN CONFIG FILE OR .bcf
286  MAP:    foreach my $map (@{$smap->{map}}) {
287      my $last_field = undef;
288      my $last_fieldval = undef;
289
290      my @imatches; # For persising parenthetical matches over several steps
291
292      my $itype = $entry->findvalue('./ref-type/@name');
293      my $last_type = $itype; # defaults to the entrytype unless changed below
294
295      # Check pertype restrictions
296      unless (not exists($map->{per_type}) or
297              first {$_->{content} eq $itype} @{$map->{per_type}}) {
298        next;
299      }
300
301      # Check per_datasource restrictions
302      # Don't compare case insensitively - this might not be correct
303      unless (not exists($map->{per_datasource}) or
304              first {$_->{content} eq $source} @{$map->{per_datasource}}) {
305        next;
306      }
307
308      # loop over mapping steps
309      foreach my $step (@{$map->{map_step}}) {
310
311        # Entrytype map
312        if (my $source = $step->{map_type_source}) {
313
314          unless ($itype eq $source) {
315            # Skip the rest of the map if this step doesn't match and match is final
316            if ($step->{map_final}) {
317              $logger->debug("Source mapping (type=$level, key=$key): Entry type is '$itype' but map wants '$source' and step has 'final' set ... skipping rest of map ...");
318              next MAP;
319            }
320            else {
321              # just ignore this step
322              $logger->debug("Source mapping (type=$level, key=$key): Entry type is '$itype' but map wants '$source' ... skipping step ...");
323              next;
324            }
325          }
326          # Change entrytype if requested
327          $last_type = $itype;
328          $logger->debug("Source mapping (type=$level, key=$key): Changing entry type from '$last_type' to " . $step->{map_type_target});
329          $entry->findnodes('./ref-type')->get_node(1)->setAttribute('name', $step->{map_type_target});
330        }
331
332        # Field map
333        if (my $source = $step->{map_field_source}) {
334          unless ($entry->exists($source)) {
335            # Skip the rest of the map if this step doesn't match and match is final
336            if ($step->{map_final}) {
337              $logger->debug("Source mapping (type=$level, key=$key): No field '$source' and step has 'final' set, skipping rest of map ...");
338              next MAP;
339            }
340            else {
341              # just ignore this step
342              $logger->debug("Source mapping (type=$level, key=$key): No field '$source', skipping step ...");
343              next;
344            }
345          }
346
347          $last_field = $source;
348          $last_fieldval = $entry->findvalue($source);
349
350          # map fields to targets
351          if (my $m = $step->{map_match}) {
352            if (defined($step->{map_replace})) { # replace can be null
353              my $r = $step->{map_replace};
354              $logger->debug("Source mapping (type=$level, key=$key): Doing match/replace '$m' -> '$r' on field '$source'");
355              my $text = ireplace($last_fieldval, $m, $r);
356              $entry->findnodes($source . '/style/text()')->get_node(1)->setData($text);
357            }
358            else {
359              unless (@imatches = imatch($last_fieldval, $m)) {
360                # Skip the rest of the map if this step doesn't match and match is final
361                if ($step->{map_final}) {
362                  $logger->debug("Source mapping (type=$level, key=$key): Field '$source' does not match '$m' and step has 'final' set, skipping rest of map ...");
363                  next MAP;
364                }
365                else {
366                  # just ignore this step
367                  $logger->debug("Source mapping (type=$level, key=$key): Field '$source' does not match '$m', skipping step ...");
368                  next;
369                }
370              }
371            }
372          }
373
374          # Set to a different target if there is one
375          if (my $target = $step->{map_field_target}) {
376            if (my @t = $entry->findnodes($target)) {
377              if ($map->{map_overwrite} // $smap->{map_overwrite}) {
378                $logger->debug("Source mapping (type=$level, key=$key): Overwriting existing field '$target'");
379                # Have to do this otherwise XML::LibXML will merge the nodes
380                map {$_->unbindNode} @t;
381              }
382              else {
383                $logger->debug("Source mapping (type=$level, key=$key): Field '$source' is aliased to field '$target' but both are defined, skipping ...");
384                next;
385              }
386            }
387
388            map {$_->setNodeName(_leaf_node($target))} $entry->findnodes($source);
389          }
390        }
391
392        # field creation
393        if (my $field = $step->{map_field_set}) {
394
395          # Deal with special tokens
396          if ($step->{map_null}) {
397            $logger->debug("Source mapping (type=$level, key=$key): Deleting field '$field'");
398            map {$_->unbindNode} $entry->findnodes($field);
399          }
400          else {
401            if ($entry->exists($field)) {
402              unless ($map->{map_overwrite} // $smap->{map_overwrite}) {
403                if ($step->{map_final}) {
404                  # map_final is set, ignore and skip rest of step
405                  $logger->debug("Source mapping (type=$level, key=$key): Field '$field' exists, overwrite is not set and step has 'final' set, skipping rest of map ...");
406                  next MAP;
407                }
408                else {
409                  # just ignore this step
410                  $logger->debug("Source mapping (type=$level, key=$key): Field '$field' exists and overwrite is not set, skipping step ...");
411                  next;
412                }
413              }
414            }
415
416            # If append is set, keep the original value and append the new
417            my $orig = $step->{map_append} ? $entry->findvalue($field) : '';
418
419            if ($step->{map_origentrytype}) {
420              next unless $last_type;
421              $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_type}'");
422              $entry->appendTextChild($field, $orig . $last_type);
423            }
424            elsif ($step->{map_origfieldval}) {
425              next unless $last_fieldval;
426              $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_fieldval}'");
427              $entry->appendTextChild($field, $orig . $last_fieldval);
428            }
429            elsif ($step->{map_origfield}) {
430              next unless $last_field;
431              $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_field}'");
432              $entry->appendTextChild($field, $orig . $last_field);
433            }
434            else {
435              my $fv = $step->{map_field_value};
436              # Now re-instate any unescaped $1 .. $9 to get round these being
437              # dynamically scoped and being null when we get here from any
438              # previous map_match
439              $fv =~ s/(?<!\\)\$(\d)/$imatches[$1-1]/ge;
440              $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${fv}'");
441              $entry->appendTextChild($field, $orig . $fv);
442            }
443          }
444        }
445      }
446    }
447  }
448
449  my $itype = $entry->findvalue('./ref-type/@name');
450  foreach my $f (uniq map {$_->nodeName()} $entry->findnodes('(./*|./titles/*|./contributors/*|./urls/web-urls/*|./dates/*)')) {
451
452    # Now run any defined handler
453    # There is no else clause here to warn on invalid fields as there are so many
454    # in Endnote
455    if ($dm->is_field($f)) {
456      my $handler = _get_handler($f);
457      &$handler($bibentry, $entry, $f, $key);
458    }
459  }
460
461  $bibentry->set_field('entrytype', $itype);
462  $bibentry->set_field('datatype', 'endnotexml');
463  $bibentries->add_entry($key, $bibentry);
464
465  return;
466}
467
468# HANDLERS
469# ========
470
471# List fields
472sub _list {
473  my ($bibentry, $entry, $f) = @_;
474  my $value = $entry->findvalue("./$f");
475  $bibentry->set_datafield($f, [ _norm($value) ]);
476  return;
477}
478
479# literal fields
480sub _literal {
481  my ($bibentry, $entry, $f) = @_;
482  my $value = $entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)");
483  $bibentry->set_datafield($f, _norm($value));
484  return;
485}
486
487# Verbatim fields
488sub _verbatim {
489  my ($bibentry, $entry, $f) = @_;
490  my $value = $entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)");
491  $bibentry->set_datafield($f, _norm($value));
492  return;
493}
494
495# URI fields
496sub _uri {
497  my ($bibentry, $entry, $f) = @_;
498  my $value = _norm($entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)"));
499
500  # URL escape if it doesn't look like it already is
501  # This is useful if we are generating URLs automatically with maps which may
502  # contain UTF-8 from other fields
503  unless ($value =~ /\%/) {
504   $value = URI->new($value)->as_string;
505  }
506
507  $bibentry->set_datafield($f, $value);
508  return;
509}
510
511# Range fields
512sub _range {
513  my ($bibentry, $entry, $f) = @_;
514  my $values_ref;
515  my $value = $entry->findvalue("./$f");
516  my @values = split(/\s*,\s*/, _norm($value));
517  # Here the "-–" contains two different chars even though they might
518  # look the same in some fonts ...
519  # If there is a range sep, then we set the end of the range even if it's null
520  # If no  range sep, then the end of the range is undef
521  foreach my $value (@values) {
522    $value =~ m/\A\s*([^-–]+)([-–]*)([^-–]*)\s*\z/xms;
523    my $end;
524    if ($2) {
525      $end = $3;
526    }
527    else {
528      $end = undef;
529    }
530    push @$values_ref, [$1 || '', $end];
531  }
532  $bibentry->set_datafield($f, $values_ref);
533  return;
534}
535
536# Date fields
537sub _date {
538  my ($bibentry, $entry, $f, $key) = @_;
539  my $daten = $entry->findnodes("./dates/$f")->get_node(1);
540  my $secnum = $Biber::MASTER->get_current_section;
541  my $section = $Biber::MASTER->sections->get_section($secnum);
542  my $ds = $section->get_keytods($key);
543
544  # Use Endnote explicit date attributes, if present
545  # It's not clear if Endnote actually uses these attributes
546  if ($daten->hasAttribute('year')) {
547    $bibentry->set_datafield('year', $daten->getAttribute('year'));
548    if ($daten->hasAttribute('month')) {
549      $bibentry->set_datafield('month', $daten->getAttribute('month'));
550    }
551    if ($daten->hasAttribute('day')) {
552      $bibentry->set_datafield('day', $daten->getAttribute('day'));
553    }
554    return;
555  }
556  else {
557    my $date = _norm($entry->findvalue("./dates/$f"));
558    # We are not validating dates here, just syntax parsing
559    my $date_re = qr/(\d{4}) # year
560                     (?:-(\d{2}))? # month
561                     (?:-(\d{2}))? # day
562                    /xms;
563    if (my ($byear, $bmonth, $bday, $r, $eyear, $emonth, $eday) =
564        $date =~ m|\A$date_re(/)?(?:$date_re)?\z|xms) {
565
566      $bibentry->set_datafield('year',     $byear)      if $byear;
567      $bibentry->set_datafield('month',    $bmonth)     if $bmonth;
568      $bibentry->set_datafield('day',      $bday)       if $bday;
569      $bibentry->set_datafield('endmonth', $emonth)     if $emonth;
570      $bibentry->set_datafield('endday',   $eday)       if $eday;
571      if ($r and $eyear) {      # normal range
572        $bibentry->set_datafield('endyear', $eyear);
573      }
574      elsif ($r and not $eyear) { # open ended range - endyear is defined but empty
575        $bibentry->set_datafield('endyear', '');
576      }
577    }
578    else {
579      biber_warn("Datamodel: Entry '$key' ($ds): Invalid format '$date' of date field '$f' - ignoring", $bibentry);
580    }
581    return;
582  }
583}
584
585# Name fields
586sub _name {
587  my ($bibentry, $entry, $f, $key) = @_;
588  my $names = new Biber::Entry::Names;
589  my $useprefix = Biber::Config->getblxoption('useprefix', $bibentry->get_field('entrytype'), $key);
590  foreach my $name ($entry->findnodes("./contributors/$f/*")) {
591    $names->add_name(parsename($name, $f, {useprefix => $useprefix}));
592  }
593  $bibentry->set_datafield($f, $names);
594  return;
595}
596
597sub _xsv {
598  my ($bibentry, $entry, $f) = @_;
599  # Keywords
600  if (my @s = $entry->findnodes("./$f/keyword")) {
601    my $kws;
602    foreach my $s (@s) {
603      push @$kws, '{'._norm($s->textContent()).'}';
604    }
605    $bibentry->set_datafield($f, $kws);
606  }
607  return;
608}
609
610=head2 parsename
611
612    Given a name node, this function returns a Biber::Entry::Name object
613
614    Returns an object which internally looks a bit like this:
615
616    { firstname     => 'John',
617      firstname_i   => 'J',
618      middlename    => 'Fred',
619      middlename_i  => 'F',
620      lastname      => 'Doe',
621      lastname_i    => 'D',
622      prefix        => undef,
623      prefix_i      => undef,
624      suffix        => undef,
625      suffix_i      => undef,
626      namestring    => 'Doe, John Fred',
627      nameinitstring => 'Doe_JF',
628
629=cut
630
631sub parsename {
632  my ($node, $fieldname, $opts) = @_;
633  $logger->debug('Parsing Endnote XML name object ' . $node->nodePath);
634  my $usepre = $opts->{useprefix};
635
636  my %namec;
637
638  # Assume that we are using the Endnote name attrs if we find a 'last-name' attr
639  # It's not clear if Endnote actually ever uses these even though they are in the
640  # DTD
641  if ($node->hasAttribute('last-name')) {
642    foreach my $n ('last-name', 'first-name', 'suffix', 'corp-name', 'initials', 'middle-initial', 'title', 'salutation') {
643      # If there is a name attribute for this component ...
644      # This is all guessing as I've never seen Endnote export this
645      if ($node->hasAttribute($n)) {
646        my $np = $node->getAttribute($n);
647        if ($n eq 'last-name') {
648          $namec{last} = $np;
649          $namec{last_i} = [_gen_initials($np)];
650        }
651        elsif ($n eq 'first-name') {
652          $namec{first} = $np;
653          $namec{first_i} = [_gen_initials($np)];
654        }
655        elsif ($n eq 'suffix') {
656          $namec{suffix} = $np;
657          $namec{suffix_i} = [_gen_initials($np)];
658        }
659        elsif ($n eq 'corp-name') {
660          $namec{last} = $np;
661          $namec{last_i} = [_gen_initials($np)];
662        }
663        elsif ($n eq 'initials') {
664          $namec{first_i} = $np;
665        }
666        elsif ($n eq 'middle-initial') {
667          my $mi = $np;
668          $mi =~ s/\s*\.//g;
669          $namec{middle} = $np;
670          $namec{middle_i} = [ $mi ];
671        }
672        elsif ($n eq 'title' or $n eq 'salutation') {
673          $namec{first} = "$np " . $namec{first};
674        }
675      }
676    }
677  # Only warn about lastnames since there should always be one
678    biber_warn("Couldn't determine Lastname for name XPath: " . $node->nodePath) unless exists($namec{last});
679
680    my $namestring = '';
681
682    # Don't add suffix to namestring or nameinitstring as these are used for uniquename disambiguation
683    # which should only care about lastname + any prefix (if useprefix=true). See biblatex github
684    # tracker #306.
685
686    # lastname
687    if (my $l = $namec{last}) {
688      $namestring .= "$l, ";
689    }
690
691    # firstname
692    if (my $f = $namec{first}) {
693      $namestring .= "$f";
694    }
695
696    # Remove any trailing comma and space if, e.g. missing firstname
697    $namestring =~ s/,\s+\z//xms;
698
699    # Construct $nameinitstring
700    my $nameinitstr = '';
701    $nameinitstr .= $namec{last} if exists($namec{last});
702    $nameinitstr .= '_' . join('', @{$namec{first_i}}) if exists($namec{first});
703    $nameinitstr .= '_' . join('', @{$namec{middle_i}}) if exists($namec{middle});
704    $nameinitstr =~ s/\s+/_/g;
705
706    return Biber::Entry::Name->new(
707      firstname       => $namec{first} // undef,
708      firstname_i     => exists($namec{first}) ? $namec{first_i} : undef,
709      middlename      => $namec{middle} // undef,
710      middlename_i    => exists($namec{middle}) ? $namec{middle_i} : undef,
711      lastname        => $namec{last} // undef,
712      lastname_i      => exists($namec{last}) ? $namec{last_i} : undef,
713      suffix          => $namec{suffix} // undef,
714      suffix_i        => exists($namec{suffix}) ? $namec{suffix_i} : undef,
715      namestring      => $namestring,
716      nameinitstring  => $nameinitstr,
717    );
718  }
719  else { # parse with bibtex library because Endnote XML is rubbish
720    my $namestr = $node->textContent();
721
722    # First sanitise the namestring due to Text::BibTeX::Name limitations on whitespace
723    $namestr =~ s/\A\s*//xms;   # leading whitespace
724    $namestr =~ s/\s*\z//xms;   # trailing whitespace
725    $namestr =~ s/\s+/ /g;      # Collapse internal whitespace
726
727    my $tberr = File::Temp->new(TEMPLATE => 'biber_Text_BibTeX_STDERR_XXXXX',
728                                DIR => $Biber::MASTER->biber_tempdir);
729    my $tberr_name = $tberr->filename;
730
731    open OLDERR, '>&', \*STDERR;
732    open STDERR, '>', $tberr_name;
733    my $name = new Text::BibTeX::Name($namestr);
734    open STDERR, '>&', \*OLDERR;
735    close OLDERR;
736
737    # Put any Text::BibTeX errors into the biber warnings/errors collections
738    # We are parsing the libbtparse library error/warning strings a little here
739    # This is not so bad as they have a clean structure (see error.c in libbtparse)
740    open my $tbe, '<', $tberr_name;
741    while (<$tbe>) {
742      if (/error:/) {
743        chomp;
744        biber_error("BibTeX subsystem: $_");
745      }
746      elsif (/warning:/) {
747        chomp;
748        biber_warn("BibTeX subsystem: $_");
749      }
750    }
751    close($tbe);
752
753    # Formats so we can get BibTeX compatible nbsp inserted
754    my $l_f = new Text::BibTeX::NameFormat('l', 0);
755    my $f_f = new Text::BibTeX::NameFormat('f', 0);
756    my $p_f = new Text::BibTeX::NameFormat('v', 0);
757    my $s_f = new Text::BibTeX::NameFormat('j', 0);
758    $l_f->set_options(BTN_LAST,  0, BTJ_MAYTIE, BTJ_NOTHING);
759    $f_f->set_options(BTN_FIRST, 0, BTJ_MAYTIE, BTJ_NOTHING);
760    $p_f->set_options(BTN_VON,   0, BTJ_MAYTIE, BTJ_NOTHING);
761    $s_f->set_options(BTN_JR,    0, BTJ_MAYTIE, BTJ_NOTHING);
762
763    # Generate name parts
764    my $lastname  = $name->format($l_f);
765    my $firstname = $name->format($f_f);
766    my $prefix    = $name->format($p_f);
767    my $suffix    = $name->format($s_f);
768
769    # Variables to hold the Text::BibTeX::NameFormat generated initials string
770    my $gen_lastname_i;
771    my $gen_firstname_i;
772    my $gen_prefix_i;
773    my $gen_suffix_i;
774
775    # Use a copy of $name so that when we generate the
776    # initials, we do so without certain things. This is easier than trying
777    # hack robust initials code into btparse ...
778    my $nd_namestr = strip_noinit($namestr);
779    my $nd_name = new Text::BibTeX::Name($nd_namestr, $fieldname);
780
781    # Initials formats
782    my $li_f = new Text::BibTeX::NameFormat('l', 1);
783    my $fi_f = new Text::BibTeX::NameFormat('f', 1);
784    my $pi_f = new Text::BibTeX::NameFormat('v', 1);
785    my $si_f = new Text::BibTeX::NameFormat('j', 1);
786
787    # Initials generated with forced tie so we can make an array
788    $li_f->set_text(BTN_LAST,  undef, undef, undef, '');
789    $fi_f->set_text(BTN_FIRST, undef, undef, undef, '');
790    $pi_f->set_text(BTN_VON,   undef, undef, undef, '');
791    $si_f->set_text(BTN_JR,    undef, undef, undef, '');
792    $li_f->set_options(BTN_LAST,  1, BTJ_FORCETIE, BTJ_NOTHING);
793    $fi_f->set_options(BTN_FIRST, 1, BTJ_FORCETIE, BTJ_NOTHING);
794    $pi_f->set_options(BTN_VON,   1, BTJ_FORCETIE, BTJ_NOTHING);
795    $si_f->set_options(BTN_JR,    1, BTJ_FORCETIE, BTJ_NOTHING);
796
797    $gen_lastname_i    = inits($nd_name->format($li_f));
798    $gen_firstname_i   = inits($nd_name->format($fi_f));
799    $gen_prefix_i      = inits($nd_name->format($pi_f));
800    $gen_suffix_i      = inits($nd_name->format($si_f));
801
802    my $namestring = '';
803    # prefix
804    my $ps;
805    my $prefix_stripped;
806    my $prefix_i;
807    if ($prefix) {
808      $prefix_i = $gen_prefix_i;
809      $namestring .= "$prefix ";
810    }
811    # lastname
812    my $ls;
813    my $lastname_stripped;
814    my $lastname_i;
815    if ($lastname) {
816      $lastname_i = $gen_lastname_i;
817      $namestring .= "$lastname, ";
818    }
819    # suffix
820    my $ss;
821    my $suffix_stripped;
822    my $suffix_i;
823    if ($suffix) {
824      $suffix_i = $gen_suffix_i;
825      $namestring .= "$suffix, ";
826    }
827    # firstname
828    my $fs;
829    my $firstname_stripped;
830    my $firstname_i;
831    if ($firstname) {
832      $firstname_i = $gen_firstname_i;
833      $namestring .= "$firstname";
834    }
835
836    # Remove any trailing comma and space if, e.g. missing firstname
837    # Replace any nbspes
838    $namestring =~ s/,\s+\z//xms;
839    $namestring =~ s/~/ /gxms;
840
841    # Construct $nameinitstring
842    my $nameinitstr = '';
843    $nameinitstr .= join('', @$prefix_i) . '_' if ( $usepre and $prefix );
844    $nameinitstr .= $lastname if $lastname;
845    $nameinitstr .= '_' . join('', @$suffix_i) if $suffix;
846    $nameinitstr .= '_' . join('', @$firstname_i) if $firstname;
847    $nameinitstr =~ s/\s+/_/g;
848    $nameinitstr =~ s/~/_/g;
849
850    # The "strip" entry tells us which of the name parts had outer braces
851    # stripped during processing so we can add them back when printing the
852    # .bbl so as to maintain maximum BibTeX compatibility
853    return Biber::Entry::Name->new(
854      firstname       => $firstname      eq '' ? undef : $firstname,
855      firstname_i     => $firstname      eq '' ? undef : $firstname_i,
856      lastname        => $lastname       eq '' ? undef : $lastname,
857      lastname_i      => $lastname       eq '' ? undef : $lastname_i,
858      prefix          => $prefix         eq '' ? undef : $prefix,
859      prefix_i        => $prefix         eq '' ? undef : $prefix_i,
860      suffix          => $suffix         eq '' ? undef : $suffix,
861      suffix_i        => $suffix         eq '' ? undef : $suffix_i,
862      namestring      => $namestring,
863      nameinitstring  => $nameinitstr
864    );
865  }
866}
867
868# Passed an array ref of strings, returns an array ref of initials
869sub _gen_initials {
870  my @strings = @_;
871  my @strings_out;
872  foreach my $str (@strings) {
873    # Deal with hyphenated name parts and normalise to a '-' character for easy
874    # replacement with macro later
875    if ($str =~ m/\p{Dash}/) {
876      push @strings_out, join('-', _gen_initials(split(/\p{Dash}/, $str)));
877    }
878    else {
879      my $chr = Unicode::GCString->new($str)->substr(0, 1)->as_string;
880      # Keep diacritics with their following characters
881      if ($chr =~ m/\p{Dia}/) {
882        push @strings_out, Unicode::GCString->new($str)->substr(0, 2)->as_string;
883      }
884      else {
885        push @strings_out, $chr;
886      }
887    }
888  }
889  return @strings_out;
890}
891
892
893# Syntactically get the leaf node of a node path
894sub _leaf_node {
895  my $node_path = shift;
896  return $node_path =~ s|.+/([^/]+$)|$1|r;
897}
898
899
900# Do some sanitising since this can't be nicely done by the parser
901sub _norm {
902  my $t = shift;
903  return undef unless $t;
904  $t =~ s/\A[\n\s]+//xms;
905  $t =~ s/[\n\s]+\z//xms;
906  return $t;
907}
908
909sub _get_handler {
910  my $field = shift;
911  if (my $h = $handlers->{CUSTOM}{$field}) {
912    return $h;
913  }
914  else {
915    return $handlers->{$dm->get_fieldtype($field)}{$dm->get_fieldformat($field) || 'default'}{$dm->get_datatype($field)};
916  }
917}
918
9191;
920
921__END__
922
923=pod
924
925=encoding utf-8
926
927=head1 NAME
928
929Biber::Input::file::endnotexml - look in a Zotero RDFXML file for an entry and create it if found
930
931=head1 DESCRIPTION
932
933Provides the extract_entries() method to get entries from a biblatexml data source
934and instantiate Biber::Entry objects for what it finds
935
936=head1 AUTHOR
937
938François Charette, C<< <firmicus at ankabut.net> >>
939Philip Kime C<< <philip at kime.org.uk> >>
940
941=head1 BUGS
942
943Please report any bugs or feature requests on our Github tracker at
944L<https://github.com/plk/biber/issues>.
945
946=head1 COPYRIGHT & LICENSE
947
948Copyright 2009-2015 François Charette and Philip Kime, all rights reserved.
949
950This module is free software.  You can redistribute it and/or
951modify it under the terms of the Artistic License 2.0.
952
953This program is distributed in the hope that it will be useful,
954but without any warranty; without even the implied warranty of
955merchantability or fitness for a particular purpose.
956
957=cut
958