1package Biber::Input::file::endnotexml; 2use v5.16; 3use strict; 4use warnings; 5use base 'Exporter'; 6 7use Carp; 8use Biber::Constants; 9use Biber::Entries; 10use Biber::Entry; 11use Biber::Entry::Names; 12use Biber::Entry::Name; 13use Biber::Sections; 14use Biber::Section; 15use Biber::Utils; 16use Biber::Config; 17use Digest::MD5 qw( md5_hex ); 18use Encode; 19use File::Spec; 20use File::Slurp; 21use File::Temp; 22use Log::Log4perl qw(:no_extra_logdie_message); 23use List::AllUtils qw( :all ); 24use XML::LibXML; 25use XML::LibXML::Simple; 26use Data::Dump qw(dump); 27use Text::BibTeX qw(:nameparts :joinmethods :metatypes); 28use Text::BibTeX::Name; 29use Text::BibTeX::NameFormat; 30use Unicode::Normalize; 31use Unicode::GCString; 32use URI; 33 34##### This is based on Endnote X4 ##### 35 36my $logger = Log::Log4perl::get_logger('main'); 37my $orig_key_order = {}; 38 39# Determine handlers from data model 40my $dm = Biber::Config->get_dm; 41my $handlers = { 42 'field' => { 43 'default' => { 44 'code' => \&_literal, 45 'date' => \&_date, 46 'entrykey' => \&_literal, 47 'integer' => \&_literal, 48 'key' => \&_literal, 49 'literal' => \&_literal, 50 'range' => \&_range, 51 'verbatim' => \&_verbatim, 52 'uri' => \&_uri 53 }, 54 'xsv' => { 55 'entrykey' => \&_xsv, 56 'keyword' => \&_xsv, 57 'option' => \&_xsv, 58 } 59 }, 60 'list' => { 61 'default' => { 62 'entrykey' => \&_literal, 63 'key' => \&_list, 64 'literal' => \&_list, 65 'name' => \&_name 66 } 67 } 68}; 69 70 71=head2 extract_entries 72 73 Main data extraction routine. 74 Accepts a data source identifier (filename in this case), 75 preprocesses the file and then looks for the passed keys, 76 creating entries when it finds them and passes out an 77 array of keys it didn't find. 78 79=cut 80 81sub extract_entries { 82 my ($source, $keys) = @_; 83 my $secnum = $Biber::MASTER->get_current_section; 84 my $section = $Biber::MASTER->sections->get_section($secnum); 85 my $filename; 86 my @rkeys = @$keys; 87 my $tf; # Up here so that the temp file has enough scope to survive until we've 88 # used it 89 $logger->trace("Entering extract_entries() in driver 'endnotexml'"); 90 91 # Get a reference to the correct sourcemap sections, if they exist 92 my $smaps = []; 93 # Maps are applied in order USER->STYLE->DRIVER 94 if (defined(Biber::Config->getoption('sourcemap'))) { 95 # User maps 96 if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'user' } @{Biber::Config->getoption('sourcemap')} ) { 97 push @$smaps, $m; 98 } 99 # Style maps 100 if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'style' } @{Biber::Config->getoption('sourcemap')} ) { 101 push @$smaps, $m; 102 } 103 # Driver default maps 104 if (my $m = first {$_->{datatype} eq 'endnotexml' and $_->{level} eq 'driver'} @{Biber::Config->getoption('sourcemap')} ) { 105 push @$smaps, $m; 106 } 107 } 108 109 # If it's a remote data file, fetch it first 110 if ($source =~ m/\A(?:http|ftp)(s?):\/\//xms) { 111 $logger->info("Data source '$source' is a remote EndNote XML datasource - fetching ..."); 112 if ($1) { # HTTPS 113 # use IO::Socket::SSL qw(debug99); # useful for debugging SSL issues 114 # We have to explicitly set the cert path because otherwise the https module 115 # can't find the .pem when PAR::Packer'ed 116 # Have to explicitly try to require Mozilla::CA here to get it into %INC below 117 # It may, however, have been removed by some biber unpacked dists 118 if (not exists($ENV{PERL_LWP_SSL_CA_FILE}) and 119 not exists($ENV{PERL_LWP_SSL_CA_PATH}) and 120 not defined(Biber::Config->getoption('ssl-nointernalca')) and 121 eval {require Mozilla::CA}) { 122 # we assume that the default CA file is in .../Mozilla/CA/cacert.pem 123 (my $vol, my $dir, undef) = File::Spec->splitpath( $INC{"Mozilla/CA.pm"} ); 124 $dir =~ s/\/$//; # splitpath sometimes leaves a trailing '/' 125 $ENV{PERL_LWP_SSL_CA_FILE} = File::Spec->catpath($vol, "$dir/CA", 'cacert.pem'); 126 } 127 128 # fallbacks for, e.g., linux 129 unless (exists($ENV{PERL_LWP_SSL_CA_FILE})) { 130 foreach my $ca_bundle (qw{ 131 /etc/ssl/certs/ca-certificates.crt 132 /etc/pki/tls/certs/ca-bundle.crt 133 /etc/ssl/ca-bundle.pem 134 }) { 135 next if ! -e $ca_bundle; 136 $ENV{PERL_LWP_SSL_CA_FILE} = $ca_bundle; 137 last; 138 } 139 foreach my $ca_path (qw{ 140 /etc/ssl/certs/ 141 /etc/pki/tls/ 142 }) { 143 next if ! -d $ca_path; 144 $ENV{PERL_LWP_SSL_CA_PATH} = $ca_path; 145 last; 146 } 147 } 148 149 if (defined(Biber::Config->getoption('ssl-noverify-host'))) { 150 $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; 151 } 152 require LWP::Protocol::https; 153 } 154 require LWP::Simple; 155 $tf = File::Temp->new(TEMPLATE => 'biber_remote_data_source_XXXXX', 156 DIR => $Biber::MASTER->biber_tempdir, 157 SUFFIX => '.xml'); 158 unless (LWP::Simple::is_success(LWP::Simple::getstore($source, $tf->filename))) { 159 biber_error("Could not fetch '$source'"); 160 } 161 $filename = $tf->filename; 162 } 163 else { 164 # Need to get the filename so we increment 165 # the filename count for preambles at the bottom of this sub 166 unless ($filename = locate_biber_file($source)) { 167 biber_error("Cannot find '$source'!") 168 } 169 } 170 171 # Log that we found a data file 172 $logger->info("Found EndNote XML data source '$filename'"); 173 174 # Set up XML parser and namespaces 175 my $parser = XML::LibXML->new(); 176 my $xml = File::Slurp::read_file($filename) or biber_error("Can't read file $filename"); 177 $xml = NFD(decode('UTF-8', $xml));# Unicode NFD boundary 178 my $enxml = $parser->parse_string($xml); 179 my $xpc = XML::LibXML::XPathContext->new($enxml); 180 181 if ($section->is_allkeys) { 182 $logger->debug("All citekeys will be used for section '$secnum'"); 183 # Loop over all entries, creating objects 184 foreach my $entry ($xpc->findnodes("/xml/records/record")) { 185 $logger->debug('Parsing Endnote XML entry object ' . $entry->nodePath); 186 187 # If an entry has no key, ignore it and warn 188 unless ($entry->findvalue('./rec-number')) { 189 biber_warn("Invalid or undefined entry ID in file '$filename', skipping ..."); 190 next; 191 } 192 193 my $ek = $entry->findvalue('./rec-number'); 194 # If we've already seen a case variant, warn 195 if (my $okey = $section->has_badcasekey($ek)) { 196 biber_warn("Possible typo (case mismatch): '$ek' and '$okey' in file '$filename', skipping '$ek' ..."); 197 } 198 199 # If we've already seen this key, ignore it and warn 200 if ($section->has_everykey($ek)) { 201 biber_warn("Duplicate entry key: '$ek' in file '$filename', skipping ..."); 202 next; 203 } 204 else { 205 $section->add_everykey($ek); 206 } 207 208 my $dbdid = $entry->findvalue('./foreign-keys/key/@db-id'); 209 my $key = $ek; 210 211 # Record a key->datasource name mapping for error reporting 212 $section->set_keytods("$dbdid:$key", $filename); 213 214 # We do this as otherwise we have no way of determining the origing .bib entry order 215 # We need this in order to do sorting=none + allkeys because in this case, there is no 216 # "citeorder" because nothing is explicitly cited and so "citeorder" means .bib order 217 push @{$orig_key_order->{$filename}}, "$dbdid:$key"; 218 create_entry("$dbdid:$key", $entry, $source, $smaps); 219 } 220 221 # if allkeys, push all bibdata keys into citekeys (if they are not already there) 222 # We are using the special "orig_key_order" array which is used to deal with the 223 # situation when sorting=non and allkeys is set. We need an array rather than the 224 # keys from the bibentries hash because we need to preserver the original order of 225 # the .bib as in this case the sorting sub "citeorder" means "bib order" as there are 226 # no explicitly cited keys 227 $section->add_citekeys(@{$orig_key_order->{$filename}}); 228 $logger->debug("Added all citekeys to section '$secnum': " . join(', ', $section->get_citekeys)); 229 } 230 else { 231 # loop over all keys we're looking for and create objects 232 $logger->debug('Wanted keys: ' . join(', ', @$keys)); 233 foreach my $wanted_key (@$keys) { 234 $logger->debug("Looking for key '$wanted_key' in Endnote XML file '$filename'"); 235 # Split key into parts 236 my ($wdbid, $wnum) = split(/:/, $wanted_key); 237 238 if (my @entries = $xpc->findnodes("/xml/records/record[rec-number[text()='$wnum']][foreign-keys/key[\@db-id='$wdbid']]")) { 239 # Check to see if there is more than one entry with this key and warn if so 240 if ($#entries > 0) { 241 biber_warn("Found more than one entry for key '$wanted_key' in '$wdbid:$wnum' - Skipping duplicates ..."); 242 } 243 my $entry = $entries[0]; 244 245 $logger->debug("Found key '$wanted_key' in Endnote XML file '$filename'"); 246 $logger->debug('Parsing Endnote XML entry object ' . $entry->nodePath); 247 248 # Record a key->datasource name mapping for error reporting 249 $section->set_keytods($wanted_key, $filename); 250 251 # See comment above about the importance of the case of the key 252 # passed to create_entry() 253 create_entry($wanted_key, $entry, $source, $smaps); 254 # found a key, remove it from the list of keys we want 255 @rkeys = grep {$wanted_key ne $_} @rkeys; 256 } 257 $logger->debug('Wanted keys now: ' . join(', ', @rkeys)); 258 } 259 } 260 261 return @rkeys; 262} 263 264 265=head2 create_entry 266 267 Create a Biber::Entry object from an entry found in a Endnote 268 XML data source 269 270=cut 271 272sub create_entry { 273 my ($key, $entry, $source, $smaps) = @_; 274 my $secnum = $Biber::MASTER->get_current_section; 275 my $section = $Biber::MASTER->sections->get_section($secnum); 276 my $bibentries = $section->bibentries; 277 my $bibentry = new Biber::Entry; 278 279 $bibentry->set_field('citekey', $key); 280 281 # Datasource mapping applied in $smap order (USER->STYLE->DRIVER) 282 foreach my $smap (@$smaps) { 283 my $level = $smap->{level}; 284 285 # DATASOURCE MAPPING DEFINED BY USER IN CONFIG FILE OR .bcf 286 MAP: foreach my $map (@{$smap->{map}}) { 287 my $last_field = undef; 288 my $last_fieldval = undef; 289 290 my @imatches; # For persising parenthetical matches over several steps 291 292 my $itype = $entry->findvalue('./ref-type/@name'); 293 my $last_type = $itype; # defaults to the entrytype unless changed below 294 295 # Check pertype restrictions 296 unless (not exists($map->{per_type}) or 297 first {$_->{content} eq $itype} @{$map->{per_type}}) { 298 next; 299 } 300 301 # Check per_datasource restrictions 302 # Don't compare case insensitively - this might not be correct 303 unless (not exists($map->{per_datasource}) or 304 first {$_->{content} eq $source} @{$map->{per_datasource}}) { 305 next; 306 } 307 308 # loop over mapping steps 309 foreach my $step (@{$map->{map_step}}) { 310 311 # Entrytype map 312 if (my $source = $step->{map_type_source}) { 313 314 unless ($itype eq $source) { 315 # Skip the rest of the map if this step doesn't match and match is final 316 if ($step->{map_final}) { 317 $logger->debug("Source mapping (type=$level, key=$key): Entry type is '$itype' but map wants '$source' and step has 'final' set ... skipping rest of map ..."); 318 next MAP; 319 } 320 else { 321 # just ignore this step 322 $logger->debug("Source mapping (type=$level, key=$key): Entry type is '$itype' but map wants '$source' ... skipping step ..."); 323 next; 324 } 325 } 326 # Change entrytype if requested 327 $last_type = $itype; 328 $logger->debug("Source mapping (type=$level, key=$key): Changing entry type from '$last_type' to " . $step->{map_type_target}); 329 $entry->findnodes('./ref-type')->get_node(1)->setAttribute('name', $step->{map_type_target}); 330 } 331 332 # Field map 333 if (my $source = $step->{map_field_source}) { 334 unless ($entry->exists($source)) { 335 # Skip the rest of the map if this step doesn't match and match is final 336 if ($step->{map_final}) { 337 $logger->debug("Source mapping (type=$level, key=$key): No field '$source' and step has 'final' set, skipping rest of map ..."); 338 next MAP; 339 } 340 else { 341 # just ignore this step 342 $logger->debug("Source mapping (type=$level, key=$key): No field '$source', skipping step ..."); 343 next; 344 } 345 } 346 347 $last_field = $source; 348 $last_fieldval = $entry->findvalue($source); 349 350 # map fields to targets 351 if (my $m = $step->{map_match}) { 352 if (defined($step->{map_replace})) { # replace can be null 353 my $r = $step->{map_replace}; 354 $logger->debug("Source mapping (type=$level, key=$key): Doing match/replace '$m' -> '$r' on field '$source'"); 355 my $text = ireplace($last_fieldval, $m, $r); 356 $entry->findnodes($source . '/style/text()')->get_node(1)->setData($text); 357 } 358 else { 359 unless (@imatches = imatch($last_fieldval, $m)) { 360 # Skip the rest of the map if this step doesn't match and match is final 361 if ($step->{map_final}) { 362 $logger->debug("Source mapping (type=$level, key=$key): Field '$source' does not match '$m' and step has 'final' set, skipping rest of map ..."); 363 next MAP; 364 } 365 else { 366 # just ignore this step 367 $logger->debug("Source mapping (type=$level, key=$key): Field '$source' does not match '$m', skipping step ..."); 368 next; 369 } 370 } 371 } 372 } 373 374 # Set to a different target if there is one 375 if (my $target = $step->{map_field_target}) { 376 if (my @t = $entry->findnodes($target)) { 377 if ($map->{map_overwrite} // $smap->{map_overwrite}) { 378 $logger->debug("Source mapping (type=$level, key=$key): Overwriting existing field '$target'"); 379 # Have to do this otherwise XML::LibXML will merge the nodes 380 map {$_->unbindNode} @t; 381 } 382 else { 383 $logger->debug("Source mapping (type=$level, key=$key): Field '$source' is aliased to field '$target' but both are defined, skipping ..."); 384 next; 385 } 386 } 387 388 map {$_->setNodeName(_leaf_node($target))} $entry->findnodes($source); 389 } 390 } 391 392 # field creation 393 if (my $field = $step->{map_field_set}) { 394 395 # Deal with special tokens 396 if ($step->{map_null}) { 397 $logger->debug("Source mapping (type=$level, key=$key): Deleting field '$field'"); 398 map {$_->unbindNode} $entry->findnodes($field); 399 } 400 else { 401 if ($entry->exists($field)) { 402 unless ($map->{map_overwrite} // $smap->{map_overwrite}) { 403 if ($step->{map_final}) { 404 # map_final is set, ignore and skip rest of step 405 $logger->debug("Source mapping (type=$level, key=$key): Field '$field' exists, overwrite is not set and step has 'final' set, skipping rest of map ..."); 406 next MAP; 407 } 408 else { 409 # just ignore this step 410 $logger->debug("Source mapping (type=$level, key=$key): Field '$field' exists and overwrite is not set, skipping step ..."); 411 next; 412 } 413 } 414 } 415 416 # If append is set, keep the original value and append the new 417 my $orig = $step->{map_append} ? $entry->findvalue($field) : ''; 418 419 if ($step->{map_origentrytype}) { 420 next unless $last_type; 421 $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_type}'"); 422 $entry->appendTextChild($field, $orig . $last_type); 423 } 424 elsif ($step->{map_origfieldval}) { 425 next unless $last_fieldval; 426 $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_fieldval}'"); 427 $entry->appendTextChild($field, $orig . $last_fieldval); 428 } 429 elsif ($step->{map_origfield}) { 430 next unless $last_field; 431 $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${last_field}'"); 432 $entry->appendTextChild($field, $orig . $last_field); 433 } 434 else { 435 my $fv = $step->{map_field_value}; 436 # Now re-instate any unescaped $1 .. $9 to get round these being 437 # dynamically scoped and being null when we get here from any 438 # previous map_match 439 $fv =~ s/(?<!\\)\$(\d)/$imatches[$1-1]/ge; 440 $logger->debug("Source mapping (type=$level, key=$key): Setting field '$field' to '${orig}${fv}'"); 441 $entry->appendTextChild($field, $orig . $fv); 442 } 443 } 444 } 445 } 446 } 447 } 448 449 my $itype = $entry->findvalue('./ref-type/@name'); 450 foreach my $f (uniq map {$_->nodeName()} $entry->findnodes('(./*|./titles/*|./contributors/*|./urls/web-urls/*|./dates/*)')) { 451 452 # Now run any defined handler 453 # There is no else clause here to warn on invalid fields as there are so many 454 # in Endnote 455 if ($dm->is_field($f)) { 456 my $handler = _get_handler($f); 457 &$handler($bibentry, $entry, $f, $key); 458 } 459 } 460 461 $bibentry->set_field('entrytype', $itype); 462 $bibentry->set_field('datatype', 'endnotexml'); 463 $bibentries->add_entry($key, $bibentry); 464 465 return; 466} 467 468# HANDLERS 469# ======== 470 471# List fields 472sub _list { 473 my ($bibentry, $entry, $f) = @_; 474 my $value = $entry->findvalue("./$f"); 475 $bibentry->set_datafield($f, [ _norm($value) ]); 476 return; 477} 478 479# literal fields 480sub _literal { 481 my ($bibentry, $entry, $f) = @_; 482 my $value = $entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)"); 483 $bibentry->set_datafield($f, _norm($value)); 484 return; 485} 486 487# Verbatim fields 488sub _verbatim { 489 my ($bibentry, $entry, $f) = @_; 490 my $value = $entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)"); 491 $bibentry->set_datafield($f, _norm($value)); 492 return; 493} 494 495# URI fields 496sub _uri { 497 my ($bibentry, $entry, $f) = @_; 498 my $value = _norm($entry->findvalue("(./$f|./titles/$f|./contributors/$f|./urls/web-urls/$f)")); 499 500 # URL escape if it doesn't look like it already is 501 # This is useful if we are generating URLs automatically with maps which may 502 # contain UTF-8 from other fields 503 unless ($value =~ /\%/) { 504 $value = URI->new($value)->as_string; 505 } 506 507 $bibentry->set_datafield($f, $value); 508 return; 509} 510 511# Range fields 512sub _range { 513 my ($bibentry, $entry, $f) = @_; 514 my $values_ref; 515 my $value = $entry->findvalue("./$f"); 516 my @values = split(/\s*,\s*/, _norm($value)); 517 # Here the "-–" contains two different chars even though they might 518 # look the same in some fonts ... 519 # If there is a range sep, then we set the end of the range even if it's null 520 # If no range sep, then the end of the range is undef 521 foreach my $value (@values) { 522 $value =~ m/\A\s*([^-–]+)([-–]*)([^-–]*)\s*\z/xms; 523 my $end; 524 if ($2) { 525 $end = $3; 526 } 527 else { 528 $end = undef; 529 } 530 push @$values_ref, [$1 || '', $end]; 531 } 532 $bibentry->set_datafield($f, $values_ref); 533 return; 534} 535 536# Date fields 537sub _date { 538 my ($bibentry, $entry, $f, $key) = @_; 539 my $daten = $entry->findnodes("./dates/$f")->get_node(1); 540 my $secnum = $Biber::MASTER->get_current_section; 541 my $section = $Biber::MASTER->sections->get_section($secnum); 542 my $ds = $section->get_keytods($key); 543 544 # Use Endnote explicit date attributes, if present 545 # It's not clear if Endnote actually uses these attributes 546 if ($daten->hasAttribute('year')) { 547 $bibentry->set_datafield('year', $daten->getAttribute('year')); 548 if ($daten->hasAttribute('month')) { 549 $bibentry->set_datafield('month', $daten->getAttribute('month')); 550 } 551 if ($daten->hasAttribute('day')) { 552 $bibentry->set_datafield('day', $daten->getAttribute('day')); 553 } 554 return; 555 } 556 else { 557 my $date = _norm($entry->findvalue("./dates/$f")); 558 # We are not validating dates here, just syntax parsing 559 my $date_re = qr/(\d{4}) # year 560 (?:-(\d{2}))? # month 561 (?:-(\d{2}))? # day 562 /xms; 563 if (my ($byear, $bmonth, $bday, $r, $eyear, $emonth, $eday) = 564 $date =~ m|\A$date_re(/)?(?:$date_re)?\z|xms) { 565 566 $bibentry->set_datafield('year', $byear) if $byear; 567 $bibentry->set_datafield('month', $bmonth) if $bmonth; 568 $bibentry->set_datafield('day', $bday) if $bday; 569 $bibentry->set_datafield('endmonth', $emonth) if $emonth; 570 $bibentry->set_datafield('endday', $eday) if $eday; 571 if ($r and $eyear) { # normal range 572 $bibentry->set_datafield('endyear', $eyear); 573 } 574 elsif ($r and not $eyear) { # open ended range - endyear is defined but empty 575 $bibentry->set_datafield('endyear', ''); 576 } 577 } 578 else { 579 biber_warn("Datamodel: Entry '$key' ($ds): Invalid format '$date' of date field '$f' - ignoring", $bibentry); 580 } 581 return; 582 } 583} 584 585# Name fields 586sub _name { 587 my ($bibentry, $entry, $f, $key) = @_; 588 my $names = new Biber::Entry::Names; 589 my $useprefix = Biber::Config->getblxoption('useprefix', $bibentry->get_field('entrytype'), $key); 590 foreach my $name ($entry->findnodes("./contributors/$f/*")) { 591 $names->add_name(parsename($name, $f, {useprefix => $useprefix})); 592 } 593 $bibentry->set_datafield($f, $names); 594 return; 595} 596 597sub _xsv { 598 my ($bibentry, $entry, $f) = @_; 599 # Keywords 600 if (my @s = $entry->findnodes("./$f/keyword")) { 601 my $kws; 602 foreach my $s (@s) { 603 push @$kws, '{'._norm($s->textContent()).'}'; 604 } 605 $bibentry->set_datafield($f, $kws); 606 } 607 return; 608} 609 610=head2 parsename 611 612 Given a name node, this function returns a Biber::Entry::Name object 613 614 Returns an object which internally looks a bit like this: 615 616 { firstname => 'John', 617 firstname_i => 'J', 618 middlename => 'Fred', 619 middlename_i => 'F', 620 lastname => 'Doe', 621 lastname_i => 'D', 622 prefix => undef, 623 prefix_i => undef, 624 suffix => undef, 625 suffix_i => undef, 626 namestring => 'Doe, John Fred', 627 nameinitstring => 'Doe_JF', 628 629=cut 630 631sub parsename { 632 my ($node, $fieldname, $opts) = @_; 633 $logger->debug('Parsing Endnote XML name object ' . $node->nodePath); 634 my $usepre = $opts->{useprefix}; 635 636 my %namec; 637 638 # Assume that we are using the Endnote name attrs if we find a 'last-name' attr 639 # It's not clear if Endnote actually ever uses these even though they are in the 640 # DTD 641 if ($node->hasAttribute('last-name')) { 642 foreach my $n ('last-name', 'first-name', 'suffix', 'corp-name', 'initials', 'middle-initial', 'title', 'salutation') { 643 # If there is a name attribute for this component ... 644 # This is all guessing as I've never seen Endnote export this 645 if ($node->hasAttribute($n)) { 646 my $np = $node->getAttribute($n); 647 if ($n eq 'last-name') { 648 $namec{last} = $np; 649 $namec{last_i} = [_gen_initials($np)]; 650 } 651 elsif ($n eq 'first-name') { 652 $namec{first} = $np; 653 $namec{first_i} = [_gen_initials($np)]; 654 } 655 elsif ($n eq 'suffix') { 656 $namec{suffix} = $np; 657 $namec{suffix_i} = [_gen_initials($np)]; 658 } 659 elsif ($n eq 'corp-name') { 660 $namec{last} = $np; 661 $namec{last_i} = [_gen_initials($np)]; 662 } 663 elsif ($n eq 'initials') { 664 $namec{first_i} = $np; 665 } 666 elsif ($n eq 'middle-initial') { 667 my $mi = $np; 668 $mi =~ s/\s*\.//g; 669 $namec{middle} = $np; 670 $namec{middle_i} = [ $mi ]; 671 } 672 elsif ($n eq 'title' or $n eq 'salutation') { 673 $namec{first} = "$np " . $namec{first}; 674 } 675 } 676 } 677 # Only warn about lastnames since there should always be one 678 biber_warn("Couldn't determine Lastname for name XPath: " . $node->nodePath) unless exists($namec{last}); 679 680 my $namestring = ''; 681 682 # Don't add suffix to namestring or nameinitstring as these are used for uniquename disambiguation 683 # which should only care about lastname + any prefix (if useprefix=true). See biblatex github 684 # tracker #306. 685 686 # lastname 687 if (my $l = $namec{last}) { 688 $namestring .= "$l, "; 689 } 690 691 # firstname 692 if (my $f = $namec{first}) { 693 $namestring .= "$f"; 694 } 695 696 # Remove any trailing comma and space if, e.g. missing firstname 697 $namestring =~ s/,\s+\z//xms; 698 699 # Construct $nameinitstring 700 my $nameinitstr = ''; 701 $nameinitstr .= $namec{last} if exists($namec{last}); 702 $nameinitstr .= '_' . join('', @{$namec{first_i}}) if exists($namec{first}); 703 $nameinitstr .= '_' . join('', @{$namec{middle_i}}) if exists($namec{middle}); 704 $nameinitstr =~ s/\s+/_/g; 705 706 return Biber::Entry::Name->new( 707 firstname => $namec{first} // undef, 708 firstname_i => exists($namec{first}) ? $namec{first_i} : undef, 709 middlename => $namec{middle} // undef, 710 middlename_i => exists($namec{middle}) ? $namec{middle_i} : undef, 711 lastname => $namec{last} // undef, 712 lastname_i => exists($namec{last}) ? $namec{last_i} : undef, 713 suffix => $namec{suffix} // undef, 714 suffix_i => exists($namec{suffix}) ? $namec{suffix_i} : undef, 715 namestring => $namestring, 716 nameinitstring => $nameinitstr, 717 ); 718 } 719 else { # parse with bibtex library because Endnote XML is rubbish 720 my $namestr = $node->textContent(); 721 722 # First sanitise the namestring due to Text::BibTeX::Name limitations on whitespace 723 $namestr =~ s/\A\s*//xms; # leading whitespace 724 $namestr =~ s/\s*\z//xms; # trailing whitespace 725 $namestr =~ s/\s+/ /g; # Collapse internal whitespace 726 727 my $tberr = File::Temp->new(TEMPLATE => 'biber_Text_BibTeX_STDERR_XXXXX', 728 DIR => $Biber::MASTER->biber_tempdir); 729 my $tberr_name = $tberr->filename; 730 731 open OLDERR, '>&', \*STDERR; 732 open STDERR, '>', $tberr_name; 733 my $name = new Text::BibTeX::Name($namestr); 734 open STDERR, '>&', \*OLDERR; 735 close OLDERR; 736 737 # Put any Text::BibTeX errors into the biber warnings/errors collections 738 # We are parsing the libbtparse library error/warning strings a little here 739 # This is not so bad as they have a clean structure (see error.c in libbtparse) 740 open my $tbe, '<', $tberr_name; 741 while (<$tbe>) { 742 if (/error:/) { 743 chomp; 744 biber_error("BibTeX subsystem: $_"); 745 } 746 elsif (/warning:/) { 747 chomp; 748 biber_warn("BibTeX subsystem: $_"); 749 } 750 } 751 close($tbe); 752 753 # Formats so we can get BibTeX compatible nbsp inserted 754 my $l_f = new Text::BibTeX::NameFormat('l', 0); 755 my $f_f = new Text::BibTeX::NameFormat('f', 0); 756 my $p_f = new Text::BibTeX::NameFormat('v', 0); 757 my $s_f = new Text::BibTeX::NameFormat('j', 0); 758 $l_f->set_options(BTN_LAST, 0, BTJ_MAYTIE, BTJ_NOTHING); 759 $f_f->set_options(BTN_FIRST, 0, BTJ_MAYTIE, BTJ_NOTHING); 760 $p_f->set_options(BTN_VON, 0, BTJ_MAYTIE, BTJ_NOTHING); 761 $s_f->set_options(BTN_JR, 0, BTJ_MAYTIE, BTJ_NOTHING); 762 763 # Generate name parts 764 my $lastname = $name->format($l_f); 765 my $firstname = $name->format($f_f); 766 my $prefix = $name->format($p_f); 767 my $suffix = $name->format($s_f); 768 769 # Variables to hold the Text::BibTeX::NameFormat generated initials string 770 my $gen_lastname_i; 771 my $gen_firstname_i; 772 my $gen_prefix_i; 773 my $gen_suffix_i; 774 775 # Use a copy of $name so that when we generate the 776 # initials, we do so without certain things. This is easier than trying 777 # hack robust initials code into btparse ... 778 my $nd_namestr = strip_noinit($namestr); 779 my $nd_name = new Text::BibTeX::Name($nd_namestr, $fieldname); 780 781 # Initials formats 782 my $li_f = new Text::BibTeX::NameFormat('l', 1); 783 my $fi_f = new Text::BibTeX::NameFormat('f', 1); 784 my $pi_f = new Text::BibTeX::NameFormat('v', 1); 785 my $si_f = new Text::BibTeX::NameFormat('j', 1); 786 787 # Initials generated with forced tie so we can make an array 788 $li_f->set_text(BTN_LAST, undef, undef, undef, ''); 789 $fi_f->set_text(BTN_FIRST, undef, undef, undef, ''); 790 $pi_f->set_text(BTN_VON, undef, undef, undef, ''); 791 $si_f->set_text(BTN_JR, undef, undef, undef, ''); 792 $li_f->set_options(BTN_LAST, 1, BTJ_FORCETIE, BTJ_NOTHING); 793 $fi_f->set_options(BTN_FIRST, 1, BTJ_FORCETIE, BTJ_NOTHING); 794 $pi_f->set_options(BTN_VON, 1, BTJ_FORCETIE, BTJ_NOTHING); 795 $si_f->set_options(BTN_JR, 1, BTJ_FORCETIE, BTJ_NOTHING); 796 797 $gen_lastname_i = inits($nd_name->format($li_f)); 798 $gen_firstname_i = inits($nd_name->format($fi_f)); 799 $gen_prefix_i = inits($nd_name->format($pi_f)); 800 $gen_suffix_i = inits($nd_name->format($si_f)); 801 802 my $namestring = ''; 803 # prefix 804 my $ps; 805 my $prefix_stripped; 806 my $prefix_i; 807 if ($prefix) { 808 $prefix_i = $gen_prefix_i; 809 $namestring .= "$prefix "; 810 } 811 # lastname 812 my $ls; 813 my $lastname_stripped; 814 my $lastname_i; 815 if ($lastname) { 816 $lastname_i = $gen_lastname_i; 817 $namestring .= "$lastname, "; 818 } 819 # suffix 820 my $ss; 821 my $suffix_stripped; 822 my $suffix_i; 823 if ($suffix) { 824 $suffix_i = $gen_suffix_i; 825 $namestring .= "$suffix, "; 826 } 827 # firstname 828 my $fs; 829 my $firstname_stripped; 830 my $firstname_i; 831 if ($firstname) { 832 $firstname_i = $gen_firstname_i; 833 $namestring .= "$firstname"; 834 } 835 836 # Remove any trailing comma and space if, e.g. missing firstname 837 # Replace any nbspes 838 $namestring =~ s/,\s+\z//xms; 839 $namestring =~ s/~/ /gxms; 840 841 # Construct $nameinitstring 842 my $nameinitstr = ''; 843 $nameinitstr .= join('', @$prefix_i) . '_' if ( $usepre and $prefix ); 844 $nameinitstr .= $lastname if $lastname; 845 $nameinitstr .= '_' . join('', @$suffix_i) if $suffix; 846 $nameinitstr .= '_' . join('', @$firstname_i) if $firstname; 847 $nameinitstr =~ s/\s+/_/g; 848 $nameinitstr =~ s/~/_/g; 849 850 # The "strip" entry tells us which of the name parts had outer braces 851 # stripped during processing so we can add them back when printing the 852 # .bbl so as to maintain maximum BibTeX compatibility 853 return Biber::Entry::Name->new( 854 firstname => $firstname eq '' ? undef : $firstname, 855 firstname_i => $firstname eq '' ? undef : $firstname_i, 856 lastname => $lastname eq '' ? undef : $lastname, 857 lastname_i => $lastname eq '' ? undef : $lastname_i, 858 prefix => $prefix eq '' ? undef : $prefix, 859 prefix_i => $prefix eq '' ? undef : $prefix_i, 860 suffix => $suffix eq '' ? undef : $suffix, 861 suffix_i => $suffix eq '' ? undef : $suffix_i, 862 namestring => $namestring, 863 nameinitstring => $nameinitstr 864 ); 865 } 866} 867 868# Passed an array ref of strings, returns an array ref of initials 869sub _gen_initials { 870 my @strings = @_; 871 my @strings_out; 872 foreach my $str (@strings) { 873 # Deal with hyphenated name parts and normalise to a '-' character for easy 874 # replacement with macro later 875 if ($str =~ m/\p{Dash}/) { 876 push @strings_out, join('-', _gen_initials(split(/\p{Dash}/, $str))); 877 } 878 else { 879 my $chr = Unicode::GCString->new($str)->substr(0, 1)->as_string; 880 # Keep diacritics with their following characters 881 if ($chr =~ m/\p{Dia}/) { 882 push @strings_out, Unicode::GCString->new($str)->substr(0, 2)->as_string; 883 } 884 else { 885 push @strings_out, $chr; 886 } 887 } 888 } 889 return @strings_out; 890} 891 892 893# Syntactically get the leaf node of a node path 894sub _leaf_node { 895 my $node_path = shift; 896 return $node_path =~ s|.+/([^/]+$)|$1|r; 897} 898 899 900# Do some sanitising since this can't be nicely done by the parser 901sub _norm { 902 my $t = shift; 903 return undef unless $t; 904 $t =~ s/\A[\n\s]+//xms; 905 $t =~ s/[\n\s]+\z//xms; 906 return $t; 907} 908 909sub _get_handler { 910 my $field = shift; 911 if (my $h = $handlers->{CUSTOM}{$field}) { 912 return $h; 913 } 914 else { 915 return $handlers->{$dm->get_fieldtype($field)}{$dm->get_fieldformat($field) || 'default'}{$dm->get_datatype($field)}; 916 } 917} 918 9191; 920 921__END__ 922 923=pod 924 925=encoding utf-8 926 927=head1 NAME 928 929Biber::Input::file::endnotexml - look in a Zotero RDFXML file for an entry and create it if found 930 931=head1 DESCRIPTION 932 933Provides the extract_entries() method to get entries from a biblatexml data source 934and instantiate Biber::Entry objects for what it finds 935 936=head1 AUTHOR 937 938François Charette, C<< <firmicus at ankabut.net> >> 939Philip Kime C<< <philip at kime.org.uk> >> 940 941=head1 BUGS 942 943Please report any bugs or feature requests on our Github tracker at 944L<https://github.com/plk/biber/issues>. 945 946=head1 COPYRIGHT & LICENSE 947 948Copyright 2009-2015 François Charette and Philip Kime, all rights reserved. 949 950This module is free software. You can redistribute it and/or 951modify it under the terms of the Artistic License 2.0. 952 953This program is distributed in the hope that it will be useful, 954but without any warranty; without even the implied warranty of 955merchantability or fitness for a particular purpose. 956 957=cut 958