# /=====================================================================\ #
# |  LaTeXML::Post::CrossRef                                            | #
# |  Scan for ID's etc                                                  | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #

package LaTeXML::Post::CrossRef;
use strict;
use warnings;
use LaTeXML::Util::Pathname;
use LaTeXML::Common::XML;
use LaTeXML::Common::Error;
use charnames qw(:full);
use LaTeXML::Post;
use base qw(LaTeXML::Post::Processor);

my $NBSP = pack('U', 0xA0);    # CONSTANT

# Constructor.  Recognized %options (besides whatever the superclass takes):
#   db              : the object database used for all "ID:..." lookups
#   urlstyle        : style of generated URLs (see generateURL)
#   number_sections : if true, plain ltx:ref's default to showing "refnum", else "title"
#   min_ref_length  : stored as given, defaulting to 1
#   ref_join        : text joining an object's reference text to that of its ancestors
#   navigation_toc  : if set, the format of a TOC to be added to ltx:navigation
sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  $$self{db}       = $options{db};
  $$self{urlstyle} = $options{urlstyle};
##  $$self{toc_show} = ($options{number_sections} ? "typerefnum title" : "title");
  # Default format for ltx:ref's within TOC's
  $$self{toc_show} = 'toctitle';
  # Default format for regular ltx:ref's
  # [BTW: Does number_sections really still make sense?]
  $$self{ref_show}       = ($options{number_sections} ? "refnum" : "title");
  $$self{min_ref_length} = (defined $options{min_ref_length} ? $options{min_ref_length} : 1);
  $$self{ref_join}       = (defined $options{ref_join} ? $options{ref_join} : " \x{2023} "); # or " in " or ... ?
  $$self{navigation_toc} = $options{navigation_toc};
  return $self; }

# Main entry point: fill in all cross-referencing information within $doc,
# then report (once per severity & type) everything that was noted as missing.
# Returns $doc.
sub process {
  my ($self, $doc, $root) = @_;
  # Collects the problems noted (via note_missing) while processing this document.
  local %LaTeXML::Post::CrossRef::MISSING = ();
  if (my $navtoc = $$self{navigation_toc}) { # If a navigation toc requested, put a toc in nav; will get filled in
    my $toc = ['ltx:TOC', { format => $navtoc }];
    if (my $nav = $doc->findnode('//ltx:navigation')) {
      $doc->addNodes($nav, $toc); }
    else {
      $doc->addNodes($doc->getDocumentElement, ['ltx:navigation', {}, $toc]); } }
  $self->fillInGlossaryRef($doc);
  $self->fill_in_relations($doc);
  $self->fill_in_tocs($doc);
  $self->fill_in_frags($doc);
  $self->fill_in_refs($doc);
  $self->fill_in_RDFa_refs($doc);
  $self->fill_in_bibrefs($doc);
  $self->fill_in_mathlinks($doc);
  $self->copy_resources($doc);

  if (keys %LaTeXML::Post::CrossRef::MISSING) {
    my $tempid = 0;
    foreach my $severity (qw(error warn info)) {
      foreach my $type (sort keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity} }) {
        # Sorted, so that the message does not depend on hash ordering.
        my @items = sort keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity}{$type} };
        # Once the stand-in ID has been seen, the explanatory note stays on all later messages.
        $tempid ||= grep { $_ eq 'TEMPORARY_DOCUMENT_ID' } @items;
        my @args = ('expected', 'ids', undef,
          "Missing $type: " . join(',', @items),
          ($tempid ? "[Note TEMPORARY_DOCUMENT_ID is a stand-in ID for the main document.]" : ()));
        if    ($severity eq 'error') { Error(@args); }
        elsif ($severity eq 'warn')  { Warn(@args); }
        elsif ($severity eq 'info')  { Info(@args); } } } }
  return $doc; }

# Record that $key (of kind $type) was missing, to be reported by process
# at the given $severity ('error', 'warn' or 'info').
sub note_missing {
  my ($self, $severity, $type, $key) = @_;
  $LaTeXML::Post::CrossRef::MISSING{$severity}{$type}{$key}++;
  return; }

# Add navigation relations (up, start, prev, next, and relations to
# sibling, ancestor-sibling & child pages) from $doc to related pages.
# Does nothing if the document element has no xml:id or the id is not in the database.
sub fill_in_relations {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  if (my $id = $doc->getDocumentElement->getAttribute('xml:id')) {
    if (my $entry = $db->lookup("ID:" . $id)) {
      # First, add the basic relations
      my $x;
      # Apparently, "up", "up up", "up up up" is the desired form for html5
      my $xentry = $entry;
      my $rel    = 'up';
      while (($x = $xentry->getValue('parent')) && ($xentry = $db->lookup("ID:" . $x))) {
        if ($xentry->getValue('title')) {    # it's interesting if it has a title (INCONSISTENT!!!)
              ### NOT pageid, like the others, because of the sleasy link to \part in dlmf!!!
          $doc->addNavigation($rel => $xentry->getValue('id'));
          $rel .= ' up'; } }
      # $xentry is now the topmost ancestor reached (or undef, if a parent lookup failed)
      if ($xentry && ($id ne $xentry->getValue('pageid'))) {
        $doc->addNavigation(start => $xentry->getValue('pageid')); }
      if (my $prev = $self->findPreviousPage($entry)) {    # previous page
        $doc->addNavigation(prev => $prev->getValue('pageid')); }
      if (my $next = $self->findNextPage($entry)) {
        $doc->addNavigation(next => $next->getValue('pageid')); }

      # Now, dig around for other interesting related documents
      # Use the entry types themselves for the relations
      $xentry = $entry;
      # Firstly, look at siblings of this page, then at siblings of parent,
      # then those of grandparent, etc.
      # In a large/complex site, this gets way too much. But how to prune?
      while ($xentry = $self->getParentPage($xentry)) {
        # any siblings of (grand)parent are "interesting" structural elements
        # OR, even more interesting: the index, bibliography, glossary related to current page!
        foreach my $sib ($self->getChildPages($xentry)) {
          next if $sib->getValue('pageid') eq $id;
          $self->_add_page_relation($doc, $sib); } }
      # Then Look at (only?) 1st level of pages below this one.
      foreach my $child ($self->getChildPages($entry)) {
        $self->_add_page_relation($doc, $child); }
  } }
  return; }

# Private helper for fill_in_relations: add a navigation link from $doc to the page
# of the database entry $page.  For a primary page, the relation is the entry's
# type with its namespace prefix removed; otherwise it is 'sidebar'.
sub _add_page_relation {
  my ($self, $doc, $page) = @_;
  my $pageid   = $page->getValue('pageid');
  my $relation = 'sidebar';    # Unless primary, consider it as some sort of sidebar.
  if ($page->getValue('primary')) {
    # Use the element name (w/o prefix) as the relation !!!!
    $relation = $page->getValue('type');
    $relation =~ s/^(\w+)://; }
  $doc->addNavigation($relation => $pageid);
  return; }

# Find the entry of the page preceding that of $entry:
# the rightmost (primary) descendant page of the nearest preceding primary sibling page,
# or else the parent page.  Returns nothing if there is no parent page,
# or if this page isn't found amongst its parent's child pages.
sub findPreviousPage {
  my ($self, $entry) = @_;
  my $page = $entry->getValue('pageid');
  # Look at parent's entry, and get the list of our siblings
  if (my $pentry = $self->getParentPage($entry)) {
    my @sibs = $self->getChildPages($pentry);
    while (@sibs && $sibs[-1]->getValue('pageid') ne $page) {    # peel off following sibs
      pop(@sibs); }
    return unless @sibs && $sibs[-1]->getValue('pageid') eq $page;    # Broken database?
    pop(@sibs);                                                      # Now skip our own entry ($id)
    @sibs = grep { $_->getValue('primary') } @sibs;
    # If there IS a preceding sibling, find its rightmost descendant
    while (@sibs) {
      $pentry = $sibs[-1];
      @sibs   = grep { $_->getValue('primary') } $self->getChildPages($pentry); }
    return $pentry; }    # Return deepest page found
  return; }

# Find the entry of the page following that of $entry:
# its first primary child page, or else the next primary sibling page
# of this page or of its nearest ancestor page that has one.
# Returns nothing if there is no such page.
sub findNextPage {
  my ($self, $entry) = @_;
  # Return first child page, if any
  my @ch = grep { $_->getValue('primary') } $self->getChildPages($entry);
  return $ch[0] if @ch;
  my $page = $entry->getValue('pageid');
  # Look at parent's entry, and get the list of siblings
  while ($entry = $self->getParentPage($entry)) {
    my @sibs = $self->getChildPages($entry);
    while (@sibs && $sibs[0]->getValue('pageid') ne $page) {    # peel off preceding sibs, till found,
      shift(@sibs); }
    return unless @sibs && ($sibs[0]->getValue('pageid') eq $page);    # Broken database?
    shift(@sibs);                                                     # remove our own entry ($id)
    @sibs = grep { $_->getValue('primary') } @sibs;                   # Skip uninteresting pages
    return $sibs[0] if @sibs;
    $page = $entry->getValue('pageid'); }    # Not found: continue upward from the parent page
  return; }

# Return the entry for the page containing the parent of the page containing $entry;
# ie. entry -> its page's entry -> that entry's parent -> the parent's page's entry.
# Returns a false value as soon as any step of that chain fails.
sub getParentPage {
  my ($self, $entry) = @_;
  my $x;
  return ($x = $entry->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('parent')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))
    && $x; }

# Follow the parent -> page chain upward from $entry as far as it goes,
# returning the entry of the last page reached ($entry itself, if it has no parent).
sub getRootPage {
  my ($self, $entry) = @_;
  my $x    = $entry;
  my $root = $entry;
  while (($x = $x->getValue('parent')) && ($x = $$self{db}->lookup("ID:" . $x))
    && ($x = $x->getValue('pageid')) && ($x = $$self{db}->lookup("ID:" . $x))) {
    $root = $x; }
  return $root; }

# Assuming this entry is for a page, find the closest descendants that are (distinct) pages
# Children lying on the same page as $entry are searched recursively;
# children without a database entry or without a pageid are skipped.
sub getChildPages {
  my ($self, $entry) = @_;
  my $page = $entry->getValue('pageid');
  my @p    = ();
  foreach my $ch (@{ $entry->getValue('children') || [] }) {
    if (my $e = $$self{db}->lookup("ID:" . $ch)) {
      if (my $p = $e->getValue('pageid')) {    # if valid page
        push(@p, ($p ne $page ? ($e) : $self->getChildPages($e))); } } }
  return @p; }

# this is probably the same as "Interesting" for the above relations.
# To make it more extensible, it really should be integrated into the database?
# Eg. "sectional" things might mark their entries specially?
# Set of element types shown as siblings & ancestors in a "context" TOC (see gentoc_context).
my $normaltoctypes = { map { ($_ => 1) }    # CONSTANT
    qw (ltx:document ltx:part ltx:chapter
    ltx:section ltx:subsection ltx:subsubsection
    ltx:paragraph ltx:subparagraph
    ltx:index ltx:bibliography ltx:glossary ltx:appendix) };

# Fill in each ltx:TOC that does not yet contain an ltx:toclist.
# Attributes of the ltx:TOC that are consulted:
#   select : '|' separated element types to include (default: no restriction by type)
#   scope  : 'global' starts from the root page, otherwise from the current document ('current')
#   format : 'normal...' (default) or 'context'; any other format generates nothing
#   lists  : whitespace separated list names the entries must be in (default 'toc')
#   show   : show pattern for the generated ltx:ref's (default $$self{toc_show})
sub fill_in_tocs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $toc ($doc->findnodes('descendant::ltx:TOC[not(ltx:toclist)]')) {
    $n++;
    my $selector = $toc->getAttribute('select');
    my $types;
    if ($selector) {
      $types = { map { ($_ => 1) } split(/\s*\|\s*/, $selector) }; }
    # global vs children of THIS or Document node?
    my $id     = $doc->getDocumentElement->getAttribute('xml:id');
    my $scope  = $toc->getAttribute('scope')  || 'current';
    my $format = $toc->getAttribute('format') || 'normal';
    my $lists;
    if (my $listname = $toc->getAttribute('lists')) {
      # split(' ',...) ignores leading whitespace & collapses runs of whitespace,
      # so that no empty list names are produced.
      $lists = { map { $_ => 1 } split(' ', $listname) }; }
    else {
      $lists = { toc => 1 }; }
    if ($scope eq 'global') {
      if (my $entry = $$self{db}->lookup("ID:" . $id)) {
        if (my $root = $self->getRootPage($entry)) {
          $id = $root->getValue('pageid'); } } }
    my $show = $toc->getAttribute('show') || $$self{toc_show};
    my @list = ();
    if (!$format || ($format =~ /^normal/)) {
      @list = $self->gentoc($doc, $id, $show, $lists, $types); }
    elsif ($format eq 'context') {
      $lists = { toc => 1 };    # context TOCs always use the toc list, whatever @lists said
      @list  = $self->gentoc_context($doc, $id, $show, $lists, $types); }
    $doc->addNodes($toc, ['ltx:toclist', {}, @list]) if @list; }
  Debug("Filled in $n TOCs") if $LaTeXML::DEBUG{crossref};
  return; }

# generate TOC for $id & its children,
# providing that those objects are of appropriate type.
# Returns a list of 0 or more ltx:tocentry's (possibly containing ltx:toclist's)
# Note that parent/child relationships stored in ObjectDB can also reflect less
# `interesting' objects like para or p style paragraphs, and such.
# Arguments:
#   $id      : id of the object to start from
#   $show    : show pattern for the entries (see gentocentry)
#   $lists   : hash of list names; an object is listed only if its 'inlist' shares one
#   $types   : hash of allowed element types, or undef to allow any type
#   $localto : if defined, only descend into objects whose 'location' equals it
#   $selfid  : id of the entry (if any) to be marked as referring to the current page
# An object that doesn't qualify is skipped, but its qualifying descendants are still returned.
sub gentoc {
  my ($self, $doc, $id, $show, $lists, $types, $localto, $selfid) = @_;
  if (my $entry = $$self{db}->lookup("ID:$id")) {
    my @kids = ();
    if ((!defined $localto) || (($entry->getValue('location') || '') eq $localto)) {
      @kids = map { $self->gentoc($doc, $_, $show, $lists, $types, $localto, $selfid) }
        @{ $entry->getValue('children') || [] }; }
    my $type = $entry->getValue('type');
    if ((!$types || ($type && $$types{$type}))
      && inlist_match($lists, $entry->getValue('inlist'))) {
      return $self->gentocentry($doc, $entry, $selfid, $show, @kids); }
    else {
      return @kids; } }
  else {
    return (); } }

# Test whether two sets of list names (hashes: name => true) have a name in common.
# Yields a false value if either set is missing.
sub inlist_match {
  my ($listsa, $listsb) = @_;
  return ($listsa && $listsb && grep { $$listsb{$_} } keys %$listsa); }

# Experimental show pattern: before < filling > after
# Build an ltx:tocentry for $entry:  the filling becomes the show pattern of an
# (as yet unfilled) ltx:ref to the entry, while before & after (if any) are
# expanded immediately by generateRef_simple;  @children, if any, are wrapped in a nested ltx:toclist.
# The entry gets class ltx_ref_self if its id is $selfid.
sub gentocentry {
  my ($self, $doc, $entry, $selfid, $show, @children) = @_;
  my $id       = $entry->getValue('id');
  my $type     = $entry->getValue('type');
  my $typename = $type; $typename =~ s/^ltx://;
  my ($before, $after);
  if ($show =~ /^(.*?)\<(.*?)$/) { $before = $1; $show  = $2; }
  if ($show =~ /^(.*?)\>(.*?)$/) { $show   = $1; $after = $2; }
  # Good candidate for before = thumbnail
  return (['ltx:tocentry',
      { class => "ltx_tocentry_$typename"
          . (defined $selfid && ($selfid eq $id) ? ' ltx_ref_self' : "") },
      ($before ? $self->generateRef_simple($doc, $id, $before) : ()),
      ['ltx:ref', { show => $show, idref => $id }],
      ($after ? $self->generateRef_simple($doc, $id, $after) : ()),
      (@children ? (['ltx:toclist', { class => "ltx_toclist_$typename" }, @children]) : ())]); }

# Generate a "context" TOC, that shows what's on the current page,
# but also shows the page in the context of its siblings & ancestors.
# This is useful for putting in a navigation bar.
# Arguments are as for gentoc.  Returns a list of ltx:tocentry's, or nothing if $id is unknown.
sub gentoc_context {
  my ($self, $doc, $id, $show, $lists, $types) = @_;
  if (my $entry = $$self{db}->lookup("ID:$id")) {
    # Generate Downward TOC covering items WITHIN the current page.
    my @navtoc = $self->gentoc($doc, $id, $show, $lists, $types, $entry->getValue('location') || '', $id);
    # Then enclose it upwards along with siblings & ancestors
    my $p_id;
    while (($p_id = $entry->getValue('parent')) && ($entry = $$self{db}->lookup("ID:$p_id"))) {
      # Replace the TOC so far by the list of siblings, with the TOC so far standing in
      # for the sibling we came from.  Children that aren't in the database are skipped.
      @navtoc =
        map {
        ($_->getValue('id') eq $id
          ? @navtoc
          : $self->gentocentry($doc, $_, undef, $show)) }
        grep { $_ && $$normaltoctypes{ $_->getValue('type') || '' } }    # or should we use @inlist???
        map  { $$self{db}->lookup("ID:$_") }
        @{ $entry->getValue('children') || [] };
      # Wrap in an entry for the ancestor itself, unless it is the topmost one (no parent)
      if (($types ? $$types{ $entry->getValue('type') } : 1)
        && $entry->getValue('parent')) {
        @navtoc = ($self->gentocentry($doc, $entry, undef, $show, @navtoc)); }
      $id = $p_id; }
    return @navtoc; }
  else {
    return (); } }

# Copy the fragid recorded in the database (if any) onto each node that has an xml:id.
sub fill_in_frags {
  my ($self, $doc) = @_;
  my $n  = 0;
  my $db = $$self{db};
  # Any nodes with an ID will get a fragid;
  # This is the id/name that will be used within xhtml/html.
  foreach my $node ($doc->findnodes('//@xml:id')) {
    if (my $entry = $db->lookup("ID:" . $node->value)) {
      if (my $fragid = $entry->getValue('fragid')) {
        $n++;
        $node->parentNode->setAttribute(fragid => $fragid); } } }
  Debug("Filled in fragment $n ids") if $LaTeXML::DEBUG{crossref};
  return; }

# Fill in content text for any <... @idref..>'s or @labelref
# For each such element (other than ltx:XMRef):
#  * a labelref is resolved to an idref; if that fails the label is noted as missing,
#    class ltx_missing_label is added and, if the element has no text,
#    the label itself is inserted and the element is marked broken;
#  * href & title attributes are added, unless already present;
#  * content is generated if the element has neither text nor element children
#    (except for ltx:graphics & ltx:picture);
#  * the element is marked as a stub if the target's entry is a stub.
sub fill_in_refs {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $ref ($doc->findnodes('descendant::*[@idref or @labelref]')) {
    my $tag = $doc->getQName($ref);
    next if $tag eq 'ltx:XMRef';    # Blech; list those TO fill-in, or list those to exclude?
    my $id   = $ref->getAttribute('idref');
    my $show = $ref->getAttribute('show');
    $show = $$self{ref_show} unless $show;
    if (!$id) {
      if (my $label = $ref->getAttribute('labelref')) {
        my $entry;
        if (($entry = $db->lookup($label)) && ($id = $entry->getValue('id'))) {
          $ref->setAttribute(idref => $id); }
        else {
          $self->note_missing('warn', 'Target for Label', $label);
          my $cl = $ref->getAttribute('class');
          $ref->setAttribute(class => ($cl ? $cl . ' ltx_missing_label' : 'ltx_missing_label'));
          if (!$ref->textContent) {
            $doc->addNodes($ref, $label);    # Just to reassure (?) readers.
            $ref->setAttribute(broken => 1); }
        } } }

    if ($id) {
      $n++;
      if (!$ref->getAttribute('href')) {
        if (my $url = $self->generateURL($doc, $id)) {
          $ref->setAttribute(href => $url); } }
      if (!$ref->getAttribute('title')) {
        if (my $titlestring = $self->generateTitle($doc, $id)) {
          $ref->setAttribute(title => $titlestring); } }
      if (!$ref->textContent && !element_nodes($ref)
        && !(($tag eq 'ltx:graphics') || ($tag eq 'ltx:picture'))) {
        my $is_nameref = ($ref->getAttribute('class') || '') =~ 'ltx_refmacro_nameref';
        $doc->addNodes($ref, $self->generateRef($doc, $id, $show, $is_nameref)); }
      if (my $entry = $$self{db}->lookup("ID:$id")) {
        $ref->setAttribute(stub => 1) if $entry->getValue('stub'); }
    } }
  Debug("Filled in $n refs") if $LaTeXML::DEBUG{crossref};
  return; }

# similar sorta thing for RDF about & resource labels & ids
# For $key in about, resource:  @{$key}labelref is resolved to @{$key}idref,
# and @$key is set (unless already present) to the URL of the id'd object,
# or to '#'.id if the id is not in the database.
sub fill_in_RDFa_refs {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $key (qw(about resource)) {
    foreach my $ref ($doc->findnodes('descendant::*[@' . $key . 'idref or @' . $key . 'labelref]')) {
      my $id = $ref->getAttribute($key . 'idref');
      if (!$id) {
        if (my $label = $ref->getAttribute($key . 'labelref')) {
          my $entry;
          if (($entry = $db->lookup($label)) && ($id = $entry->getValue('id'))) {
            $ref->setAttribute($key . 'idref' => $id); }
          else {
            $self->note_missing('warn', "Target for $key Label", $label);
          } } }
      if ($id) {
        $n++;
        if (!$ref->getAttribute($key)) {
          if ($db->lookup("ID:" . $id)) {    # RDF "id" need not be real, valid, ids!!!
            if (my $url = $self->generateURL($doc, $id)) {
              $ref->setAttribute($key => $url); } }
          else {
            $ref->setAttribute($key => '#' . $id); } }
      } } }
  set_RDFa_prefixes($doc->getDocument, {});    # what prefixes??
  Debug("Filled in $n RDFa refs") if $LaTeXML::DEBUG{crossref};
  return; }

# Link math symbols to their declarations:  any element (other than ltx:XMRef, or one
# already having an href) with @decl_id or @meaning gets an href to where the
# declaration appears, and a title from the declaration's short tag or description.
# @decl_id selects a local declaration, unless the symbol is within
# an ltx:glossaryphrase or typed ltx:declare; otherwise @meaning selects a global one.
sub fill_in_mathlinks {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $sym ($doc->findnodes('descendant::*[@decl_id or @meaning]')) {
    my $tag = $doc->getQName($sym);
    next if $tag eq 'ltx:XMRef';    # Blech; list those TO fill-in, or list those to exclude?
    next if $sym->hasAttribute('href');
    my $decl_id = $sym->getAttribute('decl_id');
    my $meaning = $sym->getAttribute('meaning');
    my $entry;
    if ($decl_id
      && !$doc->findnodes('ancestor::ltx:glossaryphrase | ancestor::ltx:declare[@type]', $sym)) {
      $entry = $db->lookup("DECLARATION:local:$decl_id"); }
    elsif ($meaning) {
      $entry = $db->lookup("DECLARATION:global:$meaning"); }
    if ($entry) {
      $n++;
      ## HACK: DLMF copies $meaning to ltxx:meaning for search indexing
      ## This should evolve into using (future) mml @mathrole?
##      if ($meaning && $$doc{namespaces}{ltxx}) {
##        $$node[1]{'ltxx:meaning'} = $meaning; }
      if (my $id = $self->getIDForDeclaration($entry)) {    # Where defined
        # generateURL returns an empty list upon failure; capture it in a scalar
        # so that setAttribute always gets both a name and a value.
        my $url = $self->generateURL($doc, $id);
        $sym->setAttribute(href => $url) if defined $url;
        if (my $description = $entry->getValue('tag:short') || $entry->getValue('description')) {
          $sym->setAttribute(title => getTextContent($doc, $description)); }
      } } }
  Debug("Filled in $n math links") if $LaTeXML::DEBUG{crossref};
  return; }

# Given a declaration entry (ltx:declare, or ltx:mark or ...)
# Return the id of an appropriate link target.
# Basically this is the parent, except (DLMF specific?) it should be a table ROW, not CELL
# Or the numbered equationgroup, not the unnumbered equation
# Returns nothing if there is no entry, or it has no parent.
sub getIDForDeclaration {
  my ($self, $entry) = @_;
  if (my $pid = $entry && $entry->getValue('parent')) {
    if (my $pentry = $$self{db}->lookup("ID:$pid")) {
      my $ptype = $pentry->getValue('type') || '';
      # If definition is in a table cell, the correct id will be that of the row
      if ($ptype eq 'ltx:td') {
        if (my $gpid = $pentry->getValue('parent')) {
          return $gpid; } }
      # If definition is in unnumbered equation within an equation group, use id of the group
      elsif (($ptype eq 'ltx:equation') && !$pentry->getValue('refnum')) {
        if (my $gpid = $pentry->getValue('parent')) {
          if (my $gpentry = $$self{db}->lookup("ID:$gpid")) {
            my $gptype = $gpentry->getValue('type') || '';
            if ($gptype eq 'ltx:equationgroup') {
              return $gpid; } } } }
    }
    return $pid; }
  return; }

# Needs to evolve into the combined stuff that we had in DLMF.
# (eg.
# concise author/year combinations for multiple bibrefs)
# Replace each ltx:bibref by the citation links generated by make_bibcite.
sub fill_in_bibrefs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $bibref ($doc->findnodes('descendant::ltx:bibref')) {
    $n++;
    $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); }
  Debug("Filled in $n bibrefs") if $LaTeXML::DEBUG{crossref};
  return; }

# Given a list of bibkeys, construct links to them.
# Mostly tuned to author-year style.
# Combines when multiple bibitems share the same authors.
# Attributes of the ltx:bibref that are consulted:
#   bibrefs     : comma separated bibkeys
#   show        : pattern of roles (author(s), fullauthor(s), title, refnum, phraseN, year,
#                 number, super), {} quoted literals, ~ and other text; defaults to 'refnum';
#                 'nothing' generates nothing;  'none' uses the bibref's own children
#                 (or is treated as 'refnum', if there are none)
#   separator   : placed between citations (default ',')
#   yyseparator : placed between years/numbers of combined citations (default ',')
#   inlist      : whitespace separated bibliography list names (default 'bibliography')
# Returns the list of nodes (array representation, strings & cloned nodes) replacing the bibref.
sub make_bibcite {
  my ($self, $doc, $bibref) = @_;

  my @keys         = grep { $_ } split(/,/, $bibref->getAttribute('bibrefs') || '');
  my $show         = $bibref->getAttribute('show');
  my @preformatted = $bibref->childNodes();
  if ($show && ($show eq 'none') && !@preformatted) {
    $show = 'refnum'; }
  if (!$show) {
    $show = 'refnum'; }
  if ($show eq 'nothing') {    # Ad Hoc support for \nocite!t
    return (); }
  my $sep     = $bibref->getAttribute('separator')   || ',';
  my $yysep   = $bibref->getAttribute('yyseparator') || ',';
  my @phrases = element_nodes($bibref);    # get the ltx;bibrefphrase's in the bibref!
  # Collect all the data from the bibliography
  my @data  = ();
  my @lists = split(/\s+/, $bibref->getAttribute('inlist') || 'bibliography');
  foreach my $key (@keys) {
    my ($bentry, $id, $entry);
    # NOTE: bibkeys are downcased when we look them up!
    foreach my $list (@lists) {    # Find the first of the lists that contains this bibkey
      $bentry = $$self{db}->lookup("BIBLABEL:" . $list . ':' . lc($key));
      last if $bentry; }
    if ($bentry
      && ($id    = $bentry->getValue('id'))
      && ($entry = $$self{db}->lookup("ID:$id"))) {
      my $authors  = $entry->getValue('authors');
      my $fauthors = $entry->getValue('fullauthors');
      my $keytag   = $entry->getValue('keytag');
      my $year     = $entry->getValue('year');
      my $typetag  = $entry->getValue('typetag');
      my $number   = $entry->getValue('number');
      my $title    = $entry->getValue('title');
      my $refnum   = $entry->getValue('refnum');    # This comes from the \bibitem, w/o BibTeX
      my ($rawyear, $suffix);

      my $titlestring = undef;
      if (defined $title) {
        $titlestring = $title->textContent;
        $titlestring =~ s/^\s+//;        # Trim leading whitespace
        $titlestring =~ s/\s+$//;        # and trailing
        $titlestring =~ s/\s+/ /gs; }    # and normalize all other whitespace.
      # Split a year like 1999a into year & suffix, for abbreviating combined citations
      if ($year && ($year->textContent) =~ /^(\d\d\d\d)(\w)$/) {
        ($rawyear, $suffix) = ($1, $2); }
      # NOTE: this changes $show for all the keys of this bibref, not just this one.
      $show = 'refnum' unless ($show eq 'none') || $authors || $fauthors || $keytag;    # Disable author-year format!
                                                                                        # fullnames ?
      push(@data, {
          key         => $key,
          authors     => [$doc->trimChildNodes($authors  || $fauthors || $keytag)],
          fullauthors => [$doc->trimChildNodes($fauthors || $authors  || $keytag)],
          authortext  => ($authors || $fauthors ? ($authors || $fauthors)->textContent : ''),
          year        => [$doc->trimChildNodes($year || $typetag)],
          rawyear     => $rawyear,
          suffix      => $suffix,
          number      => [$doc->trimChildNodes($number)],
          refnum      => [$doc->trimChildNodes($refnum)],
          title       => [$doc->trimChildNodes($title || $keytag)],
          attr        => { idref => $id,
            href => orNull($self->generateURL($doc, $id)),
            ($titlestring ? (title => $titlestring) : ()) } }); }
    else {
      $self->note_missing('warn', 'Entry for citation', $key);
      # Include all fields that the combining loops below read from a FOLLOWING datum
      # (authortext, year, number), so that a missing citation following a good one
      # can neither cause uninitialized warnings nor a fatal dereference of undef.
      push(@data, { key => $key, refnum => [$key], title => [$key], year => [],
          authors => [], fullauthors => [], authortext => '', number => [],
          attr    => { idref => $key, title => $key, class => "ltx_missing_citation" } });
    } }
  # Only combine citations sharing authors when showing both authors and year or number
  my $checkdups = ($show =~ /author/i) && ($show =~ /(year|number)/i);
  my @refs      = ();
  my $saveshow  = $show;
  while (@data) {
    my $datum  = shift(@data);
    my $didref = 0;              # whether an ltx:ref has already been made within @stuff
    my @stuff  = ();
    $show = $saveshow;
    if (($show eq 'none') && @preformatted) {
      @stuff = @preformatted; $show = ''; }
    elsif ($$datum{attr}{class} && ($$datum{attr}{class} eq 'ltx_missing_citation')) {
      @stuff  = (['ltx:ref', $$datum{attr}, $$datum{key}]);
      $didref = 1;
      $show   = '';
    }

    # Add delimiters for parsing...
    $show =~ s/(\w)year/$1\{\}year/gi;
    $show =~ s/(\w)phrase/$1\{\}phrase/gi;
    while ($show) {
      if ($show =~ s/^(\w+)//) {
        my $role = lc($1); $role =~ s/s$//;    # remove trailing plural
        if ($role eq 'author') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{authors} })); }
        elsif ($role eq 'fullauthor') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{fullauthors} })); }
        elsif ($role eq 'title') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{title} })); }
        elsif ($role eq 'refnum') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{refnum} })); }
        elsif ($role =~ /^phrase(\d)$/) {
          # HACK! Avoid empty () from situations where we've set the show (CITE_STYLE) too early
          # and don't actually have author-year information!
          my $n = $1;
          if (($n == 1) && ($show =~ /^\{\}year\{\}phrase2/i) && !scalar(@{ $$datum{year} })
            && (!$phrases[0] || (length($phrases[0]->textContent) <= 1))
            && (!$phrases[1] || (length($phrases[1]->textContent) <= 1))) {
            $show =~ s/^\{\}year\{\}phrase2//i; }
          else {
            push(@stuff, $phrases[$n - 1]->childNodes) if $phrases[$n - 1]; } }
        elsif ($role eq 'year') {
          # NOTE(review): year is always an array reference here, so this first branch
          # looks unreachable; was a warning for an EMPTY year intended? -- confirm.
          if (!$$datum{year}) {
            $self->note_missing('warn', 'Date for citation', $$datum{key}); }
          elsif (@{ $$datum{year} }) {
            push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{year} }]);
            $didref = 1;
            while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
              my $next = shift(@data);
              push(@stuff, $yysep, ' ');
              # Same year, differing only by suffix? then only show the suffix
              if ((($$datum{rawyear} || 'no_year_1') eq ($$next{rawyear} || 'no_year_2')) && $$next{suffix}) {
                push(@stuff, ['ltx:ref', $$next{attr}, $$next{suffix}]); }
              else {
                push(@stuff, ['ltx:ref', $$next{attr}, @{ $$next{year} }]); } } } }
        elsif ($role eq 'number') {
          push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@stuff, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); } }
        elsif ($role eq 'super') {
          my @r = ();
          push(@r, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@r, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); }
          push(@stuff, ['ltx:sup', {}, @r]); }
        else {
          Info('unexpected', $role, $doc, "CITE ignoring show key '$role'"); } }
      elsif ($show =~ s/^\{([^\}]*)\}//) {    # pass-thru literal, quoted with {}
        push(@stuff, $1) if $1; }
      elsif ($show =~ s/^~//) {               # Pass-thru spaces
        push(@stuff, $NBSP) if @stuff; }
      elsif ($show =~ s/^(\s+)//) {           # Pass-thru spaces
        push(@stuff, $1) if @stuff; }
      elsif ($show =~ s/^(\W+)//) {           # Pass-thru non show keywords
        push(@stuff, $1); } }
    # If no link was made within the content, wrap all of the content in one.
    push(@refs,
      (@refs ? ($sep, ' ') : ()),
      ($didref ? @stuff : (['ltx:ref', $$datum{attr}, @stuff]))); }
  return @refs; }

# Generate the URL, relative to $doc's destination, for the object with the given $id.
# Depending on $$self{urlstyle} (default 'file'):
#   server     : a trailing index.<extension> is removed
#   negotiated : the trailing .<extension> and a trailing index are removed
# where <extension> is $$self{extension} (default 'xml').
# The object's fragid, if any, is appended as #fragment;
# the path is left empty when the target is within $doc itself.
# Returns nothing (after noting what was missing) if $id isn't in the database,
# or its entry has no location.
sub generateURL {
  my ($self, $doc, $id) = @_;
  my ($object, $location);
  if ($object = $$self{db}->lookup("ID:" . $id)) {
    if ($location = $object->getValue('location')) {
      my $doclocation = $doc->siteRelativeDestination;
      my $pathdir     = pathname_directory($doclocation);
      my $url         = pathname_relative(($location =~ m|^/| ? $location : '/' . $location),
        ($pathdir =~ m|^/| ? $pathdir : '/' . $pathdir));
      my $extension = $$self{extension} || 'xml';
      my $urlstyle  = $$self{urlstyle}  || 'file';
      if ($urlstyle eq 'server') {
        # Remove trailing index.$extension but be careful not to leave url empty! (then it's "self")
        # (the dot is escaped, so that only a literal "index." is matched)
        $url =~ s/(^|\/)index\.\Q$extension\E$/($1 ? $1 : '.\/')/e; }
      elsif ($urlstyle eq 'negotiated') {
        $url =~ s/\.\Q$extension\E$//;    # Remove trailing $extension
        $url =~ s/(^|\/)index$/$1/;       # AND trailing index
      }
      $url = '.' unless $url;
      if (my $fragid = $object->getValue('fragid')) {
        $url = '' if ($url eq '.') or ($location eq $doclocation);
        $url .= '#' . $fragid; }
      elsif ($location eq $doclocation) {
        $url = ''; }
      return $url; }
    else {
      $self->note_missing('warn', 'File location for ID', $id); } }
  else {
    $self->note_missing('warn', 'DB Entry for ID', $id); }
  return; }

# Generate the contents of a <ltx:ref> of the given id.
# show is a string containing substrings 'type', 'refnum' and 'title'
# (standing for the type prefix, refnum and title of the id'd object)
# and any other random characters; see generateRef_aux for how the pattern is expanded.
# The requested pattern is tried on the object and then, joined by $$self{ref_join},
# on its ancestors, until the accumulated content passes checkRefContent;
# failing that, the same is tried with a fallback pattern (title or refnum).
# For a document for which nothing at all was found, its children are tried.
# As a last resort the id itself is returned (and the problem noted).
sub generateRef {
  my ($self, $doc, $reqid, $reqshow, $is_nameref) = @_;
  my $pending = '';
  my @stuff;
  # Try the requested show pattern, and if it fails, try a fallback of just the title or refnum
  foreach my $show (($reqshow, ($reqshow !~ /title/ ? "title" : "refnum"))) {
    my $id = $reqid;
    # Start with requested ID, add some from parent(s), if needed/until to make "useful" link content
    while (my $entry = $id && $$self{db}->lookup("ID:$id")) {
      if (my @s = $self->generateRef_aux($doc, $entry, $show, $is_nameref)) {
        push(@stuff, $pending) if $pending;
        push(@stuff, @s);
        return @stuff if $self->checkRefContent($doc, @stuff);
        $pending = $$self{ref_join}; }    # inside/outside this brace determines if text can START with the join.
      $id = $entry->getValue('parent'); } }
  if (!@stuff) {                          # Try first child for a title-less document?
    if (my $entry = $$self{db}->lookup("ID:$reqid")) {
      if (($entry->getValue('type') || '') eq 'ltx:document') {
        # The entry may have no children recorded at all.
        foreach my $c (@{ $entry->getValue('children') || [] }) {
          if (my $centry = $$self{db}->lookup("ID:$c")) {
            if (my @s = $self->generateRef_aux($doc, $centry, $reqshow, $is_nameref)) {
              push(@stuff, @s); last; } } } } } }
  if (@stuff) {
    return @stuff; }
  else {
    $self->note_missing('info', 'Usable title for ID', $reqid);
    return ($reqid); } }    # id is crummy, but better than "?"... or?

# Expand the show pattern $reqshow for the object $reqid only (no ancestors, no fallbacks);
# returns an empty list if either is missing or the id is unknown.
sub generateRef_simple {
  my ($self, $doc, $reqid, $reqshow) = @_;
  return () unless $reqshow && $reqid;
  my $found = $$self{db}->lookup("ID:$reqid");
  return ($found ? $self->generateRef_aux($doc, $found, $reqshow) : ()); }

# Decide whether the proposed content of a <ltx:ref> is "Good Enough":
# returns 1 if, ignoring any word "in" followed by whitespace,
# the text contains at least one word character; 0 otherwise.
# (A minimum length could be checked instead, but requiring
# some real "text", rather than just symbols, seems better.)
sub checkRefContent {
  my ($self, $doc, @stuff) = @_;
  my $text = text_content(@stuff);
  $text =~ s/\bin\s+//g;
  return 1 if $text =~ /\w/;
  return 0; }

# Concatenate the text of a list of items (see text_content_aux for the kinds of item).
sub text_content {
  my (@stuff) = @_;
  my @pieces = map { text_content_aux($_) } @stuff;
  return join('', @pieces); }

# Text of a single item:
#   [tag, attributes, children...] gives the text of its children;
#   an object whose class begins with XML:: gives its textContent;
#   anything else is returned as is.
sub text_content_aux {
  my ($item) = @_;
  my $kind = ref $item;
  if ($kind eq 'ARRAY') {
    my @children = @$item;
    splice(@children, 0, 2);    # drop the tag & attributes
    return text_content(@children); }
  return $item->textContent if $kind =~ /^XML::/;
  return $item; }

my %ref_fallbacks = (    # Alternative fields, when not found
  typerefnum  => [qw(refnum)],
  rrefnum     => [qw(typerefnum frefnum refnum)],    # obsolete?
  toctitle    => [qw(title toccaption)],
  title       => [qw(toccaption)],
  rawtoctitle => [qw(toctitle title toccaption)],
  rawtitle    => [qw(title toccaption)],
);

# Generate text to fill in an ltx:ref from a database entry for some object.
# The show pattern indicates what data to use; usually a single keyword
# (or keywords separated by spaces, ~ or {} enclosed literal text)
# The keywords are things like refnum, title, caption, etc
# (possibly coming from ltx:tag or other data; see Scan)
# Each keyword is looked up in $entry under its own name, then as "tag:<keyword>",
# then under the alternatives in %ref_fallbacks; the first true value is wrapped in an
# ltx:text of class ltx_ref_title (keyword contains "title") or ltx_ref_tag.
# Returns the generated list, or an empty list if NO keyword yielded a value.
sub generateRef_aux {
  my ($self, $doc, $entry, $show, $is_nameref) = @_;
  my @stuff = ();
  my $OK    = 0;    # set once any keyword has yielded a value
  while ($show) {
    if ($show =~ s/^(\w+)//) {    # peel off next keyword
      my $key   = lc($1);
      my $class = ($key =~ /title/ ? 'ltx_ref_title' : 'ltx_ref_tag');
      my @keys  = ($key, 'tag:' . $key,
        ($ref_fallbacks{$key} ? @{ $ref_fallbacks{$key} } : ()));
      my $value;
      foreach my $k (@keys) {     # lookup the data for that keyword (or an alternative)
        $value = $entry->getValue($k);
        last if $value; }
      if ($value) {
        $OK = 1;
        if ($is_nameref) {
          # yank out the tag if this is nameref
          # NOTE(review): this unbinds the tag from the value as obtained from the entry,
          # before prepRefText clones it; presumably that also affects later uses
          # of the same stored value -- confirm.
          my ($first_child) = element_nodes($value);
          $first_child->unbindNode if $first_child && ($doc->getQName($first_child) eq 'ltx:tag'); }
        push(@stuff, ['ltx:text', { class => $class }, $self->prepRefText($doc, $value)]); } }
    elsif ($show =~ s/^\{([^\}]*)\}//) {    # pass-thru literal, quoted with {}
      push(@stuff, $1) if $1; }
    elsif ($show =~ s/^~//) {               # Pass-thru spaces
      push(@stuff, $NBSP) if @stuff; }      # (as non-breaking space; dropped when leading)
    elsif ($show =~ s/^(\s+)//) {           # Pass-thru spaces
      push(@stuff, $1) if @stuff; }         # (dropped when leading)
    elsif ($show =~ s/^(\W+)//) {           # Pass-thru non show keywords
      push(@stuff, $1); } }
  # Maybe nothing found for this entry (probably retry on parent?)
  return ($OK ? @stuff : ()); }

# Prepare a title (or similar value) for use as reference text:
# fill in any embedded references (fillInTitle), trim the child nodes & clone them.
sub prepRefText {
  my ($self, $doc, $title) = @_;
  return $doc->cloneNodes($doc->trimChildNodes($self->fillInTitle($doc, $title))); }

# Like prepRefText, but also removes a leading ltx:tag child.
# NOTE(review): the pattern only matches ltx:toctitle, the (?:toc) group having no
# quantifier; presumably (?:toc)? was intended so as to cover ltx:title too -- confirm.
sub prepRawRefText {
  my ($self, $doc, $title) = @_;
  my $node = $self->prepRefText($doc, $title);
  if ($doc->getQName($node) =~ /^ltx:(?:toc)title$/) {    # Trim tags from titles
    my ($first) = element_nodes($node);
    if ($first && ($doc->getQName($first) eq 'ltx:tag')) {
      $node->removeChild($first); } }
  return $node; }

# Generate a title string for ltx:ref
# This is the text of the title (or else typerefnum, frefnum or refnum) of the object $id,
# followed by those of its ancestors, joined by $$self{ref_join}.
# Returns an empty string if nothing was found.
sub generateTitle {
  my ($self, $doc, $id) = @_;
  # Add author, if any ???
  my $string    = "";
  my $altstring = "";    # never assigned anything else within this sub
  while (my $entry = $id && $$self{db}->lookup("ID:$id")) {
    my $title = $self->fillInTitle($doc,
###      $entry->getValue('title') || $entry->getValue('rrefnum')
      $entry->getValue('title') || $entry->getValue('typerefnum')
        || $entry->getValue('frefnum') || $entry->getValue('refnum'));
    #    $title = $title->textContent if $title && ref $title;
    $title = getTextContent($doc, $title) if $title && ref $title;
    if ($title) {
      $string .= $$self{ref_join} if $string;
      $string .= $title; }
    $id = $entry->getValue('parent'); }    # then continue with the parent
  return $string || $altstring; }

# Return the text of $title (a node or a plain string) with whitespace normalized.
# A false $title is returned unchanged.
sub getTextContent {
  my ($doc, $title) = @_;
  $title = getTextContent_rec($doc, $title) if $title && ref $title;
  $title =~ s/^\s+//s  if $title;    # Trim leading whitespace
  $title =~ s/\s+$//s  if $title;    # and trailing
  $title =~ s/\s+/ /gs if $title;    # and normalize all other whitespace.
  return $title; }

# Recursively collect the text of $node:  text nodes give their text;
# an ltx:tag gives its text wrapped in its open & close attributes;
# other elements & document fragments give the text of their children;
# any other kind of node gives ''.
sub getTextContent_rec {
  my ($doc, $node) = @_;
  my $type = $node->nodeType;
  if ($type == XML_TEXT_NODE) {
    return $node->textContent; }
  elsif ($type == XML_ELEMENT_NODE) {
    my $tag = $doc->getQName($node);
    if ($tag eq 'ltx:tag') {
      return ($node->getAttribute('open') || '')
        . $node->textContent    # assuming no nested ltx:tag
        . ($node->getAttribute('close') || ''); }
    else {
      return join('', map { getTextContent_rec($doc, $_); } $node->childNodes); } }
  elsif ($type == XML_DOCUMENT_FRAG_NODE) {
    return join('', map { getTextContent_rec($doc, $_); } $node->childNodes); }
  else {
    return ''; } }

# Fill in any embedded ltx:ref's & ltx:cite's within a title
sub fillInTitle {
  my ($self, $doc, $title) = @_;
  return $title unless $title && ref $title;
  # Fill in any nested ref's!
  foreach my $ref ($doc->findnodes('descendant::ltx:ref[@idref or @labelref]', $title)) {
    next if $ref->textContent;
    my $show = $ref->getAttribute('show');
    $show = $$self{ref_show} unless $show;
    my $refentry;
    if (my $id = $ref->getAttribute('idref')) {
      $refentry = $$self{db}->lookup("ID:$id"); }
    elsif (my $label = $ref->getAttribute('labelref')) {
      $refentry = $$self{db}->lookup($label);
      if ($id = $refentry->getValue('id')) {
        $refentry = $$self{db}->lookup("ID:$id"); } }
    if ($refentry) {
      $doc->replaceNode($ref, $self->generateRef_aux($doc, $refentry, $show)); } }
  # Fill in (replace, actually) any embedded citations.
833 foreach my $bibref ($doc->findnodes('descendant::ltx:bibref', $title)) { 834 $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); } 835 foreach my $break ($doc->findnodes('descendant::ltx:break', $title)) { 836 $doc->replaceNode($break, ['ltx:text', {}, " "]); } 837 return $title; } 838 839sub fillInGlossaryRef { 840 my ($self, $doc) = @_; 841 my $n = 0; 842 foreach my $ref ($doc->findnodes('descendant::ltx:glossaryref')) { 843 $n++; 844 my $key = $ref->getAttribute('key'); 845 my @lists = split(/\s+/, $ref->getAttribute('inlist') || 'glossary'); 846 my $show = $ref->getAttribute('show'); 847 my ($list, $entry) = ('', undef); 848 foreach my $alist (@lists) { # Find list with this key 849 if ($entry = $$self{db}->lookup(join(':', 'GLOSSARY', $alist, $key))) { 850 $list = $alist; last; } } 851 if ($entry) { 852 my $title = $entry->getValue('phrase:definition'); 853 if (!$ref->getAttribute('title') && $title) { 854 $ref->setAttribute(title => $title->textContent); } 855 if (my $id = $entry->getValue('id')) { 856 $ref->setAttribute(idref => $id); } 857 if (!$ref->textContent && !element_nodes($ref)) { 858 my @stuff = $self->generateGlossaryRefTitle($doc, $entry, $show); 859 if (@stuff) { 860 $doc->addNodes($ref, @stuff); } 861 else { 862 $self->note_missing('warn', "Glossary ($list) contents ($show) for key", $key); 863 $doc->addNodes($ref, $key); 864 $doc->addClass($ref, 'ltx_missing'); } } } 865 else { 866 $self->note_missing('warn', "Glossary ($list) Entry for key", $key); } 867 if (!$ref->textContent && !element_nodes($ref)) { 868 $doc->addNodes($ref, $key); 869 $doc->addClass($ref, 'ltx_missing'); } } 870 Debug("Filled in $n glossaryrefs") if $LaTeXML::DEBUG{crossref}; 871 return; } 872 873sub generateGlossaryRefTitle { 874 my ($self, $doc, $entry, $show) = @_; 875 my $phrases = $entry->getValue('phrases'); 876 my @stuff = (); 877 if (my $phrase = $entry->getValue('phrase:' . $show)) { 878 push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . 
$show }, 879 $self->prepRefText($doc, $phrase)]); } 880 elsif ($show =~ /^(\w+)-plural$/) { 881 my $sh = $1; 882 if (my $phrase = $entry->getValue('phrase:' . $sh)) { 883 push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . $show }, 884 $self->prepRefText($doc, $phrase), 's']); } } 885 elsif ($show =~ /^(\w+)-indefinite$/) { 886 my $sh = $1; 887 if (my $phrase = $entry->getValue('phrase:' . $sh)) { 888 my $s = $phrase->textContent; 889 my $art = ($s =~ /^[aeiou]/i ? 'an ' : 'a '); 890 push(@stuff, ['ltx:text', { class => 'ltx_glossary_' . $show }, 891 $art, $self->prepRefText($doc, $phrase)]); } } 892 return @stuff; } 893 894sub orNull { 895 return (grep { defined } @_) ? @_ : undef; } 896 897# Possibly this needs support from Scan, as well? 898# to manage resources, record in Manifest, something like that? 899sub copy_resources { 900 my ($self, $doc) = @_; 901 # Copy any "resources" linked from the document 902 my $paths = [$doc->getSearchPaths]; 903 foreach my $n ($doc->findnodes('//ltx:ref[@href and not(@idref) and not(@labelref)]')) { 904 my $url = $n->getAttribute('href'); 905 if ($url !~ /^(\w+:|\/)/) { # relative path? (No explicit protocol, or absolute) 906 if (my $src = pathname_find($url, paths => $paths)) { # AND if file exists there. 907 my $dst = $doc->checkDestination($url); 908 pathname_copy($src, $dst); 909 } } } 910 return; } 911 912# ================================================================================ 9131; 914