1# /=====================================================================\ #
2# |  LaTeXML::Post::CrossRef                                            | #
3# | Scan for ID's etc                                                   | #
4# |=====================================================================| #
5# | Part of LaTeXML:                                                    | #
6# |  Public domain software, produced as part of work done by the       | #
7# |  United States Government & not subject to copyright in the US.     | #
8# |---------------------------------------------------------------------| #
9# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
10# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
11# \=========================================================ooo==U==ooo=/ #
12
13package LaTeXML::Post::CrossRef;
14use strict;
15use warnings;
16use LaTeXML::Util::Pathname;
17use LaTeXML::Common::XML;
18use LaTeXML::Common::Error;
19use charnames qw(:full);
20use LaTeXML::Post;
21use base qw(LaTeXML::Post::Processor);
22
my $NBSP = pack('U', 0xA0);    # CONSTANT: the character U+00A0 (no-break space); make_bibcite emits it for '~' in a show pattern
24
# Constructor: builds the processor via the superclass, then records
# the cross-referencing options on the object.
# Options read here: db, urlstyle, navigation_toc, number_sections,
# min_ref_length, ref_join.
sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  # These options are stored exactly as supplied (possibly undef).
  foreach my $key (qw(db urlstyle navigation_toc)) {
    $$self{$key} = $options{$key}; }
  # Default format for ltx:ref's within TOC's
  $$self{toc_show} = 'toctitle';
  # Default format for regular ltx:ref's
  # [BTW: Does number_sections really still make sense?]
  $$self{ref_show} = 'title';
  $$self{ref_show} = 'refnum' if $options{number_sections};
  # These options fall back to a default only when not defined at all
  # (so an explicit 0 or '' is respected).
  my %fallback = (
    min_ref_length => 1,
    ref_join       => " \x{2023} ");    # or " in " or ... ?
  foreach my $key (keys %fallback) {
    $$self{$key} = (defined $options{$key} ? $options{$key} : $fallback{$key}); }
  return $self; }
40
# Main entry point: fills in all cross-referencing information in $doc
# (navigation relations, TOCs, fragment ids, refs, RDFa refs, bibrefs, math links),
# then reports everything that was recorded through note_missing.
# Returns $doc.
sub process {
  my ($self, $doc, $root) = @_;
  # Collects the missing items noted during this run; localized so each run starts empty.
  local %LaTeXML::Post::CrossRef::MISSING = ();
  if (my $navtoc = $$self{navigation_toc}) { # If a navigation toc requested, put a toc in nav; will get filled in
    my $toc = ['ltx:TOC', { format => $navtoc }];
    if (my $nav = $doc->findnode('//ltx:navigation')) {
      $doc->addNodes($nav, $toc); }
    else {
      $doc->addNodes($doc->getDocumentElement, ['ltx:navigation', {}, $toc]); } }
  $self->fillInGlossaryRef($doc);
  $self->fill_in_relations($doc);
  $self->fill_in_tocs($doc);
  $self->fill_in_frags($doc);
  $self->fill_in_refs($doc);
  $self->fill_in_RDFa_refs($doc);
  $self->fill_in_bibrefs($doc);
  $self->fill_in_mathlinks($doc);
  $self->copy_resources($doc);

  if (keys %LaTeXML::Post::CrossRef::MISSING) {
    my $tempid = 0;
    foreach my $severity (qw(error warn info)) {
      foreach my $type (sort keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity} }) {
        # Sort the items so that the message does not depend on hash ordering.
        my @items = sort keys %{ $LaTeXML::Post::CrossRef::MISSING{$severity}{$type} };
        # Once the stand-in id has been seen, the explanatory note is added to every later message.
        $tempid ||= grep { $_ eq 'TEMPORARY_DOCUMENT_ID' } @items;
        my @args = ('expected', 'ids', undef,
          "Missing $type: " . join(',', @items),
          ($tempid ? "[Note TEMPORARY_DOCUMENT_ID is a stand-in ID for the main document.]" : ()));
        if    ($severity eq 'error') { Error(@args); }
        elsif ($severity eq 'warn')  { Warn(@args); }
        elsif ($severity eq 'info')  { Info(@args); } } } }
  return $doc; }
74
# Record one more occurrence of a missing item, filed under severity
# ('error', 'warn' or 'info', as reported by process) and type; nothing is returned.
sub note_missing {
  my ($self, $severity, $type, $key) = @_;
  my $tally = \%LaTeXML::Post::CrossRef::MISSING;
  $$tally{$severity}{$type}{$key} += 1;
  return; }
79
# Add navigation relations (up, start, prev, next, and links to related pages)
# to $doc, based on the database entry for the document element's xml:id.
# Does nothing when the document has no id or no database entry.
sub fill_in_relations {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $id = $doc->getDocumentElement->getAttribute('xml:id');
  return unless $id;
  my $entry = $db->lookup("ID:" . $id);
  return unless $entry;

  # First, add the basic relations
  # Apparently, "up", "up up", "up up up" is the desired form for html5
  my $ancestor = $entry;
  my $rel      = 'up';
  while (1) {
    my $parent_id = $ancestor->getValue('parent');
    last unless $parent_id;    # reached the top; $ancestor stays at the topmost entry
    $ancestor = $db->lookup("ID:" . $parent_id);
    last unless $ancestor;     # dangling parent id; $ancestor is now undefined
    if ($ancestor->getValue('title')) {    # it's interesting if it has a title (INCONSISTENT!!!)
      ### NOT pageid, like the others, because of the sleazy link to \part in dlmf!!!
      $doc->addNavigation($rel => $ancestor->getValue('id'));
      $rel .= ' up'; } }
  if ($ancestor && ($id ne $ancestor->getValue('pageid'))) {
    $doc->addNavigation(start => $ancestor->getValue('pageid')); }
  if (my $prev = $self->findPreviousPage($entry)) {    # previous page
    $doc->addNavigation(prev => $prev->getValue('pageid')); }
  if (my $next = $self->findNextPage($entry)) {
    $doc->addNavigation(next => $next->getValue('pageid')); }

  # Now, dig around for other interesting related documents
  # Use the entry types themselves for the relations
  my $link_page = sub {
    my ($page, $page_id) = @_;
    if ($page->getValue('primary')) {    # If a primary page
                                         # Use the element name (w/o prefix) as the relation !!!!
      my $relation = $page->getValue('type'); $relation =~ s/^(\w+)://;
      $doc->addNavigation($relation => $page_id); }
    else {                               # Else, consider it as some sort of sidebar.
      $doc->addNavigation('sidebar' => $page_id); }
    return; };
  # Firstly, look at siblings of this page, then at siblings of parent,
  # then those of grandparent, etc.
  # In a large/complex site, this gets way too much. But how to prune?
  for (my $up = $self->getParentPage($entry); $up; $up = $self->getParentPage($up)) {
    # any siblings of (grand)parent are "interesting" structural elements
    # OR, even more interesting: the index, bibliography, glossary related to current page!
    foreach my $sib ($self->getChildPages($up)) {
      my $sib_id = $sib->getValue('pageid');
      $link_page->($sib, $sib_id) unless $sib_id eq $id; } }
  # Then Look at (only?) 1st level of pages below this one.
  foreach my $child ($self->getChildPages($entry)) {
    $link_page->($child, $child->getValue('pageid')); }
  return; }
131
# Find the page preceding that of $entry in document order:
# the rightmost (deepest, last) primary descendant page of the nearest preceding
# primary sibling page, or else the parent page itself.
# Returns nothing if there is no parent page, or our page isn't among its children.
sub findPreviousPage {
  my ($self, $entry) = @_;
  my $pageid = $entry->getValue('pageid');
  # Look at parent's entry, and get the list of our siblings
  my $parent = $self->getParentPage($entry);
  return unless $parent;
  my @siblings = $self->getChildPages($parent);
  # Scan backwards for our own position among the siblings
  my $pos = $#siblings;
  $pos-- while ($pos >= 0) && ($siblings[$pos]->getValue('pageid') ne $pageid);
  return if $pos < 0;    # Broken database?
  # Consider only the primary pages strictly before our own entry
  my $found      = $parent;
  my @candidates = grep { $_->getValue('primary') } @siblings[0 .. $pos - 1];
  # If there IS a preceding sibling, descend to its rightmost descendant
  while (@candidates) {
    $found      = $candidates[-1];
    @candidates = grep { $_->getValue('primary') } $self->getChildPages($found); }
  return $found; }    # Return deepest page found
149
# Find the page following that of $entry in document order:
# its first primary child page, else the next primary sibling page,
# else the next primary sibling of the closest ancestor page that has one.
# Returns nothing if there is none, or if a page isn't found among its parent's children.
sub findNextPage {
  my ($self, $entry) = @_;
  # Return first child page, if any
  my @children = grep { $_->getValue('primary') } $self->getChildPages($entry);
  return $children[0] if @children;
  my $pageid   = $entry->getValue('pageid');
  my $ancestor = $entry;
  # Climb through the parent pages, looking among each one's children
  while ($ancestor = $self->getParentPage($ancestor)) {
    my @siblings = $self->getChildPages($ancestor);
    # Scan forwards for the position of the page we came from
    my $pos = 0;
    $pos++ while ($pos < @siblings) && ($siblings[$pos]->getValue('pageid') ne $pageid);
    return if $pos >= @siblings;    # Broken database?
    # Skip our own entry and any uninteresting pages
    my @following = grep { $_->getValue('primary') } @siblings[$pos + 1 .. $#siblings];
    return $following[0] if @following;
    $pageid = $ancestor->getValue('pageid'); }
  return; }
167
# Return the entry for the page that contains the parent of $entry's page.
# Three (attribute, lookup) steps are chained:
#   pageid -> page entry, parent -> parent entry, pageid -> parent's page entry.
# As soon as a step yields a false value, that false value is returned.
sub getParentPage {
  my ($self, $entry) = @_;
  my $x = $entry;
  foreach my $key (qw(pageid parent pageid)) {
    ($x = $x->getValue($key))               or return $x;
    ($x = $$self{db}->lookup("ID:" . $x)) or return $x; }
  return $x; }
175
# Climb from $entry through (parent, then that parent's page) links for as long
# as both can be resolved in the database; return the last page entry reached
# ($entry itself if no step succeeds).
sub getRootPage {
  my ($self, $entry) = @_;
  my $db     = $$self{db};
  my $root   = $entry;
  my $cursor = $entry;
  while (1) {
    my $parent_id = $cursor->getValue('parent')      or last;
    my $parent    = $db->lookup("ID:" . $parent_id) or last;
    my $page_id   = $parent->getValue('pageid')      or last;
    $cursor = $db->lookup("ID:" . $page_id)          or last;
    $root   = $cursor; }
  return $root; }
184
185# Assuming this entry is for a page, find the closest descendants that are (distinct) pages
# Collect the closest descendants of $entry that lie on a different page than
# $entry does.  Children without a database entry or without a pageid are skipped;
# children on the same page are searched recursively.
sub getChildPages {
  my ($self, $entry) = @_;
  my $pageid = $entry->getValue('pageid');
  my @pages  = ();
  foreach my $child_id (@{ $entry->getValue('children') || [] }) {
    my $child = $$self{db}->lookup("ID:" . $child_id);
    next unless $child;
    my $child_page = $child->getValue('pageid');
    next unless $child_page;    # not a valid page
    if ($child_page ne $pageid) {    # starts a distinct page
      push(@pages, $child); }
    else {                           # same page: look deeper
      push(@pages, $self->getChildPages($child)); } }
  return @pages; }
195
196# this is probably the same as "Interesting" for the above relations.
197# To make it more extensible, it really should be integrated into the database?
198# Eg. "sectional" things might mark their entries specially?
# Set (hash ref: element name => 1) of the element types that gentoc_context
# keeps when it lists the children of an ancestor.
my $normaltoctypes = {};    # CONSTANT
$$normaltoctypes{$_} = 1 foreach qw(
  ltx:document ltx:part ltx:chapter
  ltx:section ltx:subsection ltx:subsubsection
  ltx:paragraph ltx:subparagraph
  ltx:index ltx:bibliography ltx:glossary ltx:appendix);
204
# Fill in every ltx:TOC that does not yet contain an ltx:toclist.
# Attributes read from each ltx:TOC:
#   select : '|' separated element types to include (default: no type filtering)
#   scope  : 'global' starts from the root page; anything else from this document
#   format : 'normal...' (default) or 'context'; other formats produce nothing
#   lists  : whitespace separated list names to match (default: toc)
#   show   : show pattern for the entries (default: $$self{toc_show})
sub fill_in_tocs {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $toc ($doc->findnodes('descendant::ltx:TOC[not(ltx:toclist)]')) {
    $n++;
    my $types;
    if (my $selector = $toc->getAttribute('select')) {
      $types = { map { ($_ => 1) } split(/\s*\|\s*/, $selector) }; }
    # global vs children of THIS or Document node?
    my $id     = $doc->getDocumentElement->getAttribute('xml:id');
    my $scope  = $toc->getAttribute('scope')  || 'current';
    my $format = $toc->getAttribute('format') || 'normal';
    my $lists  = { toc => 1 };
    if (my $listname = $toc->getAttribute('lists')) {
      # split(' ',...) skips leading whitespace and treats runs of whitespace as
      # one separator, so stray spaces cannot introduce an empty list name.
      $lists = { map { ($_ => 1) } split(' ', $listname) }; }
    if ($scope eq 'global') {
      if (my $entry = $$self{db}->lookup("ID:" . $id)) {
        if (my $root = $self->getRootPage($entry)) {
          $id = $root->getValue('pageid'); } } }
    my $show = $toc->getAttribute('show') || $$self{toc_show};
    my @list = ();
    if ($format =~ /^normal/) {
      @list = $self->gentoc($doc, $id, $show, $lists, $types); }
    elsif ($format eq 'context') {
      $lists = { toc => 1 };    # a context toc always uses the toc list
      @list  = $self->gentoc_context($doc, $id, $show, $lists, $types); }
    $doc->addNodes($toc, ['ltx:toclist', {}, @list]) if @list; }
  Debug("Filled in $n TOCs") if $LaTeXML::DEBUG{crossref};
  return; }
237
238# generate TOC for $id & its children,
239# providing that those objects are of appropriate type.
240# Returns a list of 0 or more ltx:tocentry's (possibly containing ltx:toclist's)
241# Note that parent/child relationships stored in ObjectDB can also reflect less
242# `interesting' objects like para or p style paragraphs, and such.
sub gentoc {
  my ($self, $doc, $id, $show, $lists, $types, $localto, $selfid) = @_;
  # Arguments:
  #   $id      : id of the object to start from
  #   $show    : show pattern passed on to gentocentry
  #   $lists   : hash ref of list names; an entry qualifies if its 'inlist' shares one
  #   $types   : optional hash ref of acceptable element types (undef accepts all)
  #   $localto : if defined, only descend into entries whose location equals it
  #   $selfid  : id passed on to gentocentry, to mark the entry referring to "self"
  my $entry = $$self{db}->lookup("ID:$id");
  return () unless $entry;
  # Children are collected first; they are returned either wrapped in this
  # entry's tocentry, or directly, if this entry itself doesn't qualify.
  my @kids = ();
  if ((!defined $localto) || (($entry->getValue('location') || '') eq $localto)) {
    @kids = map { $self->gentoc($doc, $_, $show, $lists, $types, $localto, $selfid) }
      @{ $entry->getValue('children') || [] }; }
  my $type    = $entry->getValue('type');
  my $type_ok = (!$types) || ($type && $$types{$type});
  if ($type_ok && inlist_match($lists, $entry->getValue('inlist'))) {
    return $self->gentocentry($doc, $entry, $selfid, $show, @kids); }
  return @kids; }
259
# Do the two hash refs of list names have a (true-valued) name in common?
# Returns the false argument when either is missing; otherwise the keys of
# $listsa that have a true value in $listsb (their count, in scalar context).
sub inlist_match {
  my ($listsa, $listsb) = @_;
  return $listsa unless $listsa;
  return $listsb unless $listsb;
  return grep { $$listsb{$_} } keys %$listsa; }
263
264# Experimental show pattern:  before < filling > after
sub gentocentry {
  my ($self, $doc, $entry, $selfid, $show, @children) = @_;
  # Build one ltx:tocentry (as an array-form node) for $entry.
  # $show may have the form  before < filling > after ;  the filling becomes the
  # show pattern of an ltx:ref, while before & after are expanded immediately.
  # @children, if any, are nested inside an ltx:toclist.
  my $id   = $entry->getValue('id');
  my $type = $entry->getValue('type');
  (my $typename = $type) =~ s/^ltx://;
  my ($before, $after);
  if (my ($pre, $rest) = ($show =~ /^(.*?)\<(.*?)$/)) {
    ($before, $show) = ($pre, $rest); }
  if (my ($main, $post) = ($show =~ /^(.*?)\>(.*?)$/)) {
    ($show, $after) = ($main, $post); }
  # The entry referring to the requested "self" id gets an extra class
  my $class = "ltx_tocentry_$typename";
  $class .= ' ltx_ref_self' if (defined $selfid) && ($selfid eq $id);
  # Good candidate for before = thumbnail
  my @content = ();
  push(@content, $self->generateRef_simple($doc, $id, $before)) if $before;
  push(@content, ['ltx:ref', { show => $show, idref => $id }]);
  push(@content, $self->generateRef_simple($doc, $id, $after)) if $after;
  push(@content, ['ltx:toclist', { class => "ltx_toclist_$typename" }, @children]) if @children;
  return (['ltx:tocentry', { class => $class }, @content]); }
281
282# Generate a "context" TOC, that shows what's on the current page,
# but also shows the page in the context of its siblings & ancestors.
284# This is useful for putting in a navigation bar.
sub gentoc_context {
  my ($self, $doc, $id, $show, $lists, $types) = @_;
  # Returns a list of ltx:tocentry nodes (empty if $id has no database entry).
  my $entry = $$self{db}->lookup("ID:$id");
  return () unless $entry;
  # Generate Downward TOC covering items WITHIN the current page.
  my @navtoc = $self->gentoc($doc, $id, $show, $lists, $types, $entry->getValue('location') || '', $id);
  # Then enclose it upwards along with siblings & ancestors
  my $p_id;
  while (($p_id = $entry->getValue('parent')) && ($entry = $$self{db}->lookup("ID:$p_id"))) {
    # Collect the parent's children that are of a normal toc type,
    # skipping any child id that has no database entry.
    my @siblings = ();
    foreach my $child_id (@{ $entry->getValue('children') || [] }) {
      my $child = $$self{db}->lookup("ID:$child_id");
      next unless $child;
      next unless $$normaltoctypes{ $child->getValue('type') || '' };    # or should we use @inlist???
      push(@siblings, $child); }
    # The child we came up from is replaced by the toc built so far;
    # each other sibling gets a plain entry.
    @navtoc = map {
      ($_->getValue('id') eq $id
        ? @navtoc
        : $self->gentocentry($doc, $_, undef, $show)) } @siblings;
    # Wrap in an entry for the parent itself, unless it is the topmost entry
    # (has no parent) or is excluded by $types.
    if (($types ? $$types{ $entry->getValue('type') } : 1)
      && $entry->getValue('parent')) {
      @navtoc = ($self->gentocentry($doc, $entry, undef, $show, @navtoc)); }
    $id = $p_id; }
  return @navtoc; }
308
# Copy the 'fragid' recorded in the database onto every node that has an xml:id.
sub fill_in_frags {
  my ($self, $doc) = @_;
  my $db    = $$self{db};
  my $count = 0;
  # Any nodes with an ID will get a fragid;
  # This is the id/name that will be used within xhtml/html.
  foreach my $attr ($doc->findnodes('//@xml:id')) {
    my $entry = $db->lookup("ID:" . $attr->value);
    next unless $entry;
    my $fragid = $entry->getValue('fragid');
    next unless $fragid;
    $attr->parentNode->setAttribute(fragid => $fragid);
    $count++; }
  Debug("Filled in fragment $count ids") if $LaTeXML::DEBUG{crossref};
  return; }
322
323# Fill in content text for any <... @idref..>'s or @labelref
sub fill_in_refs {
  my ($self, $doc) = @_;
  # For every element with @idref or @labelref (except ltx:XMRef):
  # resolve a label to an id, then fill in href, title, content and stub.
  my $db    = $$self{db};
  my $count = 0;
REF: foreach my $ref ($doc->findnodes('descendant::*[@idref or @labelref]')) {
    my $tag = $doc->getQName($ref);
    next REF if $tag eq 'ltx:XMRef';    # Blech; list those TO fill-in, or list those to exclude?
    my $id   = $ref->getAttribute('idref');
    my $show = $ref->getAttribute('show') || $$self{ref_show};
    # Without an id, try to resolve the label through the database.
    my $label = ($id ? undef : $ref->getAttribute('labelref'));
    if ($label) {
      my $entry = $db->lookup($label);
      $id = $entry->getValue('id') if $entry;
      if ($entry && $id) {
        $ref->setAttribute(idref => $id); }
      else {
        $self->note_missing('warn', 'Target for Label', $label);
        my $class = $ref->getAttribute('class');
        $ref->setAttribute(class => join(' ', grep { $_ } $class, 'ltx_missing_label'));
        if (!$ref->textContent) {
          $doc->addNodes($ref, $label);    # Just to reassure (?) readers.
          $ref->setAttribute(broken => 1); } } }
    next REF unless $id;

    $count++;
    # Existing href, title or content are never overwritten.
    unless ($ref->getAttribute('href')) {
      my $url = $self->generateURL($doc, $id);
      $ref->setAttribute(href => $url) if $url; }
    unless ($ref->getAttribute('title')) {
      my $titlestring = $self->generateTitle($doc, $id);
      $ref->setAttribute(title => $titlestring) if $titlestring; }
    # Graphics & pictures don't get generated content
    my $is_media = ($tag eq 'ltx:graphics') || ($tag eq 'ltx:picture');
    if (!$is_media && !$ref->textContent && !element_nodes($ref)) {
      my $is_nameref = ($ref->getAttribute('class') || '') =~ 'ltx_refmacro_nameref';
      $doc->addNodes($ref, $self->generateRef($doc, $id, $show, $is_nameref)); }
    my $target = $$self{db}->lookup("ID:$id");
    $ref->setAttribute(stub => 1) if $target && $target->getValue('stub'); }
  Debug("Filled in $count refs") if $LaTeXML::DEBUG{crossref};
  return; }
365
366# similar sorta thing for RDF about & resource labels & ids
sub fill_in_RDFa_refs {
  my ($self, $doc) = @_;
  # For each of the RDFa attributes 'about' & 'resource': resolve any
  # @{key}labelref to an @{key}idref, and set @{key} to a URL for the id,
  # unless @{key} is already present.
  my $db    = $$self{db};
  my $count = 0;
  foreach my $key (qw(about resource)) {
    my ($idattr, $labelattr) = ($key . 'idref', $key . 'labelref');
    my $xpath = 'descendant::*[@' . $idattr . ' or @' . $labelattr . ']';
    foreach my $ref ($doc->findnodes($xpath)) {
      my $id = $ref->getAttribute($idattr);
      if (!$id) {
        if (my $label = $ref->getAttribute($labelattr)) {
          my $entry = $db->lookup($label);
          $id = $entry->getValue('id') if $entry;
          if ($entry && $id) {
            $ref->setAttribute($idattr => $id); }
          else {
            $self->note_missing('warn', "Target for $key Label", $label); } } }
      next unless $id;
      $count++;
      next if $ref->getAttribute($key);
      if (!$db->lookup("ID:" . $id)) {    # RDF "id" need not be real, valid, ids!!!
        $ref->setAttribute($key => '#' . $id); }
      elsif (my $url = $self->generateURL($doc, $id)) {
        $ref->setAttribute($key => $url); } } }
  set_RDFa_prefixes($doc->getDocument, {});    # what prefixes??
  Debug("Filled in $count RDFa refs") if $LaTeXML::DEBUG{crossref};
  return; }
394
# Link math symbols to their declarations: any element with @decl_id or @meaning
# (other than ltx:XMRef, or one that already has an href) gets an href to the
# place of declaration and, when available, a title describing it.
sub fill_in_mathlinks {
  my ($self, $doc) = @_;
  my $db = $$self{db};
  my $n  = 0;
  foreach my $sym ($doc->findnodes('descendant::*[@decl_id or @meaning]')) {
    next if $doc->getQName($sym) eq 'ltx:XMRef';    # Blech; list those TO fill-in, or list those to exclude?
    next if $sym->hasAttribute('href');
    my $decl_id = $sym->getAttribute('decl_id');
    my $meaning = $sym->getAttribute('meaning');
    my $entry;
    # A local declaration is used unless the symbol sits inside a glossaryphrase
    # or a typed declare; otherwise fall back to the global meaning.
    if ($decl_id
      && !$doc->findnodes('ancestor::ltx:glossaryphrase | ancestor::ltx:declare[@type]', $sym)) {
      $entry = $db->lookup("DECLARATION:local:$decl_id"); }
    elsif ($meaning) {
      $entry = $db->lookup("DECLARATION:global:$meaning"); }
    next unless $entry;
    $n++;
    ## HACK: DLMF copies $meaning to ltxx:meaning for search indexing
    ## This should evolve into using (future) mml @mathrole?
    if (my $id = $self->getIDForDeclaration($entry)) {    # Where defined
      # generateURL returns nothing on failure; fetch it in scalar context so that
      # a failure cannot silently drop the value argument of setAttribute.
      my $url = $self->generateURL($doc, $id);
      $sym->setAttribute(href => $url) if defined $url;
      if (my $label = $entry->getValue('tag:short') || $entry->getValue('description')) {
        $sym->setAttribute(title => getTextContent($doc, $label)); } } }
  Debug("Filled in $n math links") if $LaTeXML::DEBUG{crossref};
  return; }
424
425# Given a declaration entry (ltx:declare, or ltx:mark or ...)
426# Return the id of an appropriate link target.
427# Basically this is the parent, except (DLMF specific?) it should be a table ROW, not CELL
428# Or the numbered equationgroup, not the unnumbered equation
sub getIDForDeclaration {
  my ($self, $entry) = @_;
  # Returns the id to link to for the declaration $entry,
  # or nothing if $entry is missing or has no parent.
  my $pid = $entry && $entry->getValue('parent');
  return unless $pid;
  if (my $pentry = $$self{db}->lookup("ID:$pid")) {
    my $ptype = $pentry->getValue('type') || '';
    # If definition is in a table cell, the correct id will be that of the row
    if ($ptype eq 'ltx:td') {
      if (my $gpid = $pentry->getValue('parent')) {
        return $gpid; } }
    # If definition is in unnumbered equation within an equation group, use id of the group
    elsif (($ptype eq 'ltx:equation') && !$pentry->getValue('refnum')) {
      if (my $gpid = $pentry->getValue('parent')) {
        if (my $gpentry = $$self{db}->lookup("ID:$gpid")) {
          my $gptype = $gpentry->getValue('type') || '';
          if ($gptype eq 'ltx:equationgroup') {
            return $gpid; } } } } }
  # Otherwise (including a parent that has no database entry) the parent's id is used
  return $pid; }
447
448# Needs to evolve into the combined stuff that we had in DLMF.
449# (eg. concise author/year combinations for multiple bibrefs)
sub fill_in_bibrefs {
  my ($self, $doc) = @_;
  # Each ltx:bibref is replaced by the citation nodes built by make_bibcite.
  my @bibrefs = $doc->findnodes('descendant::ltx:bibref');
  foreach my $bibref (@bibrefs) {
    $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); }
  my $n = scalar(@bibrefs);
  Debug("Filled in $n bibrefs") if $LaTeXML::DEBUG{crossref};
  return; }
458
459# Given a list of bibkeys, construct links to them.
460# Mostly tuned to author-year style.
461# Combines when multiple bibitems share the same authors.
# Build the replacement content for one ltx:bibref node.
# Attributes read from $bibref: bibrefs (comma separated keys), show, separator,
# yyseparator and inlist (bibliography list names, default 'bibliography').
# Returns a list of nodes (array-form ltx:ref's, strings, cloned nodes),
# or the empty list when show is 'nothing'.
sub make_bibcite {
  my ($self, $doc, $bibref) = @_;

  my @keys         = grep { $_ } split(/,/, $bibref->getAttribute('bibrefs') || '');
  my $show         = $bibref->getAttribute('show');
  my @preformatted = $bibref->childNodes();
  # show defaults to 'refnum'; 'none' is only honored when there is preformatted content
  if ($show && ($show eq 'none') && !@preformatted) {
    $show = 'refnum'; }
  if (!$show) {
    $show = 'refnum'; }
  if ($show eq 'nothing') {    # Ad Hoc support for \nocite
    return (); }
  my $sep     = $bibref->getAttribute('separator')   || ',';
  my $yysep   = $bibref->getAttribute('yyseparator') || ',';
  my @phrases = element_nodes($bibref);    # get the ltx:bibrefphrase's in the bibref!
                                           # Collect all the data from the bibliography
  my @data    = ();
  my @lists   = split(/\s+/, $bibref->getAttribute('inlist') || 'bibliography');
  foreach my $key (@keys) {
    my ($bentry, $id, $entry);
    # NOTE: bibkeys are downcased when we look them up!
    foreach my $list (@lists) {            # Find the first of the lists that contains this bibkey
      $bentry = $$self{db}->lookup("BIBLABEL:" . $list . ':' . lc($key));
      last if $bentry; }
    if ($bentry
      && ($id    = $bentry->getValue('id'))
      && ($entry = $$self{db}->lookup("ID:$id"))) {
      my $authors  = $entry->getValue('authors');
      my $fauthors = $entry->getValue('fullauthors');
      my $keytag   = $entry->getValue('keytag');
      my $year     = $entry->getValue('year');
      my $typetag  = $entry->getValue('typetag');
      my $number   = $entry->getValue('number');
      my $title    = $entry->getValue('title');
      my $refnum   = $entry->getValue('refnum');        # This comes from the \bibitem, w/o BibTeX
      my ($rawyear, $suffix);

      my $titlestring = undef;
      if (defined $title) {
        $titlestring = $title->textContent;
        $titlestring =~ s/^\s+//;                       # Trim leading whitespace
        $titlestring =~ s/\s+$//;                       # and trailing
        $titlestring =~ s/\s+/ /gs; }                   # and normalize all other whitespace.
      # Split a year like "1999a" into the 4 digit year and its one character suffix
      if ($year && ($year->textContent) =~ /^(\d\d\d\d)(\w)$/) {
        ($rawyear, $suffix) = ($1, $2); }
      # NOTE: this changes $show for this and all following keys of the bibref.
      $show = 'refnum' unless ($show eq 'none') || $authors || $fauthors || $keytag; # Disable author-year format!
                                                                                     # fullnames ?
      push(@data, {
          key         => $key,
          authors     => [$doc->trimChildNodes($authors  || $fauthors || $keytag)],
          fullauthors => [$doc->trimChildNodes($fauthors || $authors  || $keytag)],
          authortext  => ($authors || $fauthors ? ($authors || $fauthors)->textContent : ''),
          year        => [$doc->trimChildNodes($year || $typetag)],
          rawyear     => $rawyear,
          suffix      => $suffix,
          number      => [$doc->trimChildNodes($number)],
          refnum      => [$doc->trimChildNodes($refnum)],
          title       => [$doc->trimChildNodes($title || $keytag)],
          attr        => { idref => $id,
            href => orNull($self->generateURL($doc, $id)),
            ($titlestring ? (title => $titlestring) : ()) } }); }
    else {
      # No entry found: note it, and record a stand-in that simply shows the key
      $self->note_missing('warn', 'Entry for citation', $key);
      push(@data, { key => $key, refnum => [$key], title => [$key], year => [],
          attr => { idref => $key, title => $key, class => "ltx_missing_citation" } });
  } }
  # Successive items by the same authors are only combined when the show pattern
  # has both an author part and a year (or number) part.
  my $checkdups = ($show =~ /author/i) && ($show =~ /(year|number)/i);
  my @refs      = ();
  my $saveshow  = $show;
  while (@data) {
    my $datum  = shift(@data);
    my $didref = 0;      # set once an ltx:ref has been generated for this item
    my @stuff  = ();
    $show = $saveshow;   # the pattern is consumed from the front, so restart it for each item
    if (($show eq 'none') && @preformatted) {
      @stuff = @preformatted; $show = ''; }
    elsif ($$datum{attr}{class} && ($$datum{attr}{class} eq 'ltx_missing_citation')) {
      @stuff  = (['ltx:ref', $$datum{attr}, $$datum{key}]);
      $didref = 1;
      $show   = '';
    }

    # Add delimiters for parsing...
    # (an empty {} separates 'year' or 'phrase' from a directly preceding word character)
    $show =~ s/(\w)year/$1\{\}year/gi;
    $show =~ s/(\w)phrase/$1\{\}phrase/gi;
    while ($show) {
      if ($show =~ s/^(\w+)//) {
        my $role = lc($1); $role =~ s/s$//;    # remove trailing plural
        if ($role eq 'author') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{authors} })); }
        elsif ($role eq 'fullauthor') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{fullauthors} })); }
        elsif ($role eq 'title') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{title} })); }
        elsif ($role eq 'refnum') {
          push(@stuff, $doc->cloneNodes(@{ $$datum{refnum} })); }
        elsif ($role =~ /^phrase(\d)$/) {
          # HACK! Avoid empty () from situations where we've set the show (CITE_STYLE) too early
          # and don't actually have author-year information!
          my $n = $1;
          if (($n == 1) && ($show =~ /^\{\}year\{\}phrase2/i) && !scalar(@{ $$datum{year} })
            && (!$phrases[0] || (length($phrases[0]->textContent) <= 1))
            && (!$phrases[1] || (length($phrases[1]->textContent) <= 1))) {
            $show =~ s/^\{\}year\{\}phrase2//i; }
          else {
            push(@stuff, $phrases[$n - 1]->childNodes) if $phrases[$n - 1]; } }
        elsif ($role eq 'year') {
          # NOTE(review): every datum built above has year set to an array ref (possibly empty),
          # which is always true, so this first branch looks unreachable;
          # an empty year currently produces nothing and no warning. Confirm the intent.
          if (!$$datum{year}) {
            $self->note_missing('warn', 'Date for citation', $$datum{key}); }
          elsif (@{ $$datum{year} }) {
            push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{year} }]);
            $didref = 1;
            # Absorb following items that have the same author text
            while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
              my $next = shift(@data);
              push(@stuff, $yysep, ' ');
              # Same raw year: show only the suffix; the differing fallbacks keep two yearless items from matching
              if ((($$datum{rawyear} || 'no_year_1') eq ($$next{rawyear} || 'no_year_2')) && $$next{suffix}) {
                push(@stuff, ['ltx:ref', $$next{attr}, $$next{suffix}]); }
              else {
                push(@stuff, ['ltx:ref', $$next{attr}, @{ $$next{year} }]); } } } }
        elsif ($role eq 'number') {
          push(@stuff, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@stuff, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); } }
        elsif ($role eq 'super') {
          # Like 'number', but the refs are collected into a single ltx:sup
          my @r = ();
          push(@r, ['ltx:ref', $$datum{attr}, @{ $$datum{number} }]);
          $didref = 1;
          while ($checkdups && @data && ($$datum{authortext} eq $data[0]{authortext})) {
            my $next = shift(@data);
            push(@r, $yysep, ' ', ['ltx:ref', $$next{attr}, @{ $$next{number} }]); }
          push(@stuff, ['ltx:sup', {}, @r]); }
        else {
          Info('unexpected', $role, $doc, "CITE ignoring show key '$role'"); } }
      elsif ($show =~ s/^\{([^\}]*)\}//) {    # pass-thru literal, quoted with {}
        push(@stuff, $1) if $1; }
      elsif ($show =~ s/^~//) {               # ~ becomes a non-breaking space (dropped at the start)
        push(@stuff, $NBSP) if @stuff; }
      elsif ($show =~ s/^(\s+)//) {           # Pass-thru spaces (dropped at the start)
        push(@stuff, $1) if @stuff; }
      elsif ($show =~ s/^(\W+)//) {           # Pass-thru non show keywords
        push(@stuff, $1); } }
    # Items are joined by the separator; if no part generated an ltx:ref,
    # wrap everything generated for the item in a single one.
    push(@refs,
      (@refs   ? ($sep, ' ') : ()),
      ($didref ? @stuff      : (['ltx:ref', $$datum{attr}, @stuff]))); }
  return @refs; }
609
# Generate a URL for the object with the given $id, relative to the destination of $doc.
# The form depends on $$self{urlstyle}:
#   'file' (default) : the relative pathname, as is
#   'server'         : a trailing index.$extension is removed
#   'negotiated'     : the trailing .$extension, and then a trailing index, are removed
# A '#fragid' is appended if the object has one; a URL for the document itself is ''.
# Returns nothing (and notes what is missing) if the id has no database entry,
# or the entry has no location.
sub generateURL {
  my ($self, $doc, $id) = @_;
  my ($object, $location);
  if ($object = $$self{db}->lookup("ID:" . $id)) {
    if ($location = $object->getValue('location')) {
      my $doclocation = $doc->siteRelativeDestination;
      my $pathdir     = pathname_directory($doclocation);
      # Both pathnames are made absolute (from the site root) before relativizing
      my $url = pathname_relative(($location =~ m|^/| ? $location : '/' . $location),
        ($pathdir =~ m|^/| ? $pathdir : '/' . $pathdir));
      my $extension = $$self{extension} || 'xml';
      my $urlstyle  = $$self{urlstyle}  || 'file';
      if ($urlstyle eq 'server') {
        # Remove trailing index.$extension but be careful not to leave url empty! (then it's "self")
        # The dot is matched literally, so that names such as "index_$extension" are left alone.
        $url =~ s/(^|\/)index\.\Q$extension\E$/($1 ? $1 : '.\/')/e; }
      elsif ($urlstyle eq 'negotiated') {
        $url =~ s/\.\Q$extension\E$//;    # Remove trailing $extension
        $url =~ s/(^|\/)index$/$1/;       # AND trailing index
      }
      $url = '.' unless $url;
      if (my $fragid = $object->getValue('fragid')) {
        # Within the same document only the fragment is needed
        $url = '' if ($url eq '.') or ($location eq $doclocation);
        $url .= '#' . $fragid; }
      elsif ($location eq $doclocation) {
        $url = ''; }
      return $url; }
    else {
      $self->note_missing('warn', 'File location for ID', $id); } }
  else {
    $self->note_missing('warn', 'DB Entry for ID', $id); }
  return; }
640
641# Generate the contents of a <ltx:ref> of the given id.
642# show is a string containing substrings 'type', 'refnum' and 'title'
643# (standing for the type prefix, refnum and title of the id'd object)
644# and any other random characters; the
sub generateRef {
  my ($self, $doc, $reqid, $reqshow, $is_nameref) = @_;
  # Returns the list of content items for the reference;
  # as a last resort (noted as missing) just the id itself.
  my $pending = '';
  my @stuff;
  # Try the requested show pattern, and if it fails, try a fallback of just the title or refnum
  foreach my $show (($reqshow, ($reqshow !~ /title/ ? "title" : "refnum"))) {
    my $id = $reqid;
    # Start with requested ID, add some from parent(s), if needed/until to make "useful" link content
    while (my $entry = $id && $$self{db}->lookup("ID:$id")) {
      if (my @s = $self->generateRef_aux($doc, $entry, $show, $is_nameref)) {
        push(@stuff, $pending) if $pending;
        push(@stuff, @s);
        return @stuff if $self->checkRefContent($doc, @stuff);
        $pending = $$self{ref_join}; } # inside/outside this brace determines if text can START with the join.
      $id = $entry->getValue('parent'); } }
  if (!@stuff) {                       # Try first child for a title-less document?
    if (my $entry = $$self{db}->lookup("ID:$reqid")) {
      if (($entry->getValue('type') || '') eq 'ltx:document') {
        # Guard against an entry without children, as is done elsewhere in this file
        foreach my $c (@{ $entry->getValue('children') || [] }) {
          if (my $centry = $$self{db}->lookup("ID:$c")) {
            if (my @s = $self->generateRef_aux($doc, $centry, $reqshow, $is_nameref)) {
              push(@stuff, @s); last; } } } } } }
  if (@stuff) {
    return @stuff; }
  else {
    $self->note_missing('info', 'Usable title for ID', $reqid);
    return ($reqid); } }               # id is crummy, but better than "?"... or?
672
673# Just return the reqshow value for $reqid, or nothing
sub generateRef_simple {
  my ($self, $doc, $reqid, $reqshow) = @_;
  # Unlike generateRef, there is no fallback pattern and no climbing to parents:
  # either the DB entry for $reqid yields content for $reqshow, or nothing is returned.
  # (Both $reqshow and $reqid must be non-empty for the lookup to be attempted.)
  if (my $entry = $reqshow && $reqid && $$self{db}->lookup("ID:$reqid")) {
    return $self->generateRef_aux($doc, $entry, $reqshow); }
  return (); }
681
682# Check if the proposed content of a <ltx:ref> is "Good Enough"
683# (long enough, unique enough to give reader feedback,...)
sub checkRefContent {
  my ($self, $doc, @stuff) = @_;
  # Flatten the proposed content to plain text.
  my $text = text_content(@stuff);
  # Rather than comparing against a minimum length, require some real "text":
  # discount any word "in" (followed by space), then insist on at least one
  # word character, so that content made only of symbols is rejected.
  $text =~ s/\bin\s+//g;
  return 1 if $text =~ /\w/;
  return 0; }
692
# Return the concatenated plain text of a list of content items
# (each item is converted by text_content_aux).
sub text_content {
  my (@stuff) = @_;
  my @pieces;
  foreach my $item (@stuff) {
    push(@pieces, text_content_aux($item)); }
  return join('', @pieces); }
696
# Return the plain text of a single content item:
#   an array [tag, attributes, children...] gives the text of its children;
#   an object whose class begins with XML:: gives its textContent;
#   anything else (plain string, or other kind of reference) is returned as is.
sub text_content_aux {
  my ($n) = @_;
  my $kind = ref $n;
  if ($kind eq 'ARRAY') {
    my (undef, undef, @children) = @$n;    # skip over the tag and attributes
    return text_content(@children); }
  return $n->textContent if $kind =~ /^XML::/;
  return $n; }
709
# Alternative fields, when not found:
# for a show keyword (the hash key), the other entry fields to try, in order,
# when neither the keyword itself nor 'tag:'+keyword has a value (see generateRef_aux).
my %ref_fallbacks = (
  typerefnum  => ['refnum'],
  rrefnum     => ['typerefnum', 'frefnum', 'refnum'],    # obsolete?
  toctitle    => ['title',      'toccaption'],
  title       => ['toccaption'],
  rawtoctitle => ['toctitle', 'title', 'toccaption'],
  rawtitle    => ['title',    'toccaption'],
);
718
719# Generate text to fill in an ltx:ref from a database entry for some object.
720# The show pattern indicates what data to use; usually a single keyword
721# (or keywords separated by spaces, ~ or {} enclosed literal text)
722# The keywords are things like refnum, title, caption, etc
723# (possibly coming from ltx:tag or other data; see Scan)
sub generateRef_aux {
  my ($self, $doc, $entry, $show, $is_nameref) = @_;
  my @content;
  my $found = 0;    # becomes true once any keyword yields actual data
  # Consume the show pattern from the front, one token at a time.
  while ($show) {
    if ($show =~ s/^(\w+)//) {    # A keyword: look for its data
      my $keyword = lc($1);
      my $css     = 'ltx_ref_tag';
      $css = 'ltx_ref_title' if $keyword =~ /title/;
      # Try the keyword itself, then its tag: variant, then any registered alternatives.
      my @candidates = ($keyword, 'tag:' . $keyword);
      push(@candidates, @{ $ref_fallbacks{$keyword} }) if $ref_fallbacks{$keyword};
      my $data;
      foreach my $candidate (@candidates) {
        $data = $entry->getValue($candidate);
        last if $data; }
      next unless $data;
      $found = 1;
      if ($is_nameref) {
        # For a nameref, a leading ltx:tag is detached from the data
        my ($leading) = element_nodes($data);
        if ($leading && ($doc->getQName($leading) eq 'ltx:tag')) {
          $leading->unbindNode; } }
      push(@content, ['ltx:text', { class => $css }, $self->prepRefText($doc, $data)]); }
    elsif ($show =~ s/^\{([^\}]*)\}//) {    # Literal text, quoted within {}
      push(@content, $1) if $1; }
    elsif ($show =~ s/^~//) {               # ~ gives a non-breaking space (but never leading)
      push(@content, $NBSP) if @content; }
    elsif ($show =~ s/^(\s+)//) {           # Ordinary spaces (but never leading)
      push(@content, $1) if @content; }
    elsif ($show =~ s/^(\W+)//) {           # Any other non-keyword characters pass through
      push(@content, $1); } }
  # With no data found for this entry, return nothing (the caller may retry on a parent)
  return () unless $found;
  return @content; }
755
# Prepare the data $title for use as the content of a reference:
# fill in any embedded references (fillInTitle), trim the resulting child nodes
# and return clones of them (so the copies can be inserted elsewhere).
sub prepRefText {
  my ($self, $doc, $title) = @_;
  my $filled  = $self->fillInTitle($doc, $title);
  my @trimmed = $doc->trimChildNodes($filled);
  return $doc->cloneNodes(@trimmed); }
759
# Like prepRefText, but giving the "raw" text of a title:
# when the prepared node is an ltx:title or ltx:toctitle,
# a leading ltx:tag child is removed from it.
# Returns the (possibly trimmed) prepared node.
sub prepRawRefText {
  my ($self, $doc, $title) = @_;
  my $node = $self->prepRefText($doc, $title);
  # Note the optional (?:toc)?, so that plain ltx:title is covered as well as ltx:toctitle.
  if ($doc->getQName($node) =~ /^ltx:(?:toc)?title$/) {    # Trim tags from titles
    my ($first) = element_nodes($node);
    if ($first && ($doc->getQName($first) eq 'ltx:tag')) {
      $node->removeChild($first); } }
  return $node; }
768
769# Generate a title string for ltx:ref
sub generateTitle {
  my ($self, $doc, $id) = @_;
  # Add author, if any ???
  # Collect a title for the object and for each of its ancestors in turn;
  # they are joined by the ref_join string, innermost first.
  my @titles;
  while ($id) {
    my $entry = $$self{db}->lookup("ID:$id");
    last unless $entry;
    # Prefer the title proper, else the first available kind of reference number.
    my $title = $self->fillInTitle($doc,
      $entry->getValue('title') || $entry->getValue('typerefnum')
        || $entry->getValue('frefnum') || $entry->getValue('refnum'));
    if ($title && ref $title) {    # reduce structured content to a plain string
      $title = getTextContent($doc, $title); }
    push(@titles, $title) if $title;
    $id = $entry->getValue('parent'); }
  return join($$self{ref_join}, @titles) || ""; }
787
# Return the text of $title as a plain string with normalized whitespace.
# A reference $title is first converted by getTextContent_rec;
# an empty, undefined (or otherwise false) value is returned unchanged.
sub getTextContent {
  my ($doc, $title) = @_;
  if ($title && ref $title) {
    $title = getTextContent_rec($doc, $title); }
  return $title unless $title;
  $title =~ s/^\s+//s;     # Trim leading whitespace
  $title =~ s/\s+$//s;     # and trailing
  $title =~ s/\s+/ /gs;    # and normalize all other whitespace.
  return $title; }
795
# Recursively extract the text of $node:
#   a text node gives its text;
#   an ltx:tag element gives its text wrapped in its open & close attributes (if any);
#   any other element, or a document fragment, gives the concatenated text of its children;
#   all other kinds of node contribute nothing.
sub getTextContent_rec {
  my ($doc, $node) = @_;
  my $kind = $node->nodeType;
  return $node->textContent if ($kind == XML_TEXT_NODE);
  my $is_element = ($kind == XML_ELEMENT_NODE);
  return '' unless $is_element || ($kind == XML_DOCUMENT_FRAG_NODE);
  if ($is_element && ($doc->getQName($node) eq 'ltx:tag')) {
    my $open  = $node->getAttribute('open') || '';
    my $text  = $node->textContent;                   # assuming no nested ltx:tag
    my $close = $node->getAttribute('close') || '';
    return $open . $text . $close; }
  my @pieces;
  foreach my $child ($node->childNodes) {
    push(@pieces, getTextContent_rec($doc, $child)); }
  return join('', @pieces); }
813
814# Fill in any embedded ltx:ref's & ltx:cite's within a title
sub fillInTitle {
  my ($self, $doc, $title) = @_;
  # Only structured (reference) titles can contain anything to fill in.
  return $title unless $title && ref $title;
  # Fill in any nested ref's!
  foreach my $ref ($doc->findnodes('descendant::ltx:ref[@idref or @labelref]', $title)) {
    next if $ref->textContent;    # already has content
    my $show = $ref->getAttribute('show');
    $show = $$self{ref_show} unless $show;
    my $refentry;
    if (my $id = $ref->getAttribute('idref')) {
      $refentry = $$self{db}->lookup("ID:$id"); }
    elsif (my $label = $ref->getAttribute('labelref')) {
      $refentry = $$self{db}->lookup($label);
      # The label may not be in the DB at all; only a found entry can be
      # asked for the id of the object it labels.
      if ($refentry && ($id = $refentry->getValue('id'))) {
        $refentry = $$self{db}->lookup("ID:$id"); } }
    # Unresolvable references are simply left as they are.
    if ($refentry) {
      $doc->replaceNode($ref, $self->generateRef_aux($doc, $refentry, $show)); } }
  # Fill in (replace, actually) any embedded citations.
  foreach my $bibref ($doc->findnodes('descendant::ltx:bibref', $title)) {
    $doc->replaceNode($bibref, $self->make_bibcite($doc, $bibref)); }
  # Line breaks within the title are replaced by a single space.
  foreach my $break ($doc->findnodes('descendant::ltx:break', $title)) {
    $doc->replaceNode($break, ['ltx:text', {}, " "]); }
  return $title; }
838
# Fill in every ltx:glossaryref in the document from the glossary data in the DB.
# For each one, the lists named by its inlist attribute (default 'glossary') are
# searched, in order, for an entry with the ref's key (DB key GLOSSARY:<list>:<key>).
# When found: a title attribute is set from the entry's definition (unless already present),
# idref is set to the entry's id (if any), and an empty ref is given content
# according to its show attribute.
# Any ref still empty afterwards shows its key and is marked with class ltx_missing.
sub fillInGlossaryRef {
  my ($self, $doc) = @_;
  my $n = 0;
  foreach my $ref ($doc->findnodes('descendant::ltx:glossaryref')) {
    $n++;
    my $key   = $ref->getAttribute('key');
    my @lists = split(/\s+/, $ref->getAttribute('inlist') || 'glossary');
    my $show  = $ref->getAttribute('show');
    my ($list, $entry) = ('', undef);
    foreach my $alist (@lists) {    # Find list with this key
      if ($entry = $$self{db}->lookup(join(':', 'GLOSSARY', $alist, $key))) {
        $list = $alist; last; } }
    if ($entry) {
      my $title = $entry->getValue('phrase:definition');
      if (!$ref->getAttribute('title') && $title) {
        $ref->setAttribute(title => $title->textContent); }
      if (my $id = $entry->getValue('id')) {
        $ref->setAttribute(idref => $id); }
      if (!$ref->textContent && !element_nodes($ref)) {
        my @stuff = $self->generateGlossaryRefTitle($doc, $entry, $show);
        if (@stuff) {
          $doc->addNodes($ref, @stuff); }
        else {
          # The ref stays empty here; the fallback below shows the key instead.
          $self->note_missing('warn',
            "Glossary ($list) contents (" . (defined $show ? $show : '') . ") for key", $key); } } }
    else {
      # No list matched (so $list is empty): report the lists that were actually searched.
      $self->note_missing('warn', "Glossary (" . join(' ', @lists) . ") Entry for key", $key); }
    # Fallback for any ref which is (still) empty: show the key, marked as missing.
    if (!$ref->textContent && !element_nodes($ref)) {
      $doc->addNodes($ref, $key);
      $doc->addClass($ref, 'ltx_missing'); } }
  Debug("Filled in $n glossaryrefs") if $LaTeXML::DEBUG{crossref};
  return; }
872
# Generate the content for a glossaryref from the glossary $entry, according to $show.
#   If the entry has a value for 'phrase:'+show, that phrase is used;
#   else for show of the form <name>-plural, the 'phrase:<name>' phrase followed by 's';
#   else for show of the form <name>-indefinite, the 'phrase:<name>' phrase preceded by
#   'an ' (when its text starts with a vowel letter) or 'a '.
# The result is a single ltx:text (with class ltx_glossary_<show>),
# or nothing if no appropriate phrase is available.
sub generateGlossaryRefTitle {
  my ($self, $doc, $entry, $show) = @_;
  $show = '' unless defined $show;    # a missing show just finds no phrase (quietly)
  my $class = 'ltx_glossary_' . $show;
  my @stuff = ();
  if (my $phrase = $entry->getValue('phrase:' . $show)) {
    push(@stuff, ['ltx:text', { class => $class },
        $self->prepRefText($doc, $phrase)]); }
  elsif ($show =~ /^(\w+)-plural$/) {
    my $sh = $1;
    if (my $phrase = $entry->getValue('phrase:' . $sh)) {
      push(@stuff, ['ltx:text', { class => $class },
          $self->prepRefText($doc, $phrase), 's']); } }
  elsif ($show =~ /^(\w+)-indefinite$/) {
    my $sh = $1;
    if (my $phrase = $entry->getValue('phrase:' . $sh)) {
      my $s   = $phrase->textContent;
      my $art = ($s =~ /^[aeiou]/i ? 'an ' : 'a ');
      push(@stuff, ['ltx:text', { class => $class },
          $art, $self->prepRefText($doc, $phrase)]); } }
  return @stuff; }
893
# Return the arguments unchanged if any of them is defined; otherwise return undef
# (explicitly: in list context that is a one-element list holding undef).
sub orNull {
  my @values = @_;
  foreach my $value (@values) {
    return @values if defined $value; }
  return undef; }
896
897# Possibly this needs support from Scan, as well?
898# to manage resources, record in Manifest, something like that?
sub copy_resources {
  my ($self, $doc) = @_;
  # Copy any "resources" linked from the document:
  # the targets of ltx:ref's having an explicit href (and no idref or labelref).
  my @searchpaths = $doc->getSearchPaths;
  foreach my $link ($doc->findnodes('//ltx:ref[@href and not(@idref) and not(@labelref)]')) {
    my $href = $link->getAttribute('href');
    # Only relative paths are considered (No explicit protocol, or absolute)
    next if $href =~ /^(\w+:|\/)/;
    # AND only if the file actually exists somewhere on the search paths.
    my $source = pathname_find($href, paths => \@searchpaths);
    next unless $source;
    my $destination = $doc->checkDestination($href);
    pathname_copy($source, $destination); }
  return; }
911
912# ================================================================================
9131;
914