# /=====================================================================\ #
# |  LaTeXML::Post::Scan                                                | #
# | Scan for ID's etc                                                   | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
12package LaTeXML::Post::Scan;
13use strict;
14use warnings;
15use LaTeXML::Util::Pathname;
16use LaTeXML::Common::XML;
17use LaTeXML::Post;
18use base qw(LaTeXML::Post::Processor);
19
20# NOTE: This module is one that probably needs a lot of customizability.
# Constructor.  Delegates to the superclass constructor, keeps the `db` option
# in $$self{db}, and fills $$self{handlers} with the default handler for each
# element name that gets special treatment; scan falls back to default_handler
# for any element name without an entry.
sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  $$self{db}       = $options{db};
  $$self{handlers} = {};
  # Each row is a handler followed by the element names it serves.
  # Rows (and the names within a row) are registered in the order listed.
  my @defaults = (
    [\&section_handler, qw(ltx:document ltx:part ltx:chapter ltx:section ltx:appendix
        ltx:subsection ltx:subsubsection ltx:paragraph ltx:subparagraph
        ltx:bibliography ltx:index ltx:glossary)],
    [\&captioned_handler,     qw(ltx:table ltx:figure ltx:float ltx:listing)],
    [\&section_handler,       qw(ltx:theorem ltx:proof)],
    [\&labelled_handler,      qw(ltx:equation ltx:equationgroup ltx:item)],
    [\&anchor_handler,        qw(ltx:anchor)],
    [\&note_handler,          qw(ltx:note)],
    [\&bibitem_handler,       qw(ltx:bibitem)],
    [\&bibentry_handler,      qw(ltx:bibentry)],
    [\&indexmark_handler,     qw(ltx:indexmark)],
    [\&glossaryentry_handler, qw(ltx:glossaryentry ltx:glossarydefinition)],
    [\&ref_handler,           qw(ltx:ref)],
    [\&bibref_handler,        qw(ltx:bibref)],
    [\&navigation_handler,    qw(ltx:navigation)],
    [\&rdf_handler,           qw(ltx:rdf)],
    [\&declare_handler,       qw(ltx:declare)],
    [\&rawhtml_handler,       qw(ltx:rawhtml)],
  );
  foreach my $row (@defaults) {
    my ($handler, @tags) = @$row;
    foreach my $tag (@tags) {
      $self->registerHandler($tag => $handler); } }
  return $self; }
67
# Install (or replace) the handler used for elements whose qualified name is $tag.
#   $tag     : qualified element name, eg. 'ltx:section'
#   $handler : code reference; scan invokes it as
#              $handler->($self, $doc, $node, $tag, $parent_id)
# Returns nothing.
sub registerHandler {
  my ($self, $tag, $handler) = @_;
  $self->{handlers}->{$tag} = $handler;
  return; }
72
# Scan a document, recording what is found in the database ($$self{db}), and then
# connect the document to a parent document (for multi-document sites).
#   $doc  : the document being processed
#   $root : the element at which scanning starts; it is given xml:id="Document"
#           if it has no xml:id of its own.
# Returns $doc.
sub process {
  my ($self, $doc, $root) = @_;
  # I think we really need an ID here to establish the root node in the DB,
  # even if the document didn't have one originally.
  # And for the common case of a single document, we'd like to be silent about it,
  # UNLESS there seem to be multiple documents which would lead to a conflict.
  my $id = $root->getAttribute('xml:id');
  if (!defined $id) {
    $id = "Document";
    if (my $preventry = $$self{db}->lookup("ID:$id")) {
      if (my $loc = $doc->siteRelativeDestination) {
        my $prevloc = $preventry->getValue('location');
        if ((defined $prevloc) && ($loc ne $prevloc)) {
          Warn('unexpected', 'location', undef,
            "Using default ID='$id', "
              . "but there's an apparent conflict with location '$loc' and previous '$prevloc'"); } } }
    $root->setAttribute('xml:id' => $id); }

  # By default, 1st document processed is considered the root of the site
  my $siteentry = $$self{db}->lookup('SITE_ROOT');
  if (!$siteentry) {
    $siteentry = $$self{db}->register('SITE_ROOT', id => $id); }
  my $siteid = $siteentry->getValue('id');

  $self->scan($doc, $root, $$doc{parent_id});

  # Set up interconnections on multidocument site.
  $$self{db}->register("DOCUMENT:" . ($doc->siteRelativeDestination || ''), id => $id);

  # Question: If (on multidoc sites) a doc contains a single node (say ltx:chapter)
  # might it make sense to treat the doc as ONLY that node?
  # Alternative: May be necessary to extract title from that child?

  # Find a plausible parent doc, unless this is the root, or already has one
  # Either by relative id's, destination location, or default to the site itself.
  my $entry = $$self{db}->lookup("ID:$id");
  if (($id ne $siteid) && !$entry->getValue('parent')) {
    my $parent_id;
    # Look for parent assuming its id is a leading component of $id
    # (strip trailing ".component"s until a known ID is found).
    my $upid = $id;
    while ($upid =~ s/\.[^\.]+$//) {
      if ($$self{db}->lookup("ID:$upid")) {
        $parent_id = $upid; last; } }
    if (!$parent_id) {    # Look for parent as index.xml in a containing directory.
      my $dir = $entry->getValue('location');
      my $prevdir;
      # Climb one directory per iteration; stop when pathname_split yields nothing,
      # or stops making progress (the same directory twice in a row, eg. at the top).
      # NOTE(review): a list assignment used as a loop condition is true whenever
      # the right-hand side returns at least one element.  pathname_split is defined
      # elsewhere -- confirm whether it can return an empty list; if it cannot,
      # looping on that condition alone never ends when no index.xml is registered.
      while (1) {
        ($dir) = pathname_split($dir);
        last if !defined $dir;
        last if (defined $prevdir) && ($dir eq $prevdir);
        $prevdir = $dir;
        if (my $pentry = $$self{db}->lookup("DOCUMENT:" . pathname_concat($dir, 'index.xml'))) {
          my $pid = $pentry->getValue('id');
          if ($pid && ($pid ne $id)) {
            $parent_id = $pid; last; } } } }
    if (!$parent_id) {    # Else default to the id of the site itself.
      $parent_id = $siteid; }
    if ($parent_id && ($parent_id ne $id)) {
      $entry->setValues(parent => $parent_id);
      # Children are added in the order that they were scanned
      $self->addAsChild($id, $parent_id); }
    else {
      Info('expected', 'parent', undef, "No parent document found for '$id'"); } }
  NoteLog("Scan: DBStatus: " . $$self{db}->status);
  return $doc; }
134
# Dispatch a single node to the handler registered for its qualified name,
# or to default_handler when none is registered.
#   $doc       : the document being scanned
#   $node      : the element to process
#   $parent_id : id under which the node should be recorded as a child
# The handler is called as ($self, $doc, $node, $tag, $parent_id); its return
# value is discarded.
sub scan {
  my ($self, $doc, $node, $parent_id) = @_;
  no warnings 'recursion';
  my $tag     = $doc->getQName($node);
  my $handler = $$self{handlers}{$tag};
  $handler = \&default_handler if !$handler;
  $handler->($self, $doc, $node, $tag, $parent_id);
  return; }
142
# Run scan on each element child of $node, in document order, passing
# $parent_id along unchanged; children that are not element nodes are skipped.
sub scanChildren {
  my ($self, $doc, $node, $parent_id) = @_;
  no warnings 'recursion';
  foreach my $child ($node->childNodes) {
    next unless $child->nodeType == XML_ELEMENT_NODE;
    $self->scan($doc, $child, $parent_id); }
  return; }
150
# Record $id as a child of the nearest ancestor that keeps a 'children' list.
# Starting at $parent_id, follow each entry's 'parent' value upward until an
# entry with a 'children' value is found; $id is added there with pushNew.
# Does nothing if the chain ends (no id, or no database entry) first.
sub addAsChild {
  my ($self, $id, $parent_id) = @_;
  my $ancestor_id = $parent_id;
  while ($ancestor_id) {
    my $ancestor = $$self{db}->lookup("ID:$ancestor_id");
    last if !$ancestor;
    if ($ancestor->hasValue('children')) {
      $ancestor->pushNew('children', $id);
      return; }
    $ancestor_id = $ancestor->getValue('parent'); }
  return; }
161
# Return the xml:id attribute of $doc's document element.
sub pageID {
  my ($self, $doc) = @_;
  my $root = $doc->getDocumentElement;
  return $root->getAttribute('xml:id'); }
165
# Compute a "Fragment ID" for $node: an ID derived from the node's own ID,
# but shortened where possible so that it need only be unique within the page.
# When $$self{labelids} is set and the node has labels, the first label
# (without its "LABEL:" prefix) is used in place of the xml:id, and likewise
# the document element's first label in place of its xml:id.
# Returns:
#   - the (false) id itself, if the node has no usable id;
#   - nothing, if the id is the same as the page's id;
#   - the id with the leading "<page id>." removed, if it has that prefix;
#   - else the id with the leading "<$$doc{split_from_id}>." removed, if it has that prefix;
#   - otherwise the id unchanged.
sub inPageID {
  my ($self, $doc, $node) = @_;
  my $root   = $doc->getDocumentElement;
  my $id     = $node->getAttribute('xml:id');
  my $baseid = $root->getAttribute('xml:id') || '';
  # And we're using label-based ids in the target document...
  if ($$self{labelids}) {
    if (my $labels = $node->getAttribute('labels')) {
      ($id) = split(' ', $labels);
      $id =~ s/^LABEL://;
      if (my $baselabels = $root->getAttribute('labels')) {
        ($baseid) = split(' ', $baselabels);
        $baseid =~ s/^LABEL://; } } }
  return $id if !$id;
  return     if $baseid eq $id;
  return $1  if $baseid && ($id =~ /^\Q$baseid\E\.(.*)$/);
  my $split_id = $$doc{split_from_id};
  return $1 if $split_id && ($id =~ /^\Q$split_id\E\.(.*)$/);
  return $id; }
193
# If $node has both an xml:id and a `labels` attribute, register each
# (whitespace separated) label in the database with the node's id, and
# return a reference to a new array of those labels.
# Returns nothing if the node lacks either attribute.
sub noteLabels {
  my ($self, $node) = @_;
  my $id = $node->getAttribute('xml:id');
  return unless $id;
  my $attr = $node->getAttribute('labels');
  return unless $attr;
  my @labels = split(' ', $attr);
  foreach my $label (@labels) {
    $$self{db}->register($label, id => orNull($id)); }
  return [@labels]; }
203
# Clean up a node before insertion into database.
# A false $node is returned as is.  Otherwise the node is cloned (via
# $doc->cloneNode) and every ltx:indexmark found within the clone is detached
# from its parent; the clone is returned and $node itself is not modified here.
sub cleanNode {
  my ($self, $doc, $node) = @_;
  return $node unless $node;
  my $copy = $doc->cloneNode($node);
  # Remove indexmark (anything else ?)
  foreach my $mark ($doc->findnodes('.//ltx:indexmark', $copy)) {
    $mark->parentNode->removeChild($mark); }
  return $copy; }
213
# Assumes $node has been cloned, if needed.
# Set to something smallish (eg. 6) to forcibly truncate toctitle/toccaption
our $TOCTEXT_MAX_LENGTH = undef;

# Shorten the content of $node, in place, using a budget of $TOCTEXT_MAX_LENGTH.
# Children are visited in order:
#   - a text child with more whitespace-separated words than the remaining
#     budget is cut to exactly that many words, which ends the walk;
#   - any other child (shorter text, or a non-text node) uses up 1 of the budget.
# If a text child was cut, or more than one child was left unvisited, the
# unvisited children are removed and an ellipsis (U+2026) is appended.
# Returns $node; a false $node, or an undefined $TOCTEXT_MAX_LENGTH, leaves it untouched.
sub truncateNode {
  my ($self, $doc, $node) = @_;
  return $node if !$node || !defined $TOCTEXT_MAX_LENGTH;
  my @children = $node->childNodes;
  my $n        = $TOCTEXT_MAX_LENGTH;
  my $trunc    = 0;
  while ($n && @children) {
    my $c = shift(@children);
    if ($c->nodeType == XML_TEXT_NODE) {
      my $s = $c->textContent;
      my @w = split(/\s/, $s);
      if (scalar(@w) > $n) {
        # Keep exactly $n words: indices 0 .. $n-1.
        # (0 .. $n would keep $n+1 words; a text of exactly $n+1 words
        #  would then be left whole, yet still be marked as truncated.)
        $c->setData(join(' ', @w[0 .. $n - 1]));
        $trunc = 1; $n = 0; }
      else {
        $n--; } }
    else {
      $n--; } }
  if ($trunc || (scalar(@children) > 1)) {
    foreach my $rest (@children) {    # Remove any remaining children.
      $node->removeChild($rest); }
    $node->appendText("\x{2026}"); }
  return $node; }
240
# Assemble the properties shared by most database entries for $node.
# Returns a flat key => value list (not a reference) with:
#   id, type, parent, labels, location, pageid, fragid : from the node, $tag,
#       $parent_id and the document (undefined values are passed as undef);
#   inlist : hash reference whose keys are the names listed in the node's
#       `inlist` attribute (undef if there is no such attribute);
#   and one entry per ltx:tags/ltx:tag child, holding a cleaned copy of the tag:
#       role ending in "refnum" -> keyed by the role itself,
#       any other role          -> keyed by "tag:<role>",
#       no role                 -> keyed by "frefnum".
# Side effect: the node's labels are registered in the database (via noteLabels).
sub addCommon {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  my $inlist;
  if (my $listnames = $node->getAttribute('inlist')) {
    # split(' ', ...) ignores leading whitespace and treats runs of whitespace as
    # one separator, so stray spaces cannot produce an empty list name.
    $inlist = { map { ($_ => 1) } split(' ', $listnames) }; }
  my %props = (
    id       => orNull($id),
    type     => orNull($tag),
    parent   => orNull($parent_id),
    labels   => orNull($self->noteLabels($node)),
    location => orNull($doc->siteRelativeDestination),
    pageid   => orNull($self->pageID($doc)),
    fragid   => orNull($self->inPageID($doc, $node)),
    inlist   => $inlist,
  );
  # Figure out sane, safe naming?
  foreach my $tagnode ($doc->findnodes('ltx:tags/ltx:tag', $node)) {
    my $key;
    if (my $role = $tagnode->getAttribute('role')) {
      if ($role =~ /refnum$/) {
        $key = $role; }
      else {
        $key = 'tag:' . $role; } }
    else {
      $key = 'frefnum'; }
    ###      $key = 'refnum'; }        # ???
    $props{$key} = $self->cleanNode($doc, $tagnode); }
  return %props; }
270
# Handler for elements with no specific handler.
# A node with an xml:id is registered under "ID:<id>" with the common
# properties and noted as a child of $parent_id.  Its children are then scanned,
# with this node's id as their parent when it has one, else $parent_id.
sub default_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  no warnings 'recursion';
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my @common = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("ID:$id", @common);
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
281
# Handler for sectional units (document, section, bibliography, theorem, ...).
# A node with an xml:id is registered under "ID:<id>" with the common
# properties plus: primary (1), cleaned copies of its ltx:title and
# ltx:toctitle, an initially empty children list, and its `stub` attribute.
# The node is noted as a child of $parent_id, and its children are scanned
# (with this node's id as their parent when it has one).
sub section_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my @common   = $self->addCommon($doc, $node, $tag, $parent_id);
    my $title    = $self->cleanNode($doc, $doc->findnode('ltx:title',    $node));
    my $toctitle = $self->cleanNode($doc, $doc->findnode('ltx:toctitle', $node));
    $$self{db}->register("ID:$id", @common,
      primary  => 1,
      title    => orNull($title),
      toctitle => orNull($toctitle),
      children => [],
      stub     => orNull($node->getAttribute('stub')));
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
296
# Handler for captioned elements (table, figure, float, listing).
# A node with an xml:id is registered under "ID:<id>" with the common
# properties plus its `role` attribute, a cleaned copy of its caption, and a
# cleaned (and possibly truncated) copy of its toccaption.
# The node is noted as a child of $parent_id, and its children are scanned
# (with this node's id as their parent when it has one).
sub captioned_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    # We're actually trying to find the shallowest caption
    # Not one nested in another figure/table/float/whoknowswhat !
    # In each list, the first element is the one used.
    # NOTE(review): the descendant lookup only serves as a fallback if findnode
    # returns an empty list (rather than undef) when nothing matches -- confirm.
    my @captions = ($doc->findnode('child::ltx:caption', $node),
      $doc->findnode('descendant::ltx:caption', $node));
    my @toccaptions = ($doc->findnode('child::ltx:toccaption', $node),
      $doc->findnode('descendant::ltx:toccaption', $node));
    my $caption    = $captions[0];
    my $toccaption = $toccaptions[0];
    my @common     = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("ID:$id", @common,
      role       => orNull($node->getAttribute('role')),
      caption    => orNull($self->cleanNode($doc, $caption)),
      toccaption => orNull($self->truncateNode($doc, $self->cleanNode($doc, $toccaption))));
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
317
# Handler for labelled elements (equation, equationgroup, item).
# A node with an xml:id is registered under "ID:<id>" with the common
# properties plus its `role` attribute, and noted as a child of $parent_id.
# Its children are then scanned (with this node's id as their parent when it has one).
sub labelled_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my @common = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("ID:$id", @common,
      role => orNull($node->getAttribute('role')));
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
329
# Handler for ltx:note.
# Maybe with some careful redesign of the schema, this would fall under labelled?
# A node with an xml:id is registered under "ID:<id>" with the common
# properties, its `role` attribute, and `note`: a cleaned copy of the node
# on which removeChild has been called for each ltx:tags found within it.
# The node is noted as a child of $parent_id, and its children are scanned
# (with this node's id as their parent when it has one).
sub note_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my $note = $self->cleanNode($doc, $node);
    foreach my $tags ($doc->findnodes('.//ltx:tags', $note)) {
      $note->removeChild($tags); }
    my @common = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("ID:$id", @common,
      role => orNull($node->getAttribute('role')),
      note => $note);
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
345
# Handler for ltx:anchor.
# A node with an xml:id is registered under "ID:<id>" with the common
# properties plus `title`: a cleaned copy of the anchor node itself.
# The node is noted as a child of $parent_id, and its children are scanned
# (with this node's id as their parent when it has one).
sub anchor_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my @common = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("ID:$id", @common,
      title => orNull($self->cleanNode($doc, $node)));
    $self->addAsChild($id, $parent_id); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
357
# Handler for ltx:ref.
# When the ref has a `labelref` attribute, and is not inside an ltx:tocentry
# or a 'cited' ltx:bibblock, the database entry for that label is obtained
# (via register) and $parent_id is noted among its `referrers`.
# In all cases the node is then passed on to default_handler.
sub ref_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  if (my $label = $node->getAttribute('labelref')) {    # Only record refs of labels
                                                        # Don't scan refs from TOC or 'cited' bibblock
    if (!$doc->findnodes('ancestor::ltx:tocentry'
          . '| ancestor::ltx:bibblock[contains(@class,"ltx_bib_cited")]',
        $node)) {
      my $entry = $$self{db}->register($label);
      $entry->noteAssociation(referrers => $parent_id); } }
  # Usually, a ref won't YET have content; but if it does, we should scan it.
  $self->default_handler($doc, $node, $tag, $parent_id);
  return; }
371
# Handler for ltx:bibref.
# Unless the bibref is inside a 'cited' ltx:bibblock, each non-empty key in
# its comma separated `bibrefs` attribute is downcased and, for every list
# named in `inlist` and then for 'bibliography', the database entry
# "BIBLABEL:<list>:<key>" is obtained (via register) and $parent_id is noted
# among its `referrers`.
# In all cases the node is then passed on to default_handler.
sub bibref_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # Don't scan refs from 'cited' bibblock
  my $in_cited = $doc->findnodes('ancestor::ltx:bibblock[contains(@class,"ltx_bib_cited")]', $node);
  if (!$in_cited) {
    my $keys = $node->getAttribute('bibrefs');
    if ($keys) {
      # Citation specifies main 'bibliography', as well as any specific others (eg. per chapter)
      my $extra = $node->getAttribute('inlist');
      my @lists = ($extra ? split(/\s+/, $extra) : ());
      push(@lists, 'bibliography');
      foreach my $rawkey (grep { $_ } split(',', $keys)) {
        my $bibkey = lc($rawkey);      # NOW we downcase!
        foreach my $list (@lists) {    # Records a *reference* to a bibkey! (for each list)
          my $entry = $$self{db}->register("BIBLABEL:$list:$bibkey");
          $entry->noteAssociation(referrers => $parent_id); } } } }
  # Usually, a bibref will have, at most, some ltx:bibphrase's; should be scanned.
  $self->default_handler($doc, $node, $tag, $parent_id);
  return; }
389
# Handler for ltx:indexmark.
# Note that index entries get stored in simple form; just the terms & location.
# They will be turned into a tree, sorted, possibly permuted, whatever, by MakeIndex.
# [the only content of indexmark should be un-marked up(?) don't recurse]
# The entry is keyed by "INDEX" joined by ':' with the `key` attribute of each
# ltx:indexphrase child.  An existing entry is reused; otherwise one is
# registered with the phrases, an empty see_also list, and the lists named by
# the `inlist` attribute.  If the mark has ltx:indexsee children they are added
# to see_also; otherwise $parent_id is noted as a referrer, with the mark's
# `style` attribute (default 'normal').
sub indexmark_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # Get the actual phrases, and any see_also phrases (if any)
  # Do these need ->cleanNode ???
  my @phrases = $doc->findnodes('ltx:indexphrase', $node);
  my @seealso = $doc->findnodes('ltx:indexsee',    $node);
  my $key     = join(':', 'INDEX', map { $_->getAttribute('key') } @phrases);
  my $inlist;
  if (my $listnames = $node->getAttribute('inlist')) {
    # split(' ', ...) ignores leading whitespace and treats runs of whitespace as
    # one separator, so stray spaces cannot produce an empty list name.
    $inlist = { map { ($_ => 1) } split(' ', $listnames) }; }
  my $entry = $$self{db}->lookup($key)
    || $$self{db}->register($key, phrases => [@phrases], see_also => [], inlist => $inlist);
  if (@seealso) {
    $entry->pushNew('see_also', @seealso); }
  else {
    $entry->noteAssociation(referrers => $parent_id => ($node->getAttribute('style') || 'normal')); }
  return; }
410
# This handles glossaryentry or glossarydefinition.
# The lists the entry belongs to come from, in order of preference: the node's
# `inlist` attribute; the `lists` attribute of an enclosing ltx:glossarylist or
# ltx:glossary that has one; else 'glossary'.
# For each list, the entry "GLOSSARY:<list>:<key>" is looked up (or registered),
# given the node's ltx:glossaryphrase children keyed by "phrase:<role>"
# (role defaults to 'label'), $parent_id is noted as a referrer with the node's
# `style` attribute (default 'normal'), and the node's id is stored, if it has one.
# A node with an xml:id is additionally registered under "ID:<id>".
# Finally the node's children are scanned.
sub glossaryentry_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id    = $node->getAttribute('xml:id');
  my $lists = $node->getAttribute('inlist');
  if (!$lists) {
    my $container = $doc->findnode('ancestor::ltx:glossarylist[@lists] | ancestor::ltx:glossary[@lists]', $node);
    $lists = $container && $container->getAttribute('lists'); }
  $lists ||= 'glossary';
  my $key = $node->getAttribute('key');
  # Get the actual phrases, and any see_also phrases (if any)
  # Do these need ->cleanNode ???
  my @phrases      = $doc->findnodes('ltx:glossaryphrase', $node);
  my @phrasevalues = map { ('phrase:' . ($_->getAttribute('role') || 'label') => $_) } @phrases;
  my $style        = $node->getAttribute('style') || 'normal';
  # Create an entry for EACH list (they could be distinct definitions)
  foreach my $list (split(/\s+/, $lists)) {
    my $gkey  = join(':', 'GLOSSARY', $list, $key);
    my $entry = $$self{db}->lookup($gkey);
    $entry = $$self{db}->register($gkey) unless $entry;
    $entry->setValues(@phrasevalues);
    $entry->noteAssociation(referrers => $parent_id => $style);
    if ($id) {
      $entry->setValues(id => $id); } }

  if ($id) {
    $$self{db}->register("ID:$id",
      id       => orNull($id),
      type     => orNull($tag),
      parent   => orNull($parent_id),
      labels   => orNull($self->noteLabels($node)),
      location => orNull($doc->siteRelativeDestination),
      pageid   => orNull($self->pageID($doc)),
      fragid   => orNull($self->inPageID($doc, $node))); }
  # Scan content, since could contain other interesting stuff...
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
441
# Note this bit of perversity:
#  <ltx:bibentry> is a semantic bibliographic entry,
#     as generated from a BibTeX file.
#  <ltx:bibitem> is a formatted bibliographic entry,
#     as generated from an explicit thebibliography environment (eg. manually, or in a .bbl),
#     or as formatted from a <ltx:bibentry> by MakeBibliography.
# For a bibitem, we'll store the bibliographic metadata in the DB, keyed by the ID of the item.
#
# Handler for ltx:bibitem.  For an item with an xml:id:
#   - if it has a `key`, "BIBLABEL:<list>:<downcased key>" is registered with the
#     item's id, for each list named in the `lists` attribute of the enclosing
#     ltx:bibliography (default 'bibliography');
#   - "ID:<id>" is registered with the item's location data and the
#     ltx:tags/ltx:tag children selected by role.
# The item's children are then scanned.
sub bibitem_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    # NOTE: We didn't downcase the key when we created the bib file
    # BUT, we're going to index it in the ObjectDB by the downcased name!!!
    my $key = $node->getAttribute('key');
    if ($key) {
      $key = lc($key); }
    my $bib = $doc->findnode('ancestor-or-self::ltx:bibliography', $node);
    # Probably should only be one list, but just in case?
    my $listnames = ($bib && $bib->getAttribute('lists')) || 'bibliography';
    my @lists     = split(/\s+/, $listnames);
    if ($key) {
      foreach my $list (@lists) {    # BIBLABEL is for the reference to a biblio. item/entry
        $$self{db}->register("BIBLABEL:$list:$key", id => orNull($id)); } }
    # The actual bibliographic data is recorded keyed by the xml:id of the bibitem!
    # Do these need ->cleanNode ???
    # Each pair is [database property, role of the ltx:tag that supplies it].
    my @tagroles = (
      [authors => 'authors'], [fullauthors => 'fullauthors'],
      [year    => 'year'],    [number      => 'number'],
      [refnum  => 'refnum'],  [title       => 'title'],
      [keytag  => 'key'],     [typetag     => 'bibtype']);
    $$self{db}->register("ID:$id",
      id       => orNull($id),
      type     => orNull($tag),
      parent   => orNull($parent_id),
      bibkey   => orNull($key),
      location => orNull($doc->siteRelativeDestination),
      pageid   => orNull($self->pageID($doc)),
      fragid   => orNull($self->inPageID($doc, $node)),
      (map {
          my ($prop, $role) = @$_;
          ($prop => orNull($doc->findnode('ltx:tags/ltx:tag[@role="' . $role . '"]', $node))) }
          @tagroles)); }
  my $child_parent = $id || $parent_id;
  $self->scanChildren($doc, $node, $child_parent);
  return; }
479
# Handler for ltx:bibentry: deliberately records nothing and does not scan
# the entry's content.
# The actual bibliographic data is recorded keyed by the xml:id of the bibitem
# AFTER the bibentry has been formatted into a bibitem by MakeBibliography!
# So, there's really nothing to do now.
## HOWEVER; this ultimately requires formatting the bibliography twice (for complex sites).
## This needs to be reworked!
sub bibentry_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  return; }
489
# Handler for ltx:declare.  Depending on the declaration it registers:
#   - "DECLARATION:global:<definiens>" when type is 'definition' and a definiens
#     is given by the `definiens` attribute or can be extracted from the "term" tag;
#   - "DECLARATION:local:<xml:id>" when there is no type, there is a $parent_id,
#     and the node has an xml:id and either an ltx:text child or any ltx:tag;
#   - "NOTATION:<definiens, else xml:id, else sortkey>" whenever a `sortkey` is present.
# Each entry gets the common properties plus a cleaned copy of the ltx:text
# child as `description`.  The node's content is not scanned.
sub declare_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # See preprocess_symbols for the extraction of the "defined" symbol (if any)
  # Also recognize marks for definition, notation...
  my $type        = $node->getAttribute('type');
  my $sort        = $node->getAttribute('sortkey');
  my $decl_id     = $node->getAttribute('xml:id');
  my $term        = $self->cleanNode($doc, $doc->findnode('child::ltx:tags/ltx:tag[@role="term"]', $node));
  my $description = $self->cleanNode($doc, $doc->findnode('child::ltx:text', $node));
  my $definiens   = $node->getAttribute('definiens');
  my $is_definition = (defined $type) && ($type eq 'definition');
  if ($is_definition) {
    if ((defined $term) && (!defined $definiens)) {
      # Extract the definiens from the term node
      my @syms = $doc->findnodes('descendant-or-self::ltx:XMTok[@meaning]', $term);
      # HACK; remove apparent definitions to lists
      # [these will have to be handled much more intentionally]
      my @candidates = grep { $_->getAttribute('meaning') !~ /^delimited-/ } @syms;
      # We're probably not defining a relation, so prefer the first non-relation,
      # falling back to the first candidate of any role.
      my ($chosen) = ((grep { ($_->getAttribute('role') || '') ne 'RELOP' } @candidates), @candidates);
      $definiens = $chosen && $chosen->getAttribute('meaning'); }
    if (defined $definiens) {
      my @common = $self->addCommon($doc, $node, $tag, $parent_id);
      $$self{db}->register("DECLARATION:global:$definiens", @common,
        description => $description); } }
  elsif ((!$type) && $parent_id) {   # No type? Assume local definition. (or should be explicit scope?
    if ($decl_id && ($description || $doc->findnode('ltx:tags/ltx:tag', $node))) {
      my @common = $self->addCommon($doc, $node, $tag, $parent_id);
      $$self{db}->register("DECLARATION:local:$decl_id", @common,
        description => $description); } }

  if ($sort) {                       # It only goes into Notation tables/indices if a sortkey.
    my $notation = $definiens || $decl_id || $sort;
    my @common   = $self->addCommon($doc, $node, $tag, $parent_id);
    $$self{db}->register("NOTATION:" . $notation, @common,
      sortkey => $sort, description => $description); }
  # No real benefit to scan the contents? (and makes it SLOW)
  #  $self->default_handler($doc,$node,$tag,$parent_id);
  return; }
527
# Handler for ltx:navigation: records nothing and does not scan the content.
# I'm thinking we shouldn't acknowledge navigation data at all?
sub navigation_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  return; }
532
# Handler for ltx:rdf.
# RDF should be recorded with its "about" designation, or its immediate parent:
# the target id is the `about` attribute with its leading '#' removed, when it
# has one; otherwise $parent_id.
# The node's `property` is stored on the entry "ID:<target id>", with the
# `resource` attribute (else the `content` attribute) as its value.
# Nothing is recorded unless both property and value are present.
sub rdf_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $about    = $node->getAttribute('about');
  my $id       = ($about && ($about =~ s/^#//)) ? $about : $parent_id;
  my $property = $node->getAttribute('property');
  my $value    = $node->getAttribute('resource') || $node->getAttribute('content');
  return if !$property || !$value;
  $$self{db}->register("ID:$id", $property => orNull($value));
  return; }
544
# Handler for ltx:rawhtml: records nothing and does not scan the content.
# I'm thinking we shouldn't acknowledge rawhtml data at all?
sub rawhtml_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  return; }
549
# Return the arguments unchanged if at least one of them is defined;
# otherwise return a single undef.
# Used when building key => value lists, so that a call yielding an empty list
# still contributes a value.
sub orNull {
  foreach my $arg (@_) {
    return @_ if defined $arg; }
  # An explicit undef (not a bare return): callers rely on getting one element.
  return undef; }
552
553# ================================================================================
5541;
555