# /=====================================================================\ #
# |  LaTeXML::Post::Scan                                                | #
# |  Scan for ID's etc                                                  | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Post::Scan;
use strict;
use warnings;
use LaTeXML::Util::Pathname;
use LaTeXML::Common::XML;
use LaTeXML::Post;
use base qw(LaTeXML::Post::Processor);

# NOTE: This module is one that probably needs a lot of customizability.

# Constructor.
# Builds the object through the superclass constructor, remembers the object
# database passed as the `db` option, and installs one handler per element
# name.  Elements without a registered handler fall back to default_handler
# (see scan).
sub new {
  my ($class, %options) = @_;
  my $self = $class->SUPER::new(%options);
  $$self{db}       = $options{db};
  $$self{handlers} = {};
  # Sectional units: these maintain a list of children in the database.
  $self->registerHandler('ltx:document'      => \&section_handler);
  $self->registerHandler('ltx:part'          => \&section_handler);
  $self->registerHandler('ltx:chapter'       => \&section_handler);
  $self->registerHandler('ltx:section'       => \&section_handler);
  $self->registerHandler('ltx:appendix'      => \&section_handler);
  $self->registerHandler('ltx:subsection'    => \&section_handler);
  $self->registerHandler('ltx:subsubsection' => \&section_handler);
  $self->registerHandler('ltx:paragraph'     => \&section_handler);
  $self->registerHandler('ltx:subparagraph'  => \&section_handler);
  $self->registerHandler('ltx:bibliography'  => \&section_handler);
  $self->registerHandler('ltx:index'         => \&section_handler);
  $self->registerHandler('ltx:glossary'      => \&section_handler);

  $self->registerHandler('ltx:table'   => \&captioned_handler);
  $self->registerHandler('ltx:figure'  => \&captioned_handler);
  $self->registerHandler('ltx:float'   => \&captioned_handler);
  $self->registerHandler('ltx:listing' => \&captioned_handler);
  $self->registerHandler('ltx:theorem' => \&section_handler);
  $self->registerHandler('ltx:proof'   => \&section_handler);

  $self->registerHandler('ltx:equation'      => \&labelled_handler);
  $self->registerHandler('ltx:equationgroup' => \&labelled_handler);
  $self->registerHandler('ltx:item'          => \&labelled_handler);
  $self->registerHandler('ltx:anchor'        => \&anchor_handler);
  $self->registerHandler('ltx:note'          => \&note_handler);

  $self->registerHandler('ltx:bibitem'            => \&bibitem_handler);
  $self->registerHandler('ltx:bibentry'           => \&bibentry_handler);
  $self->registerHandler('ltx:indexmark'          => \&indexmark_handler);
  $self->registerHandler('ltx:glossaryentry'      => \&glossaryentry_handler);
  $self->registerHandler('ltx:glossarydefinition' => \&glossaryentry_handler);
  $self->registerHandler('ltx:ref'                => \&ref_handler);
  $self->registerHandler('ltx:bibref'             => \&bibref_handler);

  $self->registerHandler('ltx:navigation' => \&navigation_handler);
  $self->registerHandler('ltx:rdf'        => \&rdf_handler);
  $self->registerHandler('ltx:declare'    => \&declare_handler);

  $self->registerHandler('ltx:rawhtml' => \&rawhtml_handler);

  return $self; }

# Associate the qualified element name $tag with the code reference $handler,
# replacing any handler previously registered for that name.
sub registerHandler {
  my ($self, $tag, $handler) = @_;
  $$self{handlers}{$tag} = $handler;
  return; }

# Scan the document $doc starting at $root, recording what is found in the
# database, then link the document to a parent document.  Returns $doc.
sub process {
  my ($self, $doc, $root) = @_;
  # I think we really need an ID here to establish the root node in the DB,
  # even if the document didn't have one originally.
  # And for the common case of a single document, we'd like to be silent about it,
  # UNLESS there seem to be multiple documents which would lead to a conflict.
  my $id = $root->getAttribute('xml:id');
  if (!defined $id) {
    $id = "Document";
    if (my $preventry = $$self{db}->lookup("ID:$id")) {
      if (my $loc = $doc->siteRelativeDestination) {
        my $prevloc = $preventry->getValue('location');
        if ((defined $prevloc) && ($loc ne $prevloc)) {
          Warn('unexpected', 'location', undef,
            "Using default ID='$id', "
              . "but there's an apparent conflict with location '$loc' and previous '$prevloc'"); } } }
    $root->setAttribute('xml:id' => $id); }

  # By default, 1st document processed is considered the root of the site
  my $siteentry = $$self{db}->lookup('SITE_ROOT');
  if (!$siteentry) {
    $siteentry = $$self{db}->register('SITE_ROOT', id => $id); }
  my $siteid = $siteentry->getValue('id');

  $self->scan($doc, $root, $$doc{parent_id});

  # Set up interconnections on multidocument site.
  $$self{db}->register("DOCUMENT:" . ($doc->siteRelativeDestination || ''), id => $id);

  # Question: If (on multidoc sites) a doc contains a single node (say ltx:chapter)
  # might it make sense to treat the doc as ONLY that node?
  # Alternative: May be necessary to extract title from that child?

  # Find a plausible parent doc, unless this is the root, or already has one
  # Either by relative id's, destination location, or default to the site itself.
  my $entry = $$self{db}->lookup("ID:$id");
  if (($id ne $siteid) && !$entry->getValue('parent')) {
    my $parent_id;
    # Look for parent assuming its id is a (dot-separated) component of $id
    my $upid = $id;
    while ($upid =~ s/\.[^\.]+$//) {
      if ($$self{db}->lookup("ID:$upid")) {
        $parent_id = $upid; last; } }
    if (!$parent_id) {    # Look for parent as index.xml in a containing directory.
      my $dir = $entry->getValue('location');
      while (defined $dir) {
        my ($updir) = pathname_split($dir);
        last unless defined $updir;
        if (my $pentry = $$self{db}->lookup("DOCUMENT:" . pathname_concat($updir, 'index.xml'))) {
          my $pid = $pentry->getValue('id');
          if ($pid && ($pid ne $id)) {
            $parent_id = $pid; last; } }
        # Stop explicitly once splitting no longer moves up a directory.
        # A bare `while (($dir) = pathname_split($dir))` tests the number of
        # values returned, so it cannot terminate when no parent is found.
        last if $updir eq $dir;
        $dir = $updir; } }
    if (!$parent_id) {    # Else default to the id of the site itself.
      $parent_id = $siteid; }
    if ($parent_id && ($parent_id ne $id)) {
      $entry->setValues(parent => $parent_id);
      # Children are added in the order that they were scanned
      $self->addAsChild($id, $parent_id); }
    else {
      Info('expected', 'parent', undef, "No parent document found for '$id'"); } }
  NoteLog("Scan: DBStatus: " . $$self{db}->status);
  return $doc; }

# Dispatch $node to the handler registered for its qualified name,
# or to default_handler when none is registered.
sub scan {
  my ($self, $doc, $node, $parent_id) = @_;
  no warnings 'recursion';
  my $tag     = $doc->getQName($node);
  my $handler = $$self{handlers}{$tag} || \&default_handler;
  $handler->($self, $doc, $node, $tag, $parent_id);
  return; }

# Scan each element child of $node (non-element children are skipped).
sub scanChildren {
  my ($self, $doc, $node, $parent_id) = @_;
  no warnings 'recursion';
  foreach my $child ($node->childNodes) {
    if ($child->nodeType == XML_ELEMENT_NODE) {
      $self->scan($doc, $child, $parent_id); } }
  return; }

# Record $id in the 'children' list of $parent_id or, failing that, of the
# nearest ancestor (following 'parent' values) whose entry has such a list.
# Does nothing if no such ancestor is found.
sub addAsChild {
  my ($self, $id, $parent_id) = @_;
  # Find the ancestor that maintains a children list
  while (my $parent = $parent_id && $$self{db}->lookup("ID:$parent_id")) {
    if ($parent->hasValue('children')) {
      $parent->pushNew('children', $id);
      last; }
    else {
      $parent_id = $parent->getValue('parent'); } }
  return; }

# The xml:id of the document element of $doc.
sub pageID {
  my ($self, $doc) = @_;
  return $doc->getDocumentElement->getAttribute('xml:id'); }

# Compute a "Fragment ID", ie. an ID based on the given ID,
# but which is potentially shortened so that it need only be
# unique within the given page.
# Returns nothing when $node's id is the page's own id, and the (false) id
# itself when $node has no id.
sub inPageID {
  my ($self, $doc, $node) = @_;
  my $id     = $node->getAttribute('xml:id');
  my $baseid = $doc->getDocumentElement->getAttribute('xml:id') || '';
  # And we're using label-based ids in the target document...
  if ($$self{labelids}) {
    if (my $labels = $node->getAttribute('labels')) {
      # Use the first label, without its LABEL: prefix, in place of the id
      my ($label) = split(' ', $labels);
      $label =~ s/^LABEL://;
      $id = $label;
      if (my $baselabels = $doc->getDocumentElement->getAttribute('labels')) {
        my ($baselabel) = split(' ', $baselabels);
        $baselabel =~ s/^LABEL://;
        $baseid = $baselabel; } } }
  if (!$id) {
    return $id; }
  elsif ($baseid eq $id) {
    return; }
  elsif ($baseid && ($id =~ /^\Q$baseid\E\.(.*)$/)) {    # strip the page's id prefix
    return $1; }
  elsif ($$doc{split_from_id} && ($id =~ /^\Q$$doc{split_from_id}\E\.(.*)$/)) {
    return $1; }
  else {
    return $id; } }

# Register every (space-separated) label of $node in the database as pointing
# to the node's xml:id.  Returns an array reference of the labels, or nothing
# if the node lacks either an id or labels.
sub noteLabels {
  my ($self, $node) = @_;
  if (my $id = $node->getAttribute('xml:id')) {
    if (my $labels = $node->getAttribute('labels')) {
      my @labels = split(' ', $labels);
      foreach my $label (@labels) {
        $$self{db}->register($label, id => orNull($id)); }
      return [@labels]; } }
  return; }

# Clean up a node before insertion into database.
# Returns a clone of $node with all ltx:indexmark descendants removed;
# a false $node is returned unchanged.
sub cleanNode {
  my ($self, $doc, $node) = @_;
  return $node unless $node;
  # Clone the node, and get the ID's unique (at least) within the originating document
  my $cleaned = $doc->cloneNode($node);
  # Remove indexmark (anything else ?)
  foreach my $mark ($doc->findnodes('.//ltx:indexmark', $cleaned)) {
    $mark->parentNode->removeChild($mark); }
  return $cleaned; }

# Assumes $node has been cloned, if needed.
# Set to something smallish (eg. 6) to forcibly truncate toctitle/toccaption
our $TOCTEXT_MAX_LENGTH = undef;

# Truncate $node in place to roughly $TOCTEXT_MAX_LENGTH units and return it.
# A text node with more words than the remaining budget is cut to that many
# words; any other child (or a shorter text node) costs one unit.  When
# anything is cut off, the leftover children are removed and an ellipsis is
# appended.  No-op when $node is false or $TOCTEXT_MAX_LENGTH is undefined.
sub truncateNode {
  my ($self, $doc, $node) = @_;
  return $node if !$node || !defined $TOCTEXT_MAX_LENGTH;
  my @children = $node->childNodes;
  my $n        = $TOCTEXT_MAX_LENGTH;
  my $trunc    = 0;
  while ($n && @children) {
    my $c = shift(@children);
    if ($c->nodeType == XML_TEXT_NODE) {
      my $s = $c->textContent;
      my @w = split(/\s/, $s);
      if (scalar(@w) > $n) {
        # Keep exactly the $n words still allowed (indices 0 .. $n-1)
        $c->setData(join(' ', @w[0 .. $n - 1]));
        $trunc = 1; $n = 0; }
      else {
        $n--; } }
    else {
      $n--; } }
  if ($trunc || (scalar(@children) > 1)) {
    foreach my $extra (@children) {    # Remove any remaining children.
      $node->removeChild($extra); }
    $node->appendText("\x{2026}"); }
  return $node; }

# Build the list of key/value properties shared by most database entries for
# $node: id, type, parent, labels, location, pageid, fragid and inlist, plus
# one entry per ltx:tags/ltx:tag child.  Returns a flat key/value list.
sub addCommon {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  my $inlist;
  if (my $listnames = $node->getAttribute('inlist')) {
    $inlist = { map { ($_ => 1) } split(/\s/, $listnames) }; }
  my %props = (
    id       => orNull($id),
    type     => orNull($tag),
    parent   => orNull($parent_id),
    labels   => orNull($self->noteLabels($node)),
    location => orNull($doc->siteRelativeDestination),
    pageid   => orNull($self->pageID($doc)),
    fragid   => orNull($self->inPageID($doc, $node)),
    inlist   => $inlist,
  );
  # Figure out sane, safe naming?
  # A role ending in "refnum" is used as the key itself; any other role is
  # stored as "tag:<role>"; a tag without role is stored as 'frefnum'.
  foreach my $tagnode ($doc->findnodes('ltx:tags/ltx:tag', $node)) {
    my $key;
    if (my $role = $tagnode->getAttribute('role')) {
      if ($role =~ /.*refnum$/) {
        $key = $role; }
      else {
        $key = 'tag:' . $role; } }
    else {
      $key = 'frefnum'; }
    ###      $key = 'refnum'; }    # ???
    $props{$key} = $self->cleanNode($doc, $tagnode); }
  return %props; }

# Handler for elements with no specific handler: if the node has an xml:id,
# register its common properties and add it as a child of its parent;
# then scan the children (with this node as parent, if it has an id).
sub default_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  no warnings 'recursion';
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id));
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Handler for sectional units: like default_handler, but additionally records
# title, toctitle, the stub attribute, a 'primary' flag and an (initially
# empty) children list.
sub section_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id),
      primary  => 1,
      title    => orNull($self->cleanNode($doc, $doc->findnode('ltx:title',    $node))),
      toctitle => orNull($self->cleanNode($doc, $doc->findnode('ltx:toctitle', $node))),
      children => [],
      stub     => orNull($node->getAttribute('stub')));
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Handler for captioned elements (table, figure, float, listing):
# records role, caption and (possibly truncated) toccaption.
sub captioned_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    # We're actually trying to find the shallowest caption
    # Not one nested in another figure/table/float/whoknowswhat !
    # Using || (rather than taking the first element of a two-element list)
    # falls back to the descendant search whether a failed findnode yields
    # undef or an empty list.
    my $caption = $doc->findnode('child::ltx:caption', $node)
      || $doc->findnode('descendant::ltx:caption', $node);
    my $toccaption = $doc->findnode('child::ltx:toccaption', $node)
      || $doc->findnode('descendant::ltx:toccaption', $node);
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id),
      role    => orNull($node->getAttribute('role')),
      caption => orNull($self->cleanNode($doc, $caption)),
###      toccaption => orNull($self->cleanNode($doc,
###          $doc->findnode('descendant::ltx:toccaption', $node))));
      toccaption => orNull($self->truncateNode($doc, $self->cleanNode($doc, $toccaption))));
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Handler for labelled elements (equation, equationgroup, item):
# records the common properties plus the role attribute.
sub labelled_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id),
      role => orNull($node->getAttribute('role')),
    );
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Maybe with some careful redesign of the schema, this would fall under labelled?
# Handler for ltx:note: records the common properties, the role attribute,
# and a cleaned copy of the note itself (stored under 'note').
sub note_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    my $note = $self->cleanNode($doc, $node);
    # Drop the tags from the stored copy.
    # NOTE(review): removeChild is invoked on $note itself, so presumably only
    # ltx:tags that are direct children are actually detached -- confirm.
    foreach my $tags ($doc->findnodes('.//ltx:tags', $note)) {
      $note->removeChild($tags); }
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id),
      role => orNull($node->getAttribute('role')),
      note => $note,
    );
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Handler for ltx:anchor: records the common properties, with a cleaned copy
# of the anchor itself stored as its 'title'.
sub anchor_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    $$self{db}->register("ID:$id",
      $self->addCommon($doc, $node, $tag, $parent_id),
      title => orNull($self->cleanNode($doc, $node)),
    );
    $self->addAsChild($id, $parent_id); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Handler for ltx:ref: notes $parent_id as a referrer of the referenced label
# (the labelref attribute), then treats the node like any other element.
sub ref_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  if (my $label = $node->getAttribute('labelref')) {    # Only record refs of labels
    # Don't scan refs from TOC or 'cited' bibblock
    if (!$doc->findnodes('ancestor::ltx:tocentry'
          . '| ancestor::ltx:bibblock[contains(@class,"ltx_bib_cited")]',
        $node)) {
      my $entry = $$self{db}->register($label);
      $entry->noteAssociation(referrers => $parent_id); } }
  # Usually, a ref won't YET have content; but if it does, we should scan it.
  $self->default_handler($doc, $node, $tag, $parent_id);
  return; }

# Handler for ltx:bibref: notes $parent_id as a referrer of each cited key,
# once per bibliography list, then treats the node like any other element.
sub bibref_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # Don't scan refs from 'cited' bibblock
  if (!$doc->findnodes('ancestor::ltx:bibblock[contains(@class,"ltx_bib_cited")]', $node)) {
    if (my $keys = $node->getAttribute('bibrefs')) {
      # Citation specifies main 'bibliography', as well as any specific others (eg. per chapter)
      my $l     = $node->getAttribute('inlist');
      my @lists = (($l ? split(/\s+/, $l) : ()), 'bibliography');
      foreach my $bibkey (split(',', $keys)) {
        if ($bibkey) {
          $bibkey = lc($bibkey);    # NOW we downcase!
          foreach my $list (@lists) {    # Records a *reference* to a bibkey! (for each list)
            my $entry = $$self{db}->register("BIBLABEL:$list:$bibkey");
            $entry->noteAssociation(referrers => $parent_id); } } } } }
  # Usually, a bibref will have, at most, some ltx:bibphrase's; should be scanned.
  $self->default_handler($doc, $node, $tag, $parent_id);
  return; }

# Note that index entries get stored in simple form; just the terms & location.
# They will be turned into a tree, sorted, possibly permuted, whatever, by MakeIndex.
# [the only content of indexmark should be un-marked up(?) don't recurse]
sub indexmark_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # Get the actual phrases, and any see_also phrases (if any)
  # Do these need ->cleanNode ???
  my @phrases = $doc->findnodes('ltx:indexphrase', $node);
  my @seealso = $doc->findnodes('ltx:indexsee',    $node);
  # The database key is INDEX followed by the key of each phrase
  my $key = join(':', 'INDEX', map { $_->getAttribute('key') } @phrases);
  my $inlist;
  if (my $listnames = $node->getAttribute('inlist')) {
    $inlist = { map { ($_ => 1) } split(/\s/, $listnames) }; }
  my $entry = $$self{db}->lookup($key)
    || $$self{db}->register($key, phrases => [@phrases], see_also => [], inlist => $inlist);
  # A mark carrying "see" phrases only contributes those; otherwise it
  # records $parent_id as a referrer, with the mark's style.
  if (@seealso) {
    $entry->pushNew('see_also', @seealso); }
  else {
    $entry->noteAssociation(referrers => $parent_id => ($node->getAttribute('style') || 'normal')); }
  return; }

# This handles glossaryentry or glossarydefinition
# The lists come from the node's inlist attribute, else from the lists
# attribute of an enclosing glossarylist/glossary, else default to 'glossary'.
sub glossaryentry_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  my $p;
  my $lists = $node->getAttribute('inlist') ||
    (($p = $doc->findnode('ancestor::ltx:glossarylist[@lists] | ancestor::ltx:glossary[@lists]', $node))
    && $p->getAttribute('lists'))
    || 'glossary';
  my $key = $node->getAttribute('key');
  # Get the actual phrases, and any see_also phrases (if any)
  # Do these need ->cleanNode ???
  my @phrases = $doc->findnodes('ltx:glossaryphrase', $node);
  # Create an entry for EACH list (they could be distinct definitions)
  foreach my $list (split(/\s+/, $lists)) {
    my $gkey  = join(':', 'GLOSSARY', $list, $key);
    my $entry = $$self{db}->lookup($gkey) || $$self{db}->register($gkey);
    $entry->setValues(map { ('phrase:' . ($_->getAttribute('role') || 'label') => $_) } @phrases);
    $entry->noteAssociation(referrers => $parent_id => ($node->getAttribute('style') || 'normal'));
    $entry->setValues(id => $id) if $id; }

  if ($id) {
    $$self{db}->register("ID:$id", id => orNull($id), type => orNull($tag), parent => orNull($parent_id),
      labels   => orNull($self->noteLabels($node)),
      location => orNull($doc->siteRelativeDestination),
      pageid   => orNull($self->pageID($doc)),
      fragid   => orNull($self->inPageID($doc, $node))); }
  # Scan content, since could contain other interesting stuff...
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# Note this bit of perversity:
#  <ltx:bibentry> is a semantic bibliographic entry,
#     as generated from a BibTeX file.
#  <ltx:bibitem> is a formatted bibliographic entry,
#     as generated from an explicit thebibliography environment (eg. manually, or in a .bbl),
#     or as formatted from a <ltx:bibentry> by MakeBibliography.
# For a bibitem, we'll store the bibliographic metadata in the DB, keyed by the ID of the item.
sub bibitem_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('xml:id');
  if ($id) {
    # NOTE: We didn't downcase the key when we created the bib file
    # BUT, we're going to index it in the ObjectDB by the downcased name!!!
    my $key = $node->getAttribute('key');
    $key = lc($key) if $key;
    my $bib = $doc->findnode('ancestor-or-self::ltx:bibliography', $node);
    # Probably should only be one list, but just in case?
    my @lists = split(/\s+/, ($bib && $bib->getAttribute('lists')) || 'bibliography');
    if ($key) {
      foreach my $list (@lists) {    # BIBLABEL is for the reference to a biblio. item/entry
        $$self{db}->register("BIBLABEL:$list:$key", id => orNull($id)); } }
    # The actual bibliographic data is recorded keyed by the xml:id of the bibitem!
    # Do these need ->cleanNode ???
    $$self{db}->register("ID:$id", id => orNull($id), type => orNull($tag), parent => orNull($parent_id), bibkey => orNull($key),
      location    => orNull($doc->siteRelativeDestination),
      pageid      => orNull($self->pageID($doc)),
      fragid      => orNull($self->inPageID($doc, $node)),
      authors     => orNull($doc->findnode('ltx:tags/ltx:tag[@role="authors"]',     $node)),
      fullauthors => orNull($doc->findnode('ltx:tags/ltx:tag[@role="fullauthors"]', $node)),
      year        => orNull($doc->findnode('ltx:tags/ltx:tag[@role="year"]',        $node)),
      number      => orNull($doc->findnode('ltx:tags/ltx:tag[@role="number"]',      $node)),
      refnum      => orNull($doc->findnode('ltx:tags/ltx:tag[@role="refnum"]',      $node)),
      title       => orNull($doc->findnode('ltx:tags/ltx:tag[@role="title"]',       $node)),
      keytag      => orNull($doc->findnode('ltx:tags/ltx:tag[@role="key"]',         $node)),
      typetag     => orNull($doc->findnode('ltx:tags/ltx:tag[@role="bibtype"]',     $node))); }
  $self->scanChildren($doc, $node, $id || $parent_id);
  return; }

# For a bibentry, we'll only store the citation key, so we know it's there.
sub bibentry_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # The actual bibliographic data is recorded keyed by the xml:id of the bibitem
  # AFTER the bibentry has been formatted into a bibitem by MakeBibliography!
  # So, there's really nothing to do now.
  ## HOWEVER; this ultimately requires formatting the bibliography twice (for complex sites).
  ## This needs to be reworked!
  return; }

# Handler for ltx:declare: registers global definitions (keyed by definiens),
# local declarations (keyed by the declaration's xml:id), and, when a sortkey
# is present, a NOTATION entry.  The content is deliberately not scanned.
sub declare_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  # See preprocess_symbols for the extraction of the "defined" symbol (if any)
  # Also recognize marks for definition, notation...
  my $type        = $node->getAttribute('type');
  my $sort        = $node->getAttribute('sortkey');
  my $decl_id     = $node->getAttribute('xml:id');
  my $term        = $self->cleanNode($doc, $doc->findnode('child::ltx:tags/ltx:tag[@role="term"]', $node));
  my $description = $self->cleanNode($doc, $doc->findnode('child::ltx:text', $node));
  my $definiens   = $node->getAttribute('definiens');
  if (defined $type && ($type eq 'definition')) {
    if ((!defined $definiens) && (defined $term)) {
      # Extract the definiens from the term node
      my (@syms) = $doc->findnodes('descendant-or-self::ltx:XMTok[@meaning]', $term);
      # We're probably not defining a relation, so put non-relations first.
      @syms = ((grep { ($_->getAttribute('role') || '') ne 'RELOP'; } @syms), @syms);
      # HACK; remove apparent definitions to lists
      # [these will have to be handled much more intentionally]
      @syms      = grep { $_->getAttribute('meaning') !~ /^delimited-/ } @syms;
      $definiens = $syms[0] && $syms[0]->getAttribute('meaning'); }
    if (defined $definiens) {
      $$self{db}->register("DECLARATION:global:$definiens",
        $self->addCommon($doc, $node, $tag, $parent_id),
        description => $description); } }
  elsif ((!$type) && $parent_id) {    # No type? Assume local definition. (or should be explicit scope?
    if ($decl_id && ($description || $doc->findnode('ltx:tags/ltx:tag', $node))) {
      $$self{db}->register("DECLARATION:local:$decl_id",
        $self->addCommon($doc, $node, $tag, $parent_id),
        description => $description); } }

  if ($sort) {    # It only goes into Notation tables/indices if a sortkey.
    $$self{db}->register("NOTATION:" . ($definiens || $decl_id || $sort),
      $self->addCommon($doc, $node, $tag, $parent_id),
      sortkey => $sort, description => $description); }
  # No real benefit to scan the contents? (and makes it SLOW)
  #  $self->default_handler($doc,$node,$tag,$parent_id);
  return; }

# I'm thinking we shouldn't acknowledge navigation data at all?
sub navigation_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  return; }

# RDF should be recorded with its "about" designation, or its immediate parent
# The subject is the about attribute when it starts with '#' (which is
# stripped), otherwise $parent_id.  Nothing is recorded unless a subject,
# a property and a value (resource or content) are all available.
sub rdf_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  my $id = $node->getAttribute('about');
  if (!($id && ($id =~ s/^#//))) {
    $id = $parent_id; }
  my $property = $node->getAttribute('property');
  my $value    = $node->getAttribute('resource') || $node->getAttribute('content');
  return unless ($property && $value);
  # Without a subject there is no entry to attach to; avoid registering "ID:".
  return unless defined $id;
  $$self{db}->register("ID:$id", $property => orNull($value));
  return; }

# I'm thinking we shouldn't acknowledge rawhtml data at all?
sub rawhtml_handler {
  my ($self, $doc, $node, $tag, $parent_id) = @_;
  return; }

# Returns its arguments if any of them is defined, otherwise a single undef,
# so that it always supplies a value when used in a key => value list.
sub orNull {
  return (grep { defined } @_) ? @_ : undef; }

# ================================================================================
1;