1# /=====================================================================\ #
2# |  LaTeXML::Post::MathML                                              | #
3# | MathML generator for LaTeXML                                        | #
4# |=====================================================================| #
5# | Part of LaTeXML:                                                    | #
6# |  Public domain software, produced as part of work done by the       | #
7# |  United States Government & not subject to copyright in the US.     | #
8# |---------------------------------------------------------------------| #
9# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
10# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
11# \=========================================================ooo==U==ooo=/ #
12
13package LaTeXML::Post::MathML;
14use strict;
15use warnings;
16use LaTeXML::Common::XML;
17use LaTeXML::Post;
18use List::Util qw(max);
19use base qw(LaTeXML::Post::MathProcessor);
20use base qw(Exporter);
21our @EXPORT = (
22  qw( &DefMathML ),
23  qw( &pmml &pmml_scriptsize &pmml_smaller
24    &pmml_mi &pmml_mo &pmml_mn &pmml_bigop
25    &pmml_punctuate &pmml_parenthesize
26    &pmml_infix &pmml_script &pmml_summation),
27  qw( &cmml &cmml_share &cmml_shared &cmml_leaf
28    &cmml_or_compose &cmml_synth_not &cmml_synth_complement),
29);
30require LaTeXML::Post::MathML::Presentation;
31require LaTeXML::Post::MathML::Content;
32
# Namespace URI of MathML; preprocess registers it in the document under the prefix 'm'.
my $mmlURI = "http://www.w3.org/1998/Math/MathML";    # CONSTANT
34
35# ================================================================================
36# LaTeXML::MathML  Base-level Math Formatter for LaTeXML's Parsed Math.
37#   Cooperate with the parsed math structure generated by LaTeXML::Math and
38# convert into presentation & content MathML.
39# (See LaTeXML::Post::MathML::Presentation, LaTeXML::Post::MathML::Content)
40# ================================================================================
41# Some clarity to work out:
42#  We're trying to convert either parsed or unparsed math (sometimes intertwined).
43# How clearly do these have to be separated?
44# at least, sub/superscripts do not attach to anything meaningful.
45# ================================================================================
46
47#================================================================================
48# Useful switches when creating a converter with special needs.
49#  plane1  : use Unicode plane 1 characters for math letters
50#  hackplane1 : use a hybrid of plane1 for script and fraktur,
51#               otherwise regular chars with mathvariant
52#  nestmath : allow m:math to be nested within m:mtext
53#             otherwise flatten to m:mrow sequence of m:mtext and other math bits.
54#  usemfenced : whether to use mfenced instead of mrow
55#          this would be desired for MathML-CSS profile,
56#          but (I think) mrow usually gets better handling in firefox,..?
57#================================================================================
58
59#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
60# Top level
61#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Prepare the converter and the document before any math is converted.
# Fills in defaults for the hackplane1, plane1 and nestmath switches, then
# makes sure the document can hold MathML: adjusts the doctype and registers
# the MathML namespace under the prefix 'm'.
# @nodes is accepted but not used here.  Returns nothing.
sub preprocess {
  my ($self, $doc, @nodes) = @_;
  # Set up rational, modern, defaults:
  # hackplane1 & nestmath default to off; plane1 defaults to on,
  # and is forced on whenever hackplane1 is on.
  $self->{hackplane1} ||= 0;
  if ($self->{hackplane1} || !defined $self->{plane1}) {
    $self->{plane1} = 1; }
  $self->{nestmath} ||= 0;
  $doc->adjust_latexml_doctype('MathML');    # Add MathML if LaTeXML dtd.
  $doc->addNamespace($mmlURI, 'm');
  return; }
71
# Wrap converted math in the top-level m:math element.  Works for pmml, cmml.
#   $xmath : the source XMath node; its parent supplies the mode, class, tex
#            and image attributes; RDFa attributes are taken from the parent
#            or, failing that, from $xmath itself.
#   $mml   : the already converted math (array form), placed inside m:math.
# Returns the m:math element (array form), after associating it with $xmath.
sub outerWrapper {
  my ($self, $doc, $xmath, $mml) = @_;
  my $math = $xmath->parentNode;
  my $mode = $math->getAttribute('mode') || 'inline';
  my @img  = ();
  if (my $src = $math->getAttribute('imagesrc')) {
    my $width  = $math->getAttribute('imagewidth');
    my $height = $math->getAttribute('imageheight');
    my $depth  = $math->getAttribute('imagedepth');
    # Only attach the unit to dimensions that are actually present;
    # a missing one is left undef (as altimg-valign is) rather than becoming a bare 'px'.
    @img = (altimg => $src,
      'altimg-width'  => (defined $width  && ($width ne '')  ? $width . 'px'  : undef),
      'altimg-height' => (defined $height && ($height ne '') ? $height . 'px' : undef),
      'altimg-valign' => ($depth ? -$depth . 'px' : undef)); }        # Note the sign!
  my @rdfa = map { my $val = ($math->getAttribute($_) || $xmath->getAttribute($_)); $val ? ($_ => $val) : () }
    qw(about resource property rel rev typeof datatype content);
  my $wrapped = ['m:math', { display => ($mode eq 'display' ? 'block' : 'inline'),
      class   => $math->getAttribute('class'),
      alttext => $math->getAttribute('tex'),
#### Handy for debugging math
###      title => $math->getAttribute('text'),
      @rdfa,
      @img },
    $mml];
  # Associate the generated node with the source XMath node, but don't cross-reference
  $self->associateNode($wrapped, $xmath, 1);
  return $wrapped; }
97
# Map mimetype to Official MathML encodings.
# combineParallel uses this for the encoding attribute of the annotations it
# creates; a mimetype not listed here is used as the encoding unchanged.
our %ENCODINGS = (
  'application/mathml-presentation+xml' => 'MathML-Presentation',
  'application/mathml-content+xml'      => 'MathML-Content',
  'image/svg+xml'                       => 'SVG1.1',
);
104
# Returns the fixed id suffix used by this processor: always '.msvg'.
# NOTE(review): how callers combine it with an id is not visible here.
sub rawIDSuffix {
  my $suffix = '.msvg';
  return $suffix; }
107
# Combine a primary conversion with secondary (parallel) ones under m:semantics.
# This works for either pmml or cmml.
#   $primary, @secondaries : hashes describing conversion results; the keys read
#       here are mimetype, xml, src, string and (for non-MathML xml) processor.
# Each secondary becomes an annotation following the primary's xml:
#   * MathML mimetype : m:annotation-xml holding its xml as is;
#   * other xml       : m:annotation-xml holding the xml wrapped by that
#                       secondary's own processor (outerWrapper);
#   * src             : m:annotation referring to the source;
#   * string          : m:annotation containing the string.
# A secondary offering none of these is ignored.
# Returns a result hash (processor, mimetype, xml) whose xml is the m:semantics.
sub combineParallel {
  my ($self, $doc, $xmath, $primary, @secondaries) = @_;
  my @alt = ();
  foreach my $secondary (@secondaries) {
    my $mimetype = $$secondary{mimetype} || 'unknown';
    my $encoding = $ENCODINGS{$mimetype} || $mimetype;
    if ($mimetype =~ /^application\/mathml/) {    # Some flavor of MathML? simple case
      push(@alt, ['m:annotation-xml', { encoding => $encoding },
          $$secondary{xml}]); }
    elsif (my $xml = $$secondary{xml}) {          # Other XML? may need wrapping.
      push(@alt, ['m:annotation-xml', { encoding => $encoding },
          $$secondary{processor}->outerWrapper($doc, $xmath, $xml)]); }
    elsif (my $src = $$secondary{src}) {          # something referred to by a file? Image, maybe?
      push(@alt, ['m:annotation', { encoding => $encoding, src => $src }]); }
    elsif (my $string = $$secondary{string}) {    # simple string data?
      push(@alt, ['m:annotation', { encoding => $encoding }, $string]); }
    # anything else ignore?
  }
  return { processor => $self, mimetype => $$primary{mimetype},
    xml => ['m:semantics', {}, $$primary{xml}, @alt] }; }
130
131# $self->convertNode($doc,$node);
132# will be handled by specific Presentation or Content MathML converters; See at END.
133
134# $self->translateNode($doc,$XMath,$style,$embedding)
135# returns the translation of the XMath node (but doesn't insert it)
136# $style will be either 'display' or 'text' (if relevant),
137# The result should be wrapped as necessary for the result to
138# be embedded within the tag $embedding.
139# Eg. for parallel markup.
140
141# See END for presentation, content and parallel versions.
142
# Qualified name of $node (compared elsewhere in this file against names such
# as 'ltx:XMApp'), as determined by the document currently being processed.
sub getQName {
  my ($node) = @_;
  my $document = $LaTeXML::Post::DOCUMENT;
  return $document->getQName($node); }
146
# Record a cross-reference on $node by setting its xref attribute to $id.
# (presumably $id is the id of a node created by another Math Postprocessor
# from the same source XMath node that generated $node)
# Returns nothing.
sub addCrossref {
  my ($self, $node, $id) = @_;
  $node->setAttribute('xref', $id);
  return; }
154
155#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
156# General translation utilities.
157#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
158
# Resolve $node through the current document's realizeXMNode (passing $branch along);
# anything that is not a reference (eg. a plain string) is returned unchanged.
sub realize {
  my ($node, $branch) = @_;
  return $node unless ref $node;
  return $LaTeXML::Post::DOCUMENT->realizeXMNode($node, $branch); }
162
# Roles of operators that merely embellish another operator; when an XMApp is
# headed by one of these, the role is sought in the XMApp's second child instead.
my %EMBELLISHING_ROLE = (    # CONSTANT
  SUPERSCRIPTOP => 1, SUBSCRIPTOP => 1,
  OVERACCENT => 1, UNDERACCENT => 1, MODIFIER => 1, MODIFIEROP => 1);

# For a node that is a (possibly embellished) operator,
# find the underlying role.
#  * no node                    : returns nothing;
#  * node with a role attribute : that role;
#  * XMApp whose first child has an embellishing role :
#                                 the role of its second child (recursively);
#  * any other XMApp, including one without element children : undef.
sub getOperatorRole {
  my ($node) = @_;
  if (!$node) {
    return; }
  elsif (my $role = $node->getAttribute('role')) {
    return $role; }
  elsif (getQName($node) eq 'ltx:XMApp') {
    my ($op, $base) = element_nodes($node);
    # Guard $op: an XMApp without element children has no operator to ask for a role
    # (pmml_internal likewise allows for a missing operator).
    return ($op && $EMBELLISHING_ROLE{ $op->getAttribute('role') || '' }
      ? getOperatorRole($base)
      : undef); } }
180
181#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
182# Table of Translators for presentation|content
183#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
184# All translators take XMath XML::LibXML nodes as arguments,
185# and return an intermediate form (ie. array form) of MathML to be added.
186
187# DANGER!!! These accumulate all the DefMathML declarations.
188# They're fixed after the module has been loaded, so are Daemon Safe,
189# but probably should be going into (post) STATE, so that they are extensible.
190# IN FACT, I'm already taking baby-steps to export DefMathML (and needed helpers),
191# in order to assist these extensions, so that will bring up daemon issues pretty quick.
our $MMLTable_P = {};
our $MMLTable_C = {};

# Register translators under $key: $presentation goes in the presentation table,
# $content in the content table.  Either may be omitted (false), in which case
# the corresponding table is left untouched.  The lookups in this file use keys
# of the form "mode:role:name", with '?' as wildcard.  Returns nothing.
sub DefMathML {
  my ($key, $presentation, $content) = @_;
  if ($presentation) {
    $MMLTable_P->{$key} = $presentation; }
  if ($content) {
    $MMLTable_C->{$key} = $content; }
  return; }
200
# Find the presentation translator for $mode, $role and $name.
# A missing role or name counts as the wildcard '?'.  Candidates are tried from
# most to least specific: role+name, name only, role only, then the mode's default.
sub lookupPresenter {
  my ($mode, $role, $name) = @_;
  $name ||= '?';
  $role ||= '?';
  foreach my $key ("$mode:$role:$name", "$mode:?:$name", "$mode:$role:?") {
    my $translator = $$MMLTable_P{$key};
    return $translator if $translator; }
  return $$MMLTable_P{"$mode:?:?"}; }
207
# Find the content translator for $mode, $role and $name.
# Content-first lookup. Idea:
# If we have a meaning/name provided, we can make a csymbol.
sub lookupContent {
  my ($mode, $role, $name) = @_;
  # Always use the default handler if nothing more specific is known
  my $default = $$MMLTable_C{"$mode:?:?"};
  if ($name) {
    # 1. Sometimes we can make a role-specific adaptation to the symbol, so check that first
    # 2. Sometimes we want to make a name-specific adaptation, check that second
    # 3. If no special code, but we have a name, use the generic handler
    return ($role && $$MMLTable_C{"$mode:$role:$name"})
      || $$MMLTable_C{"$mode:?:$name"}
      || $default; }
  # 4. If we do not have a name, check for a role-based handler, else the default
  return ($role && $$MMLTable_C{"$mode:$role:?"}) || $default; }
222
223#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
224# Various needed maps
225#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Math style reached when stepping down one level from the given style
# (used by pmml_smaller, eg. for numerators & denominators).
my %stylestep = (    # CONSTANT
  display => 'text',         text         => 'script',
  script  => 'scriptscript', scriptscript => 'scriptscript');
# Font size associated with each math style.
my %stylesize = (    # CONSTANT
  display => '100%', text         => '100%',
  script  => '70%',  scriptscript => '50%');
# Math style used for scripts attached to material in the given style
# (used by pmml_scriptsize).
my %style_script_step = (    # CONSTANT
  display => 'script',       text         => 'script',
  script  => 'scriptscript', scriptscript => 'scriptscript');
# Attributes for m:mstyle when changing between two mathstyles;
# indexed first by the style currently in effect, then by the desired style.
my %stylemap = (             # CONSTANT
  display => { text => { displaystyle => 'false' },
    script       => { displaystyle => 'false', scriptlevel => '+1' },
    scriptscript => { displaystyle => 'false', scriptlevel => '+2' } },
  text => { display => { displaystyle => 'true' },
    script       => { scriptlevel => '+1' },
    scriptscript => { scriptlevel => '+2' } },
  script => { display => { displaystyle => 'true', scriptlevel => '-1' },
    text         => { scriptlevel => '-1' },
    scriptscript => { scriptlevel => '+1' } },
  scriptscript => { display => { displaystyle => 'true', scriptlevel => '-2' },
    text   => { scriptlevel => '-2' },
    script => { scriptlevel => '-1' } });
# Similar to %stylemap (same indexing: current style, then desired style),
# but for use when there are no MathML structures used
# that NEED displaystyle to be set; presumably only to set a fontsize context.
# Consequently NO entry sets displaystyle: display & text are interchangeable
# here, and only the scriptlevel adjustments are recorded.
my %stylemap2 = (    # CONSTANT
  display => { text => {},
    script       => { scriptlevel => '+1' },
    scriptscript => { scriptlevel => '+2' } },
  text => { display => {},
    script       => { scriptlevel => '+1' },
    scriptscript => { scriptlevel => '+2' } },
  script => { display => { scriptlevel => '-1' },
    text         => { scriptlevel => '-1' },
    scriptscript => { scriptlevel => '+1' } },
  scriptscript => { display => { scriptlevel => '-2' },
    text   => { scriptlevel => '-2' },
    script => { scriptlevel => '-1' } });
264
# Mappings between (normalized) internal fonts & MathML mathvariant values.
# Default math font is roman|medium|upright.
# Keys are the font components joined by single spaces.
my %mathvariants = (    # CONSTANT
  'upright'                 => 'normal',
  'serif'                   => 'normal',
  'medium'                  => 'normal',
  'bold'                    => 'bold',
  'italic'                  => 'italic',
  'medium italic'           => 'italic',
  'bold italic'             => 'bold-italic',
  'doublestruck'            => 'double-struck',
  'blackboard'              => 'double-struck',
  'blackboard bold'         => 'double-struck',    # all collapse
  'blackboard upright'      => 'double-struck',    # all collapse
  'blackboard bold upright' => 'double-struck',    # all collapse
  'fraktur'                 => 'fraktur',
  'fraktur italic'          => 'fraktur',          # all collapse
  'fraktur bold'            => 'bold-fraktur',
  'script'                  => 'script',
  'script italic'           => 'script',           # all collapse
  'script bold'             => 'bold-script',
  'caligraphic'      => 'script',              # all collapse; NOTE: In TeX caligraphic is NOT script!
  'caligraphic bold' => 'bold-script',
  'sansserif'        => 'sans-serif',
  'sansserif bold'   => 'bold-sans-serif',
  'sansserif italic' => 'sans-serif-italic',
  'sansserif bold italic'  => 'sans-serif-bold-italic',
  'typewriter'             => 'monospace',
  'typewriter bold'        => 'monospace',
  'typewriter italic'      => 'monospace',
  'typewriter bold italic' => 'monospace',
);

# The font differences (from the containing context) have been deciphered
# into font, size and color attributes.  The font should match
# one of the above... (?)

# Given a font string (joining the components)
# reduce it to a "sane" font.  Note that MathML uses a single mathvariant
# to name the font, and doesn't inherit font components like italic or bold.
# Thus the font should be "complete", but also we can ignore components with
# default values like medium or upright (unless that is the only component).
# Returns the mathvariant for the reduced font, or 'normal' if it is not recognized.
sub mathvariantForFont {
  my ($font) = @_;
  $font =~ s/slanted/italic/;                           # equivalent in math
  $font =~ s/(?<!\w)serif// unless $font eq 'serif';    # Not needed (unless alone)
  $font =~ s/(?<!^)upright//;                           # Not needed (unless 1st element)
  $font =~ s/(?<!^)medium//;                            # Not needed (unless 1st element)
  # Dropping a component from the middle leaves a doubled space
  # (eg. 'sansserif medium italic' => 'sansserif  italic') which would never
  # match a table key; collapse runs of whitespace before trimming.
  $font =~ s/\s+/ /g;
  $font =~ s/^\s+//; $font =~ s/\s+$//;
  my $variant = $mathvariants{$font};
  return $variant if $variant;
  #  $font =~ s/\sitalic//;          # try w/o italic ?
  #  return $variant if $variant = $mathvariants{$font};
  #  $font =~ s/\sbold//;          # try w/o bold ?
  #  return $variant if $variant = $mathvariants{$font};
  return 'normal'; }
321
322#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
323# Support functions for Presentation MathML
324#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
325
# Convert the element children of $node to Presentation MathML, starting in
# math style $style (default 'text').
# Returns the single converted child, or an m:mrow holding them when there are several.
sub pmml_top {
  my ($self, $node, $style) = @_;
  # These (dynamically scoped) bindings reflect the style, font, size & color
  # that we are displaying in, as inherited from the document around $node.
  # Ie. if you want to draw in that size & color, you'll get it automatically.
  local $LaTeXML::MathML::STYLE = $style || 'text';
  local $LaTeXML::MathML::FONT  = find_inherited_attribute($node, 'font');
  local $LaTeXML::MathML::SIZE  = find_inherited_attribute($node, 'fontsize') || '100%';
  local $LaTeXML::MathML::COLOR = find_inherited_attribute($node, 'color');
  local $LaTeXML::MathML::BGCOLOR = find_inherited_attribute($node, 'backgroundcolor');
  local $LaTeXML::MathML::OPACITY = find_inherited_attribute($node, 'opacity');
  # The size we want starts out as the size we have.
  local $LaTeXML::MathML::DESIRED_SIZE = $LaTeXML::MathML::SIZE;
  my @converted = map { pmml($_) } element_nodes($node);
  return $converted[0] unless scalar(@converted) > 1;
  return ['m:mrow', {}, @converted]; }
341
# Look for a (true) value of $attribute on $node or its nearest ancestor having one.
# The climb stops at the first node that is not an element in the LaTeXML namespace
# [in other namespaces the attribute may have totally different format or even meaning!]
# Returns the value found, or nothing.
sub find_inherited_attribute {
  my ($node, $attribute) = @_;
  for (my $ancestor = $node ; $ancestor ; $ancestor = $ancestor->parentNode) {
    last unless isElementNode($ancestor)
      && ($ancestor->namespaceURI eq 'http://dlmf.nist.gov/LaTeXML');
    my $value = $ancestor->getAttribute($attribute);
    return $value if $value; }
  return; }
351
# Convert a node whose context automatically makes it smaller,
# such as the numerator or denominator of a fraction:
# the conversion runs with the math style stepped down one level (%stylestep)
# and the current size set to match that style.
sub pmml_smaller {
  my ($node) = @_;
  my $reduced = $stylestep{$LaTeXML::MathML::STYLE};
  local $LaTeXML::MathML::STYLE = $reduced;
  local $LaTeXML::MathML::SIZE  = $stylesize{$reduced};
  return pmml($node); }
359
# Convert a node that will automatically be made scriptsize,
# such as sub- or superscripts: the conversion runs in the script style
# belonging to the current style (%style_script_step), with matching size.
# A missing script yields an m:none placeholder.
sub pmml_scriptsize {
  my ($script) = @_;
  my $script_style = $style_script_step{$LaTeXML::MathML::STYLE};
  local $LaTeXML::MathML::STYLE = $script_style;
  local $LaTeXML::MathML::SIZE  = $stylesize{$script_style};
  return ['m:none'] unless $script;
  return pmml($script); }
367
# Convert an XMath node to Presentation MathML (array form).
# An XMRef is replaced by the node it refers to; style and decoration attributes
# are then taken from the referring node first, and from the node itself otherwise.
# After the core conversion (pmml_internal) the result may be wrapped in
# m:menclose (enclose attribute), given padding (lpadding/rpadding) and
# given the class(es) of the referring node and the node.
# Returns nothing if there is no $node.
sub pmml {
  my ($node) = @_;
  return unless $node;
  # [since we follow split/scan, use the fragid, not xml:id! TO SOLVE LATER]
  # Do the core conversion.
  # Fetch the "real" node, if this is an XMRef to one; also use the OTHER's id!
  my $refr;
  if (getQName($node) eq 'ltx:XMRef') {
    $refr = $node;
    $node = realize($node); }
  # Bind any other style information from the referring node or the current node
  # so that any tokens synthesized from strings recover that style.
  # (when neither node has the attribute, the binding already in effect is kept)
  local $LaTeXML::MathML::DESIRED_SIZE = _getattr($refr, $node, 'fontsize') || $LaTeXML::MathML::DESIRED_SIZE;
  local $LaTeXML::MathML::COLOR   = _getattr($refr, $node, 'color') || $LaTeXML::MathML::COLOR;
  local $LaTeXML::MathML::BGCOLOR = _getattr($refr, $node, 'backgroundcolor')
    || $LaTeXML::MathML::BGCOLOR;
  local $LaTeXML::MathML::OPACITY = _getattr($refr, $node, 'opacity') || $LaTeXML::MathML::OPACITY;
  my $result = pmml_internal($node);
  # Let customization annotate the result.
  # Now possibly wrap the result in a row, enclose, etc, if needed
  my $e = _getattr($refr, $node, 'enclose');
  # these should COMBINE!
  # ($l and $r are numbers: the paddings of referring node and node, added together)
  my $l  = _getspace($refr, $node, 'lpadding');
  my $r  = _getspace($refr, $node, 'rpadding');
  my $cl = join(' ', grep { $_ } $refr && $refr->getAttribute('class'), $node->getAttribute('class'));
  # Wrap in an enclose, if there's an enclose attribute (Ugh!)
  $result = ['m:menclose', { notation => $e }, $result] if $e;
  # Add spacing last; outside parens & enclosing (?)
  if (!(((ref $result) eq 'ARRAY') && ($$result[0] eq 'm:mo'))  # mo will already have gotten spacing!
    && ($r || $l)) {
    # If only lpadding given, we'll try to find an inner m:mo to accept it (so it will take effect)
    # (descend through the first child of each nested m:mrow)
    my $inner = $result;
    while (($$inner[0] eq 'm:mrow') && $$inner[2] && ref $$inner[2]) {
      $inner = $$inner[2]; }
    if (!$r && $inner && ($$inner[0] eq 'm:mo')) {
      # NOTE(review): the 1.6 offset is not explained here; presumably a default lspace in pt -- confirm
      my $ls = $l && max(0, 1.6 + $l);                          # must be \ge 0
      $$inner[1]{lspace} = $ls . 'pt'; }                        # Found inner op: use simple lspace
    else {                                                      # Else fall back to wrap with m:mpadded
      # width is given as an increment: the sum of whichever paddings are present
      my $w = ($l && $r ? $l + $r : ($l ? $l : $r));
      $result = ['m:mpadded', { ($l ? (lspace => $l . "pt") : ()),
          ($w ? (width => ($w =~ /^-/ ? $w : '+' . $w) . "pt") : ()) }, $result]; } }

  if ($cl && ((ref $result) eq 'ARRAY')) {                      # Add class, if any and different
    my $ocl = $$result[1]{class};
    $$result[1]{class} = (!$ocl || ($ocl eq $cl) ? $cl : "$ocl $cl"); }
  # Associate the generated node with the source XMath node.
  $LaTeXML::Post::MATHPROCESSOR->associateNode($result, $node);
  return $result; }
416
# The first child of $node that is an element (skipping text, comments, etc),
# or nothing if it has no element children.
sub first_element {
  my ($node) = @_;
  for (my $child = $node->firstChild ; $child ; $child = $child->nextSibling) {
    return $child if $child->nodeType == XML_ELEMENT_NODE; }
  return; }
424
# Value of $attribute, preferring the referring node $refr (if any, and if it
# has a true value) over $node.  Either node may be missing.
sub _getattr {
  my ($refr, $node, $attribute) = @_;
  my $preferred = $refr && $refr->getAttribute($attribute);
  return $preferred if $preferred;
  return $node && $node->getAttribute($attribute); }
428
# Total spacing requested by $attribute on the referring node $refr and on $node:
# each value present is converted by getXMHintSpacing and the results are added.
# Returns 0 when neither supplies a value.
sub _getspace {
  my ($refr, $node, $attribute) = @_;
  my $total = 0;
  foreach my $source ($refr, $node) {
    my $spec = $source && $source->getAttribute($attribute);
    $total += getXMHintSpacing($spec) if $spec; }
  return $total; }
435
# Needs to be a utility somewhere...
# Extract the natural amount from a spacing such as '5pt' or '3mu plus 1pt minus 1pt'.
# Amounts in pt are returned as given; amounts in mu are divided by 1.8.
# Any plus/minus parts are ignored; anything unrecognized (or empty) gives 0.
sub getXMHintSpacing {
  my ($width) = @_;
  return 0 unless $width;
  return 0 unless $width =~ /^([\d\.\+\-]+)(pt|mu)(\s+plus\s+[\d\.]+pt)?(\s+minus\s+[\d\.]+pt)?$/;
  my ($amount, $unit) = ($1, $2);
  return $amount / 1.8 if $unit eq 'mu';
  return $amount; }
443
# The character U+00A0; pmml_internal substitutes it for leading & trailing whitespace of text.
my $NBSP = pack('U', 0xA0);    # CONSTANT
445
# Core of the Presentation MathML conversion: dispatch on the kind of XMath node.
#   XMath         : row of the converted children
#   XMDual        : convert the 2nd child (presentation branch) only
#   XMWrap, XMArg : row of the converted children, possibly resized
#   XMApp         : apply the presenter looked up for the operator (role & meaning)
#   XMTok, XMHint : apply the presenter looked up from the node's role & meaning
#   XMArray       : m:mtable
#   XMText        : m:mtext (or a row of text & math pieces unless nestmath is set)
#   ERROR         : m:merror
#   anything else : m:mtext of the text content
# Returns the MathML in array form; a missing node gives an m:merror.
sub pmml_internal {
  my ($node) = @_;
  return ['m:merror', {}, ['m:mtext', {}, "Missing Subexpression"]] unless $node;
  my $self = $LaTeXML::Post::MATHPROCESSOR;
  my $doc  = $LaTeXML::Post::DOCUMENT;
  my $tag  = getQName($node);
  my $role = $node->getAttribute('role');
  if ($tag eq 'ltx:XMath') {
    return pmml_row(map { pmml($_) } element_nodes($node)); }    # Really multiple nodes???
  elsif ($tag eq 'ltx:XMDual') {
    my ($content, $presentation) = element_nodes($node);
    return pmml($presentation); }
  elsif (($tag eq 'ltx:XMWrap') || ($tag eq 'ltx:XMArg')) {      # Only present if parsing failed!
    return pmml_mayberesize($node, pmml_row(map { pmml($_) } element_nodes($node))); }
  elsif ($tag eq 'ltx:XMApp') {
    my ($op, @args) = element_nodes($node);
    if (!$op) {
      return ['m:merror', {}, ['m:mtext', {}, "Missing Operator"]]; }
    elsif ($role && ($role =~ /^(FLOAT|POST)(SUB|SUPER)SCRIPT$/)) {
      # (FLOAT|POST)(SUB|SUPER)SCRIPT's should NOT remain in successfully parsed math.
      # This conversion creates something "presentable", though doubtfully correct (empty mi?)
      # Really should mark & make a fake parsing pass to & group open/close pairs & attach scripts
      # ($2, from the role match above, selects between msub and msup)
      return [($2 eq 'SUB' ? 'm:msub' : 'm:msup'), {}, ['m:mi'],
        pmml_scriptsize($op)]; }
    else {
      my $rop = realize($op);
      my $style =
        $rop->getAttribute('mathstyle') || $op->getAttribute('mathstyle');
      # Remember the style in effect outside, then convert in the operator's own style (if valid).
      my $ostyle = $LaTeXML::MathML::STYLE;
      local $LaTeXML::MathML::STYLE
        = ($style && $stylestep{$style} ? $style : $LaTeXML::MathML::STYLE);
      my $result = &{ lookupPresenter('Apply', getOperatorRole($rop), $rop->getAttribute('meaning'))
      }($op, @args);
      $result = pmml_mayberesize($node, $result);
      # Wrap in m:mstyle if moving from the outer style to $style calls for any attributes;
      # displaystyle is only included when something in the result needs it.
      my $needsmathstyle = needsMathstyle($result);
      my %styleattr      = %{ ($style && ($needsmathstyle
            ? $stylemap{$ostyle}{$style}
            : $stylemap2{$ostyle}{$style})) || {} };
      $result = ['m:mstyle', {%styleattr}, $result] if keys %styleattr;
      return $result; } }
  elsif ($tag eq 'ltx:XMTok') {
    return &{ lookupPresenter('Token', $role, $node->getAttribute('meaning')) }($node); }
  elsif ($tag eq 'ltx:XMHint') {
    return &{ lookupPresenter('Hint', $role, $node->getAttribute('meaning')) }($node); }
  elsif ($tag eq 'ltx:XMArray') {
    my $width   = $node->getAttribute('width');
    my $style   = $node->getAttribute('mathstyle');
    my $vattach = $node->getAttribute('vattach');
    my $rowsep  = $node->getAttribute('rowsep') || '0pt';
    my $colsep  = $node->getAttribute('colsep') || '5pt';
    $vattach = 'axis'    if !$vattach || ($vattach eq 'middle');    # roughly MathML's axis?
    $vattach = 'bottom1' if $vattach && ($vattach eq 'top');
    my $ostyle = $LaTeXML::MathML::STYLE;
    local $LaTeXML::MathML::STYLE
      = ($style && $stylestep{$style} ? $style : $LaTeXML::MathML::STYLE);
    my @rows  = ();
    my $nrows = 0;
    my $ncols = 0;

    # $spanned[$i] counts down the rows for which column $i is still covered by a spanning cell.
    my @spanned = ();                                               # record columns to be skipped
    foreach my $row (element_nodes($node)) {
      my @cols = ();
      my $nc   = 0;
      $nrows++;
      foreach my $col (element_nodes($row)) {
        $nc++;
        $spanned[$nc - 1]-- if $spanned[$nc - 1];
        next                if $spanned[$nc - 1];                   # Omit this mtd, if spanned by another!
        my $a    = $col->getAttribute('align');
        my $b    = $col->getAttribute('border');
        # border & thead values become space separated ltx_border_* & ltx_th_* classes
        my $bc   = ($b ? join(' ', map { 'ltx_border_' . $_ } split(/\s/, $b)) : $b);
        my $th   = $col->getAttribute('thead');
        my $hc   = ($th ? join(' ', map { 'ltx_th_' . $_ } split(/\s/, $th)) : '');
        my $cl   = $col->getAttribute('class');
        my $c    = ($bc ? ($hc ? "$bc $hc" : $bc) : $hc);
        my $cs   = $col->getAttribute('colspan');
        my $rs   = $col->getAttribute('rowspan');
        my @cell = map { pmml($_) } element_nodes($col);

        if ($rs || $cs) {    # Note following cells to be omitted from MathML
          for (my $i = 0 ; $i < ($cs || 1) ; $i++) {
            $spanned[$nc - 1 + $i] = ($rs || 1); } }
        push(@cols, ['m:mtd', { ($a ? (columnalign => $a) : ()),
              ($c || $cl ? (class      => ($c && $cl ? "$c $cl" : $c || $cl)) : ()),
              ($cs       ? (columnspan => $cs)                                : ()),
              ($rs       ? (rowspan    => $rs)                                : ()) },
            @cell]); }
      $ncols = $nc if $nc > $ncols;
      push(@rows, ['m:mtr', {}, @cols]); }
    # Spacing is only given when there is more than one row (resp. column).
    $rowsep = undef if $nrows < 2;
    $colsep = undef if $ncols < 2;
    my $result = ['m:mtable', { ($vattach ne 'axis' ? (align => $vattach) : ()),
        ($rowsep ? (rowspacing    => $rowsep) : ()),
        ($colsep ? (columnspacing => $colsep) : ()),
        ($width  ? (width         => $width)  : ()),
        # Mozilla seems to need some encouragement?
        ($LaTeXML::MathML::STYLE eq 'display' ? (displaystyle => 'true') : ()) },
      @rows];
    my $needsmathstyle = needsMathstyle($result);
    my %styleattr      = %{ ($style && ($needsmathstyle
          ? $stylemap{$ostyle}{$style}
          : $stylemap2{$ostyle}{$style})) || {} };
    $result = ['m:mstyle', {%styleattr}, $result] if keys %styleattr;
    $result = pmml_mayberesize($node, $result);
    return $result; }
  elsif ($tag eq 'ltx:XMText') {
    my @c = $node->childNodes;
    my $result;
    # Without nestmath, the text is flattened into a row of pieces;
    # with it, everything goes within a single m:mtext.
    if (!$$self{nestmath}) {
      $result = pmml_row(map { pmml_text_aux($_) } @c); }
    else {
      $result = ['m:mtext', {}, $self->convertXMTextContent($doc, 1, @c)]; }
    return pmml_mayberesize($node, $result); }
  elsif ($tag eq 'ltx:ERROR') {
    my $cl = $node->getAttribute('class');
    return ['m:merror', { class => join(' ', grep { $_ } 'ltx_ERROR', $cl) },
      ['m:mtext', {}, $node->textContent]]; }
  else {
    my $text = $node->textContent;    #  Spaces are significant here
    # so leading & trailing whitespace is each replaced by a single non-breaking space
    $text =~ s/^\s+/$NBSP/;
    $text =~ s/\s+$/$NBSP/;
    return ['m:mtext', {}, $text]; } }
568
# Does the MathML $node (array form) contain anything whose layout depends on displaystyle?
# That is the case for an m:mfrac, or any element with a (true) largeop attribute.
# An m:mstyle which itself sets displaystyle answers 0, without looking inside.
# Returns 1, 0, or nothing (nothing also for strings & other non-array items).
sub needsMathstyle {
  my ($node) = @_;
  return unless ref $node eq 'ARRAY';
  my ($tag, $attr, @children) = @$node;
  return 1 if ($tag eq 'm:mfrac') || $$attr{largeop};
  return 0 if ($tag eq 'm:mstyle') && defined $$attr{displaystyle};
  foreach my $child (@children) {
    return 1 if needsMathstyle($child); }
  return; }
578
# Use mpadded instead of mrow if size has been given
# And maybe this is a convenient place to deal with frames?
#   $node   : the XMath node supplying width, height, depth, xoffset, yoffset,
#             framed & framecolor (sizes may also come from a parent XMDual)
#   $result : the MathML (array form) generated for $node
# Returns $result, modified in place or wrapped in m:mpadded as needed;
# $result is returned untouched if $node is not a reference.
sub pmml_mayberesize {
  my ($node, $result) = @_;
  return $result unless ref $node;
  my $parent;
  # There MAY be relevant attributes on a containing XMDual (if any)!!!
  # ($parent ends up defined only when it is an XMDual)
  if ((ref $node) && ($node->nodeType == XML_ELEMENT_NODE)
    && ($parent = $node->parentNode) && (getQName($parent) eq 'ltx:XMDual')) { }
  else { $parent = undef; }
  my $width  = $node->getAttribute('width')   || ($parent && $parent->getAttribute('width'));
  my $height = $node->getAttribute('height')  || ($parent && $parent->getAttribute('height'));
  my $depth  = $node->getAttribute('depth')   || ($parent && $parent->getAttribute('depth'));
  my $xoff   = $node->getAttribute('xoffset') || ($parent && $parent->getAttribute('xoffset'));
  my $yoff   = $node->getAttribute('yoffset') || ($parent && $parent->getAttribute('yoffset'));
  if ($width || $height || $depth || $xoff || $yoff) {
    # Reuse an existing m:mpadded, turn an m:mrow into one, else wrap.
    if    ($$result[0] eq 'm:mpadded') { }
    elsif ($$result[0] eq 'm:mrow') {
      $$result[0] = 'm:mpadded'; }
    else {
      $result = ['m:mpadded', {}, $result]; }
    my $attr = $$result[1];
    if ($yoff) {    # assume this means to move the BOX? (not just the contents?)
      # Unless given explicitly, height grows by the offset and depth shrinks by it
      # (written as signed increments).
      if (!$height) {
        if ($yoff =~ /^-/) { $height = $yoff; }
        else               { $height = "+" . $yoff; } }
      if (!$depth) {
        if ($yoff =~ /^-/) { $depth = $yoff; $depth =~ s/^-/+/; }
        else               { $depth = "-" . $yoff; } } }
    $$attr{width}   = $width  if $width;
    $$attr{height}  = $height if $height;
    $$attr{depth}   = $depth  if $depth;
    $$attr{lspace}  = $xoff   if $xoff;
    $$attr{voffset} = $yoff   if $yoff; }

  # A frame is expressed through a class ltx_framed_<frame>, its color through a style.
  if (my $frame = $node->getAttribute('framed')) {
    my $attr  = $$result[1];
    my $c     = $$attr{class};
    my $class = 'ltx_framed_' . $frame;
    $$attr{class} = ($c ? $c . ' ' . $class : $class);
    if (my $color = $node->getAttribute('framecolor')) {
      my $s     = $$attr{style};
      my $style = 'border-color: ' . $color;
      $$attr{style} = ($s ? $s . '; ' . $style : $style); } }
  return $result; }
624
# Combine MathML items into a single item, ignoring empty (false) ones:
# exactly one remaining item is returned as is; otherwise (including none at all)
# the items are wrapped in an m:mrow.
sub pmml_row {
  my (@items) = @_;
  my @present = grep { $_ } @items;
  return $present[0] if scalar(@present) == 1;
  return ['m:mrow', {}, @present]; }
629
# Inverse of making a row: if $mml is an m:mrow carrying no attributes,
# return its children; anything else is returned as a one element list.
sub pmml_unrow {
  my ($mml) = @_;
  my $is_bare_row = $mml && (ref $mml) && ($$mml[0] eq 'm:mrow') && !scalar(keys %{ $$mml[1] });
  return ($mml) unless $is_bare_row;
  my (undef, undef, @children) = @$mml;
  return @children; }
637
# Enclose $item between the delimiters $open and $close (either may be omitted).
# With neither, $item is returned as is.  If the processor's usemfenced switch
# is set an m:mfenced is generated; otherwise an m:mrow with the delimiters as m:mo's.
sub pmml_parenthesize {
  my ($item, $open, $close) = @_;
  return $item unless $open || $close;
  # OR, maybe we should just use mfenced?
  # mfenced is better for CSS profile.
  # when the insides are line-broken, induces a less traditional appearance
  # (however, line-breaking inside of a mrow w/parens needs some special treatment too! scripts!!)
  if ($$LaTeXML::Post::MATHPROCESSOR{usemfenced}) {
    return ['m:mfenced', { open => ($open || ''), close => ($close || '') }, $item]; }
  my @row = ();
  push(@row, pmml_mo($open, role => 'OPEN')) if $open;
  push(@row, $item);
  push(@row, pmml_mo($close, role => 'CLOSE')) if $close;
  return ['m:mrow', {}, @row]; }
653
# never used?
# Interleave @items with punctuation operators (m:mo with role PUNCT).
# $separators is a string of punctuation marks delimited by single spaces;
# they are used in order, and once exhausted the most recent one is repeated
# (the initial default being ', ').
sub pmml_punctuate {
  my ($separators, @items) = @_;
  $separators = '' unless defined $separators;
  return pmml_row() unless @items;
  my $punct = ', ';
  my @row   = (shift(@items));
  foreach my $next (@items) {
    # Peel off the next separator, if any remain
    if ($separators =~ s/^(.*?)( |$)//) {    # delimited by SINGLE SPACE!!
      $punct = $1 if $1; }
    push(@row, pmml_mo($punct, role => 'PUNCT'), $next); }
  return pmml_row(@row); }
667
# args are XMath nodes
# This is suitable for use as an Apply handler.
# Lays out the arguments in a row, separated by the operator $op
# (an XMath node, or a plain string which will be turned into an m:mo).
# With a single argument, the operator is simply placed before it.
# When the operator is an ADDOP and the first argument is itself an application
# of an ADDOP, the converted first argument's row is flattened into this one.
sub pmml_infix {
  my ($op, @args) = @_;
  $op = realize($op) if ref $op;
  return ['m:mrow', {}] unless $op && @args;    # ??
  # Each occurrence of the operator gets its own, freshly converted, copy.
  my $operator = sub { return (ref $op ? pmml($op) : pmml_mo($op)); };
  if (scalar(@args) == 1) {                     # Infix with 1 arg is presumably Prefix!
    return pmml_row($operator->(), pmml($args[0])); }
  # Experiment at flattening?
  my $role  = (ref $op ? getOperatorRole($op) : 'none') || 'none';
  my $first = realize(shift(@args));
  my $flatten = ($role eq 'ADDOP')
    && (getQName($first) eq 'ltx:XMApp')
    && ((getOperatorRole((element_nodes($first))[0]) || 'none') eq $role);
  my @items = ($flatten ? pmml_unrow(pmml($first)) : pmml($first));
  foreach my $arg (@args) {
    push(@items, $operator->());
    push(@items, pmml($arg)); }
  return pmml_row(@items); }
692
# Return the single-character string for the Unicode code point $code.
sub UTF {
  my $codepoint = shift;
  return pack('U', $codepoint); }
696
# Build the list of (plain character => styled character) pairs for one math alphabet.
# $latin is the code point of the styled 'A' (the styled 'a' is assumed to follow
# 26 positions later); $GREEK, $greek & $digits are the code points of the styled
# capital Alpha, small alpha and digit zero, respectively; any of those three may be
# undef, in which case that group is left unmapped.
# The Greek groups cover the 25 consecutive code points starting from U+0391 & U+03B1.
# Intended to be called in list context (eg. within a hash constructor).
sub makePlane1Map {
  my ($latin, $GREEK, $greek, $digits) = @_;
  # Each entry: [first plain code point, first styled code point, last offset]
  my @groups = (
    [ord('A'), $latin,      25],
    [ord('a'), $latin + 26, 25],
    ($GREEK  ? ([0x0391,   $GREEK,  24]) : ()),
    ($greek  ? ([0x03B1,   $greek,  24]) : ()),
    ($digits ? ([ord('0'), $digits, 9])  : ()));
  return map {
    my ($plain, $styled, $last) = @$_;
    map { (UTF($plain + $_) => UTF($styled + $_)) } 0 .. $last;
  } @groups; }
705
# For each mathvariant, a mapping from the plain characters (Latin letters and,
# where offsets are given, Greek letters & digits) to the corresponding characters
# in the Mathematical Alphanumeric Symbols block.
# The explicit entries following a makePlane1Map call override individual letters
# with code points from lower blocks.
my %plane1map = (    # CONSTANT
  'bold'   => { makePlane1Map(0x1D400, 0x1D6A8, 0x1D6C2, 0x1D7CE) },
  'italic' => { makePlane1Map(0x1D434, 0x1D6E2, 0x1D6FC, undef),
    h => "\x{210E}" },
  'bold-italic'            => { makePlane1Map(0x1D468, 0x1D71C, 0x1D736, undef) },
  'sans-serif'             => { makePlane1Map(0x1D5A0, undef,   undef,   0x1D7E2) },
  'bold-sans-serif'        => { makePlane1Map(0x1D5D4, 0x1D756, 0x1D770, 0x1D7EC) },
  'sans-serif-italic'      => { makePlane1Map(0x1D608, undef,   undef,   undef) },
  'sans-serif-bold-italic' => { makePlane1Map(0x1D63C, 0x1D790, 0x1D7AA, undef) },
  'monospace'              => { makePlane1Map(0x1D670, undef,   undef,   0x1D7F6) },
  'script'                 => { makePlane1Map(0x1D49C, undef,   undef,   undef),
    B => "\x{212C}", E => "\x{2130}", F => "\x{2131}", H => "\x{210B}", I => "\x{2110}",
    L => "\x{2112}", M => "\x{2133}", R => "\x{211B}",
    e => "\x{212F}", g => "\x{210A}", o => "\x{2134}" },
  'bold-script' => { makePlane1Map(0x1D4D0, undef, undef, undef) },
  'fraktur'     => { makePlane1Map(0x1D504, undef, undef, undef),
    C => "\x{212D}", H => "\x{210C}", I => "\x{2111}", R => "\x{211C}", Z => "\x{2128}" },
  'bold-fraktur'  => { makePlane1Map(0x1D56C, undef, undef, undef) },
  'double-struck' => { makePlane1Map(0x1D538, undef, undef, 0x1D7D8),
    C => "\x{2102}", H => "\x{210D}", N => "\x{2115}", P => "\x{2119}", Q => "\x{211A}",
    R => "\x{211D}", Z => "\x{2124}" }
);

# Reduced table, consulted by stylizeContent instead of %plane1map when the
# hackplane1 option is set: only script, fraktur & double-struck are mapped,
# and the bold variants share the tables of the non-bold ones.
my %plane1hack = (    # CONSTANT
  script          => $plane1map{script},  'bold-script'  => $plane1map{script},
  fraktur         => $plane1map{fraktur}, 'bold-fraktur' => $plane1map{fraktur},
  'double-struck' => $plane1map{'double-struck'});

# Roles for which stylizeContent expresses an explicit size as minsize & maxsize
# rather than as mathsize.
my %symmetric_roles = (OPEN => 1, CLOSE => 1, MIDDLE => 1, VERTBAR => 1);
# operator content that's stretchy by default [fill-in from operator dictionary!]
# [ grep stretchy ~/src/firefox/res/fonts/mathfont.properties | cut -d . -f 2 ]
# (some characters are listed more than once; that is harmless in a hash)
my %normally_stretchy = map { $_ => 1 }
  ("(", ")", "[", "]", "{", "}",
  "\x{27E8}", "\x{2308}", "\x{27E6}", "\x{230A}", "\x{27E9}", "\x{2309}", "\x{27E7}", "\x{230B}",
  "\x{2500}", "\x{007C}", "\x{2758}", "\x{21D2}", "\x{2A54}", "\x{2A53}", "\x{21D0}", "\x{21D4}",
  "\x{2950}", "\x{295E}", "\x{21BD}", "\x{2956}", "\x{295F}", "\x{21C1}", "\x{2957}", "\x{2190}",
  "\x{21E4}", "\x{21C6}", "\x{2194}", "\x{294E}", "\x{21A4}", "\x{295A}", "\x{21BC}", "\x{2952}",
  "\x{2199}", "\x{2198}", "\x{2192}", "\x{21E5}", "\x{21C4}", "\x{21A6}", "\x{295B}", "\x{21C0}",
  "\x{2953}", "\x{2196}", "\x{2197}", "\x{2225}", "\x{2016}", "\x{21CC}", "\x{21CB}", "\x{2223}",
  "\x{2294}", "\x{22C3}", "\x{228E}", "\x{22C2}", "\x{2293}", "\x{22C1}", "\x{2211}", "\x{22C3}",
  "\x{228E}", "\x{2A04}", "\x{2A06}", "\x{2232}", "\x{222E}", "\x{2233}", "\x{222F}", "\x{222B}",
  "\x{22C0}", "\x{2210}", "\x{220F}", "\x{22C2}", "\x{2216}", "\x{221A}", "\x{21D3}",
  "\x{27F8}", "\x{27FA}", "\x{27F9}", "\x{21D1}", "\x{21D5}", "\x{2193}", "\x{2913}", "\x{21F5}",
  "\x{21A7}", "\x{2961}", "\x{21C3}", "\x{2959}", "\x{2951}", "\x{2960}", "\x{21BF}", "\x{2958}",
  "\x{27F5}", "\x{27F7}", "\x{27F6}", "\x{296F}", "\x{295D}", "\x{21C2}", "\x{2955}", "\x{294F}",
  "\x{295C}", "\x{21BE}", "\x{2954}", "\x{2191}", "\x{2912}", "\x{21C5}", "\x{2195}", "\x{296E}",
  "\x{21A5}", "\x{02DC}", "\x{02C7}", "\x{005E}", "\x{00AF}", "\x{23DE}", "\x{FE37}", "\x{23B4}",
  "\x{23DC}", "\x{FE35}", "\x{0332}", "\x{23DF}", "\x{FE38}", "\x{23B5}", "\x{23DD}", "\x{FE36}",
  "\x{2225}", "\x{2225}", "\x{2016}", "\x{2016}", "\x{2223}", "\x{2223}", "\x{007C}", "\x{007C}",
  "\x{20D7}", "\x{20D6}", "\x{20E1}", "\x{20D1}", "\x{20D0}", "\x{21A9}", "\x{21AA}", "\x{23B0}",
  "\x{23B1}");
# Text substituted by stylizeContent for an empty token having one of these roles
# (U+2062 invisible times, U+2064 invisible plus, U+2063 invisible separator).
my %default_token_content = (
  MULOP => "\x{2062}", ADDOP => "\x{2064}", PUNCT => "\x{2063}");
# Given an item (string or token element w/attributes) and latexml attributes,
# convert the string to the appropriate unicode (possibly plane1)
# & MathML presentation attributes (mathvariant, mathsize, mathcolor, stretchy)
# $tag specifies the element that these attributes will apply to (some attributes disallowed)
#
# Arguments:
#   $item : a plain string or a node. For an XML::LibXML::Element the styling
#           attributes are read from the element itself; otherwise from %attr.
#   $tag  : qualified name of the MathML element to be created (eg. 'm:mi').
#   %attr : font, fontsize, color, backgroundcolor, opacity, class, cssstyle,
#           stretchy, href, title (only consulted when $item is not an element).
# Unspecified font, size, colors & opacity default to the current values of
# the $LaTeXML::MathML::FONT, DESIRED_SIZE, COLOR, BGCOLOR & OPACITY variables.
# Returns the list ($text, name => value, ...) where the names are among
#   mathvariant, mathsize (or minsize & maxsize), mathcolor, mathbackground,
#   style, stretchy, class, href, title.
sub stylizeContent {
  my ($item, $tag, %attr) = @_;
  my $iselement = (ref $item) eq 'XML::LibXML::Element';
  my $role      = ($iselement ? $item->getAttribute('role') : 'ID');
  my $font      = ($iselement ? $item->getAttribute('font') : $attr{font})
    || $LaTeXML::MathML::FONT;
  my $size = ($iselement ? $item->getAttribute('fontsize') : $attr{fontsize})
    || $LaTeXML::MathML::DESIRED_SIZE;
  my $color = ($iselement ? $item->getAttribute('color') : $attr{color})
    || $LaTeXML::MathML::COLOR;
  my $bgcolor = ($iselement ? $item->getAttribute('backgroundcolor') : $attr{backgroundcolor})
    || $LaTeXML::MathML::BGCOLOR;
  my $opacity = ($iselement ? $item->getAttribute('opacity') : $attr{opacity})
    || $LaTeXML::MathML::OPACITY;
  my $class = ($iselement ? $item->getAttribute('class') : $attr{class});
  # This key used to be read under the misspelling 'ccsstyle' (so that a cssstyle
  # passed by a caller was silently ignored); the old spelling is still accepted.
  my $cssstyle = ($iselement
    ? $item->getAttribute('cssstyle')
    : ($attr{cssstyle} || $attr{ccsstyle}));
  my $text = (ref $item ? $item->textContent : $item);
  $text = '' unless defined $text;    # treated as empty below; avoids uninitialized warnings
  my $variant  = ($font      ? mathvariantForFont($font)       : '');
  my $stretchy = ($iselement ? $item->getAttribute('stretchy') : $attr{stretchy});
  $stretchy = undef if ($tag ne 'm:mo');                    # Only allowed on m:mo!
  $size     = undef if ($stretchy || 'false') eq 'true';    # Ignore size, if we're stretching.

  my $stretchyhack = undef;

  if ($text =~ /^[\x{2061}\x{2062}\x{2063}]*$/) {           # invisible get no size or stretchiness
    $stretchy = $size = undef; }
  if ($size) {
    if ($size eq $LaTeXML::MathML::SIZE) {                  # If default size, no need to mention.
      $size = undef; }
    # If requested relative size, and in script or scriptscript, we'll need to adjust the size
    elsif (($size =~ /%$/) && ($LaTeXML::MathML::STYLE =~ /script/)) {
      my $req = $size;                               $req =~ s/%$//;
      my $ex  = $stylesize{$LaTeXML::MathML::STYLE}; $ex  =~ s/%$//;
      $size = int(100 * $req / $ex) . '%'; }
    # Note that symmetric is only allowed when stretchy, which looks crappy for specific sizes
    # so we'll pretend that delimiters are still stretchy, but restrict size by minsize & maxsize
    # (Thanks Peter Krautzberger)
    # Really we should check the Operator Dictionary to see if it's expected to be symmetric
    if ($size) {
      if ($role && $symmetric_roles{$role}) {
        $stretchyhack = 1;
        $stretchy     = undef; }
      elsif ($tag eq 'm:mo') {
        $stretchy = 'false'; } } }    # Conversely, if size specifically set, don't stretch it!
  elsif ($normally_stretchy{$text}) {    # Else, if this would normally be stretchy
    if ($stretchy && ($stretchy eq 'true')) {
      $stretchy = undef; }               # Don't need to say this explicitly
    else {
      $stretchy = 'false'; } }           # or need to explicitly disable it.
  elsif ($stretchy && ($stretchy eq 'false')) {
    $stretchy = undef; }                 # Otherwise, doesn't need to be said at all.

  if ($text eq '') {                     # Failsafe for empty tokens?
    if (my $default = $role && $default_token_content{$role}) {
      $text = $default; }
    else {
      # Nothing sensible to show: display whatever identifies the token, in red.
      $text = ($iselement ? $item->getAttribute('name') || $item->getAttribute('meaning') || $role : '?');
      $color = 'red'; } }
  if ($opacity) {
    $cssstyle = ($cssstyle ? $cssstyle . ';' : '') . "opacity:$opacity"; }
  if ($font && !$variant) {
    Warn('unexpected', $font, undef, "Unrecognized font variant '$font'"); $variant = ''; }
  # Special case for single char identifiers?
  if (($tag eq 'm:mi') && ($text =~ /^.$/)) {    # Single char in mi? (what about m:ci?)
    if    ($variant eq 'italic') { $variant = undef; }         # Defaults to italic
    elsif (!$variant)            { $variant = 'normal'; } }    # must say so explicitly.

  # Use class (css) to patchup some weak translations
  if    (!$font) { }
  elsif ($font =~ /caligraphic/) {
    # Note that this is unlikely to have effect when plane1 chars are used!
    $class = ($class ? $class . ' ' : '') . 'ltx_font_mathcaligraphic'; }
  elsif ($font =~ /script/) {
    $class = ($class ? $class . ' ' : '') . 'ltx_font_mathscript'; }
  elsif (($font =~ /fraktur/) && ($text =~ /^[\+\-\d\.]*$/)) {    # fraktur number?
    $class = ($class ? $class . ' ' : '') . 'ltx_font_oldstyle'; }
  elsif ($font =~ /smallcaps/) {
    $class = ($class ? $class . ' ' : '') . 'ltx_font_smallcaps'; }

  # Should we map to Unicode's Plane 1 blocks for Mathematical Alphanumeric Symbols?
  # Only upper & lower case latin & greek, and also numerals can be mapped.
  # For each mathvariant, and for each of those 5 groups, there is a linear mapping,
  # EXCEPT for chars defined before Plane 1, which already exist in lower blocks.
  my $mapping;
  # Get desired mapping strategy
  my $plane1     = $$LaTeXML::Post::MATHPROCESSOR{plane1};
  my $plane1hack = $$LaTeXML::Post::MATHPROCESSOR{hackplane1};
  if ($variant
    && ($plane1 || $plane1hack)
    && ($mapping = ($plane1hack ? $plane1hack{$variant} : $plane1map{$variant}))) {
    my @c = map { $$mapping{$_} } split(//, (defined $text ? $text : ''));
    if (!grep { !defined $_ } @c) {    # Only if ALL chars in the token could be mapped... ?????
      $text = join('', @c);
      # The mapped characters carry the variant themselves; except that in hack mode
      # the bold variants were mapped to non-bold characters, so bold must still be said.
      $variant = ($plane1hack && ($variant =~ /^bold/) ? 'bold' : undef); } }
  # Other attributes that should be copied?
  my $istoken = $tag =~ /^m:(?:mi|mo|mn)$/;    # mrow?
  my $href    = $istoken && ($iselement ? $item->getAttribute('href')  : $attr{href});
  my $title   = $istoken && ($iselement ? $item->getAttribute('title') : $attr{title});
  return ($text,
    ($variant ? (mathvariant => $variant) : ()),
    ($size    ? ($stretchyhack
        ? (minsize => $size, maxsize => $size)
        : (mathsize => $size))
      : ()),
    ($color    ? (mathcolor      => $color)    : ()),
    ($bgcolor  ? (mathbackground => $bgcolor)  : ()),
    ($cssstyle ? (style          => $cssstyle) : ()),
    ($stretchy ? (stretchy       => $stretchy) : ()),
    ($class    ? (class          => $class)    : ()),
    ($href     ? (href           => $href)     : ()),
    ($title    ? (title          => $title)    : ()),
  ); }
875
# These are the strings that should be known as fences in a normal operator dictionary.
# pmml_mo only adds an explicit fence attribute to OPEN or CLOSE operators
# whose text is NOT listed here.
my %fences = (    # CONSTANT
  '('        => 1, ')' => 1, '[' => 1, ']' => 1, '{' => 1, '}' => 1, "\x{201C}" => 1, "\x{201D}" => 1,
  '`'        => 1, "'" => 1, "<" => 1, ">" => 1,
  "\x{2329}" => 1, "\x{232A}" => 1, # angle brackets; NOT mathematical, but balance in case they show up.
  "\x{27E8}" => 1, "\x{27E9}" => 1,    # angle brackets (preferred)
  "\x{230A}" => 1, "\x{230B}" => 1, "\x{2308}" => 1, "\x{2309}" => 1);

# Likewise, pmml_mo only adds an explicit separator attribute to PUNCT operators
# whose text is NOT listed here.
my %punctuation = (',' => 1, ';' => 1, "\x{2063}" => 1);    # CONSTANT
885
# Generally, $item in the following ought to be a string.
# Create an m:mi (identifier) for $item, with content & attributes as determined
# by stylizeContent; the result is passed through pmml_mayberesize.
sub pmml_mi {
  my ($item, %attr) = @_;
  my ($content, %mmlattr) = stylizeContent($item, 'm:mi', %attr);
  my $mi = ['m:mi', {%mmlattr}, $content];
  return pmml_mayberesize($item, $mi); }
892
# Really, the same issues as with mi.
# Create an m:mn (number) for $item, with content & attributes as determined
# by stylizeContent; the result is passed through pmml_mayberesize.
sub pmml_mn {
  my ($item, %attr) = @_;
  my ($content, %mmlattr) = stylizeContent($item, 'm:mn', %attr);
  my $mn = ['m:mn', {%mmlattr}, $content];
  return pmml_mayberesize($item, $mn); }
899
# Note that $item should be either a string, or at most, an XMTok
# Create an m:mo (operator) for $item, styled by stylizeContent, and carrying the
# operator attributes (fence, separator, largeop, symmetric, lspace, rspace,
# movablelimits) implied by its role, padding & scriptpos.
# For a string, role, lpadding & rpadding are taken from %attr.
# The result is passed through pmml_mayberesize.
sub pmml_mo {
  my ($item, %attr) = @_;
  my ($text, %moattr) = stylizeContent($item, 'm:mo', %attr);
  my $role = (ref $item ? $item->getAttribute('role') : $attr{role});
  # fence & separator only need saying when the text isn't already known as such.
  if ($role && ($role =~ /^(OPEN|CLOSE)$/) && !$fences{$text}) {
    $moattr{fence} = 'true'; }
  if ($role && ($role eq 'PUNCT') && !$punctuation{$text}) {
    $moattr{separator} = 'true'; }
  if ($role && ($role =~ /^(SUMOP|INTOP)$/)) {
    $moattr{largeop}   = 'true';
    $moattr{symmetric} = 'true'; }    # Not sure this is strictly correct...
  # Note that lspace,rspace is the left & right space that replaces Op.Dictionary
  # what we've recorded is _padding_, so we have to adjust the unknown OpDict entry!
  # Just assume something between mediummathspace = 4/18em = 2.222pt
  # and thickmathspace = 5/18em = 2.7777pt, so 2.5pt.
  my %space_for = (lpadding => 'lspace', rpadding => 'rspace');
  foreach my $side (qw(lpadding rpadding)) {
    my $padding = $attr{$side}
      || ((ref $item) && $item->getAttribute($side))
      || ($role && ($role eq 'MODIFIEROP') && 'mediummathspace');
    if ($padding) {
      $moattr{ $space_for{$side} } = max(0, (2.5 + getXMHintSpacing($padding))) . 'pt'; } }
  # If an operator has specifically located its scripts,
  # don't let mathml move them.
  my $pos = (ref $item && $item->getAttribute('scriptpos')) || 'post';
  if (($pos =~ /mid/) || $LaTeXML::MathML::NOMOVABLELIMITS) {
    $moattr{movablelimits} = 'false'; }
  return pmml_mayberesize($item, ['m:mo', {%moattr}, $text]); }
933
# Convert a big operator token $op to an m:mo.
# If the operator requests a mathstyle differing from the current one, the result
# is wrapped in an m:mstyle carrying the attributes (from %stylemap) needed to
# switch styles; the operator itself is converted within the requested style
# (provided that style is known to %stylestep).
sub pmml_bigop {
  my ($op) = @_;
  my $style   = $op->getAttribute('mathstyle');
  my $current = $LaTeXML::MathML::STYLE;
  my $switch  = ($style && ($style ne $current) && $stylemap{$current}{$style}) || {};
  my %styleattr = %$switch;
  local $LaTeXML::MathML::STYLE = ($style && $stylestep{$style} ? $style : $current);
  my $mml = pmml_mo($op);
  return (keys %styleattr ? ['m:mstyle', {%styleattr}, $mml] : $mml); }
944
# Since we're keeping track of display style, under/over vs. sub/super
# We've got to keep track of MathML's desire to do it for us,
# and be prepared to override it.
# When we encounter a script, we've got to look into the possibly embellished
# operator for more scripts, and attempt to decipher (based on scriptpos attribute)
# the various positionings (pre, mid, post) and determine whether
# prescripts, multiscripts, munderover or msubsup should be used.
#
# Depending on which order the pre/post sub/super/over/under scripts appear,
# we may end up with a multiscript, or scripts applied to embellished operators.
# In particular, we may end up with postscripts applied to an object with over/under;
# OR, the other way around.
# In the latter case, we may have limits on a primed sum, in which case
# we will want to adjust the spacing so the limits center on the sum WITHOUT the primes!!!!
#
# Moreover, the inner operator may be a largeop and need to be displaystyle;
# since mstyle doesn't nest well inside the scripts, we'll handle that too.
# We also make sure the eventual inner operator (if any) has movablelimits disabled.
#
# $op is the script operator, $base & $script its two arguments (XMath nodes).
sub pmml_script {
  my ($op, $base, $script) = @_;
  # disentangle base & pre/post-scripts
  my ($innerbase, $prescripts, $midscripts, $postscripts, $emb_left, $emb_right)
    = pmml_script_decipher($op, $base, $script);
  # check if base needs displaystyle.
  my $style      = $innerbase->getAttribute('mathstyle');
  my $needsstyle = $style && ($style ne $LaTeXML::MathML::STYLE);
  # mid-scripts are attached to the base first; then any pre & post-scripts around that.
  my $layout = pmml_script_multi_layout(
    pmml_script_mid_layout($innerbase, $midscripts, $emb_left, $emb_right),
    $prescripts, $postscripts);
  return $layout unless $needsstyle;
  return ['m:mstyle', { displaystyle => ($style eq 'display' ? 'true' : 'false') }, $layout]; }
979
# Convert the (XMath) $base and attach any mid-positioned (under/over) scripts.
# $midscripts is an array of [under, over] pairs (either possibly undef), applied
# in order, each wrapping the previous result in m:munder, m:mover or m:munderover.
# $emb_left & $emb_right are embellishments of the base used to pad the scripts
# (see pmml_scriptsize_padded).
sub pmml_script_mid_layout {
  my ($base, $midscripts, $emb_left, $emb_right) = @_;
  my $result;
  if (!scalar(@$midscripts)) {
    ##### TRY this to block an extra mstyle
    local $LaTeXML::MathML::STYLE = $base->getAttribute('mathstyle') || $LaTeXML::MathML::STYLE;
    $result = pmml($base);
    return $result; }

  # With under/over scripts, MathML must not be allowed to move them.
  { local $LaTeXML::MathML::NOMOVABLELIMITS = 1;
    ##### TRY this to block an extra mstyle
    local $LaTeXML::MathML::STYLE = $base->getAttribute('mathstyle') || $LaTeXML::MathML::STYLE;
    $result = pmml($base); }

  # Get the (possibly padded) over & under scripts (if any)
  foreach my $pair (@$midscripts) {
    my ($below, $above) = ($$pair[0], $$pair[1]);
    my $under = (defined $below ? pmml_scriptsize_padded($below, $emb_left, $emb_right) : undef);
    my $over  = (defined $above ? pmml_scriptsize_padded($above, $emb_left, $emb_right) : undef);
    if ((defined $over) && (defined $under)) {
      $result = ['m:munderover', {}, $result, $under, $over]; }
    elsif (defined $over) {
      $result = ['m:mover', {}, $result, $over]; }
    else {
      $result = ['m:munder', {}, $result, $under]; } }
  return $result; }
1008
# Convert an over or under script to scriptsize,
# but pad by phantoms of the base's embellishments, if any.
# This is to handle primed sums, etc....
# With no embellishments, the result is just the scriptsize script; otherwise an
# m:mrow of the script flanked by m:mphantom copies of the embellishments.
sub pmml_scriptsize_padded {
  my ($script, $emb_left, $emb_right) = @_;
  return pmml_scriptsize($script) unless $emb_left || $emb_right;
  my @contents = ();
  push(@contents, ['m:mphantom', {}, pmml_scriptsize($emb_left)]) if $emb_left;
  push(@contents, pmml_scriptsize($script));
  push(@contents, ['m:mphantom', {}, pmml_scriptsize($emb_right)]) if $emb_right;
  return ['m:mrow', {}, @contents]; }
1020
# base is already converted
# Attach the pre & post scripts to $base; each of $prescripts & $postscripts is an
# array of [subscript, superscript] pairs (XMath nodes, either possibly undef).
# Any prescripts, or more than one pair of postscripts, require m:mmultiscripts;
# a single pair of postscripts gives m:msub, m:msup or m:msubsup;
# with no scripts at all, $base is returned unchanged.
sub pmml_script_multi_layout {
  my ($base, $prescripts, $postscripts) = @_;
  # Flatten a list of pairs into the alternating sub, super sequence of converted scripts.
  my $flatten = sub {
    return map { (pmml_scriptsize($_->[0]), pmml_scriptsize($_->[1])) } @_; };
  my $npost = scalar(@$postscripts);
  if (scalar(@$prescripts) > 0) {
    return ['m:mmultiscripts', {},
      $base,
      $flatten->(@$postscripts),
      ['m:mprescripts'],
      $flatten->(@$prescripts)]; }
  if ($npost > 1) {
    return ['m:mmultiscripts', {}, $base, $flatten->(@$postscripts)]; }
  return $base if $npost == 0;
  # Exactly one pair of postscripts
  my $sub = $$postscripts[0][0];
  my $sup = $$postscripts[0][1];
  if (!defined $sup) {
    return ['m:msub', {}, $base, pmml_scriptsize($sub)]; }
  if (!defined $sub) {
    return ['m:msup', {}, $base, pmml_scriptsize($sup)]; }
  return ['m:msubsup', {}, $base, pmml_scriptsize($sub), pmml_scriptsize($sup)]; }
1043
# Various pre, post and even mid scripts can be wrapped around a base element.
# Try to decipher such a nesting (in the XMath element) to collect these separate groups
# They probably shouldn't be stirred up, but appear in pre, mid, post order,
# otherwise it's not at all clear how this was expected to look; likely an upstream error?
# Nor should there be more than a single sub & single sup mid positioned script!
#
# $op is the script operator token: its role (SUPERSCRIPTOP or SUBSCRIPTOP) says which
# kind of script $script is, and its scriptpos attribute (of the form
# (pre|mid|post)<level>, defaulting to post0) says where it goes. $base is the
# scripted XMath node, which is examined for further, nested, scripts.
# Returns ($innerbase, \@pres, \@mids, \@posts, $emb_left, $emb_right) where
#   $innerbase is the innermost (realized) base;
#   pres, mids & posts are lists of [subscript, superscript] pairs (either possibly undef);
#   $emb_right is a post-script found inside of mid-scripts (an embellishment of the base);
#   $emb_left is currently never set, and is always undef.
sub pmml_script_decipher {
  my ($op, $base, $script) = @_;
  my (@pres, @mids, @posts);
  my ($prelevel, $midlevel, $postlevel) = (0, 0, 0);
  my $sawmid = 0;
  my ($emb_left, $emb_right) = (undef, undef);    # embellishments of base on left & right
  my ($y) = ($op->getAttribute('role') || '') =~ /^(SUPER|SUB)SCRIPTOP$/;
  my ($pos, $level) = ($op->getAttribute('scriptpos') || 'post0')
    =~ /^(pre|mid|post)?(\d+)?$/;
  # Any of these captures can be undefined (unexpected role; partial or odd scriptpos).
  # Normalize them, so that the string comparisons below give the same outcome
  # as they would on undef, but without uninitialized warnings.
  $y     = ''     unless defined $y;
  $pos   = 'post' unless defined $pos;
  $level = ''     unless defined $level;

  if ($pos eq 'pre') {
    if ($y eq 'SUB') {
      push(@pres, [$script, undef]); $prelevel = $level; }
    elsif ($y eq 'SUPER') {
      push(@pres, [undef, $script]); $prelevel = $level; } }
  elsif ($pos eq 'mid') {
    $sawmid = 1;
    if ($y eq 'SUB') {
      push(@mids, [$script, undef]); $midlevel = $level; }
    elsif ($y eq 'SUPER') {
      push(@mids, [undef, $script]); $midlevel = $level; } }
  else {    # else it's post
    if ($y eq 'SUB') {
      push(@posts, [$script, undef]); $postlevel = $level; }
    elsif ($y eq 'SUPER') {
      push(@posts, [undef, $script]); $postlevel = $level; } }

  # Examine $base to see if there are nested scripts.
  # We'll fold them together they seem to be on the appropriate levels
  # Keep from having multiple scripts when $loc is stack!!!
  while (1) {
    $base = realize($base, 'presentation');
    last unless getQName($base) eq 'ltx:XMApp';
    my ($xop, $xbase, $xscript) = element_nodes($base);
    last unless $xop && (getQName($xop) eq 'ltx:XMTok');
    my ($ny) = ($xop->getAttribute('role') || '') =~ /^(SUPER|SUB)SCRIPTOP$/;
    last unless $ny;
    my ($nx, $nl) = ($xop->getAttribute('scriptpos') || 'post0')
      =~ /^(pre|mid|post)?(\d+)?$/;
    $nx = 'post' unless defined $nx;
    $nl = ''     unless defined $nl;
    my $spos = ($ny eq 'SUB' ? 0 : 1);

    # In each case a new pair is started when there is no pair yet, when the level
    # changes, or when the slot of the current pair is already occupied.
    # The explicit emptiness test matters: $pres[-1] on an empty array is a fatal error
    # (the other two only worked through autovivification of element 0).
    if ($nx eq 'pre') {
      push(@pres, [undef, undef])    # New empty pair (?)
        if !@pres || ($prelevel ne $nl) || $pres[-1][$spos];
      $pres[-1][$spos] = $xscript; $prelevel = $nl; }
    elsif ($nx eq 'mid') {
      $sawmid = 1;
      unshift(@mids, [undef, undef])    # New empty pair (?)
        if !@mids || ($midlevel ne $nl) || $mids[0][$spos];
      $mids[0][$spos] = $xscript; $midlevel = $nl; }
    else {
      if ($sawmid) {    # If we already saw mid-scripts (over/under); check for embellishment
        $emb_right = $xscript;
        last; }
      unshift(@posts, [undef, undef])    # New empty pair (?)
        if !@posts || ($postlevel ne $nl) || $posts[0][$spos];
      $posts[0][$spos] = $xscript; $postlevel = $nl; }
    $base = $xbase;
  }
  return ($base, [@pres], [@mids], [@posts], $emb_left, $emb_right); }
1109
# Handle text contents.
# Note that (currently) MathML doesn't allow math nested in m:mtext,
# nor in fact any other markup within m:mtext,
# but LaTeXML creates that, if the document is structured that way.
# Here we try to flatten the contents to strings, but keep the math as math
#
# $node is a text node, document fragment or element; %attr carries the inherited
# font, fontsize, color, backgroundcolor & opacity. Returns a list of MathML nodes
# (empty for a missing node, or any other kind of node).
sub pmml_text_aux {
  my ($node, %attr) = @_;
  return () unless $node;
  my $type = $node->nodeType;
  if ($type == XML_TEXT_NODE) {
    # Plain text gives a styled m:mtext; leading & trailing whitespace becomes $NBSP.
    my ($string, %mmlattr) = stylizeContent($node, 'm:mtext', %attr);
    $string =~ s/^\s+/$NBSP/;
    $string =~ s/\s+$/$NBSP/;
    return ['m:mtext', {%mmlattr}, $string]; }
  if ($type == XML_DOCUMENT_FRAG_NODE) {
    return map { pmml_text_aux($_, %attr) } $node->childNodes; }
  return () unless $type == XML_ELEMENT_NODE;

  # An element: its own styling attributes override the inherited ones.
  foreach my $name (qw(font fontsize color backgroundcolor opacity)) {
    my $value = $node->getAttribute($name);
    $attr{$name} = $value if $value; }
  my $tag = getQName($node);

  if ($tag eq 'ltx:Math') {
    # [NOTE BUG!!! we're not passing through the context... (but maybe pick it up anyway)]
    # If XMath still there, convert it now.
    my $xmath = $LaTeXML::Post::DOCUMENT->findnode('ltx:XMath', $node);
    return pmml($xmath) if $xmath;
    # Otherwise, may already have gotten converted ? return that
    my $mml = $LaTeXML::Post::DOCUMENT->findnode('m:math', $node);
    return $mml->childNodes if $mml;
    return (); }    # ???

  # ltx:text element is fine, if we can manage the attributes!
  if (($tag eq 'ltx:text')
    && (!grep { $node->hasAttribute($_) } qw(framed framecolor))) {
    return pmml_mayberesize($node, pmml_row(map { pmml_text_aux($_, %attr) } $node->childNodes)); }

  # We could just recurse on raw content, but it loses a lot...
  # So, let's just include the raw latexml markup, let the xslt convert it
  # And hope that the ultimate agent can deal with it!
  my ($ignore, %mmlattr) = stylizeContent($node, 'm:mtext', %attr);
  delete $mmlattr{stretchy};    # not useful (not too sure
  if ($LaTeXML::Post::DOCUMENT->findnodes('.//ltx:Math', $node)) {
    Warn('unexpected', 'nested-math', $node,
      "We're getting m:math nested within an m:mtext"); }
  return ['m:mtext', {%mmlattr}, $LaTeXML::Post::DOCUMENT->cloneNode($node, 'nest')]; }
1159
1160#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1161# Support functions for Content MathML
1162#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1163
# Top-level entry for Content MathML conversion of $node.
# Establishes the styling context (the $LaTeXML::MathML::... variables) from the
# attributes inherited by $node, for the duration of the conversion,
# and converts the contents of $node. ($self is not used here.)
sub cmml_top {
  my ($self, $node) = @_;
  local $LaTeXML::MathML::STYLE = 'text';
  my $font     = find_inherited_attribute($node, 'font');
  my $fontsize = find_inherited_attribute($node, 'fontsize') || '100%';
  my $color    = find_inherited_attribute($node, 'color');
  my $bgcolor  = find_inherited_attribute($node, 'backgroundcolor');
  my $opacity  = find_inherited_attribute($node, 'opacity');
  local $LaTeXML::MathML::FONT    = $font;
  local $LaTeXML::MathML::SIZE    = $fontsize;
  local $LaTeXML::MathML::COLOR   = $color;
  local $LaTeXML::MathML::BGCOLOR = $bgcolor;
  local $LaTeXML::MathML::OPACITY = $opacity;
  # Initially, the desired size is just the prevailing size.
  local $LaTeXML::MathML::DESIRED_SIZE = $fontsize;
  return cmml_contents($node); }
1174
# Convert the XMath $node to Content MathML (an ltx:XMRef is first replaced by the
# node it refers to), and record the association between the generated node and
# its XMath source with the current math processor.
sub cmml {
  my ($node) = @_;
  $node = realize($node) if getQName($node) eq 'ltx:XMRef';
  my $converted = cmml_internal($node);
  # Associate the generated node with the source XMath node.
  $LaTeXML::Post::MATHPROCESSOR->associateNode($converted, $node);
  return $converted; }
1183
# The actual Content MathML conversion of $node, dispatching on the kind of XMath node:
#   XMDual         : convert the content branch;
#   XMWrap, XMArg  : convert the contents (these only remain if parsing failed);
#   XMApp          : use the Token handler if the application itself has a meaning;
#                    treat as a decorated symbol if its role is ID;
#                    otherwise use the Apply handler found for the (realized) operator;
#   XMTok, XMHint, XMArray : use the Token, Hint or Array handler, respectively;
#   anything else (including XMText) : treat as a decorated symbol.
# A missing node, or missing operator, yields an m:merror.
sub cmml_internal {
  my ($node) = @_;
  return ['m:merror', {}, ['m:mtext', {}, "Missing Subexpression"]] unless $node;
  $node = realize($node) if getQName($node) eq 'ltx:XMRef';
  my $tag = getQName($node);

  if ($tag eq 'ltx:XMDual') {
    my ($content) = element_nodes($node);
    return cmml($content); }
  if (($tag eq 'ltx:XMWrap') || ($tag eq 'ltx:XMArg')) {    # Only present if parsing failed!
    return cmml_contents($node); }

  if ($tag eq 'ltx:XMApp') {
    if (my $meaning = $node->getAttribute('meaning')) {
      return lookupContent('Token', $node->getAttribute('role'), $meaning)->($node); }
    # Experiment: If XMApp has role ID, we treat it as a "Decorated Symbol"
    return cmml_decoratedSymbol($node) if ($node->getAttribute('role') || '') eq 'ID';
    my ($op, @args) = element_nodes($node);
    my $rop = $op && realize($op);
    return ['m:merror', {}, ['m:mtext', {}, "Missing Operator"]] unless $rop;
    # Note that the handler is chosen by the realized operator, but given the original.
    my $handler = lookupContent('Apply', $rop->getAttribute('role'), $rop->getAttribute('meaning'));
    return $handler->($op, @args); }

  # The remaining simple nodes are handled by a converter of the corresponding category.
  my %category = ('ltx:XMTok' => 'Token', 'ltx:XMHint' => 'Hint', 'ltx:XMArray' => 'Array');
  if (my $category = $category{$tag}) {
    my $handler = lookupContent($category, $node->getAttribute('role'), $node->getAttribute('meaning'));
    return $handler->($node); }
  return cmml_decoratedSymbol($node); }
1217
# Convert the contents of a node, which normally should contain a single child.
# It may be empty (assumed to be an error),
# or contain multiple nodes (presumably not properly parsed).
# Both of those cases are reported using m:cerror, with a csymbol from the
# content dictionary 'ambiguous' (a more appropriate cd name may still be wanted).
sub cmml_contents {
  my ($node) = @_;
  my @children = element_nodes($node);
  if (!$children[0]) {
    return ['m:cerror', {}, ['m:csymbol', { cd => 'ambiguous' }, 'missing-subexpression']]; }
  return (scalar(@children) > 1
    ? cmml_unparsed(@children)
    : cmml($children[0])); }
1231
# Convert a sequence of nodes that could not be parsed into a single expression.
# The result is an m:cerror (csymbol 'fragments' of cd 'ambiguous') containing the
# converted nodes. Tokens with unknown (or no) role become csymbols of cd 'unknown'.
sub cmml_unparsed {
  my (@nodes) = @_;
  my @fragments = ();
  foreach my $node (@nodes) {
    my $isunknown = (getQName($node) eq 'ltx:XMTok')
      && (($node->getAttribute('role') || 'UNKNOWN') eq 'UNKNOWN');
    if (!$isunknown) {
      push(@fragments, cmml($node));
      next; }
    # Deal with random, unknown symbols, but still record association.
    my $symbol = ['m:csymbol', { cd => 'unknown' }, $node->textContent];
    $LaTeXML::Post::MATHPROCESSOR->associateNode($symbol, $node);
    push(@fragments, $symbol); }
  return ['m:cerror', {},
    ['m:csymbol', { cd => 'ambiguous' }, 'fragments'],
    @fragments]; }
1247
# Convert a leaf item (a token) to Content MathML:
#  * a meaning along with an omcd attribute gives an m:csymbol in that content dictionary;
#  * a meaning on an item with role NUMBER gives an m:cn holding the meaning,
#    typed 'integer' if it is an optionally signed string of digits, else 'float';
#  * any other meaning gives an m:csymbol in the "latexml" content dictionary;
#  * without a meaning, an m:ci holding the stylized content,
#    prefixed by the mathvariant (and "-") if stylizeContent reports one.
sub cmml_leaf {
  my ($item) = @_;
  my $meaning = (ref $item) && $item->getAttribute('meaning');
  if (!$meaning) {
    my ($text, %attr) = stylizeContent($item, 'm:ci');
    my $variant = $attr{mathvariant};
    $text = $variant . "-" . $text if $variant;
    return ['m:ci', {}, $text]; }
  my $cd = $item->getAttribute('omcd');
  return ['m:csymbol', { cd => $cd }, $meaning] if $cd;
  if (($item->getAttribute('role') || '') eq 'NUMBER') {
    # special case, numbers with a meaning attribute
    my $type = ($meaning =~ /^[+-]?\d+$/ ? 'integer' : 'float');
    return ['m:cn', { type => $type }, $meaning]; }
  return ['m:csymbol', { cd => 'latexml' }, $meaning]; }
1264
# Experimental: treat an arbitrary item (eg. an XMApp with role=ID) as a symbol.
# If the item claims a meaning, it becomes an m:csymbol naming that meaning
# (content dictionary from the omcd attribute, defaulting to "latexml");
# otherwise it becomes an m:ci whose content is the item formatted as pmml.
sub cmml_decoratedSymbol {
  my ($item) = @_;
  my $meaning = (ref $item) && $item->getAttribute('meaning');
  # No meaning: wrap the presentation form.
  return ['m:ci', {}, pmml($item)] unless $meaning;
  # A blob that claims to have "meaning" is just named.
  my $cd = $item->getAttribute('omcd') || 'latexml';
  return ['m:csymbol', { cd => $cd }, $meaning]; }
1276
# Negate the argument: the m:not operator applied to the conversion of $arg.
sub cmml_not {
  my ($arg) = @_;
  my @operand = cmml($arg);
  return ['m:apply', {}, ['m:not', {}], @operand]; }
1281
# Synthesize a negated relation: not(op(args...)),
# where $op is a MathML element name (eg. 'm:approx') and @args are converted by cmml.
sub cmml_synth_not {
  my ($op, @args) = @_;
  my @operands = map { cmml($_) } @args;
  my $positive = ['m:apply', {}, [$op, {}], @operands];
  return ['m:apply', {}, ['m:not', {}], $positive]; }
1285
# Apply $op (a MathML element name) to the converted arguments, taken in reverse order.
sub cmml_synth_complement {
  my ($op, @args) = @_;
  my @swapped = reverse(@args);
  return ['m:apply', {}, [$op, {}], map { cmml($_) } @swapped]; }
1290
# Convert an XMath node that is going to be shared:
# first have the document generate an id for it (passing 'sh'), then convert it to cmml.
sub cmml_shared {
  my ($node) = @_;
  my $document = $LaTeXML::Post::DOCUMENT;
  $document->generateNodeID($node, 'sh');
  return cmml($node); }
1296
# Produce the m:share form referring to an XMath node.
# The href is '#', the node's fragid and the math processor's IDSuffix.
# A node lacking a fragid is warned about and gives an m:share without href.
sub cmml_share {
  my ($node) = @_;
  my $fragid = $node->getAttribute('fragid');
  if (!$fragid) {    # No fragid should be error/warning or something???
    Warn('expected', 'fragid', $node,
      "Shared node is missing fragid");
    return ['m:share']; }
  my $href = '#' . $fragid . $LaTeXML::Post::MATHPROCESSOR->IDSuffix;
  return ['m:share', { href => $href }]; }
1307
# Build the disjunction of applying each of several operators to the same arguments;
# eg. "less than or approximately equal" becomes or(lt(a,b), approx(a,b)).
#   $operators : array reference of MathML operator element names (eg. ['m:lt', 'm:approx']);
#   @args      : the XMath argument nodes.
# With a single operator the result is just the plain application.
# Otherwise the first application converts the arguments in full (through cmml_shared),
# while each later application refers back to them through cmml_share.
sub cmml_or_compose {
  my ($operators, @args) = @_;
  my ($first, @others) = @$operators;
  if (!@others) {
    return ['m:apply', {}, [$first, {}], map { cmml($_) } @args]; }
  my @parts = (['m:apply', {}, [$first, {}], map { cmml_shared($_) } @args]);
  # Use the loop variable: shifting the very array being iterated over
  # silently skipped operators as soon as there were more than two.
  foreach my $op (@others) {
    push(@parts, ['m:apply', {}, [$op, {}], map { cmml_share($_) } @args]); }
  # m:or is an (empty) operator element, so it must head an m:apply
  # rather than contain the alternatives itself.
  return ['m:apply', {}, ['m:or', {}], @parts]; }
1318
1319#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Translators
1321#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1322# Organized according to the MathML Content element lists.
1323# As a general rule, presentation conversions are based on role
1324# (eg "Token:role:?"), whereas content conversions are based
1325# on meaning or name (eg. "Token:?:meaning").
1326
1327#======================================================================
1328# Token elements:
1329#   cn, ci, csymbol
1330
# The default for any token: presented by pmml_mi, converted to content by cmml_leaf.
DefMathML("Token:?:?", \&pmml_mi, \&cmml_leaf);
# Tokens in the following roles are presented by pmml_mo instead;
# no content conversion is specified for them here.
foreach my $role (qw(PUNCT PERIOD OPEN CLOSE MIDDLE VERTBAR ARROW OVERACCENT UNDERACCENT)) {
  DefMathML("Token:${role}:?", \&pmml_mo, undef); }
1341
# Numbers are presented by pmml_mn; as content they give an m:cn whose type is
# 'integer' when the text is an optionally signed string of digits, and 'float' otherwise.
DefMathML("Token:NUMBER:?", \&pmml_mn, sub {
    my ($node) = @_;
    my $text = $node->textContent;
    my $type = 'float';
    $type = 'integer' if $text =~ /^[+-]?\d+$/;
    return ['m:cn', { type => $type }, $text]; });
# A token meaning "absent" is presented as an empty m:mi (Not m:none!)
DefMathML("Token:?:absent", sub {
    return ['m:mi', {}]; });
1346    # Hints normally would have disappeared during parsing
1347    # (turned into punctuation or padding?)
1348    # but if they survive (unparsed?) turn them into space
DefMathML('Hint:?:?', sub {
    my ($node) = @_;
    my $width = $node->getAttribute('width');
    # Only a hint that carries a width is presented, as an m:mspace of that many points.
    return ($width
      ? ['m:mspace', { width => getXMHintSpacing($width) . "pt" }]
      : undef); },
  sub {
    return undef; });    # Should Disappear from cmml!
1357
# At the presentation level, accented things are essentially adorned tokens.
# The handlers receive (accent, base).
# Note: accents are drawn at same size as base!
# An over accent normally gives an m:mover; but when the base is itself the application
# of an UNDERACCENT, both accents are combined into a single m:munderover
# (the under accent being converted by pmml_scriptsize).
DefMathML('Apply:OVERACCENT:?', sub {
    my ($accent, $base) = @_;
    my $nested = (getQName($base) eq 'ltx:XMApp');
    my ($inner_accent, $inner_base) = ($nested ? element_nodes($base) : ());
    if ($nested && (($inner_accent->getAttribute('role') || '') eq 'UNDERACCENT')) {
      return ['m:munderover', { accent => 'true', accentunder => 'true' },
        pmml($inner_base), pmml_scriptsize($inner_accent), pmml($accent)]; }
    return ['m:mover', { accent => 'true' }, pmml($base), pmml($accent)]; });
1369
# An under accent normally gives an m:munder of (base, accent); but when the base is itself
# the application of an OVERACCENT, both accents are combined into a single m:munderover
# (the under accent being converted by pmml_scriptsize).
DefMathML('Apply:UNDERACCENT:?', sub {
    my ($accent, $base) = @_;
    my $nested = (getQName($base) eq 'ltx:XMApp');
    my ($inner_accent, $inner_base) = ($nested ? element_nodes($base) : ());
    if ($nested && (($inner_accent->getAttribute('role') || '') eq 'OVERACCENT')) {
      return ['m:munderover', { accent => 'true', accentunder => 'true' },
        pmml($inner_base), pmml_scriptsize($accent), pmml($inner_accent)]; }
    return ['m:munder', { accentunder => 'true' }, pmml($base), pmml($accent)]; });
1378
# Enclosures give an m:menclose whose notation is the operator's enclose attribute,
# colored by the operator's color (else by $LaTeXML::MathML::COLOR).
# Whenever there is such a color, the enclosed material is wrapped in an m:mstyle
# that sets mathcolor to $LaTeXML::MathML::COLOR (else to black).
DefMathML('Apply:ENCLOSE:?', sub {
    my ($op, $base) = @_;
    my $notation = $op->getAttribute('enclose');
    my $color    = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my @enclosed = pmml($base);
    if ($color) {
      @enclosed = (['m:mstyle', { mathcolor => $LaTeXML::MathML::COLOR || 'black' }, @enclosed]); }
    return ['m:menclose', { notation => $notation, mathcolor => $color }, @enclosed]; });
1386
1387#======================================================================
1388# Basic Content elements:
1389#   apply, interval, inverse, sep, condition, declare, lambda, compose, ident,
1390#   domain, codomain, image, domainofapplication, piecewise, piece, otherwise
1391
# APPLYOP tokens (that is (only) \x{2061}; FUNCTION APPLICATION) and OPERATOR tokens
# are both presented by pmml_mo.
foreach my $role (qw(APPLYOP OPERATOR)) {
  DefMathML("Token:${role}:?", \&pmml_mo, undef); }
1394
# The generic application (any role, any meaning):
# presented as a row of the operator, an invisible FUNCTION APPLICATION and the arguments;
# as content, an m:apply of the converted operator to the converted arguments.
DefMathML('Apply:?:?',
  sub {
    my ($op, @args) = @_;
    my @row = (pmml($op), pmml_mo("\x{2061}"));    # \x{2061} is FUNCTION APPLICATION
    push(@row, pmml($_)) foreach @args;
    return ['m:mrow', {}, @row]; },
  sub {
    my ($op, @args) = @_;
    my @converted = map { cmml($_) } ($op, @args);
    return ['m:apply', {}, @converted]; });
# Applications of a composition operator are presented by pmml_infix.
DefMathML('Apply:COMPOSEOP:?', \&pmml_infix, undef);
# Differential operators.
# The token is an operator with a negative right padding.
DefMathML("Token:DIFFOP:?", sub {
    my ($node) = @_;
    return pmml_mo($node, rpadding => '-2.5pt'); },
  undef);
# The application is a row of the operator and its arguments; an invisible
# FUNCTION APPLICATION follows the operator, unless the operator was already presented
# as an m:mo or as an embellished operator (msub, msup, mover, munder...).
# (formerly simply a row of the presentations of $op and @args)
DefMathML("Apply:DIFFOP:?", sub {
    my ($op, @args) = @_;
    my $pop = pmml($op);
    my @row = ($pop);
    push(@row, pmml_mo("\x{2061}")) unless $$pop[0] =~ /^m:(mo|msub|msup|mover|munder)/;
    return ['m:mrow', {}, @row, map { pmml($_) } @args]; },
  undef);
1414
# In pragmatic CMML, these are containers:
# the converted arguments become the children of an m:interval,
# whose closure attribute is the meaning without its "-interval" suffix.
foreach my $kind (qw(open closed closed-open open-closed)) {
  my $closure = $kind;
  DefMathML("Apply:?:${closure}-interval", undef, sub {
      my ($op, @args) = @_;
      return ['m:interval', { closure => $closure }, map { cmml($_) } @args]; }); }
1428
# Tokens whose meaning corresponds directly to a Content MathML element.
foreach my $entry (
  ['inverse',  'm:inverse'],
  ['lambda',   'm:lambda'],
  ['compose',  'm:compose'],
  ['identity', 'm:ident'],
  ['domain',   'm:domain'],
  ['codomain', 'm:codomain'],
  ['image',    'm:image']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:$meaning", undef, sub { return [$element]; }); }
1436
# m:piece, m:piecewise & m:otherwise are generated as part of a cases construct.
# Each row with (at least) two cells gives an m:piece built from the first two cells,
# unless the second cell's text is "otherwise"; such rows, as well as rows with
# a single cell, supply the value of the m:otherwise. Empty rows are skipped.
DefMathML("Array:?:cases", undef, sub {
    my ($node) = @_;
    my (@pieces, @defaults);
    foreach my $row (element_nodes($node)) {
      my @cells = element_nodes($row);
      next unless @cells;    # empty row, just skip
      my ($value, $condition) = @cells;
      # No condition? Perhaps it means "otherwise"?  Or it says so literally (more robust test?)
      if ((scalar(@cells) == 1) || ($condition->textContent eq 'otherwise')) {
        push(@defaults, $value); }
      else {
        # Really, the 2nd cell needs to be "Looked at"; may contain "if","when" or "unless"?!?!
        push(@pieces, ['m:piece', {}, cmml_contents($value), cmml_contents($condition)]); } }
    if (@defaults) {
      Warn('unexpected', 'otherwise', $node,
        "Cases statement seems to have multiple otherwise clauses",
        @defaults) if @defaults > 1;
      # Only the first "otherwise" is used.
      push(@pieces, ['m:otherwise', {}, cmml_contents($defaults[0])]); }
    return ['m:piecewise', {}, @pieces]; });
1459
1460#======================================================================
1461# Arithmetic, Algebra and Logic:
1462#   quotient, factorial, divide, max, min, minus, plus, power, rem, times, root
1463#   gcd, and, or, xor, not, implies, forall, exists, abs, conjugate, arg, real,
1464#   imaginary, lcm, floor, ceiling.
1465
1466# BRM:
1467
# Additive operators: tokens are presented by pmml_mo, applications by pmml_infix;
# the ADDOP tokens meaning plus or minus have content elements of their own.
DefMathML("Token:ADDOP:?", \&pmml_mo, undef);
foreach my $meaning (qw(plus minus)) {
  my $element = "m:$meaning";
  DefMathML("Token:ADDOP:$meaning", undef, sub { return [$element]; }); }
DefMathML('Apply:ADDOP:?', \&pmml_infix, undef);

# Multiplicative operators, and the
# unsatisfactory BINOP = ADDOP or MULOP ???
foreach my $role (qw(MULOP BINOP)) {
  DefMathML("Token:${role}:?", \&pmml_mo,    undef);
  DefMathML("Apply:${role}:?", \&pmml_infix, undef); }
1479
# Fractions give an m:mfrac of the first two arguments (any others are ignored),
# each converted by pmml_smaller.  The operator supplies the attributes:
#   linethickness from thickness (when defined),
#   mathcolor from color (else $LaTeXML::MathML::COLOR), when there is one,
#   bevelled when the class attribute includes ltx_bevelled.
DefMathML('Apply:FRACOP:?', sub {
    my ($op, $num, $den) = @_;
    my $thickness = $op->getAttribute('thickness');
    my $color     = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my @classes   = split(/\s+/, $op->getAttribute('class') || '');
    my %attr      = ();
    $attr{linethickness} = $thickness if defined $thickness;
    $attr{mathcolor}     = $color     if $color;
    $attr{bevelled}      = 'true'     if grep { $_ eq 'ltx_bevelled' } @classes;
    return ['m:mfrac', \%attr, pmml_smaller($num), pmml_smaller($den)]; });
1489
# Presentation-only definitions for modifiers, MIDDLE and scripts.
foreach my $entry (
  ['Apply:MODIFIEROP:?',    \&pmml_infix],
  ['Token:MODIFIEROP:?',    \&pmml_mo],
  ['Apply:MIDDLE:?',        \&pmml_infix],
  ['Token:SUPOP:?',         \&pmml_mo],
  ['Apply:SUPERSCRIPTOP:?', \&pmml_script],
  ['Apply:SUBSCRIPTOP:?',   \&pmml_script]) {
  my ($key, $presenter) = @$entry;
  DefMathML($key, $presenter, undef); }
# As content, the script operators themselves are just csymbols in cd "ambiguous".
foreach my $script (qw(superscript subscript)) {
  my $name = $script;
  DefMathML('Token:' . uc($name) . 'OP:?', undef, sub {
      return ['m:csymbol', { cd => 'ambiguous' }, $name]; }); }
1502
# Postfix operators: reverse the presentation (argument, then operator),
# with no function application in between.
DefMathML('Apply:POSTFIX:?', sub {
    my ($op, $arg) = @_;
    return ['m:mrow', {}, pmml($arg), pmml($op)]; });
# The token is a plain operator.  Adjusting its padding, as in
#   pmml_mo($_[0], lpadding => '-4pt', rpadding => '1pt')
# apparently ends up with too much of a spacing shift.
DefMathML("Token:POSTFIX:?", \&pmml_mo, undef);
1508
# Roots.  The presentation is colored by the operator's color (else by
# $LaTeXML::MathML::COLOR), when there is one.  As content both give an m:root;
# for the nth root, the first argument goes into an m:degree.
DefMathML('Apply:?:square-root',
  sub {
    my ($op, $radicand) = @_;
    my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my %attr  = ($color ? (mathcolor => $color) : ());
    return ['m:msqrt', \%attr, pmml($radicand)]; },
  sub {
    my ($op, $radicand) = @_;
    return ['m:apply', {}, ['m:root', {}], cmml($radicand)]; });
DefMathML('Apply:?:nth-root',
  sub {
    my ($op, $index, $radicand) = @_;
    my $color = $op->getAttribute('color') || $LaTeXML::MathML::COLOR;
    my %attr  = ($color ? (mathcolor => $color) : ());
    return ['m:mroot', \%attr, pmml($radicand), pmml_scriptsize($index)]; },
  sub {
    my ($op, $index, $radicand) = @_;
    return ['m:apply', {}, ['m:root', {}], ['m:degree', {}, cmml($index)], cmml($radicand)]; });
1519
# Tokens whose meaning corresponds directly to a Content MathML operator element.
# Note MML's distinction between quotient and divide: quotient yields an integer
DefMathML("Token:?:quotient",       undef, sub { return ['m:quotient']; });
DefMathML("Token:?:factorial",      undef, sub { return ['m:factorial']; });
DefMathML("Token:?:divide",         undef, sub { return ['m:divide']; });
DefMathML("Token:?:maximum",        undef, sub { return ['m:max']; });
DefMathML("Token:?:minimum",        undef, sub { return ['m:min']; });
DefMathML("Token:?:minus",          undef, sub { return ['m:minus']; });
# MathML has no uminus element: unary minus is m:minus applied to a single argument.
DefMathML("Token:?:uminus",         undef, sub { return ['m:minus']; });
DefMathML("Token:?:plus",           undef, sub { return ['m:plus']; });
DefMathML("Token:?:power",          undef, sub { return ['m:power']; });
DefMathML("Token:?:remainder",      undef, sub { return ['m:rem']; });
DefMathML("Token:?:times",          undef, sub { return ['m:times']; });
DefMathML("Token:?:gcd",            undef, sub { return ['m:gcd']; });
DefMathML("Token:?:and",            undef, sub { return ['m:and']; });
DefMathML("Token:?:or",             undef, sub { return ['m:or']; });
DefMathML("Token:?:xor",            undef, sub { return ['m:xor']; });
DefMathML("Token:?:not",            undef, sub { return ['m:not']; });
DefMathML("Token:?:implies",        undef, sub { return ['m:implies']; });
DefMathML("Token:?:forall",         undef, sub { return ['m:forall']; });
DefMathML("Token:?:exists",         undef, sub { return ['m:exists']; });
DefMathML("Token:?:absolute-value", undef, sub { return ['m:abs']; });
DefMathML("Token:?:conjugate",      undef, sub { return ['m:conjugate']; });
DefMathML("Token:?:argument",       undef, sub { return ['m:arg']; });
DefMathML("Token:?:real-part",      undef, sub { return ['m:real']; });
DefMathML("Token:?:imaginary-part", undef, sub { return ['m:imaginary']; });
DefMathML("Token:?:lcm",            undef, sub { return ['m:lcm']; });
DefMathML("Token:?:floor",          undef, sub { return ['m:floor']; });
DefMathML("Token:?:ceiling",        undef, sub { return ['m:ceiling']; });
1548
1549#======================================================================
1550# Relations:
1551#   eq, neq, gt, lt, geq, leq, equivalent, approx, factorof
1552
# Relational operator tokens are presented by pmml_mo.
DefMathML("Token:RELOP:?", \&pmml_mo);
# The relations having a Content MathML element of their own.
foreach my $entry (
  ['equals',                 'm:eq'],
  ['not-equals',             'm:neq'],
  ['greater-than',           'm:gt'],
  ['less-than',              'm:lt'],
  ['greater-than-or-equals', 'm:geq'],
  ['less-than-or-equals',    'm:leq'],
  ['equivalent-to',          'm:equivalent'],
  ['approximately-equals',   'm:approx']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:$meaning", undef, sub { return [$element]; }); }
# Relations synthesized from other Content MathML elements:
# a negation, and a disjunction of two relations.
DefMathML("Apply:?:not-approximately-equals", undef, sub {
    my (undef, @args) = @_;
    return cmml_synth_not('m:approx', @args); });
DefMathML("Apply:?:less-than-or-approximately-equals", undef, sub {
    my (undef, @args) = @_;
    my @alternatives = ('m:lt', 'm:approx');
    return cmml_or_compose(\@alternatives, @args); });

DefMathML("Token:?:factor-of", undef, sub {
    return ['m:factorof']; });

# Meta-relation tokens are presented by pmml_mo;
# applications of relations, of either kind, by pmml_infix.
DefMathML("Token:METARELOP:?", \&pmml_mo);
foreach my $role (qw(RELOP METARELOP)) {
  DefMathML("Apply:${role}:?", \&pmml_infix); }
1574
# Top level relations
# A sequence of formulae is presented as a row (pmml_row) of the converted elements;
# as content, it applies the csymbol "formulae-sequence" (cd "ambiguous") to them.
DefMathML('Apply:?:formulae',
  sub {
    my (undef, @formulae) = @_;
    my @presented = map { pmml($_) } @formulae;
    return pmml_row(@presented); },
  sub {
    my (undef, @formulae) = @_;
    my $head = ['m:csymbol', { cd => 'ambiguous' }, 'formulae-sequence'];
    return ['m:apply', {}, $head, map { cmml($_) } @formulae]; });
1585
# A chain of relations; the elements alternate: operand, relation, operand, relation...
# As content, each (lhs relation rhs) gives an m:apply; a right-hand side is converted
# in full (cmml_shared) and then referred to (cmml_share) as the next left-hand side.
# Several relations are joined by m:and; a single one is returned as is.
DefMathML('Apply:?:multirelation',
  sub {
    my (undef, @elements) = @_;
    # This presumes that the relational operators scattered through here
    # will be recognized as such by pmml and turned into m:mo
    my @presented = map { pmml($_) } @elements;
    return pmml_row(@presented); },
  sub {
    my (undef, $first, @rest) = @_;
    my $lhs = cmml($first);
    return $lhs unless @rest;
    my @relations = ();
    while (my ($rel, $rhs) = splice(@rest, 0, 2)) {
      push(@relations, ['m:apply', {}, cmml($rel), $lhs, cmml_shared($rhs)]);
      $lhs = cmml_share($rhs); }
    return $relations[0] if scalar(@relations) <= 1;
    return ['m:apply', {}, ['m:and', {}], @relations]; }
);
1604
1605#======================================================================
1606# Calculus and Vector Calculus:
1607#   int, diff, partialdiff, lowlimit, uplimit, bvar, degree,
1608#   divergence, grad, curl, laplacian.
1609
# Presentation of integral and limit operators, and of applications of arrows.
foreach my $entry (
  ['Token:INTOP:?',   \&pmml_bigop],
  ['Token:LIMITOP:?', \&pmml_mo],
  ['Apply:ARROW:?',   \&pmml_infix]) {
  my ($key, $presenter) = @$entry;
  DefMathML($key, $presenter); }

# Tokens whose meaning corresponds directly to a Content MathML element.
foreach my $entry (
  ['integral',             'm:int'],
  ['differential',         'm:diff'],
  ['partial-differential', 'm:partialdiff'],
  # lowlimit, uplimit, degree ?
  ['divergence', 'm:divergence'],
  ['gradient',   'm:grad'],
  ['curl',       'm:curl'],
  ['laplacian',  'm:laplacian']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:$meaning", undef, sub { return [$element]; }); }
1622
1623#======================================================================
1624# Theory of Sets,
1625#   set, list, union, intersect, in, notin, subset, prsubset, notsubset, notprsubset,
1626#   setdiff, card, cartesianproduct.
1627
# Sets and lists are containers: the converted arguments become
# the children of an m:set or m:list.
foreach my $container (qw(set list)) {
  my $element = "m:$container";
  DefMathML("Apply:?:$container", undef, sub {
      my ($op, @args) = @_;
      return [$element, {}, map { cmml($_) } @args]; }); }
# Set operators and relations.
# A 'Token' entry maps the meaning directly onto a Content MathML element;
# an 'Apply' entry has no element of its own, and is synthesized by applying
# the given element to the arguments in reverse order (cmml_synth_complement).
foreach my $entry (
  ['Token', 'union',                      'm:union'],
  ['Token', 'intersection',               'm:intersect'],
  ['Token', 'element-of',                 'm:in'],
  ['Token', 'not-element-of',             'm:notin'],
  ['Apply', 'contains',                   'm:in'],
  ['Apply', 'not-contains',               'm:notin'],
  ['Token', 'subset-of',                  'm:subset'],
  ['Token', 'subset-of-or-equals',        'm:subset'],
  ['Token', 'subset-of-and-not-equals',   'm:prsubset'],
  ['Apply', 'superset-of',                'm:subset'],
  ['Apply', 'superset-of-or-equals',      'm:subset'],
  ['Apply', 'superset-of-and-not-equals', 'm:prsubset'],
  ['Token', 'set-minus',                  'm:setdiff'],
  ['Token', 'cardinality',                'm:card'],
  ['Token', 'cartesian-product',          'm:cartesianproduct']) {
  my ($type, $meaning, $element) = @$entry;
  my $handler = ($type eq 'Token'
    ? sub { return [$element]; }
    : sub {
      my ($op, @args) = @_;
      return cmml_synth_complement($element, @args); });
  DefMathML("${type}:?:${meaning}", undef, $handler); }
1659
1660#======================================================================
1661# Sequences and Series:
1662#   sum, product, limit, tendsto
1663# (but see calculus for limit too!!)
1664
# Big operator and summation tokens are presented by pmml_bigop.
foreach my $role (qw(BIGOP SUMOP)) {
  DefMathML("Token:${role}:?", \&pmml_bigop); }
1667
# Present a big operator applied to its body: (for now?) simply a row
# of the converted operator followed by the converted body.
sub pmml_summation {
  my ($op, $body) = @_;
  my @row = map { pmml($_) } ($op, $body);
  return ['m:mrow', {}, @row]; }
1672
# Applications of big operators, integrals and sums are all presented by pmml_summation.
foreach my $role (qw(BIGOP INTOP SUMOP)) {
  DefMathML("Apply:${role}:?", \&pmml_summation); }
1676
# A limit approached from a direction: a row of the argument followed by the direction.
DefMathML('Apply:?:limit-from', sub {
    my (undef, $arg, $dir) = @_;
    my @row = (pmml($arg), pmml($dir));
    return ['m:mrow', {}, @row]; });

# An annotated variable: the variable, a wide space, and then the annotation.
DefMathML('Apply:?:annotated', sub {
    my (undef, $var, $annotation) = @_;
    my $gap = ['m:mspace', { width => 'veryverythickmathspace' }];
    return ['m:mrow', {}, pmml($var), $gap, pmml($annotation)]; });
1686
# Tokens whose meaning corresponds directly to a Content MathML element.
# Note that the MathML element for products is m:product (there is no m:prod).
DefMathML("Token:?:sum",      undef, sub { return ['m:sum']; });
DefMathML("Token:?:prod",     undef, sub { return ['m:product']; });
# NOTE(review): presumably "product" is the meaning actually assigned to \prod
# (by analogy with "sum"); confirm against the bindings.  "prod" is kept as well.
DefMathML("Token:?:product",  undef, sub { return ['m:product']; });
DefMathML("Token:?:limit",    undef, sub { return ['m:limit']; });
DefMathML("Token:?:tends-to", undef, sub { return ['m:tendsto']; });
1691
1692#======================================================================
1693# Elementary Classical Functions,
1694#   exp, ln, log, sin, cos tan, sec, csc, cot, sinh, cosh, tanh, sech, csch, coth,
1695#   arcsin, arccos, arctan, arccosh, arccot, arccoth, arccsc, arccsch, arcsec, arcsech,
1696#   arcsinh, arctanh
1697
# Tokens whose meaning corresponds directly to a Content MathML element
# for an elementary classical function.
DefMathML("Token:?:exponential",                  undef, sub { return ['m:exp']; });
DefMathML("Token:?:natural-logarithm",            undef, sub { return ['m:ln']; });
DefMathML("Token:?:logarithm",                    undef, sub { return ['m:log']; });
DefMathML("Token:?:sine",                         undef, sub { return ['m:sin']; });
DefMathML("Token:?:cosine",                       undef, sub { return ['m:cos']; });
DefMathML("Token:?:tangent",                      undef, sub { return ['m:tan']; });
DefMathML("Token:?:secant",                       undef, sub { return ['m:sec']; });
DefMathML("Token:?:cosecant",                     undef, sub { return ['m:csc']; });
DefMathML("Token:?:cotangent",                    undef, sub { return ['m:cot']; });
DefMathML("Token:?:hyperbolic-sine",              undef, sub { return ['m:sinh']; });
DefMathML("Token:?:hyperbolic-cosine",            undef, sub { return ['m:cosh']; });
DefMathML("Token:?:hyperbolic-tangent",           undef, sub { return ['m:tanh']; });
DefMathML("Token:?:hyperbolic-secant",            undef, sub { return ['m:sech']; });
DefMathML("Token:?:hyperbolic-cosecant",          undef, sub { return ['m:csch']; });
DefMathML("Token:?:hyperbolic-cotangent",         undef, sub { return ['m:coth']; });
# The key used to be misspelled ("cotantent"), so that "hyperbolic-cotangent" never matched;
# the misspelled key is retained in case anything came to rely on it.
DefMathML("Token:?:hyperbolic-cotantent",         undef, sub { return ['m:coth']; });
DefMathML("Token:?:inverse-sine",                 undef, sub { return ['m:arcsin']; });
DefMathML("Token:?:inverse-cosine",               undef, sub { return ['m:arccos']; });
DefMathML("Token:?:inverse-tangent",              undef, sub { return ['m:arctan']; });
DefMathML("Token:?:inverse-secant",               undef, sub { return ['m:arcsec']; });
DefMathML("Token:?:inverse-cosecant",             undef, sub { return ['m:arccsc']; });
DefMathML("Token:?:inverse-cotangent",            undef, sub { return ['m:arccot']; });
DefMathML("Token:?:inverse-hyperbolic-sine",      undef, sub { return ['m:arcsinh']; });
DefMathML("Token:?:inverse-hyperbolic-cosine",    undef, sub { return ['m:arccosh']; });
DefMathML("Token:?:inverse-hyperbolic-tangent",   undef, sub { return ['m:arctanh']; });
DefMathML("Token:?:inverse-hyperbolic-secant",    undef, sub { return ['m:arcsech']; });
DefMathML("Token:?:inverse-hyperbolic-cosecant",  undef, sub { return ['m:arccsch']; });
DefMathML("Token:?:inverse-hyperbolic-cotangent", undef, sub { return ['m:arccoth']; });
1725
1726#======================================================================
1727# Statistics:
1728#   mean, sdev, variance, median, mode, moment, momentabout
1729
# Tokens whose meaning corresponds directly to a Content MathML statistics element.
# Note that the MathML element for the variance is m:variance (there is no m:var).
DefMathML("Token:?:mean",               undef, sub { return ['m:mean']; });
DefMathML("Token:?:standard-deviation", undef, sub { return ['m:sdev']; });
DefMathML("Token:?:variance",           undef, sub { return ['m:variance']; });
DefMathML("Token:?:median",             undef, sub { return ['m:median']; });
DefMathML("Token:?:mode",               undef, sub { return ['m:mode']; });
DefMathML("Token:?:moment",             undef, sub { return ['m:moment']; });
1736# momentabout ???
1737
1738#======================================================================
1739# Linear Algebra:
1740#   vector, matrix, matrixrow, determinant, transpose, selector,
1741#   vectorproduct, scalarproduct, outerproduct.
1742
# A vector is a container: the converted arguments become the children of an m:vector.
DefMathML("Apply:?:vector", undef, sub {
    my (undef, @components) = @_;
    my @converted = map { cmml($_) } @components;
    return ['m:vector', {}, @converted]; });
# Tokens whose meaning corresponds directly to a Content MathML element.
# (a token meaning "matrix" is deliberately not mapped to m:matrix here)
foreach my $entry (
  ['determinant',    'm:determinant'],
  ['transpose',      'm:transpose'],
  ['selector',       'm:selector'],
  ['vector-product', 'm:vectorproduct'],
  ['scalar-product', 'm:scalarproduct'],
  ['outer-product',  'm:outerproduct']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:?:$meaning", undef, sub { return [$element]; }); }
1753
# So by default any Array is a Matrix? hmmm....
# Each child of the array gives an m:matrixrow,
# whose entries are the contents (cmml_contents) of that row's children.
DefMathML("Array:?:?", undef, sub {
    my ($node) = @_;
    my @matrix_rows = ();
    foreach my $row (element_nodes($node)) {
      my @entries = map { cmml_contents($_) } element_nodes($row);
      push(@matrix_rows, ['m:matrixrow', {}, @entries]); }
    return ['m:matrix', {}, @matrix_rows]; });
1760
1761#======================================================================
1762# Semantic Mapping Elements
1763#   annotation, semantics, annotation-xml
1764#======================================================================
1765# Constant and Symbol Elements
1766#   integers, reals, rationals, naturalnumbers, complexes, primes,
1767#   exponentiale, imaginaryi, notanumber, true, false, emptyset, pi,
1768#   eulergamma, infinity
1769
# ID tokens whose meaning corresponds directly to a Content MathML
# constant or symbol element.
foreach my $entry (
  ['integers',       'm:integers'],
  ['reals',          'm:reals'],
  ['rationals',      'm:rationals'],
  ['numbers',        'm:naturalnumbers'],
  ['complexes',      'm:complexes'],
  ['primes',         'm:primes'],
  ['exponential-e',  'm:exponentiale'],
  ['imaginary-i',    'm:imaginaryi'],
  ['notanumber',     'm:notanumber'],
  ['true',           'm:true'],
  ['false',          'm:false'],
  ['empty-set',      'm:emptyset'],
  ['circular-pi',    'm:pi'],
  ['Euler-constant', 'm:eulergamma'],
  ['infinity',       'm:infinity']) {
  my ($meaning, $element) = @$entry;
  DefMathML("Token:ID:$meaning", undef, sub { return [$element]; }); }
1785
1786#======================================================================
1787# Purely presentational constructs.
1788# An issue here:
1789#  Some constructs are pretty purely presentational.  Hopefully, these would
1790# only appear in XWrap's or in the presentation branch of an XMDual, so we won't
1791# attempt to convert them to content.  But if we do, should we warn?
1792
1793# ================================================================================
1794# More exotic things
1795
1796# ================================================================================
1797# cfrac! Ugh!
1798
# Present a continued fraction, pulling nested continuations up to the top level.
# Have to deal w/ screwy structure:
# If the denominator is a sum/difference, then its last summand can be: cdots, a cfrac,
#  or an invisibleTimes of cdots and something (which could also be a cfrac!).
# Returns a list: the current fraction (whose denominator is the leading terms
# followed by the operator), then whatever the last summand contributes;
# or else, when the structure is not recognized, just the ordinary fraction.
# There is some really messy manipulation of display/text style...probably not all correct.
# This really should be handled earlier by an XMDual.
sub do_cfrac {
  my ($numer, $denom) = @_;
  # The fallback, when the structure isn't recognized: an ordinary fraction.
  my $ordinary = sub {
    return ['m:mfrac', {}, pmml_smaller($numer), pmml_smaller($denom)]; };
  # The denominator has to be some kind of application...
  return $ordinary->() unless getQName($denom) eq 'ltx:XMApp';
  my ($denomop, @terms) = element_nodes($denom);
  # ...namely a sum or difference, OR a \cdots
  my $is_sum = (($denomop->getAttribute('role') || '') eq 'ADDOP');
  return $ordinary->() unless $is_sum || (($denomop->textContent || '') eq "\x{22EF}");
  my $last = pop(@terms);    # Check last operand in denominator.
  # This is the current contribution to the cfrac (if we match the last term)
  my @numerator = pmml_smaller($numer);
  my @leading   = (@terms > 1 ? pmml_infix($denomop, @terms) : pmml_smaller($terms[0]));
  my $curr      = ['m:mfrac', {}, @numerator, ['m:mrow', {}, @leading, pmml_smaller($denomop)]];
  # Denom ends w/ \cdots: bring the dots up to toplevel
  if (($last->textContent || '') eq "\x{22EF}") {
    return ($curr, pmml_smaller($last)); }
  # Otherwise, the denominator must end w/ an application --- what kind?
  return $ordinary->() unless getQName($last) eq 'ltx:XMApp';
  my ($lastop, @lastargs) = element_nodes($last);
  # Denom ends w/ cfrac: pull it to toplevel
  if (($lastop->getAttribute('meaning') || '') eq 'continued-fraction') {
    return ($curr, do_cfrac(@lastargs)); }
  # Denom ends w/ * (invisible) of \cdots and something
  if ((($lastop->textContent || '') eq "\x{2062}")
    && (scalar(@lastargs) == 2) && (($lastargs[0]->textContent || '') eq "\x{22EF}")) {
    return ($curr, pmml_smaller($lastargs[0]), pmml_smaller($lastargs[1])); }
  return $ordinary->(); }
1826
# Continued fractions: the inline form (operator named cfrac-inline) is flattened
# into a row by do_cfrac; otherwise it is just an ordinary fraction.
DefMathML('Apply:?:continued-fraction', sub {
    my ($op, $numer, $denom) = @_;
    my $inline = (($op->getAttribute('name') || '') eq 'cfrac-inline');
    return ['m:mfrac', {}, pmml_smaller($numer), pmml_smaller($denom)] unless $inline;
    return pmml_row(do_cfrac($numer, $denom)); });
1834
1835#================================================================================
1836# A Hack for Demo/Testing Purposes ONLY!!!
1837# [Illustrates that we'd like these to be defineable in bindings!]
# Content-only handler for applications whose operator has the meaning
# 'hack-definite-integral'; the presentation slot is undef.
# Arguments after the operator: lower limit, upper limit, integrand and the
# variable of integration.  The result is an m:apply holding m:int, then
# m:bvar, m:lowlimit and m:uplimit wrappers, then the converted integrand.
DefMathML('Apply:?:hack-definite-integral', undef,
  sub {
    my ($op, $lower, $upper, $integrand, $variable) = @_;
    # The parts are converted in the order they appear in the output.
    my $bvar     = ['m:bvar',     {}, cmml($variable)];
    my $lowlimit = ['m:lowlimit', {}, cmml($lower)];
    my $uplimit  = ['m:uplimit',  {}, cmml($upper)];
    return ['m:apply', {}, ['m:int'], $bvar, $lowlimit, $uplimit, cmml($integrand)]; });
1847
1848#================================================================================
18491;
1850
1851__END__
1852
1853=pod
1854
1855=head1 NAME
1856
1857C<LaTeXML::Post::MathML> - Post-Processing modules for converting math to MathML.
1858
1859=head1 SYNOPSIS
1860
1861C<LaTeXML::Post::MathML> is the abstract base class for the MathML Postprocessor;
1862C<LaTeXML::Post::MathML::Presentation> and C<LaTeXML::Post::MathML::Content>
1863convert XMath to either Presentation or Content MathML, or with that format
as the principal branch for Parallel markup.
1865
1866=head1 DESCRIPTION
1867
The conversion is carried out primarily by a tree walk of the C<XMath> expression;
1869appropriate handlers are selected and called depending on the operators and forms encountered.
1870Handlers can be defined on applications of operators, or on tokens;
when a token is applied, its application handler takes precedence over its token handler.
1872
1873=head2 C<< DefMathML($key,$presentation,$content); >>
1874
1875Defines presentation and content handlers for C<$key>.
1876C<$key> is of the form C<TYPE:ROLE:MEANING>, where
1877
  TYPE    : is either C<Token> or C<Apply> (or C<Hint> ?)
1879  ROLE    : is a grammatical role (on XMath tokens)
1880  MEANING : is the meaning attribute (on XMath tokens)
1881
1882Any of these can be C<?> to match any role or meaning;
1883matches of both are preferred, then match of meaning
1884or role, or neither.
1885
1886The subroutine handlers for presentation and content are given
1887by C<$presentation> and C<$content>, respectively.
1888Either can be C<undef>, in which case some other matching
1889handler will be invoked.
1890
1891For C<Token> handlers, the arguments passed are the token node;
1892for C<Apply> handler, the arguments passed are the operator node
1893and any arguments.
1894
1895However, it looks like some C<TOKEN> handlers are being defined
1896to take C<$content,%attributes> being the string content of the token,
1897and the token's attributes!
1898
=head2 Presentation Conversion Utilities
1900
1901=over
1902
1903=item C<< $mmlpost->pmml_top($node,$style); >>
1904
1905This is the top-level converter applied to an C<XMath> node.
1906It establishes a local context for font, style, size, etc.
1907It generally does the bulk of the work for a PresentationMathML's C<translateNode>,
1908although the latter wraps the actual C<m:math> element around it.
1909(C<style> is display or text).
1910
=item C<pmml($node)>, C<pmml_smaller($node)>, C<pmml_scriptsize($node)>
1912
1913Converts the C<XMath> C<$node> to Presentation MathML.
1914The latter two are used when the context calls for smaller (eg. fraction parts)
1915or scriptsize (eg sub or superscript) size or style, so that the size encoded
1916within C<$node> will be properly accounted for.
1917
1918=item C<pmml_mi($node,%attributes)>, C<pmml_mn($node,%attributes)>, C<pmml_mo($node,%attributes)>
1919
1920These are C<Token> handlers, to create C<m:mi>, C<m:mn> and C<m:mo> elements,
1921respectively.  When called as a handler, they will be supplied only with an C<XMath>
1922node (typically an C<XMTok>). For convenient reuse, these functions may also be called
1923on a 'virtual' token: with C<$node> being a string (that would have been the text
1924content of the C<XMTok>), and the C<%attributes> that would have been the token's attributes.
1925
1926=item C<pmml_infix($op,@args)>, C<pmml_script($op,@args)>, C<pmml_bigop($op,@args)>
1927
1928These are C<Apply> handlers, for handling general infix, sub or superscript,
1929or bigop (eg. summations) constructs.  They are called with the operator
1930token, followed by the arguments; all are C<XMath> elements.
1931
1932=item C<pmml_row(@items)>
1933
This wraps an C<m:mrow> around the already converted C<@items> if needed;
1935That is, if there is only a single item it is returned without the C<m:mrow>.
1936
1937=item C<pmml_unrow($pmml)>
1938
1939This perverse utility takes something that has already been converted
1940to Presentation MathML.  If the argument is an C<m:mrow>, it returns a list of the
1941mathml elements within that row, otherwise it returns a list containing
1942the single element C<$pmml>.
1943
1944=item C<pmml_parenthesize($item,$open,$close)>
1945
1946This utility parenthesizes the (already converted MathML) C<$item> with the string delimiters
1947C<$open> and C<$close>.  These are converted to an C<m:mrow> with C<m:mo> for the fences,
1948unless the C<usemfenced> switch is set, in which case C<m:mfenced> is used.
1949
1950=item C<pmml_punctuate($separators,@items) >
1951
1952This utility creates an C<m:mrow> by interjecting the punctuation
between successive items in the list of already converted C<@items>.
If there is more than one character in C<$separators>, the first
is used between the first pair, the next between the next pair;
if the separators are exhausted, the last is repeated between remaining pairs.
1957C<$separators> defaults to (repeated) comma.
1958
1959=back
1960
=head2 Content Conversion Utilities
1962
1963=over
1964
=item C<< $mmlpost->cmml_top($node); >>
1966
1967This is the top-level converter applied to an C<XMath> node.
1968It establishes a local context for font, style, size, etc (were it needed).
1969It generally does the bulk of the work for a ContentMathML's C<translateNode>,
1970although the latter wraps the actual C<m:math> element around it.
1971
1972=item C<cmml($node)>
1973
1974Converts the C<XMath> C<$node> to Content MathML.
1975
1976=item C<cmml_leaf($token)>
1977
1978Converts the C<XMath> token to an C<m:ci>, C<m:cn> or C<m:csymbol>, under appropriate circumstances.
1979
1980=item C<cmml_decoratedSymbol($item)>
1981
1982Similar to C<cmml_leaf>, but used when an operator is itself, apparently, an application.
1983This converts C<$item> to Presentation MathML to use for the content of the C<m:ci>.
1984
1985=item C<cmml_not($arg)>
1986
1987Construct the not of the argument C<$arg>.
1988
1989=item C<cmml_synth_not($op,@args)>
1990
1991Synthesize an operator by applying C<m:not> to another operator (C<$op>) applied to its C<@args>
1992(C<XMath> elements that will be converted to Content MathML).
This is useful to define a handler for, eg., C<not-approximately-equals> in terms
of C<m:approx>.
1995
1996=item C<cmml_synth_complement($op,@args)>
1997
1998Synthesize an operator by applying a complementary operator (C<$op>) to the reverse of its C<@args>
1999(C<XMath> elements that will be converted to Content MathML).
2000This is useful to define a handler for, eg. C<superset-of-or-equals> using C<m:subset>.
2001
2002=item C<cmml_or_compose($operators,@args)>
2003
2004Synthesize an operator that stands for the C<or> of several other operators
(eg. C<less-than-or-similar-to-or-approximately-equals>) by composing it
2006of the C<m:or> of applying each of C<m:less> and C<m:approx> to the arguments.
2007The first operator is applied to the converted arguments, while the rest
2008are applied to C<m:share> elements referring to the previous ones.
2009
2010=item C<cmml_share($node)>
2011
2012Converts the C<XMath> C<$node> to Content MathML, after assuring that it has an id,
2013so that it can be shared.
2014
2015=item C<cmml_shared($node)>
2016
Generates a C<m:share> element referring to C<$node>, which should have
2018an id (such as after calling C<cmml_share>).
2019
2020=back
2021
2022=head1 Math Processors, Generally.
2023
2024We should probably formalize the idea of a Math Processor as an
2025abstract class, but let this description provide a starting overview.
2026A MathProcessor follows the API of C<LaTeXML::Post> processors, by
2027handling C<process>, which invokes C<processNode> on all C<Math> nodes;
the latter inserts the result of either C<translateNode> or
2029C<translateParallel>, applied to the C<XMath> representation, into the C<Math> node.
2030
2031Parallel translation is done whenever additional MathProcessors have
2032been specified, via the C<setParallel> method; these are simply other
2033MathProcessors following the same API.
2034
2035=cut
2036