1# /=====================================================================\ #
2# |  LaTeXML::Common::Config                                            | #
3# | Configuration logic for LaTeXML                                     | #
4# |=====================================================================| #
5# | Part of LaTeXML:                                                    | #
6# |  Public domain software, produced as part of work done by the       | #
7# |  United States Government & not subject to copyright in the US.     | #
8# |---------------------------------------------------------------------| #
9# | Bruce Miller <bruce.miller@nist.gov>                                | #
10# | Deyan Ginev <deyan.ginev@nist.gov>                          #_#     | #
11# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
12# \=========================================================ooo==U==ooo=/ #
13package LaTeXML::Common::Config;
14use strict;
15use warnings;
16use Carp;
17use Getopt::Long qw(:config no_ignore_case);
18use Pod::Usage;
19use Pod::Find qw(pod_where);
20use LaTeXML::Util::Pathname;
21use LaTeXML::Global;
22use LaTeXML::Common::Error;
23use Data::Dumper;
# Package-level state and patterns shared by the subs below.
our $PROFILES_DB = {};    # Class-wide, caches all profiles that get used while the server is alive
# Matches a source that is either a "literal:" string starting with '@' or a name ending in .bib
our $is_bibtex   = qr/(^literal\:\s*\@)|(\.bib$)/;
# Matches a source that is either a "literal:" string starting with 'PK' or a name ending in .zip
our $is_archive  = qr/(^literal\:PK)|(\.zip$)/;

# The math-format helpers are exported as plain functions operating on an options hashref.
use base qw(Exporter);
our @EXPORT = (qw(addMathFormat removeMathFormat maybeAddMathFormat));
30
# Constructor.  Wraps the supplied key/value option pairs in a new Config object.
# The object starts out "dirty", meaning check() has not yet processed its options.
sub new {
  my ($class, %opts) = @_;
  #TODO: How about defaults in the daemon server use case? Should we support those here?
  #      or are defaults always bad/confusing to allow?
  my $self = { dirty => 1, opts => \%opts };
  return bless $self, $class; }
37
38###########################################
39#### Command-line reader              #####
40###########################################
# Builds the Getopt::Long specification for all LaTeXML options.
#
# Named arguments (passed as a flat key/value list):
#   options => hashref that parsed option values are linked into (defaults to a fresh hash)
#   type    => when equal to 'keyvals', a "representation" specification is returned instead
#   keyvals => arrayref collecting [name, value] pairs in 'keyvals' mode (defaults to a fresh array)
#
# Returns a two-element list:
#   ($spec, $opts)        in the default mode; $spec maps option specs to linkages into $opts
#   ($rep_spec, $keyvals) in 'keyvals' mode; every option spec is mapped to a callback
#                         that records what was seen on the command line into $keyvals.
#
# Note that taking references such as \$$opts{destination} or \@{ $$opts{preload} }
# creates the corresponding entries in $opts as a side effect of building the table.
sub getopt_specification {
  my (%options) = @_;
  my $opts      = $options{options} || {};
  my $spec      = {
    # Basics and Paths
    "output=s"        => \$$opts{destination},
    "destination=s"   => \$$opts{destination},
    "log=s"           => \$$opts{log},
    "preload=s"       => \@{ $$opts{preload} },
    "preamble=s"      => \$$opts{preamble},
    "postamble=s"     => \$$opts{postamble},
    "base=s"          => \$$opts{base},
    "path=s"          => \@{ $$opts{paths} },
    "quiet"           => sub { $$opts{verbosity}--; },
    "verbose"         => sub { $$opts{verbosity}++; },
    "strict"          => \$$opts{strict},
    "includestyles"   => \$$opts{includestyles},
    "inputencoding=s" => \$$opts{inputencoding},
    # Formats
    "xml"      => sub { $$opts{format}    = 'xml'; },
    "tex"      => sub { $$opts{format}    = 'tex'; },
    "box"      => sub { $$opts{format}    = 'box'; },
    "bibtex"   => sub { $$opts{type}      = 'BibTeX'; },
    "noparse"  => sub { $$opts{mathparse} = 'no'; },
    "format=s" => \$$opts{format},
    "parse=s"  => \$$opts{mathparse},
    # Profiles
    "profile=s"   => \$$opts{profile},
    "cache_key=s" => \$$opts{cache_key},
    "mode=s"      => \$$opts{profile},
    "source=s"    => \$$opts{source},
    # Output framing
    "embed"      => sub { $$opts{whatsout} = 'fragment'; },
    "whatsin=s"  => \$$opts{whatsin},
    "whatsout=s" => \$$opts{whatsout},
    # Daemon options
    "autoflush=i" => \$$opts{input_limit},
    "timeout=i"   => \$$opts{timeout},
    "expire=i"    => \$$opts{expire},
    "address=s"   => \$$opts{address},
    "port=i"      => \$$opts{port},
    # Post-processing
    "post!"           => \$$opts{post},
    "validate!"       => \$$opts{validate},
    "omitdoctype!"    => \$$opts{omitdoctype},
    "numbersections!" => \$$opts{numbersections},
    "timestamp=s"     => \$$opts{timestamp},
    # Various choices for math processing.
    # Note: Could want OM embedded in mml annotation, too.
    # In general, could(?) want multiple math reps within <Math>
    # OR, multiple math reps combined with <mml:sematics>
    #   or, in fact, _other_ parallel means? (om?, omdoc? ...)
    # So, need to separate multiple transformations from the combination.
    # However, IF combining, then will need to support a id/ref mechanism.
    "mathimagemagnification=f"    => \$$opts{mathimagemag},
    "linelength=i"                => \$$opts{linelength},
    "plane1!"                     => \$$opts{plane1},
    "hackplane1!"                 => \$$opts{hackplane1},
    "mathimages"                  => sub { addMathFormat($opts, 'images'); },
    "nomathimages"                => sub { removeMathFormat($opts, 'images'); },
    "mathsvg"                     => sub { addMathFormat($opts, 'svg'); },
    "nomathsvg"                   => sub { removeMathFormat($opts, 'svg'); },
    "presentationmathml|pmml"     => sub { addMathFormat($opts, 'pmml'); },
    "contentmathml|cmml"          => sub { addMathFormat($opts, 'cmml'); },
    "openmath|om"                 => sub { addMathFormat($opts, 'om'); },
    "keepXMath|xmath"             => sub { addMathFormat($opts, 'xmath'); },
    "nopresentationmathml|nopmml" => sub { removeMathFormat($opts, 'pmml'); },
    "nocontentmathml|nocmml"      => sub { removeMathFormat($opts, 'cmml'); },
    "noopenmath|noom"             => sub { removeMathFormat($opts, 'om'); },
    "nokeepXMath|noxmath"         => sub { removeMathFormat($opts, 'xmath'); },
    "mathtex"                     => sub { addMathFormat($opts, 'mathtex'); },
    "nomathtex"                   => sub { removeMathFormat($opts, 'mathtex'); },
    "parallelmath!"               => \$$opts{parallelmath},
    # Some general XSLT/CSS/JavaScript options.
    "stylesheet=s"      => \$$opts{stylesheet},
    "xsltparameter=s"   => \@{ $$opts{xsltparameters} },
    "css=s"             => \@{ $$opts{css} },
    "defaultresources!" => \$$opts{defaultresources},
    "javascript=s"      => \@{ $$opts{javascript} },
    "icon=s"            => \$$opts{icon},
    # Options for broader document set processing
    # (any of the split* value options also turns splitting on, unless --split/--nosplit was given)
    "split!"    => \$$opts{split},
    "splitat=s" => sub { $$opts{splitat} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "splitpath=s" => sub { $$opts{splitpath} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "splitnaming=s" => sub { $$opts{splitnaming} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "scan!"           => \$$opts{scan},
    "crossref!"       => \$$opts{crossref},
    "urlstyle=s"      => \$$opts{urlstyle},
    "navigationtoc=s" => \$$opts{navtoc},
    "navtoc=s"        => \$$opts{navtoc},
    # Generating indices
    "index!"         => \$$opts{index},
    "permutedindex!" => \$$opts{permutedindex},
    "splitindex!"    => \$$opts{splitindex},
    # Generating Bibliographies
    "bibliography=s"     => \@{ $$opts{bibliographies} },    # TODO: Document
    "splitbibliography!" => \$$opts{splitbibliography},
    # Options for two phase processing
    "prescan"           => \$$opts{prescan},
    "dbfile=s"          => \$$opts{dbfile},
    "sitedirectory=s"   => \$$opts{sitedirectory},
    "sourcedirectory=s" => \$$opts{sourcedirectory},
    # For graphics: vaguely similar issues, but more limited.
    # includegraphics images (eg. ps) can be converted to webimages (eg.png)
    # picture/pstricks images can be converted to png or possibly svg.
    "graphicimages!" => \$$opts{dographics},
    "graphicsmap=s"  => \@{ $$opts{graphicsmaps} },
    "svg!"           => \$$opts{svg},
    "pictureimages!" => \$$opts{picimages},
    # HELP
    "comments!"    => \$$opts{comments},
    "VERSION!"     => \$$opts{showversion},
    "debug=s"      => \@{ $$opts{debug} },
    "documentid=s" => \$$opts{documentid},
    "help"         => \$$opts{help}
  };
  # Default mode: hand back the specification together with the hash it is linked to.
  return ($spec, $opts) unless ($options{type} && ($options{type} eq 'keyvals'));
  # Representation use case:
  my $keyvals  = $options{keyvals} || [];
  my $rep_spec = {};                        # Representation specification
  foreach my $key (keys %$spec) {
    if ($key =~ /^(.+)=\w$/) {
      # Option taking a value: record [name, value]; $_[1] is the value handed to the callback
      my $name = $1;
      $$rep_spec{$key} = sub { CORE::push @$keyvals, [$name, $_[1]] };
    } else {
      # Flag-style option: record [name, undef], with a 'no' prefix when the first
      # control entry of the callback object is true.
      # NOTE(review): relies on the internal layout of the Getopt::Long callback
      # object ({ctl} array: presumably type at index 0, canonical name at index 1) -- confirm.
      $$rep_spec{$key} = sub {
        my $ctl  = $_[0]->{ctl};
        my $used = ($$ctl[0] ? 'no' : '') . $$ctl[1];
        CORE::push @$keyvals, [$used, undef] };
    }
  }
  return ($rep_spec, $keyvals);
}
# TODO: Separate the keyvals scan from getopt_specification()
#       into its own sub, using @GETOPT_KEYS entirely.
# All option specification strings (e.g. "destination=s", "post!"), computed once at load time;
# read_keyvals() consults this list to recognize options that take a value.
our @GETOPT_KEYS = keys %{ (getopt_specification())[0] };
180
# Parses a Getopt::Long style argument list into this object's options.
#   $argref       : arrayref of command-line style arguments (copied into a localized @ARGV)
#   %read_options : silent => true suppresses the usage/help output on errors
# Prints the usage and exits on a parsing failure or --help (unless silent),
# prints the identity and exits on --VERSION.
# Returns the success value reported by GetOptions.
sub read {
  my ($self, $argref, %read_options) = @_;
  my $opts = $$self{opts};
  local @ARGV = @$argref;
  my $silent = %read_options && $read_options{silent};
  my ($spec) = getopt_specification(options => $opts);
  my $getOptions_success = GetOptions(%{$spec});
  unless ($silent) {
    # Usage on STDERR (exit 1) for bad options; usage on STDOUT (exit 0) for --help
    if (!$getOptions_success) {
      pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 1, -verbose => 99,
        -input    => pod_where({ -inc => 1 }, __PACKAGE__),
        -sections => 'OPTION SYNOPSIS', -output => \*STDERR); }
    if ($$opts{help}) {
      pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 0, -verbose => 99,
        -input    => pod_where({ -inc => 1 }, __PACKAGE__),
        -sections => 'OPTION SYNOPSIS', output => \*STDOUT); } }

  # Check that options for system I/O (destination and log) are valid before wasting any time...
  for my $io_key ('destination', 'log') {
    next unless $$opts{$io_key};
    my $canonical = pathname_canonical($$opts{$io_key});
    $$opts{$io_key} = $canonical;
    my $dir = pathname_directory($canonical);
    next unless $dir;
    pathname_mkdir($dir) or croak "Couldn't create $io_key directory $dir: $!"; }
  # Removed math formats are irrelevant for conversion:
  delete $$opts{removed_math_formats};

  if ($$opts{showversion}) {
    print STDERR "$LaTeXML::IDENTITY\n";
    exit(0); }

  # First leftover argument is the source, unless one was given explicitly
  if (!$$opts{source}) {
    $$opts{source} = $ARGV[0]; }
  # Special source-based guessing needs to happen here,
  #   as we won't have access to the source file/literal/resource later on:
  my $type_is_open = !$$opts{type} || ($$opts{type} eq 'auto');
  if ($type_is_open && $$opts{source} && ($$opts{source} =~ /$is_bibtex/)) {
    $$opts{type} = 'BibTeX'; }
  if (!$$opts{whatsin} && $$opts{source} && ($$opts{source} =~ /$is_archive/)) {
    $$opts{whatsin} = 'archive'; }
  return $getOptions_success;
}
219
# Converts a flat list of key/value pairs into command-line style arguments
# and reads them into this object via read().
#   $conversion_options : arrayref [key1, value1, key2, value2, ...];
#                         NOTE: the array is consumed (emptied) while it is processed.
#   %read_options       : passed through to read()
# Keys may be given with or without a leading "--".
# A pair with an empty/undefined value is skipped entirely when the key names an
# option that requires a value; otherwise it is passed on as a bare flag.
# Returns whatever read() returns.
sub read_keyvals {
  my ($self, $conversion_options, %read_options) = @_;
  my $cmdopts = [];
  while (my ($key, $value) = splice(@$conversion_options, 0, 2)) {
    my $has_value = defined($value) && length($value);
    # Compare using the bare option name, so that "--destination" is treated like "destination";
    # \Q..\E keeps any regexp metacharacters in the key from being interpreted.
    (my $name = $key) =~ s/^\-\-//;
    # TODO: Is skipping over empty values ever harmful? Do we have non-empty defaults anywhere?
    next if (!$has_value) && (grep { /^\Q$name\E\=/ } @GETOPT_KEYS);
    $key   = "--$key" unless $key =~ /^\-\-/;
    $value = $has_value ? "=$value" : '';
    CORE::push @$cmdopts, "$key$value";
  }
  # Read into a Config object:
  return $self->read($cmdopts, %read_options); }
232
# Scans a command-line style argument list and records what was given
# as a list of [name, value] pairs, instead of storing values into a Config object.
#   $argref       : arrayref of arguments (copied into a localized @ARGV)
#   %read_options : silent => true suppresses the usage output on errors
# The first leftover argument, if true, is appended as ['source', ...].
# Returns the arrayref of pairs on success, or the false GetOptions result otherwise.
sub scan_to_keyvals {
  my ($self, $argref, %read_options) = @_;
  local @ARGV = @$argref;
  my $silent = %read_options && $read_options{silent};
  my ($spec, $keyvals) = getopt_specification(type => 'keyvals');
  my $getOptions_success = GetOptions(%$spec);
  unless ($getOptions_success || $silent) {
    pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 1, -verbose => 99,
      -input    => pod_where({ -inc => 1 }, __PACKAGE__),
      -sections => 'OPTION SYNOPSIS', -output => \*STDERR);
  }
  if ($ARGV[0]) {
    CORE::push @$keyvals, ['source', $ARGV[0]]; }
  return $getOptions_success && $keyvals;
}
247
248###########################################
249#### Options Object Hashlike API      #####
250###########################################
# Getter: returns the stored value of option $key (undef if it was never set).
sub get {
  my ($self, $key) = @_;
  return $self->{opts}->{$key}; }
254
# Setter: stores $value under option $key and marks the configuration
# as dirty, so that the next check() processes it again.  Returns nothing.
sub set {
  my ($self, $key, $value) = @_;
  $self->{dirty} = 1;
  $self->{opts}->{$key} = $value;
  return; }
260
# Appends $value to the list stored under option $key and marks the configuration as dirty.
# Whatever is stored under $key is replaced by a fresh empty list first,
# unless it already is a reference.  Returns nothing.
sub push {
  my ($self, $key, $value) = @_;
  $self->{dirty} = 1;
  if (!ref $self->{opts}->{$key}) {
    $self->{opts}->{$key} = []; }
  CORE::push @{ $self->{opts}->{$key} }, $value;
  return; }
267
# Removes option $key altogether and marks the configuration as dirty.  Returns nothing.
sub delete {
  my ($self, $key) = @_;
  delete $self->{opts}->{$key};
  $self->{dirty} = 1;
  return; }
273
# True when option $key is present in the configuration, even with an undefined value.
sub exists {
  my ($self, $key) = @_;
  return exists $self->{opts}->{$key}; }
277
# True when option $key holds a defined value.
sub defined {
  my ($self, $key) = @_;
  return defined $self->{opts}->{$key}; }
281
# Returns the names of all options currently present (their count, in scalar context).
sub keys {
  my ($self) = @_;
  return CORE::keys %{ $self->{opts} }; }
285
# Returns the underlying options hashref itself (not a copy).
sub options {
  my ($self) = @_;
  return $self->{opts}; }
289
# Creates a new Config object carrying the same options and the same dirty flag.
# Only the top level of the options hash is copied; values that are
# references (e.g. the path and preload lists) remain shared with the original.
sub clone {
  my ($self) = @_;
  my %copied = %{ $self->options };
  my $clone  = LaTeXML::Common::Config->new(%copied);
  $clone->{dirty} = $self->{dirty};
  return $clone; }
295
296###########################################
297#### Option Sanity Checking           #####
298###########################################
299
300# Perform all option sanity checks
# Runs all option sanity checks, but only if the configuration changed since the last run.
# Returns nothing when the configuration is clean, otherwise the result of _prepare_options.
sub check {
  my ($self) = @_;
  if ($self->{dirty}) {
    # 1. Resolve profile
    $self->_obey_profile;
    # 2. Place sane defaults where needed
    return $self->_prepare_options; }
  return; }
308
# Resolves the 'profile' option: looks the profile up in $PROFILES_DB or in a
# "<profile>.opt" file, and merges the explicitly given options over the profile's options.
# Afterwards the 'profile' option is removed and its (lower-cased) name is kept
# as 'cache_key', unless a cache_key was already given.
# An unknown profile produces a warning and is treated as 'custom'.
# Paths and preloads are merged (profile entries first, then new user entries);
# all other user options override the profile's values.
sub _obey_profile {
  my ($self) = @_;
  $$self{dirty} = 1;
  my $opts    = $$self{opts};
  my $profile = lc($$opts{profile} || 'custom');
  $profile =~ s/\.opt$//;
  # Look at the PROFILES_DB or find a profiles file (otherwise fallback to custom)
  my $profile_opts = {};
  if ($profile ne 'custom') {
    if (defined $$PROFILES_DB{$profile}) {
      %$profile_opts = %{ $$PROFILES_DB{$profile} };
    } elsif (my $file = pathname_find($profile . '.opt', paths => $$opts{paths},
        types => [], installation_subdir => 'resources/Profiles')) {
      my $conf_tmp = LaTeXML::Common::Config->new;
      # _read_options_file returns nothing if the file could not be opened;
      # in that case there is nothing to read and the profile contributes no options.
      my $file_args = _read_options_file($file);
      $conf_tmp->read($file_args) if $file_args;
      $profile_opts = $conf_tmp->options;
    } else {
      # Throw an error, fallback to custom
      carp("Warning:unexpected:$profile Profile $profile was not recognized, reverting to 'custom'\n");
      $$opts{profile} = 'custom';
      $profile = 'custom';
    }
  }
  # Erase the profile, save it as cache key
  delete $$opts{profile};
  $$opts{cache_key} = $profile unless defined $$opts{cache_key};
  if (%$profile_opts) {
    # Merge the new options with the profile defaults:
    for my $key (grep { defined $$opts{$_} } (CORE::keys %$opts)) {
      if ($key =~ /^p(ath|reload)/) {    # Paths and preloads get merged in
        # Merge into a fresh array: the profile's own array may be shared with
        # the cached entry in $PROFILES_DB, which must not accumulate user entries.
        my @merged = @{ $$profile_opts{$key} || [] };
        my %seen   = map { ($_ => 1) } @merged;
        foreach my $entry (@{ $$opts{$key} }) {
          # If new to the array, push:
          next if $seen{$entry}++;
          CORE::push(@merged, $entry);
        }
        $$profile_opts{$key} = \@merged;
      } else {                           # The other options get overwritten
        $$profile_opts{$key} = $$opts{$key};
      }
    }
    %$opts = %$profile_opts;             # Move back into the user options
  }
  return; }
355
356# TODO: Best way to throw errors when options don't work out?
357#       How about in the case of Extras::ReadOptions?
358#       Error() and Warn() would be neat, but we have to make sure STDERR is caught beforehand.
359#       Also, there is no eval() here, so we might need a softer handling of Error()s.
# Completes and sanity-checks the options in place:
#   I.  core options: debug flags, daemon limits, math parsing, paths, whatsin/whatsout,
#       and the mutual guessing of destination extension and format;
#   II. post-processing options (only when post-processing is enabled or implied):
#       scanning, cross-referencing, splitting, indices, bibliographies, graphics,
#       stylesheet and math formats.
# Croaks on inconsistent or unrecognized option values.
# Clears the object's dirty flag when done; returns nothing.
sub _prepare_options {
  my ($self) = @_;
  my $opts = $$self{opts};
  #======================================================================
  # I. Sanity check and Completion of Core options.
  #======================================================================
  # "safe" and semi-perlcritic acceptable way to set DEBUG inside arbitrary modules.
  # Note: 'LaTeXML' refers to the top-level class
  foreach my $ltx_class (@{ $$opts{debug} || [] }) {
    $LaTeXML::DEBUG{$ltx_class} = 1; }

  $$opts{input_limit}   = 100          unless defined $$opts{input_limit}; # 100 jobs until restart
  $$opts{timeout}       = 600          unless defined $$opts{timeout};     # 10 minute timeout default
  $$opts{expire}        = 600          unless defined $$opts{expire};      # 10 minute timeout default
  $$opts{mathparse}     = 'RecDescent' unless defined $$opts{mathparse};
  $$opts{inputencoding} = "utf-8"      unless defined $$opts{inputencoding};
  if ($$opts{mathparse} eq 'no') {
    $$opts{mathparse}   = 0;
    $$opts{nomathparse} = 1; }                                             #Backwards compatible
##  $$opts{verbosity} = 0     unless defined $$opts{verbosity};
  $$opts{preload} = []    unless defined $$opts{preload};
  $$opts{paths}   = ['.'] unless defined $$opts{paths};
  @{ $$opts{paths} } = map { pathname_canonical($_) } @{ $$opts{paths} };
  foreach (('destination', 'dbfile', 'sourcedirectory', 'sitedirectory')) {
    $$opts{$_} = pathname_canonical($$opts{$_}) if defined $$opts{$_};
  }

  if (!defined $$opts{whatsin}) {
    if ($$opts{preamble} || $$opts{postamble}) {
      # Preamble or postamble imply a fragment whatsin
      $$opts{whatsin} = 'fragment'; }
    else {    # Default input chunk is a document
      $$opts{whatsin} = 'document'; } }
  $$opts{type} = 'auto' unless defined $$opts{type};
  unshift(@{ $$opts{preload} }, ('TeX.pool', 'LaTeX.pool', 'BibTeX.pool')) if ($$opts{type} eq 'BibTeX');

  # Destination extension might indicate the format:
  if ((!defined $$opts{extension}) && (defined $$opts{destination})) {
    if ($$opts{destination} =~ /\.([^.]+)$/) {
      $$opts{extension} = $1; } }
  if ((!defined $$opts{format}) && (defined $$opts{extension})) {
    $$opts{format} = $$opts{extension}; }
  if ((!defined $$opts{extension}) && (defined $$opts{format})) {
    if ($$opts{format} =~ /^html/) {
      $$opts{extension} = 'html'; }
    elsif ($$opts{format} =~ /^xhtml/) {
      $$opts{extension} = 'xhtml'; }
    else {
      $$opts{extension} = 'xml'; } }
  if (!defined $$opts{whatsout}) {
    if ((defined $$opts{extension}) && ($$opts{extension} eq 'zip')) {
      $$opts{whatsout} = 'archive';
    } else {
      $$opts{whatsout} = 'document';
  } }
  if ($$opts{format}) {
    # Lower-case for sanity's sake
    $$opts{format} = lc($$opts{format});
    if ($$opts{format} eq 'zip') {
      # Not encouraged! But try to produce something sensible anyway...
      $$opts{format}   = 'html5';
      $$opts{whatsout} = 'archive'; }
    else {    # Default HTML is 5
      $$opts{format} = 'html5' if $$opts{format} eq 'html'; }

    $$opts{is_html}  = ($$opts{format} =~ /^html/);
    $$opts{is_xhtml} = ($$opts{format} =~ /^(xhtml5?|epub|mobi)$/);
    $$opts{whatsout} = 'archive' if (($$opts{format} eq 'epub') || ($$opts{format} eq 'mobi'));
  } else {
    $$opts{format} = 'xml'    # We failed to guess format in any-which-way, so XML is default
  }
  #======================================================================
  # II. Sanity check and Completion of Post options.
  #======================================================================
  # Any post switch implies post (TODO: whew, lots of those, add them all!):
  $$opts{math_formats} = [] unless defined $$opts{math_formats};
  $$opts{post}         = 1 if ((!defined $$opts{post}) &&
    (scalar(@{ $$opts{math_formats} })
      || ($$opts{stylesheet})
      || $$opts{is_html}
      || $$opts{is_xhtml}
      || (($$opts{format} || '') eq 'jats')
      || ($$opts{whatsout} && ($$opts{whatsout} ne 'document'))
    )
  );
# || ... || ... || ...
# $$opts{post}=0 if (defined $$opts{mathparse} && (! $$opts{mathparse})); # No-parse overrides post-processing
  if ($$opts{post}) {    # No need to bother if we're not post-processing
                         # Default: scan and crossref on, other advanced off
    $$opts{prescan}       = undef unless defined $$opts{prescan};
    $$opts{dbfile}        = undef unless defined $$opts{dbfile};
    $$opts{scan}          = 1     unless defined $$opts{scan};
    $$opts{index}         = 1     unless defined $$opts{index};
    $$opts{crossref}      = 1     unless defined $$opts{crossref};
    # Site directory: explicit value, else directory of the destination, else of the dbfile, else "."
    $$opts{sitedirectory} = defined $$opts{sitedirectory} ? $$opts{sitedirectory}
      : (defined $$opts{destination} ? pathname_directory($$opts{destination})
      : (defined $$opts{dbfile} ? pathname_directory($$opts{dbfile})
        : "."));
    $$opts{sourcedirectory} = undef unless defined $$opts{sourcedirectory};
    $$opts{numbersections}  = 1     unless defined $$opts{numbersections};
    # NOTE(review): numbersections is always defined at this point, so the next line never
    # takes effect; presumably "unless defined $$opts{navtoc}" was intended -- confirm.
    $$opts{navtoc}          = undef unless defined $$opts{numbersections};
    $$opts{navtocstyles} = { context => 1, normal => 1, none => 1 } unless defined $$opts{navtocstyles};
    $$opts{navtoc}       = lc($$opts{navtoc}) if defined $$opts{navtoc};
    delete $$opts{navtoc} if ($$opts{navtoc} && ($$opts{navtoc} eq 'none'));

    if ($$opts{navtoc}) {
      if (!$$opts{navtocstyles}->{ $$opts{navtoc} }) {
        croak($$opts{navtoc} . " is not a recognized style of navigation TOC"); }
      if (!$$opts{crossref}) {
        croak("Cannot use option \"navigationtoc\" (" . $$opts{navtoc} . ") without \"crossref\""); } }
    $$opts{urlstyle}       = 'server' unless defined $$opts{urlstyle};
    $$opts{bibliographies} = []       unless defined $$opts{bibliographies};

    # Validation:
    $$opts{validate} = 1 unless defined $$opts{validate};
    # Graphics:
    $$opts{mathimagemag} = 1.75 unless defined $$opts{mathimagemag};
    if ((defined $$opts{destination}) || ($$opts{whatsout} =~ /^archive/)) {
      # We want the graphics enabled by default, but only when we have a destination
      $$opts{dographics} = 1 unless defined $$opts{dographics};
      $$opts{picimages}  = 1 if (($$opts{format} eq "html4") || ($$opts{format} eq "jats"))
        && !defined $$opts{picimages};
    }
    # Split sanity:
    if ($$opts{split}) {
      $$opts{splitat}        = 'section' unless defined $$opts{splitat};
      $$opts{splitnaming}    = 'id'      unless defined $$opts{splitnaming};
      # For each unit one can split at: the coarser units that get split along with it
      $$opts{splitancestors} = {
        part          => [qw()],
        chapter       => [qw(part)],
        section       => [qw(part chapter)],
        subsection    => [qw(part chapter section)],
        subsubsection => [qw(part chapter section subsection)] };
      $$opts{splitback} = [qw(bibliography appendix index)];

      $$opts{splitnaming} = _checkOptionValue('--splitnaming', $$opts{splitnaming},
        qw(id idrelative label labelrelative));
      $$opts{splitat} = _checkOptionValue('--splitat', $$opts{splitat}, CORE::keys %{ $$opts{splitancestors} });
      $$opts{splitpath} = make_splitpaths($opts, $$opts{splitat}) unless defined $$opts{splitpath}; }
    # Check for appropriate combination of split, scan, prescan, dbfile, crossref
    if ($$opts{split} && (!defined $$opts{destination}) && ($$opts{whatsout} !~ /^archive/)) {
      croak("Must supply --destination when using --split"); }
    if ($$opts{prescan} && !$$opts{scan}) {
      croak("Makes no sense to --prescan with scanning disabled (--noscan)"); }
    if ($$opts{prescan} && (!defined $$opts{dbfile})) {
      croak("Cannot prescan documents (--prescan) without specifying --dbfile"); }
    if (!$$opts{prescan} && $$opts{crossref} && !($$opts{scan} || (defined $$opts{dbfile}))) {
      croak("Cannot cross-reference (--crossref) without --scan or --dbfile "); }
    if ($$opts{crossref}) {
      $$opts{urlstyle} = _checkOptionValue('--urlstyle', $$opts{urlstyle}, qw(server negotiated file)); }
    if (($$opts{permutedindex} || $$opts{splitindex}) && (!defined $$opts{index})) {
      $$opts{index} = 1; }
    # NOTE(review): the two checks below test "defined crossref" although their messages
    # mention --dbfile; crossref was defaulted above, so they cannot fire -- confirm intent.
    if (!$$opts{prescan} && $$opts{index} && !($$opts{scan} || defined $$opts{crossref})) {
      croak("Cannot generate index (--index) without --scan or --dbfile"); }
    if (!$$opts{prescan} && @{ $$opts{bibliographies} } && !($$opts{scan} || defined $$opts{crossref})) {
      croak("Cannot generate bibliography (--bibliography) without --scan or --dbfile"); }

    # There is now a legitimate case to preserve graphics here.
    # if ((!defined $$opts{destination}) && ($$opts{whatsout} !~ /^archive/)
    #   && (_checkMathFormat($opts, 'images') || _checkMathFormat($opts, 'svg')
    #     || $$opts{dographics} || $$opts{picimages})) {
    #   croak("Must supply --destination unless all auxilliary file writing is disabled"
    #       . "(--nomathimages --nomathsvg --nographicimages --nopictureimages --nodefaultcss)"); }

    # Format:
    #Default is XHTML, XML otherwise (TODO: Expand)
    # (format has already been defaulted in part I, so this only acts as a safety net)
    if (!defined $$opts{format}) {
      if   ($$opts{stylesheet}) { $$opts{format} = "xml"; }
      else                      { $$opts{format} = "xhtml"; }
    }

    # Pick the default stylesheet for the format, unless one was given explicitly
    if (!$$opts{stylesheet}) {
      if    ($$opts{format} eq 'xhtml')           { $$opts{stylesheet} = "LaTeXML-xhtml.xsl"; }
      elsif ($$opts{format} eq "html4")           { $$opts{stylesheet} = "LaTeXML-html4.xsl"; }
      # Group the alternatives, so that both anchors apply to both format names
      elsif ($$opts{format} =~ /^(?:epub|mobi)$/) { $$opts{stylesheet} = "LaTeXML-epub3.xsl"; }
      elsif ($$opts{format} eq "html5")           { $$opts{stylesheet} = "LaTeXML-html5.xsl"; }
      elsif ($$opts{format} eq "jats")            { $$opts{stylesheet} = "LaTeXML-jats.xsl"; }
      elsif ($$opts{format} eq "xml")             { delete $$opts{stylesheet}; }
      else { croak("Unrecognized target format: " . $$opts{format}); }
    }
    # Check format and complete math and image options
    if ($$opts{format} eq 'html4') {
      $$opts{svg} = 0 unless defined $$opts{svg};    # No SVG by default in HTML.
      croak("Default html4 stylesheet only supports math images, not " . join(', ', @{ $$opts{math_formats} }))
        if (!defined $$opts{stylesheet})
        && scalar(grep { $_ ne 'images' } @{ $$opts{math_formats} });
      croak("Default html stylesheet does not support svg") if $$opts{svg};
      $$opts{math_formats} = [];
      maybeAddMathFormat($opts, 'images');
    }
    $$opts{svg} = 1 unless defined $$opts{svg};    # If we're not making HTML, SVG is on by default
        # PMML default if we're HTMLy and all else fails and no mathimages:
    if (((!defined $$opts{math_formats}) || (!scalar(@{ $$opts{math_formats} })))
      && ($$opts{is_html} || $$opts{is_xhtml} || ($$opts{format} eq 'jats'))) {
      CORE::push @{ $$opts{math_formats} }, 'pmml';
    }
    # use parallel markup if there are multiple formats requested.
    $$opts{parallelmath} = 1 if ($$opts{math_formats} && (@{ $$opts{math_formats} } > 1));
  }
  # If really nothing hints to define format, then default it to XML
  $$opts{format} = 'xml' unless defined $$opts{format};
  $$self{dirty}  = 0;
  return; }
566
567## Public Utilities:
568
# Appends math format $fmt to the math_formats list of the options hashref $opts,
# unless it is already listed or was explicitly removed before.  Returns nothing.
sub addMathFormat {
  my ($opts, $fmt) = @_;
  if (!defined $$opts{math_formats}) {
    $$opts{math_formats} = []; }
  my $formats = $$opts{math_formats};
  foreach my $listed (@$formats) {
    return if $listed eq $fmt; }    # Already requested
  return if $$opts{removed_math_formats}->{$fmt};    # Explicitly forbidden
  CORE::push(@$formats, $fmt);
  return; }
575
# Drops math format $fmt from the math_formats list of the options hashref $opts
# and remembers it as removed, so that a later addMathFormat does not re-add it.
# Returns nothing.
sub removeMathFormat {
  my ($opts, $fmt) = @_;
  @{ $opts->{math_formats} } = grep { !($_ eq $fmt) } @{ $opts->{math_formats} };
  $opts->{removed_math_formats}{$fmt} = 1;
  return; }
581
582# Add a default math format, when no math formatter is requested, unless specifically forbidden
# Add a default math format, when no math formatter is requested, unless specifically forbidden
#   $opts : options hashref (its math_formats list is created if missing)
#   $fmt  : name of the math format to use as the default
# Returns nothing.
sub maybeAddMathFormat {
  my ($opts, $fmt) = @_;
  # Make sure the list exists before looking into it, as addMathFormat does;
  # dereferencing an undefined value here would be fatal under "use strict".
  $$opts{math_formats} = [] unless defined $$opts{math_formats};
  unshift(@{ $$opts{math_formats} }, $fmt)
    unless @{ $$opts{math_formats} } || $$opts{removed_math_formats}{$fmt};
  return; }
588
# Tells whether math format $fmt is among the requested math_formats of $opts:
# returns the matching entries in list context, their count in scalar context.
sub _checkMathFormat {
  my ($opts, $fmt) = @_;
  return grep { $fmt eq $_ } @{ $opts->{math_formats} }; }
592
593## Utilities:
594
# Expands a (possibly abbreviated) option value to the first of @choices starting with it.
#   $option  : option name, only used in the error message
#   $value   : the value given by the user
#   @choices : the allowed values, in order of preference
# Returns the matching choice; croaks when $value is false or is a prefix of no choice.
sub _checkOptionValue {
  my ($option, $value, @choices) = @_;
  if ($value) {
    for my $candidate (@choices) {
      return $candidate if index($candidate, $value) == 0; } }
  croak("Value for $option, $value, doesn't match " . join(', ', @choices)); }
601
602# Contrived xpath, since backmatter can now be at any level!
# Contrived xpath, since backmatter can now be at any level!
# Builds the XPath selecting the elements at which the document gets split:
# the unit $splitat and each of its ancestors units (from $$opts{splitancestors}),
# plus, for each of those units, every backmatter element (from $$opts{splitback})
# that follows a sibling of that unit or whose parent is one of the unit's ancestors.
# Returns the alternatives joined by ' | '.
sub make_splitpaths {
  my ($opts, $splitat) = @_;
  my $ancestors = $$opts{splitancestors}{$splitat};
  my @units     = ($splitat, ($ancestors ? @$ancestors : ()));
  my @xpaths    = ();
  for my $unit (@units) {
    CORE::push(@xpaths, '//ltx:' . $unit);
    for my $back (@{ $$opts{splitback} }) {
      my @conditions = ("preceding-sibling::ltx:$unit",
        map { 'parent::ltx:' . $_ } @{ $$opts{splitancestors}{$unit} });
      CORE::push(@xpaths, '//ltx:' . $back . '[' . join(' or ', @conditions) . ']'); } }
  return join(' | ', @xpaths); }
615
616### This is from t/lib/TestDaemon.pm and ideally belongs in Util::Pathname
# Reads a profile/options file and converts it into a list of command-line style arguments.
#   $file : path of the options file
# Each meaningful line has the form "key = value"; everything from an unquoted '#' on is
# a comment (write \# for a literal '#').  The line is split at its FIRST '=',
# so that values may themselves contain '='.
# A value of the form $NAME is replaced by the existing directories listed (':' separated,
# in reversed order) in the environment variable NAME, one argument per directory;
# a value of the form $NAME/rest is replaced by the value of NAME followed by /rest.
# Returns an arrayref of "--key=value" (or "--key" for an empty value) strings,
# or nothing when the file cannot be opened.
sub _read_options_file {
  my ($file) = @_;
  my $opts = [];
  my $OPT;
#### Now can we report status to right places before we've gotten configuration??? (verbosity, logfile...)
####  ProgressSpinup("Loading profile $file");
  unless (open($OPT, "<", $file)) {
    Error('expected', $file, "Could not open options file '$file'");
    return; }
  while (my $line = <$OPT>) {
    # Cleanup comments, padding on the input line.
    $line =~ s/(?<!\\)#.*$//;    # Strip trailing comments starting w/ # (but \# is quoted)
    $line =~ s/\\#/#/g;          # unslashify any \#
    $line =~ s/^\s+//;           # Trim leading & trailing whitespace (including the newline)
    $line =~ s/\s+$//;
    next unless $line;           # if line isn't empty, after that.....
    if ($line =~ /([^\s=]+)\s*=\s*(.*)/) {
      # Keep the value exactly as written: a literal "0" is a legitimate value.
      my ($key, $value) = ($1, $2);
      $value =~ s/\s+$//;
      # Special treatment for --path=$env:
      if ($value =~ /^\$(.+)$/) {
        my @values   = ();
        my $env_name = $1;
        my $env_value;
        # Allow $env/foo paths, starting with $env prefixes
        if ($env_name =~ /^([^\/]+)(\/+)(.+)$/) {
          my $trailer = $3;
          if (my $env_path = $ENV{$1}) {
            $env_path .= '/' unless $env_path =~ /\/$/;
            CORE::push @values, $env_path . $trailer; } }
        else {
          # But also the standard behaviour, where the $env is an array of paths
          $env_value = $ENV{$env_name};
          next unless $env_value;
          @values = grep { -d $_ } reverse(split(':', $env_value));
          next unless @values; }
        CORE::push(@$opts, "--$key=$_") foreach (@values); }
      else {
        $value = length($value) ? "=$value" : '';
        CORE::push @$opts, "--$key" . $value; } }
    else {
      Warning('unexpected', $line, undef,
        "Unrecognized configuration data '$line'"); }
  }
  close $OPT;
####  ProgressSpindown("Loading profile $file");
  return $opts; }
665
6661;
667
668__END__
669
670=pod
671
672=head1 NAME
673
674C<LaTeXML::Common::Config> - Configuration logic for LaTeXML
675
676=head1 SYNOPSIS
677
678    use LaTeXML::Common::Config;
679    my $config = LaTeXML::Common::Config->new(
680              profile=>'name',
681              timeout=>60,
682              ... );
683    $config->read(\@ARGV);
684    $config->check;
685
686    my $value = $config->get($name);
687    $config->set($name,$value);
688    $config->delete($name);
689    my $bool = $config->exists($name);
690    my @keys = $config->keys;
691    my $options_hashref = $config->options;
692    my $config_clone = $config->clone;
693
694=head1 DESCRIPTION
695
696Configuration management class for LaTeXML options.
697    * Responsible for defining the options interface
698      and parsing the usual Perl command-line options syntax
699    * Provides the intuitive getters, setters, as well as
700      hash methods for manipulating the option values.
701    * Also supports cloning into new configuration objects.
702
703=head2 METHODS
704
705=over 4
706
707=item C<< my $config = LaTeXML::Common::Config->new(%options); >>
708
709Creates a new configuration object. Note that you should try
710    not to provide your own %options hash but rather create an empty
711    configuration and use $config->read to read in the options.
712
713=item C<< $config->read(\@ARGV); >>
714
715This is the main method for parsing in LaTeXML options.
716    The input array should either be @ARGV, e.g. when the
717    options were provided from the command line using the
718    classic Getopt::Long syntax,
719    or any other array reference that conforms to that setup.
720
721=item C<< $config->check; >>
722
723Ensures that the configuration obeys the given profile and
724    performs a set of assignments of meaningful defaults
725    (when needed) and normalizations (for relative paths, etc).
726
727=item C<< my $value = $config->get($name); >>
728
729Classic getter for the $value of an option $name.
730
731=item C<< $config->set($name,$value); >>
732
733Classic setter for the $value of an option $name.
734
735=item C<< $config->delete($name); >>
736
737Deletes option $name from the configuration.
738
739=item C<< my $bool = $config->exists($name); >>
740
741Checks whether the key $name exists in the options hash of the configuration.
742    Similarly to Perl's "exists" for hashes, it returns true even when
743    the option's value is undefined.
744
745=item C<< my @keys = $config->keys; >>
746
747Similar to "keys %hash" in Perl. Returns an array of all option names.
748
749=item C<< my $options_hashref = $config->options; >>
750
751Returns the actual hash reference that holds all options within the configuration object.
752
753=item C<< my $config_clone = $config->clone; >>
754
755Clones $config into a new LaTeXML::Common::Config object, $config_clone.
756
757=back
758
759=head1 OPTION SYNOPSIS
760
761latexmlc [options]
762
763 Options:
764 --VERSION               show version number.
765 --help                  shows this help message.
766 --destination=file      specifies destination file.
767 --output=file           [obsolete synonym for --destination]
768 --preload=module        requests loading of an optional module;
769                         can be repeated
770 --preamble=file         loads a tex file containing document
771                         frontmatter. MUST include \begin{document}
772                         or equivalent
773 --postamble=file        loads a tex file containing document
774                         backmatter. MUST include \end{document}
775                         or equivalent
776 --includestyles         allows latexml to load raw *.sty file;
777                         by default it avoids this.
778 --base=dir              sets the current working directory
779 --path=dir              adds dir to the paths searched for files,
780                         modules, etc;
781 --log=file              specifies log file (default: STDERR)
782 --autoflush=count       Automatically restart the daemon after
783                         "count" inputs. Good practice for vast
784                         batch jobs. (default: 100)
785 --timeout=secs          Timecap for conversions (default 600)
786 --expire=secs           Timecap for server inactivity (default 600)
787 --address=URL           Specify server address (default: localhost)
788 --port=number           Specify server port (default: 3354)
789 --documentid=id         assign an id to the document root.
790 --quiet                 suppress messages (can repeat)
791 --verbose               more informative output (can repeat)
792 --strict                makes latexml less forgiving of errors
793 --bibtex                processes a BibTeX bibliography.
794 --xml                   requests xml output (default).
795 --tex                   requests TeX output after expansion.
796 --box                   requests box output after expansion
797                         and digestion.
798 --format=name           requests "name" as the output format.
799                         Supported: tex,box,xml,html4,html5,xhtml
800                         html implies html5
801 --noparse               suppresses parsing math (default: off)
802 --parse=name            enables parsing math (default: on)
803                         and selects parser framework "name".
804                         Supported: RecDescent, no
805 --profile=name          specify profile as defined in
806                         LaTeXML::Common::Config
807                         Supported: standard|math|fragment|...
808                         (default: standard)
809 --mode=name             Alias for profile
810 --cache_key=name        Provides a name for the current option set,
811                         to enable daemonized conversions without
812                         needing re-initializing
813 --whatsin=chunk         Defines the provided input chunk,
814                         choose from document (default), fragment
815                         and formula
816 --whatsout=chunk        Defines the expected output chunk,
817                         choose from document (default), fragment
818                         and formula
819 --post                  requests a followup post-processing
820 --nopost                forbids followup post-processing
821 --validate, --novalidate Enables (the default) or disables
822                         validation of the source xml.
823 --omitdoctype           omits the Doctype declaration,
824 --noomitdoctype         disables the omission (the default)
825 --numbersections        enables (the default) the inclusion of
826                         section numbers in titles, crossrefs.
827 --nonumbersections      disables the above
828 --timestamp             provides a timestamp (typically a time and date)
829                         to be embedded in the comments
830 --embed                 requests an embeddable XHTML snippet
831                         (requires: --post,--profile=fragment)
832                         DEPRECATED: Use --whatsout=fragment
833                         TODO: Remove completely
834 --stylesheet            specifies a stylesheet,
835                         to be used by the post-processor.
836 --css=cssfile           adds a css stylesheet to html/xhtml
837                         (can be repeated)
838 --nodefaultresources    disables processing built-in resources
839 --javascript=jsfile     adds a link to a javascript file into
840                         html/html5/xhtml (can be repeated)
841 --icon=iconfile         specify a file to use as a "favicon"
842 --xsltparameter=name:value passes parameters to the XSLT.
843 --split                 requests splitting each document
844 --nosplit               disables the above (default)
845 --splitat               sets level to split the document
846 --splitpath=xpath       sets xpath expression to use for
847                         splitting (default splits at
848                         sections, if splitting is enabled)
849 --splitnaming=(id|idrelative|label|labelrelative) specifies
850                         how to name split files (idrelative).
851 --scan                  scans documents to extract ids,
852                         labels, section titles, etc.
853                         (default)
854 --noscan                disables the above
855 --crossref              fills in crossreferences (default)
856 --nocrossref            disables the above
857 --urlstyle=(server|negotiated|file) format to use for urls
858                         (default server).
859 --navigationtoc=(context|none) generates a table of contents
860                         in navigation bar
861 --index                 requests creating an index (default)
862 --noindex               disables the above
863 --splitindex            Splits index into pages per initial.
864 --nosplitindex          disables the above (default)
865 --permutedindex         permutes index phrases in the index
866 --nopermutedindex       disables the above (default)
867 --bibliography=file     sets a bibliography file
868 --splitbibliography     splits the bibliography into pages per
869                         initial.
870 --nosplitbibliography   disables the above (default)
871 --prescan               carries out only the split (if
872                         enabled) and scan, storing
873                         cross-referencing data in dbfile
874                         (default is complete processing)
875 --dbfile=dbfile         sets file to store crossreferences
876 --sitedirectory=dir     sets the base directory of the site
877 --sourcedirectory=dir   sets the base directory of the
878                         original TeX source
879 --source=input          as an alternative to passing the input as
880                         the last argument, after the option set
881                         you can also specify it as the value here.
882                         useful for predictable API calls
883 --mathimages            converts math to images
884                         (default for html4 format)
885 --nomathimages          disables the above
886 --mathimagemagnification=mag specifies magnification factor
887 --presentationmathml    converts math to Presentation MathML
888                         (default for xhtml & html5 formats)
889 --pmml                  alias for --presentationmathml
890 --nopresentationmathml  disables the above
891 --linelength=n          formats presentation mathml to a
892                         linelength max of n characters
893 --contentmathml         converts math to Content MathML
894 --nocontentmathml       disables the above (default)
895 --cmml                  alias for --contentmathml
896 --openmath              converts math to OpenMath
897 --noopenmath            disables the above (default)
898 --om                    alias for --openmath
899 --keepXMath             preserves the intermediate XMath
900                         representation (default is to remove)
901 --mathtex               adds TeX annotation to parallel markup
902 --nomathtex             disables the above (default)
903 --parallelmath          use parallel math annotations (default)
904 --noparallelmath        disable parallel math annotations
905 --plane1                use plane-1 unicode for symbols
906                         (default, if needed)
907 --noplane1              do not use plane-1 unicode
908 --graphicimages         converts graphics to images (default)
909 --nographicimages       disables the above
910 --graphicsmap=type.type specifies a graphics file mapping
911 --pictureimages         converts picture environments to
912                         images (default)
913 --nopictureimages       disables the above
914 --svg                   converts picture environments to SVG
915 --nosvg                 disables the above (default)
916 --nocomments            omit comments from the output
917 --inputencoding=enc     specify the input encoding.
918 --debug=package         enables debugging output for the named
919                         package
920
921
922If you want to provide a TeX snippet directly on input, rather than supply a filename,
923use the C<literal:> protocol to prefix your snippet.
924
925=head1 OPTIONS AND ARGUMENTS
926
927=head2 General Options
928
929=over 4
930
931=item C<--verbose>
932
933Increases the verbosity of output during processing, used twice is pretty chatty.
934    Can be useful for getting more details when errors occur.
935
936=item C<--quiet>
937
938Reduces the verbosity of output during processing, used twice is pretty silent.
939
940=item C<--VERSION>
941
942Shows the version number of the LaTeXML package.
943
944=item C<--debug>=I<package>
945
946Enables debugging output for the named package. The package is given without the leading LaTeXML::.
947
948=item C<--base>=I<dir>
949
950Specifies the base working directory for the conversion server.
951    Useful when converting sets of documents that use relative paths.
952
953=item C<--log>=I<file>
954
955Specifies the log file; by default any conversion messages are printed to STDERR.
956
957=item C<--help>
958
959Shows this help message.
960
961=back
962
963
964=head2 Source Options
965
966=over 4
967
968=item C<--destination>=I<file>
969
970Specifies the destination file; by default the XML is written to STDOUT.
971
972
973=item C<--preload>=I<module>
974
975Requests the loading of an optional module or package.  This may be useful if the TeX code
976    does not specifically require the module (eg. through input or usepackage).
977    For example, use C<--preload=LaTeX.pool> to force LaTeX mode.
978
979=item C<--preamble>=I<file>
980
981Requests the loading of a tex file with document frontmatter, to be read in before the converted document,
982    but after all --preload entries.
983
984Note that the given file MUST contain \begin{document} or an equivalent environment start,
985    when processing LaTeX documents.
986
987If the file does not contain content to appear in the final document, but only macro definitions and
988    setting of internal counters, it is more appropriate to use --preload instead.
989
990=item C<--postamble>=I<file>
991
992Requests the loading of a tex file with document backmatter, to be read in after the converted document.
993
994Note that the given file MUST contain \end{document} or an equivalent environment end,
995    when processing LaTeX documents.
996
997=item C<--sourcedirectory>=I<source>
998
999Specifies the directory where the original latex source is located.
1000Unless LaTeXML is run from that directory, or it can be determined
1001from the xml filename, it may be necessary to specify this option in
1002order to find graphics and style files.
1003
1004=item C<--path>=I<dir>
1005
1006Add I<dir> to the search paths used when searching for files, modules, style files, etc;
1007    somewhat like TEXINPUTS.  This option can be repeated.
1008
1009=item C<--validate>, C<--novalidate>
1010
1011Enables (the default) or disables the validation of the source XML document.
1012
1013=item C<--bibtex>
1014
1015Forces latexml to treat the file as a BibTeX bibliography.
1016    Note that the timing is slightly different than the usual
1017    case with BibTeX and LaTeX.  In the latter case, BibTeX simply
1018    selects and formats a subset of the bibliographic entries; the
1019    actual TeX expansion is carried out when the result is included
1020    in a LaTeX document.  In contrast, latexml processes and expands
1021    the entire bibliography; the selection of entries is done
1022    during post-processing.  This also means that any packages
1023    that define macros used in the bibliography must be
1024    specified using the C<--preload> option.
1025
1026=item C<--inputencoding=>I<encoding>
1027
1028Specify the input encoding, eg. C<--inputencoding=iso-8859-1>.
1029    The encoding must be one known to Perl's Encode package.
1030    Note that this only enables the translation of the input bytes to
1031    UTF-8 used internally by LaTeXML, but does not affect catcodes.
1032    In such cases, you should be using the inputenc package.
1033    Note also that this does not affect the output encoding, which is
1034    always UTF-8.
1035
1036=back
1037
1038
1039=head2 TeX Conversion Options
1040
1041=over 4
1042
1043=item C<--includestyles>
1044
1045This option allows processing of style files (files with extensions C<sty>,
1046    C<cls>, C<clo>, C<cnf>).  By default, these files are ignored unless a latexml
1047    implementation of them is found (with an extension of C<ltxml>).
1048
1049These style files generally fall into two classes:  Those
1050    that merely affect document style are ignorable in the XML.
1051    Others define new markup and document structure, often using
1052    deeper LaTeX macros to achieve their ends.  Although the omission
1053    will lead to other errors (missing macro definitions), it is
1054    unlikely that processing the TeX code in the style file will
1055    lead to a correct document.
1056
1057
1058=item C<--timeout>=I<secs>
1059
1060Set time cap for conversion jobs, in seconds. Any job failing to convert in the
1061    time range would return with a Fatal error of timing out.
1062    Default value is 600, set to 0 to disable.
1063
1064=item C<--nocomments>
1065
1066Normally latexml preserves comments from the source file, and adds a comment every 25 lines as
1067    an aid in tracking the source.  The option --nocomments discards such comments.
1068
1069=item C<--documentid>=I<id>
1070
1071Assigns an ID to the root element of the XML document.  This ID is generally
1072    inherited as the prefix of ID's on all other elements within the document.
1073    This is useful when constructing a site of multiple documents so that
1074    all nodes have unique IDs.
1075
1076=item C<--strict>
1077
1078Specifies a strict processing mode. By default, undefined control sequences and
1079    invalid document constructs (that violate the DTD) give warning messages, but attempt
1080    to continue processing.  Using C<--strict> makes them generate fatal errors.
1081
1082=item C<--post>
1083
1084Request post-processing, auto-enabled by any requested post-processor. Disabled by default.
1085    If post-processing is enabled, the graphics and cross-referencing processors are on by default.
1086
1087=back
1088
1089
1090=head2 Format Options
1091
1092=over 4
1093
1094=item C<--format>=C<(html|html5|html4|xhtml|xml|epub)>
1095
1096Specifies the output format for post processing.
1097By default, it will be guessed from the file extension of the destination
1098(if given), with html implying C<html5>, xhtml implying C<xhtml> and the
1099default being C<xml>, which you probably don't want.
1100
1101The C<html5> format converts the material to html5 form with mathematics as MathML;
1102C<html5> supports SVG.
1103C<html4> format converts the material to the earlier html form, version 4,
1104and the mathematics to png images.
1105C<xhtml> format converts to xhtml and uses presentation MathML (after attempting
1106to parse the mathematics) for representing the math.  C<html5> similarly converts
1107math to presentation MathML. In these cases, any
1108graphics will be converted to web-friendly formats and/or copied to the
1109destination directory. If you simply specify C<html>, it will treat that as C<html5>.
1110
1111For the default, C<xml>, the output is left in LaTeXML's internal xml,
1112although the math can be converted by enabling one of the math postprocessors,
1113such as --pmml to obtain presentation MathML.
1114For html, html5 and xhtml, a default stylesheet is provided, but see
1115the C<--stylesheet> option.
1116
1117=item C<--xml>
1118
1119Requests XML output; this is the default.
1120  DEPRECATED: use --format=xml instead
1121
1122=item C<--tex>
1123
1124Requests TeX output for debugging purposes;
1125    processing is only carried out through expansion and digestion.
1126    This may not be quite valid TeX, since Unicode may be introduced.
1127
1128=item C<--box>
1129
1130Requests Box output for debugging purposes;
1131    processing is carried out through expansion and digestion,
1132    and the result is printed.
1133
1134=item C<--profile>
1135
1136Variety of shorthand profiles.
1137    Note that the profiles come with a variety of preset options.
1138    You can examine any of them in their C<resources/Profiles/name.opt>
1139    file.
1140
1141Example: C<latexmlc --profile=math 'literal:1+2=3'>
1142
1143=item C<--omitdoctype>, C<--noomitdoctype>
1144
1145Omits (or includes) the document type declaration.
1146The default is to include it if the document model was based on a DTD.
1147
1148=item C<--numbersections>, C<--nonumbersections>
1149
1150Includes (default), or disables the inclusion of section, equation, etc,
1151numbers in the formatted document and crossreference links.
1152
1153=item C<--stylesheet>=I<xslfile>
1154
1155Requests the XSL transformation of the document using the given xslfile as stylesheet.
1156If the stylesheet is omitted, a `standard' one appropriate for the
1157format (html4, html5 or xhtml) will be used.
1158
1159=item C<--css>=I<cssfile>
1160
1161Adds I<cssfile> as a css stylesheet to be used in the transformed html/html5/xhtml.
1162Multiple stylesheets can be used; they are included in the html in the
1163order given, following the default C<ltx-LaTeXML.css> (unless C<--nodefaultcss>).
1164The stylesheet is copied to the destination directory, unless it is an absolute url.
1165
1166Some stylesheets included in the distribution are
1167  --css=navbar-left   Puts a navigation bar on the left.
1168                      (default omits navbar)
1169  --css=navbar-right  Puts a navigation bar on the right.
1170  --css=theme-blue    A blue coloring theme for headings.
1171  --css=amsart        A style suitable for journal articles.
1172
1173=item C<--javascript>=I<jsfile>
1174
1175Includes a link to the javascript file I<jsfile>, to be used in the transformed html/html5/xhtml.
1176Multiple javascript files can be included; they are linked in the html in the order given.
1177The javascript file is copied to the destination directory, unless it is an absolute url.
1178
1179=item C<--icon>=I<iconfile>
1180
1181Copies I<iconfile> to the destination directory and sets up the linkage in
1182the transformed html/html5/xhtml to use that as the "favicon".
1183
1184=item C<--nodefaultresources>
1185
1186Disables the copying and inclusion of resources added by the binding files;
1187This includes CSS, javascript or other files.  This does not affect
1188resources explicitly requested by the C<--css> or C<--javascript> options.
1189
1190
1191=item C<--timestamp>=I<timestamp>
1192
1193Provides a timestamp (typically a time and date) to be embedded in
1194the comments by the stock XSLT stylesheets.
1195If you don't supply a timestamp, the current time and date will be used.
1196(You can use C<--timestamp=0> to omit the timestamp).
1197
1198=item C<--xsltparameter>=I<name>:I<value>
1199
1200Passes parameters to the XSLT stylesheet.
1201See the manual or the stylesheet itself for available parameters.
1202
1203=back
1204
1205
1206=head2 Site & Crossreferencing Options
1207
1208=over 4
1209
1210=item C<--split>, C<--nosplit>
1211
1212Enables or disables (default) the splitting of documents into multiple `pages'.
1213If enabled, the document will be split into sections, bibliography,
1214index and appendices (if any) by default, unless C<--splitpath> is specified.
1215
1216=item C<--splitat=>I<unit>
1217
1218Specifies what level of the document to split at. Should be one
1219of C<chapter>, C<section> (the default), C<subsection> or C<subsubsection>.
1220For more control, see C<--splitpath>.
1221
1222=item C<--splitpath=>I<xpath>
1223
1224Specifies an XPath expression to select nodes that will generate separate
1225pages. The default splitpath is
1226  //ltx:section | //ltx:bibliography | //ltx:appendix | //ltx:index
1227
1228Specifying
1229
1230  --splitpath="//ltx:section | //ltx:subsection
1231         | //ltx:bibliography | //ltx:appendix | //ltx:index"
1232
1233would split the document at subsections as well as sections.
1234
1235=item C<--splitnaming>=C<(id|idrelative|label|labelrelative)>
1236
1237Specifies how to name the files for subdocuments created by splitting.
1238The values C<id> and C<label> simply use the id or label of the subdocument's
1239root node for its filename.  C<idrelative> and C<labelrelative> use
1240the portion of the id or label that follows the parent document's
1241id or label. Furthermore, to impose structure and uniqueness,
1242if a split document has children that are also split, that document
1243(and its children) will be in a separate subdirectory with the
1244name index.
1245
1246=item C<--scan>, C<--noscan>
1247
1248Enables (default) or disables the scanning of documents for ids, labels,
1249references, indexmarks, etc, for use in filling in refs, cites, index and
1250so on.  It may be useful to disable when generating documents not based
1251on the LaTeXML doctype.
1252
1253=item C<--crossref>, C<--nocrossref>
1254
1255Enables (default) or disables the filling in of references, hrefs, etc
1256based on a previous scan (either from C<--scan>, or C<--dbfile>)
1257It may be useful to disable when generating documents not based
1258on the LaTeXML doctype.
1259
1260=item C<--urlstyle>=C<(server|negotiated|file)>
1261
1262This option determines the way that URLs within the documents
1263are formatted, depending on the way they are intended to be served.
1264The default, C<server>, eliminates unnecessary
1265trailing C<index.html>.  With C<negotiated>, the trailing
1266file extension (typically C<html> or C<xhtml>) is eliminated.
1267The scheme C<file> preserves complete (but relative) urls
1268so that the site can be browsed as files without any server.
1269
1270=item C<--navigationtoc>=C<(context|none)>
1271
1272Generates a table of contents in the navigation bar; default is C<none>.
1273The `context' style of TOC is somewhat verbose and reveals more detail near the current
1274page; it is most suitable for navigation bars placed on the left or right.
1275Other styles of TOC should be developed and added here, such as a short form.
1276
1277=item C<--index>, C<--noindex>
1278
1279Enables (default) or disables the generation of an index from indexmarks
1280embedded within the document.  Enabling this has no effect unless
1281there is an index element in the document (generated by \printindex).
1282
1283=item C<--splitindex>, C<--nosplitindex>
1284
1285Enables or disables (default) the splitting of generated indexes
1286into separate pages per initial letter.
1287
1288=item C<--bibliography=>I<pathname>
1289
1290Specifies a bibliography generated from a BibTeX file
1291to be used to fill in a bibliography element.
1292Hand-written bibliographies placed in a C<thebibliography> environment
1293do not need this.  The option has no effect unless
1294there is a bibliography element in the document (generated by \bibliography).
1295
1296Note that this option provides the bibliography to be used to
1297fill in the bibliography element (generated by C<\bibliography>);
1298latexmlpost does not (currently) directly process and format such a bibliography.
1299
1300=item C<--splitbibliography>, C<--nosplitbibliography>
1301
1302Enables or disables (default) the splitting of generated bibliographies
1303into separate pages per initial letter.
1304
1305=item C<--prescan>
1306
1307By default C<latexmlpost> processes a single document into one
1308(or more; see C<--split>) destination files in a single pass.
1309When generating a complicated site consisting of several documents
1310it may be advantageous to first scan through the documents
1311to extract and store (in C<dbfile>) cross-referencing data
1312(such as ids, titles, urls, and so on).
1313A later pass then has complete information allowing all documents
1314to reference each other, and also constructs an index and bibliography
1315that reflects the entire document set.  The same effect (though less efficient)
1316can be achieved by running C<latexmlpost> twice, provided a C<dbfile>
1317is specified.
1318
1319=item C<--dbfile>I<=file>
1320
1321Specifies a filename to use for the crossreferencing data when
1322using two-pass processing.  This file may reside in the intermediate
1323destination directory.
1324
1325=item C<--sitedirectory=>I<dir>
1326
1327Specifies the base directory of the overall web site.
1328Pathnames in the database are stored in a form relative
1329to this directory to make it more portable.
1330
1331=item C<--embed>
1332
1333TODO: Deprecated, use --whatsout=fragment
1334Requests an embeddable XHTML div (requires: --post --format=xhtml),
1335    respectively the top division of the document's body.
1336    Caveat: This experimental mode is enabled only for fragment profile and post-processed
1337    documents (to XHTML).
1338
1339=back
1340
1341
1342=head2 Math Options
1343
1344These options specify how math should be converted into other formats.
1345Multiple formats can be requested; how they will be combined
1346depends on the format and other options.
1347
1348=over 4
1349
1350=item C<--noparse>
1351
1352Suppresses parsing math (default: parsing is on)
1353
1354=item C<--parse=name>
1355
1356Enables parsing math (default: parsing is on)
1357    and selects parser framework "name".
1358    Supported: RecDescent, no
1359    Tip: --parse=no is equivalent to --noparse
1360
1361=item C<--mathimages>, C<--nomathimages>
1362
1363Requests or disables the conversion of math to images (png by default).
1364Conversion is the default for html4 format.
1365
1366=item C<--mathsvg>, C<--nomathsvg>
1367
1368Requests or disables the conversion of math to svg images.
1369
1370=item C<--mathimagemagnification=>I<factor>
1371
1372Specifies the magnification used for math images (both png and svg),
1373if they are made. Default is 1.75.
1374
1375=item C<--presentationmathml>, C<--nopresentationmathml>
1376
1377Requests or disables conversion of math to Presentation MathML.
1378Conversion is the default for xhtml and html5 formats.
1379
1380=item C<--linelength>I<=number>
1381
1382(Experimental) Line-breaks the generated Presentation
1383MathML so that it is no longer than I<number> `characters'.
1384
1385=item C<--plane1>
1386
1387Converts the content of Presentation MathML token elements to
1388the appropriate Unicode Plane-1 codepoints according to the selected font,
1389when applicable (the default).
1390
1391=item C<--hackplane1>
1392
1393Converts the content of Presentation MathML token elements to
1394the appropriate Unicode Plane-1 codepoints according to the selected font,
1395but only for the mathvariants double-struck, fraktur and script.
1396This gives support for current (as of August 2009) versions of
1397Firefox and MathPlayer, provided a sufficient set of fonts is available (eg. STIX).
1398
1399=item C<--contentmathml>, C<--nocontentmathml>
1400
1401Requests or disables conversion of math to Content MathML.
1402Conversion is disabled by default.
1403B<Note> that this conversion is only partially implemented.
1404
1405=item C<--openmath>, C<--noopenmath>
1406
1407Requests or disables conversion of math to OpenMath.
1408Conversion is disabled by default.
1409B<Note> that this conversion is only partially implemented.
1410
1411=item C<--keepXMath>, C<--xmath>
1412
1413By default, when any of the MathML or OpenMath conversions
1414are used, the intermediate math representation will be removed;
1415this option preserves it; it will be used as secondary parallel
1416markup, when it follows the options for other math representations.
1417
1418=back
1419
1420
1421=head2 Graphics Options
1422
1423=over 4
1424
1425=item C<--graphicimages>, C<--nographicimages>
1426
1427Enables (default) or disables the conversion of graphics
1428to web-appropriate format (png).
1429
1430=item C<--graphicsmap=>I<sourcetype.desttype>
1431
1432Specifies a mapping of graphics file types. Typically, graphics elements
1433specify a graphics file that will be converted to a more appropriate file
1434target format; for example, postscript files used for graphics with LaTeX
1435will be converted to png format for use on the web.  As with LaTeX,
1436when a graphics file is specified without a file type, the system will search
1437for the most appropriate target type file.
1438
1439When this option is used, it overrides I<and replaces> the defaults and provides
1440a mapping of I<sourcetype> to I<desttype>.  The option can be
1441repeated to provide several mappings, with the earlier formats preferred.
1442If the I<desttype> is omitted, it specifies copying files of type I<sourcetype>, unchanged.
1443
1444The default setting is equivalent to having supplied the options:
1445  svg png gif jpg jpeg eps.png ps.png ai.png pdf.png
1446
1447The first formats are preferred and used unchanged, while the latter
1448ones are converted to png.
1449
1450=item C<--pictureimages>, C<--nopictureimages>
1451
1452Enables (default) or disables the conversion of picture environments
1453and pstricks material into images.
1454
1455=item C<--svg>, C<--nosvg>
1456
1457Enables or disables (default) the conversion of picture environments
1458and pstricks material to SVG.
1459
1460=back
1461
1462
1463=head2 Daemon, Server and Client Options
1464
1465Options used only for daemonized conversions, e.g. talking to a remote server
1466via latexmlc, or local processing via the C<LaTeXML::Plugin::latexmls> plugin.
1467
1468For reliable communication and a stable conversion experience, invoke latexmls
1469only through the latexmlc client (you need to set --expire to a positive value,
1470in order to request auto-spawning of a dedicated conversion server).
1471
1472=over 4
1473
1474=item C<--autoflush>=I<count>
1475
1476Automatically restart the daemon after converting I<count> inputs.
1477    Good practice for vast batch jobs. (default: 100)
1478
1479=item C<--expire>=I<secs>
1480
1481Set an inactivity timeout value in seconds.
1482    If the server process is not given any input for the specified duration,
1483    it will automatically terminate.
1484    The default value is 600 seconds, set to 0 to never expire,
1485    -1 to entirely opt out of using an independent server.
1486
1487=item C<--address>=I<URL>
1488
1489Specify server address (default: localhost)
1490
1491=item C<--port>=I<number>
1492
1493Specify server port (default: 3334 for math, 3344 for fragment and 3354 for standard)
1494
1495=back
1496
1497=head1 AUTHOR
1498
1499Bruce Miller <bruce.miller@nist.gov>
1500Deyan Ginev <deyan.ginev@nist.gov>
1501
1502=head1 COPYRIGHT
1503
1504Public domain software, produced as part of work done by the
1505United States Government & not subject to copyright in the US.
1506
1507=cut
1508