# /=====================================================================\ #
# |  LaTeXML::Common::Config                                            | #
# |  Configuration logic for LaTeXML                                    | #
# |=====================================================================| #
# | Part of LaTeXML:                                                    | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                                | #
# | Deyan Ginev <deyan.ginev@nist.gov>                          #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
package LaTeXML::Common::Config;
use strict;
use warnings;
use Carp;
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use Pod::Find qw(pod_where);
use LaTeXML::Util::Pathname;
use LaTeXML::Global;
use LaTeXML::Common::Error;
use Data::Dumper;
our $PROFILES_DB = {};    # Class-wide, caches all profiles that get used while the server is alive
our $is_bibtex   = qr/(^literal\:\s*\@)|(\.bib$)/;
our $is_archive  = qr/(^literal\:PK)|(\.zip$)/;

use base qw(Exporter);
our @EXPORT = (qw(addMathFormat removeMathFormat maybeAddMathFormat));

# Constructor: wraps the given option pairs (possibly none) in a new,
# "dirty" (i.e. not yet sanity-checked) configuration object.
sub new {
  my ($class, %opts) = @_;
  #TODO: How about defaults in the daemon server use case? Should we support those here?
  #      or are defaults always bad/confusing to allow?
  return bless { dirty => 1, opts => \%opts }, $class; }

###########################################
#### Command-line reader              #####
###########################################

# Builds the Getopt::Long specification.
#   getopt_specification(options => $hashref)
#      returns ($spec, $opts): parsed values are stored directly into %$opts.
#   getopt_specification(type => 'keyvals', keyvals => $arrayref)
#      returns ($spec, $keyvals): each option seen is appended to @$keyvals
#      as a [name, value] pair instead (value is undef for flag options).
sub getopt_specification {
  my (%options) = @_;
  my $opts = $options{options} || {};
  my $spec = {
    # Basics and Paths
    "output=s"        => \$$opts{destination},
    "destination=s"   => \$$opts{destination},
    "log=s"           => \$$opts{log},
    "preload=s"       => \@{ $$opts{preload} },
    "preamble=s"      => \$$opts{preamble},
    "postamble=s"     => \$$opts{postamble},
    "base=s"          => \$$opts{base},
    "path=s"          => \@{ $$opts{paths} },
    "quiet"           => sub { $$opts{verbosity}--; },
    "verbose"         => sub { $$opts{verbosity}++; },
    "strict"          => \$$opts{strict},
    "includestyles"   => \$$opts{includestyles},
    "inputencoding=s" => \$$opts{inputencoding},
    # Formats
    "xml"      => sub { $$opts{format}    = 'xml'; },
    "tex"      => sub { $$opts{format}    = 'tex'; },
    "box"      => sub { $$opts{format}    = 'box'; },
    "bibtex"   => sub { $$opts{type}      = 'BibTeX'; },
    "noparse"  => sub { $$opts{mathparse} = 'no'; },
    "format=s" => \$$opts{format},
    "parse=s"  => \$$opts{mathparse},
    # Profiles
    "profile=s"   => \$$opts{profile},
    "cache_key=s" => \$$opts{cache_key},
    "mode=s"      => \$$opts{profile},
    "source=s"    => \$$opts{source},
    # Output framing
    "embed"      => sub { $$opts{whatsout} = 'fragment'; },
    "whatsin=s"  => \$$opts{whatsin},
    "whatsout=s" => \$$opts{whatsout},
    # Daemon options
    "autoflush=i" => \$$opts{input_limit},
    "timeout=i"   => \$$opts{timeout},
    "expire=i"    => \$$opts{expire},
    "address=s"   => \$$opts{address},
    "port=i"      => \$$opts{port},
    # Post-processing
    "post!"           => \$$opts{post},
    "validate!"       => \$$opts{validate},
    "omitdoctype!"    => \$$opts{omitdoctype},
    "numbersections!" => \$$opts{numbersections},
    "timestamp=s"     => \$$opts{timestamp},
    # Various choices for math processing.
    # Note: Could want OM embedded in mml annotation, too.
    # In general, could(?) want multiple math reps within <Math>
    # OR, multiple math reps combined with <mml:semantics>
    #   or, in fact, _other_ parallel means? (om?, omdoc? ...)
    # So, need to separate multiple transformations from the combination.
    # However, IF combining, then will need to support a id/ref mechanism.
    "mathimagemagnification=f"    => \$$opts{mathimagemag},
    "linelength=i"                => \$$opts{linelength},
    "plane1!"                     => \$$opts{plane1},
    "hackplane1!"                 => \$$opts{hackplane1},
    "mathimages"                  => sub { addMathFormat($opts, 'images'); },
    "nomathimages"                => sub { removeMathFormat($opts, 'images'); },
    "mathsvg"                     => sub { addMathFormat($opts, 'svg'); },
    "nomathsvg"                   => sub { removeMathFormat($opts, 'svg'); },
    "presentationmathml|pmml"     => sub { addMathFormat($opts, 'pmml'); },
    "contentmathml|cmml"          => sub { addMathFormat($opts, 'cmml'); },
    "openmath|om"                 => sub { addMathFormat($opts, 'om'); },
    "keepXMath|xmath"             => sub { addMathFormat($opts, 'xmath'); },
    "nopresentationmathml|nopmml" => sub { removeMathFormat($opts, 'pmml'); },
    "nocontentmathml|nocmml"      => sub { removeMathFormat($opts, 'cmml'); },
    "noopenmath|noom"             => sub { removeMathFormat($opts, 'om'); },
    "nokeepXMath|noxmath"         => sub { removeMathFormat($opts, 'xmath'); },
    "mathtex"                     => sub { addMathFormat($opts, 'mathtex'); },
    "nomathtex"                   => sub { removeMathFormat($opts, 'mathtex'); },
    "parallelmath!"               => \$$opts{parallelmath},
    # Some general XSLT/CSS/JavaScript options.
    "stylesheet=s"      => \$$opts{stylesheet},
    "xsltparameter=s"   => \@{ $$opts{xsltparameters} },
    "css=s"             => \@{ $$opts{css} },
    "defaultresources!" => \$$opts{defaultresources},
    "javascript=s"      => \@{ $$opts{javascript} },
    "icon=s"            => \$$opts{icon},
    # Options for broader document set processing
    "split!"    => \$$opts{split},
    "splitat=s" => sub { $$opts{splitat} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "splitpath=s" => sub { $$opts{splitpath} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "splitnaming=s" => sub { $$opts{splitnaming} = $_[1];
      $$opts{split} = 1 unless defined $$opts{split}; },
    "scan!"           => \$$opts{scan},
    "crossref!"       => \$$opts{crossref},
    "urlstyle=s"      => \$$opts{urlstyle},
    "navigationtoc=s" => \$$opts{navtoc},
    "navtoc=s"        => \$$opts{navtoc},
    # Generating indices
    "index!"         => \$$opts{index},
    "permutedindex!" => \$$opts{permutedindex},
    "splitindex!"    => \$$opts{splitindex},
    # Generating Bibliographies
    "bibliography=s"     => \@{ $$opts{bibliographies} },    # TODO: Document
    "splitbibliography!" => \$$opts{splitbibliography},
    # Options for two phase processing
    "prescan"           => \$$opts{prescan},
    "dbfile=s"          => \$$opts{dbfile},
    "sitedirectory=s"   => \$$opts{sitedirectory},
    "sourcedirectory=s" => \$$opts{sourcedirectory},
    # For graphics: vaguely similar issues, but more limited.
    # includegraphics images (eg. ps) can be converted to webimages (eg.png)
    # picture/pstricks images can be converted to png or possibly svg.
    "graphicimages!" => \$$opts{dographics},
    "graphicsmap=s"  => \@{ $$opts{graphicsmaps} },
    "svg!"           => \$$opts{svg},
    "pictureimages!" => \$$opts{picimages},
    # HELP
    "comments!"    => \$$opts{comments},
    "VERSION!"     => \$$opts{showversion},
    "debug=s"      => \@{ $$opts{debug} },
    "documentid=s" => \$$opts{documentid},
    "help"         => \$$opts{help}
  };
  return ($spec, $opts) unless ($options{type} && ($options{type} eq 'keyvals'));
  # Representation use case:
  my $keyvals  = $options{keyvals} || [];
  my $rep_spec = {};                        # Representation specification
  foreach my $key (keys %$spec) {
    if ($key =~ /^(.+)=\w$/) {
      # Option taking a value: record [name, value]
      my $name = $1;
      $$rep_spec{$key} = sub { CORE::push @$keyvals, [$name, $_[1]] };
    } else {
      # Flag option: reconstruct the name that was used (possibly negated)
      # from the control data of the Getopt::Long callback object.
      $$rep_spec{$key} = sub {
        my $ctl  = $_[0]->{ctl};
        my $used = ($$ctl[0] ? 'no' : '') . $$ctl[1];
        CORE::push @$keyvals, [$used, undef] };
    }
  }
  return ($rep_spec, $keyvals);
}
# TODO: Separate the keyvals scan from getopt_specification()
#       into its own sub, using @GETOPT_KEYS entirely.
our @GETOPT_KEYS = keys %{ (getopt_specification())[0] };

# Parses the Getopt::Long style arguments in @$argref into this object's options.
# With read option silent=>1, no usage message is printed on errors or for --help.
# Returns the success flag of GetOptions.
# Note: prints usage and exits on bad options or --help (unless silent),
# and prints the version and exits on --VERSION.
sub read {
  my ($self, $argref, %read_options) = @_;
  my $opts = $$self{opts};
  local @ARGV = @$argref;
  my ($spec) = getopt_specification(options => $opts);
  my $silent = %read_options && $read_options{silent};
  my $getOptions_success = GetOptions(%{$spec});
  if (!$getOptions_success && !$silent) {
    pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 1, -verbose => 99,
      -input    => pod_where({ -inc => 1 }, __PACKAGE__),
      -sections => 'OPTION SYNOPSIS', -output => \*STDERR);
  }
  if (!$silent && $$opts{help}) {
    pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 0, -verbose => 99,
      -input    => pod_where({ -inc => 1 }, __PACKAGE__),
      -sections => 'OPTION SYNOPSIS', -output => \*STDOUT);
  }

  # Check that options for system I/O (destination and log) are valid before wasting any time...
  foreach my $IO_option (qw(destination log)) {
    if ($$opts{$IO_option}) {
      $$opts{$IO_option} = pathname_canonical($$opts{$IO_option});
      if (my $dir = pathname_directory($$opts{$IO_option})) {
        pathname_mkdir($dir) or croak "Couldn't create $IO_option directory $dir: $!"; } } }
  # Removed math formats are irrelevant for conversion:
  delete $$opts{removed_math_formats};

  if ($$opts{showversion}) { print STDERR "$LaTeXML::IDENTITY\n"; exit(0); }

  # The first remaining (non-option) argument is the source, unless --source was given
  $$opts{source} = $ARGV[0] unless $$opts{source};
  # Special source-based guessing needs to happen here,
  # as we won't have access to the source file/literal/resource later on:
  if (!$$opts{type} || ($$opts{type} eq 'auto')) {
    $$opts{type} = 'BibTeX' if ($$opts{source} && ($$opts{source} =~ /$is_bibtex/)); }
  if (!$$opts{whatsin}) {
    $$opts{whatsin} = 'archive' if ($$opts{source} && ($$opts{source} =~ /$is_archive/)); }
  return $getOptions_success;
}

# Reads options given as a flat list of (key, value, key, value, ...) in
# @$conversion_options, by converting them to "--key=value" arguments and
# passing those to read().  Note: consumes (empties) @$conversion_options.
sub read_keyvals {
  my ($self, $conversion_options, %read_options) = @_;
  my $cmdopts = [];
  while (my ($key, $value) = splice(@$conversion_options, 0, 2)) {
    my $has_value = defined($value) && length($value);
    # TODO: Is skipping over empty values ever harmful? Do we have non-empty defaults anywhere?
    # The key is externally supplied: quote it, so that it is matched literally.
    next if (!$has_value) && (grep { /^\Q$key\E=/ } @GETOPT_KEYS);
    $key = "--$key" unless $key =~ /^\-\-/;
    CORE::push @$cmdopts, ($has_value ? "$key=$value" : $key);
  }
  # Read into a Config object:
  return $self->read($cmdopts, %read_options); }

# Scans the Getopt::Long style arguments in @$argref WITHOUT storing them in
# this object; returns an array ref of [name, value] pairs in the order
# encountered (plus ['source', ...] for the first remaining argument),
# or a false value if the arguments could not be parsed.
sub scan_to_keyvals {
  my ($self, $argref, %read_options) = @_;
  local @ARGV = @$argref;
  my ($spec, $keyvals) = getopt_specification(type => 'keyvals');
  my $silent = %read_options && $read_options{silent};
  my $getOptions_success = GetOptions(%$spec);
  if (!$getOptions_success && !$silent) {
    pod2usage(-message => $LaTeXML::IDENTITY, -exitval => 1, -verbose => 99,
      -input    => pod_where({ -inc => 1 }, __PACKAGE__),
      -sections => 'OPTION SYNOPSIS', -output => \*STDERR);
  }
  CORE::push @$keyvals, ['source', $ARGV[0]] if $ARGV[0];
  return $getOptions_success && $keyvals;
}

###########################################
#### Options Object Hashlike API      #####
###########################################
# Note: every method that modifies the options marks the object as "dirty",
# so that the next call to check() re-validates the options.

# Returns the value of option $key.
sub get {
  my ($self, $key) = @_;
  return $$self{opts}{$key}; }

# Sets option $key to $value.
sub set {
  my ($self, $key, $value) = @_;
  $$self{dirty} = 1;
  $$self{opts}{$key} = $value;
  return; }

# Appends $value to the list-valued option $key
# (a value that is not a reference is replaced by a fresh list).
sub push {
  my ($self, $key, $value) = @_;
  $$self{dirty} = 1;
  $$self{opts}{$key} = [] unless ref $$self{opts}{$key};
  CORE::push @{ $$self{opts}{$key} }, $value;
  return; }

# Removes option $key.
sub delete {
  my ($self, $key) = @_;
  $$self{dirty} = 1;
  delete $$self{opts}{$key};
  return; }

# True if option $key is present (even if its value is undefined).
sub exists {
  my ($self, $key) = @_;
  return exists $$self{opts}{$key}; }

# True if option $key has a defined value.
sub defined {
  my ($self, $key) = @_;
  return defined $$self{opts}{$key}; }

# Returns the names of all options.
sub keys {
  my ($self) = @_;
  return keys %{ $$self{opts} }; }

# Returns the underlying options hash reference (not a copy).
sub options {
  my ($self) = @_;
  return $$self{opts}; }

# Returns a new configuration object with the same options and dirty state.
# Note: the copy is shallow; list- and hash-valued options are shared
# between the original and the clone.
sub clone {
  my ($self) = @_;
  my $clone = LaTeXML::Common::Config->new(%{ $self->options });
  $$clone{dirty} = $$self{dirty};
  return $clone; }

###########################################
#### Option Sanity Checking           #####
###########################################

# Perform all option sanity checks
# (does nothing if the options were not modified since the last check)
sub check {
  my ($self) = @_;
  return unless $$self{dirty};
  # 1. Resolve profile
  $self->_obey_profile;
  # 2. Place sane defaults where needed
  return $self->_prepare_options; }

# Resolves the 'profile' option: looks the profile up in $PROFILES_DB, or else
# in a "<profile>.opt" file, and uses its options as defaults beneath the
# explicitly given options.  The 'profile' option itself is removed,
# and remembered as 'cache_key' (unless a cache_key was given).
sub _obey_profile {
  my ($self) = @_;
  $$self{dirty} = 1;
  my $opts    = $$self{opts};
  my $profile = lc($$opts{profile} || 'custom');
  $profile =~ s/\.opt$//;
  # Look at the PROFILES_DB or find a profiles file (otherwise fallback to custom)
  my $profile_opts = {};
  if ($profile ne 'custom') {
    if (defined $$PROFILES_DB{$profile}) {
      %$profile_opts = %{ $$PROFILES_DB{$profile} };
    } elsif (my $file = pathname_find($profile . '.opt', paths => $$opts{paths},
        types => [], installation_subdir => 'resources/Profiles')) {
      my $conf_tmp = LaTeXML::Common::Config->new;
      # _read_options_file returns nothing if the file could not be opened;
      # read() needs an array reference.
      $conf_tmp->read(_read_options_file($file) || []);
      $profile_opts = $conf_tmp->options;
    } else {
      # Throw an error, fallback to custom
      carp("Warning:unexpected:$profile Profile $profile was not recognized, reverting to 'custom'\n");
      $$opts{profile} = 'custom';
      $profile = 'custom';
    }
  }
  # Erase the profile, save it as cache key
  delete $$opts{profile};
  $$opts{cache_key} = $profile unless defined $$opts{cache_key};
  if (%$profile_opts) {
    # Merge the new options with the profile defaults:
    for my $key (grep { defined $$opts{$_} } (CORE::keys %$opts)) {
      if ($key =~ /^p(ath|reload)/) {    # Paths and preloads get merged in
        # Merge into a copy: the profile's list may be shared with the
        # $PROFILES_DB cache, which must not accumulate per-request entries.
        my @merged = @{ $$profile_opts{$key} || [] };
        foreach my $entry (@{ $$opts{$key} }) {
          # If new to the array, push:
          CORE::push(@merged, $entry) unless grep { $entry eq $_ } @merged;
        }
        $$profile_opts{$key} = \@merged;
      } else {    # The other options get overwritten
        $$profile_opts{$key} = $$opts{$key};
      }
    }
    %$opts = %$profile_opts;    # Move back into the user options
  }
  return; }

# TODO: Best way to throw errors when options don't work out?
#       How about in the case of Extras::ReadOptions?
#       Error() and Warn() would be neat, but we have to make sure STDERR is caught beforehand.
#       Also, there is no eval() here, so we might need a softer handling of Error()s.

# Fills in defaults, normalizes paths and checks the options for consistency;
# croaks on inconsistent combinations.  On success, clears the "dirty" flag.
sub _prepare_options {
  my ($self) = @_;
  my $opts = $$self{opts};
  #======================================================================
  # I. Sanity check and Completion of Core options.
  #======================================================================
  # "safe" and semi-perlcrtic acceptable way to set DEBUG inside arbitrary modules.
  # Note: 'LaTeXML' refers to the top-level class
  foreach my $ltx_class (@{ $$opts{debug} || [] }) {
    $LaTeXML::DEBUG{$ltx_class} = 1; }

  $$opts{input_limit}   = 100          unless defined $$opts{input_limit};    # 100 jobs until restart
  $$opts{timeout}       = 600          unless defined $$opts{timeout};        # 10 minute timeout default
  $$opts{expire}        = 600          unless defined $$opts{expire};         # 10 minute timeout default
  $$opts{mathparse}     = 'RecDescent' unless defined $$opts{mathparse};
  $$opts{inputencoding} = "utf-8"      unless defined $$opts{inputencoding};
  if ($$opts{mathparse} eq 'no') {
    $$opts{mathparse}   = 0;
    $$opts{nomathparse} = 1; }    #Backwards compatible
##  $$opts{verbosity} = 0     unless defined $$opts{verbosity};
  $$opts{preload} = []    unless defined $$opts{preload};
  $$opts{paths}   = ['.'] unless defined $$opts{paths};
  @{ $$opts{paths} } = map { pathname_canonical($_) } @{ $$opts{paths} };
  foreach my $path_option ('destination', 'dbfile', 'sourcedirectory', 'sitedirectory') {
    $$opts{$path_option} = pathname_canonical($$opts{$path_option}) if defined $$opts{$path_option};
  }

  if (!defined $$opts{whatsin}) {
    if ($$opts{preamble} || $$opts{postamble}) {
      # Preamble or postamble imply a fragment whatsin
      $$opts{whatsin} = 'fragment'; }
    else {    # Default input chunk is a document
      $$opts{whatsin} = 'document'; } }
  $$opts{type} = 'auto' unless defined $$opts{type};
  unshift(@{ $$opts{preload} }, ('TeX.pool', 'LaTeX.pool', 'BibTeX.pool')) if ($$opts{type} eq 'BibTeX');

  # Destination extension might indicate the format:
  if ((!defined $$opts{extension}) && (defined $$opts{destination})) {
    if ($$opts{destination} =~ /\.([^.]+)$/) {
      $$opts{extension} = $1; } }
  if ((!defined $$opts{format}) && (defined $$opts{extension})) {
    $$opts{format} = $$opts{extension}; }
  if ((!defined $$opts{extension}) && (defined $$opts{format})) {
    if ($$opts{format} =~ /^html/) {
      $$opts{extension} = 'html'; }
    elsif ($$opts{format} =~ /^xhtml/) {
      $$opts{extension} = 'xhtml'; }
    else {
      $$opts{extension} = 'xml'; } }
  if (!defined $$opts{whatsout}) {
    if ((defined $$opts{extension}) && ($$opts{extension} eq 'zip')) {
      $$opts{whatsout} = 'archive';
    } else {
      $$opts{whatsout} = 'document';
    } }
  if ($$opts{format}) {
    # Lower-case for sanity's sake
    $$opts{format} = lc($$opts{format});
    if ($$opts{format} eq 'zip') {
      # Not encouraged! But try to produce something sensible anyway...
      $$opts{format}   = 'html5';
      $$opts{whatsout} = 'archive'; }
    else {    # Default HTML is 5
      $$opts{format} = 'html5' if $$opts{format} eq 'html'; }

    $$opts{is_html}  = ($$opts{format} =~ /^html/);
    $$opts{is_xhtml} = ($$opts{format} =~ /^(xhtml5?|epub|mobi)$/);
    $$opts{whatsout} = 'archive' if (($$opts{format} eq 'epub') || ($$opts{format} eq 'mobi'));
  } else {
    $$opts{format} = 'xml';    # We failed to guess format in any-which-way, so XML is default
  }
  #======================================================================
  # II. Sanity check and Completion of Post options.
  #======================================================================
  # Any post switch implies post (TODO: whew, lots of those, add them all!):
  $$opts{math_formats} = [] unless defined $$opts{math_formats};
  $$opts{post} = 1 if ((!defined $$opts{post}) &&
    (scalar(@{ $$opts{math_formats} })
      || ($$opts{stylesheet})
      || $$opts{is_html}
      || $$opts{is_xhtml}
      || (($$opts{format} || '') eq 'jats')
      || ($$opts{whatsout} && ($$opts{whatsout} ne 'document'))
    )
  );
#                   || ... || ... || ...
# $$opts{post}=0 if (defined $$opts{mathparse} && (! $$opts{mathparse})); # No-parse overrides post-processing
  if ($$opts{post}) {    # No need to bother if we're not post-processing
                         # Default: scan and crossref on, other advanced off
    $$opts{prescan}  = undef unless defined $$opts{prescan};
    $$opts{dbfile}   = undef unless defined $$opts{dbfile};
    $$opts{scan}     = 1     unless defined $$opts{scan};
    $$opts{index}    = 1     unless defined $$opts{index};
    $$opts{crossref} = 1     unless defined $$opts{crossref};
    $$opts{sitedirectory} = defined $$opts{sitedirectory} ? $$opts{sitedirectory}
      : (defined $$opts{destination} ? pathname_directory($$opts{destination})
      : (defined $$opts{dbfile} ? pathname_directory($$opts{dbfile})
        : "."));
    $$opts{sourcedirectory} = undef unless defined $$opts{sourcedirectory};
    $$opts{numbersections}  = 1     unless defined $$opts{numbersections};
    $$opts{navtoc}          = undef unless defined $$opts{navtoc};
    $$opts{navtocstyles} = { context => 1, normal => 1, none => 1 } unless defined $$opts{navtocstyles};
    $$opts{navtoc} = lc($$opts{navtoc}) if defined $$opts{navtoc};
    delete $$opts{navtoc} if ($$opts{navtoc} && ($$opts{navtoc} eq 'none'));

    if ($$opts{navtoc}) {
      if (!$$opts{navtocstyles}->{ $$opts{navtoc} }) {
        croak($$opts{navtoc} . " is not a recognized style of navigation TOC"); }
      if (!$$opts{crossref}) {
        croak("Cannot use option \"navigationtoc\" (" . $$opts{navtoc} . ") without \"crossref\""); } }
    $$opts{urlstyle}       = 'server' unless defined $$opts{urlstyle};
    $$opts{bibliographies} = []       unless defined $$opts{bibliographies};

    # Validation:
    $$opts{validate} = 1 unless defined $$opts{validate};
    # Graphics:
    $$opts{mathimagemag} = 1.75 unless defined $$opts{mathimagemag};
    if ((defined $$opts{destination}) || ($$opts{whatsout} =~ /^archive/)) {
      # We want the graphics enabled by default, but only when we have a destination
      $$opts{dographics} = 1 unless defined $$opts{dographics};
      $$opts{picimages} = 1 if (($$opts{format} eq "html4") || ($$opts{format} eq "jats"))
        && !defined $$opts{picimages};
    }
    # Split sanity:
    if ($$opts{split}) {
      $$opts{splitat}     = 'section' unless defined $$opts{splitat};
      $$opts{splitnaming} = 'id'      unless defined $$opts{splitnaming};
      $$opts{splitancestors} = {
        part          => [qw()],
        chapter       => [qw(part)],
        section       => [qw(part chapter)],
        subsection    => [qw(part chapter section)],
        subsubsection => [qw(part chapter section subsection)] };
      $$opts{splitback} = [qw(bibliography appendix index)];

      $$opts{splitnaming} = _checkOptionValue('--splitnaming', $$opts{splitnaming},
        qw(id idrelative label labelrelative));
      $$opts{splitat} = _checkOptionValue('--splitat', $$opts{splitat}, CORE::keys %{ $$opts{splitancestors} });
      $$opts{splitpath} = make_splitpaths($opts, $$opts{splitat}) unless defined $$opts{splitpath}; }
    # Check for appropriate combination of split, scan, prescan, dbfile, crossref
    if ($$opts{split} && (!defined $$opts{destination}) && ($$opts{whatsout} !~ /^archive/)) {
      croak("Must supply --destination when using --split"); }
    if ($$opts{prescan} && !$$opts{scan}) {
      croak("Makes no sense to --prescan with scanning disabled (--noscan)"); }
    if ($$opts{prescan} && (!defined $$opts{dbfile})) {
      croak("Cannot prescan documents (--prescan) without specifying --dbfile"); }
    if (!$$opts{prescan} && $$opts{crossref} && !($$opts{scan} || (defined $$opts{dbfile}))) {
      croak("Cannot cross-reference (--crossref) without --scan or --dbfile "); }
    if ($$opts{crossref}) {
      $$opts{urlstyle} = _checkOptionValue('--urlstyle', $$opts{urlstyle}, qw(server negotiated file)); }
    if (($$opts{permutedindex} || $$opts{splitindex}) && (!defined $$opts{index})) {
      $$opts{index} = 1; }
    # NOTE(review): the next two checks test 'crossref' (which has always been
    # given a default above), although their messages mention --dbfile;
    # presumably 'dbfile' was intended -- confirm before changing.
    if (!$$opts{prescan} && $$opts{index} && !($$opts{scan} || defined $$opts{crossref})) {
      croak("Cannot generate index (--index) without --scan or --dbfile"); }
    if (!$$opts{prescan} && @{ $$opts{bibliographies} } && !($$opts{scan} || defined $$opts{crossref})) {
      croak("Cannot generate bibliography (--bibliography) without --scan or --dbfile"); }

    # There is now a legitimate case to preserve graphics here.
    # if ((!defined $$opts{destination}) && ($$opts{whatsout} !~ /^archive/)
    #   && (_checkMathFormat($opts, 'images') || _checkMathFormat($opts, 'svg')
    #     || $$opts{dographics} || $$opts{picimages})) {
    #   croak("Must supply --destination unless all auxilliary file writing is disabled"
    #       . "(--nomathimages --nomathsvg --nographicimages --nopictureimages --nodefaultcss)"); }

    # Format:
    #Default is XHTML, XML otherwise (TODO: Expand)
    if (!defined $$opts{format}) {
      if ($$opts{stylesheet}) { $$opts{format} = "xml"; }
      else                    { $$opts{format} = "xhtml"; }
    }

    if (!$$opts{stylesheet}) {
      if    ($$opts{format} eq 'xhtml')            { $$opts{stylesheet} = "LaTeXML-xhtml.xsl"; }
      elsif ($$opts{format} eq "html4")            { $$opts{stylesheet} = "LaTeXML-html4.xsl"; }
      elsif ($$opts{format} =~ /^(?:epub|mobi)$/)  { $$opts{stylesheet} = "LaTeXML-epub3.xsl"; }
      elsif ($$opts{format} eq "html5")            { $$opts{stylesheet} = "LaTeXML-html5.xsl"; }
      elsif ($$opts{format} eq "jats")             { $$opts{stylesheet} = "LaTeXML-jats.xsl"; }
      elsif ($$opts{format} eq "xml")              { delete $$opts{stylesheet}; }
      else { croak("Unrecognized target format: " . $$opts{format}); }
    }
    # Check format and complete math and image options
    if ($$opts{format} eq 'html4') {
      $$opts{svg} = 0 unless defined $$opts{svg};    # No SVG by default in HTML.
      croak("Default html4 stylesheet only supports math images, not " . join(', ', @{ $$opts{math_formats} }))
        if (!defined $$opts{stylesheet})
        && scalar(grep { $_ ne 'images' } @{ $$opts{math_formats} });
      croak("Default html stylesheet does not support svg") if $$opts{svg};
      $$opts{math_formats} = [];
      maybeAddMathFormat($opts, 'images');
    }
    $$opts{svg} = 1 unless defined $$opts{svg};      # If we're not making HTML, SVG is on by default
         # PMML default if we're HTMLy and all else fails and no mathimages:
    if (((!defined $$opts{math_formats}) || (!scalar(@{ $$opts{math_formats} })))
      && ($$opts{is_html} || $$opts{is_xhtml} || ($$opts{format} eq 'jats'))) {
      CORE::push @{ $$opts{math_formats} }, 'pmml';
    }
    # use parallel markup if there are multiple formats requested.
    $$opts{parallelmath} = 1 if ($$opts{math_formats} && (@{ $$opts{math_formats} } > 1));
  }
  # If really nothing hints to define format, then default it to XML
  $$opts{format} = 'xml' unless defined $$opts{format};
  $$self{dirty} = 0;
  return; }

## Public Utilities:

# Request math format $fmt (appended to $$opts{math_formats}),
# unless it is already requested, or was explicitly removed.
sub addMathFormat {
  my ($opts, $fmt) = @_;
  $$opts{math_formats} = [] unless defined $$opts{math_formats};
  CORE::push(@{ $$opts{math_formats} }, $fmt)
    unless (grep { $_ eq $fmt } @{ $$opts{math_formats} }) || $$opts{removed_math_formats}->{$fmt};
  return; }

# Remove math format $fmt from the requested formats,
# and remember that it was explicitly removed.
sub removeMathFormat {
  my ($opts, $fmt) = @_;
  @{ $$opts{math_formats} } = grep { $_ ne $fmt } @{ $$opts{math_formats} };
  $$opts{removed_math_formats}->{$fmt} = 1;
  return; }

# Add a default math format, when no math formatter is requested, unless specifically forbidden
sub maybeAddMathFormat {
  my ($opts, $fmt) = @_;
  # As in addMathFormat: make sure there is a list to examine
  $$opts{math_formats} = [] unless defined $$opts{math_formats};
  unshift(@{ $$opts{math_formats} }, $fmt)
    unless @{ $$opts{math_formats} } || $$opts{removed_math_formats}{$fmt};
  return; }

# Checks whether math format $fmt has been requested.
sub _checkMathFormat {
  my ($opts, $fmt) = @_;
  return grep { $_ eq $fmt } @{ $$opts{math_formats} }; }

## Utilities:

# Returns the first of @choices that starts with $value (so that unique
# prefixes can be used as abbreviations); croaks if there is none.
sub _checkOptionValue {
  my ($option, $value, @choices) = @_;
  if ($value) {
    foreach my $choice (@choices) {
      return $choice if substr($choice, 0, length($value)) eq $value; } }
  croak("Value for $option, $value, doesn't match " . join(', ', @choices)); }

# Contrived xpath, since backmatter can now be at any level!
# Builds the xpath used for splitting at level $splitat: selects the unit
# $splitat and its ancestor units, along with the backmatter elements
# (from $$opts{splitback}) that follow, or are contained in, such units.
sub make_splitpaths {
  my ($opts, $splitat) = @_;
  my @paths = ();
  my $anc   = $$opts{splitancestors}{$splitat};
  foreach my $unit ($splitat, ($anc ? @$anc : ())) {
    CORE::push(@paths, "//ltx:$unit");
    foreach my $back (@{ $$opts{splitback} }) {
      CORE::push(@paths, "//ltx:$back\["
          . join(' or ', "preceding-sibling::ltx:$unit",
          map { "parent::ltx:$_"; } @{ $$opts{splitancestors}{$unit} })
          . "]"); } }
  return join(' | ', @paths); }

### This is from t/lib/TestDaemon.pm and ideally belongs in Util::Pathname
# Reads an options (profile) file consisting of "key = value" lines,
# with "#" starting a comment (unless escaped as "\#").
# Returns an array ref of the corresponding "--key=value" arguments,
# or nothing if the file could not be opened.
sub _read_options_file {
  my ($file) = @_;
  my $opts = [];
  my $OPT;
#### Now can we report status to right places before we've gotten configuration??? (verbosity, logfile...)
####  ProgressSpinup("Loading profile $file");
  unless (open($OPT, "<", $file)) {
    Error('expected', $file, "Could not open options file '$file'");
    return; }
  while (my $line = <$OPT>) {
    # Cleanup comments, padding on the input line.
    $line =~ s/(?<!\\)#.*$//;    # Strip trailing comments starting w/ # (but \# is quoted)
    $line =~ s/\\#/#/g;          # unslashify any \#
    $line =~ s/^\s+//;           # Trim leading & trailing whitespace (including the newline)
    $line =~ s/\s+$//;
    next unless length($line);    # if line isn't empty, after that.....
    if ($line =~ /(\S+)\s*=\s*(.*)/) {
      my ($key, $value) = ($1, $2);    # Note: the value may legitimately be "0"
      $value =~ s/\s+$//;
      # Special treatment for --path=$env:
      if ($value =~ /^\$(.+)$/) {
        my @values   = ();
        my $env_name = $1;
        my $env_value;
        # Allow $env/foo paths, starting with $env prefixes
        if ($env_name =~ /^([^\/]+)(\/+)(.+)$/) {
          my $trailer = $3;
          if (my $env_path = $ENV{$1}) {
            $env_path .= '/' unless $env_path =~ /\/$/;
            CORE::push @values, $env_path . $trailer; } }
        else {
          # But also the standard behaviour, where the $env is an array of paths
          $env_value = $ENV{$env_name};
          next unless $env_value;
          @values = grep { -d $_ } reverse(split(':', $env_value));
          next unless @values; }
        CORE::push(@$opts, "--$key=$_") foreach (@values); }
      else {
        $value = length($value) ? "=$value" : '';
        CORE::push @$opts, "--$key" . $value; } }
    else {
      Warning('unexpected', $line, undef,
        "Unrecognized configuration data '$line'"); }
  }
  close $OPT;
####  ProgressSpindown("Loading profile $file");
  return $opts; }

1;

__END__

=pod

=head1 NAME

C<LaTeXML::Common::Config> - Configuration logic for LaTeXML

=head1 SYNOPSIS

    use LaTeXML::Common::Config;
    my $config = LaTeXML::Common::Config->new(
              profile=>'name',
              timeout=>60,
              ... );
    $config->read(\@ARGV);
    $config->check;

    my $value = $config->get($name);
    $config->set($name,$value);
    $config->delete($name);
    my $bool = $config->exists($name);
    my @keys = $config->keys;
    my $options_hashref = $config->options;
    my $config_clone = $config->clone;

=head1 DESCRIPTION

Configuration management class for LaTeXML options.
    * Responsible for defining the options interface
      and parsing the usual Perl command-line options syntax
    * Provides the intuitive getters, setters, as well as
      hash methods for manipulating the option values.
    * Also supports cloning into new configuration objects.

=head2 METHODS

=over 4

=item C<< my $config = LaTeXML::Common::Config->new(%options); >>

Creates a new configuration object. Note that you should try
    not to provide your own %options hash but rather create an empty
    configuration and use $config->read to read in the options.

=item C<< $config->read(\@ARGV); >>

This is the main method for parsing in LaTeXML options.
    The input array should either be @ARGV, e.g. when the
    options were provided from the command line using the
    classic Getopt::Long syntax,
    or any other array reference that conforms to that setup.
720 721=item C<< $config->check; >> 722 723Ensures that the configuration obeys the given profile and 724 performs a set of assignments of meaningful defaults 725 (when needed) and normalizations (for relative paths, etc). 726 727=item C<< my $value = $config->get($name); >> 728 729Classic getter for the $value of an option $name. 730 731=item C<< $config->set($name,$value); >> 732 733Classic setter for the $value of an option $name. 734 735=item C<< $config->delete($name); >> 736 737Deletes option $name from the configuration. 738 739=item C<< my $bool = $config->exists($name); >> 740 741Checks whether the key $name exists in the options hash of the configuration. 742 Similarly to Perl's "exist" for hashes, it returns true even when 743 the option's value is undefined. 744 745=item C<< my @keys = $config->keys; >> 746 747Similar to "keys %hash" in Perl. Returns an array of all option names. 748 749=item C<< my $options_hashref = $config->options; >> 750 751Returns the actual hash reference that holds all options within the configuration object. 752 753=item C<< my $config_clone = $config->clone; >> 754 755Clones $config into a new LaTeXML::Common::Config object, $config_clone. 756 757=back 758 759=head1 OPTION SYNOPSIS 760 761latexmlc [options] 762 763 Options: 764 --VERSION show version number. 765 --help shows this help message. 766 --destination=file specifies destination file. 767 --output=file [obsolete synonym for --destination] 768 --preload=module requests loading of an optional module; 769 can be repeated 770 --preamble=file loads a tex file containing document 771 frontmatter. MUST include \begin{document} 772 or equivalent 773 --postamble=file loads a tex file containing document 774 backmatter. MUST include \end{document} 775 or equivalent 776 --includestyles allows latexml to load raw *.sty file; 777 by default it avoids this. 
778 --base=dir sets the current working directory 779 --path=dir adds dir to the paths searched for files, 780 modules, etc; 781 --log=file specifies log file (default: STDERR) 782 --autoflush=count Automatically restart the daemon after 783 "count" inputs. Good practice for vast 784 batch jobs. (default: 100) 785 --timeout=secs Timecap for conversions (default 600) 786 --expire=secs Timecap for server inactivity (default 600) 787 --address=URL Specify server address (default: localhost) 788 --port=number Specify server port (default: 3354) 789 --documentid=id assign an id to the document root. 790 --quiet suppress messages (can repeat) 791 --verbose more informative output (can repeat) 792 --strict makes latexml less forgiving of errors 793 --bibtex processes a BibTeX bibliography. 794 --xml requests xml output (default). 795 --tex requests TeX output after expansion. 796 --box requests box output after expansion 797 and digestion. 798 --format=name requests "name" as the output format. 799 Supported: tex,box,xml,html4,html5,xhtml 800 html implies html5 801 --noparse suppresses parsing math (default: off) 802 --parse=name enables parsing math (default: on) 803 and selects parser framework "name". 804 Supported: RecDescent, no 805 --profile=name specify profile as defined in 806 LaTeXML::Common::Config 807 Supported: standard|math|fragment|... 808 (default: standard) 809 --mode=name Alias for profile 810 --cache_key=name Provides a name for the current option set, 811 to enable daemonized conversions without 812 needing re-initializing 813 --whatsin=chunk Defines the provided input chunk, 814 choose from document (default), fragment 815 and formula 816 --whatsout=chunk Defines the expected output chunk, 817 choose from document (default), fragment 818 and formula 819 --post requests a followup post-processing 820 --nopost forbids followup post-processing 821 --validate, --novalidate Enables (the default) or disables 822 validation of the source xml. 
823 --omitdoctype omits the Doctype declaration, 824 --noomitdoctype disables the omission (the default) 825 --numbersections enables (the default) the inclusion of 826 section numbers in titles, crossrefs. 827 --nonumbersections disables the above 828 --timestamp provides a timestamp (typically a time and date) 829 to be embedded in the comments 830 --embed requests an embeddable XHTML snippet 831 (requires: --post,--profile=fragment) 832 DEPRECATED: Use --whatsout=fragment 833 TODO: Remove completely 834 --stylesheet specifies a stylesheet, 835 to be used by the post-processor. 836 --css=cssfile adds a css stylesheet to html/xhtml 837 (can be repeated) 838 --nodefaultresources disables processing built-in resources 839 --javascript=jsfile adds a link to a javascript file into 840 html/html5/xhtml (can be repeated) 841 --icon=iconfile specify a file to use as a "favicon" 842 --xsltparameter=name:value passes parameters to the XSLT. 843 --split requests splitting each document 844 --nosplit disables the above (default) 845 --splitat sets level to split the document 846 --splitpath=xpath sets xpath expression to use for 847 splitting (default splits at 848 sections, if splitting is enabled) 849 --splitnaming=(id|idrelative|label|labelrelative) specifies 850 how to name split files (idrelative). 851 --scan scans documents to extract ids, 852 labels, 853 section titles, etc. (default) 854 --noscan disables the above 855 --crossref fills in crossreferences (default) 856 --nocrossref disables the above 857 --urlstyle=(server|negotiated|file) format to use for urls 858 (default server). 859 --navigationtoc=(context|none) generates a table of contents 860 in navigation bar 861 --index requests creating an index (default) 862 --noindex disables the above 863 --splitindex Splits index into pages per initial. 
864 --nosplitindex disables the above (default) 865 --permutedindex permutes index phrases in the index 866 --nopermutedindex disables the above (default) 867 --bibliography=file sets a bibliography file 868 --splitbibliography splits the bibliography into pages per 869 initial. 870 --nosplitbibliography disables the above (default) 871 --prescan carries out only the split (if 872 enabled) and scan, storing 873 cross-referencing data in dbfile 874 (default is complete processing) 875 --dbfile=dbfile sets file to store crossreferences 876 --sitedirectory=dir sets the base directory of the site 877 --sourcedirectory=dir sets the base directory of the 878 original TeX source 879 --source=input as an alternative to passing the input as 880 the last argument, after the option set 881 you can also specify it as the value here. 882 useful for predictable API calls 883 --mathimages converts math to images 884 (default for html4 format) 885 --nomathimages disables the above 886 --mathimagemagnification=mag specifies magnification factor 887 --presentationmathml converts math to Presentation MathML 888 (default for xhtml & html5 formats) 889 --pmml alias for --presentationmathml 890 --nopresentationmathml disables the above 891 --linelength=n formats presentation mathml to a 892 linelength max of n characters 893 --contentmathml converts math to Content MathML 894 --nocontentmathml disables the above (default) 895 --cmml alias for --contentmathml 896 --openmath converts math to OpenMath 897 --noopenmath disables the above (default) 898 --om alias for --openmath 899 --keepXMath preserves the intermediate XMath 900 representation (default is to remove) 901 --mathtex adds TeX annotation to parallel markup 902 --nomathtex disables the above (default) 903 --parallelmath use parallel math annotations (default) 904 --noparallelmath disable parallel math annotations 905 --plane1 use plane-1 unicode for symbols 906 (default, if needed) 907 --noplane1 do not use plane-1 unicode 908 
--graphicimages converts graphics to images (default) 909 --nographicimages disables the above 910 --graphicsmap=type.type specifies a graphics file mapping 911 --pictureimages converts picture environments to 912 images (default) 913 --nopictureimages disables the above 914 --svg converts picture environments to SVG 915 --nosvg disables the above (default) 916 --nocomments omit comments from the output 917 --inputencoding=enc specify the input encoding. 918 --debug=package enables debugging output for the named 919 package 920 921 922If you want to provide a TeX snippet directly on input, rather than supply a filename, 923use the C<literal:> protocol to prefix your snippet. 924 925=head1 OPTIONS AND ARGUMENTS 926 927=head2 General Options 928 929=over 4 930 931=item C<--verbose> 932 933Increases the verbosity of output during processing, used twice is pretty chatty. 934 Can be useful for getting more details when errors occur. 935 936=item C<--quiet> 937 938Reduces the verbosity of output during processing, used twice is pretty silent. 939 940=item C<--VERSION> 941 942Shows the version number of the LaTeXML package. 943 944=item C<--debug>=I<package> 945 946Enables debugging output for the named package. The package is given without the leading LaTeXML::. 947 948=item C<--base>=I<dir> 949 950Specifies the base working directory for the conversion server. 951 Useful when converting sets of documents that use relative paths. 952 953=item C<--log>=I<file> 954 955Specifies the log file; by default any conversion messages are printed to STDERR. 956 957=item C<--help> 958 959Shows this help message. 960 961=back 962 963 964=head2 Source Options 965 966=over 4 967 968=item C<--destination>=I<file> 969 970Specifies the destination file; by default the XML is written to STDOUT. 971 972 973=item C<--preload>=I<module> 974 975Requests the loading of an optional module or package. This may be useful if the TeX code 976 does not specifically require the module (eg. 
through input or usepackage). 977 For example, use C<--preload=LaTeX.pool> to force LaTeX mode. 978 979=item C<--preamble>=I<file> 980 981Requests the loading of a tex file with document frontmatter, to be read in before the converted document, 982 but after all --preload entries. 983 984Note that the given file MUST contain \begin{document} or an equivalent environment start, 985 when processing LaTeX documents. 986 987If the file does not contain content to appear in the final document, but only macro definitions and 988 setting of internal counters, it is more appropriate to use --preload instead. 989 990=item C<--postamble>=I<file> 991 992Requests the loading of a tex file with document backmatter, to be read in after the converted document. 993 994Note that the given file MUST contain \end{document} or an equivalent environment end, 995 when processing LaTeX documents. 996 997=item C<--sourcedirectory>=I<source> 998 999Specifies the directory where the original latex source is located. 1000Unless LaTeXML is run from that directory, or it can be determined 1001from the xml filename, it may be necessary to specify this option in 1002order to find graphics and style files. 1003 1004=item C<--path>=I<dir> 1005 1006Add I<dir> to the search paths used when searching for files, modules, style files, etc; 1007 somewhat like TEXINPUTS. This option can be repeated. 1008 1009=item C<--validate>, C<--novalidate> 1010 1011Enables (or disables) the validation of the source XML document (the default). 1012 1013=item C<--bibtex> 1014 1015Forces latexml to treat the file as a BibTeX bibliography. 1016 Note that the timing is slightly different than the usual 1017 case with BibTeX and LaTeX. In the latter case, BibTeX simply 1018 selects and formats a subset of the bibliographic entries; the 1019 actual TeX expansion is carried out when the result is included 1020 in a LaTeX document. 
In contrast, latexml processes and expands 1021 the entire bibliography; the selection of entries is done 1022 during post-processing. This also means that any packages 1023 that define macros used in the bibliography must be 1024 specified using the C<--preload> option. 1025 1026=item C<--inputencoding=>I<encoding> 1027 1028Specify the input encoding, eg. C<--inputencoding=iso-8859-1>. 1029 The encoding must be one known to Perl's Encode package. 1030 Note that this only enables the translation of the input bytes to 1031 UTF-8 used internally by LaTeXML, but does not affect catcodes. 1032 In such cases, you should be using the inputenc package. 1033 Note also that this does not affect the output encoding, which is 1034 always UTF-8. 1035 1036=back 1037 1038 1039=head2 TeX Conversion Options 1040 1041=over 4 1042 1043=item C<--includestyles> 1044 1045This option allows processing of style files (files with extensions C<sty>, 1046 C<cls>, C<clo>, C<cnf>). By default, these files are ignored unless a latexml 1047 implementation of them is found (with an extension of C<ltxml>). 1048 1049These style files generally fall into two classes: Those 1050 that merely affect document style are ignorable in the XML. 1051 Others define new markup and document structure, often using 1052 deeper LaTeX macros to achieve their ends. Although the omission 1053 will lead to other errors (missing macro definitions), it is 1054 unlikely that processing the TeX code in the style file will 1055 lead to a correct document. 1056 1057 1058=item C<--timeout>=I<secs> 1059 1060Set time cap for conversion jobs, in seconds. Any job failing to convert in the 1061 time range would return with a Fatal error of timing out. 1062 Default value is 600, set to 0 to disable. 1063 1064=item C<--nocomments> 1065 1066Normally latexml preserves comments from the source file, and adds a comment every 25 lines as 1067 an aid in tracking the source. The option --nocomments discards such comments. 
1068 1069=item C<--documentid>=I<id> 1070 1071Assigns an ID to the root element of the XML document. This ID is generally 1072 inherited as the prefix of ID's on all other elements within the document. 1073 This is useful when constructing a site of multiple documents so that 1074 all nodes have unique IDs. 1075 1076=item C<--strict> 1077 1078Specifies a strict processing mode. By default, undefined control sequences and 1079 invalid document constructs (that violate the DTD) give warning messages, but attempt 1080 to continue processing. Using C<--strict> makes them generate fatal errors. 1081 1082=item C<--post> 1083 1084Request post-processing, auto-enabled by any requested post-processor. Disabled by default. 1085 If post-processing is enabled, the graphics and cross-referencing processors are on by default. 1086 1087=back 1088 1089 1090=head2 Format Options 1091 1092=over 4 1093 1094=item C<--format>=C<(html|html5|html4|xhtml|xml|epub)> 1095 1096Specifies the output format for post processing. 1097By default, it will be guessed from the file extension of the destination 1098(if given), with html implying C<html5>, xhtml implying C<xhtml> and the 1099default being C<xml>, which you probably don't want. 1100 1101The C<html5> format converts the material to html5 form with mathematics as MathML; 1102C<html5> supports SVG. 1103C<html4> format converts the material to the earlier html form, version 4, 1104and the mathematics to png images. 1105C<xhtml> format converts to xhtml and uses presentation MathML (after attempting 1106to parse the mathematics) for representing the math. C<html5> similarly converts 1107math to presentation MathML. In these cases, any 1108graphics will be converted to web-friendly formats and/or copied to the 1109destination directory. If you simply specify C<html>, it will treat that as C<html5>. 
1110 1111For the default, C<xml>, the output is left in LaTeXML's internal xml, 1112although the math can be converted by enabling one of the math postprocessors, 1113such as --pmml to obtain presentation MathML. 1114For html, html5 and xhtml, a default stylesheet is provided, but see 1115the C<--stylesheet> option. 1116 1117=item C<--xml> 1118 1119Requests XML output; this is the default. 1120 DEPRECATED: use --format=xml instead 1121 1122=item C<--tex> 1123 1124Requests TeX output for debugging purposes; 1125 processing is only carried out through expansion and digestion. 1126 This may not be quite valid TeX, since Unicode may be introduced. 1127 1128=item C<--box> 1129 1130Requests Box output for debugging purposes; 1131 processing is carried out through expansion and digestion, 1132 and the result is printed. 1133 1134=item C<--profile> 1135 1136Variety of shorthand profiles. 1137 Note that the profiles come with a variety of preset options. 1138 You can examine any of them in their C<resources/Profiles/name.opt> 1139 file. 1140 1141Example: C<latexmlc --profile=math 'literal:1+2=3'> 1142 1143=item C<--omitdoctype>, C<--noomitdoctype> 1144 1145Omits (or includes) the document type declaration. 1146The default is to include it if the document model was based on a DTD. 1147 1148=item C<--numbersections>, C<--nonumbersections> 1149 1150Includes (default), or disables the inclusion of section, equation, etc, 1151numbers in the formatted document and crossreference links. 1152 1153=item C<--stylesheet>=I<xslfile> 1154 1155Requests the XSL transformation of the document using the given xslfile as stylesheet. 1156If the stylesheet is omitted, a `standard' one appropriate for the 1157format (html4, html5 or xhtml) will be used. 1158 1159=item C<--css>=I<cssfile> 1160 1161Adds I<cssfile> as a css stylesheet to be used in the transformed html/html5/xhtml. 
1162Multiple stylesheets can be used; they are included in the html in the 1163order given, following the default C<ltx-LaTeXML.css> (unless C<--nodefaultcss>). 1164The stylesheet is copied to the destination directory, unless it is an absolute url. 1165 1166Some stylesheets included in the distribution are 1167 --css=navbar-left Puts a navigation bar on the left. 1168 (default omits navbar) 1169 --css=navbar-right Puts a navigation bar on the right. 1170 --css=theme-blue A blue coloring theme for headings. 1171 --css=amsart A style suitable for journal articles. 1172 1173=item C<--javascript>=I<jsfile> 1174 1175Includes a link to the javascript file I<jsfile>, to be used in the transformed html/html5/xhtml. 1176Multiple javascript files can be included; they are linked in the html in the order given. 1177The javascript file is copied to the destination directory, unless it is an absolute url. 1178 1179=item C<--icon>=I<iconfile> 1180 1181Copies I<iconfile> to the destination directory and sets up the linkage in 1182the transformed html/html5/xhtml to use that as the "favicon". 1183 1184=item C<--nodefaultresources> 1185 1186Disables the copying and inclusion of resources added by the binding files; 1187This includes CSS, javascript or other files. This does not affect 1188resources explicitly requested by the C<--css> or C<--javascript> options. 1189 1190 1191=item C<--timestamp>=I<timestamp> 1192 1193Provides a timestamp (typically a time and date) to be embedded in 1194the comments by the stock XSLT stylesheets. 1195If you don't supply a timestamp, the current time and date will be used. 1196(You can use C<--timestamp=0> to omit the timestamp). 1197 1198=item C<--xsltparameter>=I<name>:I<value> 1199 1200Passes parameters to the XSLT stylesheet. 1201See the manual or the stylesheet itself for available parameters. 
1202 1203=back 1204 1205 1206=head2 Site & Crossreferencing Options 1207 1208=over 4 1209 1210=item C<--split>, C<--nosplit> 1211 1212Enables or disables (default) the splitting of documents into multiple `pages'. 1213If enabled, the document will be split into sections, bibliography, 1214index and appendices (if any) by default, unless C<--splitpath> is specified. 1215 1216=item C<--splitat=>I<unit> 1217 1218Specifies what level of the document to split at. Should be one 1219of C<chapter>, C<section> (the default), C<subsection> or C<subsubsection>. 1220For more control, see C<--splitpath>. 1221 1222=item C<--splitpath=>I<xpath> 1223 1224Specifies an XPath expression to select nodes that will generate separate 1225pages. The default splitpath is 1226 //ltx:section | //ltx:bibliography | //ltx:appendix | //ltx:index 1227 1228Specifying 1229 1230 --splitpath="//ltx:section | //ltx:subsection 1231 | //ltx:bibliography | //ltx:appendix | //ltx:index" 1232 1233would split the document at subsections as well as sections. 1234 1235=item C<--splitnaming>=C<(id|idrelative|label|labelrelative)> 1236 1237Specifies how to name the files for subdocuments created by splitting. 1238The values C<id> and C<label> simply use the id or label of the subdocument's 1239root node for its filename. C<idrelative> and C<labelrelative> use 1240the portion of the id or label that follows the parent document's 1241id or label. Furthermore, to impose structure and uniqueness, 1242if a split document has children that are also split, that document 1243(and its children) will be in a separate subdirectory with the 1244name index. 1245 1246=item C<--scan>, C<--noscan> 1247 1248Enables (default) or disables the scanning of documents for ids, labels, 1249references, indexmarks, etc, for use in filling in refs, cites, index and 1250so on. It may be useful to disable when generating documents not based 1251on the LaTeXML doctype. 
1252 1253=item C<--crossref>, C<--nocrossref> 1254 1255Enables (default) or disables the filling in of references, hrefs, etc 1256based on a previous scan (either from C<--scan>, or C<--dbfile>) 1257It may be useful to disable when generating documents not based 1258on the LaTeXML doctype. 1259 1260=item C<--urlstyle>=C<(server|negotiated|file)> 1261 1262This option determines the way that URLs within the documents 1263are formatted, depending on the way they are intended to be served. 1264The default, C<server>, eliminates unnecessary 1265trailing C<index.html>. With C<negotiated>, the trailing 1266file extension (typically C<html> or C<xhtml>) is eliminated. 1267The scheme C<file> preserves complete (but relative) urls 1268so that the site can be browsed as files without any server. 1269 1270=item C<--navigationtoc>=C<(context|none)> 1271 1272Generates a table of contents in the navigation bar; default is C<none>. 1273The `context' style of TOC, is somewhat verbose and reveals more detail near the current 1274page; it is most suitable for navigation bars placed on the left or right. 1275Other styles of TOC should be developed and added here, such as a short form. 1276 1277=item C<--index>, C<--noindex> 1278 1279Enables (default) or disables the generation of an index from indexmarks 1280embedded within the document. Enabling this has no effect unless 1281there is an index element in the document (generated by \printindex). 1282 1283=item C<--splitindex>, C<--nosplitindex> 1284 1285Enables or disables (default) the splitting of generated indexes 1286into separate pages per initial letter. 1287 1288=item C<--bibliography=>I<pathname> 1289 1290Specifies a bibliography generated from a BibTeX file 1291to be used to fill in a bibliography element. 1292Hand-written bibliographies placed in a C<thebibliography> environment 1293do not need this. The option has no effect unless 1294there is a bibliography element in the document (generated by \bibliography). 
1295 1296Note that this option provides the bibliography to be used to 1297fill in the bibliography element (generated by C<\bibliography>); 1298latexmlpost does not (currently) directly process and format such a bibliography. 1299 1300=item C<--splitbibliography>, C<--nosplitbibliography> 1301 1302Enables or disables (default) the splitting of generated bibliographies 1303into separate pages per initial letter. 1304 1305=item C<--prescan> 1306 1307By default C<latexmlpost> processes a single document into one 1308(or more; see C<--split>) destination files in a single pass. 1309When generating a complicated site consisting of several documents 1310it may be advantageous to first scan through the documents 1311to extract and store (in C<dbfile>) cross-referencing data 1312(such as ids, titles, urls, and so on). 1313A later pass then has complete information allowing all documents 1314to reference each other, and also constructs an index and bibliography 1315that reflects the entire document set. The same effect (though less efficient) 1316can be achieved by running C<latexmlpost> twice, provided a C<dbfile> 1317is specified. 1318 1319=item C<--dbfile>I<=file> 1320 1321Specifies a filename to use for the crossreferencing data when 1322using two-pass processing. This file may reside in the intermediate 1323destination directory. 1324 1325=item C<--sitedirectory=>I<dir> 1326 1327Specifies the base directory of the overall web site. 1328Pathnames in the database are stored in a form relative 1329to this directory to make it more portable. 1330 1331=item C<--embed> 1332 1333TODO: Deprecated, use --whatsout=fragment 1334Requests an embeddable XHTML div (requires: --post --format=xhtml), 1335 respectively the top division of the document's body. 1336 Caveat: This experimental mode is enabled only for fragment profile and post-processed 1337 documents (to XHTML). 
1338 1339=back 1340 1341 1342=head2 Math Options 1343 1344These options specify how math should be converted into other formats. 1345Multiple formats can be requested; how they will be combined 1346depends on the format and other options. 1347 1348=over 4 1349 1350=item C<--noparse> 1351 1352Suppresses parsing math (default: parsing is on) 1353 1354=item C<--parse=name> 1355 1356Enables parsing math (default: parsing is on) 1357 and selects parser framework "name". 1358 Supported: RecDescent, no 1359 Tip: --parse=no is equivalent to --noparse 1360 1361=item C<--mathimages>, C<--nomathimages> 1362 1363Requests or disables the conversion of math to images (png by default). 1364Conversion is the default for html4 format. 1365 1366=item C<--mathsvg>, C<--nomathsvg> 1367 1368Requests or disables the conversion of math to svg images. 1369 1370=item C<--mathimagemagnification=>I<factor> 1371 1372Specifies the magnification used for math images (both png and svg), 1373if they are made. Default is 1.75. 1374 1375=item C<--presentationmathml>, C<--nopresentationmathml> 1376 1377Requests or disables conversion of math to Presentation MathML. 1378Conversion is the default for xhtml and html5 formats. 1379 1380=item C<--linelength>I<=number> 1381 1382(Experimental) Line-breaks the generated Presentation 1383MathML so that it is no longer than I<number> `characters'. 1384 1385=item C<--plane1> 1386 1387Converts the content of Presentation MathML token elements to 1388the appropriate Unicode Plane-1 codepoints according to the selected font, 1389when applicable (the default). 1390 1391=item C<--hackplane1> 1392 1393Converts the content of Presentation MathML token elements to 1394the appropriate Unicode Plane-1 codepoints according to the selected font, 1395but only for the mathvariants double-struck, fraktur and script. 1396This gives support for current (as of August 2009) versions of 1397Firefox and MathPlayer, provided a sufficient set of fonts is available (eg. STIX). 
1398 1399=item C<--contentmathml>, C<--nocontentmathml> 1400 1401Requests or disables conversion of math to Content MathML. 1402Conversion is disabled by default. 1403B<Note> that this conversion is only partially implemented. 1404 1405=item C<--openmath> 1406 1407Requests or disables conversion of math to OpenMath. 1408Conversion is disabled by default. 1409B<Note> that this conversion is only partially implemented. 1410 1411=item C<--keepXMath>, C<--xmath> 1412 1413By default, when any of the MathML or OpenMath conversions 1414are used, the intermediate math representation will be removed; 1415this option preserves it; it will be used as secondary parallel 1416markup, when it follows the options for other math representations. 1417 1418=back 1419 1420 1421=head2 Graphics Options 1422 1423=over 4 1424 1425=item C<--graphicimages>, C<--nographicimages> 1426 1427Enables (default) or disables the conversion of graphics 1428to web-appropriate format (png). 1429 1430=item C<--graphicsmap=>I<sourcetype.desttype> 1431 1432Specifies a mapping of graphics file types. Typically, graphics elements 1433specify a graphics file that will be converted to a more appropriate file 1434target format; for example, postscript files used for graphics with LaTeX 1435will be converted to png format for use on the web. As with LaTeX, 1436when a graphics file is specified without a file type, the system will search 1437for the most appropriate target type file. 1438 1439When this option is used, it overrides I<and replaces> the defaults and provides 1440a mapping of I<sourcetype> to I<desttype>. The option can be 1441repeated to provide several mappings, with the earlier formats preferred. 1442If the I<desttype> is omitted, it specifies copying files of type I<sourcetype>, unchanged. 
1443 1444The default setting is equivalent to having supplied the options: 1445 svg png gif jpg jpeg eps.png ps.png ai.png pdf.png 1446 1447The first formats are preferred and used unchanged, while the latter 1448ones are converted to png. 1449 1450=item C<--pictureimages>, C<--nopictureimages> 1451 1452Enables (default) or disables the conversion of picture environments 1453and pstricks material into images. 1454 1455=item C<--svg>, C<--nosvg> 1456 1457Enables or disables (default) the conversion of picture environments 1458and pstricks material to SVG. 1459 1460=back 1461 1462 1463=head2 Daemon, Server and Client Options 1464 1465Options used only for daemonized conversions, e.g. talking to a remote server 1466via latexmlc, or local processing via the C<LaTeXML::Plugin::latexmls> plugin. 1467 1468For reliable communication and a stable conversion experience, invoke latexmls 1469only through the latexmlc client (you need to set --expire to a positive value, 1470in order to request auto-spawning of a dedicated conversion server). 1471 1472=over 4 1473 1474=item C<--autoflush>=I<count> 1475 1476Automatically restart the daemon after converting "count" inputs. 1477 Good practice for vast batch jobs. (default: 100) 1478 1479=item C<--expire>=I<secs> 1480 1481Set an inactivity timeout value in seconds. 1482 If the server process is not given any input for the specified duration, 1483 it will automatically terminate. 1484 The default value is 600 seconds, set to 0 to never expire, 1485 -1 to entirely opt out of using an independent server. 
1486 1487=item C<--address>=I<URL> 1488 1489Specify server address (default: localhost) 1490 1491=item C<--port>=I<number> 1492 1493Specify server port (default: 3334 for math, 3344 for fragment and 3354 for standard) 1494 1495=back 1496 1497=head1 AUTHOR 1498 1499Bruce Miller <bruce.miller@nist.gov> 1500Deyan Ginev <deyan.ginev@nist.gov> 1501 1502=head1 COPYRIGHT 1503 1504Public domain software, produced as part of work done by the 1505United States Government & not subject to copyright in the US. 1506 1507=cut 1508