1#========================================================================
2#
3# LaTeX::Encode
4#
5# DESCRIPTION
6#   Provides a function to encode text that contains characters
7#   special to LaTeX.
8#
9# AUTHOR
10#   Andrew Ford <a.ford@ford-mason.co.uk>
11#
12# COPYRIGHT
13#   Copyright (C) 2007-2012 Andrew Ford.   All Rights Reserved.
14#
15#   This module is free software; you can redistribute it and/or
16#   modify it under the same terms as Perl itself.
17#
18#   $Id: Encode.pm 32 2012-09-30 20:33:42Z andrew $
19#========================================================================
20
21package LaTeX::Encode;
22
23use strict;
24use warnings;
25
26require 5.008_001;
27
28use Readonly;
29
30use base qw(Exporter);
31
# Module version.
our $VERSION = '0.092.0';

# Exporter configuration: latex_encode() is exported by default, the
# table-maintenance functions only on request, and the ':all' tag
# names all four functions.
our @EXPORT    = ('latex_encode');
our @EXPORT_OK = (
    'add_latex_encodings',
    'remove_latex_encodings',
    'reset_latex_encodings',
);
our %EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] );

# Keywords that import() recognises in the "use" argument list.
Readonly my $IMPORT_TAG_ADD    => 'add';
Readonly my $IMPORT_TAG_REMOVE => 'remove';

# List of [ tag => argument ] pairs recorded by import() so that
# reset_latex_encodings() can re-apply them.
our @mappings_specified_on_import;

# The built-in encoding table; reset_latex_encodings() copies it into
# %latex_encoding.
my %latex_encoding_base;

# The table currently in effect: character => LaTeX replacement text.
our %latex_encoding;

# Looked up by latex_encode() for each encoded character; the value
# found is recorded as a key in the caller's 'packages' hash
# (presumably the name of a LaTeX package -- the table is populated
# outside this section).
our %provided_by;

# Pattern used by latex_encode() to find the characters to replace
# (presumably built by _compile_encoding_regexp(), which is defined
# outside this section).
our $encoded_char_re;
53
54# Encode text with characters special to LaTeX
55
# latex_encode($text, %options)
# latex_encode($text, \%options)
#
# Returns $text with every character matched by $encoded_char_re
# replaced by its entry in %latex_encoding.  Recognised options:
#
#   except    - string listing characters that must be left untouched.
#               A "-" between two characters is still read as a range.
#   iquotes   - if true, straight quotes are converted to the LaTeX
#               forms ` and `` (opening) and '' (closing double quote)
#   packages  - hash reference; for each encoded character that has an
#               entry in %provided_by, that entry is set as a key with
#               the value 1
#   unmatched - how to treat characters in the ranges U+0000,
#               U+0002-U+0009, U+000B-U+000C, U+000E-U+001F and
#               U+007F-U+FFFF that are still present after encoding;
#               handed to _replace_unencoded_char()

sub latex_encode {
    my $text    = shift;
    my $options = ref $_[0] ? shift : { @_ };

    my $exceptions    = $options->{except};
    my $iquotes       = $options->{iquotes};
    my $packages_reqd = $options->{packages};
    my $unmatched     = $options->{unmatched};


    # If a list of exception characters was specified then we replace
    # those characters in the text string with something that is not
    # going to match the encoding regular expression.  The encoding we
    # use is a hex 01 byte followed by six hexadecimal digits (enough
    # for any Unicode code point, so the decoding step below can rely
    # on a fixed width).

    if ($exceptions) {

        # The list is interpolated into a bracketed character class,
        # so the characters that would terminate or otherwise alter
        # the class have to be escaped first.

        $exceptions =~ s{ ([\\\[\]^]) }{\\$1}gx;
        $text =~ s{ ([\x{01}$exceptions]) }
                  { sprintf("\x{01}%06x", ord($1)) }gxe;
    }

    # Deal with "intelligent quotes".  This can be done separately
    # from the rest of the encoding as the characters ` and ' are not
    # encoded.

    if ($iquotes) {

        # A single or double quote before a word character, preceded
        # by start of line, whitespace or punctuation gets converted
        # to "`" or "``" respectively.

        $text =~ s{ ( ^ | [\s\p{IsPunct}] )( ['"] ) (?= \w ) }
                  { $2 eq '"' ? "$1``" : "$1`" }mgxe;

        # A double quote preceded by a word or punctuation character
        # and followed by whitespace or end of line gets converted to
        # "''".  (Final single quotes are represented by themselves so
        # we don't need to worry about those.)

        $text =~ s{ (?<= [\w\p{IsPunct}] ) " (?= \s | $ ) }
                  { "''" }mgxe;
    }


    # Replace any characters that need encoding, noting in the
    # caller's 'packages' hash (if one was supplied) the %provided_by
    # entry of each character that is replaced.

    $text =~ s{ ($encoded_char_re) }
              { $packages_reqd->{$provided_by{$1}} = 1
                    if ref $packages_reqd and exists $provided_by{$1};
                $latex_encoding{$1} }gsxe;

    # Anything left over from the ranges below has no encoding.  The
    # hex 01 marker byte is deliberately not in the class, so protected
    # exception characters pass through this step untouched.

    $text =~ s{ ([\x{00}\x{02}-\x{09}\x{0b}\x{0c}\x{0e}-\x{1f}\x{007f}-\x{ffff}]) }
              { _replace_unencoded_char(ord($1), $unmatched) }gxse;


    # If the caller specified exceptions then we need to decode them

    if ($exceptions) {
        $text =~ s{ \x{01} ([0-9a-f]{6}) }{ chr(hex($1)) }gxe;
    }

    return $text;
}
118
119
# _replace_unencoded_char($charcode, $action)
#
# Produces the replacement text for a character (given as its numeric
# code) that has no encoding.  $action selects the behaviour:
#
#   code reference - called with $charcode; its result is returned
#   'ignore'       - the character is dropped (empty string returned)
#   other true     - used as a macro name:  \<action>{<hex code>}
#   false          - as above with the macro name "unmatched"
#
# The hex code is lower case and zero-padded to at least four digits.

sub _replace_unencoded_char {
    my ($charcode, $action) = @_;

    return $action->($charcode) if ref $action eq 'CODE';

    my $macro = $action || 'unmatched';
    return '' if $macro eq 'ignore';

    return sprintf('\\%s{%04x}', $macro, $charcode);
}
133
134
135# Add encodings to the encoding table
136# Return the changed encodings
137
# add_latex_encodings(%new_encoding)
#
# Stores each character => encoding pair in %latex_encoding, skipping
# pairs that are already present with the same encoding.  The encoding
# regular expression is recompiled only if the table actually changed.
#
# In void context nothing is returned.  Otherwise the return value is
# a hash of the previous encodings of those characters whose existing
# entry was overwritten (newly added characters are not included).

sub add_latex_encodings {
    my %new_encoding = @_;
    my $want_result  = defined wantarray;
    my %old_encoding;
    my $changed = 0;

    for my $char (keys %new_encoding) {
        my $known = exists $latex_encoding{$char};

        # Nothing to do if the entry is already what the caller wants.
        next if $known and $latex_encoding{$char} eq $new_encoding{$char};

        if ($want_result and $known) {
            $old_encoding{$char} = $latex_encoding{$char};
        }
        $latex_encoding{$char} = $new_encoding{$char};
        $changed = 1;
    }

    _compile_encoding_regexp() if $changed;

    return unless $want_result;
    return %old_encoding;
}
154
155
156# Remove encodings from the encoding table
157# Return the removed encodings
158
# remove_latex_encodings(@keys)
#
# Deletes the listed characters from %latex_encoding; characters that
# have no entry are silently skipped.  The encoding regular expression
# is recompiled if at least one entry was deleted.
#
# In void context nothing is returned.  Otherwise the return value is
# a hash mapping each deleted character to the encoding it had.

sub remove_latex_encodings {
    my @chars = @_;
    my %removed_encoding;

    for my $char (@chars) {
        next unless exists $latex_encoding{$char};
        $removed_encoding{$char} = delete $latex_encoding{$char};
    }

    if (%removed_encoding) {
        _compile_encoding_regexp();
    }

    return if !defined wantarray;
    return %removed_encoding;
}
172
173
174# Reset the encoding table
175
# reset_latex_encodings($forget_import_specifiers)
# CLASS->reset_latex_encodings($forget_import_specifiers)
#
# Restores %latex_encoding to the contents of %latex_encoding_base.
# Unless $forget_import_specifiers is true, the 'add' and 'remove'
# specifications recorded by import() are then applied again, in the
# order in which they were recorded.  Finally the encoding regular
# expression is recompiled.  Returns nothing.
#
# The first argument is taken to be a class name only if it contains
# "::"; otherwise (including when no argument is given) it is treated
# as the $forget_import_specifiers flag.

sub reset_latex_encodings {
    my ($class, $forget_import_specifiers) = @_;

    # Function-style call: the first argument, if any, is the flag.
    # The defined() test avoids an "uninitialized value" warning when
    # the function is called without arguments.
    if (!defined $class or $class !~ /::/) {
        $forget_import_specifiers = $class;
    }

    %latex_encoding = %latex_encoding_base;

    if (! $forget_import_specifiers) {
        foreach my $spec (@mappings_specified_on_import) {
            my ($tag, $argument) = @{$spec};

            if ($tag eq $IMPORT_TAG_ADD) {
                add_latex_encodings(%{$argument});
            }
            elsif ($tag eq $IMPORT_TAG_REMOVE) {
                remove_latex_encodings(@{$argument});
            }
        }
    }
    _compile_encoding_regexp();

    return;
}
200
201
202# Import function - picks out 'add' and 'remove' tags and adds or removes encodings
203# appropriately
204
# import(@list)
#
# Called by "use LaTeX::Encode ...".  Two kinds of pair are picked out
# of the argument list and removed from it:
#
#   add    => \%encodings   - passed to add_latex_encodings()
#   remove => \@characters  - passed to remove_latex_encodings()
#
# Each pair is also appended to @mappings_specified_on_import so that
# reset_latex_encodings() can apply it again later.  Whatever remains
# of the list is handed to Exporter's export_to_level() so that the
# requested symbols are exported into the package that issued the "use".

sub import {
    my ($self, @list) = @_;

    my $i = 0;
    while ($i < @list) {
        if ($list[$i] eq $IMPORT_TAG_ADD) {
            # splice() removes the tag and its argument, so $i already
            # indexes the next unprocessed element afterwards.
            my (undef, $to_add) = splice(@list, $i, 2);
            add_latex_encodings(%$to_add);
            push @mappings_specified_on_import, [ $IMPORT_TAG_ADD => $to_add ];
        }
        elsif ($list[$i] eq $IMPORT_TAG_REMOVE) {
            my (undef, $to_remove) = splice(@list, $i, 2);
            remove_latex_encodings(@$to_remove);
            push @mappings_specified_on_import, [ $IMPORT_TAG_REMOVE => $to_remove ];
        }
        else {
            $i++;
        }
    }
    $self->export_to_level(1, $self, @list);
    return;
}
227
228
229%latex_encoding_base = (
230
231    chr(0x0022) => '{\\textacutedbl}',            # QUOTATION MARK                               (&quot;)
232    chr(0x0023) => '\\#',                         # NUMBER SIGN                                  (&#35;)
233    chr(0x0024) => '\\$',                         # DOLLAR SIGN                                  (&#36;)
234    chr(0x0025) => '\\%',                         # PERCENT SIGN                                 (&#37;)
235    chr(0x0026) => '\\&',                         # AMPERSAND                                    (&amp;)
236    chr(0x003c) => '{\\textlangle}',              # LESS-THAN SIGN                               (&lt;)
237    chr(0x003e) => '{\\textrangle}',              # GREATER-THAN SIGN                            (&gt;)
238    chr(0x005c) => '{\\textbackslash}',           # REVERSE SOLIDUS                              (&#92;)
239    chr(0x005e) => '\\^{ }',                      # CIRCUMFLEX ACCENT                            (&#94;)
240    chr(0x005f) => '\\_',                         # LOW LINE                                     (&#95;)
241    chr(0x007b) => '\\{',                         # LEFT CURLY BRACKET                           (&#123;)
242    chr(0x007d) => '\\}',                         # RIGHT CURLY BRACKET                          (&#125;)
243    chr(0x007e) => '{\\texttildelow}',            # TILDE                                        (&#126;)
244
245    # C1 Controls and Latin-1 Supplement
246
247    chr(0x00a0) => '~',                           # NO-BREAK SPACE                               (&nbsp;)
248    chr(0x00a1) => '{\\textexclamdown}',          # INVERTED EXCLAMATION MARK                    (&iexcl;)
249    chr(0x00a2) => '{\\textcent}',                # CENT SIGN                                    (&cent;)
250    chr(0x00a3) => '{\\textsterling}',            # POUND SIGN                                   (&pound;)
251    chr(0x00a4) => '{\\textcurrency}',            # CURRENCY SIGN                                (&curren;)
252    chr(0x00a5) => '{\\textyen}',                 # YEN SIGN                                     (&yen;)
253    chr(0x00a6) => '{\\textbrokenbar}',           # BROKEN BAR                                   (&brvbar;)
254    chr(0x00a7) => '{\\textsection}',             # SECTION SIGN                                 (&sect;)
255    chr(0x00a8) => '{\\textasciidieresis}',       # DIAERESIS                                    (&uml;)
256    chr(0x00a9) => '{\\textcopyright}',           # COPYRIGHT SIGN                               (&copy;)
257    chr(0x00aa) => '{\\textordfeminine}',         # FEMININE ORDINAL INDICATOR                   (&ordf;)
258    chr(0x00ab) => '{\\guillemotleft}',           # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK    (&laquo;)
259    chr(0x00ac) => '{\\textlnot}',                # NOT SIGN                                     (&not;)
260    chr(0x00ad) => '\\-',                         # SOFT HYPHEN                                  (&shy;)
261    chr(0x00ae) => '{\\textregistered}',          # REGISTERED SIGN                              (&reg;)
262    chr(0x00af) => '{\\textasciimacron}',         # MACRON                                       (&macr;)
263    chr(0x00b0) => '{\\textdegree}',              # DEGREE SIGN                                  (&deg;)
264    chr(0x00b1) => '{\\textpm}',                  # PLUS-MINUS SIGN                              (&plusmn;)
265    chr(0x00b2) => '{\\texttwosuperior}',         # SUPERSCRIPT TWO                              (&sup2;)
266    chr(0x00b3) => '{\\textthreesuperior}',       # SUPERSCRIPT THREE                            (&sup3;)
267    chr(0x00b4) => '{\\textasciiacute}',          # ACUTE ACCENT                                 (&acute;)
268    chr(0x00b5) => '{\\textmu}',                  # MICRO SIGN                                   (&micro;)
269    chr(0x00b6) => '{\\textparagraph}',           # PILCROW SIGN                                 (&para;)
270    chr(0x00b7) => '{\\textperiodcentered}',      # MIDDLE DOT                                   (&middot;)
271    chr(0x00b8) => '{\\c{~}}',                    # CEDILLA                                      (&cedil;)
272    chr(0x00b9) => '{\\textonesuperior}',         # SUPERSCRIPT ONE                              (&sup1;)
273    chr(0x00ba) => '{\\textordmasculine}',        # MASCULINE ORDINAL INDICATOR                  (&ordm;)
274    chr(0x00bb) => '{\\guillemotright}',          # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK   (&raquo;)
275    chr(0x00bc) => '{\\textonequarter}',          # VULGAR FRACTION ONE QUARTER                  (&frac14;)
276    chr(0x00bd) => '{\\textonehalf}',             # VULGAR FRACTION ONE HALF                     (&frac12;)
277    chr(0x00be) => '{\\textthreequarters}',       # VULGAR FRACTION THREE QUARTERS               (&frac34;)
278    chr(0x00bf) => '{\\textquestiondown}',        # INVERTED QUESTION MARK                       (&iquest;)
279    chr(0x00c0) => '{\\`A}',                      # LATIN CAPITAL LETTER A WITH GRAVE            (&Agrave;)
280    chr(0x00c1) => '{\\\'A}',                     # LATIN CAPITAL LETTER A WITH ACUTE            (&Aacute;)
281    chr(0x00c2) => '{\\^A}',                      # LATIN CAPITAL LETTER A WITH CIRCUMFLEX       (&Acirc;)
282    chr(0x00c3) => '{\\~A}',                      # LATIN CAPITAL LETTER A WITH TILDE            (&Atilde;)
283    chr(0x00c4) => '{\\"A}',                      # LATIN CAPITAL LETTER A WITH DIAERESIS        (&Auml;)
284    chr(0x00c5) => '{\\AA}',                      # LATIN CAPITAL LETTER A WITH RING ABOVE       (&Aring;)
285    chr(0x00c6) => '{\\AE}',                      # LATIN CAPITAL LETTER AE                      (&AElig;)
286    chr(0x00c7) => '\\c{C}',                      # LATIN CAPITAL LETTER C WITH CEDILLA          (&Ccedil;)
287    chr(0x00c8) => '{\\`E}',                      # LATIN CAPITAL LETTER E WITH GRAVE            (&Egrave;)
288    chr(0x00c9) => '{\\\'E}',                     # LATIN CAPITAL LETTER E WITH ACUTE            (&Eacute;)
289    chr(0x00ca) => '{\\^E}',                      # LATIN CAPITAL LETTER E WITH CIRCUMFLEX       (&Ecirc;)
290    chr(0x00cb) => '{\\"E}',                      # LATIN CAPITAL LETTER E WITH DIAERESIS        (&Euml;)
291    chr(0x00cc) => '{\\`I}',                      # LATIN CAPITAL LETTER I WITH GRAVE            (&Igrave;)
292    chr(0x00cd) => '{\\\'I}',                     # LATIN CAPITAL LETTER I WITH ACUTE            (&Iacute;)
293    chr(0x00ce) => '{\\^I}',                      # LATIN CAPITAL LETTER I WITH CIRCUMFLEX       (&Icirc;)
294    chr(0x00cf) => '{\\"I}',                      # LATIN CAPITAL LETTER I WITH DIAERESIS        (&Iuml;)
295    chr(0x00d0) => '{\\DH}',                      # LATIN CAPITAL LETTER ETH                     (&ETH;)
296    chr(0x00d1) => '{\\~N}',                      # LATIN CAPITAL LETTER N WITH TILDE            (&Ntilde;)
297    chr(0x00d2) => '{\\`O}',                      # LATIN CAPITAL LETTER O WITH GRAVE            (&Ograve;)
298    chr(0x00d3) => '{\\\'O}',                     # LATIN CAPITAL LETTER O WITH ACUTE            (&Oacute;)
299    chr(0x00d4) => '{\\^O}',                      # LATIN CAPITAL LETTER O WITH CIRCUMFLEX       (&Ocirc;)
300    chr(0x00d5) => '{\\~O}',                      # LATIN CAPITAL LETTER O WITH TILDE            (&Otilde;)
301    chr(0x00d6) => '{\\"O}',                      # LATIN CAPITAL LETTER O WITH DIAERESIS        (&Ouml;)
302    chr(0x00d7) => '{\\texttimes}',               # MULTIPLICATION SIGN                          (&times;)
303    chr(0x00d8) => '{\\O}',                       # LATIN CAPITAL LETTER O WITH STROKE           (&Oslash;)
304    chr(0x00d9) => '{\\`U}',                      # LATIN CAPITAL LETTER U WITH GRAVE            (&Ugrave;)
305    chr(0x00da) => '{\\\'U}',                     # LATIN CAPITAL LETTER U WITH ACUTE            (&Uacute;)
306    chr(0x00db) => '{\\^U}',                      # LATIN CAPITAL LETTER U WITH CIRCUMFLEX       (&Ucirc;)
307    chr(0x00dc) => '{\\"U}',                      # LATIN CAPITAL LETTER U WITH DIAERESIS        (&Uuml;)
308    chr(0x00dd) => '{\\\'Y}',                     # LATIN CAPITAL LETTER Y WITH ACUTE            (&Yacute;)
309    chr(0x00de) => '{\\TH}',                      # LATIN CAPITAL LETTER THORN                   (&THORN;)
310    chr(0x00df) => '{\\ss}',                      # LATIN SMALL LETTER SHARP S                   (&szlig;)
311    chr(0x00e0) => '{\\`a}',                      # LATIN SMALL LETTER A WITH GRAVE              (&agrave;)
312    chr(0x00e1) => '{\\\'a}',                     # LATIN SMALL LETTER A WITH ACUTE              (&aacute;)
313    chr(0x00e2) => '{\\^a}',                      # LATIN SMALL LETTER A WITH CIRCUMFLEX         (&acirc;)
314    chr(0x00e3) => '{\\~a}',                      # LATIN SMALL LETTER A WITH TILDE              (&atilde;)
315    chr(0x00e4) => '{\\"a}',                      # LATIN SMALL LETTER A WITH DIAERESIS          (&auml;)
316    chr(0x00e5) => '{\\aa}',                      # LATIN SMALL LETTER A WITH RING ABOVE         (&aring;)
317    chr(0x00e6) => '{\\ae}',                      # LATIN SMALL LETTER AE                        (&aelig;)
318    chr(0x00e7) => '\\c{c}',                      # LATIN SMALL LETTER C WITH CEDILLA            (&ccedil;)
319    chr(0x00e8) => '{\\`e}',                      # LATIN SMALL LETTER E WITH GRAVE              (&egrave;)
320    chr(0x00e9) => '{\\\'e}',                     # LATIN SMALL LETTER E WITH ACUTE              (&eacute;)
321    chr(0x00ea) => '{\\^e}',                      # LATIN SMALL LETTER E WITH CIRCUMFLEX         (&ecirc;)
322    chr(0x00eb) => '{\\"e}',                      # LATIN SMALL LETTER E WITH DIAERESIS          (&euml;)
323    chr(0x00ec) => '{\\`i}',                      # LATIN SMALL LETTER I WITH GRAVE              (&igrave;)
324    chr(0x00ed) => '{\\\'i}',                     # LATIN SMALL LETTER I WITH ACUTE              (&iacute;)
325    chr(0x00ee) => '{\\^i}',                      # LATIN SMALL LETTER I WITH CIRCUMFLEX         (&icirc;)
326    chr(0x00ef) => '{\\"i}',                      # LATIN SMALL LETTER I WITH DIAERESIS          (&iuml;)
327    chr(0x00f0) => '{\\dh}',                      # LATIN SMALL LETTER ETH                       (&eth;)
328    chr(0x00f1) => '{\\~n}',                      # LATIN SMALL LETTER N WITH TILDE              (&ntilde;)
329    chr(0x00f2) => '{\\`o}',                      # LATIN SMALL LETTER O WITH GRAVE              (&ograve;)
330    chr(0x00f3) => '{\\\'o}',                     # LATIN SMALL LETTER O WITH ACUTE              (&oacute;)
331    chr(0x00f4) => '{\\^o}',                      # LATIN SMALL LETTER O WITH CIRCUMFLEX         (&ocirc;)
332    chr(0x00f5) => '{\\~o}',                      # LATIN SMALL LETTER O WITH TILDE              (&otilde;)
333    chr(0x00f6) => '{\\"o}',                      # LATIN SMALL LETTER O WITH DIAERESIS          (&ouml;)
334    chr(0x00f7) => '{\\textdiv}',                 # DIVISION SIGN                                (&divide;)
335    chr(0x00f8) => '{\\o}',                       # LATIN SMALL LETTER O WITH STROKE             (&oslash;)
336    chr(0x00f9) => '{\\`u}',                      # LATIN SMALL LETTER U WITH GRAVE              (&ugrave;)
337    chr(0x00fa) => '{\\\'u}',                     # LATIN SMALL LETTER U WITH ACUTE              (&uacute;)
338    chr(0x00fb) => '{\\^u}',                      # LATIN SMALL LETTER U WITH CIRCUMFLEX         (&ucirc;)
339    chr(0x00fc) => '{\\"u}',                      # LATIN SMALL LETTER U WITH DIAERESIS          (&uuml;)
340    chr(0x00fd) => '{\\\'y}',                     # LATIN SMALL LETTER Y WITH ACUTE              (&yacute;)
341    chr(0x00fe) => '{\\th}',                      # LATIN SMALL LETTER THORN                     (&thorn;)
342    chr(0x00ff) => '{\\"y}',                      # LATIN SMALL LETTER Y WITH DIAERESIS          (&yuml;)
343
344
345    # Latin Extended-A
346
347    chr(0x0100) => '\\={A}',                      # LATIN CAPITAL LETTER A WITH MACRON
348    chr(0x0101) => '\\={a}',                      # LATIN SMALL LETTER A WITH MACRON
349    chr(0x0102) => '\\u{A}',                      # LATIN CAPITAL LETTER A WITH BREVE
350    chr(0x0103) => '\\u{a}',                      # LATIN SMALL LETTER A WITH BREVE
351    chr(0x0104) => '\\k{A}',                      # LATIN CAPITAL LETTER A WITH OGONEK
352    chr(0x0105) => '\\k{a}',                      # LATIN SMALL LETTER A WITH OGONEK
353    chr(0x0106) => '\\\'{C}',                     # LATIN CAPITAL LETTER C WITH ACUTE
354    chr(0x0107) => '\\\'{c}',                     # LATIN SMALL LETTER C WITH ACUTE
355    chr(0x0108) => '\\^{C}',                      # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
356    chr(0x0109) => '\\^{c}',                      # LATIN SMALL LETTER C WITH CIRCUMFLEX
357    chr(0x010a) => '\\.{C}',                      # LATIN CAPITAL LETTER C WITH DOT ABOVE
358    chr(0x010b) => '\\.{c}',                      # LATIN SMALL LETTER C WITH DOT ABOVE
359    chr(0x010c) => '\\v{C}',                      # LATIN CAPITAL LETTER C WITH CARON
360    chr(0x010d) => '\\v{c}',                      # LATIN SMALL LETTER C WITH CARON
361    chr(0x010e) => '\\v{D}',                      # LATIN CAPITAL LETTER D WITH CARON
362    chr(0x010f) => '\\v{d}',                      # LATIN SMALL LETTER D WITH CARON
363    chr(0x0112) => '\\={E}',                      # LATIN CAPITAL LETTER E WITH MACRON
364    chr(0x0113) => '\\={e}',                      # LATIN SMALL LETTER E WITH MACRON
365    chr(0x0114) => '\\u{E}',                      # LATIN CAPITAL LETTER E WITH BREVE
366    chr(0x0115) => '\\u{e}',                      # LATIN SMALL LETTER E WITH BREVE
367    chr(0x0116) => '\\.{E}',                      # LATIN CAPITAL LETTER E WITH DOT ABOVE
368    chr(0x0117) => '\\.{e}',                      # LATIN SMALL LETTER E WITH DOT ABOVE
369    chr(0x0118) => '\\k{E}',                      # LATIN CAPITAL LETTER E WITH OGONEK
370    chr(0x0119) => '\\k{e}',                      # LATIN SMALL LETTER E WITH OGONEK
371    chr(0x011a) => '\\v{E}',                      # LATIN CAPITAL LETTER E WITH CARON
372    chr(0x011b) => '\\v{e}',                      # LATIN SMALL LETTER E WITH CARON
373    chr(0x011c) => '\\^{G}',                      # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
374    chr(0x011d) => '\\^{g}',                      # LATIN SMALL LETTER G WITH CIRCUMFLEX
375    chr(0x011e) => '\\u{G}',                      # LATIN CAPITAL LETTER G WITH BREVE
376    chr(0x011f) => '\\u{g}',                      # LATIN SMALL LETTER G WITH BREVE
377    chr(0x0120) => '\\.{G}',                      # LATIN CAPITAL LETTER G WITH DOT ABOVE
378    chr(0x0121) => '\\.{g}',                      # LATIN SMALL LETTER G WITH DOT ABOVE
379    chr(0x0122) => '\\c{G}',                      # LATIN CAPITAL LETTER G WITH CEDILLA
380    chr(0x0123) => '\\c{g}',                      # LATIN SMALL LETTER G WITH CEDILLA
381    chr(0x0124) => '\\^{H}',                      # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
382    chr(0x0125) => '\\^{h}',                      # LATIN SMALL LETTER H WITH CIRCUMFLEX
383    chr(0x0128) => '\\~{I}',                      # LATIN CAPITAL LETTER I WITH TILDE
384    chr(0x0129) => '\\~{\\i}',                    # LATIN SMALL LETTER I WITH TILDE
385    chr(0x012a) => '\\={I}',                      # LATIN CAPITAL LETTER I WITH MACRON
386    chr(0x012b) => '\\={\\i}',                    # LATIN SMALL LETTER I WITH MACRON
387    chr(0x012c) => '\\u{I}',                      # LATIN CAPITAL LETTER I WITH BREVE
388    chr(0x012d) => '\\u{\\i}',                    # LATIN SMALL LETTER I WITH BREVE
389    chr(0x012e) => '\\k{I}',                      # LATIN CAPITAL LETTER I WITH OGONEK
390    chr(0x012f) => '\\k{i}',                      # LATIN SMALL LETTER I WITH OGONEK
391    chr(0x0130) => '\\.{I}',                      # LATIN CAPITAL LETTER I WITH DOT ABOVE
392    chr(0x0131) => '{\\i}',                       # LATIN SMALL LETTER DOTLESS I
393    chr(0x0134) => '\\^{J}',                      # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
394    chr(0x0135) => '\\^{\\j}',                    # LATIN SMALL LETTER J WITH CIRCUMFLEX
395    chr(0x0136) => '\\c{K}',                      # LATIN CAPITAL LETTER K WITH CEDILLA
396    chr(0x0137) => '\\c{k}',                      # LATIN SMALL LETTER K WITH CEDILLA
397    chr(0x0139) => '\\\'{L}',                     # LATIN CAPITAL LETTER L WITH ACUTE
398    chr(0x013a) => '\\\'{l}',                     # LATIN SMALL LETTER L WITH ACUTE
399    chr(0x013b) => '\\c{L}',                      # LATIN CAPITAL LETTER L WITH CEDILLA
400    chr(0x013c) => '\\c{l}',                      # LATIN SMALL LETTER L WITH CEDILLA
401    chr(0x013d) => '\\v{L}',                      # LATIN CAPITAL LETTER L WITH CARON
402    chr(0x013e) => '\\v{l}',                      # LATIN SMALL LETTER L WITH CARON
403    chr(0x0141) => "{\\L}",                       # 0x0141 LATIN CAPITAL LETTER L WITH STROKE
404    chr(0x0142) => "{\\l}",                       # 0x0142 LATIN SMALL LETTER L WITH STROKE
405    chr(0x0143) => '\\\'{N}',                     # LATIN CAPITAL LETTER N WITH ACUTE
406    chr(0x0144) => '\\\'{n}',                     # LATIN SMALL LETTER N WITH ACUTE
407    chr(0x0145) => '\\c{N}',                      # LATIN CAPITAL LETTER N WITH CEDILLA
408    chr(0x0146) => '\\c{n}',                      # LATIN SMALL LETTER N WITH CEDILLA
409    chr(0x0147) => '\\v{N}',                      # LATIN CAPITAL LETTER N WITH CARON
410    chr(0x0148) => '\\v{n}',                      # LATIN SMALL LETTER N WITH CARON
411    chr(0x014c) => '\\={O}',                      # LATIN CAPITAL LETTER O WITH MACRON
412    chr(0x014d) => '\\={o}',                      # LATIN SMALL LETTER O WITH MACRON
413    chr(0x014e) => '\\u{O}',                      # LATIN CAPITAL LETTER O WITH BREVE
414    chr(0x014f) => '\\u{o}',                      # LATIN SMALL LETTER O WITH BREVE
415    chr(0x0152) => '{\\OE}',                      # LATIN CAPITAL LIGATURE OE                    (&OElig;)
416    chr(0x0153) => '{\\oe}',                      # LATIN SMALL LIGATURE OE                      (&oelig;)
417    chr(0x0154) => '\\\'{R}',                     # LATIN CAPITAL LETTER R WITH ACUTE
418    chr(0x0155) => '\\\'{r}',                     # LATIN SMALL LETTER R WITH ACUTE
419    chr(0x0156) => '\\c{R}',                      # LATIN CAPITAL LETTER R WITH CEDILLA
420    chr(0x0157) => '\\c{r}',                      # LATIN SMALL LETTER R WITH CEDILLA
421    chr(0x0158) => '\\v{R}',                      # LATIN CAPITAL LETTER R WITH CARON
422    chr(0x0159) => '\\v{r}',                      # LATIN SMALL LETTER R WITH CARON
423    chr(0x015a) => '\\\'{S}',                     # LATIN CAPITAL LETTER S WITH ACUTE
424    chr(0x015b) => '\\\'{s}',                     # LATIN SMALL LETTER S WITH ACUTE
425    chr(0x015c) => '\\^{S}',                      # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
426    chr(0x015d) => '\\^{s}',                      # LATIN SMALL LETTER S WITH CIRCUMFLEX
427    chr(0x015e) => '\\c{S}',                      # LATIN CAPITAL LETTER S WITH CEDILLA
428    chr(0x015f) => '\\c{s}',                      # LATIN SMALL LETTER S WITH CEDILLA
429    chr(0x0160) => '\\v{S}',                      # LATIN CAPITAL LETTER S WITH CARON            (&Scaron;)
430    chr(0x0161) => '\\v{s}',                      # LATIN SMALL LETTER S WITH CARON              (&scaron;)
431    chr(0x0162) => '\\c{T}',                      # LATIN CAPITAL LETTER T WITH CEDILLA
432    chr(0x0163) => '\\c{t}',                      # LATIN SMALL LETTER T WITH CEDILLA
433    chr(0x0164) => '\\v{T}',                      # LATIN CAPITAL LETTER T WITH CARON
434    chr(0x0165) => '\\v{t}',                      # LATIN SMALL LETTER T WITH CARON
435    chr(0x0168) => '\\~{U}',                      # LATIN CAPITAL LETTER U WITH TILDE
436    chr(0x0169) => '\\~{u}',                      # LATIN SMALL LETTER U WITH TILDE
437    chr(0x016a) => '\\={U}',                      # LATIN CAPITAL LETTER U WITH MACRON
438    chr(0x016b) => '\\={u}',                      # LATIN SMALL LETTER U WITH MACRON
439    chr(0x016c) => '\\u{U}',                      # LATIN CAPITAL LETTER U WITH BREVE
440    chr(0x016d) => '\\u{u}',                      # LATIN SMALL LETTER U WITH BREVE
441    chr(0x016e) => '\\r{U}',                      # LATIN CAPITAL LETTER U WITH RING ABOVE
442    chr(0x016f) => '\\r{u}',                      # LATIN SMALL LETTER U WITH RING ABOVE
443    chr(0x0172) => '\\k{U}',                      # LATIN CAPITAL LETTER U WITH OGONEK
444    chr(0x0173) => '\\k{u}',                      # LATIN SMALL LETTER U WITH OGONEK
445    chr(0x0174) => '\\^{W}',                      # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
446    chr(0x0175) => '\\^{w}',                      # LATIN SMALL LETTER W WITH CIRCUMFLEX
447    chr(0x0176) => '\\^{Y}',                      # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
448    chr(0x0177) => '\\^{y}',                      # LATIN SMALL LETTER Y WITH CIRCUMFLEX
449    chr(0x0178) => '{\\"Y}',                      # LATIN CAPITAL LETTER Y WITH DIAERESIS        (&Yuml;)
450    chr(0x0179) => '\\\'{Z}',                     # LATIN CAPITAL LETTER Z WITH ACUTE
451    chr(0x017a) => '\\\'{z}',                     # LATIN SMALL LETTER Z WITH ACUTE
452    chr(0x017b) => '\\.{Z}',                      # LATIN CAPITAL LETTER Z WITH DOT ABOVE
453    chr(0x017c) => '\\.{z}',                      # LATIN SMALL LETTER Z WITH DOT ABOVE
454    chr(0x017d) => '\\v{Z}',                      # LATIN CAPITAL LETTER Z WITH CARON
455    chr(0x017e) => '\\v{z}',                      # LATIN SMALL LETTER Z WITH CARON
456    chr(0x0192) => '{\\textflorin}',              # LATIN SMALL LETTER F WITH HOOK               (&fnof;)
457    chr(0x01cd) => '\\v{A}',                      # LATIN CAPITAL LETTER A WITH CARON
458    chr(0x01ce) => '\\v{a}',                      # LATIN SMALL LETTER A WITH CARON
459    chr(0x01cf) => '\\v{I}',                      # LATIN CAPITAL LETTER I WITH CARON
460    chr(0x01d0) => '\\v{i}',                      # LATIN SMALL LETTER I WITH CARON
461    chr(0x01d1) => '\\v{O}',                      # LATIN CAPITAL LETTER O WITH CARON
462    chr(0x01d2) => '\\v{o}',                      # LATIN SMALL LETTER O WITH CARON
463    chr(0x01d3) => '\\v{U}',                      # LATIN CAPITAL LETTER U WITH CARON
464    chr(0x01d4) => '\\v{u}',                      # LATIN SMALL LETTER U WITH CARON
465    chr(0x01e6) => '\\v{G}',                      # LATIN CAPITAL LETTER G WITH CARON
466    chr(0x01e7) => '\\v{g}',                      # LATIN SMALL LETTER G WITH CARON
467    chr(0x01e8) => '\\v{K}',                      # LATIN CAPITAL LETTER K WITH CARON
468    chr(0x01e9) => '\\v{k}',                      # LATIN SMALL LETTER K WITH CARON
469    chr(0x01ea) => '\\k{O}',                      # LATIN CAPITAL LETTER O WITH OGONEK
470    chr(0x01eb) => '\\k{o}',                      # LATIN SMALL LETTER O WITH OGONEK
471    chr(0x01f0) => '\\v{j}',                      # LATIN SMALL LETTER J WITH CARON
472    chr(0x01f4) => '\\\'{G}',                     # LATIN CAPITAL LETTER G WITH ACUTE
473    chr(0x01f5) => '\\\'{g}',                     # LATIN SMALL LETTER G WITH ACUTE
474    chr(0x01f8) => '\\`{N}',                      # LATIN CAPITAL LETTER N WITH GRAVE
475    chr(0x01f9) => '\\`{n}',                      # LATIN SMALL LETTER N WITH GRAVE
476
477    # Spacing Modifier Letters
478
479    chr(0x02c6) => '{\\textasciicircum}',         # MODIFIER LETTER CIRCUMFLEX ACCENT            (&circ;)
480    chr(0x02dc) => '{\\textasciitilde}',          # SMALL TILDE                                  (&tilde;)
481
482    # Greek and Coptic
483
484    chr(0x0391) => '\\ensuremath{\\mathrm{A}}',   # GREEK CAPITAL LETTER ALPHA                   (&Alpha;)
485    chr(0x0392) => '\\ensuremath{\\mathrm{B}}',   # GREEK CAPITAL LETTER BETA                    (&Beta;)
486    chr(0x0393) => '\\ensuremath{\\Gamma}',       # GREEK CAPITAL LETTER GAMMA                   (&Gamma;)
487    chr(0x0394) => '\\ensuremath{\\Delta}',       # GREEK CAPITAL LETTER DELTA                   (&Delta;)
488    chr(0x0395) => '\\ensuremath{\\mathrm{E}}',   # GREEK CAPITAL LETTER EPSILON                 (&Epsilon;)
489    chr(0x0396) => '\\ensuremath{\\mathrm{Z}}',   # GREEK CAPITAL LETTER ZETA                    (&Zeta;)
490    chr(0x0397) => '\\ensuremath{\\mathrm{H}}',   # GREEK CAPITAL LETTER ETA                     (&Eta;)
491    chr(0x0398) => '\\ensuremath{\\Theta}',       # GREEK CAPITAL LETTER THETA                   (&Theta;)
492    chr(0x0399) => '\\ensuremath{\\mathrm{I}}',   # GREEK CAPITAL LETTER IOTA                    (&Iota;)
493    chr(0x039a) => '\\ensuremath{\\mathrm{K}}',   # GREEK CAPITAL LETTER KAPPA                   (&Kappa;)
494    chr(0x039b) => '\\ensuremath{\\Lambda}',      # GREEK CAPITAL LETTER LAMDA                   (&Lambda;)
495    chr(0x039c) => '\\ensuremath{\\mathrm{M}}',   # GREEK CAPITAL LETTER MU                      (&Mu;)
496    chr(0x039d) => '\\ensuremath{\\mathrm{N}}',   # GREEK CAPITAL LETTER NU                      (&Nu;)
497    chr(0x039e) => '\\ensuremath{\\Xi}',          # GREEK CAPITAL LETTER XI                      (&Xi;)
498    chr(0x039f) => '\\ensuremath{\\mathrm{O}}',   # GREEK CAPITAL LETTER OMICRON                 (&Omicron;)
499    chr(0x03a0) => '\\ensuremath{\\Pi}',          # GREEK CAPITAL LETTER PI                      (&Pi;)
500    chr(0x03a1) => '\\ensuremath{\\mathrm{R}}',   # GREEK CAPITAL LETTER RHO                     (&Rho;)
501    chr(0x03a3) => '\\ensuremath{\\Sigma}',       # GREEK CAPITAL LETTER SIGMA                   (&Sigma;)
502    chr(0x03a4) => '\\ensuremath{\\mathrm{T}}',   # GREEK CAPITAL LETTER TAU                     (&Tau;)
503    chr(0x03a5) => '\\ensuremath{\\Upsilon}',     # GREEK CAPITAL LETTER UPSILON                 (&Upsilon;)
504    chr(0x03a6) => '\\ensuremath{\\Phi}',         # GREEK CAPITAL LETTER PHI                     (&Phi;)
505    chr(0x03a7) => '\\ensuremath{\\mathrm{X}}',   # GREEK CAPITAL LETTER CHI                     (&Chi;)
506    chr(0x03a8) => '\\ensuremath{\\Psi}',         # GREEK CAPITAL LETTER PSI                     (&Psi;)
507    chr(0x03a9) => '\\ensuremath{\\Omega}',       # GREEK CAPITAL LETTER OMEGA                   (&Omega;)
508    chr(0x03b1) => '\\ensuremath{\\alpha}',       # GREEK SMALL LETTER ALPHA                     (&alpha;)
509    chr(0x03b2) => '\\ensuremath{\\beta}',        # GREEK SMALL LETTER BETA                      (&beta;)
510    chr(0x03b3) => '\\ensuremath{\\gamma}',       # GREEK SMALL LETTER GAMMA                     (&gamma;)
511    chr(0x03b4) => '\\ensuremath{\\delta}',       # GREEK SMALL LETTER DELTA                     (&delta;)
512    chr(0x03b5) => '\\ensuremath{\\epsilon}',     # GREEK SMALL LETTER EPSILON                   (&epsilon;)
513    chr(0x03b6) => '\\ensuremath{\\zeta}',        # GREEK SMALL LETTER ZETA                      (&zeta;)
514    chr(0x03b7) => '\\ensuremath{\\eta}',         # GREEK SMALL LETTER ETA                       (&eta;)
515    chr(0x03b8) => '\\ensuremath{\\theta}',       # GREEK SMALL LETTER THETA                     (&theta;)
516    chr(0x03b9) => '\\ensuremath{\\iota}',        # GREEK SMALL LETTER IOTA                      (&iota;)
517    chr(0x03ba) => '\\ensuremath{\\kappa}',       # GREEK SMALL LETTER KAPPA                     (&kappa;)
518    chr(0x03bb) => '\\ensuremath{\\lambda}',      # GREEK SMALL LETTER LAMDA                     (&lambda;)
519    chr(0x03bc) => '\\ensuremath{\\mu}',          # GREEK SMALL LETTER MU                        (&mu;)
520    chr(0x03bd) => '\\ensuremath{\\nu}',          # GREEK SMALL LETTER NU                        (&nu;)
521    chr(0x03be) => '\\ensuremath{\\xi}',          # GREEK SMALL LETTER XI                        (&xi;)
522    chr(0x03bf) => '\\ensuremath{o}',             # GREEK SMALL LETTER OMICRON                   (&omicron;)
523    chr(0x03c0) => '\\ensuremath{\\pi}',          # GREEK SMALL LETTER PI                        (&pi;)
524    chr(0x03c1) => '\\ensuremath{\\rho}',         # GREEK SMALL LETTER RHO                       (&rho;)
525    chr(0x03c3) => '\\ensuremath{\\sigma}',       # GREEK SMALL LETTER SIGMA                     (&sigma;)
526    chr(0x03c4) => '\\ensuremath{\\tau}',         # GREEK SMALL LETTER TAU                       (&tau;)
527    chr(0x03c5) => '\\ensuremath{\\upsilon}',     # GREEK SMALL LETTER UPSILON                   (&upsilon;)
528    chr(0x03c6) => '\\ensuremath{\\phi}',         # GREEK SMALL LETTER PHI                       (&phi;)
529    chr(0x03c7) => '\\ensuremath{\\chi}',         # GREEK SMALL LETTER CHI                       (&chi;)
530    chr(0x03c8) => '\\ensuremath{\\psi}',         # GREEK SMALL LETTER PSI                       (&psi;)
531    chr(0x03c9) => '\\ensuremath{\\omega}',       # GREEK SMALL LETTER OMEGA                     (&omega;)
532    chr(0x0e3f) => '{\\textbaht}',                # THAI CURRENCY SYMBOL BAHT
533
534    # Latin Extended Additional
535
536    chr(0x1e02) => '\\.{B}',                      # LATIN CAPITAL LETTER B WITH DOT ABOVE
537    chr(0x1e03) => '\\.{b}',                      # LATIN SMALL LETTER B WITH DOT ABOVE
538    chr(0x1e04) => '\\d{B}',                      # LATIN CAPITAL LETTER B WITH DOT BELOW
539    chr(0x1e05) => '\\d{b}',                      # LATIN SMALL LETTER B WITH DOT BELOW
540    chr(0x1e06) => '\\b{B}',                      # LATIN CAPITAL LETTER B WITH LINE BELOW
541    chr(0x1e07) => '\\b{b}',                      # LATIN SMALL LETTER B WITH LINE BELOW
542    chr(0x1e0a) => '\\.{D}',                      # LATIN CAPITAL LETTER D WITH DOT ABOVE
543    chr(0x1e0b) => '\\.{d}',                      # LATIN SMALL LETTER D WITH DOT ABOVE
544    chr(0x1e0c) => '\\d{D}',                      # LATIN CAPITAL LETTER D WITH DOT BELOW
545    chr(0x1e0d) => '\\d{d}',                      # LATIN SMALL LETTER D WITH DOT BELOW
546    chr(0x1e0e) => '\\b{D}',                      # LATIN CAPITAL LETTER D WITH LINE BELOW
547    chr(0x1e0f) => '\\b{d}',                      # LATIN SMALL LETTER D WITH LINE BELOW
548    chr(0x1e10) => '\\c{D}',                      # LATIN CAPITAL LETTER D WITH CEDILLA
549    chr(0x1e11) => '\\c{d}',                      # LATIN SMALL LETTER D WITH CEDILLA
550    chr(0x1e1e) => '\\.{F}',                      # LATIN CAPITAL LETTER F WITH DOT ABOVE
551    chr(0x1e1f) => '\\.{f}',                      # LATIN SMALL LETTER F WITH DOT ABOVE
552    chr(0x1e20) => '\\={G}',                      # LATIN CAPITAL LETTER G WITH MACRON
553    chr(0x1e21) => '\\={g}',                      # LATIN SMALL LETTER G WITH MACRON
554    chr(0x1e22) => '\\.{H}',                      # LATIN CAPITAL LETTER H WITH DOT ABOVE
555    chr(0x1e23) => '\\.{h}',                      # LATIN SMALL LETTER H WITH DOT ABOVE
556    chr(0x1e24) => '\\d{H}',                      # LATIN CAPITAL LETTER H WITH DOT BELOW
557    chr(0x1e25) => '\\d{h}',                      # LATIN SMALL LETTER H WITH DOT BELOW
558    chr(0x1e28) => '\\c{H}',                      # LATIN CAPITAL LETTER H WITH CEDILLA
559    chr(0x1e29) => '\\c{h}',                      # LATIN SMALL LETTER H WITH CEDILLA
560    chr(0x1e30) => '\\\'{K}',                     # LATIN CAPITAL LETTER K WITH ACUTE
561    chr(0x1e31) => '\\\'{k}',                     # LATIN SMALL LETTER K WITH ACUTE
562    chr(0x1e32) => '\\d{K}',                      # LATIN CAPITAL LETTER K WITH DOT BELOW
563    chr(0x1e33) => '\\d{k}',                      # LATIN SMALL LETTER K WITH DOT BELOW
564    chr(0x1e34) => '\\b{K}',                      # LATIN CAPITAL LETTER K WITH LINE BELOW
565    chr(0x1e35) => '\\b{k}',                      # LATIN SMALL LETTER K WITH LINE BELOW
566    chr(0x1e36) => '\\d{L}',                      # LATIN CAPITAL LETTER L WITH DOT BELOW
567    chr(0x1e37) => '\\d{l}',                      # LATIN SMALL LETTER L WITH DOT BELOW
568    chr(0x1e3a) => '\\b{L}',                      # LATIN CAPITAL LETTER L WITH LINE BELOW
569    chr(0x1e3b) => '\\b{l}',                      # LATIN SMALL LETTER L WITH LINE BELOW
570    chr(0x1e3e) => '\\\'{M}',                     # LATIN CAPITAL LETTER M WITH ACUTE
571    chr(0x1e3f) => '\\\'{m}',                     # LATIN SMALL LETTER M WITH ACUTE
572    chr(0x1e40) => '\\.{M}',                      # LATIN CAPITAL LETTER M WITH DOT ABOVE
573    chr(0x1e41) => '\\.{m}',                      # LATIN SMALL LETTER M WITH DOT ABOVE
574    chr(0x1e42) => '\\d{M}',                      # LATIN CAPITAL LETTER M WITH DOT BELOW
575    chr(0x1e43) => '\\d{m}',                      # LATIN SMALL LETTER M WITH DOT BELOW
576    chr(0x1e44) => '\\.{N}',                      # LATIN CAPITAL LETTER N WITH DOT ABOVE
577    chr(0x1e45) => '\\.{n}',                      # LATIN SMALL LETTER N WITH DOT ABOVE
578    chr(0x1e46) => '\\d{N}',                      # LATIN CAPITAL LETTER N WITH DOT BELOW
579    chr(0x1e47) => '\\d{n}',                      # LATIN SMALL LETTER N WITH DOT BELOW
580    chr(0x1e48) => '\\b{N}',                      # LATIN CAPITAL LETTER N WITH LINE BELOW
581    chr(0x1e49) => '\\b{n}',                      # LATIN SMALL LETTER N WITH LINE BELOW
582    chr(0x1e54) => '\\\'{P}',                     # LATIN CAPITAL LETTER P WITH ACUTE
583    chr(0x1e55) => '\\\'{p}',                     # LATIN SMALL LETTER P WITH ACUTE
584    chr(0x1e56) => '\\.{P}',                      # LATIN CAPITAL LETTER P WITH DOT ABOVE
585    chr(0x1e57) => '\\.{p}',                      # LATIN SMALL LETTER P WITH DOT ABOVE
586    chr(0x1e58) => '\\.{R}',                      # LATIN CAPITAL LETTER R WITH DOT ABOVE
587    chr(0x1e59) => '\\.{r}',                      # LATIN SMALL LETTER R WITH DOT ABOVE
588    chr(0x1e5a) => '\\d{R}',                      # LATIN CAPITAL LETTER R WITH DOT BELOW
589    chr(0x1e5b) => '\\d{r}',                      # LATIN SMALL LETTER R WITH DOT BELOW
590    chr(0x1e5e) => '\\b{R}',                      # LATIN CAPITAL LETTER R WITH LINE BELOW
591    chr(0x1e5f) => '\\b{r}',                      # LATIN SMALL LETTER R WITH LINE BELOW
592    chr(0x1e60) => '\\.{S}',                      # LATIN CAPITAL LETTER S WITH DOT ABOVE
593    chr(0x1e61) => '\\.{s}',                      # LATIN SMALL LETTER S WITH DOT ABOVE
594    chr(0x1e62) => '\\d{S}',                      # LATIN CAPITAL LETTER S WITH DOT BELOW
595    chr(0x1e63) => '\\d{s}',                      # LATIN SMALL LETTER S WITH DOT BELOW
596    chr(0x1e6a) => '\\.{T}',                      # LATIN CAPITAL LETTER T WITH DOT ABOVE
597    chr(0x1e6b) => '\\.{t}',                      # LATIN SMALL LETTER T WITH DOT ABOVE
598    chr(0x1e6c) => '\\d{T}',                      # LATIN CAPITAL LETTER T WITH DOT BELOW
599    chr(0x1e6d) => '\\d{t}',                      # LATIN SMALL LETTER T WITH DOT BELOW
600    chr(0x1e6e) => '\\b{T}',                      # LATIN CAPITAL LETTER T WITH LINE BELOW
601    chr(0x1e6f) => '\\b{t}',                      # LATIN SMALL LETTER T WITH LINE BELOW
602    chr(0x1e7c) => '\\~{V}',                      # LATIN CAPITAL LETTER V WITH TILDE
603    chr(0x1e7d) => '\\~{v}',                      # LATIN SMALL LETTER V WITH TILDE
604    chr(0x1e7e) => '\\d{V}',                      # LATIN CAPITAL LETTER V WITH DOT BELOW
605    chr(0x1e7f) => '\\d{v}',                      # LATIN SMALL LETTER V WITH DOT BELOW
606    chr(0x1e80) => '\\`{W}',                      # LATIN CAPITAL LETTER W WITH GRAVE
607    chr(0x1e81) => '\\`{w}',                      # LATIN SMALL LETTER W WITH GRAVE
608    chr(0x1e82) => '\\\'{W}',                     # LATIN CAPITAL LETTER W WITH ACUTE
609    chr(0x1e83) => '\\\'{w}',                     # LATIN SMALL LETTER W WITH ACUTE
610    chr(0x1e86) => '\\.{W}',                      # LATIN CAPITAL LETTER W WITH DOT ABOVE
611    chr(0x1e87) => '\\.{w}',                      # LATIN SMALL LETTER W WITH DOT ABOVE
612    chr(0x1e88) => '\\d{W}',                      # LATIN CAPITAL LETTER W WITH DOT BELOW
613    chr(0x1e89) => '\\d{w}',                      # LATIN SMALL LETTER W WITH DOT BELOW
614    chr(0x1e8a) => '\\.{X}',                      # LATIN CAPITAL LETTER X WITH DOT ABOVE
615    chr(0x1e8b) => '\\.{x}',                      # LATIN SMALL LETTER X WITH DOT ABOVE
616    chr(0x1e8e) => '\\.{Y}',                      # LATIN CAPITAL LETTER Y WITH DOT ABOVE
617    chr(0x1e8f) => '\\.{y}',                      # LATIN SMALL LETTER Y WITH DOT ABOVE
618    chr(0x1e90) => '\\^{Z}',                      # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
619    chr(0x1e91) => '\\^{z}',                      # LATIN SMALL LETTER Z WITH CIRCUMFLEX
620    chr(0x1e92) => '\\d{Z}',                      # LATIN CAPITAL LETTER Z WITH DOT BELOW
621    chr(0x1e93) => '\\d{z}',                      # LATIN SMALL LETTER Z WITH DOT BELOW
622    chr(0x1e94) => '\\b{Z}',                      # LATIN CAPITAL LETTER Z WITH LINE BELOW
623    chr(0x1e95) => '\\b{z}',                      # LATIN SMALL LETTER Z WITH LINE BELOW
624    chr(0x1e96) => '\\b{h}',                      # LATIN SMALL LETTER H WITH LINE BELOW
625    chr(0x1e98) => '\\r{w}',                      # LATIN SMALL LETTER W WITH RING ABOVE
626    chr(0x1e99) => '\\r{y}',                      # LATIN SMALL LETTER Y WITH RING ABOVE
627    chr(0x1ea0) => '\\d{A}',                      # LATIN CAPITAL LETTER A WITH DOT BELOW
628    chr(0x1ea1) => '\\d{a}',                      # LATIN SMALL LETTER A WITH DOT BELOW
629    chr(0x1eb8) => '\\d{E}',                      # LATIN CAPITAL LETTER E WITH DOT BELOW
630    chr(0x1eb9) => '\\d{e}',                      # LATIN SMALL LETTER E WITH DOT BELOW
631    chr(0x1ebc) => '\\~{E}',                      # LATIN CAPITAL LETTER E WITH TILDE
632    chr(0x1ebd) => '\\~{e}',                      # LATIN SMALL LETTER E WITH TILDE
633    chr(0x1eca) => '\\d{I}',                      # LATIN CAPITAL LETTER I WITH DOT BELOW
634    chr(0x1ecb) => '\\d{i}',                      # LATIN SMALL LETTER I WITH DOT BELOW
635    chr(0x1ecc) => '\\d{O}',                      # LATIN CAPITAL LETTER O WITH DOT BELOW
636    chr(0x1ecd) => '\\d{o}',                      # LATIN SMALL LETTER O WITH DOT BELOW
637    chr(0x1ee4) => '\\d{U}',                      # LATIN CAPITAL LETTER U WITH DOT BELOW
638    chr(0x1ee5) => '\\d{u}',                      # LATIN SMALL LETTER U WITH DOT BELOW
639    chr(0x1ef2) => '\\`{Y}',                      # LATIN CAPITAL LETTER Y WITH GRAVE
640    chr(0x1ef3) => '\\`{y}',                      # LATIN SMALL LETTER Y WITH GRAVE
641    chr(0x1ef4) => '\\d{Y}',                      # LATIN CAPITAL LETTER Y WITH DOT BELOW
642    chr(0x1ef5) => '\\d{y}',                      # LATIN SMALL LETTER Y WITH DOT BELOW
643    chr(0x1ef8) => '\\~{Y}',                      # LATIN CAPITAL LETTER Y WITH TILDE
644    chr(0x1ef9) => '\\~{y}',                      # LATIN SMALL LETTER Y WITH TILDE
645
646    # General Punctuation
647
648    chr(0x2002) => '\\phantom{N}',                # EN SPACE                                     (&ensp;)
649    chr(0x2003) => '\\hspace{1em}',               # EM SPACE                                     (&emsp;)
650    chr(0x2004) => '\\hspace{.333333em}',         # THREE-PER-EM SPACE
651    chr(0x2005) => '\\hspace{.25em}',             # FOUR-PER-EM SPACE
652    chr(0x2006) => '\\hspace{.166666em}',         # SIX-PER-EM SPACE
653    chr(0x2007) => '\\phantom{0}',                # FIGURE SPACE
654    chr(0x2008) => '\\phantom{,}',                # PUNCTUATION SPACE
655    chr(0x2009) => '\\,',                         # THIN SPACE                                   (&thinsp;)
656    chr(0x200a) => '\\ensuremath{\\mkern1mu}',    # HAIR SPACE
657    chr(0x200c) => '{}',                          # ZERO WIDTH NON-JOINER                        (&zwnj;)
658    chr(0x2013) => '--',                          # EN DASH                                      (&ndash;)
659    chr(0x2014) => '---',                         # EM DASH                                      (&mdash;)
660    chr(0x2015) => '\\rule{1em}{1pt}',            # HORIZONTAL BAR
661    chr(0x2016) => '{\\textbardbl}',              # DOUBLE VERTICAL LINE
662    chr(0x2018) => '{\\textquoteleft}',           # LEFT SINGLE QUOTATION MARK                   (&lsquo;)
663    chr(0x2019) => '{\\textquoteright}',          # RIGHT SINGLE QUOTATION MARK                  (&rsquo;)
664    chr(0x201a) => '{\\quotesinglbase}',          # SINGLE LOW-9 QUOTATION MARK                  (&sbquo;)
665    chr(0x201c) => '{\\textquotedblleft}',        # LEFT DOUBLE QUOTATION MARK                   (&ldquo;)
666    chr(0x201d) => '{\\textquotedblright}',       # RIGHT DOUBLE QUOTATION MARK                  (&rdquo;)
667    chr(0x201e) => '{\\quotedblbase}',            # DOUBLE LOW-9 QUOTATION MARK                  (&bdquo;)
668    chr(0x2020) => '{\\textdagger}',              # DAGGER                                       (&dagger;)
669    chr(0x2021) => '{\\textdaggerdbl}',           # DOUBLE DAGGER                                (&Dagger;)
670    chr(0x2022) => '{\\textbullet}',              # BULLET                                       (&bull;)
671    chr(0x2026) => '{\\textellipsis}',            # HORIZONTAL ELLIPSIS                          (&hellip;)
672    chr(0x2030) => '{\\textperthousand}',         # PER MILLE SIGN                               (&permil;)
673    chr(0x2032) => '{\\textquotesingle}',         # PRIME                                        (&prime;)
674    chr(0x2033) => '{\\textquotedbl}',            # DOUBLE PRIME                                 (&Prime;)
675    chr(0x2039) => '{\\guilsinglleft}',           # SINGLE LEFT-POINTING ANGLE QUOTATION MARK    (&lsaquo;)
676    chr(0x203a) => '{\\guilsinglright}',          # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK   (&rsaquo;)
677    chr(0x203b) => '{\\textreferencemark}',       # REFERENCE MARK
678    chr(0x203d) => '{\\textinterrobang}',         # INTERROBANG
679    chr(0x203e) => '{\\textasciimacron}',         # OVERLINE                                     (&oline;)
680    chr(0x2044) => '{\\textfractionsolidus}',     # FRACTION SLASH                               (&frasl;)
681
682    # Currency Symbols
683
684    chr(0x20a1) => '{\\textcolonmonetary}',       # COLON SIGN
685    chr(0x20a4) => '{\\textlira}',                # LIRA SIGN
686    chr(0x20a6) => '{\\textnaira}',               # NAIRA SIGN
687    chr(0x20a9) => '{\\textwon}',                 # WON SIGN
688    chr(0x20ab) => '{\\textdong}',                # DONG SIGN
689    chr(0x20ac) => '{\\texteuro}',                # EURO SIGN                                    (&euro;)
690
691    # Letterlike Symbols
692
693    chr(0x2111) => '\\ensuremath{\\Re}',          # BLACK-LETTER CAPITAL I                       (&image;)
694    chr(0x2116) => '{\\textnumero}',              # NUMERO SIGN
695    chr(0x2117) => '{\\textcircledP}',            # SOUND RECORDING COPYRIGHT
696    chr(0x2118) => '\\ensuremath{\\wp}',          # SCRIPT CAPITAL P                             (&weierp;)
697    chr(0x211c) => '\\ensuremath{\\Im}',          # BLACK-LETTER CAPITAL R                       (&real;)
698    chr(0x211e) => '{\\textrecipe}',              # PRESCRIPTION TAKE
699    chr(0x2120) => '{\\textservicemark}',         # SERVICE MARK
700    chr(0x2122) => '{\\texttrademark}',           # TRADE MARK SIGN                              (&trade;)
701    chr(0x2126) => '{\\textohm}',                 # OHM SIGN
702    chr(0x2127) => '{\\textmho}',                 # INVERTED OHM SIGN
703    chr(0x212e) => '{\\textestimated}',           # ESTIMATED SYMBOL
704    chr(0x2190) => '{\\textleftarrow}',           # LEFTWARDS ARROW                              (&larr;)
705    chr(0x2191) => '{\\textuparrow}',             # UPWARDS ARROW                                (&uarr;)
706    chr(0x2192) => '{\\textrightarrow}',          # RIGHTWARDS ARROW                             (&rarr;)
707    chr(0x2193) => '{\\textdownarrow}',           # DOWNWARDS ARROW                              (&darr;)
708    chr(0x2194) => '\\ensuremath{\\leftrightarrow}', # LEFT RIGHT ARROW                             (&harr;)
709    chr(0x21d0) => '\\ensuremath{\\Leftarrow}',   # LEFTWARDS DOUBLE ARROW                       (&lArr;)
710    chr(0x21d1) => '\\ensuremath{\\Uparrow}',     # UPWARDS DOUBLE ARROW                         (&uArr;)
711    chr(0x21d2) => '\\ensuremath{\\Rightarrow}',  # RIGHTWARDS DOUBLE ARROW                      (&rArr;)
712    chr(0x21d3) => '\\ensuremath{\\Downarrow}',   # DOWNWARDS DOUBLE ARROW                       (&dArr;)
713    chr(0x21d4) => '\\ensuremath{\\Leftrightarrow}', # LEFT RIGHT DOUBLE ARROW                      (&hArr;)
714
715    # Mathematical Operations
716
717    chr(0x2200) => '\\ensuremath{\\forall}',      # FOR ALL                                      (&forall;)
718    chr(0x2202) => '\\ensuremath{\\partial}',     # PARTIAL DIFFERENTIAL                         (&part;)
719    chr(0x2203) => '\\ensuremath{\\exists}',      # THERE EXISTS                                 (&exist;)
720    chr(0x2205) => '\\ensuremath{\\emptyset}',    # EMPTY SET                                    (&empty;)
721    chr(0x2207) => '\\ensuremath{\\nabla}',       # NABLA                                        (&nabla;)
722    chr(0x2208) => '\\ensuremath{\\in}',          # ELEMENT OF                                   (&isin;)
723    chr(0x2209) => '\\ensuremath{\\notin}',       # NOT AN ELEMENT OF                            (&notin;)
724    chr(0x220b) => '\\ensuremath{\\ni}',          # CONTAINS AS MEMBER                           (&ni;)
725    chr(0x220f) => '\\ensuremath{\\prod}',        # N-ARY PRODUCT                                (&prod;)
726    chr(0x2211) => '\\ensuremath{\\sum}',         # N-ARY SUMMATION                              (&sum;)
727    chr(0x2212) => '\\ensuremath{-}',             # MINUS SIGN                                   (&minus;)
728    chr(0x2217) => '\\ensuremath{\\ast}',         # ASTERISK OPERATOR                            (&lowast;)
729    chr(0x221a) => '\\ensuremath{\\surd}',        # SQUARE ROOT                                  (&radic;)
730    chr(0x221d) => '\\ensuremath{\\propto}',      # PROPORTIONAL TO                              (&prop;)
731    chr(0x221e) => '\\ensuremath{\\infty}',       # INFINITY                                     (&infin;)
732    chr(0x2220) => '\\ensuremath{\\angle}',       # ANGLE                                        (&ang;)
733    chr(0x2227) => '\\ensuremath{\\wedge}',       # LOGICAL AND                                  (&and;)
734    chr(0x2228) => '\\ensuremath{\\vee}',         # LOGICAL OR                                   (&or;)
735    chr(0x2229) => '\\ensuremath{\\cap}',         # INTERSECTION                                 (&cap;)
736    chr(0x222a) => '\\ensuremath{\\cup}',         # UNION                                        (&cup;)
737    chr(0x222b) => '\\ensuremath{\\int}',         # INTEGRAL                                     (&int;)
738    chr(0x2234) => '\\ensuremath{\\therefore}',   # THEREFORE                                    (&there4;)
739    chr(0x223c) => '\\ensuremath{\\sim}',         # TILDE OPERATOR                               (&sim;)
740    chr(0x2245) => '\\ensuremath{\\cong}',        # APPROXIMATELY EQUAL TO                       (&cong;)
741    chr(0x2248) => '\\ensuremath{\\asymp}',       # ALMOST EQUAL TO                              (&asymp;)
742    chr(0x2260) => '\\ensuremath{\\neq}',         # NOT EQUAL TO                                 (&ne;)
743    chr(0x2261) => '\\ensuremath{\\equiv}',       # IDENTICAL TO                                 (&equiv;)
744    chr(0x2264) => '\\ensuremath{\\leq}',         # LESS-THAN OR EQUAL TO                        (&le;)
745    chr(0x2265) => '\\ensuremath{\\geq}',         # GREATER-THAN OR EQUAL TO                     (&ge;)
746    chr(0x2282) => '\\ensuremath{\\subset}',      # SUBSET OF                                    (&sub;)
747    chr(0x2283) => '\\ensuremath{\\supset}',      # SUPERSET OF                                  (&sup;)
748    chr(0x2284) => '\\ensuremath{\\not\\subset}', # NOT A SUBSET OF                              (&nsub;)
749    chr(0x2286) => '\\ensuremath{\\subseteq}',    # SUBSET OF OR EQUAL TO                        (&sube;)
750    chr(0x2287) => '\\ensuremath{\\supseteq}',    # SUPERSET OF OR EQUAL TO                      (&supe;)
751    chr(0x2295) => '\\ensuremath{\\oplus}',       # CIRCLED PLUS                                 (&oplus;)
752    chr(0x2297) => '\\ensuremath{\\otimes}',      # CIRCLED TIMES                                (&otimes;)
753    chr(0x22a5) => '\\ensuremath{\\perp}',        # UP TACK                                      (&perp;)
754    chr(0x22c5) => '\\ensuremath{\\cdot}',        # DOT OPERATOR                                 (&sdot;)
755    chr(0x2308) => '\\ensuremath{\\lceil}',       # LEFT CEILING                                 (&lceil;)
756    chr(0x2309) => '\\ensuremath{\\rceil}',       # RIGHT CEILING                                (&rceil;)
757    chr(0x230a) => '\\ensuremath{\\lfloor}',      # LEFT FLOOR                                   (&lfloor;)
758    chr(0x230b) => '\\ensuremath{\\rfloor}',      # RIGHT FLOOR                                  (&rfloor;)
759    chr(0x2329) => '\\ensuremath{\\langle}',      # LEFT-POINTING ANGLE BRACKET                  (&lang;)
760    chr(0x232a) => '\\ensuremath{\\rangle}',      # RIGHT-POINTING ANGLE BRACKET                 (&rang;)
761    chr(0x25ca) => '\\ensuremath{\\lozenge}',     # LOZENGE                                      (&loz;)
762
763    # Miscellaneous Symbols
764
765    chr(0x263f) => '{\\Mercury}',                 # MERCURY
766    chr(0x2640) => '{\\Venus}',                   # FEMALE SIGN
767    chr(0x2641) => '{\\Earth}',                   # EARTH
768    chr(0x2642) => '{\\Mars}',                    # MALE SIGN
769    chr(0x2643) => '{\\Jupiter}',                 # JUPITER
770    chr(0x2644) => '{\\Saturn}',                  # SATURN
771    chr(0x2645) => '{\\Uranus}',                  # URANUS
772    chr(0x2646) => '{\\Neptune}',                 # NEPTUNE
773    chr(0x2647) => '{\\Pluto}',                   # PLUTO
774    chr(0x2648) => '{\\Aries}',                   # ARIES
775    chr(0x2649) => '{\\Taurus}',                  # TAURUS
776    chr(0x264a) => '{\\Gemini}',                  # GEMINI
777    chr(0x264b) => '{\\Cancer}',                  # CANCER
778    chr(0x264c) => '{\\Leo}',                     # LEO
779    chr(0x264d) => '{\\Virgo}',                   # VIRGO
780    chr(0x264e) => '{\\Libra}',                   # LIBRA
781    chr(0x264f) => '{\\Scorpio}',                 # SCORPIUS
782    chr(0x2650) => '{\\Sagittarius}',             # SAGITTARIUS
783    chr(0x2651) => '{\\Capricorn}',               # CAPRICORN
784    chr(0x2652) => '{\\Aquarius}',                # AQUARIUS
785    chr(0x2653) => '{\\Pisces}',                  # PISCES
786    chr(0x2660) => '\\ensuremath{\\spadesuit}',   # BLACK SPADE SUIT                             (&spades;)
787    chr(0x2663) => '\\ensuremath{\\clubsuit}',    # BLACK CLUB SUIT                              (&clubs;)
788    chr(0x2665) => '\\ensuremath{\\heartsuit}',   # BLACK HEART SUIT                             (&hearts;)
789    chr(0x2666) => '\\ensuremath{\\diamondsuit}', # BLACK DIAMOND SUIT                           (&diams;)
790    chr(0x266d) => '\\ensuremath{\\flat}',        # MUSIC FLAT SIGN
791    chr(0x266e) => '\\ensuremath{\\natural}',     # MUSIC NATURAL SIGN
792    chr(0x266f) => '\\ensuremath{\\sharp}',       # MUSIC SHARP SIGN
793    chr(0x26ad) => '{\\textmarried}',             # MARRIAGE SYMBOL
794    chr(0x26ae) => '{\\textdivorced}',            # DIVORCE SYMBOL
795
796    # Supplemental Punctuation
797
798    chr(0x2e18) => '{\\textinterrobangdown}',     # INVERTED INTERROBANG
799    chr(0x2e3a) => '---{}---',                    # unnamed character
800    chr(0x2e3b) => '---{}---{}---',               # unnamed character
801
802);
803
# Characters whose encodings rely on a LaTeX package: each key is a
# single character and each value is the name of the package listed as
# providing the command used for it.
# NOTE(review): presumably consulted for latex_encode()'s 'packages'
# option -- confirm against the caller.

%provided_by = (

    # Characters listed against the textcomp package

    ( map { ( chr($_) => 'textcomp' ) } (
        0x0022,    # QUOTATION MARK
        0x003c,    # LESS-THAN SIGN
        0x003e,    # GREATER-THAN SIGN
        0x005c,    # REVERSE SOLIDUS
        0x007e,    # TILDE
        0x0e3f,    # THAI CURRENCY SYMBOL BAHT
        0x2016,    # DOUBLE VERTICAL LINE
        0x203b,    # REFERENCE MARK
        0x203d,    # INTERROBANG
        0x20a1,    # COLON SIGN
        0x20a4,    # LIRA SIGN
        0x20a6,    # NAIRA SIGN
        0x20a9,    # WON SIGN
        0x20ab,    # DONG SIGN
        0x2116,    # NUMERO SIGN
        0x2117,    # SOUND RECORDING COPYRIGHT
        0x211e,    # PRESCRIPTION TAKE
        0x2120,    # SERVICE MARK
        0x2126,    # OHM SIGN
        0x2127,    # INVERTED OHM SIGN
        0x212e,    # ESTIMATED SYMBOL
        0x26ad,    # MARRIAGE SYMBOL
        0x26ae,    # DIVORCE SYMBOL
        0x2e18,    # INVERTED INTERROBANG
    ) ),

    # Characters listed against the marvosym package.  They occupy one
    # unbroken code point range: MERCURY, FEMALE SIGN, EARTH, MALE SIGN,
    # JUPITER, SATURN, URANUS, NEPTUNE, PLUTO (U+263F..U+2647) followed
    # by the twelve zodiac signs ARIES..PISCES (U+2648..U+2653).

    ( map { ( chr($_) => 'marvosym' ) } 0x263f .. 0x2653 ),

);
853
854reset_latex_encodings(1);
855
# Rebuild $encoded_char_re from the current keys of %latex_encoding.
#
# The result is a precompiled bracketed character class that matches any
# single character appearing in a key of %latex_encoding.  The sub takes
# no arguments and returns nothing; its only effect is the assignment to
# the package variable $encoded_char_re.
#
# Every key is passed through quotemeta before being placed in the
# class, so characters that are special there ('-', '^', ']', '\') or
# that would be interpolated ('$', '@') are always matched literally.
# The pattern is compiled with a plain qr{} rather than a string eval,
# so a compilation problem can no longer leave $encoded_char_re
# silently undefined.

sub _compile_encoding_regexp {
    my $char_class = join q{}, map { quotemeta } sort keys %latex_encoding;

    # '[]' is not a valid pattern, so when there is nothing to encode use
    # a pattern that can never match instead of an empty class.
    if (length($char_class)) {
        $encoded_char_re = qr{[$char_class]}x;
    }
    else {
        $encoded_char_re = qr{(?!)}x;
    }

    return;
}

# Build the initial regular expression as the module is loaded.
_compile_encoding_regexp();
864
865
8661;
867
868__END__
869
870=encoding utf8
871
872=head1 NAME
873
874LaTeX::Encode - encode characters for LaTeX formatting
875
876=head1 SYNOPSIS
877
878  use LaTeX::Encode ':all', add => { '@' => 'AT' }, remove => [ '$' ];
879
880  $latex_string  = latex_encode($text, %options);
881
882  %old_encodings = add_latex_encodings( chr(0x2002) => '\\hspace{.6em}' );
883  %old_encodings = remove_latex_encodings( '<', '>' );
884
885  reset_latex_encodings(1);
886
887=head1 VERSION
888
This manual page describes version 0.092.0 of the C<LaTeX::Encode> module.
890
891
892=head1 DESCRIPTION
893
894This module provides a function to encode text that is to be formatted
895with LaTeX.  It encodes characters that are special to LaTeX or that
896are represented in LaTeX by LaTeX text-mode commands.
897
898The special characters are: C<\> (command character), C<{> (open
899group), C<}> (end group), C<&> (table column separator), C<#>
900(parameter specifier), C<%> (comment character), C<_> (subscript),
901C<^> (superscript), C<~> (non-breakable space), C<$> (mathematics mode).
902
903Note that some of the LaTeX commands for characters are defined in the
904LaTeX C<textcomp> package.  If your text includes such characters, you
905will need to include the following lines in the preamble to your LaTeX
906document.
907
908    \usepackage[T1]{fontenc}
909    \usepackage{textcomp}
910
911The function is useful for encoding data that is interpolated into
912LaTeX document templates, say with C<Template::Plugin::Latex>
913(shameless plug!).
914
915=head1 WARNING ABOUT UTF-8 DATA
916
917Note that C<latex_encode()> will encode a UTF8 string (a string with the UTF8 flag set) or
918a non-UTF8 string, which will normally be regarded as ISO-8859-1 (Latin 1) and will be
919upgraded to UTF8.  The UTF8 flag indicates whether the contents of a string are regarded
920as a sequence of Unicode characters or as a string of bytes.  Refer to the L<Unicode
921Support in Perl|perlunicode>, L<Perl Unicode Introduction|perluniintro> and L<Perl Unicode
922Tutorial|perlunitut> manual pages for more details.
923
If you are seeing spurious LaTeX commands in the output of C<latex_encode()> then it may
be that you are reading from a UTF-8 input, or have data with UTF-8 characters in a
literal, but the UTF8 flag is not being set correctly.  The fact that your program is
dealing with UTF-8 characters on a byte-by-byte basis may not normally be apparent, as the
terminal may make no distinction and happily display the byte sequences in the program's
output as the UTF-8 characters they represent.  However, in a Perl program that deals with
individual characters, the individual bytes that make up a multi-byte character are
regarded as separate characters; if the strings are promoted to UTF8 strings then the
individual bytes are converted separately to UTF8.  This is termed double encoding.
C<latex_encode()> will then map the double-encoded characters.
934
935If the input text is Western European text then what you are likely to see in the output
936from C<latex_encode()> is spurious sequences of C<{\^A}> or C<{\~A}> followed by the
937mapping of an apparently random character (or the right character if it is a symbol such
938as the Sterling POUND sign, i.e. "£" will map to C<{\^A}\textsterling>); this is because
939the initial byte of a two-byte UTF-8 character in the LATIN1 range will either be 0xC2 or
9400xC3 and the next byte will always have the top two bits set to C<10> to indicate that it
941is a continuation byte.
942
943
944=head1 SUBROUTINES/METHODS
945
946=over 4
947
948=item C<latex_encode($text, %options)>
949
950Encodes the specified text such that it is suitable for processing
951with LaTeX.  The behaviour of the filter is modified by the options:
952
953=over 4
954
955=item C<except>
956
957Lists the characters that should be excluded from encoding.  By
958default no special characters are excluded, but it may be useful to
specify C<< except => "\\{}" >> to allow the input string to contain LaTeX
960commands such as C<"this is \\textbf{bold} text"> (the doubled
961backslashes in the strings represent Perl escapes, and will be
962evaluated to single backslashes).
963
964=item C<iquotes>
965
966If true then single or double quotes around words will be changed to
967LaTeX single or double quotes; double quotes around a phrase will be
968converted to "``" and "''" and single quotes to "`" and "'".  This is
sometimes called "intelligent quotes".
970
971=item C<packages>
972
If passed a reference to a hash, C<latex_encode()> will update the hash with the names of
the LaTeX packages that are required for typesetting the encoded string.
975
976=back
977
978
979=item C<add_latex_encodings(%encodings)>
980
981Adds a set of new or modified encodings.  Returns a hash of any encodings that were
982modified.
983
984
985=item C<remove_latex_encodings(@keys)>
986
987Removes a set of encodings.  Returns a hash of the removed encodings.
988
989
990=item C<reset_latex_encodings($forget_import_specifiers)>
991
Resets the LaTeX encodings to the state that they were in when the module was loaded
993(including any additions and removals specified on the 'use' statement), or to the
994standard set of encodings if C<$forget_import_specifiers> is true.
995
996
997=back
998
999
1000=head1 EXAMPLES
1001
1002The following snippet shows how data from a database can be encoded
1003and inserted into a LaTeX table, the source of which is generated with
1004C<LaTeX::Table>.
1005
    my $sth = $dbh->prepare("select col1, col2, col3 from table where $expr");
1007    $sth->execute;
1008    while (my $href = $sth->fetchrow_hashref) {
1009        my @row;
1010        foreach my $col (qw(col1 col2 col3)) {
1011            push(@row, latex_encode($href->{$col}));
1012        }
1013        push @data, \@row;
1014    }
1015
    my $header = [ [ 'Col1', 'Col2', 'Col3' ] ];
1017
1018    my $table = LaTeX::Table->new( { caption => 'My caption',
1019                                     label   => 'table:caption',
1020                                     type    => 'xtab',
1021                                     header  => $header,
1022                                     data    => \@data } );
1023
1024    my $table_text = $table->generate_string;
1025
1026Now C<$table_text> can be interpolated into a LaTeX document template.
1027
1028
1029=head1 DIAGNOSTICS
1030
1031None.  You could probably break the C<latex_encode> function by
1032passing it an array reference as the options, but there are no checks
1033for that.
1034
1035=head1 CONFIGURATION AND ENVIRONMENT
1036
1037Not applicable.
1038
1039
1040=head1 DEPENDENCIES
1041
The C<Readonly> module is required at run time.  The C<HTML::Entities>
and C<Pod::LaTeX> modules were used for building the encoding table,
but the table is not rebuilt at installation time.  The
C<LaTeX::Driver> module is used for formatting the character encodings
reference document.
1046
1047=head1 INCOMPATIBILITIES
1048
1049None known.
1050
1051=head1 BUGS AND LIMITATIONS
1052
1053Not all LaTeX special characters are included in the encoding tables
1054(more may be added when I track down the definitions).
1055
1056
1057
1058=head1 AUTHOR
1059
1060Andrew Ford E<lt>a.ford@ford-mason.co.ukE<gt>
1061
1062=head1 LICENSE AND COPYRIGHT
1063
1064Copyright (C) 2007-2012 Andrew Ford.  All Rights Reserved.
1065
1066This module is free software; you can redistribute it and/or
1067modify it under the same terms as Perl itself.
1068
1069This software is distributed in the hope that it will be useful, but
1070WITHOUT ANY WARRANTY; without even the implied warranty of
1071MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
1072
1073=head1 SEE ALSO
1074
1075L<Template::Plugin::Latex>
1076
1077L<Unicode Support in Perl|perlunicode>
1078
1079L<Perl Unicode Introduction|perluniintro>
1080
1081L<Perl Unicode Tutorial|perlunitut>
1082
1083=cut
1084
1085# Local Variables:
1086# mode: perl
1087# perl-indent-level: 4
1088# indent-tabs-mode: nil
1089# End:
1090#
1091# vim: expandtab shiftwidth=4:
1092