/* catdvi - get text from DVI files
   Copyright (C) 2000, 2002 Bjoern Brill <brill@fs.math.uni-frankfurt.de>

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
18
19 #include "glyphops.h"
20 #include "sparse.h"
21 #include "glyphenm.h"
22
/* End-of-table mark for the diacritic variant table. A value other than 0
 * is needed there because 0 is a legal "variant unavailable" entry.
 */
#define EOT 0x04
24
25
26 /* known formatting hints */
27 struct glyph_hintentry_t {
28 glyph_t glyph;
29 enum glyph_hint_t hint;
30 };
31
32 static struct glyph_hintentry_t hints[] = {
33 {GLYPH_asciicircum, GH_DIACRITIC},
34 {GLYPH_grave, GH_DIACRITIC},
35 {GLYPH_asciitilde, GH_DIACRITIC},
36
37 {GLYPH_dieresis, GH_DIACRITIC},
38 {GLYPH_UNI_macron, GH_DIACRITIC},
39 {GLYPH_acute, GH_DIACRITIC},
40 {GLYPH_cedilla, GH_DIACRITIC},
41
42 {GLYPH_circumflex, GH_DIACRITIC},
43 {GLYPH_caron, GH_DIACRITIC},
44 {GLYPH_UNI_macronmodifier, GH_DIACRITIC},
45 {GLYPH_UNI_acutemodifier, GH_DIACRITIC},
46 {GLYPH_UNI_gravemodifier, GH_DIACRITIC},
47 {GLYPH_breve, GH_DIACRITIC},
48 {GLYPH_dotaccent, GH_DIACRITIC},
49 {GLYPH_ring, GH_DIACRITIC},
50 {GLYPH_ogonek, GH_DIACRITIC},
51 {GLYPH_tilde, GH_DIACRITIC},
52 {GLYPH_CATDVI_polishstroke, GH_DIACRITIC},
53
54 {GLYPH_CATDVI_negationslash, GH_DIACRITIC},
55 {GLYPH_UNI_circlelarge, GH_DIACRITIC},
56 {GLYPH_CATDVI_vector, GH_DIACRITIC},
57
58 {GLYPH_gravecomb, GH_COMBINING_DIACRITIC},
59 {GLYPH_acutecomb, GH_COMBINING_DIACRITIC},
60 {GLYPH_UNI_circumflexcomb, GH_COMBINING_DIACRITIC},
61 {GLYPH_tildecomb, GH_COMBINING_DIACRITIC},
62 {GLYPH_UNI_macroncomb, GH_COMBINING_DIACRITIC},
63 {GLYPH_UNI_overlinecomb, GH_COMBINING_DIACRITIC},
64 {GLYPH_UNI_brevecomb, GH_COMBINING_DIACRITIC},
65 {GLYPH_UNI_dotaccentcomb, GH_COMBINING_DIACRITIC},
66 {GLYPH_UNI_dieresiscomb, GH_COMBINING_DIACRITIC},
67 {GLYPH_hookabovecomb, GH_COMBINING_DIACRITIC},
68 {GLYPH_UNI_ringcomb, GH_COMBINING_DIACRITIC},
69 {GLYPH_UNI_caroncomb, GH_COMBINING_DIACRITIC},
70 {GLYPH_UNI_cedillacomb, GH_COMBINING_DIACRITIC},
71 {GLYPH_UNI_ogonekcomb, GH_COMBINING_DIACRITIC},
72 {GLYPH_CATDVI_polishstrokecomb, GH_COMBINING_DIACRITIC},
73
74 {GLYPH_UNI_slashlongcomb, GH_COMBINING_DIACRITIC},
75 {GLYPH_UNI_circlecomb, GH_COMBINING_DIACRITIC},
76 {GLYPH_UNI_vectorcomb, GH_COMBINING_DIACRITIC},
77
78 {GLYPH_ADOBE_periodsuperior, GH_DIACRITIC},
79
80 {GLYPH_CATDVI_Eurodblstroke, GH_DIACRITIC},
81 {GLYPH_CATDVI_Eurodblstrokecomb, GH_COMBINING_DIACRITIC},
82
83 {GLYPH_radical, GH_RADICAL},
84 /* Is in the math symbols font but used just like the big
85 * variants in the math extension font.
86 */
87
88 /* The TeX math extension stuff */
89 {GLYPH_CATDVI_parenleftbig, GH_ON_AXIS},
90 {GLYPH_CATDVI_parenrightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
91 {GLYPH_CATDVI_bracketleftbig, GH_ON_AXIS},
92 {GLYPH_CATDVI_bracketrightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
93 {GLYPH_CATDVI_floorleftbig, GH_ON_AXIS},
94 {GLYPH_CATDVI_floorrightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
95 {GLYPH_CATDVI_ceilingleftbig, GH_ON_AXIS},
96 {GLYPH_CATDVI_ceilingrightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
97 {GLYPH_CATDVI_braceleftbig, GH_ON_AXIS},
98 {GLYPH_CATDVI_bracerightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
99 {GLYPH_CATDVI_angbracketleftbig, GH_ON_AXIS},
100 {GLYPH_CATDVI_angbracketrightbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
101 {GLYPH_CATDVI_vextendsingle, GH_EXTENSIBLE_RECIPE},
102 {GLYPH_CATDVI_vextenddouble, GH_EXTENSIBLE_RECIPE},
103 {GLYPH_CATDVI_slashbig, GH_ON_AXIS | GH_MOREMATH_LEFT},
104 {GLYPH_CATDVI_backslashbig, GH_ON_AXIS},
105 {GLYPH_CATDVI_parenleftBig, GH_ON_AXIS},
106 {GLYPH_CATDVI_parenrightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
107 {GLYPH_CATDVI_parenleftbigg, GH_ON_AXIS},
108 {GLYPH_CATDVI_parenrightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
109 {GLYPH_CATDVI_bracketleftbigg, GH_ON_AXIS},
110 {GLYPH_CATDVI_bracketrightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
111 {GLYPH_CATDVI_floorleftbigg, GH_ON_AXIS},
112 {GLYPH_CATDVI_floorrightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
113 {GLYPH_CATDVI_ceilingleftbigg, GH_ON_AXIS},
114 {GLYPH_CATDVI_ceilingrightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
115 {GLYPH_CATDVI_braceleftbigg, GH_ON_AXIS},
116 {GLYPH_CATDVI_bracerightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
117 {GLYPH_CATDVI_angbracketleftbigg, GH_ON_AXIS},
118 {GLYPH_CATDVI_angbracketrightbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
119 {GLYPH_CATDVI_slashbigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
120 {GLYPH_CATDVI_backslashbigg, GH_ON_AXIS},
121 {GLYPH_CATDVI_parenleftBigg, GH_ON_AXIS},
122 {GLYPH_CATDVI_parenrightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
123 {GLYPH_CATDVI_bracketleftBigg, GH_ON_AXIS},
124 {GLYPH_CATDVI_bracketrightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
125 {GLYPH_CATDVI_floorleftBigg, GH_ON_AXIS},
126 {GLYPH_CATDVI_floorrightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
127 {GLYPH_CATDVI_ceilingleftBigg, GH_ON_AXIS},
128 {GLYPH_CATDVI_ceilingrightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
129 {GLYPH_CATDVI_braceleftBigg, GH_ON_AXIS},
130 {GLYPH_CATDVI_bracerightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
131 {GLYPH_CATDVI_angbracketleftBigg, GH_ON_AXIS},
132 {GLYPH_CATDVI_angbracketrightBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
133 {GLYPH_CATDVI_slashBigg, GH_ON_AXIS | GH_MOREMATH_LEFT},
134 {GLYPH_CATDVI_backslashBigg, GH_ON_AXIS},
135 {GLYPH_CATDVI_slashBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
136 {GLYPH_CATDVI_backslashBig, GH_ON_AXIS},
137 {GLYPH_ADOBE_parenlefttp, GH_EXTENSIBLE_RECIPE},
138 {GLYPH_ADOBE_parenrighttp, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
139 {GLYPH_ADOBE_bracketlefttp, GH_EXTENSIBLE_RECIPE},
140 {GLYPH_ADOBE_bracketrighttp, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
141 {GLYPH_ADOBE_bracketleftbt, GH_EXTENSIBLE_RECIPE},
142 {GLYPH_ADOBE_bracketrightbt, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
143 {GLYPH_ADOBE_bracketleftex, GH_EXTENSIBLE_RECIPE},
144 {GLYPH_ADOBE_bracketrightex, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
145 {GLYPH_ADOBE_bracelefttp, GH_EXTENSIBLE_RECIPE},
146 {GLYPH_ADOBE_bracerighttp, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
147 {GLYPH_ADOBE_braceleftbt, GH_EXTENSIBLE_RECIPE},
148 {GLYPH_ADOBE_bracerightbt, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
149 {GLYPH_ADOBE_braceleftmid, GH_EXTENSIBLE_RECIPE},
150 {GLYPH_ADOBE_bracerightmid, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
151 {GLYPH_ADOBE_braceex, GH_EXTENSIBLE_RECIPE},
152 {GLYPH_ADOBE_arrowvertex, GH_EXTENSIBLE_RECIPE},
153 {GLYPH_ADOBE_parenleftbt, GH_EXTENSIBLE_RECIPE},
154 {GLYPH_ADOBE_parenrightbt, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
155 {GLYPH_ADOBE_parenleftex, GH_EXTENSIBLE_RECIPE},
156 {GLYPH_ADOBE_parenrightex, GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
157 {GLYPH_CATDVI_angbracketleftBig, GH_ON_AXIS},
158 {GLYPH_CATDVI_angbracketrightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
159 {GLYPH_CATDVI_unionsqtext, GH_ON_AXIS},
160 {GLYPH_CATDVI_unionsqdisplay, GH_ON_AXIS},
161 {GLYPH_CATDVI_contintegraltext, GH_ON_AXIS},
162 {GLYPH_CATDVI_contintegraldisplay, GH_ON_AXIS},
163 {GLYPH_CATDVI_circledottext, GH_ON_AXIS},
164 {GLYPH_CATDVI_circledotdisplay, GH_ON_AXIS},
165 {GLYPH_CATDVI_circleplustext, GH_ON_AXIS},
166 {GLYPH_CATDVI_circleplusdisplay, GH_ON_AXIS},
167 {GLYPH_CATDVI_circlemultiplytext, GH_ON_AXIS},
168 {GLYPH_CATDVI_circlemultiplydisplay, GH_ON_AXIS},
169 {GLYPH_CATDVI_summationtext, GH_ON_AXIS},
170 {GLYPH_CATDVI_producttext, GH_ON_AXIS},
171 {GLYPH_CATDVI_integraltext, GH_ON_AXIS},
172 {GLYPH_CATDVI_uniontext, GH_ON_AXIS},
173 {GLYPH_CATDVI_intersectiontext, GH_ON_AXIS},
174 {GLYPH_CATDVI_unionmultitext, GH_ON_AXIS},
175 {GLYPH_CATDVI_logicalandtext, GH_ON_AXIS},
176 {GLYPH_CATDVI_logicalortext, GH_ON_AXIS},
177 {GLYPH_CATDVI_summationdisplay, GH_ON_AXIS},
178 {GLYPH_CATDVI_productdisplay, GH_ON_AXIS},
179 {GLYPH_CATDVI_integraldisplay, GH_ON_AXIS},
180 {GLYPH_CATDVI_uniondisplay, GH_ON_AXIS},
181 {GLYPH_CATDVI_intersectiondisplay, GH_ON_AXIS},
182 {GLYPH_CATDVI_unionmultidisplay, GH_ON_AXIS},
183 {GLYPH_CATDVI_logicalanddisplay, GH_ON_AXIS},
184 {GLYPH_CATDVI_logicalordisplay, GH_ON_AXIS},
185 {GLYPH_CATDVI_coproducttext, GH_ON_AXIS},
186 {GLYPH_CATDVI_coproductdisplay, GH_ON_AXIS},
187 {GLYPH_CATDVI_hatwide, GH_WIDE_DIACRITIC},
188 {GLYPH_CATDVI_hatwider, GH_WIDE_DIACRITIC},
189 {GLYPH_CATDVI_hatwidest, GH_WIDE_DIACRITIC},
190 {GLYPH_CATDVI_tildewide, GH_WIDE_DIACRITIC},
191 {GLYPH_CATDVI_tildewider, GH_WIDE_DIACRITIC},
192 {GLYPH_CATDVI_tildewidest, GH_WIDE_DIACRITIC},
193 {GLYPH_CATDVI_bracketleftBig, GH_ON_AXIS},
194 {GLYPH_CATDVI_bracketrightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
195 {GLYPH_CATDVI_floorleftBig, GH_ON_AXIS},
196 {GLYPH_CATDVI_floorrightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
197 {GLYPH_CATDVI_ceilingleftBig, GH_ON_AXIS},
198 {GLYPH_CATDVI_ceilingrightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
199 {GLYPH_CATDVI_braceleftBig, GH_ON_AXIS},
200 {GLYPH_CATDVI_bracerightBig, GH_ON_AXIS | GH_MOREMATH_LEFT},
201 {GLYPH_CATDVI_radicalbig, GH_RADICAL},
202 {GLYPH_CATDVI_radicalBig, GH_RADICAL},
203 {GLYPH_CATDVI_radicalbigg, GH_RADICAL},
204 {GLYPH_CATDVI_radicalBigg, GH_RADICAL},
205 {GLYPH_CATDVI_radicalbt, GH_EXTENSIBLE_RECIPE},
206 {GLYPH_CATDVI_radicalvertex, GH_EXTENSIBLE_RECIPE},
207 {GLYPH_CATDVI_radicaltp, GH_EXTENSIBLE_RECIPE},
208 {GLYPH_CATDVI_arrowvertexdbl, GH_EXTENSIBLE_RECIPE},
209 {GLYPH_CATDVI_arrowtp, GH_EXTENSIBLE_RECIPE},
210 {GLYPH_CATDVI_arrowbt, GH_EXTENSIBLE_RECIPE},
211 {GLYPH_CATDVI_bracehtipdownright, GH_MOREMATH_LEFT},
212 {GLYPH_CATDVI_bracehtipupright, GH_MOREMATH_LEFT},
213 {GLYPH_CATDVI_arrowdbltp, GH_EXTENSIBLE_RECIPE},
214 {GLYPH_CATDVI_arrowdblbt, GH_EXTENSIBLE_RECIPE},
215 /* end TeX math extension */
216
217 {GLYPH_CATDVI_tildexwide, GH_WIDE_DIACRITIC},
218 {GLYPH_CATDVI_hatxwide, GH_WIDE_DIACRITIC},
219
220 /* end of table */
221 {0, 0}
222 };
223
224 static struct spars32_t hint_spars32;
225
226
227 /* known mappings between combining and non-combining diacritics.
228 * Incomplete. Use 0 for unavailable or unknown variants.
229 *
230 * variant3 and variant4 are for diacritics that are doubled in the
231 * unicode character set (doubled at least for our purposes and IMHO -
232 * if there is some underlying philosophy, I just can't see it) and
233 * should be mapped to something canonical.
234 *
235 * we try to keep the spacing variant in the ISO 8859-1 range if possible.
236 */
237 struct diacritic_variantentry_t {
238 glyph_t spacing;
239 glyph_t combining;
240 glyph_t variant3;
241 glyph_t variant4;
242 };
243
244 static struct diacritic_variantentry_t diavars[] =
245 {
246 {GLYPH_acute, GLYPH_acutecomb, GLYPH_UNI_acutemodifier, 0},
247 {GLYPH_grave, GLYPH_gravecomb, GLYPH_UNI_gravemodifier, 0},
248 {GLYPH_asciicircum, GLYPH_UNI_circumflexcomb, GLYPH_circumflex, 0},
249 {GLYPH_dieresis, GLYPH_UNI_dieresiscomb, 0, 0},
250 {GLYPH_tilde, GLYPH_tildecomb, GLYPH_asciitilde, 0},
251 {GLYPH_ring, GLYPH_UNI_ringcomb, 0, 0},
252
253 {GLYPH_cedilla, GLYPH_UNI_cedillacomb, 0, 0},
254
255 {GLYPH_caron, GLYPH_UNI_caroncomb, 0, 0},
256 {GLYPH_UNI_macron, GLYPH_UNI_macroncomb, GLYPH_UNI_macronmodifier, 0},
257 {GLYPH_breve, GLYPH_UNI_brevecomb, 0, 0},
258 {GLYPH_dotaccent, GLYPH_UNI_dotaccentcomb, GLYPH_ADOBE_periodsuperior, 0},
259
260 {GLYPH_ogonek, GLYPH_UNI_ogonekcomb, 0, 0},
261 {GLYPH_CATDVI_polishstroke, GLYPH_CATDVI_polishstrokecomb, 0, 0},
262
263 {GLYPH_CATDVI_negationslash, GLYPH_UNI_slashlongcomb, 0, 0},
264 {GLYPH_UNI_circlelarge, GLYPH_UNI_circlecomb, 0, 0},
265 {GLYPH_CATDVI_vector, GLYPH_UNI_vectorcomb, 0, 0},
266
267 {GLYPH_CATDVI_Eurodblstroke, GLYPH_CATDVI_Eurodblstrokecomb, 0, 0},
268
269 /* end of table */
270 {EOT, 0, 0, 0}
271 };
272
273 static struct sparp_t diavar_sparp;
274
glyphops_init()275 void glyphops_init()
276 {
277 struct glyph_hintentry_t * ph;
278 struct diacritic_variantentry_t * pd;
279
280 /* The glyph hint sparp is indexed by the glyph and points directly to
281 * the hint.
282 */
283 spars32_init(&hint_spars32, 0);
284 for(ph = hints; ph->glyph != 0; ++ph) {
285 spars32_write(&hint_spars32, ph->glyph, ph->hint);
286 }
287
288 /* The diacritics variant sparp is indexed by any variant of the diacritic.
289 * The values point to the corresponding dicaritic_variantentry_t .
290 */
291 sparp_init(&diavar_sparp);
292 for(pd = diavars; pd->spacing != EOT; ++pd) {
293 if(pd->spacing != 0) sparp_write(&diavar_sparp, pd->spacing, pd);
294 if(pd->combining != 0) sparp_write(&diavar_sparp, pd->combining, pd);
295 if(pd->variant3 != 0) sparp_write(&diavar_sparp, pd->variant3, pd);
296 if(pd->variant4 != 0) sparp_write(&diavar_sparp, pd->variant4, pd);
297 }
298
299 }
300
/* Look up the formatting hint registered for `glyph`.
 *
 * Returns whatever spars32_read() yields for `glyph` in the hint table,
 * converted to enum glyph_hint_t. The table is filled by glyphops_init(),
 * which therefore has to run first.
 */
enum glyph_hint_t glyph_get_hint(glyph_t glyph)
{
    return (enum glyph_hint_t) spars32_read(&hint_spars32, glyph);
}
305
/* Map any known variant of a diacritic to its combining form.
 *
 * Returns the `combining` field of the table entry registered for
 * `diacritic`, or 0 when the lookup yields no entry. The returned field
 * may itself be 0 if the entry has no combining variant.
 */
glyph_t diacritic_combining_variant(glyph_t diacritic)
{
    struct diacritic_variantentry_t * p;

    p = sparp_read(&diavar_sparp, diacritic);
    if(p == NULL) {
        return 0;
    }
    return p->combining;
}
313
/* Map any known variant of a diacritic to its spacing (non-combining) form.
 *
 * Returns the `spacing` field of the table entry registered for
 * `diacritic`, or 0 when the lookup yields no entry. The returned field
 * may itself be 0 if the entry has no spacing variant.
 */
glyph_t diacritic_spacing_variant(glyph_t diacritic)
{
    struct diacritic_variantentry_t * p;

    p = sparp_read(&diavar_sparp, diacritic);
    if(p == NULL) {
        return 0;
    }
    return p->spacing;
}
321