1 /* catdvi - get text from DVI files
2    Copyright (C) 2000, 2002 Bjoern Brill <brill@fs.math.uni-frankfurt.de>
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18 
19 #include "glyphops.h"
20 #include "sparse.h"
21 #include "glyphenm.h"
22 
/* End-of-table mark for diavars[]. It is tested against the `spacing` field
 * of each row; a dedicated value is used there because 0 already means
 * "variant unavailable or unknown" in that table.
 */
#define EOT 0x04
24 
25 
26 /* known formatting hints */
27 struct glyph_hintentry_t {
28     glyph_t glyph;
29     enum glyph_hint_t hint;
30 };
31 
32 static struct glyph_hintentry_t hints[] = {
33     {GLYPH_asciicircum,    	    GH_DIACRITIC},
34     {GLYPH_grave,	    	    GH_DIACRITIC},
35     {GLYPH_asciitilde, 	    	    GH_DIACRITIC},
36 
37     {GLYPH_dieresis,     	    GH_DIACRITIC},
38     {GLYPH_UNI_macron,	    	    GH_DIACRITIC},
39     {GLYPH_acute, 	    	    GH_DIACRITIC},
40     {GLYPH_cedilla,	    	    GH_DIACRITIC},
41 
42     {GLYPH_circumflex,	    	    GH_DIACRITIC},
43     {GLYPH_caron, 	    	    GH_DIACRITIC},
44     {GLYPH_UNI_macronmodifier,	    GH_DIACRITIC},
45     {GLYPH_UNI_acutemodifier,	    GH_DIACRITIC},
46     {GLYPH_UNI_gravemodifier,	    GH_DIACRITIC},
47     {GLYPH_breve,   	    	    GH_DIACRITIC},
48     {GLYPH_dotaccent,	    	    GH_DIACRITIC},
49     {GLYPH_ring,    	    	    GH_DIACRITIC},
50     {GLYPH_ogonek,	    	    GH_DIACRITIC},
51     {GLYPH_tilde,     	    	    GH_DIACRITIC},
52     {GLYPH_CATDVI_polishstroke,     GH_DIACRITIC},
53 
54     {GLYPH_CATDVI_negationslash,    GH_DIACRITIC},
55     {GLYPH_UNI_circlelarge, 	    GH_DIACRITIC},
56     {GLYPH_CATDVI_vector,   	    GH_DIACRITIC},
57 
58     {GLYPH_gravecomb,	    	    GH_COMBINING_DIACRITIC},
59     {GLYPH_acutecomb,	    	    GH_COMBINING_DIACRITIC},
60     {GLYPH_UNI_circumflexcomb,	    GH_COMBINING_DIACRITIC},
61     {GLYPH_tildecomb, 	    	    GH_COMBINING_DIACRITIC},
62     {GLYPH_UNI_macroncomb,          GH_COMBINING_DIACRITIC},
63     {GLYPH_UNI_overlinecomb,        GH_COMBINING_DIACRITIC},
64     {GLYPH_UNI_brevecomb, 	    GH_COMBINING_DIACRITIC},
65     {GLYPH_UNI_dotaccentcomb,       GH_COMBINING_DIACRITIC},
66     {GLYPH_UNI_dieresiscomb,   	    GH_COMBINING_DIACRITIC},
67     {GLYPH_hookabovecomb,  	    GH_COMBINING_DIACRITIC},
68     {GLYPH_UNI_ringcomb,    	    GH_COMBINING_DIACRITIC},
69     {GLYPH_UNI_caroncomb,   	    GH_COMBINING_DIACRITIC},
70     {GLYPH_UNI_cedillacomb,  	    GH_COMBINING_DIACRITIC},
71     {GLYPH_UNI_ogonekcomb,  	    GH_COMBINING_DIACRITIC},
72     {GLYPH_CATDVI_polishstrokecomb, GH_COMBINING_DIACRITIC},
73 
74     {GLYPH_UNI_slashlongcomb,	    GH_COMBINING_DIACRITIC},
75     {GLYPH_UNI_circlecomb,  	    GH_COMBINING_DIACRITIC},
76     {GLYPH_UNI_vectorcomb,  	    GH_COMBINING_DIACRITIC},
77 
78     {GLYPH_ADOBE_periodsuperior,    GH_DIACRITIC},
79 
80     {GLYPH_CATDVI_Eurodblstroke,    GH_DIACRITIC},
81     {GLYPH_CATDVI_Eurodblstrokecomb,    GH_COMBINING_DIACRITIC},
82 
83     {GLYPH_radical,	GH_RADICAL},
84     	/* Is in the math symbols font but used just like the big
85 	 * variants in the math extension font.
86 	 */
87 
88     /* The TeX math extension stuff */
89     {GLYPH_CATDVI_parenleftbig,	GH_ON_AXIS},
90     {GLYPH_CATDVI_parenrightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
91     {GLYPH_CATDVI_bracketleftbig,	GH_ON_AXIS},
92     {GLYPH_CATDVI_bracketrightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
93     {GLYPH_CATDVI_floorleftbig,	GH_ON_AXIS},
94     {GLYPH_CATDVI_floorrightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
95     {GLYPH_CATDVI_ceilingleftbig,	GH_ON_AXIS},
96     {GLYPH_CATDVI_ceilingrightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
97     {GLYPH_CATDVI_braceleftbig,	GH_ON_AXIS},
98     {GLYPH_CATDVI_bracerightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
99     {GLYPH_CATDVI_angbracketleftbig,	GH_ON_AXIS},
100     {GLYPH_CATDVI_angbracketrightbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
101     {GLYPH_CATDVI_vextendsingle,	GH_EXTENSIBLE_RECIPE},
102     {GLYPH_CATDVI_vextenddouble,	GH_EXTENSIBLE_RECIPE},
103     {GLYPH_CATDVI_slashbig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
104     {GLYPH_CATDVI_backslashbig,	GH_ON_AXIS},
105     {GLYPH_CATDVI_parenleftBig,	GH_ON_AXIS},
106     {GLYPH_CATDVI_parenrightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
107     {GLYPH_CATDVI_parenleftbigg,	GH_ON_AXIS},
108     {GLYPH_CATDVI_parenrightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
109     {GLYPH_CATDVI_bracketleftbigg,	GH_ON_AXIS},
110     {GLYPH_CATDVI_bracketrightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
111     {GLYPH_CATDVI_floorleftbigg,	GH_ON_AXIS},
112     {GLYPH_CATDVI_floorrightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
113     {GLYPH_CATDVI_ceilingleftbigg,	GH_ON_AXIS},
114     {GLYPH_CATDVI_ceilingrightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
115     {GLYPH_CATDVI_braceleftbigg,	GH_ON_AXIS},
116     {GLYPH_CATDVI_bracerightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
117     {GLYPH_CATDVI_angbracketleftbigg,	GH_ON_AXIS},
118     {GLYPH_CATDVI_angbracketrightbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
119     {GLYPH_CATDVI_slashbigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
120     {GLYPH_CATDVI_backslashbigg,	GH_ON_AXIS},
121     {GLYPH_CATDVI_parenleftBigg,	GH_ON_AXIS},
122     {GLYPH_CATDVI_parenrightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
123     {GLYPH_CATDVI_bracketleftBigg,	GH_ON_AXIS},
124     {GLYPH_CATDVI_bracketrightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
125     {GLYPH_CATDVI_floorleftBigg,	GH_ON_AXIS},
126     {GLYPH_CATDVI_floorrightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
127     {GLYPH_CATDVI_ceilingleftBigg,	GH_ON_AXIS},
128     {GLYPH_CATDVI_ceilingrightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
129     {GLYPH_CATDVI_braceleftBigg,	GH_ON_AXIS},
130     {GLYPH_CATDVI_bracerightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
131     {GLYPH_CATDVI_angbracketleftBigg,	GH_ON_AXIS},
132     {GLYPH_CATDVI_angbracketrightBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
133     {GLYPH_CATDVI_slashBigg,	GH_ON_AXIS | GH_MOREMATH_LEFT},
134     {GLYPH_CATDVI_backslashBigg,	GH_ON_AXIS},
135     {GLYPH_CATDVI_slashBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
136     {GLYPH_CATDVI_backslashBig,	GH_ON_AXIS},
137     {GLYPH_ADOBE_parenlefttp,	GH_EXTENSIBLE_RECIPE},
138     {GLYPH_ADOBE_parenrighttp,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
139     {GLYPH_ADOBE_bracketlefttp,	GH_EXTENSIBLE_RECIPE},
140     {GLYPH_ADOBE_bracketrighttp,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
141     {GLYPH_ADOBE_bracketleftbt,	GH_EXTENSIBLE_RECIPE},
142     {GLYPH_ADOBE_bracketrightbt,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
143     {GLYPH_ADOBE_bracketleftex,	GH_EXTENSIBLE_RECIPE},
144     {GLYPH_ADOBE_bracketrightex,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
145     {GLYPH_ADOBE_bracelefttp,	GH_EXTENSIBLE_RECIPE},
146     {GLYPH_ADOBE_bracerighttp,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
147     {GLYPH_ADOBE_braceleftbt,	GH_EXTENSIBLE_RECIPE},
148     {GLYPH_ADOBE_bracerightbt,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
149     {GLYPH_ADOBE_braceleftmid,	GH_EXTENSIBLE_RECIPE},
150     {GLYPH_ADOBE_bracerightmid,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
151     {GLYPH_ADOBE_braceex,	GH_EXTENSIBLE_RECIPE},
152     {GLYPH_ADOBE_arrowvertex,	GH_EXTENSIBLE_RECIPE},
153     {GLYPH_ADOBE_parenleftbt,	GH_EXTENSIBLE_RECIPE},
154     {GLYPH_ADOBE_parenrightbt,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
155     {GLYPH_ADOBE_parenleftex,	GH_EXTENSIBLE_RECIPE},
156     {GLYPH_ADOBE_parenrightex,	GH_EXTENSIBLE_RECIPE | GH_MOREMATH_LEFT},
157     {GLYPH_CATDVI_angbracketleftBig,	GH_ON_AXIS},
158     {GLYPH_CATDVI_angbracketrightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
159     {GLYPH_CATDVI_unionsqtext,	GH_ON_AXIS},
160     {GLYPH_CATDVI_unionsqdisplay,	GH_ON_AXIS},
161     {GLYPH_CATDVI_contintegraltext,	GH_ON_AXIS},
162     {GLYPH_CATDVI_contintegraldisplay,	GH_ON_AXIS},
163     {GLYPH_CATDVI_circledottext,	GH_ON_AXIS},
164     {GLYPH_CATDVI_circledotdisplay,	GH_ON_AXIS},
165     {GLYPH_CATDVI_circleplustext,	GH_ON_AXIS},
166     {GLYPH_CATDVI_circleplusdisplay,	GH_ON_AXIS},
167     {GLYPH_CATDVI_circlemultiplytext,	GH_ON_AXIS},
168     {GLYPH_CATDVI_circlemultiplydisplay,	GH_ON_AXIS},
169     {GLYPH_CATDVI_summationtext,	GH_ON_AXIS},
170     {GLYPH_CATDVI_producttext,	GH_ON_AXIS},
171     {GLYPH_CATDVI_integraltext,	GH_ON_AXIS},
172     {GLYPH_CATDVI_uniontext,	GH_ON_AXIS},
173     {GLYPH_CATDVI_intersectiontext,	GH_ON_AXIS},
174     {GLYPH_CATDVI_unionmultitext,	GH_ON_AXIS},
175     {GLYPH_CATDVI_logicalandtext,	GH_ON_AXIS},
176     {GLYPH_CATDVI_logicalortext,	GH_ON_AXIS},
177     {GLYPH_CATDVI_summationdisplay,	GH_ON_AXIS},
178     {GLYPH_CATDVI_productdisplay,	GH_ON_AXIS},
179     {GLYPH_CATDVI_integraldisplay,	GH_ON_AXIS},
180     {GLYPH_CATDVI_uniondisplay,	GH_ON_AXIS},
181     {GLYPH_CATDVI_intersectiondisplay,	GH_ON_AXIS},
182     {GLYPH_CATDVI_unionmultidisplay,	GH_ON_AXIS},
183     {GLYPH_CATDVI_logicalanddisplay,	GH_ON_AXIS},
184     {GLYPH_CATDVI_logicalordisplay,	GH_ON_AXIS},
185     {GLYPH_CATDVI_coproducttext,	GH_ON_AXIS},
186     {GLYPH_CATDVI_coproductdisplay,	GH_ON_AXIS},
187     {GLYPH_CATDVI_hatwide,	GH_WIDE_DIACRITIC},
188     {GLYPH_CATDVI_hatwider,	GH_WIDE_DIACRITIC},
189     {GLYPH_CATDVI_hatwidest,	GH_WIDE_DIACRITIC},
190     {GLYPH_CATDVI_tildewide,	GH_WIDE_DIACRITIC},
191     {GLYPH_CATDVI_tildewider,	GH_WIDE_DIACRITIC},
192     {GLYPH_CATDVI_tildewidest,	GH_WIDE_DIACRITIC},
193     {GLYPH_CATDVI_bracketleftBig,	GH_ON_AXIS},
194     {GLYPH_CATDVI_bracketrightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
195     {GLYPH_CATDVI_floorleftBig,	GH_ON_AXIS},
196     {GLYPH_CATDVI_floorrightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
197     {GLYPH_CATDVI_ceilingleftBig,	GH_ON_AXIS},
198     {GLYPH_CATDVI_ceilingrightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
199     {GLYPH_CATDVI_braceleftBig,	GH_ON_AXIS},
200     {GLYPH_CATDVI_bracerightBig,	GH_ON_AXIS | GH_MOREMATH_LEFT},
201     {GLYPH_CATDVI_radicalbig,	GH_RADICAL},
202     {GLYPH_CATDVI_radicalBig,	GH_RADICAL},
203     {GLYPH_CATDVI_radicalbigg,	GH_RADICAL},
204     {GLYPH_CATDVI_radicalBigg,	GH_RADICAL},
205     {GLYPH_CATDVI_radicalbt,	GH_EXTENSIBLE_RECIPE},
206     {GLYPH_CATDVI_radicalvertex,	GH_EXTENSIBLE_RECIPE},
207     {GLYPH_CATDVI_radicaltp,	GH_EXTENSIBLE_RECIPE},
208     {GLYPH_CATDVI_arrowvertexdbl,	GH_EXTENSIBLE_RECIPE},
209     {GLYPH_CATDVI_arrowtp,	GH_EXTENSIBLE_RECIPE},
210     {GLYPH_CATDVI_arrowbt,	GH_EXTENSIBLE_RECIPE},
211     {GLYPH_CATDVI_bracehtipdownright,	GH_MOREMATH_LEFT},
212     {GLYPH_CATDVI_bracehtipupright,	GH_MOREMATH_LEFT},
213     {GLYPH_CATDVI_arrowdbltp,	GH_EXTENSIBLE_RECIPE},
214     {GLYPH_CATDVI_arrowdblbt,	GH_EXTENSIBLE_RECIPE},
215     /* end TeX math extension */
216 
217     {GLYPH_CATDVI_tildexwide,	GH_WIDE_DIACRITIC},
218     {GLYPH_CATDVI_hatxwide,	GH_WIDE_DIACRITIC},
219 
220     /* end of table */
221     {0, 0}
222 };
223 
224 static struct spars32_t hint_spars32;
225 
226 
227 /* known mappings between combining and non-combining diacritics.
228  * Incomplete. Use 0 for unavailable or unknown variants.
229  *
230  * variant3 and variant4 are for diacritics that are doubled in the
231  * unicode character set (doubled at least for our purposes and IMHO -
232  * if there is some underlying philosophy, I just can't see it) and
233  * should be mapped to something canonical.
234  *
235  * we try to keep the spacing variant in the ISO 8859-1 range if possible.
236  */
237 struct diacritic_variantentry_t {
238     glyph_t spacing;
239     glyph_t combining;
240     glyph_t variant3;
241     glyph_t variant4;
242 };
243 
244 static struct diacritic_variantentry_t diavars[] =
245 {
246     {GLYPH_acute, GLYPH_acutecomb, GLYPH_UNI_acutemodifier, 0},
247     {GLYPH_grave, GLYPH_gravecomb, GLYPH_UNI_gravemodifier, 0},
248     {GLYPH_asciicircum, GLYPH_UNI_circumflexcomb, GLYPH_circumflex, 0},
249     {GLYPH_dieresis, GLYPH_UNI_dieresiscomb, 0, 0},
250     {GLYPH_tilde, GLYPH_tildecomb, GLYPH_asciitilde, 0},
251     {GLYPH_ring, GLYPH_UNI_ringcomb, 0, 0},
252 
253     {GLYPH_cedilla, GLYPH_UNI_cedillacomb, 0, 0},
254 
255     {GLYPH_caron, GLYPH_UNI_caroncomb, 0, 0},
256     {GLYPH_UNI_macron, GLYPH_UNI_macroncomb, GLYPH_UNI_macronmodifier, 0},
257     {GLYPH_breve, GLYPH_UNI_brevecomb, 0, 0},
258     {GLYPH_dotaccent, GLYPH_UNI_dotaccentcomb, GLYPH_ADOBE_periodsuperior, 0},
259 
260     {GLYPH_ogonek, GLYPH_UNI_ogonekcomb, 0, 0},
261     {GLYPH_CATDVI_polishstroke, GLYPH_CATDVI_polishstrokecomb, 0, 0},
262 
263     {GLYPH_CATDVI_negationslash, GLYPH_UNI_slashlongcomb, 0, 0},
264     {GLYPH_UNI_circlelarge, GLYPH_UNI_circlecomb, 0, 0},
265     {GLYPH_CATDVI_vector, GLYPH_UNI_vectorcomb, 0, 0},
266 
267     {GLYPH_CATDVI_Eurodblstroke, GLYPH_CATDVI_Eurodblstrokecomb, 0, 0},
268 
269     /* end of table */
270     {EOT, 0, 0, 0}
271 };
272 
273 static struct sparp_t diavar_sparp;
274 
glyphops_init()275 void glyphops_init()
276 {
277     struct glyph_hintentry_t * ph;
278     struct diacritic_variantentry_t * pd;
279 
280     /* The glyph hint sparp is indexed by the glyph and points directly to
281      * the hint.
282      */
283     spars32_init(&hint_spars32, 0);
284     for(ph = hints; ph->glyph != 0; ++ph) {
285     	spars32_write(&hint_spars32, ph->glyph, ph->hint);
286     }
287 
288     /* The diacritics variant sparp is indexed by any variant of the diacritic.
289      * The values point to the corresponding dicaritic_variantentry_t .
290      */
291     sparp_init(&diavar_sparp);
292     for(pd = diavars; pd->spacing != EOT; ++pd) {
293     	if(pd->spacing != 0) sparp_write(&diavar_sparp, pd->spacing, pd);
294     	if(pd->combining != 0) sparp_write(&diavar_sparp, pd->combining, pd);
295     	if(pd->variant3 != 0) sparp_write(&diavar_sparp, pd->variant3, pd);
296     	if(pd->variant4 != 0) sparp_write(&diavar_sparp, pd->variant4, pd);
297     }
298 
299 }
300 
/* Return the formatting hint stored for `glyph` in hint_spars32.
 *
 * The value read from the sparse table is cast to enum glyph_hint_t.
 * NOTE(review): glyphs without an entry presumably yield the default the
 * table was initialized with (0) - confirm against spars32_read().
 */
enum glyph_hint_t glyph_get_hint(glyph_t glyph)
{
    enum glyph_hint_t hint;

    hint = (enum glyph_hint_t) spars32_read(&hint_spars32, glyph);
    return hint;
}
305 
/* Return the combining form of `diacritic`.
 *
 * `diacritic` may be any variant that glyphops_init() registered for a
 * diavars[] row. Returns 0 when the lookup finds no row; the row's
 * `combining` field itself may also be 0 (variant unavailable).
 */
glyph_t diacritic_combining_variant(glyph_t diacritic)
{
    struct diacritic_variantentry_t *entry = sparp_read(&diavar_sparp, diacritic);

    if (entry == NULL) {
        return 0;
    }
    return entry->combining;
}
313 
/* Return the spacing (non-combining) form of `diacritic`.
 *
 * `diacritic` may be any variant that glyphops_init() registered for a
 * diavars[] row. Returns 0 when the lookup finds no row.
 */
glyph_t diacritic_spacing_variant(glyph_t diacritic)
{
    struct diacritic_variantentry_t *entry = sparp_read(&diavar_sparp, diacritic);

    if (entry == NULL) {
        return 0;
    }
    return entry->spacing;
}
321