1 // charmetrics.cpp                              Copyright (C) 2021 Codemist
2 
3 //=======================================================================//
4 //                                                                       //
5 //  WARNING. In "-DCREATE=1" mode this program takes about 40 minutes    //
6 //  of CPU time on a computer that in 2016 counts as fairly fast. So     //
7 //  when you run it be prepared to be reasonably patient. The slow run   //
8 //  really only needed to be done once to extract font information from  //
9 //  a collection of ".afm" files and form it into a compact table, so it //
10 //  should only need to be run if the fonts being used change.           //
11 //                                                                       //
12 //=======================================================================//
13 
14 
15 /**************************************************************************
16  * Copyright (C) 2021, Codemist.                         A C Norman       *
17  *                                                                        *
18  * Redistribution and use in source and binary forms, with or without     *
19  * modification, are permitted provided that the following conditions are *
20  * met:                                                                   *
21  *                                                                        *
22  *     * Redistributions of source code must retain the relevant          *
23  *       copyright notice, this list of conditions and the following      *
24  *       disclaimer.                                                      *
25  *     * Redistributions in binary form must reproduce the above          *
26  *       copyright notice, this list of conditions and the following      *
27  *       disclaimer in the documentation and/or other materials provided  *
28  *       with the distribution.                                           *
29  *                                                                        *
30  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    *
31  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      *
32  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS      *
33  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE         *
34  * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *
35  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,   *
36  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *
37  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
38  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR  *
39  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF     *
40  * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH   *
41  * DAMAGE.                                                                *
42  *************************************************************************/
43 
44 
45 // $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $
46 
47 
48 #ifdef CREATE
49 
// The slowest part of the code here is finding a good hash regime
// to obtain good occupancy for the main character metrics table.
// Telling the code where to look can help speed it up. So if you
// predefine RESTRICTED_SEARCH at compile time this will avoid a lengthy
// search, but if you ever alter the fonts used or anything else
// that can influence how the hash table might pack you will need
// to run at least once without that and transcribe the new optimum
// information to here.
58 
#ifdef RESTRICTED_SEARCH
// The values here are tolerably close to the expected best answer!
// With RESTRICTED_SEARCH the two bounds are plain numbers; the default
// EXPECTED_TABLESIZE defined just below (10057) lies inside this window.
#define MAIN_LOW        10056
#define MAIN_HIGH       10080
#else
// Without RESTRICTED_SEARCH the bounds are expressions over mainkeycount
// and hashtable, both of which are declared further down this file, so
// these macros can only be expanded after those declarations.
// NOTE(review): presumably the range of main table sizes that the search
// tries - confirm against the search code that uses these macros.
#define MAIN_LOW        (mainkeycount-1)
#define MAIN_HIGH       (sizeof(hashtable)/sizeof(hashtable[0]))
#endif
67 
// Even if I am going to do a full search (ie regardless of the setting
// of RESTRICTED_SEARCH) I will check if a matching is available with the
// following parameters. If it is then I do not need to do any more
// searching at all, and the code here will run really fast! As with
// RESTRICTED_SEARCH, if you feel really cautious you should set these to
// ridiculous values (eg EXPECTED_TABLESIZE smaller than the amount of data
// in use so that things can not possibly fit) after altering font details.
75 
// Each of the three EXPECTED_ values is only given a default when the
// build has not already defined it, so any of them can be overridden
// from the compiler command line.
#ifndef EXPECTED_TABLESIZE
#define EXPECTED_TABLESIZE   10057
#endif
#ifndef EXPECTED_MODULUS2
#define EXPECTED_MODULUS2     4955
#endif
#ifndef EXPECTED_OFFSET2
#define EXPECTED_OFFSET2      5000
#endif
85 
86 // After having used a simple matching process to identify the smallest
87 // hash table that could be used the code will proceed to try all
88 // configurations of that size and find for each the matching that has
89 // lowest weight - in a sense that means it should create a hash table
90 // where as large a proportion of the keys as possible go in the location
91 // that means that access to them only takes one probe.
// This uses the "Hungarian" (sometimes known as Munkres) algorithm, and to
// avoid the substantial cost I can specify a target figure of merit (ie a
// target for the average number of probes that accessing keys will involve).
// If the assignment established by EXPECTED_TABLESIZE etc attains this
// limit the heavy search for an optimum will not be activated.
97 //
// A "merit" value is a floating point value showing the average number
// of probes a lookup might take, assuming (in a fairly arbitrary way) that
// there are 4 times as many lookups of "IMPORTANT" keys as "STANDARD"
// ones. If every key ended up in its first choice position (possible with
// a really lightly loaded table) it would end up as 1.0. The worst imaginable
// case would have every key in its third choice position and a merit of 3.0,
104 // but it is hard to see how to force every key to avoid choices 1 and 2, so
105 // this is an upper bound not to be attained. If keys ended up evenly
106 // distributed across the three places that they could go the overall merit
107 // would be 2.0. If everything was IMPORTANT so all keys took either 1 or 2
108 // probes and those two cases were equally likely then the overall value would
109 // be 1.5.
110 //
111 // If you set TARGET_MERIT to 4.0 (ie 4 probes per key) then any matching
112 // at all will beat that and so first solution tried will be accepted.
113 // If on the other hand you set it to 0.0 then no assignment can achieve
114 // that so all possible hash options will be tried to find the best
// merit possible. Running the expensive search once and putting the merit
// found by it in here will keep you safe and fast!
117 
#ifndef TARGET_MERIT
// TARGET_MERIT only receives this default when the build has not already
// defined it. The default of 1.473 sits just above the 1.472 score quoted
// below for the "EXPECTED" parameters.
//
// The merit score for my "EXPECTED" hash parameters is 1.472.
// None of the "important" keys need more than one probe, and for the
// "standard" cases there are 5904 cases using 1 probe, 2291 needing 2
// and only 1489 that use the full 3 probes. So for all the common keys
// access is in just 1 probe, and even if you were to suppose that all
// keys were equally probable the average would only be 1.526 probes
// per access.
//
// Finding the values for modulus2 and offset2 took around three days of CPU
// time (albeit mostly not exploiting any parallelism) but now the good
// EXPECTED parameters are provided the hash arrangement can be reconstructed
// and verified fairly fast.
//
#define TARGET_MERIT       1.473
#endif
134 
135 
136 
137 #include <cstdio>
138 #include <cstdint>
139 #include <cinttypes>
140 #include <cstdlib>
141 #include <cstring>
142 #include <cctype>
143 #include <ctime>
144 #include <climits>
145 
146 // This takes a bunch of font information files and creates some C
147 // tables that can be used to access the information rapidly.
148 
149 // Well if I compile this with -DCREATE=1 it makes a program that
150 // inspects the raw font metric and kerning files and creates a file
151 // called charmetrics.h containing a packed version of it. Otherwise it
152 // makes the code that inspects those tables and retrieves information.
153 // The "raw" input files are in ".afm" (Adobe Font Metric) format and
154 // the ones I use here were created from some .otf and .ttf font files
155 // using fontforge. The fonts I start from have generous licenses that
156 // permit re-distribution and so even more so I will hold that the
157 // file generated here that contains metrics is not subject to any
158 // severe limits on its use.
159 
160 // I need my cuckoo-hashing library. One sane thing to do would be to
161 // link to that as a separately compiled component, but here I will just
162 // include its source! See cuckoo.h and cuckoo.c for commentary and
163 // explanation.
164 
165 #include "cuckoo.cpp"
166 
167 // I also generate a charmetrics.red that can give access to the same
168 // information from within Reduce... I am considering use from Java as well
169 // but somewhat amazingly Java deals with statically initialised arrays
170 // in a way that puts limits on their size such as to BREAK this - and
171 // so with a naive conversion I get both a "code too large" and a
172 // "too many constants" error from the Java compiler. The first of these
// persists even if I split the main hash table to give just one
// column at a time - so an initialised array of 10000 simple long values
// is more than Java is willing to support. Thus any Java code will need to
176 // load metric information from a resource, and I think that C code that
177 // uses the charmetrics.h file as created here can be used to create the
178 // data that will be loaded. Ugh.
179 
// I have inspected the fonts that concern me and the sizes
// shown here will suffice. There are fewer than 32000 characters in
// total defined in all of the fonts I have (odokai is by far
// the biggest with over 17K characters defined). There are also fewer
// than 5000 kerning pairs listed. I will in fact have 7 distinct fonts.
185 
// The code to create the font data tables is very careless and would
// be thoroughly susceptible to all sorts of bad effects from buffer overflow
188 // if it were ever to be fed font files other than the ones distributed with
189 // it. Here I follow a path of fixed and unchecked size limits in a search
190 // for code simplicity in a utility I only intend for use in a single
191 // context. But anybody minded to add extra fonts MUST be aware and should
192 // check all the limits before running anything.
193 
194 // My code here is also not especially tidy, and it uses C-99 style "//"
195 // comments (as here!) and <stdint.h> with uint64_t, UINT64_C() and their
196 // friends. In fact gcc has supported both of these for some time now and
197 // since this is a run-once sort of program I feel I can rely on using
198 // either a tolerably recent gcc or some other C compiler that supports
199 // C-99.
200 
201 
// "wc -L" tells me that all my font-metric files have lines that are
// less than 1750 characters long. The worst case is for the cmuntt font
// where there are a large number of ligatures specified for "space"
// followed by various characters (that I believe are probably all the
// combining characters present, so that putting a space ahead of any of them
// can lead to a conversion to a non-combining variant).
208 
209 // Having statically fixed limits here simplifies my coding.
210 
// Upper bound on the number of fonts (seven are actually in use).
#define MAXFONTS 16
// Length of the per-character arrays (fontkey, codepoint, width, the
// bounding box arrays, uninames, ...) declared later in this file.
#define MAXCHARS 50000
// Length of the kerning arrays (kernfont, kernstart, kerntable, ...).
#define MAXKERNS 10000
// Length of the ligature arrays (ligfont, ligstart, ligtable, ...).
#define MAXLIGATURES 1000
// Size of the text buffers that hold one line of a metrics file; the
// longest input line is noted above as being under 1750 characters.
#define MAXLINE  2000
// Length of the accent and variant arrays used for maths symbols.
#define MAXMATHSYMS 200
217 
218 #else // CREATE
219 #ifdef TEST
220 #include <cstdio>
221 #include <cstdlib>
222 #include <cstdint>
223 #include <cinttypes>
224 #include "cuckoo.h"
225 #else // TEST
226 #include "headers.h"
227 #include "cuckoo.h"
228 #endif // TEST
229 
230 #include "charmetrics.h"  // A file I must have created earlier
231 #endif // CREATE
232 
// Two single-bit flags: 0x00200000 is bit 21 and 0x00400000 is bit 22,
// ie the two bits immediately above a 21-bit field.
// NOTE(review): these look like the kern-or-ligature and last-item marks
// of the kern table words described further down this file - confirm
// against the code that builds and reads that table.
#define IS_LIGATURE 0x00200000
#define IS_BLOCKEND 0x00400000

// The list of font codes here must be kept in step with the list
// of names in the table.
// The values run in the same order as the entries of fontnames[] and of
// the chardepth_ tables. F_end is one more than the last font code and so
// is also the number of fonts.

#define F_cmuntt                      0
#define F_odokai                      1
#define F_Regular                     2
#define F_Bold                        3
#define F_Italic                      4
#define F_BoldItalic                  5
#define F_Math                        6
#define F_end                         7
247 
// In the ".afm" files and hence the main metrics I work with each
// character is measured and positioned relative to a base-line. However
// when wxWidgets draws things it uses the top left corner of a bounding
// box for text as its reference point. The "bounding box" used
// does not seem to be related to any information I can find in the .afm
// file so I find values by running a wxWidgets program and seeing what
// it reports and put in a table of values here...
255 
// To re-calculate these values you could check out a current revision of
// Reduce, configure "--with-csl --with-wx", make wxshowmath and run
// wxshowmath on wxdata/fontsizes.dat. The trace output should include
259 //....        Need to process CMU Typewriter Text
260 //....        Gives CMU Typewriter Text with flags 0
261 //....        font[0] = "CMU Typewriter Text" size 10000
262 //....        ( CMU Typewriter Text/10000: 12597.7 2330 [10267.7]
263 //....            1027,           // cmuntt
264 //....        from table baseline offset = 10270
265 //....        convert odokai
266 //....        Need to process AR PL New Kai
267 //....        Gives AR PL New Kai with flags 10000
268 //....        font[1] = "AR PL New Kai" size 10000
269 //....        ( AR Pl New Kai/10000: 10693.4 1210.94 [9482.42]
270 //....            1055,           // odokai
271 // and I extracted the information I need here using
272 //    grep "    // " wxshowmath.log > DESTINATION
273 // I edited the file to remove a comma after the final entry...
274 // (revisions much before then may not display the relevant
275 // information - ones significantly after may have removed the trace
276 // output...).
277 
278 
279 // I note with some distress that the adjustments needed here differ
280 // across operating systems. That backs up the fact that this data can not
281 // be deduced from a set of Adobe Font Metrics.
282 //
283 // So I provide three versions of this table (it is not very large) and
284 // a tolerably cheap run-time test can pick which one to use. I feel it is
285 // nicer for my runtime code to check no more than 3 cases to choose between
286 // these tables rather than getting it to measure all the fonts.
287 //
288 // With cslSTIX it seems that the X11 and OS/X measurements match - but
289 // I will nevertheless provide three versions here just in a spirit of
290 // caution.
291 
// Pointer to whichever of the chardepth_ tables below is in use. It starts
// out null; nothing in this part of the file assigns it.
const std::uint16_t* chardepth{nullptr};
293 
// Windows values, one per font and listed in font-code order.
const std::uint16_t chardepth_WIN32[] =
{
    /* F_cmuntt     */ 1027,
    /* F_odokai     */  948,
    /* F_Regular    */ 1023,
    /* F_Bold       */ 1023,
    /* F_Italic     */ 1023,
    /* F_BoldItalic */ 1023,
    /* F_Math       */ 2566
};
303 
// X11 values, one per font and listed in font-code order.
const std::uint16_t chardepth_X11[] =
{
    /* F_cmuntt     */ 1027,
    /* F_odokai     */  885,
    /* F_Regular    */ 1023,
    /* F_Bold       */ 1055,
    /* F_Italic     */ 1023,
    /* F_BoldItalic */ 1004,
    /* F_Math       */ 2566
};
313 
// OS/X values, one per font and listed in font-code order. At present
// every entry is the same as the corresponding X11 one.
const std::uint16_t chardepth_OSX[] =
{
    /* F_cmuntt     */ 1027,
    /* F_odokai     */  885,
    /* F_Regular    */ 1023,
    /* F_Bold       */ 1055,
    /* F_Italic     */ 1023,
    /* F_BoldItalic */ 1004,
    /* F_Math       */ 2566
};
323 
// Font names, in font-code order. Only the first seven of the 31 slots are
// given names; the language rules for aggregate initialisation leave the
// remaining slots as null pointers.
const char *fontnames[31] =
{
    /* F_cmuntt     */ "cmuntt",
    /* F_odokai     */ "odokai",
    /* F_Regular    */ "cslSTIX-Regular",
    /* F_Bold       */ "cslSTIX-Bold",
    /* F_Italic     */ "cslSTIX-Italic",
    /* F_BoldItalic */ "cslSTIX-BoldItalic",
    /* F_Math       */ "cslSTIXMath-Regular"
};
333 
334 // The font metric information I use will be quite bulky, so I will be
335 // trying to balance speed, compactness and simplicity here.
336 // I will only need to support the fonts listed above, so I will
337 // exploit some observed features. The first is that cmuntt and odokai
338 // (almost)  only use the Basic Multilingual Plane (ie U+0000 to U+FFFF).
339 // The STIX fonts use nothing in the range U+4000 to U+A000. But then
340 // STIXMath uses code in the range U+1D400 to U+1D800 and U+108000 to
341 // U+108400.
// The other STIX fonts have subsets of the same use. So there I will map
// U+1Dxxx to U+4xxx and U+108xxx to U+5xxx, having first mapped anything
// originally in those target ranges to the illegal code U+FFFF. That
// leaves all codes as just 16 bits.
// Since I have fewer than 8 fonts I can use 3 bits to indicate a font,
// and the squashed 16-bit "remapped codepoint" plus 3 bits of font leaves
// me needing 19 bits in all.
349 // My hash table format could cope even if I needed up to 21-bits of
350 // full key... so I have some slack available if needed (eg if I wanted to
351 // support more fonts).
352 
static int pack_character(int font, int codepoint)
{
// Combine a font number and a codepoint into one integer: the font number
// shifted up by 16 bits, ORed with the codepoint squeezed into 16 bits.
// A codepoint that can not be represented becomes the illegal code 0xffff.
//
// The cases that apply here are
//    fonts 0 and 1 (cmuntt, odokai)
//              U+D800  - U+DFFF   to 0xffff (surrogates)
//              U+10000 - U+107FF  to 0xd800 - 0xdfff
//              anything higher    to 0xffff
//              U+10144 - U+10147 are the only characters present in
//              cmuntt that have codes over U+ffff.
//    cslSTIX*  U+4000 - U+7FFF    to 0xffff
//              U+1Dxxx            to U+4xxx
//              U+108xxx           to U+5xxx
//              anything else from U+10000 upwards to 0xffff
//
// The surrogate test has to inspect all of the top five bits of the 16-bit
// code. It used to mask with 0xd800, which leaves bit 13 out and so also
// threw away every code from U+F800 to U+FFFF (for instance the U+FB00
// block that main_importance ranks for cmuntt). Because this changes which
// keys exist, charmetrics.h needs to be regenerated in CREATE mode for
// lookups of those characters to succeed.
    if (font < 2)
    {   if ((codepoint & 0xf800) == 0xd800) codepoint = 0xffff;
        else if (codepoint >= 0x10000)
        {   if (codepoint <= 0x107ff) codepoint = 0xd800 +
                                                      (codepoint & 0x7ff);
            else codepoint = 0xffff;
        }
    }
    else if (codepoint >= 0x4000 &&
             codepoint < 0x8000) codepoint = 0xffff;
    else if (codepoint >= 0x1d000 && codepoint <= 0x1dfff)
        codepoint = 0x4000 + (codepoint & 0xfff);
    else if (codepoint >= 0x108000 && codepoint <= 0x108fff)
        codepoint = 0x5000 + (codepoint & 0xfff);
    else if (codepoint >= 0x10000) codepoint = 0xffff;
// I need the bottom two bits of this packed code to be the bottom
// two bits of the codepoint because my hash table will be using
// buckets of four adjacent codepoints.
    return (font << 16) | codepoint;
}
381 
382 // I will store information in a hash table that puts four codepoints per
383 // "hash table line". Each hash table entry will involve five 64-bit words.
384 // The first will contain the key and some information to help with kerning
385 // and ligatures. Because I will have a line size of 4 I only need to use 19
386 // bits of key. That leaves me with space to put four 11-bit kern entries
387 // in, one for each of the 4 codepoints covered. There will be a 16-entry
388 // table indexed by font that gives a value to be added to one of these
389 // offsets. That allows for up to 2048 kern entries per font. I count 1016
390 // kern declarations and 16 ligature declarations for STIX-Regular
391 // and those together add up to 1032 which fits reasonably.
392 //
// I assessed having individual entries in the hash table and line sizes
// of 2 and 8 as well as 4. A line size of 8 saves a small amount of space
// but at the cost of seeming noticeably messier. A line size of only 2
// consumes distinctly more memory.
397 //
398 // The other four 64-bit values each hold four 13-bit fields and one 12
399 // bit one. These store the character width and its bounding box. For my
400 // fonts I observe
401 //     0   <=   width   <=  3238        use 13 bits unsigned
402 //    -998 <=   llx     <=  929         use 13 bits unsigned offset by -3000
403 //    -524 <=   lly     <=  843         use *12* bits unsigned offset by -1000
404 //    -234 <=   urx     <=  3238        use 13 bits unsigned offset by -500
405 //    -141 <=   ury     <=  1055        use 13 bits unsigned offset by -1000
406 // that packing is a bit ugly but ends up using exactly 64 bits which is
407 // really convenient.
408 
409 // When a character starts a kern I have a 11 bit index value that gets added
410 // to a further value that depends on the font to give an offset into a table
411 // of 32-bit words. Each word contains
412 //    last item mark      1 bit set if this is the last item of
413 //                        kern information associated with the current
414 //                        lead character.
415 //    kern-or-ligature    1 bit flag set if this is ligature not kern data.
416 //    successor char      21-bit codepoint for a successor character. Note
417 //                        that this will always be in the same font. The
418 //                        codepoint is stored plain here with no compression
419 //                        or adjustment.
420 //    offset              for kerning this is a 9-bit signed adjustment
421 //                        to make the spacing. For a ligature it is a
422 //                        9 bit offset into a table of 21-bit codepoints
423 //                        for replacement characters. I only seem to see
424 //                        77 cases of ligatures in all so this final
425 //                        table is not too bulky.
// The kern index values start at 1 so that a kern index of zero can
// indicate no kerning is needed. The range of kern adjustment I see in my
// fonts is -149 to 87, and so the 9-bit field I have (coping with -256 to
// +255) will suffice comfortably.
430 //
431 
432 #ifdef CREATE
433 
434 // In the official full list of Unicode names there are some remarkably
435 // long names used, of which maybe the worst is
436 //    "CLOCKWISE RIGHTWARDS AND LEFTWARDS OPEN CIRCLE ARROWS WITH
437 //     CIRCLED ONE OVERLAY"
438 // however the fonts I use here all have embedded names that are reasonably
439 // short. I would detect it if any were longer than 120 characters and stop.
440 // If that happened I would merely increase MAXUNILEN here. The names present
441 // while processing fonts here are purely local to the treatment here (they
442 // are used to link kerning tables).
443 
444 #define MAXUNILEN 120
445 
446 static int       charcount = 0;
447 static int       fontkey[MAXCHARS];
448 static int32_t   codepoint[MAXCHARS];
449 static int       mainkeycount;
450 static uint32_t  mainkey[MAXCHARS];
451 static int32_t   width[MAXCHARS];
452 static int32_t   llx[MAXCHARS];
453 static int32_t   lly[MAXCHARS];
454 static int32_t   urx[MAXCHARS];
455 static int32_t   ury[MAXCHARS];
456 static char      uninames[MAXCHARS][MAXUNILEN];
457 static int       kernreference[MAXCHARS];
458 
459 // For ligature information I will store the identity of the
460 // start character and then the names of the follower and the
461 // replacement.
462 
463 static int       nligatures = 0;
464 static int       ligfont[MAXLIGATURES];
465 static int32_t   ligstart[MAXLIGATURES];
466 static char      ligfollow[MAXLIGATURES][MAXUNILEN];
467 static char      ligreplacement[MAXLIGATURES][MAXUNILEN];
468 static int32_t   ligfollowcode[MAXLIGATURES];
469 static int32_t   ligreplacementcode[MAXLIGATURES];
470 
471 // For kerning information I will store the identity of the
472 // start and follow characters and the integer adjustment to be made.
473 
474 static int       nkerns = 0;
475 static int       kernfont[MAXKERNS];
476 static char      kernstart[MAXKERNS][MAXUNILEN];
477 static char      kernfollow[MAXKERNS][MAXUNILEN];
478 static int       kernadjustment[MAXKERNS];
479 static int32_t   kernstartcode[MAXKERNS];
480 static int32_t   kernfollowcode[MAXKERNS];
481 
482 static int kernp = 0;
483 static std::int16_t  fontkern[F_end];
484 static uint32_t kerntable[MAXKERNS];
485 static char     ktstart[MAXKERNS][MAXUNILEN];
486 static char     ktfollow[MAXKERNS][MAXUNILEN];
487 static int      ktadjustment[MAXKERNS];
488 static char     ktfont[MAXKERNS][32];
489 static int      ktfontn[MAXKERNS];
490 
491 static int ligp = 0;
492 static uint32_t ligtable[MAXLIGATURES];
493 static char ltfirst[MAXLIGATURES][MAXUNILEN],
494        ltfollow[MAXLIGATURES][MAXUNILEN],
495        ltname[MAXLIGATURES][MAXUNILEN],
496        ltfont[MAXLIGATURES][32];
497 
498 static int accentp = 0;
499 static char accentname[MAXMATHSYMS][MAXUNILEN];
500 static uint32_t accentnum[MAXMATHSYMS];
501 static int32_t  accentval[MAXMATHSYMS];
502 
503 static int variantp = 0;
504 static int variantdirection[MAXMATHSYMS];
505 static char variantname[MAXMATHSYMS][MAXUNILEN];
506 static char
507 v1[MAXMATHSYMS][MAXUNILEN]; // size 1 (just bigger than basic)
508 static char v2[MAXMATHSYMS][MAXUNILEN]; // size 2
509 static char v3[MAXMATHSYMS][MAXUNILEN]; // size 3
510 static char v4[MAXMATHSYMS][MAXUNILEN]; // size 4
511 static char v5[MAXMATHSYMS][MAXUNILEN]; // size 5 (biggest)
512 static char P1[MAXMATHSYMS][MAXUNILEN]; // top        top        top
513 static char
514 P2[MAXMATHSYMS][MAXUNILEN]; // extension  extension  extension
515 static char P3[MAXMATHSYMS][MAXUNILEN]; // middle     bottom
516 static char P4[MAXMATHSYMS][MAXUNILEN]; // extension
517 static char P5[MAXMATHSYMS][MAXUNILEN]; // bottom
518 // Now the same converted to codepoints rather than names.
519 static int32_t variantcode[MAXMATHSYMS];
520 static int32_t
521 nv1[MAXMATHSYMS];     // size 1 (just bigger than basic)
522 static int32_t nv2[MAXMATHSYMS];     // size 2
523 static int32_t nv3[MAXMATHSYMS];     // size 3
524 static int32_t nv4[MAXMATHSYMS];     // size 4
525 static int32_t nv5[MAXMATHSYMS];     // size 5 (biggest)
526 static int32_t np1[MAXMATHSYMS];     // bottom piece
527 static int32_t np2[MAXMATHSYMS];     // extension
528 static int32_t np3[MAXMATHSYMS];     // middle piece
529 static int32_t np4[MAXMATHSYMS];     // extension
530 static int32_t np5[MAXMATHSYMS];     // top piece
531 static int vdata1[MAXMATHSYMS][4];   // start end full flag
532 static int vdata2[MAXMATHSYMS][4];
533 static int vdata3[MAXMATHSYMS][4];
534 static int vdata4[MAXMATHSYMS][4];
535 static int vdata5[MAXMATHSYMS][4];
536 
537 
538 // It will be necessary at times to look up a name given its name. I will do
539 // that with a crude linear search because I do not expect this to be a
540 // performance-limiting part of this whole program.
541 
542 int32_t decodename(int fontnum, const char *name)
543 {   int i;
544     for (i=0; i<charcount; i++)
545         if (fontnum == fontkey[i] &&
546             std::strcmp(name, uninames[i]) == 0)
547             return codepoint[i];
548     std::printf("Character called %s not found in font %d\n", name,
549                 fontnum);
550     std::exit(EXIT_FAILURE);
551 }
552 
553 // The hash table will end up holding information about around 32000
554 // characters. It is arranged in lines each of which store data on
555 // four characters, so assuming few isolated character codes that
556 // means it will use around 8000 entries. I make its size a prime.
557 // If I make it 10091 then my tables are fairly full - around 99.5%.
558 // The hashing scheme I use will guarantee that not many probes are
559 // needed even at this high loading level!!!
560 
#define MAXCHAR_METRICS_TABLE_SIZE 20000

// The original note here reads "This is around 400 Kbytes... I tend to
// count that as quite large". The array declared below reserves
// MAXCHAR_METRICS_TABLE_SIZE rows of five 64-bit words; the 400 Kbyte
// figure presumably refers to the roughly ten thousand rows that end up
// in use.
// By far the largest contribution to it is data from odokai.afm.
// Although almost all characters there are specified with a width of 1000
// the character bounding boxes are all individual and varied, and so an
// index of bounding boxes does not look like a useful way to save space.

static std::uint64_t hashtable[MAXCHAR_METRICS_TABLE_SIZE][5];

// A smaller array with one 32-bit word per row, used with the Hungarian
// algorithm...
static std::uint32_t uint32hashtable[MAXCHAR_METRICS_TABLE_SIZE];
573 
574 static int main_importance(uint32_t key)
575 {   int font = key >> 16;
576     if (font != F_cmuntt && font != F_Math) return CUCKOO_STANDARD;
577     key &= 0xffff;
578 // codepoints U+0000 to U+007f have a very special status, and
579 // I will insist that they are always processed in one probe.
580     if ((key & 0xffff) < 0x80/4) return CUCKOO_VITAL;
581 // The fixed pitch font used for most input and the Maths font used for
582 // most output will be encouraged to use at most two probes.
583     if (font == F_cmuntt)
584     {   if (key < 0x0600/4 ||
585             (0x2000/4 <= key && key < 0x3000/4) ||
586             (0xfb00/4 <= key && key < 0xfc00/4))  return CUCKOO_IMPORTANT;
587     }
588     else if (font == F_Math)
589     {   if (key < 0x0600/4 ||
590             (0x2000/4 <= key && key < 0x2400/4) ||
591             (0x2900/4 <= key && key < 0x2c00/4) ||
592             (0x4000/4 <= key && key < 0x6000/4) ||
593             (0xfb00/4 <= key && key < 0xfc00/4))  return CUCKOO_IMPORTANT;
594     }
595 // Other characters get standard treatment and are allowed up to three probes.
596     return CUCKOO_STANDARD;
597 }
598 
static uint32_t main_get(void *p)
{
// Fetch the key held in the 32-bit word that p points at.
// The FULL key may be up to 21 bits, but because hash-table lines hold 4
// items only 19-bit keys are needed here. With the packing scheme used at
// present full keys are in reality only 19 bits and hence 17 bits here,
// so there are two bits available for future expansion if necessary.
    const uint32_t keymask = 0x0007ffff;
    const uint32_t word = *static_cast<uint32_t *>(p);
    return word & keymask;
}
static void main_set(void *p, uint32_t key)
{
// Store the low 19 bits of key into the 32-bit word that p points at,
// leaving the upper 13 bits of that word as they were.
    const uint32_t keymask = 0x0007ffff;
    uint32_t *slot = static_cast<uint32_t *>(p);
    const uint32_t kept = *slot & ~keymask;
    *slot = kept | (key & keymask);
}
611 
612 
// I will have a separate hash table to map cslSTIXMath-Regular characters
// onto signed 10-bit values that give information about the horizontal
// placement for accents to be placed above characters. This is just for
// cslSTIXMath-Regular because that is the only font that I am using that
// has this information embedded within it. Both key and value can pack
// into a 32-bit integer here.
619 
#define MAXTOPCENTRESIZE 500
// One 32-bit word for each slot of the accent placement table.
static std::int32_t topcentre[MAXTOPCENTRESIZE];
622 
623 
624 static int accent_importance(uint32_t key)
625 {   return CUCKOO_IMPORTANT;
626 }
627 
628 
static uint32_t accent_get(void *p)
{
// The whole of the 32-bit word that p points at is the key.
    const uint32_t *slot = static_cast<uint32_t *>(p);
    return *slot;
}
static void accent_set(void *p, uint32_t key)
{
// Overwrite the whole of the 32-bit word that p points at with key.
    uint32_t *slot = static_cast<uint32_t *>(p);
    *slot = key;
}
635 
636 
637 // Another table will take characters to "larger variants". So for instance
638 // a left parenthesis will have five gradually larger versions.
639 
640 static int variantsize = MAXMATHSYMS;
641 static uint32_t variant_table[MAXMATHSYMS][6];
642 
643 static int variant_importance(uint32_t key)
644 {   return CUCKOO_STANDARD;
645 }
646 
static uint32_t variant_get(void *p)
{
// The whole of the 32-bit word that p points at is the key.
    const uint32_t *slot = static_cast<uint32_t *>(p);
    return *slot;
}
static void variant_set(void *p, uint32_t key)
{
// Overwrite the whole of the 32-bit word that p points at with key.
    uint32_t *slot = static_cast<uint32_t *>(p);
    *slot = key;
}
653 
654 // Yet another is for the ways to build up huge symbols out of multiple
655 // glyphs.
656 
657 static int extensionsize = MAXMATHSYMS;
658 static uint32_t extension_table[MAXMATHSYMS][11];
659 
660 static int extension_importance(uint32_t key)
661 {   return CUCKOO_STANDARD;
662 }
663 
static uint32_t extension_get(void *p)
{
// The whole of the 32-bit word that p points at is the key.
    const uint32_t *slot = static_cast<uint32_t *>(p);
    return *slot;
}
static void extension_set(void *p, uint32_t key)
{
// Overwrite the whole of the 32-bit word that p points at with key.
    uint32_t *slot = static_cast<uint32_t *>(p);
    *slot = key;
}
670 
671 
672 static char      line[MAXLINE];
673 static char      saveline[MAXLINE];
674 static char      segment[MAXLINE];
675 
// Running extremes for the width and the four bounding box values. Each
// maximum starts far below and each minimum far above any value that is
// expected, so that the first real value seen replaces it.
int maxw   = -10000;
int maxllx = -10000;
int maxlly = -10000;
int maxurx = -10000;
int maxury = -10000;

int minw   = 10000;
int minllx = 10000;
int minlly = 10000;
int minurx = 10000;
int minury = 10000;
680 
681 int main(int argc, char *argv[])
682 {   const char *f;
683     int pass, fontnum, best;
684     char *p, *q;
685     int relevant = 0;
686     int kerndata = 0;
687     int topaccent = 0;
688     int variant = 0;
689     std::FILE *src;
690     std::time_t ttt;
691     char filename[100];
692     int i, probes  = 0, p1 = 0, p2 = 0, n1 = 0, n2 = 0,
693            occupancy = 0, fail, qq;
694     CREATEMUTEX;
695     CREATELOGMUTEX;
696 #ifndef _WIN32
697     pthread_mutex_lock(&condmutex);
698 #endif
699     std::setvbuf(stdout, nullptr, _IONBF, 1);
700 //==========================================================================
701 // (1) Read in all the metrics
702 //==========================================================================
703     nkerns = charcount = 0;
704 // I will map characters from u+000000 to u+01ffff but not beyond - that
705 // way I will only need 17 bits to specify a codepoint.
706     for (fontnum=0; fontnum<F_end; fontnum++)
707     {   f = fontnames[fontnum];
708         std::printf("Process font %s\n", f);
709         relevant = kerndata = topaccent = variant = 0;
710         std::sprintf(filename, "wxfonts/metrics/%s.afm", f);
711         if ((src = std::fopen(filename, "r")) == nullptr)
712         {   std::printf("Unable to access %s\n", filename);
713             std::exit(EXIT_FAILURE);
714         }
715         for (;;)
716         {   int ia, ib, ic, id;
717             int32_t cp, wid, bb1, bb2, bb3, bb4;
718             char unn[MAXLINE], lig1[MAXLINE], lig2[MAXLINE];
719             cp = -1;
720             wid = bb1 = bb2 = bb3 = bb4 = 0;
721             unn[0] = lig1[0] = lig2[0] = 0;
722             ia = ib = ic = id = 0;
723             if (std::fgets(line, sizeof(line)-1, src) == nullptr) break;
724             if (std::strncmp(line, "EndFontMetrics", 14) == 0) break;
725             ia = static_cast<int>(std)::strlen(line);
726             while (ia >= 0 &&
727                    (line[ia] == 0 || line[ia] == '\n' || line[ia] == '\r'))
728                 ia--;
729             line[ia+1] = 0; // discard final newline
730             if (ia == 0) break;
731             std::strcpy(saveline, line);
732             if (std::strncmp(line, "StartCharMetrics", 16) == 0)
733             {   relevant = 1;
734                 continue;
735             }
736             if (std::strncmp(line, "EndCharMetrics", 14) == 0)
737             {   relevant = 0;
738                 continue;
739             }
740             if (std::strncmp(line, "StartKernPairs", 14) == 0)
741             {   kerndata = 1;
742                 continue;
743             }
744             if (std::strncmp(line, "EndKernPairs", 12) == 0)
745             {   kerndata = 0;
746                 continue;
747             }
748             if (std::strncmp(line, "StartTopAccent", 14) == 0)
749             {   topaccent = 1;
750                 continue;
751             }
752             if (std::strncmp(line, "EndTopAccent", 12) == 0)
753             {   topaccent = 0;
754                 continue;
755             }
756             if (std::strncmp(line, "StartVariations", 15) == 0)
757             {   variant = 1;
758                 continue;
759             }
760             if (std::strncmp(line, "EndVariations", 13) == 0)
761             {   variant = 0;
762                 continue;
763             }
764             if (kerndata)
765             {   if (std::sscanf(line, "KPX %s %s %d", lig1, lig2, &ia) == 3)
766                 {   kernfont[nkerns] = fontnum;
767                     std::strcpy(kernstart[nkerns], lig1);
768                     std::strcpy(kernfollow[nkerns], lig2);
769                     kernadjustment[nkerns] = ia;
770 #if 0
771                     std::printf("[%d] %s + %s => %d\n", nkerns, lig1, lig2, ia);
772 #endif
773                     nkerns++;
774                 }
775                 else
776                 {   std::printf("Dubious kerning data %s\n", line);
777                     continue;
778                 }
779                 continue;
780             }
781             if (topaccent)
782             {   if (std::sscanf(line, "N %s ; DX %d", accentname[accentp],
783                                 &accentval[accentp]) == 2)
784                     accentp++;
785 #if 0
786                 std::printf("%d: %s\n", accentp, line);
787 #endif
788                 continue;
789             }
790             if (variant)
791             {   int some = 0;
792 // Variant lines can be horribly long! They start VX or HX for vertical
793 // or horizontal variations. I used code 1 for horizontal, 0 for vertical.
794                 if (std::sscanf(line, "VX %s ;", variantname[variantp]) == 1)
795                     variantdirection[variantp] = 0;
796                 else if (std::sscanf(line, "HX %s ;", variantname[variantp]) == 1)
797                     variantdirection[variantp] = 1;
798                 else continue;
799 // printf("Variant record %d (%d) for %s\n", variantp, variantdirection[variantp], variantname[variantp]);
800 // before collecting data I zero out all the relevant fields so that
801 // when data is not present I end up in a sane state.
802                 v1[variantp][0] = 0;
803                 v2[variantp][0] = 0;
804                 v3[variantp][0] = 0;
805                 v4[variantp][0] = 0;
806                 v5[variantp][0] = 0;
807                 P1[variantp][0] = 0;
808                 P2[variantp][0] = 0;
809                 P3[variantp][0] = 0;
810                 P4[variantp][0] = 0;
811                 P5[variantp][0] = 0;
812                 for (i=0; i<4; i++) vdata1[variantp][i] = 0;
813                 for (i=0; i<4; i++) vdata2[variantp][i] = 0;
814                 for (i=0; i<4; i++) vdata3[variantp][i] = 0;
815                 for (i=0; i<4; i++) vdata4[variantp][i] = 0;
816                 for (i=0; i<4; i++) vdata5[variantp][i] = 0;
817                 p = std::strchr(line, ';');
818                 if (p!=nullptr & std::sscanf(p, "; V1 %s ;", v1[variantp]) == 1)
819                 {   p = std::strchr(p+1, ';');
820                     some = 1;
821                 }
822                 if (p!=nullptr & std::sscanf(p, "; V2 %s ;", v2[variantp]) == 1)
823                 {   p = std::strchr(p+1, ';');
824                     some = 1;
825                 }
826                 if (p!=nullptr & std::sscanf(p, "; V3 %s ;", v3[variantp]) == 1)
827                 {   p = std::strchr(p+1, ';');
828                     some = 1;
829                 }
830                 if (p!=nullptr & std::sscanf(p, "; V4 %s ;", v4[variantp]) == 1)
831                 {   p = std::strchr(p+1, ';');
832                     some = 1;
833                 }
834                 if (p!=nullptr & std::sscanf(p, "; V5 %s ;", v5[variantp]) == 1)
835                 {   p = std::strchr(p+1, ';');
836                     some = 1;
837                 }
838                 if (p!=nullptr & std::sscanf(p, "; P1 %s %d %d %d %d ;",
839                                              P1[variantp],
840                                              &vdata1[variantp][0], &vdata1[variantp][1],
841                                              &vdata1[variantp][2], &vdata1[variantp][3]) == 5)
842                 {   p = std::strchr(p+1, ';');
843                     some = 1;
844                 }
845                 if (p!=nullptr & std::sscanf(p, "; P2 %s %d %d %d %d ;",
846                                              P2[variantp],
847                                              &vdata2[variantp][0], &vdata2[variantp][1],
848                                              &vdata2[variantp][2], &vdata2[variantp][3]) == 5)
849                 {   p = std::strchr(p+1, ';');
850                     some = 1;
851                 }
852                 if (p!=nullptr & std::sscanf(p, "; P3 %s %d %d %d %d ;",
853                                              P3[variantp],
854                                              &vdata3[variantp][0], &vdata3[variantp][1],
855                                              &vdata3[variantp][2], &vdata3[variantp][3]) == 5)
856                 {   p = std::strchr(p+1, ';');
857                     some = 1;
858                 }
859                 if (p!=nullptr & std::sscanf(p, "; P4 %s %d %d %d %d ;",
860                                              P4[variantp],
861                                              &vdata4[variantp][0], &vdata4[variantp][1],
862                                              &vdata4[variantp][2], &vdata4[variantp][3]) == 5)
863                 {   p = std::strchr(p+1, ';');
864                     some = 1;
865                 }
866                 if (p!=nullptr & std::sscanf(p, "; P5 %s %d %d %d %d ;",
867                                              P5[variantp],
868                                              &vdata5[variantp][0], &vdata5[variantp][1],
869                                              &vdata5[variantp][2], &vdata5[variantp][3]) == 5)
870                 {   p = std::strchr(p+1, ';');
871                     some = 1;
872                 }
873                 if (some)
874                 {
875 #if 0
876                     std::printf("%d: (%d) %s\n", variantp,
877                                 variantdirection[variantp], variantname[variantp]);
878                     std::printf(" sizes: %s %s %s %s %s\n",
879                                 v1[variantp], v2[variantp],
880                                 v3[variantp], v4[variantp],
881                                 v5[variantp]);
882                     if (P1[variantp] != 0) std::printf(" huge1: %s %d %d %d %d\n",
883                                                            P1[variantp], vdata1[variantp][0],
884                                                            vdata1[variantp][1], vdata1[variantp][2],
885                                                            vdata1[variantp][3]);
886                     if (P2[variantp] != 0) std::printf(" huge2: %s %d %d %d %d\n",
887                                                            P2[variantp], vdata2[variantp][0],
888                                                            vdata2[variantp][1], vdata2[variantp][2],
889                                                            vdata2[variantp][3]);
890                     if (P3[variantp] != 0) std::printf(" huge3: %s %d %d %d %d\n",
891                                                            P3[variantp], vdata3[variantp][0],
892                                                            vdata3[variantp][1], vdata3[variantp][2],
893                                                            vdata3[variantp][3]);
894                     if (P4[variantp] != 0) std::printf(" huge4: %s %d %d %d %d\n",
895                                                            P4[variantp], vdata4[variantp][0],
896                                                            vdata4[variantp][1], vdata4[variantp][2],
897                                                            vdata4[variantp][3]);
898                     if (P5[variantp] != 0) std::printf(" huge5: %s %d %d %d %d\n",
899                                                            P5[variantp], vdata5[variantp][0],
900                                                            vdata5[variantp][1], vdata5[variantp][2],
901                                                            vdata5[variantp][3]);
902 #endif
903                     variantp++;
904                 }
905                 continue;
906             }
907             if (relevant == 0) continue;
908 // Now line contains character information. This may include
909 //     C nnn            decimal code point
//     WX xxx           decimal width
911 //     N word           unicode character name, needed for kern tables
912 //     B nn nn nn nn    character bounding box
913 //     L word word      ligature specification
914 // with each of these separated by a semicolon.
915             p = line;
916             while (p != nullptr)
917             {   q = std::strchr(p, ';');
918                 if (q != nullptr) *q = 0;
919 // Process segment starting at p
920                 while (*p == ' ' || *p == '\n' || *p == '\r') p++;
921                 if (*p == 0) break; // empty segment
922                 switch (*p)
923                 {   case 'C':
924                         if (std::sscanf(p, "C %d", &ia) != 1)
925                         {   std::printf("Bad segment \"%s\" in .afm file\n", p);
926                             std::exit(EXIT_FAILURE);
927                         }
928                         cp = ia;
929                         break;
930                     case 'W':
931                         if (std::sscanf(p, "WX %d", &ia) != 1)
932                         {   std::printf("Bad segment \"%s\" in .afm file\n", p);
933                             std::exit(EXIT_FAILURE);
934                         }
935                         wid = ia;
936                         if (wid > maxw) maxw = wid;
937                         if (wid < minw) minw = wid;
938                         break;
939                     case 'N':
940                         if (std::sscanf(p, "N %s", unn) != 1)
941                         {   std::printf("Bad segment \"%s\" in .afm file\n", p);
942                             std::exit(EXIT_FAILURE);
943                         }
944                         if (std::strlen(unn) >= MAXUNILEN)
945                         {   std::printf("Unicode name length = %d\n",
946                                         static_cast<int>(std)::strlen(unn));
947                             std::printf("%d: %s\n", static_cast<int>(std)::strlen(unn), unn);
948                             std::exit(EXIT_FAILURE);
949                         }
950                         if (cp == -1)
951                         {   if (std::sscanf(unn, "u%x", &ia) == 1) cp = ia;
952                             else if (std::sscanf(unn, "uni%x", &ia) == 1) cp = ia;
953                             else if (std::strcmp(unn, ".notdef") != 0)
954                                 std::printf("Dodgy character: %s\n", saveline);
955                         }
956                         break;
957                     case 'B':
958                         if (std::sscanf(p, "B %d %d %d %d", &ia, &ib, &ic, &id) != 4)
959                         {   std::printf("Bad segment \"%s\" in .afm file\n", p);
960                             std::exit(EXIT_FAILURE);
961                         }
962                         bb1 = ia; bb2 = ib; bb3 = ic; bb4 = id;
963                         if (bb1 > maxllx) maxllx = bb1;
964                         if (bb1 < minllx) minllx = bb1;
965                         if (bb2 > maxlly) maxlly = bb2;
966                         if (bb2 < minlly) minlly = bb2;
967                         if (bb3 > maxurx) maxurx = bb3;
968                         if (bb3 < minurx) minurx = bb3;
969                         if (bb4 > maxury) maxury = bb4;
970                         if (bb4 < minury) minury = bb4;
971                         break;
972                     case 'L':
973 // All I can do with ligature information on a first pass is to record
974 // it rather literally. That is because it may contain forward references
975 // to character names.
976                         if (std::sscanf(p, "L %s %s", lig1, lig2) != 2)
977                         {   std::printf("Bad segment \"%s\" in .afm file\n", p);
978                             std::exit(EXIT_FAILURE);
979                         }
980 // I observe some redundant ligature statements in the font metrics I use,
981 // so that the same information appears twice in a row. I filter that
982 // case out here.
983                         if (nligatures == 0 ||
984                             fontnum != ligfont[nligatures-1] ||
985                             cp != ligstart[nligatures-1] ||
986                             std::strcmp(lig1, ligfollow[nligatures-1]) != 0)
987                         {   ligfont[nligatures] = fontnum;
988                             ligstart[nligatures] = cp;
989                             std::strcpy(ligfollow[nligatures], lig1);
990                             std::strcpy(ligreplacement[nligatures], lig2);
991                             nligatures++;
992                         }
993                         break;
994                     case 0:
995                         break;
996                     default:
997                         std::printf("Unknown segment \"%s\" in .afm file\n", p);
998                         std::printf("Input line: \"%s\"\n", saveline);
999                         std::exit(EXIT_FAILURE);
1000                 }
1001                 if (q == nullptr) break;
1002                 else p = q+1;
1003             }
1004 // The information I now have is
1005 // fontnum, cp                          key
1006 // wid, bb1, bb2, bb3, bb4, unn         data
1007             if (cp < 0 || cp > 0x10ffff)
1008             {   if (std::strcmp(unn, ".notdef") != 0)
1009                     std::printf("Discarding character <%s>"
1010                                 " with codepoint %#x = %d\n",
1011                                 unn, cp, cp);
1012                 continue;
1013             }
1014             if (cp >= 0xd000 && cp < 0xe000)
1015                 std::printf("Codepoint %d U+%x noted : probably invalid in %s\n",
1016                             cp, cp, f);
// Note that cmuntt has 4 characters beyond the basic multilingual plane -
// for GREEK ACROPHONIC ATTIC FIFTY etc at U+10144. They look like capitals
// Delta, H, X and M each with a border to left, right and top. I will
// detect these here but then not support their use!
1021             if (cp > 0xffff &&
1022                 !(cp >= 0x1d000 && cp < 0x1e000) &&
1023                 !(cp >= 0x108000 && cp < 0x109000))
1024                 std::printf("Codepoint %d U+%x noted : probably invalid in %s\n",
1025                             cp, cp, f);
1026             fontkey[charcount] = fontnum;
1027             codepoint[charcount] = cp;
1028             width[charcount] = wid;
1029             std::strcpy(uninames[charcount], unn);
1030             llx[charcount] = bb1;
1031             lly[charcount] = bb2;
1032             urx[charcount] = bb3;
1033             ury[charcount] = bb4;
1034             charcount++;
1035         }
1036         std::fclose(src);
1037     }
1038     std::printf("About to resolve kern and ligature names\n");
1039     std::printf("nkerns = %d nligatures = %d\n", nkerns, nligatures);
1040     for (i=0; i<nkerns; i++)
1041         kernstartcode[i] = decodename(kernfont[i], kernstart[i]);
1042     for (i=0; i<nkerns; i++)
1043         kernfollowcode[i] = decodename(kernfont[i], kernfollow[i]);
1044     for (i=0; i<nligatures; i++)
1045         ligfollowcode[i] = decodename(ligfont[i], ligfollow[i]);
1046     for (i=0; i<nligatures; i++)
1047         ligreplacementcode[i] = decodename(ligfont[i], ligreplacement[i]);
1048 
1049 // Now I will try to do something about the topcentre table...
1050     for (i=0; i<accentp; i++)
1051         accentnum[i] = decodename(F_Math, accentname[i]);
1052     std::printf("Accent position tables processed\n");
1053     for (i=0; i<variantp; i++)
1054     {   variantcode[i] = decodename(F_Math, variantname[i]) |
1055                          (variantdirection[i] << 21);
1056         if (v1[i][0] != 0) nv1[i] = decodename(F_Math, v1[i]);
1057         else nv1[i] = 0;
1058         if (v2[i][0] != 0) nv2[i] = decodename(F_Math, v2[i]);
1059         else nv2[i] = 0;
1060         if (v3[i][0] != 0) nv3[i] = decodename(F_Math, v3[i]);
1061         else nv3[i] = 0;
1062         if (v4[i][0] != 0) nv4[i] = decodename(F_Math, v4[i]);
1063         else nv4[i] = 0;
1064         if (v5[i][0] != 0) nv5[i] = decodename(F_Math, v5[i]);
1065         else nv5[i] = 0;
1066         if (P1[i][0] != 0) np1[i] = decodename(F_Math, P1[i]);
1067         else np1[i] = 0;
1068         if (P2[i][0] != 0) np2[i] = decodename(F_Math, P2[i]);
1069         else np2[i] = 0;
1070         if (P3[i][0] != 0) np3[i] = decodename(F_Math, P3[i]);
1071         else np3[i] = 0;
1072         if (P4[i][0] != 0) np4[i] = decodename(F_Math, P4[i]);
1073         else np4[i] = 0;
1074         if (P5[i][0] != 0) np5[i] = decodename(F_Math, P5[i]);
1075         else np5[i] = 0;
1076     }
1077     std::printf("Larger symbols tables processed\n");
1078 
1079 // Now I have read everything.
1080 //
1081 // Before I fill in the main hash table I need to collect kern and ligature
1082 // information.
1083     kernp = ligp = 0;
1084     for (fontnum=0; fontnum<F_end; fontnum++)
1085     {   fontkern[fontnum] = kernp-1;
1086         for (i=0; i<charcount; i++)
1087         {   int j, v = 0, kkk = 0;
1088 // I wish to process all chars from each font in order. In fact they will
1089 // be in my table that way, but I still code things to scan once for each font.
1090             if (fontkey[i] != fontnum) continue;
1091 // Now I will transfer any ligature and kern info about this character
1092 // into kerntable & ligtable.
1093             for (j=0; j<nkerns; j++)
1094             {   if (kernfont[j] == fontnum &&
1095                     kernstartcode[j] == codepoint[i])
1096                 {   std::strcpy(ktstart[kernp], kernstart[j]);
1097                     std::strcpy(ktfollow[kernp], kernfollow[j]);
1098                     ktadjustment[kernp] = kernadjustment[j];
1099                     std::strcpy(ktfont[kernp], fontnames[fontnum]);
1100                     ktfontn[kernp] = fontnum;
// kkk will be the index in the kern table of the FIRST item
// relating to this start character. It has 0x80000000 forced in so
// that it is a nonzero value even if the kern table index is zero.
1104                     if (kkk == 0) kkk = kernp | 0x80000000;
1105                     kerntable[kernp++] =
1106                         (kernadjustment[j]<<23) | kernfollowcode[j];
1107                     v = 1;
1108                 }
1109             }
1110             for (j=0; j<nligatures; j++)
1111             {   if (ligfont[j] == fontnum &&
1112                     ligstart[j] == codepoint[i])
1113                 {   std::strcpy(ktstart[kernp], uninames[i]);
1114                     std::strcpy(ktfollow[kernp], ligfollow[j]);
1115                     ktadjustment[kernp] = 9999;
1116                     std::strcpy(ktfont[kernp], fontnames[fontnum]);
1117                     ktfontn[kernp] = fontnum;
1118                     if (kkk == 0) kkk = kernp | 0x80000000;
1119                     kerntable[kernp++] =
1120                         (ligp<<23) | IS_LIGATURE | ligfollowcode[j];
1121                     std::strcpy(ltfirst[ligp], uninames[i]);
1122                     std::strcpy(ltfollow[ligp], ligfollow[j]);
1123                     std::strcpy(ltname[ligp], ligreplacement[j]);
1124                     std::strcpy(ltfont[ligp], fontnames[fontnum]);
1125                     ligtable[ligp++] = ligreplacementcode[j];
1126                     v = 1;
1127                 }
1128             }
1129 // v was set if I found at least one kern or ligature entry starting
1130 // with this character. Noticing that here means I can look back and
1131 // be certain that the previous block just ended.
1132             if (v && kernp!=0) kerntable[kernp-1] |= IS_BLOCKEND;
1133             kernreference[i] = kkk;
1134         }
1135     }
1136 // Make really certain that the table is terminated.
1137     if (kernp!=0) kerntable[kernp-1] |= IS_BLOCKEND;
1138 
1139     std::printf("charcount = %d\n", charcount);
1140 
1141 // Well because it will be a cheaper process I will set up the small hash-
1142 // tables for accent placement and large-characters first...
1143 #if 0
1144     for (i=0; i<accentp; i++)
1145         std::printf("    %#.8x,\n", accentnum[i]);
1146 #endif
1147 
1148     std::printf("About to do topaccent table creation with %d keys\n",
1149                 accentp);
1150     cuckoo_parameters topcentre_r =
1151         cuckoo_binary_optimise(
1152             accentnum,
1153             accentp,
1154             accent_importance,
1155             topcentre,
1156             sizeof(topcentre[0]),
1157             accentp-1,
1158             sizeof(topcentre)/sizeof(topcentre[0]),
1159             accent_get,
1160             accent_set,
1161             1.0);
1162     std::printf("Table size = %d (%d %d)\n", topcentre_r.table_size,
1163                 topcentre_r.modulus2, topcentre_r.offset2);
1164 #ifndef DUMMY
1165     std::printf("Now put in accent positions\n");
1166     for (i=0; i<accentp; i++)
1167     {   int w = cuckoo_lookup(
1168                     accentnum[i],
1169                     topcentre,
1170                     sizeof(topcentre[0]),
1171                     topcentre_r.table_size,
1172                     accent_get,
1173                     topcentre_r.modulus2,
1174                     topcentre_r.offset2);
1175         if (w == -1)
1176         {   std::printf("failure of lookup in topaccent table!\n");
1177             std::printf("%d: %d/%x\n", i, accentnum[i], accentnum[i]);
1178             for (i=0; i<topcentre_r.table_size; i++)
1179                 std::printf("%4d: %x\n", i, topcentre[i]);
1180             std::exit(1);
1181         }
1182         topcentre[w] |= accentval[i] << 21;
1183     }
1184     std::printf("top-centre table set up with %d words for %d chars (%.2f)\n",
1185                 topcentre_r.table_size, accentp,
1186                 (100.0*accentp)/topcentre_r.table_size);
1187 #endif
1188 
1189     cuckoo_parameters variant_r;
1190     int usefulp = 0;
1191 // I will only put characters that actually have variants in here
1192     {   uint32_t usefulcode[MAXMATHSYMS];
1193         for (i=0; i<variantp; i++)
1194         {   if (nv1[i] != 0 || nv2[i] != 0 || nv3[i] != 0 ||
1195                 nv4[i] != 0 || nv5[i] != 0)
1196                 usefulcode[usefulp++] = (uint32_t)variantcode[i];
1197         }
1198         variant_r = cuckoo_binary_optimise(
1199                         usefulcode,
1200                         usefulp,
1201                         variant_importance,
1202                         variant_table,
1203                         sizeof(variant_table[0]),
1204                         usefulp-1,
1205                         sizeof(variant_table)/sizeof(variant_table[0]),
1206                         variant_get,
1207                         variant_set,
1208                         0.0);
1209         std::printf("Variant table size = %d (%d %d)\n", variant_r.table_size,
1210                     variant_r.modulus2, variant_r.offset2);
1211     }
1212 #ifndef DUMMY
1213     std::printf("Now put in variant info for (, ), [, ] etc.\n");
1214     for (i=0; i<variantp; i++)
1215     {   int w;
1216 // If there are no variants then do not bother!
1217         if (nv1[i] == 0 && nv2[i] == 0 && nv3[i] == 0 &&
1218             nv4[i] == 0 && nv5[i] == 0) continue;
1219         w = cuckoo_lookup(
1220                 variantcode[i],
1221                 variant_table,
1222                 sizeof(variant_table[0]),
1223                 variant_r.table_size,
1224                 variant_get,
1225                 variant_r.modulus2,
1226                 variant_r.offset2);
1227         if (w == -1)
1228         {   std::printf("failure of lookup in variant table!\n");
1229             std::printf("%d: %d/%x\n", i, variantcode[i], variantcode[i]);
1230             for (i=0; i<variant_r.table_size; i++)
1231                 std::printf("%4d: %" PRIx32 "\n", i, variant_table[i][0]);
1232             std::exit(1);
1233         }
// Put the five gradually larger variants of the character in place. These
// are stored in a really simple way since the total amount of data involved
// is not huge. Maybe the only thing to note here is that variant_table[*][0]
// has the codepoint of the basic character with 0x00200000 added in if the
// variants will be for horizontal use (eg gradually wider circumflex
// accents), rather than for vertical use (eg progressively taller parentheses).
// When a size is not provided the entry will contain U+0000.
1241         if (variant_table[w][0] != variantcode[i])
1242         {   std::printf("Messed up at line %d\n", __LINE__);
1243             std::exit(1);
1244         }
1245         variant_table[w][1] |= nv1[i];
1246         variant_table[w][2] |= nv2[i];
1247         variant_table[w][3] |= nv3[i];
1248         variant_table[w][4] |= nv4[i];
1249         variant_table[w][5] |= nv5[i];
1250     }
1251     std::printf("variant table set up with %d entries for %d chars (%.2f%%)\n",
1252                 variant_r.table_size, usefulp,
1253                 (100.0*usefulp)/variant_r.table_size);
1254 #endif
1255 
1256     cuckoo_parameters extension_r;
1257     usefulp = 0;
// I will only put characters that actually have extension pieces in here
1259     {   uint32_t usefulcode[MAXMATHSYMS];
1260         for (i=0; i<variantp; i++)
1261         {   if (np1[i] != 0 || np2[i] != 0 || np3[i] != 0 ||
1262                 np4[i] != 0 || np5[i] != 0)
1263                 usefulcode[usefulp++] = (uint32_t)variantcode[i];
1264         }
1265         extension_r = cuckoo_binary_optimise(
1266                           usefulcode,
1267                           usefulp,
1268                           extension_importance,
1269                           extension_table,
1270                           sizeof(extension_table[0]),
1271                           usefulp-1,
1272                           sizeof(extension_table)/sizeof(extension_table[0]),
1273                           extension_get,
1274                           extension_set,
1275                           0.0);
1276         std::printf("Extension table size = %d (%d %d)\n",
1277                     extension_r.table_size,
1278                     extension_r.modulus2, extension_r.offset2);
1279     }
1280 #ifndef DUMMY
1281     std::printf("Now put in extension info for (, ), [, ] etc.\n");
1282     for (i=0; i<variantp; i++)
1283     {   int w;
1284 // If there are no extensions then do not bother!
1285         if (np1[i] == 0 && np2[i] == 0 && np3[i] == 0 &&
1286             np4[i] == 0 && np5[i] == 0) continue;
1287         w = cuckoo_lookup(
1288                 variantcode[i],
1289                 extension_table,
1290                 sizeof(extension_table[0]),
1291                 extension_r.table_size,
1292                 extension_get,
1293                 extension_r.modulus2,
1294                 extension_r.offset2);
1295         if (w == -1)
1296         {   std::printf("failure of lookup in extension table!\n");
1297             std::printf("%d: %d/%x\n", i, variantcode[i], variantcode[i]);
1298             for (i=0; i<extension_r.table_size; i++)
1299                 std::printf("%4d: %" PRIx32 "\n", i, extension_table[i][0]);
1300             std::exit(1);
1301         }
// Put the components used to build up huge characters into the table.
// When one is not provided the entry will contain U+0000.
1304         if (extension_table[w][0] != variantcode[i])
1305         {   std::printf("Messed up at line %d\n", __LINE__);
1306             std::exit(1);
1307         }
1308         extension_table[w][1] |= np1[i] | (vdata1[i][0]<<21);
1309         extension_table[w][2] |= vdata1[i][1] | (vdata1[i][2]<<16) |
1310                                  (vdata1[i][3]<<31);
1311         extension_table[w][3] |= np2[i] | (vdata2[i][0]<<21);
1312         extension_table[w][4] |= vdata2[i][1] | (vdata2[i][2]<<16) |
1313                                  (vdata2[i][3]<<31);
1314         extension_table[w][5] |= np3[i] | (vdata3[i][0]<<21);
1315         extension_table[w][6] |= vdata3[i][1] | (vdata3[i][2]<<16) |
1316                                  (vdata3[i][3]<<31);
1317         extension_table[w][7] |= np4[i] | (vdata4[i][0]<<21);
1318         extension_table[w][8] |= vdata4[i][1] | (vdata4[i][2]<<16) |
1319                                  (vdata4[i][3]<<31);
1320         extension_table[w][9] |= np5[i] | (vdata5[i][0]<<21);
1321         extension_table[w][10]|= vdata5[i][1] | (vdata5[i][2]<<16) |
1322                                  (vdata5[i][3]<<31);
1323     }
1324     std::printf("extension table set up with %d entries for %d chars (%.2f%%)\n",
1325                 extension_r.table_size, usefulp,
1326                 (100.0*usefulp)/extension_r.table_size);
1327 #endif
1328 
1329 //==========================================================================
// (2) Try inserting everything into the main metrics hash table
1331 //==========================================================================
1332 
1333 // I will remove duplicate keys here first... I will cheerfully use
1334 // a quadratic cost filtering process here because other things are so much
1335 // more expensive.
1336     mainkeycount = 0;
1337     for (i=0; i<charcount; i++)
1338     {   int j;
1339         uint32_t k = pack_character(fontkey[i], codepoint[i]) >> 2;
1340         if (k == 0) continue;
1341         for (j=0; j<mainkeycount; j++)
1342             if (k == mainkey[j]) break;
1343         if (j<mainkeycount) continue;   // discard repeat key.
1344         mainkey[mainkeycount++] = k;
1345     }
1346 
1347     std::printf("About to try to optimise for %d entries\n",
1348                 mainkeycount);
1349 
// In my case there are 10019 keys to consider. If I do a proper search
// that can take quite a while - say 45 minutes on a reasonably fast desktop
// system. So as a cunning ploy I will first try the parameters that
// are a known solution, and if nothing has changed at all that will
// succeed (very rapidly) and I can use it. If that fails I will drop back
// to the more expensive search.
1356 
1357 //#define EXPECTED_TABLESIZE   10057
1358 //#define EXPECTED_MODULUS2     8729
1359 //#define EXPECTED_OFFSET2      1108
1360 
1361     cuckoo_parameters main_r;
1362     double mm;
1363     std::printf("static uint32_t keys[] = \n{\n   ");
1364     for (i=0; i<mainkeycount; i++)
1365     {   std::printf("%#8x%s", mainkey[i], (i==(mainkeycount-1)?"":","));
1366         if (i % 8 == 7) std::printf("\n   ");
1367     }
1368     std::printf("\n};\n\n");
1369 
1370 #ifndef DUMMY
1371 // If the Hungarian method shows that there is an assignment with
1372 // exactly my expected parameters that meets my target merit then I
1373 // will just accept it.
1374     if ((mm = find_best_assignment(
1375                   mainkey,
1376                   mainkeycount,
1377                   main_importance,
1378                   uint32hashtable,
1379                   EXPECTED_TABLESIZE,
1380                   EXPECTED_MODULUS2,
1381                   EXPECTED_OFFSET2)) > 0.0 &&
1382         mm <= TARGET_MERIT)
1383     {   main_r.table_size = EXPECTED_TABLESIZE;
1384         main_r.modulus2 = EXPECTED_MODULUS2;
1385         main_r.offset2 = EXPECTED_OFFSET2;
1386         main_r.merit = mm;
1387 // Transfer allocation to the main hash table
1388         for (i=0; i<main_r.table_size; i++)
1389             main_set(&hashtable[i], uint32hashtable[i]);
1390         std::printf("Built-in table parameters successfully used\n");
1391     }
1392 // Otherwise if there is an assignment at that table size I do not need to
1393 // do elaborate searches to identify it, but I will want to run the Hungarian
1394 // algorithm for all possible values of modulus2 and offset2 at that table
1395 // size.
1396     else if (mm > 0.0)
1397     {   main_r.table_size = EXPECTED_TABLESIZE;
1398         main_r.modulus2 = EXPECTED_MODULUS2;
1399         main_r.offset2 = EXPECTED_OFFSET2;
1400         main_r.merit = mm;
1401     }
1402     else
1403 #endif // DUMMY
1404     {   main_r = cuckoo_binary_optimise(
1405                      mainkey,
1406                      mainkeycount,
1407                      main_importance,
1408                      hashtable,
1409                      sizeof(hashtable[0]),
1410                      MAIN_LOW,
1411                      MAIN_HIGH,
1412                      main_get,
1413                      main_set,
1414                      TARGET_MERIT);
1415         main_r.merit = 4.0;
1416     }
1417     std::printf("Whooeeee! %d %d %d %.2f%%  merit=%.4f\n",
1418                 main_r.table_size, main_r.modulus2, main_r.offset2,
1419                 (100.0*mainkeycount)/main_r.table_size, main_r.merit);
1420 
1421 // Now unless the current merit is good enough I will optimise by
1422 // trying all possible hash options at this table size.
1423 
1424     if (main_r.merit > TARGET_MERIT)
1425         main_r = try_all_hash_functions(
1426                      mainkey,
1427                      mainkeycount,
1428                      main_importance,
1429                      hashtable,
1430                      sizeof(hashtable[0]),
1431                      main_r.table_size,
1432                      main_set,
1433                      1);
1434 
1435 //=====================================================================
1436 // Now the table should have everything in it and so I can merely fill
1437 // in the actual metric information
1438 //=====================================================================
1439 #ifndef DUMMY
1440     std::printf("\nNow I want to put data into the hash table.\n");
1441     for (i=0; i<charcount; i++)
1442     {   int fullkey = pack_character(fontkey[i],
1443                                      codepoint[i]); // 20-bit key
1444         int key = fullkey >> 2; // because my hash table has line-size 4
1445         if (codepoint[i] == 0) continue;
1446         int h1;
1447         uint64_t w;
1448         h1 = cuckoo_lookup(
1449                  key,
1450                  hashtable,
1451                  sizeof(hashtable[0]),
1452                  main_r.table_size,
1453                  main_get,
1454                  main_r.modulus2,
1455                  main_r.offset2);
1456         if (h1 == -1)
1457         {   int j;
1458             std::printf("failure at line %d!\n", __LINE__);
1459             std::printf("Problem with character %d font %d codepoint %d/%x\n",
1460                         i, fontkey[i], codepoint[i], codepoint[i]);
1461             std::printf("Full key = %d/%x key = %d/%x\n",
1462                         fullkey, fullkey, key, key);
1463             for (j=0; j<main_r.table_size; j++)
1464                 std::printf("%7d: %" PRIx64 "\n", j, hashtable[j][0]);
1465 
1466             std::exit(1);
1467         }
1468 
1469 // Pack and write in the messy information about width and bounding boxes.
1470         w = ((uint64_t)width[i] & 0x1fff) << 51 |
1471             ((uint64_t)(llx[i]+3000) & 0x1fff) << 38 |
1472             ((uint64_t)(lly[i]+1000) & 0x0fff) << 26 |
1473             ((uint64_t)(urx[i]+500) & 0x1fff) << 13 |
1474             ((uint64_t)(ury[i]+1000) & 0x1fff);
1475         hashtable[h1][1+(fullkey&3)] = w;
1476 // Finally merge in an offset to any kern info that might be available
1477         if (kernreference[i] != 0)
1478         {   int64_t q = (kernreference[i] & 0x7fffffff)-fontkern[fontkey[i]];
1479 #if 0
1480             std::printf("Fill in kern ref %d as %d\n",
1481                         kernreference[i] & 0x7fffffff, static_cast<int>(q));
1482 #endif
1483             hashtable[h1][0] |= q << (19+11*(fullkey&3));
1484         }
1485     }
1486 
1487     std::printf("Done after %d characters, %d ligatures, %d kerns\n",
1488                 charcount, nligatures, nkerns);
1489     std::printf("width %d %d (%d)\n", minw, maxw, maxw-minw);
1490     std::printf("llx %d %d (%d)\n", minllx, maxllx, maxllx-minllx);
1491     std::printf("lly %d %d (%d)\n", minlly, maxlly, maxlly-minlly);
1492     std::printf("urx %d %d (%d)\n", minurx, maxurx, maxurx-minurx);
1493     std::printf("ury %d %d (%d)\n", minury, maxury, maxury-minury);
1494 
1495     std::printf("Total space = %d\n", main_r.table_size*(5*8));
1496     p1 = 0;
1497     for (i=0; i<main_r.table_size; i++)
1498     {   if (hashtable[i][0] != 0) p1++;
1499     }
1500     std::printf("%d of %d entries (%d of %d bytes) used: %.4f\n",
1501                 p1, main_r.table_size, 40*p1, 40*main_r.table_size,
1502                 static_cast<double>(p1)/static_cast<double>(main_r.table_size));
1503 
1504     {   std::FILE *dest = std::fopen("charmetrics.h", "w");
1505         std::FILE *rdest = std::fopen("charmetrics.red", "w");
1506         std::FILE *smldest = std::fopen("charmetrics.sml", "w");
1507         std::fprintf(dest,
1508                      "// charmetrics.h                               Copyright (C) 2017 Codemist\n");
1509         std::fprintf(dest, "\n");
1510         std::fprintf(dest, "\n");
1511         std::fprintf(dest,
1512                      "/**************************************************************************\n");
1513         std::fprintf(dest,
1514                      " * Copyright (C) 2017, Codemist.                         A C Norman       *\n");
1515         std::fprintf(dest,
1516                      " *                                                                        *\n");
1517         std::fprintf(dest,
1518                      " * Redistribution and use in source and binary forms, with or without     *\n");
1519         std::fprintf(dest,
1520                      " * modification, are permitted provided that the following conditions are *\n");
1521         std::fprintf(dest,
1522                      " * met:                                                                   *\n");
1523         std::fprintf(dest,
1524                      " *                                                                        *\n");
1525         std::fprintf(dest,
1526                      " *     * Redistributions of source code must retain the relevant          *\n");
1527         std::fprintf(dest,
1528                      " *       copyright notice, this list of conditions and the following      *\n");
1529         std::fprintf(dest,
1530                      " *       disclaimer.                                                      *\n");
1531         std::fprintf(dest,
1532                      " *     * Redistributions in binary form must reproduce the above          *\n");
1533         std::fprintf(dest,
1534                      " *       copyright notice, this list of conditions and the following      *\n");
1535         std::fprintf(dest,
1536                      " *       disclaimer in the documentation and/or other materials provided  *\n");
1537         std::fprintf(dest,
1538                      " *       with the distribution.                                           *\n");
1539         std::fprintf(dest,
1540                      " *                                                                        *\n");
1541         std::fprintf(dest,
1542                      " * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    *\n");
1543         std::fprintf(dest,
1544                      " * \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      *\n");
1545         std::fprintf(dest,
1546                      " * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS      *\n");
1547         std::fprintf(dest,
1548                      " * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE         *\n");
1549         std::fprintf(dest,
1550                      " * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,   *\n");
1551         std::fprintf(dest,
1552                      " * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,   *\n");
1553         std::fprintf(dest,
1554                      " * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS  *\n");
1555         std::fprintf(dest,
1556                      " * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *\n");
1557         std::fprintf(dest,
1558                      " * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR  *\n");
1559         std::fprintf(dest,
1560                      " * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF     *\n");
1561         std::fprintf(dest,
1562                      " * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH   *\n");
1563         std::fprintf(dest,
1564                      " * DAMAGE.                                                                *\n");
1565         std::fprintf(dest,
1566                      " *************************************************************************/\n");
1567         std::fprintf(dest, "\n");
1568         std::fprintf(dest,
1569                      "// $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n");
1570         std::fprintf(dest, "\n");
1571         std::fprintf(dest, "\n");
1572         std::fprintf(dest, "#ifndef __STDC_CONSTANT_MACROS\n");
1573         std::fprintf(dest, "#define __STDC_CONSTANT_MACROS 1\n");
1574         std::fprintf(dest, "#endif\n");
1575         std::fprintf(dest, "\n#include <cstdint>\n\n");
1576         std::fprintf(dest,
1577                      "// Character metric hash table created using the program charmetrics.cpp\n");
1578         std::fprintf(dest,
1579                      "// sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfontxs\n");
1580         std::fprintf(dest,
1581                      "// contains README files with full credits to the fonts this is used with\n");
1582         std::fprintf(dest, "\n\n");
1583         std::fprintf(dest,
1584                      "// The list of font codes here must be kept in step with the list\n");
1585         std::fprintf(dest, "// of names in the table.\n");
1586         std::fprintf(dest, "\n");
1587         std::fprintf(dest, "#define F_cmuntt                      0\n");
1588         std::fprintf(dest, "#define F_odokai                      1\n");
1589         std::fprintf(dest, "#define F_Regular                     2\n");
1590         std::fprintf(dest, "#define F_Bold                        3\n");
1591         std::fprintf(dest, "#define F_Italic                      4\n");
1592         std::fprintf(dest, "#define F_BoldItalic                  5\n");
1593         std::fprintf(dest, "#define F_Math                        6\n");
1594         std::fprintf(dest, "#define F_end                         7\n");
1595         std::fprintf(dest, "\n");
1596         std::fprintf(dest,
1597                      "extern int c_width, c_llx, c_lly, c_urx, c_ury, c_kerninfo;\n");
1598         std::fprintf(dest,
1599                      "extern int lookupchar(int fontnum, int codepoint);\n");
1600         std::fprintf(dest,
1601                      "extern int32_t lookupkernandligature(int codepoint);\n");
1602         std::fprintf(dest,
1603                      "extern int32_t lookupkernadjustment(int codepoint);\n");
1604         std::fprintf(dest, "extern int32_t lookupligature(int codepoint);\n");
1605         std::fprintf(dest, "extern int accentposition(int codepoint);\n\n");
1606         std::fprintf(dest, "extern const uint16_t chardepth_WIN32[31];\n");
1607         std::fprintf(dest, "extern const uint16_t chardepth_X11[31];\n");
1608         std::fprintf(dest, "extern const uint16_t chardepth_OSX[31];\n");
1609         std::fprintf(dest, "extern const uint16_t *chardepth;\n");
1610         std::fprintf(dest, "extern const char *fontnames[31];\n\n");
1611         std::fprintf(rdest,
1612                      "%% Character metrics for the STIX (and some other) fonts...\n");
1613         std::fprintf(rdest, "\n");
1614         std::fprintf(rdest,
1615                      "%% Character metric hash table created using the program charmetrics.cpp\n");
1616         std::fprintf(rdest,
1617                      "%% sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfonts\n");
1618         std::fprintf(rdest,
1619                      "%% contains README files with full credits to the fonts this is used with\n");
1620         std::fprintf(rdest, "%% Author: Arthur Norman\n");
1621         std::fprintf(rdest, "\n");
1622         std::fprintf(rdest,
1623                      "%% Redistribution and use in source and binary forms, with or without\n");
1624         std::fprintf(rdest,
1625                      "%% modification, are permitted provided that the following conditions are met:\n");
1626         std::fprintf(rdest, "%%\n");
1627         std::fprintf(rdest,
1628                      "%%    * Redistributions of source code must retain the relevant copyright\n");
1629         std::fprintf(rdest,
1630                      "%%      notice, this list of conditions and the following disclaimer.\n");
1631         std::fprintf(rdest,
1632                      "%%    * Redistributions in binary form must reproduce the above copyright\n");
1633         std::fprintf(rdest,
1634                      "%%      notice, this list of conditions and the following disclaimer in the\n");
1635         std::fprintf(rdest,
1636                      "%%      documentation and/or other materials provided with the distribution.\n");
1637         std::fprintf(rdest, "%%\n");
1638         std::fprintf(rdest,
1639                      "%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n");
1640         std::fprintf(rdest,
1641                      "%% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,\n");
1642         std::fprintf(rdest,
1643                      "%% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n");
1644         std::fprintf(rdest,
1645                      "%% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR\n");
1646         std::fprintf(rdest, "%% CONTRIBUTORS\n");
1647         std::fprintf(rdest,
1648                      "%% BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n");
1649         std::fprintf(rdest,
1650                      "%% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n");
1651         std::fprintf(rdest,
1652                      "%% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n");
1653         std::fprintf(rdest,
1654                      "%% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n");
1655         std::fprintf(rdest,
1656                      "%% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n");
1657         std::fprintf(rdest,
1658                      "%% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n");
1659         std::fprintf(rdest, "%% POSSIBILITY OF SUCH DAMAGE.\n");
1660         std::fprintf(rdest, "%%\n");
1661         std::fprintf(rdest, "\n");
1662         std::fprintf(rdest,
1663                      "%% Also be aware of the (generally permissive) licenses associated with the\n");
1664         std::fprintf(rdest,
1665                      "%% fonts. Fill README files and license terms for the fonts themselves\n");
1666         std::fprintf(rdest, "%% are in csl/cslbase/wxfonts.\n");
1667         std::fprintf(rdest, "\n");
1668         std::fprintf(rdest, "\n");
1669         std::fprintf(rdest,
1670                      "%% $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n");
1671         std::fprintf(rdest, "\n");
1672         std::fprintf(rdest,
1673                      "#if (or (memq 'psl lispsystem!*) (memq 'jlisp lispsystem!*))\n");
1674         std::fprintf(rdest, "\n");
1675         std::fprintf(rdest,
1676                      "%% CSL has special vectors that hold just 16-bit integers and 32-bit\n");
1677         std::fprintf(rdest,
1678                      "%% integers and use of those will decrease the amount of memory consumed\n");
1679         std::fprintf(rdest,
1680                      "%% here. However if PSL does not have these it does not matter much since I\n");
1681         std::fprintf(rdest, "%% can just use ordinary Lisp vectors...\n");
1682         std::fprintf(rdest,
1683                      "%% I set initial contents as all 0 rather than all nil since these are\n");
1684         std::fprintf(rdest,
1685                      "%% supposed to contain (small) integer values.\n");
1686         std::fprintf(rdest, "\n");
1687         std::fprintf(rdest, "symbolic procedure mkvect32 n;\n");
1688         std::fprintf(rdest, "  begin\n");
1689         std::fprintf(rdest, "    scalar r;\n");
1690         std::fprintf(rdest, "    r := mkvect n;\n");
1691         std::fprintf(rdest, "    for i := 0:n do putv(r, i, 0);\n");
1692         std::fprintf(rdest, "    return r\n");
1693         std::fprintf(rdest, "  end;\n");
1694         std::fprintf(rdest, "\n");
1695         std::fprintf(rdest,
1696                      "symbolic inline procedure putv32(v, n, x); putv(v, n, x);\n");
1697         std::fprintf(rdest, "\n");
1698         std::fprintf(rdest,
1699                      "symbolic inline procedure getv32(v, n); getv(v, n);\n");
1700         std::fprintf(rdest, "\n");
1701         std::fprintf(rdest, "symbolic procedure mkvect16 n;\n");
1702         std::fprintf(rdest, "  begin\n");
1703         std::fprintf(rdest, "    scalar r;\n");
1704         std::fprintf(rdest, "    r := mkvect n;\n");
1705         std::fprintf(rdest, "    for i := 0:n do putv(r, i, 0);\n");
1706         std::fprintf(rdest, "    return r\n");
1707         std::fprintf(rdest, "  end;\n");
1708         std::fprintf(rdest, "\n");
1709         std::fprintf(rdest,
1710                      "symbolic inline procedure putv16(v, n, x); putv(v, n, x);\n");
1711         std::fprintf(rdest, "\n");
1712         std::fprintf(rdest,
1713                      "symbolic inline procedure getv16(v, n); getv(v, n);\n");
1714         std::fprintf(rdest, "\n");
1715         std::fprintf(rdest, "#endif\n");
1716         std::fprintf(rdest, "\n");
1717         std::fprintf(rdest,
1718                      "put('cmuntt, 'font_number,                      0)$\n");
1719         std::fprintf(rdest,
1720                      "put('odokai, 'font_number,                      1)$\n");
1721         std::fprintf(rdest,
1722                      "put('Regular, 'font_number,                     2)$\n");
1723         std::fprintf(rdest,
1724                      "put('Bold, 'font_number,                        3)$\n");
1725         std::fprintf(rdest,
1726                      "put('Italic, 'font_number,                      4)$\n");
1727         std::fprintf(rdest,
1728                      "put('BoldItalic, 'font_number,                  5)$\n");
1729         std::fprintf(rdest,
1730                      "put('Math, 'font_number,                        6)$\n");
1731         std::fprintf(rdest, "\n");
1732         std::fprintf(rdest, "symbolic procedure list_to_vec16 l;\n");
1733         std::fprintf(rdest, "  begin\n");
1734         std::fprintf(rdest, "    scalar r, n;\n");
1735         std::fprintf(rdest, "    r := mkvect16 (n := sub1 length l);\n");
1736         std::fprintf(rdest, "    for i := 0:n do <<\n");
1737         std::fprintf(rdest, "       putv16(r, i, car l);\n");
1738         std::fprintf(rdest, "       l := cdr l >>;\n");
1739         std::fprintf(rdest, "    return r\n");
1740         std::fprintf(rdest, "  end;\n");
1741         std::fprintf(rdest, "\n");
1742         std::fprintf(rdest, "symbolic procedure list_to_vec32 l;\n");
1743         std::fprintf(rdest, "  begin\n");
1744         std::fprintf(rdest, "    scalar r, n;\n");
1745         std::fprintf(rdest, "    r := mkvect32 (n := sub1 length l);\n");
1746         std::fprintf(rdest, "    for i := 0:n do <<\n");
1747         std::fprintf(rdest, "       putv32(r, i, car l);\n");
1748         std::fprintf(rdest, "       l := cdr l >>;\n");
1749         std::fprintf(rdest, "    return r\n");
1750         std::fprintf(rdest, "  end;\n");
1751         std::fprintf(rdest, "\n");
1752         std::fprintf(rdest,
1753                      "%% This one will take a list whose elements are themselves lists\n");
1754         std::fprintf(rdest, "%% of 32-bit integers.\n");
1755         std::fprintf(rdest, "%%\n");
1756         std::fprintf(rdest, "symbolic procedure list_to_metric_table l;\n");
1757         std::fprintf(rdest, "  begin\n");
1758         std::fprintf(rdest, "    scalar r, n;\n");
1759         std::fprintf(rdest, "    r := mkvect (n := sub1 length l);\n");
1760         std::fprintf(rdest, "    for i := 0:n do <<\n");
1761         std::fprintf(rdest, "       putv(r, i, list_to_vec32 car l);\n");
1762         std::fprintf(rdest, "       l := cdr l >>;\n");
1763         std::fprintf(rdest, "    return r\n");
1764         std::fprintf(rdest, "  end;\n");
1765         std::fprintf(rdest, "\n");
1766         std::fprintf(rdest,
1767                      "fluid '(hashsize!* metrics_hash!* topcentre_hash!* variant_hash!* extension_hash!* fontkern!* kerntable!* ligaturetable!*);\n");
1768         std::fprintf(rdest, "\n");
1769         std::fprintf(rdest, "symbolic (hashsize!* := %d);\n",
1770                      main_r.table_size);
1771         std::fprintf(rdest, "\n");
1772         std::fprintf(smldest,
1773                      "(* Character metrics for the STIX (and some other) fonts...\n");
1774         std::fprintf(smldest, "\n");
1775         std::fprintf(smldest,
1776                      "Character metric hash table created using the program charmetrics.cpp\n");
1777         std::fprintf(smldest,
1778                      "sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfonts\n");
1779         std::fprintf(smldest,
1780                      "contains README files with full credits to the fonts this is used with\n");
1781         std::fprintf(smldest, "Author: Arthur Norman\n");
1782         std::fprintf(smldest, "\n");
1783         std::fprintf(smldest,
1784                      "Redistribution and use in source and binary forms, with or without\n");
1785         std::fprintf(smldest,
1786                      "modification, are permitted provided that the following conditions are met:\n");
1787         std::fprintf(smldest, "\n");
1788         std::fprintf(smldest,
1789                      "   * Redistributions of source code must retain the relevant copyright\n");
1790         std::fprintf(smldest,
1791                      "     notice, this list of conditions and the following disclaimer.\n");
1792         std::fprintf(smldest,
1793                      "   * Redistributions in binary form must reproduce the above copyright\n");
1794         std::fprintf(smldest,
1795                      "     notice, this list of conditions and the following disclaimer in the\n");
1796         std::fprintf(smldest,
1797                      "     documentation and/or other materials provided with the distribution.\n");
1798         std::fprintf(smldest, "\n");
1799         std::fprintf(smldest,
1800                      "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n");
1801         std::fprintf(smldest,
1802                      "AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,\n");
1803         std::fprintf(smldest,
1804                      "THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n");
1805         std::fprintf(smldest,
1806                      "PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR\n");
1807         std::fprintf(smldest, "CONTRIBUTORS\n");
1808         std::fprintf(smldest,
1809                      "BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n");
1810         std::fprintf(smldest,
1811                      "CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n");
1812         std::fprintf(smldest,
1813                      "SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n");
1814         std::fprintf(smldest,
1815                      "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n");
1816         std::fprintf(smldest,
1817                      "CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n");
1818         std::fprintf(smldest,
1819                      "ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n");
1820         std::fprintf(smldest, "POSSIBILITY OF SUCH DAMAGE.\n");
1821         std::fprintf(smldest, "\n");
1822         std::fprintf(smldest, "\n");
1823         std::fprintf(smldest,
1824                      "Also be aware of the (generally permissive) licenses associated with the\n");
1825         std::fprintf(smldest,
1826                      "fonts. Fill README files and license terms for the fonts themselves\n");
1827         std::fprintf(smldest, "are in csl/cslbase/wxfonts.\n");
1828         std::fprintf(smldest, "\n");
1829         std::fprintf(smldest, "\n");
1830         std::fprintf(smldest,
1831                      "$Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n");
1832         std::fprintf(smldest, "\n*)\n\n");
1833         std::fprintf(smldest, "\n");
1834         std::fprintf(smldest, "val F_cmuntt     = 0;\n");
1835         std::fprintf(smldest, "val F_odokai     = 1;\n");
1836         std::fprintf(smldest, "val F_Regular    = 2;\n");
1837         std::fprintf(smldest, "val F_Bold       = 3;\n");
1838         std::fprintf(smldest, "val F_Italic     = 4;\n");
1839         std::fprintf(smldest, "val F_BoldItalic = 5;\n");
1840         std::fprintf(smldest, "val F_Math       = 6;\n");
1841         std::fprintf(smldest, "val F_end        = 7;\n");
1842         std::fprintf(smldest, "\n");
1843         std::fprintf(smldest, "fun font_number \"cmuntt\"     = F_cmuntt\n");
1844         std::fprintf(smldest, "  | font_number \"odokai\"     = F_odokai\n");
1845         std::fprintf(smldest, "  | font_number \"Regular\"    = F_Regular\n");
1846         std::fprintf(smldest, "  | font_number \"Bold\"       = F_Bold\n");
1847         std::fprintf(smldest, "  | font_number \"Italic\"     = F_Italic\n");
1848         std::fprintf(smldest,
1849                      "  | font_number \"BoldItalic\" = F_BoldItalic\n");
1850         std::fprintf(smldest, "  | font_number \"Math\"       = F_Math\n");
1851         std::fprintf(smldest, "  | font_number _            = 0;\n");
1852         std::fprintf(smldest, "\n");
1853         std::fprintf(smldest, "val hashsize = %d;\n", main_r.table_size);
1854         std::fprintf(smldest, "\n");
1855         std::fprintf(dest, "const uint64_t charmetrics[%d][5] = \n{",
1856                      main_r.table_size);
1857         std::fprintf(rdest,
1858                      "#eval (setq metrics_hash!* (list_to_metric_table '\n    (");
1859         std::fprintf(smldest, "val metrics_hash = Vector.fromList\n"
1860                      "   (map Vector.fromList [");
1861         for (i=0; i<main_r.table_size; i++)
1862         {   if (i != 0)
1863             {   std::fprintf(dest, ",");
1864                 std::fprintf(smldest, ",");
1865             }
1866             std::fprintf(dest,
1867                          "\n    {UINT64_C(0x%.16" PRIx64 "), UINT64_C(0x%.16" PRIx64
1868                          "), UINT64_C(0x%.16" PRIx64 "),"
1869                          "\n                                   UINT64_C(0x%.16" PRIx64
1870                          "), UINT64_C(0x%.16" PRIx64 ")}",
1871                          hashtable[i][0],
1872                          hashtable[i][1], hashtable[i][2],
1873                          hashtable[i][3], hashtable[i][4]);
1874             std::fprintf(rdest, "\n     (0x%.8" PRIx32 " 0x%.8" PRIx32 " 0x%.8"
1875                          PRIx32 " 0x%.8" PRIx32 " 0x%.8" PRIx32,
1876                          static_cast<int>(hashtable[i][0]),
1877                          static_cast<int>(hashtable[i][0]>>32),
1878                          static_cast<int>(hashtable[i][1]),
1879                          static_cast<int>(hashtable[i][1]>>32),
1880                          static_cast<int>(hashtable[i][2]));
1881             std::fprintf(rdest, "\n      0x%.8" PRIx32 " 0x%.8" PRIx32 " 0x%.8"
1882                          PRIx32 " 0x%.8" PRIx32 " 0x%.8" PRIx32 ")",
1883                          static_cast<int>(hashtable[i][2]>>32),
1884                          static_cast<int>(hashtable[i][3]),
1885                          static_cast<int>(hashtable[i][3]>>32),
1886                          static_cast<int>(hashtable[i][4]),
1887                          static_cast<int>(hashtable[i][4]>>32));
1888             std::fprintf(smldest,
1889                          "\n     [0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8"
1890                          PRIx32 ", 0x%.8" PRIx32 ",",
1891                          static_cast<int>(hashtable[i][0]),
1892                          static_cast<int>(hashtable[i][0]>>32),
1893                          static_cast<int>(hashtable[i][1]),
1894                          static_cast<int>(hashtable[i][1]>>32),
1895                          static_cast<int>(hashtable[i][2]));
1896             std::fprintf(smldest,
1897                          "\n      0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8"
1898                          PRIx32 ", 0x%.8" PRIx32 "]",
1899                          static_cast<int>(hashtable[i][2]>>32),
1900                          static_cast<int>(hashtable[i][3]),
1901                          static_cast<int>(hashtable[i][3]>>32),
1902                          static_cast<int>(hashtable[i][4]),
1903                          static_cast<int>(hashtable[i][4]>>32));
1904         }
1905         std::fprintf(dest, "\n};\n\n");
1906         std::fprintf(dest, "#define CHAR_METRICS_MODULUS %d\n",
1907                      main_r.modulus2);
1908         std::fprintf(dest, "#define CHAR_METRICS_OFFSET %d\n\n",
1909                      main_r.offset2);
1910         std::fprintf(rdest, "\n    )))\n\n");
1911         std::fprintf(smldest, "\n    ]);\n\n");
1912         std::fprintf(smldest, "val CHAR_METRICS_MODULUS = %d;\n",
1913                      main_r.modulus2);
1914         std::fprintf(smldest, "val CHAR_METRICS_OFFSET = %d;\n\n",
1915                      main_r.offset2);
1916         std::fprintf(dest, "const uint32_t topcentre[%d] = \n{",
1917                      topcentre_r.table_size);
1918         std::fprintf(rdest,
1919                      "#eval (setq topcentre_hash!* (list_to_vec32 '\n    (");
1920         std::fprintf(smldest, "val topcentre_hash = Vector.fromList [");
1921         for (i=0; i<topcentre_r.table_size; i++)
1922         {   if (i != 0)
1923             {   std::fprintf(dest, ",");
1924                 std::fprintf(smldest, ",");
1925             }
1926             std::fprintf(dest, "\n    UINT32_C(0x%.8" PRIx32 ")", topcentre[i]);
1927             std::fprintf(rdest, "\n     0x%.8" PRIx32, topcentre[i]);
1928             std::fprintf(smldest, "\n     0x%.8" PRIx32, topcentre[i]);
1929         }
1930         std::fprintf(dest, "\n};\n\n");
1931         std::fprintf(dest, "#define TOPCENTRE_MODULUS %d\n",
1932                      topcentre_r.modulus2);
1933         std::fprintf(dest, "#define TOPCENTRE_OFFSET %d\n\n",
1934                      topcentre_r.offset2);
1935         std::fprintf(rdest, "\n    )))\n\n");
1936         std::fprintf(smldest, "\n    ];\n\n");
1937         std::fprintf(smldest, "val TOPCENTRE_SIZE = %d;\n",
1938                      topcentre_r.table_size);
1939         std::fprintf(smldest, "val TOPCENTRE_MODULUS = %d;\n",
1940                      topcentre_r.modulus2);
1941         std::fprintf(smldest, "val TOPCENTRE_OFFSET = %d;\n\n",
1942                      topcentre_r.offset2);
1943         std::fprintf(dest, "const uint32_t variant_table[%d][6] = \n{",
1944                      variant_r.table_size);
1945         std::fprintf(rdest,
1946                      "#eval (setq variant_hash!* (list_to_metric_table '\n    (");
1947         std::fprintf(smldest, "val variant_hash = Vector.fromList\n"
1948                      "   (map Vector.fromList [");
1949         for (i=0; i<variant_r.table_size; i++)
1950         {   if (i != 0)
1951             {   std::fprintf(dest, ",");
1952                 std::fprintf(smldest, ",");
1953             }
1954             std::fprintf(dest,
1955                          "\n    {0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x}",
1956                          variant_table[i][0], variant_table[i][1],
1957                          variant_table[i][2], variant_table[i][3],
1958                          variant_table[i][4], variant_table[i][5]);
1959             std::fprintf(rdest,
1960                          "\n     (0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x)",
1961                          variant_table[i][0], variant_table[i][1],
1962                          variant_table[i][2], variant_table[i][3],
1963                          variant_table[i][4], variant_table[i][5]);
1964             std::fprintf(smldest,
1965                          "\n     [0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x]",
1966                          variant_table[i][0], variant_table[i][1],
1967                          variant_table[i][2], variant_table[i][3],
1968                          variant_table[i][4], variant_table[i][5]);
1969         }
1970         std::fprintf(dest, "\n};\n\n");
1971         std::fprintf(dest, "#define VARIANT_MODULUS %d\n",
1972                      variant_r.modulus2);
1973         std::fprintf(dest, "#define VARIANT_OFFSET %d\n\n",
1974                      variant_r.offset2);
1975         std::fprintf(rdest, "\n    )))\n\n");
1976         std::fprintf(smldest, "\n    ]);\n\n");
1977         std::fprintf(smldest, "val VARIANT_SIZE = %d;\n",
1978                      variant_r.table_size);
1979         std::fprintf(smldest, "val VARIANT_MODULUS = %d;\n",
1980                      variant_r.modulus2);
1981         std::fprintf(smldest, "val VARIANT_OFFSET = %d;\n\n",
1982                      variant_r.offset2);
1983         std::fprintf(dest, "const uint32_t extension_table[%d][11] = \n{",
1984                      extension_r.table_size);
1985         std::fprintf(rdest,
1986                      "#eval (setq extension_hash!* (list_to_metric_table '\n    (");
1987         std::fprintf(smldest, "val extension_hash = Vector.fromList\n"
1988                      "   (map Vector.fromList [");
1989         for (i=0; i<extension_r.table_size; i++)
1990         {   if (i != 0)
1991             {   std::fprintf(dest, ",");
1992                 std::fprintf(smldest, ",");
1993             }
1994             std::fprintf(dest,
1995                          "\n    {0x%.8x,\n     0x%.8x, 0x%.8x,\n     0x%.8x, 0x%.8x,\n"
1996                          "     0x%.8x, 0x%.8x,\n     0x%.8x, 0x%.8x,\n"
1997                          "     0x%.8x, 0x%.8x}",
1998                          extension_table[i][0], extension_table[i][1],
1999                          extension_table[i][2], extension_table[i][3],
2000                          extension_table[i][4], extension_table[i][5],
2001                          extension_table[i][6], extension_table[i][7],
2002                          extension_table[i][8], extension_table[i][9],
2003                          extension_table[i][10]);
2004             std::fprintf(rdest,
2005                          "\n     (0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n"
2006                          "                 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x)",
2007                          extension_table[i][0], extension_table[i][1],
2008                          extension_table[i][2], extension_table[i][3],
2009                          extension_table[i][4], extension_table[i][5],
2010                          extension_table[i][6], extension_table[i][7],
2011                          extension_table[i][8], extension_table[i][9],
2012                          extension_table[i][10]);
2013             std::fprintf(smldest,
2014                          "\n     [0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x,\n"
2015                          "                 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x]",
2016                          extension_table[i][0], extension_table[i][1],
2017                          extension_table[i][2], extension_table[i][3],
2018                          extension_table[i][4], extension_table[i][5],
2019                          extension_table[i][6], extension_table[i][7],
2020                          extension_table[i][8], extension_table[i][9],
2021                          extension_table[i][10]);
2022         }
2023         std::fprintf(dest, "\n};\n\n");
2024         std::fprintf(dest, "#define EXTENSION_MODULUS %d\n",
2025                      extension_r.modulus2);
2026         std::fprintf(dest, "#define EXTENSION_OFFSET %d\n\n",
2027                      extension_r.offset2);
2028         std::fprintf(rdest, "\n    )))\n\n");
2029         std::fprintf(smldest, "\n    ]);\n\n");
2030         std::fprintf(smldest, "val EXTENSION_SIZE = %d;\n",
2031                      extension_r.table_size);
2032         std::fprintf(smldest, "val EXTENSION_MODULUS = %d;\n",
2033                      extension_r.modulus2);
2034         std::fprintf(smldest, "val EXTENSION_OFFSET = %d;\n\n",
2035                      extension_r.offset2);
2036         std::fprintf(dest, "const int16_t fontkern[] = \n{");
2037         std::fprintf(rdest, "#eval (setq fontkern!* (list_to_vec16 '\n    (");
2038         std::fprintf(smldest, "val fontkern = Vector.fromList [");
2039 // SML version not sorted yet...
2040         for (i=0; i<F_end; i++)
2041         {   int w = std::fprintf(dest, "\n    %d", fontkern[i]);
2042             if (i != F_end-1) std::fprintf(dest, ",");
2043             else std::fprintf(dest, " ");
2044             while (++w < 16) std::fprintf(dest, " ");
2045             w = std::fprintf(rdest, "\n    %d ", fontkern[i]);
2046             while (++w < 16) std::fprintf(rdest, " ");
2047             std::fprintf(dest, "// %s", fontnames[i]);
2048             if (fontkern[i] < 0)
2049                 w = std::fprintf(smldest, "\n    ~%d", -fontkern[i]);
2050             else w = std::fprintf(smldest, "\n    %d", fontkern[i]);
2051             if (i != F_end-1) std::fprintf(smldest, ",");
2052             else std::fprintf(smldest, " ");
2053             while (++w < 16) std::fprintf(smldest, " ");
2054             std::fprintf(smldest, "(* %s ", fontnames[i]);
2055             if (i != F_end-2 &&
2056                 fontkern[i+1] != fontkern[i])
2057                 std::fprintf(dest, " [%d items]", fontkern[i+1]-fontkern[i]);
2058             std::fprintf(rdest, "%% %s", fontnames[i]);
2059             if (i != F_end-2 &&
2060                 fontkern[i+1] != fontkern[i])
2061                 std::fprintf(rdest, " [%d items]", fontkern[i+1]-fontkern[i]);
2062             if (i != F_end-2 &&
2063                 fontkern[i+1] != fontkern[i])
2064                 std::fprintf(smldest, " [%d items] ", fontkern[i+1]-fontkern[i]);
2065             std::fprintf(smldest, "*)");
2066         }
2067         std::fprintf(dest, "\n};\n\n");
2068         std::fprintf(rdest, "\n    )))\n\n");
2069         std::fprintf(smldest, "\n    ];\n\n");
2070         std::fprintf(dest, "const uint32_t kerntable[] = \n{");
2071         std::fprintf(rdest,
2072                      "#eval (setq kerntable!* (list_to_vec32 '\n    (");
2073         std::fprintf(smldest, "val kerntable = Vector.fromList [");
2074         for (i=0; i<kernp; i++)
2075         {   std::fprintf(dest, "\n    0x%.8" PRIx32, kerntable[i]);
2076             if (i != kernp-1) std::fprintf(dest, ",");
2077             else std::fprintf(dest, " ");
2078             std::fprintf(rdest, "\n    0x%.8" PRIx32 " ", kerntable[i]);
2079             std::fprintf(smldest, "\n    0x%.8" PRIx32, kerntable[i]);
2080             if (i != kernp-1) std::fprintf(smldest, ",");
2081             else std::fprintf(smldest, " ");
2082             if ((kerntable[i] & IS_LIGATURE) != 0)
2083                 std::fprintf(dest, "   // [%d:%d] %s + %s ligature #%d (%s)",
2084                              i, i-fontkern[ktfontn[i]],
2085                              ktstart[i], ktfollow[i],
2086                              kerntable[i]>>23, ktfont[i]);
2087             else
2088                 std::fprintf(dest, "   // [%d:%d] %s + %s : %d (%s)",
2089                              i, i-fontkern[ktfontn[i]],
2090                              ktstart[i], ktfollow[i],
2091                              ktadjustment[i], ktfont[i]);
2092             if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(dest, " ;;");
2093             if ((kerntable[i] & IS_LIGATURE) != 0)
2094                 std::fprintf(rdest, "   %% [%d:%d] %s + %s ligature #%d (%s)",
2095                              i, i-fontkern[ktfontn[i]],
2096                              ktstart[i], ktfollow[i],
2097                              kerntable[i]>>23, ktfont[i]);
2098             else
2099                 std::fprintf(rdest, "   %% [%d:%d] %s + %s : %d (%s)",
2100                              i, i-fontkern[ktfontn[i]],
2101                              ktstart[i], ktfollow[i],
2102                              ktadjustment[i], ktfont[i]);
2103             if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(rdest, " ;;");
2104             if ((kerntable[i] & IS_LIGATURE) != 0)
2105                 std::fprintf(smldest, "   (* [%d:%d] %s + %s ligature #%d (%s) *)",
2106                              i, i-fontkern[ktfontn[i]],
2107                              ktstart[i], ktfollow[i],
2108                              kerntable[i]>>23, ktfont[i]);
2109             else
2110                 std::fprintf(smldest, "   (* [%d:%d] %s + %s : %d (%s) *)",
2111                              i, i-fontkern[ktfontn[i]],
2112                              ktstart[i], ktfollow[i],
2113                              ktadjustment[i], ktfont[i]);
2114             if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(smldest,
2115                         " (*;;*)");
2116         }
2117         std::fprintf(dest, "\n};\n\n");
2118         std::fprintf(rdest, "\n    )))\n\n");
2119         std::fprintf(smldest, "\n    ];\n\n");
2120         std::fprintf(dest, "const uint32_t ligaturetable[] = \n{");
2121         std::fprintf(rdest,
2122                      "#eval (setq ligaturetable!* (list_to_vec32 '\n    (");
2123         std::fprintf(smldest, "val ligaturetable = Vector.fromList [");
2124         for (i=0; i<ligp; i++)
2125         {   int l = std::fprintf(dest, "\n    %" PRId32, ligtable[i]);
2126             if (i != ligp-1) std::fprintf(dest, ",");
2127             else std::fprintf(dest, " ");
2128             while (++l < 12) std::fprintf(dest, " ");
2129             l = std::fprintf(rdest, "\n    %" PRId32 " ", ligtable[i]);
2130             while (++l < 12) std::fprintf(rdest, " ");
2131             l = std::fprintf(smldest, "\n    %" PRId32, ligtable[i]);
2132             if (i != ligp-1) std::fprintf(smldest, ",");
2133             else std::fprintf(smldest, " ");
2134             while (++l < 12) std::fprintf(smldest, " ");
2135             std::fprintf(dest, "   // [%d] %s + %s => %s (%s)",
2136                          i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]);
2137             std::fprintf(rdest, "   %% [%d] %s + %s => %s (%s)",
2138                          i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]);
2139             std::fprintf(smldest, "   (* [%d] %s + %s => %s (%s) *)",
2140                          i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]);
2141         }
2142         std::fprintf(dest, "\n};\n\n");
2143         std::fprintf(rdest, "\n    )))\n\n");
2144         std::fprintf(dest, "// end of charmetrics.h\n");
2145         std::fprintf(smldest, "\n   ];\n\n");
2146         std::fprintf(rdest,
2147                      "%% The use of #eval means that the metrics above have been defined at\n");
2148         std::fprintf(rdest,
2149                      "%% parse time. I now need to ensure that they will be available even\n");
2150         std::fprintf(rdest,
2151                      "%% when this code is passed through the compiler and hence everything\n");
2152         std::fprintf(rdest,
2153                      "%% goes via a FASL file. The slighly curious macro here should achieve\n");
2154         std::fprintf(rdest, "%% that.\n");
2155         std::fprintf(rdest, "\n");
2156         std::fprintf(rdest,
2157                      "symbolic macro procedure get_character_metrics !*unused!*;\n");
2158         std::fprintf(rdest, "  list('progn,\n");
2159         std::fprintf(rdest,
2160                      "    list('setq, 'metrics_hash!*, mkquote metrics_hash!*),\n");
2161         std::fprintf(rdest,
2162                      "    list('setq, 'fontkern!*, mkquote fontkern!*),\n");
2163         std::fprintf(rdest,
2164                      "    list('setq, 'kerntable!*, mkquote kerntable!*),\n");
2165         std::fprintf(rdest,
2166                      "    list('setq, 'ligaturetable!*, mkquote ligaturetable!*),\n");
2167         std::fprintf(rdest, "    \"character metrics established\");\n");
2168         std::fprintf(rdest, "\n");
2169         std::fprintf(rdest,
2170                      "%% The call to the macro here expands into four simple assignments.\n");
2171         std::fprintf(rdest, "symbolic get_character_metrics();\n");
2172         std::fprintf(rdest, "\n");
2173         std::fprintf(rdest,
2174                      "fluid '(c_width c_llx c_lly c_urx c_ury c_kerninfo);\n");
2175         std::fprintf(rdest, "\n");
2176         std::fprintf(rdest,
2177                      "%% This code looks up a font/codepoint pair in the tables and returns\n");
2178         std::fprintf(rdest,
2179                      "%% a character width (escapement) and a bounding box. It leaves behind\n");
2180         std::fprintf(rdest,
2181                      "%% c_kerninfo - and index into a kern and ligature table.\n");
2182         std::fprintf(rdest, "\n");
2183         std::fprintf(rdest,
2184                      "symbolic procedure lookupchar(fontnum, codepoint);\n");
2185         std::fprintf(rdest, "  begin\n");
2186         std::fprintf(rdest,
2187                      "    scalar v, h1, h2, w, whi, wlo, fullkey, key;\n");
2188         std::fprintf(rdest, "%% pack codes into fewer bits\n");
2189         std::fprintf(rdest, "    if fontnum < 2 then <<\n");
2190         std::fprintf(rdest,
2191                      "      if land(codepoint, 0xd800) = 0xd800 then codepoint := 0xffff\n");
2192         std::fprintf(rdest, "      else if codepoint >= 0x10000 then <<\n");
2193         std::fprintf(rdest,
2194                      "        if codepoint < 0x10800 then codepoint := 0xd800 + land(codepoint, 0xfff)\n");
2195         std::fprintf(rdest, "        else codepoint := 0xffff >> >>\n");
2196         std::fprintf(rdest,
2197                      "    else if codepoint >= 0x4000 and codepoint < 0x8000 then codepoint := 0xffff\n");
2198         std::fprintf(rdest,
2199                      "    else if codepoint >= 0x1d000 and codepoint < 0x1e000 then\n");
2200         std::fprintf(rdest,
2201                      "      codepoint = 0x4000 + land(codepoint, 0xfff)\n");
2202         std::fprintf(rdest,
2203                      "    else if codepoint >= 0x108000 and codepoint < 0x109000 then\n");
2204         std::fprintf(rdest,
2205                      "      codepoint = 0x5000 + land(codepoint, 0xfff)\n");
2206         std::fprintf(rdest,
2207                      "    else if codepoint >= 0x10000 then codepoint := 0xffff;\n");
2208         std::fprintf(rdest,
2209                      "    fullkey := lshift(fontnum, 16) + codepoint\n");
2210         std::fprintf(rdest, "    key := lshift(fullkey, -2);\n");
2211 // I REALLY want the key to be positive here!
2212         std::fprintf(rdest, "    h1 := remainder(key, %d);\n",
2213                      main_r.table_size);
2214         std::fprintf(rdest, "    %% Hash table probe 1.\n");
2215         std::fprintf(rdest,
2216                      "    v := land(getv32(w := getv(metrics_hash!*, h1), 0), 0x7ffff);\n");
2217         std::fprintf(rdest, "    if not (v = key) then <<\n");
2218         std::fprintf(rdest, "      h2 := remainder(key, %d) + %d;\n",
2219                      main_r.modulus2, main_r.offset2);
2220         std::fprintf(rdest, "      %% Hash table probe 2.\n");
2221         std::fprintf(rdest,
2222                      "      v := land(getv32(w := getv(metrics_hash!*, h2), 0), 0x7ffff);\n");
2223         std::fprintf(rdest, "      if not (v = key) then <<\n");
2224         std::fprintf(rdest, "        h1 := h1 + h2;\n");
2225         std::fprintf(rdest, "        if h1 >= %d then h1 := h1 - %d;\n",
2226                      main_r.table_size, main_r.table_size);
2227         std::fprintf(rdest, "        %% Hash table probe 3.\n");
2228         std::fprintf(rdest,
2229                      "        v := land(getv32(w := getv(metrics_hash!*, h1), 0), 0x7ffff);\n");
2230         std::fprintf(rdest,
2231                      "        if not (v = key) then return nil >> >>;\n");
2232         std::fprintf(rdest, "    v := 2*land(fullkey, 3);\n");
2233         std::fprintf(rdest, "    wlo := getv32(w, v+2);\n");
2234         std::fprintf(rdest,
2235                      "    if wlo = 0 then return nil; %% in hash table but no character here.\n");
2236         std::fprintf(rdest, "    whi := getv32(w, v+3);\n");
2237         std::fprintf(rdest,
2238                      "    c_width := land(lshift(whi, -19), 0x1fff);\n");
2239         std::fprintf(rdest,
2240                      "    c_llx := land(lshift(whi, -6), 0x1fff) - 3000;\n");
2241         std::fprintf(rdest, "    c_lly := land(lshift(wlo, -26), 0x3f) +\n");
2242         std::fprintf(rdest,
2243                      "             land(lshift(whi, 6), 0xfc0) - 1000;\n");
2244         std::fprintf(rdest,
2245                      "    c_urx := land(lshift(wlo, -13), 0x1fff) - 500;\n");
2246         std::fprintf(rdest, "    c_ury := land(wlo, 0x1fff) - 1000;\n");
2247         std::fprintf(rdest,
2248                      "    if v = 0 then c_kerninfo := land(lshift(getv32(w, 0), -19), 0x7ff)\n");
2249         std::fprintf(rdest,
2250                      "    else if v = 2 then c_kerninfo := land(lshift(getv32(w, 0), -30), 0x3) +\n");
2251         std::fprintf(rdest,
2252                      "                                     land(lshift(getv32(w, 1), 2), 0x7fc)\n");
2253         std::fprintf(rdest,
2254                      "    else if v = 4 then c_kerninfo := land(lshift(getv32(w, 1), -9), 0x7ff)\n");
2255         std::fprintf(rdest,
2256                      "    else c_kerninfo := land(lshift(getv32(w, 1), -20), 0x7ff);\n");
2257         std::fprintf(rdest, "    if not zerop c_kerninfo then\n");
2258         std::fprintf(rdest,
2259                      "      c_kerninfo := c_kerninfo + getv16(fontkern!*, fontnum);\n");
2260         std::fprintf(rdest, "    return t\n");
2261         std::fprintf(rdest, "  end;\n");
2262         std::fprintf(rdest, "\n");
2263         std::fprintf(rdest,
2264                      "symbolic procedure lookupkernadjustment codepoint;\n");
2265         std::fprintf(rdest, "  begin\n");
2266         std::fprintf(rdest, "    scalar i, w;\n");
2267         std::fprintf(rdest,
2268                      "    if zerop (i := c_kerninfo) then return 0;\n");
2269         std::fprintf(rdest, " a: w := getv32(kerntable!*, i);\n");
2270         std::fprintf(rdest, "    if land(w, 0x001fffff) = codepoint and\n");
2271         std::fprintf(rdest, "      zerop land(w, 0x00200000) then <<\n");
2272         std::fprintf(rdest, "        w := land(lshift(w, -23), 0x1ff);\n");
2273         std::fprintf(rdest,
2274                      "        if not zerop land(w, 0x100) then w := w - 0x200;\n");
2275         std::fprintf(rdest, "        return w >>\n");
2276         std::fprintf(rdest,
2277                      "    else if not zerop land(w, 0x00400000) then return 0;\n");
2278         std::fprintf(rdest, "    i := add1 i;\n");
2279         std::fprintf(rdest, "    go to a\n");
2280         std::fprintf(rdest, "  end;\n");
2281         std::fprintf(rdest, "\n");
2282         std::fprintf(rdest, "symbolic procedure lookupligature codepoint;\n");
2283         std::fprintf(rdest, "  begin\n");
2284         std::fprintf(rdest, "    scalar i, w;\n");
2285         std::fprintf(rdest,
2286                      "    if zerop (i := c_kerninfo) then return nil;\n");
2287         std::fprintf(rdest, " a: w := getv32(kerntable!*, i);\n");
2288         std::fprintf(rdest, "    if land(w, 0x001fffff) = codepoint and\n");
2289         std::fprintf(rdest, "      not zerop land(w, 0x00200000) then\n");
2290         std::fprintf(rdest,
2291                      "        return getv32(ligaturetable!*, land(lshift(w, -23), 0x1ff))\n");
2292         std::fprintf(rdest,
2293                      "    else if not zerop land(w, 0x00400000) then return nil;\n");
2294         std::fprintf(rdest, "    i := add1 i;\n");
2295         std::fprintf(rdest, "    go to a\n");
2296         std::fprintf(rdest, "  end;\n");
2297         std::fprintf(rdest, "\n");
2298         std::fprintf(rdest, "symbolic procedure accentposition key;\n");
2299         std::fprintf(rdest, "  begin\n");
2300         std::fprintf(rdest, "    scalar h1, h2, v, w;\n");
2301         std::fprintf(rdest, "    h1 := remainder(key, %d);\n",
2302                      topcentre_r.table_size);
2303         std::fprintf(rdest, "    %% Hash table probe 1.\n");
2304         std::fprintf(rdest,
2305                      "    v := land(w := getv32(topcentre_hash!*, h1), 0x1fffff);\n");
2306         std::fprintf(rdest, "    if not (v = key) then <<\n");
2307         std::fprintf(rdest, "      h2 := remainder(key, %d) + %d;\n",
2308                      topcentre_r.modulus2, topcentre_r.offset2);
2309         std::fprintf(rdest, "      %% Hash table probe 2.\n");
2310         std::fprintf(rdest,
2311                      "      v := land(w := getv32(topcentre_hash!*, h2), 0x1fffff);\n");
2312         std::fprintf(rdest, "      if not (v = key) then return 0 >>;\n");
2313         std::fprintf(rdest, "    return lshift(w, -21)\n");
2314         std::fprintf(rdest, "  end;\n");
2315         std::fprintf(rdest, "\n");
2316         std::fprintf(rdest, "end;\n\n");
2317         std::fprintf(rdest,
2318                      "%% Note that variants must be passed a codepoint and direction flag\n");
2319         std::fprintf(rdest, "symbolic procedure variants key;\n");
2320         std::fprintf(rdest, "  begin\n");
2321         std::fprintf(rdest, "    scalar h1, h2, h3, v, w;\n");
2322         std::fprintf(rdest, "    h1 := remainder(key, %d);\n",
2323                      variant_r.table_size);
2324         std::fprintf(rdest, "    %% Hash table probe 1.\n");
2325         std::fprintf(rdest,
2326                      "    v := getv32(w := getv(variant_hash!*, h1), 0);\n");
2327         std::fprintf(rdest, "    if not (v = key) then <<\n");
2328         std::fprintf(rdest, "      h2 := remainder(key, %d) + %d;\n",
2329                      variant_r.modulus2, variant_r.offset2);
2330         std::fprintf(rdest, "      %% Hash table probe 2.\n");
2331         std::fprintf(rdest,
2332                      "      v := getv32(w := getv(variant_hash!*, h2), 0);\n");
2333         std::fprintf(rdest, "      if not (v = key) then <<\n");
2334         std::fprintf(rdest, "         h3 := remainder(h1 + h2, %d);\n",
2335                      variant_r.table_size);
2336         std::fprintf(rdest, "         %% Hash table probe 3.\n");
2337         std::fprintf(rdest,
2338                      "         v := getv32(w := getv(variant_hash!*, h3), 0);\n");
2339         std::fprintf(rdest,
2340                      "         if not (v = key) then return nil >> >>;\n");
2341         std::fprintf(rdest, "    return w\n");
2342         std::fprintf(rdest, "  end;\n");
2343         std::fprintf(rdest, "\n");
2344         std::fprintf(rdest, "symbolic procedure extension key;\n");
2345         std::fprintf(rdest, "  begin\n");
2346         std::fprintf(rdest, "    scalar h1, h2, h3, v, w;\n");
2347         std::fprintf(rdest, "    h1 := remainder(key, %d);\n",
2348                      extension_r.table_size);
2349         std::fprintf(rdest, "    %% Hash table probe 1.\n");
2350         std::fprintf(rdest,
2351                      "    v := getv32(w := getv(extension_hash!*, h1), 0);\n");
2352         std::fprintf(rdest, "    if not (v = key) then <<\n");
2353         std::fprintf(rdest, "      h2 := remainder(key, %d) + %d;\n",
2354                      extension_r.modulus2, extension_r.offset2);
2355         std::fprintf(rdest, "      %% Hash table probe 2.\n");
2356         std::fprintf(rdest,
2357                      "      v := getv32(w := getv(extension_hash!*, h2), 0);\n");
2358         std::fprintf(rdest, "      if not (v = key) then <<\n");
2359         std::fprintf(rdest, "         h3 := remainder(h1 + h2, %d);\n",
2360                      extension_r.table_size);
2361         std::fprintf(rdest, "         %% Hash table probe 3.\n");
2362         std::fprintf(rdest,
2363                      "         v := getv32(w := getv(extension_hash!*, h3), 0);\n");
2364         std::fprintf(rdest,
2365                      "         if not (v = key) then return nil >> >>;\n");
2366         std::fprintf(rdest, "    return w\n");
2367         std::fprintf(rdest, "  end;\n");
2368         std::fprintf(rdest, "\n");
2369         std::fprintf(rdest, "end;\n\n");
2370         std::fprintf(rdest, "%% end of charmetrics.red\n");
2371         std::fprintf(smldest, "(* end of charmetrics.sml *)\n");
2372         std::fclose(dest);
2373         std::fclose(rdest);
2374     }
2375 #endif // DUMMY
2376 }
2377 
2378 #else // CREATE
2379 //
2380 // I am putting the code that decodes and accesses metric information
2381 // in the same file as the program that reads raw ".afm" files and
2382 // packs the information for me. I hope that this keeps coding and decoding
// together so they will remain in step.
2384 //
2385 
2386 // lookupchar takes a font and a codepoint and returns true if there is
2387 // information about the character and false if not. If it returns true
2388 // it will have left information about the character in a number of
2389 // variables that can then be inspected.
2390 
// Number of hash lines in charmetrics[], computed from the array itself
// so it always agrees with whatever table has been compiled in.
#define CHAR_METRICS_TABLE_SIZE (sizeof(charmetrics) / sizeof(*charmetrics))

// Values filled in by lookupchar() when it succeeds.
int c_width;      // character width
int c_llx;        // unpacked "llx" field
int c_lly;        // unpacked "lly" field
int c_urx;        // unpacked "urx" field
int c_ury;        // unpacked "ury" field
int c_kerninfo;   // index into the kern table, or 0 when there is none
2394 
2395 int lookupchar(int fontnum, int codepoint)
2396 {   int fullkey = pack_character(fontnum, codepoint); // 21-bit key
2397     int key = fullkey >> 2; // because my hash table has line-size 4
2398     int v, h1, h2;
2399     uint64_t w;
2400     h1 = key % CHAR_METRICS_TABLE_SIZE;
2401     v = static_cast<int>(charmetrics[h1][0]) & 0x7ffff;
2402     if (v != key)
2403     {   h2 = (key % CHAR_METRICS_MODULUS) + CHAR_METRICS_OFFSET;
2404         v = static_cast<int>(charmetrics[h2][0]) & 0x7ffff;
2405         if (v != key)
2406         {   h1 += h2;
2407             if (h1 >= CHAR_METRICS_TABLE_SIZE) h1 -= CHAR_METRICS_TABLE_SIZE;
2408             v = static_cast<int>(charmetrics[h1][0]) & 0x7ffff;
2409             if (v != key) return 0;
2410         }
2411     }
2412     w = charmetrics[h1][1+(fullkey&3)];
2413 // Even though the hash line exists this entry in it may be unused.
2414     if (w == 0) return 0;
2415     c_width = static_cast<int>(w >> 51) & 0x1fff;
2416     c_llx = (static_cast<int>(w >> 38) & 0x1fff) - 3000;
2417     c_lly = (static_cast<int>(w >> 26) & 0x0fff) - 1000;
2418     c_urx = (static_cast<int>(w >> 13) & 0x1fff) - 500;
2419     c_ury = (static_cast<int>(w) & 0x1fff) - 1000;
2420 // Based on the font and 11 bits of information from the hash table I will
2421 // set up a pointer into kerntable. The interpretation of that will be
2422 // considered in the function lookupkernandligature. If the 11 bit field
2423 // contains zero then there is neither kern nor ligature information
2424 // associated with this character.
2425     v = static_cast<int>(charmetrics[h1][0] >> (19+11*
2426                          (fullkey&3))) & 0x7ff;
2427     if (v != 0) v += fontkern[fontnum];
2428 // c_kerninfo will be left zero if there is no information, otherwise an
2429 // index into a table.
2430     c_kerninfo = v;
2431     return 1;
2432 }
2433 
2434 // Having looked up a single character I may now need to check for
2435 // ligature or kerning information. I will do that by passing the
2436 // codepoint of the successor character (which must be in the same
2437 // font). Thus the complete sequence will be
2438 //      lookupchar(font_number, codepoint_for_first_character);
2439 //      lookupkernandligature(codepoint_for_second_character);
2440 // This returns an int32_t where the bottom 21 bits are a codepoint for
2441 // a character that can replace the two that were specified. For instance
// this can return a single ligature "fi" if the two input codepoints are
2443 // for "f" and "i", or it could return first "ff" for two "f" characters
2444 // that are adjacent, and then "ffi" for the resulting "ff" if followed
2445 // by "i". Those bits will be returned as zero if no ligature is available
2446 // (and that will almost always be the case).
2447 // The top 9 bits of the 32-bit word will be a signed value for a
2448 // kerning adjustment to the spacing between a pair of characters, relative
2449 // to 1000 as the notional height of the character cell. This is again
2450 // 0 if no adjustment is called for. As one of the more extreme examples,
2451 // in one font if an "A" is followed by a "V" the space between them can be
// decreased by 109 units, while when a "T" is followed by a "W" an extra
// 41 units are called for (and perhaps less obviously an "L" followed by
// "-" (hyphen) calls for even more extra space than that).
2455 //
2456 // I provide variants that collect just kern or just ligature information.
2457 
2458 int32_t lookupkernandligature(int codepoint)
2459 {   int32_t r = 0;
2460     uint32_t w;
2461     int i;
2462     if ((i = c_kerninfo) == 0) return
2463             0;  // No info based on current start.
2464 // The worst cases I can see in my fonts is the kern information for "W"
2465 // in STIX-Regular where around 50 characters following "W" get their spacing
2466 // adjusted. That gives some impression of the most extreme number of
2467 // times this loop will be traversed. For many characters there will be no
2468 // kern information at all, and when there is any it will usually be
2469 // pretty limited so average costs here should end up low.
2470     do
2471     {   w = kerntable[i++];
2472 // The kern table contains a sequence of 32-bit words. The low 21 bits of
2473 // each is a codepoint being the second character of a pair. The next two
2474 // bits are flags. One indicates whether the word is documenting kern or
2475 // ligature information. The other marks the final word of information
2476 // relating to a lead character. That leaves 9 bits. For kern information
2477 // that is a 9-bit signed spacing adjustment. For ligatures it is a
2478 // 9 bit unsigned index into a table of codepoints giving the single
2479 // character to be used to replace the initial pair. Note that one pair
2480 // of characters can (and often well!) have both kern and ligature information
2481 // which is why the return value here can hand back both and why searching
2482 // continues through all information about the relevant pair. It would be a
2483 // MESS if the tables included two entries for the same character pair but
2484 // yielding different results! I ought to police that while creating the
2485 // tables.
2486 // Because w is an unsigned value I do not need to mask the result of the
2487 // right shift.
2488         if ((w & 0x001fffff) == codepoint)
2489         {   if ((w & IS_LIGATURE) != 0) r |= ligaturetable[w >> 23];
2490             else r |= (w & 0xff800000);
2491         }
2492     }
2493     while ((w & IS_BLOCKEND) == 0);
2494     return r;
2495 }
2496 
2497 // Much the same as the above but ONLY looks for kern information and returns
2498 // its result as a simple integer. In case kern information is found this
2499 // is just slightly faster than using the more general method.
2500 
2501 int32_t lookupkernadjustment(int codepoint)
2502 {   int32_t w;
2503     int i;
2504     if ((i = c_kerninfo) == 0) return
2505             0;  // No info based on current start.
2506     do
2507     {   w = kerntable[i++];
2508 // The shift right in the return statement relies on w being a signed
2509 // value and on signed shifts being arithmetic.
2510         if ((w & 0x001fffff) == codepoint &&
2511             (w & IS_LIGATURE) == 0) return (w >> 23);
2512     }
2513     while ((w & IS_BLOCKEND) == 0);
2514     return 0;
2515 }
2516 
2517 // Much the same as the above but ONLY looks for ligature information.
2518 
2519 int32_t lookupligature(int codepoint)
2520 {   uint32_t w;
2521     int i;
2522     if ((i = c_kerninfo) == 0) return
2523             0;  // No info based on current start.
2524     do
2525     {   w = kerntable[i++];
2526 // I made w unsigned so that the shift right returned an unsigned index.
2527         if ((w & 0x001fffff) == codepoint &&
2528             (w & IS_LIGATURE) != 0) return ligaturetable[w >> 23];
2529     }
2530     while ((w & IS_BLOCKEND) == 0);
2531     return 0;
2532 }
2533 
2534 #define TOPCENTRE_TABLE_SIZE (sizeof(topcentre)/sizeof(topcentre[0]))
2535 
2536 // accentposition only applies to characters in STIXMath. It gives a horizontal
2537 // offset to be used when positioning an accent above a character. I *believe*
2538 // the intent is to use both the position information from the base character
2539 // and the accent and line them up... The code here returns 0 if no special
2540 // information is available.
2541 
2542 int accentposition(int code)
2543 {   int hash1 = code % TOPCENTRE_TABLE_SIZE, hash2;
2544     int32_t r;
2545     if (((r = topcentre[hash1]) & 0x001fffff) == code) return ((
2546                     int32_t)r)>>21;
2547     hash2 = (code % TOPCENTRE_MODULUS) + TOPCENTRE_OFFSET;
2548     if (((r = topcentre[hash2]) & 0x001fffff) == code) return ((
2549                     int32_t)r)>>21;
2550     else return 0;
2551 }
2552 
2553 #define VARIANT_TABLE_SIZE (sizeof(variant_table)/sizeof(variant_table[0]))
2554 
2555 // Some characters have variants that represent gradually larger versions
2556 // of the same thing. A good example can be seen in the variations on
2557 // parenthesis, brackets and braces. There can also be horizontal size
2558 // varients such as wide overbars and circumflex accents that may be used to
2559 // go above wide items of various sorts. The code passed to character_variants
2560 // is the code point of the base character plus 0x00200000 if a horizontal
2561 // expansion is needed. The result is nullptr if nothing is available, or a
2562 // pointer to a block of 6 words otherwise. If this pointer is r, then r[0]
2563 // is the base character passed, but then r[1] tp r[5] are gradually larger
2564 // versions, or U+0000 when no further large versions are available.
2565 
2566 const uint32_t *character_variants(int code)
2567 {   int hash1 = code % VARIANT_TABLE_SIZE, hash2, hash3;
2568     int32_t r;
2569     if (variant_table[hash1][0] == code) return &variant_table[hash1][0];
2570     hash2 = (code % VARIANT_MODULUS) + VARIANT_OFFSET;
2571     if (variant_table[hash2][0] == code) return &variant_table[hash2][0];
2572     hash3 = (hash1 + hash2) % VARIANT_TABLE_SIZE;
2573     if (variant_table[hash3][0] == code) return &variant_table[hash3][0];
2574     return nullptr;
2575 }
2576 
2577 #define EXTENSION_TABLE_SIZE (sizeof(extension_table)/sizeof(extension_table[0]))
2578 
2579 // A characters such as "{" is associated with (up to) 5 other characters that
2580 // can be placed together to ranfer a huge version of it. This retrieves
2581 // a table showing how to do that.
2582 
2583 const uint32_t *character_extension(int code)
2584 {   int hash1 = code % EXTENSION_TABLE_SIZE, hash2, hash3;
2585     int32_t r;
2586     if (extension_table[hash1][0] == code) return
2587             &extension_table[hash1][0];
2588     hash2 = (code % EXTENSION_MODULUS) + EXTENSION_OFFSET;
2589     if (extension_table[hash2][0] == code) return
2590             &extension_table[hash2][0];
2591     hash3 = (hash1 + hash2) % EXTENSION_TABLE_SIZE;
2592     if (extension_table[hash3][0] == code) return
2593             &extension_table[hash3][0];
2594     return nullptr;
2595 }
2596 
2597 #ifdef TEST
2598 // If TEST is defined then this code will try some very minimal tests.
2599 // Expected output is
2600 //
2601 //    Hash table size was 10057
2602 // Second modulus, offset 8729 (1108)
2603 //    "e": width 444   BB 25 -10 424 460  (630)
2604 //    "f": width 333   BB 20 0 383 683  (636)
2605 //    "g": width 500   BB 28 -218 470 460  (663)
2606 //    "h": width 500   BB 9 0 487 683  (0)
2607 //    "i": width 278   BB 16 0 253 683  (0)
2608 //    "j": width 278   BB -70 -218 194 683  (0)
2609 //    "k": width 500   BB 7 0 505 683  (0)
2610 //    "l": width 278   BB 19 0 257 683  (669)
2611 //    "m": width 778   BB 16 0 775 460  (701)
2612 //    Kern/ligature data for sequence f-i is 14 64257
2613 //    Kern/ligature data for sequence f-l is 44 64258
2614 //    Top accent shift A=361 combining circumflex=-230
2615 //    Paren sizes = U+000028, U+000028, U+1081e2, U+10824e, U+108287, U+1082bf
2616 //    For { bottom U+10821e 0 600 1005 0
2617 //    lower extender U+10821f 600 500 1010 1
2618 //    middle piece U+10821d 200 200 1010 0
2619 //    upper extender U+10821f 500 600 1010 1
2620 //    top piece U+10821c 600 0 1005 0
2621 //    For | bottom U+00007c 0 600 1380 0
2622 //    extender U+00007c 600 0 1380 1
2623 //    unused U+000000 0 0 0 0
2624 //    unused U+000000 0 0 0 0
2625 //    unused U+000000 0 0 0 0
2626 //
2627 //
// The Kern/ligature lines say that if in font STIX-Regular an "f" is
2629 // followed by an "i" then either the two may have their spacing adjusted
2630 // by 14 units or the pair may be replaced by the character at codepoint
2631 // 64257 (which is "fi")... and similarly for "f" followed by "l". The output
// higher up tells us that in this font there is no kerning involving
2633 // a "j" followed by something else, while the (xxx) values are offsets
2634 // into a table of kerning information. "BB" is for "Bounding Box" and the
2635 // four numbers are for lower-left-x, lower-left-y, upper-right-x and
2636 // upper-right-y in that order.
2637 // The lower block is from the maths tables showing codepoints for a range
2638 // of sizes of left parentheses, and for ways to build up huge "{" and "|"
2639 // symbols.
2640 
2641 int main(int argc, char *argv[])
2642 {   int i, r;
2643     const uint32_t *p;
2644     std::printf("====== Test program starting ======\n");
2645     std::printf("Hash table size was %d\n",
2646                 static_cast<int>(CHAR_METRICS_TABLE_SIZE));
2647     std::printf("Second modulus, offset %d (%d)\n",
2648                 static_cast<int>(CHAR_METRICS_MODULUS),
2649                 static_cast<int>(CHAR_METRICS_OFFSET));
2650     for (i='e'; i<'n'; i++)
2651     {   r = lookupchar(F_Regular, i);
2652         if (r) std::printf("\"%c\": width %d   BB %d %d %d %d  (%d)\n",
2653                                i, c_width, c_llx, c_lly, c_urx, c_ury, c_kerninfo);
2654         else std::printf("\"%c\" char not found\n", i);
2655         std::fflush(stdout);
2656     }
2657     if (!lookupchar(F_Regular,
2658                     'f')) std::printf("Character \"f\" not found\n");
2659     else
2660     {   int32_t k = lookupkernandligature('i');
2661         std::printf("Kern/ligature data for sequence f-i is %d %d\n",
2662                     static_cast<int>(k >> 23), static_cast<int>(k & 0x001fffff));
2663         std::fflush(stdout);
2664         k = lookupkernandligature('l');
2665         std::printf("Kern/ligature data for sequence f-l is %d %d\n",
2666                     static_cast<int>(k >> 23), static_cast<int>(k & 0x001fffff));
2667         std::fflush(stdout);
2668     }
2669     std::printf("Top accent shift A=%d combining circumflex=%d\n",
2670                 accentposition('A'), accentposition(770));
2671     p = character_variants('(');
2672     if (p == nullptr) std::printf("Failed to find paren sizes\n");
2673     else std::printf("Paren sizes = U+%.6x, U+%.6x, U+%.6x, U+%.6x, U+%.6x, U+%.6x\n",
2674                          p[0], p[1], p[2], p[3], p[4], p[5]);
2675     p = character_extension('{');
2676     if (p == nullptr)
2677         std::printf("Failed to find left brace extension data\n");
2678     else
2679     {   std::printf("For { bottom U+%.6x %d %d %d %d\n",
2680                     p[1] & 0x001fffff, p[1]>>21,
2681                     p[2] & 0x0000ffff, (p[2]>>16) & 0x00007fff,
2682                     p[2]>>31);
2683         std::printf("lower extender U+%.6x %d %d %d %d\n",
2684                     p[3] & 0x001fffff, p[3]>>21,
2685                     p[4] & 0x0000ffff, (p[4]>>16) & 0x00007fff,
2686                     p[4]>>31);
2687         std::printf("middle piece U+%.6x %d %d %d %d\n",
2688                     p[5] & 0x001fffff, p[5]>>21,
2689                     p[6] & 0x0000ffff, (p[6]>>16) & 0x00007fff,
2690                     p[6]>>31);
2691         std::printf("upper extender U+%.6x %d %d %d %d\n",
2692                     p[7] & 0x001fffff, p[7]>>21,
2693                     p[8] & 0x0000ffff, (p[8]>>16) & 0x00007fff,
2694                     p[8]>>31);
2695         std::printf("top piece U+%.6x %d %d %d %d\n",
2696                     p[9] & 0x001fffff, p[9]>>21,
2697                     p[10] & 0x0000ffff, (p[10]>>16) & 0x00007fff,
2698                     p[10]>>31);
2699     }
2700     p = character_extension('|');
2701     if (p == nullptr)
2702         std::printf("Failed to find vertical bar extension data\n");
2703     else
2704     {   std::printf("For | bottom U+%.6x %d %d %d %d\n",
2705                     p[1] & 0x001fffff, p[1]>>21,
2706                     p[2] & 0x00007fff, (p[2]>>15) & 0x0000ffff,
2707                     p[2]>>31);
2708         std::printf("extender U+%.6x %d %d %d %d\n",
2709                     p[3] & 0x001fffff, p[3]>>21,
2710                     p[4] & 0x00007fff, (p[4]>>15) & 0x0000ffff,
2711                     p[4]>>31);
2712         std::printf("unused U+%.6x %d %d %d %d\n",
2713                     p[5] & 0x001fffff, p[5]>>21,
2714                     p[6] & 0x00007fff, (p[6]>>15) & 0x0000ffff,
2715                     p[6]>>31);
2716         std::printf("unused U+%.6x %d %d %d %d\n",
2717                     p[7] & 0x001fffff, p[7]>>21,
2718                     p[8] & 0x00007fff, (p[8]>>15) & 0x0000ffff,
2719                     p[8]>>31);
2720         std::printf("unused U+%.6x %d %d %d %d\n",
2721                     p[9] & 0x001fffff, p[9]>>21,
2722                     p[10] & 0x00007fff, (p[10]>>15) & 0x0000ffff,
2723                     p[10]>>31);
2724     }
2725     return 0;
2726 }
2727 
2728 #endif // TEST
2729 #endif // CREATE
2730 
2731 // end of charmetrics.cpp
2732