1 // charmetrics.cpp Copyright (C) 2021 Codemist 2 3 //=======================================================================// 4 // // 5 // WARNING. In "-DCREATE=1" mode this program takes about 40 minutes // 6 // of CPU time on a computer that in 2016 counts as fairly fast. So // 7 // when you run it be prepared to be reasonably patient. The slow run // 8 // really only needed to be done once to extract font information from // 9 // a collection of ".afm" files and form it into a compact table, so it // 10 // should only need to be run if the fonts being used change. // 11 // // 12 //=======================================================================// 13 14 15 /************************************************************************** 16 * Copyright (C) 2021, Codemist. A C Norman * 17 * * 18 * Redistribution and use in source and binary forms, with or without * 19 * modification, are permitted provided that the following conditions are * 20 * met: * 21 * * 22 * * Redistributions of source code must retain the relevant * 23 * copyright notice, this list of conditions and the following * 24 * disclaimer. * 25 * * Redistributions in binary form must reproduce the above * 26 * copyright notice, this list of conditions and the following * 27 * disclaimer in the documentation and/or other materials provided * 28 * with the distribution. * 29 * * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * 31 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * 32 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * 33 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE * 34 * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * 35 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * 36 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * 37 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * 38 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * 39 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF * 40 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * 41 * DAMAGE. * 42 *************************************************************************/ 43 44 45 // $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $ 46 47 48 #ifdef CREATE 49 50 // The slowest part of the code here is finding a good hash regime 51 // to obtain good occupancy for the main character metrics table. 52 // Telling the code where to look can help speed it up. So if you 53 // predefined RESTRICTED_SEARCH at compile time this will avoid a lengthy 54 // search, but if you ever alter the fonts used or any other things 55 // that can inclfluence how the hash table might pack you will need to 56 // to run at least once without that and transcribe the new optimum 57 // information to here. 58 59 #ifdef RESTRICTED_SEARCH 60 // The values here are tolerably close to the expected best answer! 61 #define MAIN_LOW 10056 62 #define MAIN_HIGH 10080 63 #else 64 #define MAIN_LOW (mainkeycount-1) 65 #define MAIN_HIGH (sizeof(hashtable)/sizeof(hashtable[0])) 66 #endif 67 68 // Even if I am going to do a full search (ie regardless of the setting 69 // of RESTRICTED_SEARCH) I will check if a matching is available with the following 70 // parameters. If it is then I do not need to do any more searching at all, 71 // and the code here will run really fast! 
as with RESTRICTED_SEARCH and 72 // if you feed really cautious you should set these to ridiculous values 73 // (eg EXPECTED_TABLESIZE smaller than the amount of data in use so that 74 // things can not possibly fit) after altering font details. 75 76 #ifndef EXPECTED_TABLESIZE 77 #define EXPECTED_TABLESIZE 10057 78 #endif 79 #ifndef EXPECTED_MODULUS2 80 #define EXPECTED_MODULUS2 4955 81 #endif 82 #ifndef EXPECTED_OFFSET2 83 #define EXPECTED_OFFSET2 5000 84 #endif 85 86 // After having used a simple matching process to identify the smallest 87 // hash table that could be used the code will proceed to try all 88 // configurations of that size and find for each the matching that has 89 // lowest weight - in a sense that means it should create a hash table 90 // where as large a proportion of the keys as possible go in the location 91 // that means that access to them only takes one probe. 92 // This uses the "Hungarian" (sometimes known as Munkres) algorithm, and to 93 // avoid the substantial cost I can specify a target figure of merit (ie a 94 // target for the average number of probes that accessing keys will involve. 95 // If the assignment established by EXPECTED_TABLESIZE etc attains this 96 // limit the heavy search for an optimum will not be activated. 97 // 98 // A "merit" value is an floating point value showing the average number 99 // of probes a lookup might take, assuming (in a fairly arbitrary way) that 100 // there are 4 times as many lookups of "IMPORTANT" keys as "STANDARD" 101 // ones. If every key ended up in its first choice position (possible with 102 // a really lightly loaded table) it would end up as 1.0. The worst imaginable 103 // case would have every key in its third choice position and a merit 0f 3.0, 104 // but it is hard to see how to force every key to avoid choices 1 and 2, so 105 // this is an upper bound not to be attained. 
If keys ended up evenly 106 // distributed across the three places that they could go the overall merit 107 // would be 2.0. If everything was IMPORTANT so all keys took either 1 or 2 108 // probes and those two cases were equally likely then the overall value would 109 // be 1.5. 110 // 111 // If you set TARGET_MERIT to 4.0 (ie 4 probes per key) then any matching 112 // at all will beat that and so first solution tried will be accepted. 113 // If on the other hand you set it to 0.0 then no assignment can achieve 114 // that so all possible hash options will be tried to find the best 115 // merit possible. Running the expensive search once and putting the merit 116 // found my it in here will keep you safe and fast! 117 118 #ifndef TARGET_MERIT 119 // The merit score for my "EXPECTED" hash parameters is 1.472. 120 // None of the "important" keys need more than one probe, and for the 121 // "standard" cases there are 5904 cases using 1 probe, 2291 needing 2 122 // and only 1489 that use the full 3 probes. So for all the common keys 123 // access is in just 1 probe, and even if you were to suppose that all 124 // keys were equally probable the average would only be 1.526 probes 125 // per access. 126 // 127 // Finding the values for modulus2 and offset2 took a three of days of CPU 128 // time (albeit mostly not exploiting any parallelism) but now the good 129 // EXPECTED parameters are provided the hash arrangement can be reconstructed 130 // and verified fairly fast. 131 // 132 #define TARGET_MERIT 1.473 133 #endif 134 135 136 137 #include <cstdio> 138 #include <cstdint> 139 #include <cinttypes> 140 #include <cstdlib> 141 #include <cstring> 142 #include <cctype> 143 #include <ctime> 144 #include <climits> 145 146 // This takes a bunch of font information files and creates some C 147 // tables that can be used to access the information rapidly. 
148 149 // Well if I compile this with -DCREATE=1 it makes a program that 150 // inspects the raw font metric and kerning files and creates a file 151 // called charmetrics.h containing a packed version of it. Otherwise it 152 // makes the code that inspects those tables and retrieves information. 153 // The "raw" input files are in ".afm" (Adobe Font Metric) format and 154 // the ones I use here were created from some .otf and .ttf font files 155 // using fontforge. The fonts I start from have generous licenses that 156 // permit re-distribution and so even more so I will hold that the 157 // file generated here that contains metrics is not subject to any 158 // severe limits on its use. 159 160 // I need my cuckoo-hashing library. One sane thing to do would be to 161 // link to that as a separately compiled component, but here I will just 162 // include its source! See cuckoo.h and cuckoo.c for commentary and 163 // explanation. 164 165 #include "cuckoo.cpp" 166 167 // I also generate a charmetrics.red that can give access to the same 168 // information from within Reduce... I am considering use from Java as well 169 // but somewhat amazingly Java deals with statically initialised arrays 170 // in a way that puts limits on their size such as to BREAK this - and 171 // so with a naive conversion I get both a "code too large" and a 172 // "too many constants" error from the Java compiler. The first of these 173 // persists even if I split the main hash table to give just one 174 // column at a time - so an initialised array of 10000 simple long values 175 // more than Java is willing to support. Thus any Java code will need to 176 // load metric information from a resource, and I think that C code that 177 // uses the charmetrics.h file as created here can be used to create the 178 // data that will be loaded. Ugh. 179 180 // I have inspected the fonts that concern me and the sizes 181 // shown here will suffice. 
There are less than 32000 characters in 182 // total defined in all of the fonts I have (odokai is by far 183 // the biggest with over 17K characters defined). There are also less 184 // then 5000 kerning pairs listed. I will in fact have 7 distinct fonts. 185 186 // The code to create the font data tables is very careless and would 187 // be thoroughly succeptible to all sorts of bad effects from buffer overflow 188 // if it were ever to be fed font files other than the ones distributed with 189 // it. Here I follow a path of fixed and unchecked size limits in a search 190 // for code simplicity in a utility I only intend for use in a single 191 // context. But anybody minded to add extra fonts MUST be aware and should 192 // check all the limits before running anything. 193 194 // My code here is also not especially tidy, and it uses C-99 style "//" 195 // comments (as here!) and <stdint.h> with uint64_t, UINT64_C() and their 196 // friends. In fact gcc has supported both of these for some time now and 197 // since this is a run-once sort of program I feel I can rely on using 198 // either a tolerably recent gcc or some other C compiler that supports 199 // C-99. 200 201 202 // "wc -L" tells me that all my font-metric files have lines that are 203 // less than 1750 characters long. The worst case is for the cmuntt font 204 // where thare are a large number of ligatures specified for "space" 205 // followed by various characters (that I believe are probably all the 206 // combining characters present, so that putting a space ahead of any of them 207 // can lead to a conversion to a non-combining variant. 208 209 // Having statically fixed limits here simplifies my coding. 
210 211 #define MAXFONTS 16 212 #define MAXCHARS 50000 213 #define MAXKERNS 10000 214 #define MAXLIGATURES 1000 215 #define MAXLINE 2000 216 #define MAXMATHSYMS 200 217 218 #else // CREATE 219 #ifdef TEST 220 #include <cstdio> 221 #include <cstdlib> 222 #include <cstdint> 223 #include <cinttypes> 224 #include "cuckoo.h" 225 #else // TEST 226 #include "headers.h" 227 #include "cuckoo.h" 228 #endif // TEST 229 230 #include "charmetrics.h" // A file I must have created earlier 231 #endif // CREATE 232 233 #define IS_LIGATURE 0x00200000 234 #define IS_BLOCKEND 0x00400000 235 236 // The list of font codes here must be kept in step with the list 237 // of names in the table. 238 239 #define F_cmuntt 0 240 #define F_odokai 1 241 #define F_Regular 2 242 #define F_Bold 3 243 #define F_Italic 4 244 #define F_BoldItalic 5 245 #define F_Math 6 246 #define F_end 7 247 248 // In the ".afm" files and hence the main metrics I work with each 249 // character is measured and positioned relative to a base-line. However 250 // when wxWidgets draws things it uses the top left corner of a bounding 251 // box for text as its reference point. However the "bounding box" used 252 // does not seem to be related to any information I can find in the .afm 253 // file so I file values by running a wxWidgets program and seeing what 254 // it reports and put in a table of values here... 255 256 // To re-calculate these values you could check out a current of 257 // Reduce, configure "--with-csl --with-wx", make wxshowmath and run 258 // wxshowmath on wxdata/fontsizes.dat. The trace output should include 259 //.... Need to process CMU Typewriter Text 260 //.... Gives CMU Typewriter Text with flags 0 261 //.... font[0] = "CMU Typewriter Text" size 10000 262 //.... ( CMU Typewriter Text/10000: 12597.7 2330 [10267.7] 263 //.... 1027, // cmuntt 264 //.... from table baseline offset = 10270 265 //.... convert odokai 266 //.... Need to process AR PL New Kai 267 //.... 
Gives AR PL New Kai with flags 10000 268 //.... font[1] = "AR PL New Kai" size 10000 269 //.... ( AR Pl New Kai/10000: 10693.4 1210.94 [9482.42] 270 //.... 1055, // odokai 271 // and I extracted the information I need here using 272 // grep " // " wxshowmath.log > DESTINATION 273 // I edited the file to remove a comma after the final entry... 274 // (revisions much before then may not display the relevant 275 // information - ones significantly after may have removed the trace 276 // output...). 277 278 279 // I note with some distress that the adjustments needed here differ 280 // across operating systems. That backs up the fact that this data can not 281 // be deduced from a set of Adobe Font Metrics. 282 // 283 // So I provide three versions of this table (it is not very large) and 284 // a tolerably cheap run-time test can pick which one to use. I feel it is 285 // nicer for my runtime code to check no more than 3 cases to choose between 286 // these tables rather than getting it to measure all the fonts. 287 // 288 // With cslSTIX it seems that the X11 and OS/X measurements match - but 289 // I will nevertheless provide three versions here just in a spirit of 290 // caution. 

// The table currently in force. It starts out unset and is expected to be
// pointed at one of the three per-platform tables that follow.
const std::uint16_t *chardepth = nullptr;

// Every table has one entry per font, listed in the same order as the
// F_xxx font codes: cmuntt, odokai, Regular, Bold, Italic, BoldItalic, Math.

const std::uint16_t chardepth_WIN32[] =
{
    1027,     // cmuntt
    948,      // odokai
    1023,     // Regular
    1023,     // Bold
    1023,     // Italic
    1023,     // BoldItalic
    2566      // Math
};

const std::uint16_t chardepth_X11[] =
{
    1027,     // cmuntt
    885,      // odokai
    1023,     // Regular
    1055,     // Bold
    1023,     // Italic
    1004,     // BoldItalic
    2566      // Math
};

const std::uint16_t chardepth_OSX[] =
{
    1027,     // cmuntt
    885,      // odokai
    1023,     // Regular
    1055,     // Bold
    1023,     // Italic
    1004,     // BoldItalic
    2566      // Math
};

// Names of the fonts, indexed by font code. Only the first seven slots
// are given values here; the rest of the 31 entries are left as null
// pointers.
const char *fontnames[31] =
{
    "cmuntt",
    "odokai",
    "cslSTIX-Regular",
    "cslSTIX-Bold",
    "cslSTIX-Italic",
    "cslSTIX-BoldItalic",
    "cslSTIXMath-Regular"
};

// The font metric information I use will be quite bulky, so I will be
// trying to balance speed, compactness and simplicity here.
// I will only need to support the fonts listed above, so I will
// exploit some observed features. The first is that cmuntt and odokai
// (almost) only use the Basic Multilingual Plane (ie U+0000 to U+FFFF).
// The STIX fonts use nothing in the range U+4000 to U+A000. But then
// STIXMath uses code in the range U+1D400 to U+1D800 and U+108000 to
// U+108400.
// The other STIX fonts have subsets of the same use. So there I will map
// U+1Dxxx to U+4xxx and U+108xxx to U+5xxx, having mapped anything in either
// of those ranges to the illegal code U+FFFF. That leaves all codes as
// just 16 bits.
// Since I have fewer than 8 fonts I can use 3 bits to indicate a font,
// and the squashed 16-bit "remapped codepoint" plus 3 bits of font leaves
// me needing 19 bits in all.
// My hash table format could cope even if I needed up to 21-bits of
// full key... so I have some slack available if needed (eg if I wanted to
// support more fonts).

// Squash a (font, codepoint) pair into a single integer: the font number
// goes in the bits from 16 upwards and a remapped 16-bit codepoint in the
// low 16 bits. Codepoints that cannot be represented are turned into the
// illegal code 0xffff.
static int pack_character(int font, int codepoint)
{
// The cases that apply here are
//    cmuntt     U+10144 - U+10147
//               The above are the only characters present in
//               cmuntt that have codes over U+ffff.
//    cslSTIX*   U+1dxxx  to U+4xxx
//               U+108xxx to U+5xxx
    const int illegal = 0xffff;
    int squashed = codepoint;
    if (font < 2)
    {
// For the first two fonts anything with all the 0xd800 bits set is
// rejected, and U+10000 to U+107ff is folded down to start at 0xd800.
        if ((codepoint & 0xd800) == 0xd800) squashed = illegal;
        else if (codepoint >= 0x10000)
            squashed = (codepoint <= 0x107ff) ?
                       0xd800 + (codepoint & 0x7ff) :
                       illegal;
    }
    else
    {
// For the remaining fonts the range 0x4000 to 0x7fff is reserved as the
// place that the two blocks of high codepoints get moved to.
        if (0x4000 <= codepoint && codepoint < 0x8000)
            squashed = illegal;
        else if (0x1d000 <= codepoint && codepoint <= 0x1dfff)
            squashed = 0x4000 + (codepoint & 0xfff);
        else if (0x108000 <= codepoint && codepoint <= 0x108fff)
            squashed = 0x5000 + (codepoint & 0xfff);
        else if (codepoint >= 0x10000)
            squashed = illegal;
    }
// I need the bottom two bits of this packed code to be the bottom
// two bits of the codepoint because my hash table will be using
// buckets of four adjacent codepoints.
    return (font << 16) | squashed;
}

// I will store information in a hash table that puts four codepoints per
// "hash table line". Each hash table entry will involve five 64-bit words.
// The first will contain the key and some information to help with kerning
// and ligatures. Because I will have a line size of 4 I only need to use 19
// bits of key. That leaves me with space to put four 11-bit kern entries
// in, one for each of the 4 codepoints covered. There will be a 16-entry
// table indexed by font that gives a value to be added to one of these
// offsets. That allows for up to 2048 kern entries per font. I count 1016
// kern declarations and 16 ligature declarations for STIX-Regular
// and those together add up to 1032 which fits reasonably.
//
// I assessed having individual entries in the hash table and line sizes
// of 2 and 8 as well as 4. A line size of 8 saves a small amount of space
// but at the cost of seeming noticeably messier. A line size of only 2
// consumes distinctly more memory.
//
// The other four 64-bit values each hold four 13-bit fields and one 12
// bit one. These store the character width and its bounding box. For my
// fonts I observe
//        0 <= width <= 3238         use 13 bits unsigned
//     -998 <= llx   <= 929          use 13 bits unsigned offset by -3000
//     -524 <= lly   <= 843          use *12* bits unsigned offset by -1000
//     -234 <= urx   <= 3238         use 13 bits unsigned offset by -500
//     -141 <= ury   <= 1055         use 13 bits unsigned offset by -1000
// that packing is a bit ugly but ends up using exactly 64 bits which is
// really convenient.

// When a character starts a kern I have an 11 bit index value that gets added
// to a further value that depends on the font to give an offset into a table
// of 32-bit words. Each word contains
//    last item mark      1 bit set if this is the last item of
//                        kern information associated with the current
//                        lead character.
//    kern-or-ligature    1 bit flag set if this is ligature not kern data.
//    successor char      21-bit codepoint for a successor character. Note
//                        that this will always be in the same font. The
//                        codepoint is stored plain here with no compression
//                        or adjustment.
//    offset              for kerning this is a 9-bit signed adjustment
//                        to make the spacing. For a ligature it is a
//                        9 bit offset into a table of 21-bit codepoints
//                        for replacement characters. I only seem to see
//                        77 cases of ligatures in all so this final
//                        table is not too bulky.
// The kern index values start at 1 so that a kern index of zero can
// indicate no kerning is needed. The range of kern adjustment I see in my
// fonts is -149 to 87, and so the 9-bit field I have (coping with -256 to
// +255) will suffice comfortably.
430 // 431 432 #ifdef CREATE 433 434 // In the official full list of Unicode names there are some remarkably 435 // long names used, of which maybe the worst is 436 // "CLOCKWISE RIGHTWARDS AND LEFTWARDS OPEN CIRCLE ARROWS WITH 437 // CIRCLED ONE OVERLAY" 438 // however the fonts I use here all have embedded names that are reasonably 439 // short. I would detect it if any were longer than 120 characters and stop. 440 // If that happened I would merely increase MAXUNILEN here. The names present 441 // while processing fonts here are purely local to the treatment here (they 442 // are used to link kerning tables). 443 444 #define MAXUNILEN 120 445 446 static int charcount = 0; 447 static int fontkey[MAXCHARS]; 448 static int32_t codepoint[MAXCHARS]; 449 static int mainkeycount; 450 static uint32_t mainkey[MAXCHARS]; 451 static int32_t width[MAXCHARS]; 452 static int32_t llx[MAXCHARS]; 453 static int32_t lly[MAXCHARS]; 454 static int32_t urx[MAXCHARS]; 455 static int32_t ury[MAXCHARS]; 456 static char uninames[MAXCHARS][MAXUNILEN]; 457 static int kernreference[MAXCHARS]; 458 459 // For ligature information I will store the identity of the 460 // start character and then the names of the follower and the 461 // replacement. 462 463 static int nligatures = 0; 464 static int ligfont[MAXLIGATURES]; 465 static int32_t ligstart[MAXLIGATURES]; 466 static char ligfollow[MAXLIGATURES][MAXUNILEN]; 467 static char ligreplacement[MAXLIGATURES][MAXUNILEN]; 468 static int32_t ligfollowcode[MAXLIGATURES]; 469 static int32_t ligreplacementcode[MAXLIGATURES]; 470 471 // For kerning information I will store the identity of the 472 // start and follow characters and the integer adjustment to be made. 
473 474 static int nkerns = 0; 475 static int kernfont[MAXKERNS]; 476 static char kernstart[MAXKERNS][MAXUNILEN]; 477 static char kernfollow[MAXKERNS][MAXUNILEN]; 478 static int kernadjustment[MAXKERNS]; 479 static int32_t kernstartcode[MAXKERNS]; 480 static int32_t kernfollowcode[MAXKERNS]; 481 482 static int kernp = 0; 483 static std::int16_t fontkern[F_end]; 484 static uint32_t kerntable[MAXKERNS]; 485 static char ktstart[MAXKERNS][MAXUNILEN]; 486 static char ktfollow[MAXKERNS][MAXUNILEN]; 487 static int ktadjustment[MAXKERNS]; 488 static char ktfont[MAXKERNS][32]; 489 static int ktfontn[MAXKERNS]; 490 491 static int ligp = 0; 492 static uint32_t ligtable[MAXLIGATURES]; 493 static char ltfirst[MAXLIGATURES][MAXUNILEN], 494 ltfollow[MAXLIGATURES][MAXUNILEN], 495 ltname[MAXLIGATURES][MAXUNILEN], 496 ltfont[MAXLIGATURES][32]; 497 498 static int accentp = 0; 499 static char accentname[MAXMATHSYMS][MAXUNILEN]; 500 static uint32_t accentnum[MAXMATHSYMS]; 501 static int32_t accentval[MAXMATHSYMS]; 502 503 static int variantp = 0; 504 static int variantdirection[MAXMATHSYMS]; 505 static char variantname[MAXMATHSYMS][MAXUNILEN]; 506 static char 507 v1[MAXMATHSYMS][MAXUNILEN]; // size 1 (just bigger than basic) 508 static char v2[MAXMATHSYMS][MAXUNILEN]; // size 2 509 static char v3[MAXMATHSYMS][MAXUNILEN]; // size 3 510 static char v4[MAXMATHSYMS][MAXUNILEN]; // size 4 511 static char v5[MAXMATHSYMS][MAXUNILEN]; // size 5 (biggest) 512 static char P1[MAXMATHSYMS][MAXUNILEN]; // top top top 513 static char 514 P2[MAXMATHSYMS][MAXUNILEN]; // extension extension extension 515 static char P3[MAXMATHSYMS][MAXUNILEN]; // middle bottom 516 static char P4[MAXMATHSYMS][MAXUNILEN]; // extension 517 static char P5[MAXMATHSYMS][MAXUNILEN]; // bottom 518 // Now the same converted to codepoints rather than names. 
519 static int32_t variantcode[MAXMATHSYMS]; 520 static int32_t 521 nv1[MAXMATHSYMS]; // size 1 (just bigger than basic) 522 static int32_t nv2[MAXMATHSYMS]; // size 2 523 static int32_t nv3[MAXMATHSYMS]; // size 3 524 static int32_t nv4[MAXMATHSYMS]; // size 4 525 static int32_t nv5[MAXMATHSYMS]; // size 5 (biggest) 526 static int32_t np1[MAXMATHSYMS]; // bottom piece 527 static int32_t np2[MAXMATHSYMS]; // extension 528 static int32_t np3[MAXMATHSYMS]; // middle piece 529 static int32_t np4[MAXMATHSYMS]; // extension 530 static int32_t np5[MAXMATHSYMS]; // top piece 531 static int vdata1[MAXMATHSYMS][4]; // start end full flag 532 static int vdata2[MAXMATHSYMS][4]; 533 static int vdata3[MAXMATHSYMS][4]; 534 static int vdata4[MAXMATHSYMS][4]; 535 static int vdata5[MAXMATHSYMS][4]; 536 537 538 // It will be necessary at times to look up a name given its name. I will do 539 // that with a crude linear search because I do not expect this to be a 540 // performance-limiting part of this whole program. 541 542 int32_t decodename(int fontnum, const char *name) 543 { int i; 544 for (i=0; i<charcount; i++) 545 if (fontnum == fontkey[i] && 546 std::strcmp(name, uninames[i]) == 0) 547 return codepoint[i]; 548 std::printf("Character called %s not found in font %d\n", name, 549 fontnum); 550 std::exit(EXIT_FAILURE); 551 } 552 553 // The hash table will end up holding information about around 32000 554 // characters. It is arranged in lines each of which store data on 555 // four characters, so assuming few isolated character codes that 556 // means it will use around 8000 entries. I make its size a prime. 557 // If I make it 10091 then my tables are fairly full - around 99.5%. 558 // The hashing scheme I use will guarantee that not many probes are 559 // needed even at this high loading level!!! 560 561 #define MAXCHAR_METRICS_TABLE_SIZE 20000 562 563 // This is around 400 Kbytes... I tend to count that as quite large. 
564 // By far the largest contribution to it is data from odokai.afm. 565 // Although almost all characters there are specified with a width of 1000 566 // the character bounding boxes are all individual and varied, and so trying 567 // to save space by having an index of bounding boxes does not appear to be 568 // useful. 569 570 static uint64_t hashtable[MAXCHAR_METRICS_TABLE_SIZE][5]; 571 // The following smaller array is used with the Hungarian algorithm... 572 static uint32_t uint32hashtable[MAXCHAR_METRICS_TABLE_SIZE]; 573 574 static int main_importance(uint32_t key) 575 { int font = key >> 16; 576 if (font != F_cmuntt && font != F_Math) return CUCKOO_STANDARD; 577 key &= 0xffff; 578 // codepoints U+0000 to U+007f have a very special status, and 579 // I will insist that they are always processed in one probe. 580 if ((key & 0xffff) < 0x80/4) return CUCKOO_VITAL; 581 // The fixed pitch font used for most input and the Maths font used for 582 // most output will be encouraged to use at most two probes. 583 if (font == F_cmuntt) 584 { if (key < 0x0600/4 || 585 (0x2000/4 <= key && key < 0x3000/4) || 586 (0xfb00/4 <= key && key < 0xfc00/4)) return CUCKOO_IMPORTANT; 587 } 588 else if (font == F_Math) 589 { if (key < 0x0600/4 || 590 (0x2000/4 <= key && key < 0x2400/4) || 591 (0x2900/4 <= key && key < 0x2c00/4) || 592 (0x4000/4 <= key && key < 0x6000/4) || 593 (0xfb00/4 <= key && key < 0xfc00/4)) return CUCKOO_IMPORTANT; 594 } 595 // Other characters get standard treatment and are allowed up to three probes. 596 return CUCKOO_STANDARD; 597 } 598 599 static uint32_t main_get(void *p) 600 { 601 // The FULL key may be up to 21-bits but because I then have hash-table lines 602 // with 4 items in I only use 19-bit keys here. In reality with the packing 603 // scheme used at present I only ise 19-bit full keys and hence 17 bits here, 604 // so there are two bits available for future expansion if necessary. 
605 return *(uint32_t *)p & 0x0007ffff; 606 } 607 static void main_set(void *p, uint32_t key) 608 { *(uint32_t *)p = (*(uint32_t *)p & 0xfff80000) | 609 (key & 0x0007ffff); 610 } 611 612 613 // I will have a separate hash table to map cslSTIXMath-Regular characters 614 // onto signed 10-bit values that give information about the horizontal 615 // placement for accents to be places above characters. This is just for 616 // cstSTIXMath-Regular because that is the only font that I am using that 617 // has this information embedded within it. Both key and value can pack 618 // into a 32-bit integere here. 619 620 #define MAXTOPCENTRESIZE 500 621 static int32_t topcentre[MAXTOPCENTRESIZE]; 622 623 624 static int accent_importance(uint32_t key) 625 { return CUCKOO_IMPORTANT; 626 } 627 628 629 static uint32_t accent_get(void *p) 630 { return *(uint32_t *)p; 631 } 632 static void accent_set(void *p, uint32_t key) 633 { *(uint32_t *)p = key; 634 } 635 636 637 // Another table will take characters to "larger variants". So for instance 638 // a left parenthesis will have five gradually larger versions. 639 640 static int variantsize = MAXMATHSYMS; 641 static uint32_t variant_table[MAXMATHSYMS][6]; 642 643 static int variant_importance(uint32_t key) 644 { return CUCKOO_STANDARD; 645 } 646 647 static uint32_t variant_get(void *p) 648 { return *(uint32_t *)p; 649 } 650 static void variant_set(void *p, uint32_t key) 651 { *(uint32_t *)p = key; 652 } 653 654 // Yet another is for the ways to build up huge symbols out of multiple 655 // glyphs. 
656 657 static int extensionsize = MAXMATHSYMS; 658 static uint32_t extension_table[MAXMATHSYMS][11]; 659 660 static int extension_importance(uint32_t key) 661 { return CUCKOO_STANDARD; 662 } 663 664 static uint32_t extension_get(void *p) 665 { return *(uint32_t *)p; 666 } 667 static void extension_set(void *p, uint32_t key) 668 { *(uint32_t *)p = key; 669 } 670 671 672 static char line[MAXLINE]; 673 static char saveline[MAXLINE]; 674 static char segment[MAXLINE]; 675 676 int maxw=-10000, maxllx=-10000, maxlly=-10000, maxurx=-10000, 677 maxury=-10000; 678 int minw=10000, minllx=10000, minlly=10000, minurx=10000, 679 minury=10000; 680 681 int main(int argc, char *argv[]) 682 { const char *f; 683 int pass, fontnum, best; 684 char *p, *q; 685 int relevant = 0; 686 int kerndata = 0; 687 int topaccent = 0; 688 int variant = 0; 689 std::FILE *src; 690 std::time_t ttt; 691 char filename[100]; 692 int i, probes = 0, p1 = 0, p2 = 0, n1 = 0, n2 = 0, 693 occupancy = 0, fail, qq; 694 CREATEMUTEX; 695 CREATELOGMUTEX; 696 #ifndef _WIN32 697 pthread_mutex_lock(&condmutex); 698 #endif 699 std::setvbuf(stdout, nullptr, _IONBF, 1); 700 //========================================================================== 701 // (1) Read in all the metrics 702 //========================================================================== 703 nkerns = charcount = 0; 704 // I will map characters from u+000000 to u+01ffff but not beyond - that 705 // way I will only need 17 bits to specify a codepoint. 
706 for (fontnum=0; fontnum<F_end; fontnum++) 707 { f = fontnames[fontnum]; 708 std::printf("Process font %s\n", f); 709 relevant = kerndata = topaccent = variant = 0; 710 std::sprintf(filename, "wxfonts/metrics/%s.afm", f); 711 if ((src = std::fopen(filename, "r")) == nullptr) 712 { std::printf("Unable to access %s\n", filename); 713 std::exit(EXIT_FAILURE); 714 } 715 for (;;) 716 { int ia, ib, ic, id; 717 int32_t cp, wid, bb1, bb2, bb3, bb4; 718 char unn[MAXLINE], lig1[MAXLINE], lig2[MAXLINE]; 719 cp = -1; 720 wid = bb1 = bb2 = bb3 = bb4 = 0; 721 unn[0] = lig1[0] = lig2[0] = 0; 722 ia = ib = ic = id = 0; 723 if (std::fgets(line, sizeof(line)-1, src) == nullptr) break; 724 if (std::strncmp(line, "EndFontMetrics", 14) == 0) break; 725 ia = static_cast<int>(std)::strlen(line); 726 while (ia >= 0 && 727 (line[ia] == 0 || line[ia] == '\n' || line[ia] == '\r')) 728 ia--; 729 line[ia+1] = 0; // discard final newline 730 if (ia == 0) break; 731 std::strcpy(saveline, line); 732 if (std::strncmp(line, "StartCharMetrics", 16) == 0) 733 { relevant = 1; 734 continue; 735 } 736 if (std::strncmp(line, "EndCharMetrics", 14) == 0) 737 { relevant = 0; 738 continue; 739 } 740 if (std::strncmp(line, "StartKernPairs", 14) == 0) 741 { kerndata = 1; 742 continue; 743 } 744 if (std::strncmp(line, "EndKernPairs", 12) == 0) 745 { kerndata = 0; 746 continue; 747 } 748 if (std::strncmp(line, "StartTopAccent", 14) == 0) 749 { topaccent = 1; 750 continue; 751 } 752 if (std::strncmp(line, "EndTopAccent", 12) == 0) 753 { topaccent = 0; 754 continue; 755 } 756 if (std::strncmp(line, "StartVariations", 15) == 0) 757 { variant = 1; 758 continue; 759 } 760 if (std::strncmp(line, "EndVariations", 13) == 0) 761 { variant = 0; 762 continue; 763 } 764 if (kerndata) 765 { if (std::sscanf(line, "KPX %s %s %d", lig1, lig2, &ia) == 3) 766 { kernfont[nkerns] = fontnum; 767 std::strcpy(kernstart[nkerns], lig1); 768 std::strcpy(kernfollow[nkerns], lig2); 769 kernadjustment[nkerns] = ia; 770 #if 0 771 
std::printf("[%d] %s + %s => %d\n", nkerns, lig1, lig2, ia); 772 #endif 773 nkerns++; 774 } 775 else 776 { std::printf("Dubious kerning data %s\n", line); 777 continue; 778 } 779 continue; 780 } 781 if (topaccent) 782 { if (std::sscanf(line, "N %s ; DX %d", accentname[accentp], 783 &accentval[accentp]) == 2) 784 accentp++; 785 #if 0 786 std::printf("%d: %s\n", accentp, line); 787 #endif 788 continue; 789 } 790 if (variant) 791 { int some = 0; 792 // Variant lines can be horribly long! They start VX or HX for vertical 793 // or horizontal variations. I used code 1 for horizontal, 0 for vertical. 794 if (std::sscanf(line, "VX %s ;", variantname[variantp]) == 1) 795 variantdirection[variantp] = 0; 796 else if (std::sscanf(line, "HX %s ;", variantname[variantp]) == 1) 797 variantdirection[variantp] = 1; 798 else continue; 799 // printf("Variant record %d (%d) for %s\n", variantp, variantdirection[variantp], variantname[variantp]); 800 // before collecting data I zero out all the relevant fields so that 801 // when data is not present I end up in a sane state. 
802 v1[variantp][0] = 0; 803 v2[variantp][0] = 0; 804 v3[variantp][0] = 0; 805 v4[variantp][0] = 0; 806 v5[variantp][0] = 0; 807 P1[variantp][0] = 0; 808 P2[variantp][0] = 0; 809 P3[variantp][0] = 0; 810 P4[variantp][0] = 0; 811 P5[variantp][0] = 0; 812 for (i=0; i<4; i++) vdata1[variantp][i] = 0; 813 for (i=0; i<4; i++) vdata2[variantp][i] = 0; 814 for (i=0; i<4; i++) vdata3[variantp][i] = 0; 815 for (i=0; i<4; i++) vdata4[variantp][i] = 0; 816 for (i=0; i<4; i++) vdata5[variantp][i] = 0; 817 p = std::strchr(line, ';'); 818 if (p!=nullptr & std::sscanf(p, "; V1 %s ;", v1[variantp]) == 1) 819 { p = std::strchr(p+1, ';'); 820 some = 1; 821 } 822 if (p!=nullptr & std::sscanf(p, "; V2 %s ;", v2[variantp]) == 1) 823 { p = std::strchr(p+1, ';'); 824 some = 1; 825 } 826 if (p!=nullptr & std::sscanf(p, "; V3 %s ;", v3[variantp]) == 1) 827 { p = std::strchr(p+1, ';'); 828 some = 1; 829 } 830 if (p!=nullptr & std::sscanf(p, "; V4 %s ;", v4[variantp]) == 1) 831 { p = std::strchr(p+1, ';'); 832 some = 1; 833 } 834 if (p!=nullptr & std::sscanf(p, "; V5 %s ;", v5[variantp]) == 1) 835 { p = std::strchr(p+1, ';'); 836 some = 1; 837 } 838 if (p!=nullptr & std::sscanf(p, "; P1 %s %d %d %d %d ;", 839 P1[variantp], 840 &vdata1[variantp][0], &vdata1[variantp][1], 841 &vdata1[variantp][2], &vdata1[variantp][3]) == 5) 842 { p = std::strchr(p+1, ';'); 843 some = 1; 844 } 845 if (p!=nullptr & std::sscanf(p, "; P2 %s %d %d %d %d ;", 846 P2[variantp], 847 &vdata2[variantp][0], &vdata2[variantp][1], 848 &vdata2[variantp][2], &vdata2[variantp][3]) == 5) 849 { p = std::strchr(p+1, ';'); 850 some = 1; 851 } 852 if (p!=nullptr & std::sscanf(p, "; P3 %s %d %d %d %d ;", 853 P3[variantp], 854 &vdata3[variantp][0], &vdata3[variantp][1], 855 &vdata3[variantp][2], &vdata3[variantp][3]) == 5) 856 { p = std::strchr(p+1, ';'); 857 some = 1; 858 } 859 if (p!=nullptr & std::sscanf(p, "; P4 %s %d %d %d %d ;", 860 P4[variantp], 861 &vdata4[variantp][0], &vdata4[variantp][1], 862 &vdata4[variantp][2], 
&vdata4[variantp][3]) == 5) 863 { p = std::strchr(p+1, ';'); 864 some = 1; 865 } 866 if (p!=nullptr & std::sscanf(p, "; P5 %s %d %d %d %d ;", 867 P5[variantp], 868 &vdata5[variantp][0], &vdata5[variantp][1], 869 &vdata5[variantp][2], &vdata5[variantp][3]) == 5) 870 { p = std::strchr(p+1, ';'); 871 some = 1; 872 } 873 if (some) 874 { 875 #if 0 876 std::printf("%d: (%d) %s\n", variantp, 877 variantdirection[variantp], variantname[variantp]); 878 std::printf(" sizes: %s %s %s %s %s\n", 879 v1[variantp], v2[variantp], 880 v3[variantp], v4[variantp], 881 v5[variantp]); 882 if (P1[variantp] != 0) std::printf(" huge1: %s %d %d %d %d\n", 883 P1[variantp], vdata1[variantp][0], 884 vdata1[variantp][1], vdata1[variantp][2], 885 vdata1[variantp][3]); 886 if (P2[variantp] != 0) std::printf(" huge2: %s %d %d %d %d\n", 887 P2[variantp], vdata2[variantp][0], 888 vdata2[variantp][1], vdata2[variantp][2], 889 vdata2[variantp][3]); 890 if (P3[variantp] != 0) std::printf(" huge3: %s %d %d %d %d\n", 891 P3[variantp], vdata3[variantp][0], 892 vdata3[variantp][1], vdata3[variantp][2], 893 vdata3[variantp][3]); 894 if (P4[variantp] != 0) std::printf(" huge4: %s %d %d %d %d\n", 895 P4[variantp], vdata4[variantp][0], 896 vdata4[variantp][1], vdata4[variantp][2], 897 vdata4[variantp][3]); 898 if (P5[variantp] != 0) std::printf(" huge5: %s %d %d %d %d\n", 899 P5[variantp], vdata5[variantp][0], 900 vdata5[variantp][1], vdata5[variantp][2], 901 vdata5[variantp][3]); 902 #endif 903 variantp++; 904 } 905 continue; 906 } 907 if (relevant == 0) continue; 908 // Now line contains character information. This may include 909 // C nnn decimal code point 910 // WX xxx hexacedimal width 911 // N word unicode character name, needed for kern tables 912 // B nn nn nn nn character bounding box 913 // L word word ligature specification 914 // with each of these separated by a semicolon. 
915 p = line; 916 while (p != nullptr) 917 { q = std::strchr(p, ';'); 918 if (q != nullptr) *q = 0; 919 // Process segment starting at p 920 while (*p == ' ' || *p == '\n' || *p == '\r') p++; 921 if (*p == 0) break; // empty segment 922 switch (*p) 923 { case 'C': 924 if (std::sscanf(p, "C %d", &ia) != 1) 925 { std::printf("Bad segment \"%s\" in .afm file\n", p); 926 std::exit(EXIT_FAILURE); 927 } 928 cp = ia; 929 break; 930 case 'W': 931 if (std::sscanf(p, "WX %d", &ia) != 1) 932 { std::printf("Bad segment \"%s\" in .afm file\n", p); 933 std::exit(EXIT_FAILURE); 934 } 935 wid = ia; 936 if (wid > maxw) maxw = wid; 937 if (wid < minw) minw = wid; 938 break; 939 case 'N': 940 if (std::sscanf(p, "N %s", unn) != 1) 941 { std::printf("Bad segment \"%s\" in .afm file\n", p); 942 std::exit(EXIT_FAILURE); 943 } 944 if (std::strlen(unn) >= MAXUNILEN) 945 { std::printf("Unicode name length = %d\n", 946 static_cast<int>(std)::strlen(unn)); 947 std::printf("%d: %s\n", static_cast<int>(std)::strlen(unn), unn); 948 std::exit(EXIT_FAILURE); 949 } 950 if (cp == -1) 951 { if (std::sscanf(unn, "u%x", &ia) == 1) cp = ia; 952 else if (std::sscanf(unn, "uni%x", &ia) == 1) cp = ia; 953 else if (std::strcmp(unn, ".notdef") != 0) 954 std::printf("Dodgy character: %s\n", saveline); 955 } 956 break; 957 case 'B': 958 if (std::sscanf(p, "B %d %d %d %d", &ia, &ib, &ic, &id) != 4) 959 { std::printf("Bad segment \"%s\" in .afm file\n", p); 960 std::exit(EXIT_FAILURE); 961 } 962 bb1 = ia; bb2 = ib; bb3 = ic; bb4 = id; 963 if (bb1 > maxllx) maxllx = bb1; 964 if (bb1 < minllx) minllx = bb1; 965 if (bb2 > maxlly) maxlly = bb2; 966 if (bb2 < minlly) minlly = bb2; 967 if (bb3 > maxurx) maxurx = bb3; 968 if (bb3 < minurx) minurx = bb3; 969 if (bb4 > maxury) maxury = bb4; 970 if (bb4 < minury) minury = bb4; 971 break; 972 case 'L': 973 // All I can do with ligature information on a first pass is to record 974 // it rather literally. 
That is because it may contain forward references 975 // to character names. 976 if (std::sscanf(p, "L %s %s", lig1, lig2) != 2) 977 { std::printf("Bad segment \"%s\" in .afm file\n", p); 978 std::exit(EXIT_FAILURE); 979 } 980 // I observe some redundant ligature statements in the font metrics I use, 981 // so that the same information appears twice in a row. I filter that 982 // case out here. 983 if (nligatures == 0 || 984 fontnum != ligfont[nligatures-1] || 985 cp != ligstart[nligatures-1] || 986 std::strcmp(lig1, ligfollow[nligatures-1]) != 0) 987 { ligfont[nligatures] = fontnum; 988 ligstart[nligatures] = cp; 989 std::strcpy(ligfollow[nligatures], lig1); 990 std::strcpy(ligreplacement[nligatures], lig2); 991 nligatures++; 992 } 993 break; 994 case 0: 995 break; 996 default: 997 std::printf("Unknown segment \"%s\" in .afm file\n", p); 998 std::printf("Input line: \"%s\"\n", saveline); 999 std::exit(EXIT_FAILURE); 1000 } 1001 if (q == nullptr) break; 1002 else p = q+1; 1003 } 1004 // The information I now have is 1005 // fontnum, cp key 1006 // wid, bb1, bb2, bb3, bb4, unn data 1007 if (cp < 0 || cp > 0x10ffff) 1008 { if (std::strcmp(unn, ".notdef") != 0) 1009 std::printf("Discarding character <%s>" 1010 " with codepoint %#x = %d\n", 1011 unn, cp, cp); 1012 continue; 1013 } 1014 if (cp >= 0xd000 && cp < 0xe000) 1015 std::printf("Codepoint %d U+%x noted : probably invalid in %s\n", 1016 cp, cp, f); 1017 // Note that cmuntt has 4 characters beyond the basic multilingual pane - 1018 // for GREEK ACROPHONIC ATTIC FIFTY etc at U+10144. They look like capitals 1019 // Delta, H, X and M each with a border to left, right and top. I will 1020 // detect these here but then not support their use! 
1021 if (cp > 0xffff && 1022 !(cp >= 0x1d000 && cp < 0x1e000) && 1023 !(cp >= 0x108000 && cp < 0x109000)) 1024 std::printf("Codepoint %d U+%x noted : probably invalid in %s\n", 1025 cp, cp, f); 1026 fontkey[charcount] = fontnum; 1027 codepoint[charcount] = cp; 1028 width[charcount] = wid; 1029 std::strcpy(uninames[charcount], unn); 1030 llx[charcount] = bb1; 1031 lly[charcount] = bb2; 1032 urx[charcount] = bb3; 1033 ury[charcount] = bb4; 1034 charcount++; 1035 } 1036 std::fclose(src); 1037 } 1038 std::printf("About to resolve kern and ligature names\n"); 1039 std::printf("nkerns = %d nligatures = %d\n", nkerns, nligatures); 1040 for (i=0; i<nkerns; i++) 1041 kernstartcode[i] = decodename(kernfont[i], kernstart[i]); 1042 for (i=0; i<nkerns; i++) 1043 kernfollowcode[i] = decodename(kernfont[i], kernfollow[i]); 1044 for (i=0; i<nligatures; i++) 1045 ligfollowcode[i] = decodename(ligfont[i], ligfollow[i]); 1046 for (i=0; i<nligatures; i++) 1047 ligreplacementcode[i] = decodename(ligfont[i], ligreplacement[i]); 1048 1049 // Now I will try to do something about the topcentre table... 
1050 for (i=0; i<accentp; i++) 1051 accentnum[i] = decodename(F_Math, accentname[i]); 1052 std::printf("Accent position tables processed\n"); 1053 for (i=0; i<variantp; i++) 1054 { variantcode[i] = decodename(F_Math, variantname[i]) | 1055 (variantdirection[i] << 21); 1056 if (v1[i][0] != 0) nv1[i] = decodename(F_Math, v1[i]); 1057 else nv1[i] = 0; 1058 if (v2[i][0] != 0) nv2[i] = decodename(F_Math, v2[i]); 1059 else nv2[i] = 0; 1060 if (v3[i][0] != 0) nv3[i] = decodename(F_Math, v3[i]); 1061 else nv3[i] = 0; 1062 if (v4[i][0] != 0) nv4[i] = decodename(F_Math, v4[i]); 1063 else nv4[i] = 0; 1064 if (v5[i][0] != 0) nv5[i] = decodename(F_Math, v5[i]); 1065 else nv5[i] = 0; 1066 if (P1[i][0] != 0) np1[i] = decodename(F_Math, P1[i]); 1067 else np1[i] = 0; 1068 if (P2[i][0] != 0) np2[i] = decodename(F_Math, P2[i]); 1069 else np2[i] = 0; 1070 if (P3[i][0] != 0) np3[i] = decodename(F_Math, P3[i]); 1071 else np3[i] = 0; 1072 if (P4[i][0] != 0) np4[i] = decodename(F_Math, P4[i]); 1073 else np4[i] = 0; 1074 if (P5[i][0] != 0) np5[i] = decodename(F_Math, P5[i]); 1075 else np5[i] = 0; 1076 } 1077 std::printf("Larger symbols tables processed\n"); 1078 1079 // Now I have read everything. 1080 // 1081 // Before I fill in the main hash table I need to collect kern and ligature 1082 // information. 1083 kernp = ligp = 0; 1084 for (fontnum=0; fontnum<F_end; fontnum++) 1085 { fontkern[fontnum] = kernp-1; 1086 for (i=0; i<charcount; i++) 1087 { int j, v = 0, kkk = 0; 1088 // I wish to process all chars from each font in order. In fact they will 1089 // be in my table that way, but I still code things to scan once for each font. 1090 if (fontkey[i] != fontnum) continue; 1091 // Now I will transfer any ligature and kern info about this character 1092 // into kerntable & ligtable. 
1093 for (j=0; j<nkerns; j++) 1094 { if (kernfont[j] == fontnum && 1095 kernstartcode[j] == codepoint[i]) 1096 { std::strcpy(ktstart[kernp], kernstart[j]); 1097 std::strcpy(ktfollow[kernp], kernfollow[j]); 1098 ktadjustment[kernp] = kernadjustment[j]; 1099 std::strcpy(ktfont[kernp], fontnames[fontnum]); 1100 ktfontn[kernp] = fontnum; 1101 // kkk will be the index in the kernel tables of the FIRST item 1102 // relating to this start character. It has 0x80000000 forced in so 1103 // that it is a nonzero value even if the kernel table index is zero. 1104 if (kkk == 0) kkk = kernp | 0x80000000; 1105 kerntable[kernp++] = 1106 (kernadjustment[j]<<23) | kernfollowcode[j]; 1107 v = 1; 1108 } 1109 } 1110 for (j=0; j<nligatures; j++) 1111 { if (ligfont[j] == fontnum && 1112 ligstart[j] == codepoint[i]) 1113 { std::strcpy(ktstart[kernp], uninames[i]); 1114 std::strcpy(ktfollow[kernp], ligfollow[j]); 1115 ktadjustment[kernp] = 9999; 1116 std::strcpy(ktfont[kernp], fontnames[fontnum]); 1117 ktfontn[kernp] = fontnum; 1118 if (kkk == 0) kkk = kernp | 0x80000000; 1119 kerntable[kernp++] = 1120 (ligp<<23) | IS_LIGATURE | ligfollowcode[j]; 1121 std::strcpy(ltfirst[ligp], uninames[i]); 1122 std::strcpy(ltfollow[ligp], ligfollow[j]); 1123 std::strcpy(ltname[ligp], ligreplacement[j]); 1124 std::strcpy(ltfont[ligp], fontnames[fontnum]); 1125 ligtable[ligp++] = ligreplacementcode[j]; 1126 v = 1; 1127 } 1128 } 1129 // v was set if I found at least one kern or ligature entry starting 1130 // with this character. Noticing that here means I can look back and 1131 // be certain that the previous block just ended. 1132 if (v && kernp!=0) kerntable[kernp-1] |= IS_BLOCKEND; 1133 kernreference[i] = kkk; 1134 } 1135 } 1136 // Make really certain that the table is terminated. 
1137 if (kernp!=0) kerntable[kernp-1] |= IS_BLOCKEND; 1138 1139 std::printf("charcount = %d\n", charcount); 1140 1141 // Well because it will be a cheaper process I will set up the small hash- 1142 // tables for accent placement and large-characters first... 1143 #if 0 1144 for (i=0; i<accentp; i++) 1145 std::printf(" %#.8x,\n", accentnum[i]); 1146 #endif 1147 1148 std::printf("About to do topaccent table creation with %d keys\n", 1149 accentp); 1150 cuckoo_parameters topcentre_r = 1151 cuckoo_binary_optimise( 1152 accentnum, 1153 accentp, 1154 accent_importance, 1155 topcentre, 1156 sizeof(topcentre[0]), 1157 accentp-1, 1158 sizeof(topcentre)/sizeof(topcentre[0]), 1159 accent_get, 1160 accent_set, 1161 1.0); 1162 std::printf("Table size = %d (%d %d)\n", topcentre_r.table_size, 1163 topcentre_r.modulus2, topcentre_r.offset2); 1164 #ifndef DUMMY 1165 std::printf("Now put in accent positions\n"); 1166 for (i=0; i<accentp; i++) 1167 { int w = cuckoo_lookup( 1168 accentnum[i], 1169 topcentre, 1170 sizeof(topcentre[0]), 1171 topcentre_r.table_size, 1172 accent_get, 1173 topcentre_r.modulus2, 1174 topcentre_r.offset2); 1175 if (w == -1) 1176 { std::printf("failure of lookup in topaccent table!\n"); 1177 std::printf("%d: %d/%x\n", i, accentnum[i], accentnum[i]); 1178 for (i=0; i<topcentre_r.table_size; i++) 1179 std::printf("%4d: %x\n", i, topcentre[i]); 1180 std::exit(1); 1181 } 1182 topcentre[w] |= accentval[i] << 21; 1183 } 1184 std::printf("top-centre table set up with %d words for %d chars (%.2f)\n", 1185 topcentre_r.table_size, accentp, 1186 (100.0*accentp)/topcentre_r.table_size); 1187 #endif 1188 1189 cuckoo_parameters variant_r; 1190 int usefulp = 0; 1191 // I will only put characters that actually have variants in here 1192 { uint32_t usefulcode[MAXMATHSYMS]; 1193 for (i=0; i<variantp; i++) 1194 { if (nv1[i] != 0 || nv2[i] != 0 || nv3[i] != 0 || 1195 nv4[i] != 0 || nv5[i] != 0) 1196 usefulcode[usefulp++] = (uint32_t)variantcode[i]; 1197 } 1198 variant_r = 
cuckoo_binary_optimise( 1199 usefulcode, 1200 usefulp, 1201 variant_importance, 1202 variant_table, 1203 sizeof(variant_table[0]), 1204 usefulp-1, 1205 sizeof(variant_table)/sizeof(variant_table[0]), 1206 variant_get, 1207 variant_set, 1208 0.0); 1209 std::printf("Variant table size = %d (%d %d)\n", variant_r.table_size, 1210 variant_r.modulus2, variant_r.offset2); 1211 } 1212 #ifndef DUMMY 1213 std::printf("Now put in variant info for (, ), [, ] etc.\n"); 1214 for (i=0; i<variantp; i++) 1215 { int w; 1216 // If there are no variants then do not bother! 1217 if (nv1[i] == 0 && nv2[i] == 0 && nv3[i] == 0 && 1218 nv4[i] == 0 && nv5[i] == 0) continue; 1219 w = cuckoo_lookup( 1220 variantcode[i], 1221 variant_table, 1222 sizeof(variant_table[0]), 1223 variant_r.table_size, 1224 variant_get, 1225 variant_r.modulus2, 1226 variant_r.offset2); 1227 if (w == -1) 1228 { std::printf("failure of lookup in variant table!\n"); 1229 std::printf("%d: %d/%x\n", i, variantcode[i], variantcode[i]); 1230 for (i=0; i<variant_r.table_size; i++) 1231 std::printf("%4d: %" PRIx32 "\n", i, variant_table[i][0]); 1232 std::exit(1); 1233 } 1234 // Put the five gradually larger variants of the character in place. These 1235 // are stored in a really simple way since the total amount of data involved 1236 // is not huge. Maybe the only thing to note here is that variable_table[*][0] 1237 // has the codepoint of the basic character with 0x00200000 added in if the 1238 // variants will be for horiziontal use (eg gradually wider circumflex 1239 // accents), rather than for vertical use (eg progressivly taller parentheses). 1240 // When a size is not provided the entry with contain U+0000. 
1241 if (variant_table[w][0] != variantcode[i]) 1242 { std::printf("Messed up at line %d\n", __LINE__); 1243 std::exit(1); 1244 } 1245 variant_table[w][1] |= nv1[i]; 1246 variant_table[w][2] |= nv2[i]; 1247 variant_table[w][3] |= nv3[i]; 1248 variant_table[w][4] |= nv4[i]; 1249 variant_table[w][5] |= nv5[i]; 1250 } 1251 std::printf("variant table set up with %d entries for %d chars (%.2f%%)\n", 1252 variant_r.table_size, usefulp, 1253 (100.0*usefulp)/variant_r.table_size); 1254 #endif 1255 1256 cuckoo_parameters extension_r; 1257 usefulp = 0; 1258 // I will only put characters that actually have variants in here 1259 { uint32_t usefulcode[MAXMATHSYMS]; 1260 for (i=0; i<variantp; i++) 1261 { if (np1[i] != 0 || np2[i] != 0 || np3[i] != 0 || 1262 np4[i] != 0 || np5[i] != 0) 1263 usefulcode[usefulp++] = (uint32_t)variantcode[i]; 1264 } 1265 extension_r = cuckoo_binary_optimise( 1266 usefulcode, 1267 usefulp, 1268 extension_importance, 1269 extension_table, 1270 sizeof(extension_table[0]), 1271 usefulp-1, 1272 sizeof(extension_table)/sizeof(extension_table[0]), 1273 extension_get, 1274 extension_set, 1275 0.0); 1276 std::printf("Extension table size = %d (%d %d)\n", 1277 extension_r.table_size, 1278 extension_r.modulus2, extension_r.offset2); 1279 } 1280 #ifndef DUMMY 1281 std::printf("Now put in extension info for (, ), [, ] etc.\n"); 1282 for (i=0; i<variantp; i++) 1283 { int w; 1284 // If there are no extensions then do not bother! 
1285 if (np1[i] == 0 && np2[i] == 0 && np3[i] == 0 && 1286 np4[i] == 0 && np5[i] == 0) continue; 1287 w = cuckoo_lookup( 1288 variantcode[i], 1289 extension_table, 1290 sizeof(extension_table[0]), 1291 extension_r.table_size, 1292 extension_get, 1293 extension_r.modulus2, 1294 extension_r.offset2); 1295 if (w == -1) 1296 { std::printf("failure of lookup in extension table!\n"); 1297 std::printf("%d: %d/%x\n", i, variantcode[i], variantcode[i]); 1298 for (i=0; i<extension_r.table_size; i++) 1299 std::printf("%4d: %" PRIx32 "\n", i, extension_table[i][0]); 1300 std::exit(1); 1301 } 1302 // Put the components used to build up huge characters into the table. 1303 // When one is not provided the entry with contain U+0000. 1304 if (extension_table[w][0] != variantcode[i]) 1305 { std::printf("Messed up at line %d\n", __LINE__); 1306 std::exit(1); 1307 } 1308 extension_table[w][1] |= np1[i] | (vdata1[i][0]<<21); 1309 extension_table[w][2] |= vdata1[i][1] | (vdata1[i][2]<<16) | 1310 (vdata1[i][3]<<31); 1311 extension_table[w][3] |= np2[i] | (vdata2[i][0]<<21); 1312 extension_table[w][4] |= vdata2[i][1] | (vdata2[i][2]<<16) | 1313 (vdata2[i][3]<<31); 1314 extension_table[w][5] |= np3[i] | (vdata3[i][0]<<21); 1315 extension_table[w][6] |= vdata3[i][1] | (vdata3[i][2]<<16) | 1316 (vdata3[i][3]<<31); 1317 extension_table[w][7] |= np4[i] | (vdata4[i][0]<<21); 1318 extension_table[w][8] |= vdata4[i][1] | (vdata4[i][2]<<16) | 1319 (vdata4[i][3]<<31); 1320 extension_table[w][9] |= np5[i] | (vdata5[i][0]<<21); 1321 extension_table[w][10]|= vdata5[i][1] | (vdata5[i][2]<<16) | 1322 (vdata5[i][3]<<31); 1323 } 1324 std::printf("extension table set up with %d entries for %d chars (%.2f%%)\n", 1325 extension_r.table_size, usefulp, 1326 (100.0*usefulp)/extension_r.table_size); 1327 #endif 1328 1329 //========================================================================== 1330 // (2) Try inserting everything in to the main metrics hash table 1331 
//========================================================================== 1332 1333 // I will remove duplicate keys here first... I will cheerfully use 1334 // a quadratic cost filtering process here because other things are so much 1335 // more expensive. 1336 mainkeycount = 0; 1337 for (i=0; i<charcount; i++) 1338 { int j; 1339 uint32_t k = pack_character(fontkey[i], codepoint[i]) >> 2; 1340 if (k == 0) continue; 1341 for (j=0; j<mainkeycount; j++) 1342 if (k == mainkey[j]) break; 1343 if (j<mainkeycount) continue; // discard repeat key. 1344 mainkey[mainkeycount++] = k; 1345 } 1346 1347 std::printf("About to try to optimise for %d entries\n", 1348 mainkeycount); 1349 1350 // In my case there are 10019 keys to consider, If I do a proper search 1351 // that can take quite a while - say 45 minutes on a reasonably fast desktop 1352 // system. So as I cunning ploy I will first try the parameters that 1353 // are a known solution, and if nothing has changed at all that will 1354 // succeed (very rapidly) and I can use it. If that fails I will drop back 1355 // to the more expensive search 1356 1357 //#define EXPECTED_TABLESIZE 10057 1358 //#define EXPECTED_MODULUS2 8729 1359 //#define EXPECTED_OFFSET2 1108 1360 1361 cuckoo_parameters main_r; 1362 double mm; 1363 std::printf("static uint32_t keys[] = \n{\n "); 1364 for (i=0; i<mainkeycount; i++) 1365 { std::printf("%#8x%s", mainkey[i], (i==(mainkeycount-1)?"":",")); 1366 if (i % 8 == 7) std::printf("\n "); 1367 } 1368 std::printf("\n};\n\n"); 1369 1370 #ifndef DUMMY 1371 // If the Hungarian method shows that there is an assignment with 1372 // exactly my expected parameters that meets my target merit then I 1373 // will just accept it. 
1374 if ((mm = find_best_assignment( 1375 mainkey, 1376 mainkeycount, 1377 main_importance, 1378 uint32hashtable, 1379 EXPECTED_TABLESIZE, 1380 EXPECTED_MODULUS2, 1381 EXPECTED_OFFSET2)) > 0.0 && 1382 mm <= TARGET_MERIT) 1383 { main_r.table_size = EXPECTED_TABLESIZE; 1384 main_r.modulus2 = EXPECTED_MODULUS2; 1385 main_r.offset2 = EXPECTED_OFFSET2; 1386 main_r.merit = mm; 1387 // Transfer allocation to the main hash table 1388 for (i=0; i<main_r.table_size; i++) 1389 main_set(&hashtable[i], uint32hashtable[i]); 1390 std::printf("Built-in table parameters successfully used\n"); 1391 } 1392 // Otherwise if there is an assignment at that table size I do not need to 1393 // do elaborate searches to identify it, but I will want to run the Hungarian 1394 // algorithm for all possible values of modulus2 and offset2 at that table 1395 // size. 1396 else if (mm > 0.0) 1397 { main_r.table_size = EXPECTED_TABLESIZE; 1398 main_r.modulus2 = EXPECTED_MODULUS2; 1399 main_r.offset2 = EXPECTED_OFFSET2; 1400 main_r.merit = mm; 1401 } 1402 else 1403 #endif // DUMMY 1404 { main_r = cuckoo_binary_optimise( 1405 mainkey, 1406 mainkeycount, 1407 main_importance, 1408 hashtable, 1409 sizeof(hashtable[0]), 1410 MAIN_LOW, 1411 MAIN_HIGH, 1412 main_get, 1413 main_set, 1414 TARGET_MERIT); 1415 main_r.merit = 4.0; 1416 } 1417 std::printf("Whooeeee! %d %d %d %.2f%% merit=%.4f\n", 1418 main_r.table_size, main_r.modulus2, main_r.offset2, 1419 (100.0*mainkeycount)/main_r.table_size, main_r.merit); 1420 1421 // Now unless the current merit is good enough I will optimise by 1422 // trying all possible hash options at this table size. 
1423 1424 if (main_r.merit > TARGET_MERIT) 1425 main_r = try_all_hash_functions( 1426 mainkey, 1427 mainkeycount, 1428 main_importance, 1429 hashtable, 1430 sizeof(hashtable[0]), 1431 main_r.table_size, 1432 main_set, 1433 1); 1434 1435 //===================================================================== 1436 // Now the table should have everything in it and so I can merely fill 1437 // in the actual metric information 1438 //===================================================================== 1439 #ifndef DUMMY 1440 std::printf("\nNow I want to put data into the hash table.\n"); 1441 for (i=0; i<charcount; i++) 1442 { int fullkey = pack_character(fontkey[i], 1443 codepoint[i]); // 20-bit key 1444 int key = fullkey >> 2; // because my hash table has line-size 4 1445 if (codepoint[i] == 0) continue; 1446 int h1; 1447 uint64_t w; 1448 h1 = cuckoo_lookup( 1449 key, 1450 hashtable, 1451 sizeof(hashtable[0]), 1452 main_r.table_size, 1453 main_get, 1454 main_r.modulus2, 1455 main_r.offset2); 1456 if (h1 == -1) 1457 { int j; 1458 std::printf("failure at line %d!\n", __LINE__); 1459 std::printf("Problem with character %d font %d codepoint %d/%x\n", 1460 i, fontkey[i], codepoint[i], codepoint[i]); 1461 std::printf("Full key = %d/%x key = %d/%x\n", 1462 fullkey, fullkey, key, key); 1463 for (j=0; j<main_r.table_size; j++) 1464 std::printf("%7d: %" PRIx64 "\n", j, hashtable[j][0]); 1465 1466 std::exit(1); 1467 } 1468 1469 // Pack and write in the messy information about width and bounding boxes. 
1470 w = ((uint64_t)width[i] & 0x1fff) << 51 | 1471 ((uint64_t)(llx[i]+3000) & 0x1fff) << 38 | 1472 ((uint64_t)(lly[i]+1000) & 0x0fff) << 26 | 1473 ((uint64_t)(urx[i]+500) & 0x1fff) << 13 | 1474 ((uint64_t)(ury[i]+1000) & 0x1fff); 1475 hashtable[h1][1+(fullkey&3)] = w; 1476 // Finally merge in an offset to any kern info that might be available 1477 if (kernreference[i] != 0) 1478 { int64_t q = (kernreference[i] & 0x7fffffff)-fontkern[fontkey[i]]; 1479 #if 0 1480 std::printf("Fill in kern ref %d as %d\n", 1481 kernreference[i] & 0x7fffffff, static_cast<int>(q)); 1482 #endif 1483 hashtable[h1][0] |= q << (19+11*(fullkey&3)); 1484 } 1485 } 1486 1487 std::printf("Done after %d characters, %d ligatures, %d kerns\n", 1488 charcount, nligatures, nkerns); 1489 std::printf("width %d %d (%d)\n", minw, maxw, maxw-minw); 1490 std::printf("llx %d %d (%d)\n", minllx, maxllx, maxllx-minllx); 1491 std::printf("lly %d %d (%d)\n", minlly, maxlly, maxlly-minlly); 1492 std::printf("urx %d %d (%d)\n", minurx, maxurx, maxurx-minurx); 1493 std::printf("ury %d %d (%d)\n", minury, maxury, maxury-minury); 1494 1495 std::printf("Total space = %d\n", main_r.table_size*(5*8)); 1496 p1 = 0; 1497 for (i=0; i<main_r.table_size; i++) 1498 { if (hashtable[i][0] != 0) p1++; 1499 } 1500 std::printf("%d of %d entries (%d of %d bytes) used: %.4f\n", 1501 p1, main_r.table_size, 40*p1, 40*main_r.table_size, 1502 static_cast<double>(p1)/static_cast<double>(main_r.table_size)); 1503 1504 { std::FILE *dest = std::fopen("charmetrics.h", "w"); 1505 std::FILE *rdest = std::fopen("charmetrics.red", "w"); 1506 std::FILE *smldest = std::fopen("charmetrics.sml", "w"); 1507 std::fprintf(dest, 1508 "// charmetrics.h Copyright (C) 2017 Codemist\n"); 1509 std::fprintf(dest, "\n"); 1510 std::fprintf(dest, "\n"); 1511 std::fprintf(dest, 1512 "/**************************************************************************\n"); 1513 std::fprintf(dest, 1514 " * Copyright (C) 2017, Codemist. 
A C Norman *\n"); 1515 std::fprintf(dest, 1516 " * *\n"); 1517 std::fprintf(dest, 1518 " * Redistribution and use in source and binary forms, with or without *\n"); 1519 std::fprintf(dest, 1520 " * modification, are permitted provided that the following conditions are *\n"); 1521 std::fprintf(dest, 1522 " * met: *\n"); 1523 std::fprintf(dest, 1524 " * *\n"); 1525 std::fprintf(dest, 1526 " * * Redistributions of source code must retain the relevant *\n"); 1527 std::fprintf(dest, 1528 " * copyright notice, this list of conditions and the following *\n"); 1529 std::fprintf(dest, 1530 " * disclaimer. *\n"); 1531 std::fprintf(dest, 1532 " * * Redistributions in binary form must reproduce the above *\n"); 1533 std::fprintf(dest, 1534 " * copyright notice, this list of conditions and the following *\n"); 1535 std::fprintf(dest, 1536 " * disclaimer in the documentation and/or other materials provided *\n"); 1537 std::fprintf(dest, 1538 " * with the distribution. *\n"); 1539 std::fprintf(dest, 1540 " * *\n"); 1541 std::fprintf(dest, 1542 " * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *\n"); 1543 std::fprintf(dest, 1544 " * \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *\n"); 1545 std::fprintf(dest, 1546 " * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS *\n"); 1547 std::fprintf(dest, 1548 " * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE *\n"); 1549 std::fprintf(dest, 1550 " * COPYRIGHT OWNERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, *\n"); 1551 std::fprintf(dest, 1552 " * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, *\n"); 1553 std::fprintf(dest, 1554 " * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS *\n"); 1555 std::fprintf(dest, 1556 " * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *\n"); 1557 std::fprintf(dest, 1558 " * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR *\n"); 1559 std::fprintf(dest, 1560 " * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF *\n"); 1561 std::fprintf(dest, 1562 " * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH *\n"); 1563 std::fprintf(dest, 1564 " * DAMAGE. *\n"); 1565 std::fprintf(dest, 1566 " *************************************************************************/\n"); 1567 std::fprintf(dest, "\n"); 1568 std::fprintf(dest, 1569 "// $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n"); 1570 std::fprintf(dest, "\n"); 1571 std::fprintf(dest, "\n"); 1572 std::fprintf(dest, "#ifndef __STDC_CONSTANT_MACROS\n"); 1573 std::fprintf(dest, "#define __STDC_CONSTANT_MACROS 1\n"); 1574 std::fprintf(dest, "#endif\n"); 1575 std::fprintf(dest, "\n#include <cstdint>\n\n"); 1576 std::fprintf(dest, 1577 "// Character metric hash table created using the program charmetrics.cpp\n"); 1578 std::fprintf(dest, 1579 "// sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfontxs\n"); 1580 std::fprintf(dest, 1581 "// contains README files with full credits to the fonts this is used with\n"); 1582 std::fprintf(dest, "\n\n"); 1583 std::fprintf(dest, 1584 "// The list of font codes here must be kept in step with the list\n"); 1585 std::fprintf(dest, "// of names in the table.\n"); 1586 std::fprintf(dest, "\n"); 1587 std::fprintf(dest, "#define F_cmuntt 0\n"); 1588 std::fprintf(dest, "#define 
F_odokai 1\n"); 1589 std::fprintf(dest, "#define F_Regular 2\n"); 1590 std::fprintf(dest, "#define F_Bold 3\n"); 1591 std::fprintf(dest, "#define F_Italic 4\n"); 1592 std::fprintf(dest, "#define F_BoldItalic 5\n"); 1593 std::fprintf(dest, "#define F_Math 6\n"); 1594 std::fprintf(dest, "#define F_end 7\n"); 1595 std::fprintf(dest, "\n"); 1596 std::fprintf(dest, 1597 "extern int c_width, c_llx, c_lly, c_urx, c_ury, c_kerninfo;\n"); 1598 std::fprintf(dest, 1599 "extern int lookupchar(int fontnum, int codepoint);\n"); 1600 std::fprintf(dest, 1601 "extern int32_t lookupkernandligature(int codepoint);\n"); 1602 std::fprintf(dest, 1603 "extern int32_t lookupkernadjustment(int codepoint);\n"); 1604 std::fprintf(dest, "extern int32_t lookupligature(int codepoint);\n"); 1605 std::fprintf(dest, "extern int accentposition(int codepoint);\n\n"); 1606 std::fprintf(dest, "extern const uint16_t chardepth_WIN32[31];\n"); 1607 std::fprintf(dest, "extern const uint16_t chardepth_X11[31];\n"); 1608 std::fprintf(dest, "extern const uint16_t chardepth_OSX[31];\n"); 1609 std::fprintf(dest, "extern const uint16_t *chardepth;\n"); 1610 std::fprintf(dest, "extern const char *fontnames[31];\n\n"); 1611 std::fprintf(rdest, 1612 "%% Character metrics for the STIX (and some other) fonts...\n"); 1613 std::fprintf(rdest, "\n"); 1614 std::fprintf(rdest, 1615 "%% Character metric hash table created using the program charmetrics.cpp\n"); 1616 std::fprintf(rdest, 1617 "%% sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfonts\n"); 1618 std::fprintf(rdest, 1619 "%% contains README files with full credits to the fonts this is used with\n"); 1620 std::fprintf(rdest, "%% Author: Arthur Norman\n"); 1621 std::fprintf(rdest, "\n"); 1622 std::fprintf(rdest, 1623 "%% Redistribution and use in source and binary forms, with or without\n"); 1624 std::fprintf(rdest, 1625 "%% modification, are permitted provided that the following conditions are met:\n"); 1626 std::fprintf(rdest, "%%\n"); 1627 
std::fprintf(rdest, 1628 "%% * Redistributions of source code must retain the relevant copyright\n"); 1629 std::fprintf(rdest, 1630 "%% notice, this list of conditions and the following disclaimer.\n"); 1631 std::fprintf(rdest, 1632 "%% * Redistributions in binary form must reproduce the above copyright\n"); 1633 std::fprintf(rdest, 1634 "%% notice, this list of conditions and the following disclaimer in the\n"); 1635 std::fprintf(rdest, 1636 "%% documentation and/or other materials provided with the distribution.\n"); 1637 std::fprintf(rdest, "%%\n"); 1638 std::fprintf(rdest, 1639 "%% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n"); 1640 std::fprintf(rdest, 1641 "%% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,\n"); 1642 std::fprintf(rdest, 1643 "%% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n"); 1644 std::fprintf(rdest, 1645 "%% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR\n"); 1646 std::fprintf(rdest, "%% CONTRIBUTORS\n"); 1647 std::fprintf(rdest, 1648 "%% BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n"); 1649 std::fprintf(rdest, 1650 "%% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n"); 1651 std::fprintf(rdest, 1652 "%% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n"); 1653 std::fprintf(rdest, 1654 "%% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n"); 1655 std::fprintf(rdest, 1656 "%% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n"); 1657 std::fprintf(rdest, 1658 "%% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n"); 1659 std::fprintf(rdest, "%% POSSIBILITY OF SUCH DAMAGE.\n"); 1660 std::fprintf(rdest, "%%\n"); 1661 std::fprintf(rdest, "\n"); 1662 std::fprintf(rdest, 1663 "%% Also be aware of the (generally permissive) licenses associated with the\n"); 1664 std::fprintf(rdest, 1665 "%% fonts. 
Fill README files and license terms for the fonts themselves\n"); 1666 std::fprintf(rdest, "%% are in csl/cslbase/wxfonts.\n"); 1667 std::fprintf(rdest, "\n"); 1668 std::fprintf(rdest, "\n"); 1669 std::fprintf(rdest, 1670 "%% $Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n"); 1671 std::fprintf(rdest, "\n"); 1672 std::fprintf(rdest, 1673 "#if (or (memq 'psl lispsystem!*) (memq 'jlisp lispsystem!*))\n"); 1674 std::fprintf(rdest, "\n"); 1675 std::fprintf(rdest, 1676 "%% CSL has special vectors that hold just 16-bit integers and 32-bit\n"); 1677 std::fprintf(rdest, 1678 "%% integers and use of those will decrease the amount of memory consumed\n"); 1679 std::fprintf(rdest, 1680 "%% here. However if PSL does not have these it does not matter much since I\n"); 1681 std::fprintf(rdest, "%% can just use ordinary Lisp vectors...\n"); 1682 std::fprintf(rdest, 1683 "%% I set initial contents as all 0 rather than all nil since these are\n"); 1684 std::fprintf(rdest, 1685 "%% supposed to contain (small) integer values.\n"); 1686 std::fprintf(rdest, "\n"); 1687 std::fprintf(rdest, "symbolic procedure mkvect32 n;\n"); 1688 std::fprintf(rdest, " begin\n"); 1689 std::fprintf(rdest, " scalar r;\n"); 1690 std::fprintf(rdest, " r := mkvect n;\n"); 1691 std::fprintf(rdest, " for i := 0:n do putv(r, i, 0);\n"); 1692 std::fprintf(rdest, " return r\n"); 1693 std::fprintf(rdest, " end;\n"); 1694 std::fprintf(rdest, "\n"); 1695 std::fprintf(rdest, 1696 "symbolic inline procedure putv32(v, n, x); putv(v, n, x);\n"); 1697 std::fprintf(rdest, "\n"); 1698 std::fprintf(rdest, 1699 "symbolic inline procedure getv32(v, n); getv(v, n);\n"); 1700 std::fprintf(rdest, "\n"); 1701 std::fprintf(rdest, "symbolic procedure mkvect16 n;\n"); 1702 std::fprintf(rdest, " begin\n"); 1703 std::fprintf(rdest, " scalar r;\n"); 1704 std::fprintf(rdest, " r := mkvect n;\n"); 1705 std::fprintf(rdest, " for i := 0:n do putv(r, i, 0);\n"); 1706 std::fprintf(rdest, " return r\n"); 1707 
std::fprintf(rdest, " end;\n"); 1708 std::fprintf(rdest, "\n"); 1709 std::fprintf(rdest, 1710 "symbolic inline procedure putv16(v, n, x); putv(v, n, x);\n"); 1711 std::fprintf(rdest, "\n"); 1712 std::fprintf(rdest, 1713 "symbolic inline procedure getv16(v, n); getv(v, n);\n"); 1714 std::fprintf(rdest, "\n"); 1715 std::fprintf(rdest, "#endif\n"); 1716 std::fprintf(rdest, "\n"); 1717 std::fprintf(rdest, 1718 "put('cmuntt, 'font_number, 0)$\n"); 1719 std::fprintf(rdest, 1720 "put('odokai, 'font_number, 1)$\n"); 1721 std::fprintf(rdest, 1722 "put('Regular, 'font_number, 2)$\n"); 1723 std::fprintf(rdest, 1724 "put('Bold, 'font_number, 3)$\n"); 1725 std::fprintf(rdest, 1726 "put('Italic, 'font_number, 4)$\n"); 1727 std::fprintf(rdest, 1728 "put('BoldItalic, 'font_number, 5)$\n"); 1729 std::fprintf(rdest, 1730 "put('Math, 'font_number, 6)$\n"); 1731 std::fprintf(rdest, "\n"); 1732 std::fprintf(rdest, "symbolic procedure list_to_vec16 l;\n"); 1733 std::fprintf(rdest, " begin\n"); 1734 std::fprintf(rdest, " scalar r, n;\n"); 1735 std::fprintf(rdest, " r := mkvect16 (n := sub1 length l);\n"); 1736 std::fprintf(rdest, " for i := 0:n do <<\n"); 1737 std::fprintf(rdest, " putv16(r, i, car l);\n"); 1738 std::fprintf(rdest, " l := cdr l >>;\n"); 1739 std::fprintf(rdest, " return r\n"); 1740 std::fprintf(rdest, " end;\n"); 1741 std::fprintf(rdest, "\n"); 1742 std::fprintf(rdest, "symbolic procedure list_to_vec32 l;\n"); 1743 std::fprintf(rdest, " begin\n"); 1744 std::fprintf(rdest, " scalar r, n;\n"); 1745 std::fprintf(rdest, " r := mkvect32 (n := sub1 length l);\n"); 1746 std::fprintf(rdest, " for i := 0:n do <<\n"); 1747 std::fprintf(rdest, " putv32(r, i, car l);\n"); 1748 std::fprintf(rdest, " l := cdr l >>;\n"); 1749 std::fprintf(rdest, " return r\n"); 1750 std::fprintf(rdest, " end;\n"); 1751 std::fprintf(rdest, "\n"); 1752 std::fprintf(rdest, 1753 "%% This one will take a list whose elements are themselves lists\n"); 1754 std::fprintf(rdest, "%% of 32-bit integers.\n"); 1755 
std::fprintf(rdest, "%%\n"); 1756 std::fprintf(rdest, "symbolic procedure list_to_metric_table l;\n"); 1757 std::fprintf(rdest, " begin\n"); 1758 std::fprintf(rdest, " scalar r, n;\n"); 1759 std::fprintf(rdest, " r := mkvect (n := sub1 length l);\n"); 1760 std::fprintf(rdest, " for i := 0:n do <<\n"); 1761 std::fprintf(rdest, " putv(r, i, list_to_vec32 car l);\n"); 1762 std::fprintf(rdest, " l := cdr l >>;\n"); 1763 std::fprintf(rdest, " return r\n"); 1764 std::fprintf(rdest, " end;\n"); 1765 std::fprintf(rdest, "\n"); 1766 std::fprintf(rdest, 1767 "fluid '(hashsize!* metrics_hash!* topcentre_hash!* variant_hash!* extension_hash!* fontkern!* kerntable!* ligaturetable!*);\n"); 1768 std::fprintf(rdest, "\n"); 1769 std::fprintf(rdest, "symbolic (hashsize!* := %d);\n", 1770 main_r.table_size); 1771 std::fprintf(rdest, "\n"); 1772 std::fprintf(smldest, 1773 "(* Character metrics for the STIX (and some other) fonts...\n"); 1774 std::fprintf(smldest, "\n"); 1775 std::fprintf(smldest, 1776 "Character metric hash table created using the program charmetrics.cpp\n"); 1777 std::fprintf(smldest, 1778 "sourceforge.net/p/reduce-algebra/code/HEAD/tree/trunk/csl/cslbase/wxfonts\n"); 1779 std::fprintf(smldest, 1780 "contains README files with full credits to the fonts this is used with\n"); 1781 std::fprintf(smldest, "Author: Arthur Norman\n"); 1782 std::fprintf(smldest, "\n"); 1783 std::fprintf(smldest, 1784 "Redistribution and use in source and binary forms, with or without\n"); 1785 std::fprintf(smldest, 1786 "modification, are permitted provided that the following conditions are met:\n"); 1787 std::fprintf(smldest, "\n"); 1788 std::fprintf(smldest, 1789 " * Redistributions of source code must retain the relevant copyright\n"); 1790 std::fprintf(smldest, 1791 " notice, this list of conditions and the following disclaimer.\n"); 1792 std::fprintf(smldest, 1793 " * Redistributions in binary form must reproduce the above copyright\n"); 1794 std::fprintf(smldest, 1795 " notice, this 
list of conditions and the following disclaimer in the\n"); 1796 std::fprintf(smldest, 1797 " documentation and/or other materials provided with the distribution.\n"); 1798 std::fprintf(smldest, "\n"); 1799 std::fprintf(smldest, 1800 "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n"); 1801 std::fprintf(smldest, 1802 "AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,\n"); 1803 std::fprintf(smldest, 1804 "THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n"); 1805 std::fprintf(smldest, 1806 "PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR\n"); 1807 std::fprintf(smldest, "CONTRIBUTORS\n"); 1808 std::fprintf(smldest, 1809 "BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n"); 1810 std::fprintf(smldest, 1811 "CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\n"); 1812 std::fprintf(smldest, 1813 "SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\n"); 1814 std::fprintf(smldest, 1815 "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\n"); 1816 std::fprintf(smldest, 1817 "CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\n"); 1818 std::fprintf(smldest, 1819 "ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE\n"); 1820 std::fprintf(smldest, "POSSIBILITY OF SUCH DAMAGE.\n"); 1821 std::fprintf(smldest, "\n"); 1822 std::fprintf(smldest, "\n"); 1823 std::fprintf(smldest, 1824 "Also be aware of the (generally permissive) licenses associated with the\n"); 1825 std::fprintf(smldest, 1826 "fonts. 
Fill README files and license terms for the fonts themselves\n"); 1827 std::fprintf(smldest, "are in csl/cslbase/wxfonts.\n"); 1828 std::fprintf(smldest, "\n"); 1829 std::fprintf(smldest, "\n"); 1830 std::fprintf(smldest, 1831 "$Id: charmetrics.cpp 5736 2021-03-16 10:41:22Z arthurcnorman $\n"); 1832 std::fprintf(smldest, "\n*)\n\n"); 1833 std::fprintf(smldest, "\n"); 1834 std::fprintf(smldest, "val F_cmuntt = 0;\n"); 1835 std::fprintf(smldest, "val F_odokai = 1;\n"); 1836 std::fprintf(smldest, "val F_Regular = 2;\n"); 1837 std::fprintf(smldest, "val F_Bold = 3;\n"); 1838 std::fprintf(smldest, "val F_Italic = 4;\n"); 1839 std::fprintf(smldest, "val F_BoldItalic = 5;\n"); 1840 std::fprintf(smldest, "val F_Math = 6;\n"); 1841 std::fprintf(smldest, "val F_end = 7;\n"); 1842 std::fprintf(smldest, "\n"); 1843 std::fprintf(smldest, "fun font_number \"cmuntt\" = F_cmuntt\n"); 1844 std::fprintf(smldest, " | font_number \"odokai\" = F_odokai\n"); 1845 std::fprintf(smldest, " | font_number \"Regular\" = F_Regular\n"); 1846 std::fprintf(smldest, " | font_number \"Bold\" = F_Bold\n"); 1847 std::fprintf(smldest, " | font_number \"Italic\" = F_Italic\n"); 1848 std::fprintf(smldest, 1849 " | font_number \"BoldItalic\" = F_BoldItalic\n"); 1850 std::fprintf(smldest, " | font_number \"Math\" = F_Math\n"); 1851 std::fprintf(smldest, " | font_number _ = 0;\n"); 1852 std::fprintf(smldest, "\n"); 1853 std::fprintf(smldest, "val hashsize = %d;\n", main_r.table_size); 1854 std::fprintf(smldest, "\n"); 1855 std::fprintf(dest, "const uint64_t charmetrics[%d][5] = \n{", 1856 main_r.table_size); 1857 std::fprintf(rdest, 1858 "#eval (setq metrics_hash!* (list_to_metric_table '\n ("); 1859 std::fprintf(smldest, "val metrics_hash = Vector.fromList\n" 1860 " (map Vector.fromList ["); 1861 for (i=0; i<main_r.table_size; i++) 1862 { if (i != 0) 1863 { std::fprintf(dest, ","); 1864 std::fprintf(smldest, ","); 1865 } 1866 std::fprintf(dest, 1867 "\n {UINT64_C(0x%.16" PRIx64 "), UINT64_C(0x%.16" PRIx64 
1868 "), UINT64_C(0x%.16" PRIx64 ")," 1869 "\n UINT64_C(0x%.16" PRIx64 1870 "), UINT64_C(0x%.16" PRIx64 ")}", 1871 hashtable[i][0], 1872 hashtable[i][1], hashtable[i][2], 1873 hashtable[i][3], hashtable[i][4]); 1874 std::fprintf(rdest, "\n (0x%.8" PRIx32 " 0x%.8" PRIx32 " 0x%.8" 1875 PRIx32 " 0x%.8" PRIx32 " 0x%.8" PRIx32, 1876 static_cast<int>(hashtable[i][0]), 1877 static_cast<int>(hashtable[i][0]>>32), 1878 static_cast<int>(hashtable[i][1]), 1879 static_cast<int>(hashtable[i][1]>>32), 1880 static_cast<int>(hashtable[i][2])); 1881 std::fprintf(rdest, "\n 0x%.8" PRIx32 " 0x%.8" PRIx32 " 0x%.8" 1882 PRIx32 " 0x%.8" PRIx32 " 0x%.8" PRIx32 ")", 1883 static_cast<int>(hashtable[i][2]>>32), 1884 static_cast<int>(hashtable[i][3]), 1885 static_cast<int>(hashtable[i][3]>>32), 1886 static_cast<int>(hashtable[i][4]), 1887 static_cast<int>(hashtable[i][4]>>32)); 1888 std::fprintf(smldest, 1889 "\n [0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" 1890 PRIx32 ", 0x%.8" PRIx32 ",", 1891 static_cast<int>(hashtable[i][0]), 1892 static_cast<int>(hashtable[i][0]>>32), 1893 static_cast<int>(hashtable[i][1]), 1894 static_cast<int>(hashtable[i][1]>>32), 1895 static_cast<int>(hashtable[i][2])); 1896 std::fprintf(smldest, 1897 "\n 0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" PRIx32 ", 0x%.8" 1898 PRIx32 ", 0x%.8" PRIx32 "]", 1899 static_cast<int>(hashtable[i][2]>>32), 1900 static_cast<int>(hashtable[i][3]), 1901 static_cast<int>(hashtable[i][3]>>32), 1902 static_cast<int>(hashtable[i][4]), 1903 static_cast<int>(hashtable[i][4]>>32)); 1904 } 1905 std::fprintf(dest, "\n};\n\n"); 1906 std::fprintf(dest, "#define CHAR_METRICS_MODULUS %d\n", 1907 main_r.modulus2); 1908 std::fprintf(dest, "#define CHAR_METRICS_OFFSET %d\n\n", 1909 main_r.offset2); 1910 std::fprintf(rdest, "\n )))\n\n"); 1911 std::fprintf(smldest, "\n ]);\n\n"); 1912 std::fprintf(smldest, "val CHAR_METRICS_MODULUS = %d;\n", 1913 main_r.modulus2); 1914 std::fprintf(smldest, "val CHAR_METRICS_OFFSET = %d;\n\n", 1915 
main_r.offset2); 1916 std::fprintf(dest, "const uint32_t topcentre[%d] = \n{", 1917 topcentre_r.table_size); 1918 std::fprintf(rdest, 1919 "#eval (setq topcentre_hash!* (list_to_vec32 '\n ("); 1920 std::fprintf(smldest, "val topcentre_hash = Vector.fromList ["); 1921 for (i=0; i<topcentre_r.table_size; i++) 1922 { if (i != 0) 1923 { std::fprintf(dest, ","); 1924 std::fprintf(smldest, ","); 1925 } 1926 std::fprintf(dest, "\n UINT32_C(0x%.8" PRIx32 ")", topcentre[i]); 1927 std::fprintf(rdest, "\n 0x%.8" PRIx32, topcentre[i]); 1928 std::fprintf(smldest, "\n 0x%.8" PRIx32, topcentre[i]); 1929 } 1930 std::fprintf(dest, "\n};\n\n"); 1931 std::fprintf(dest, "#define TOPCENTRE_MODULUS %d\n", 1932 topcentre_r.modulus2); 1933 std::fprintf(dest, "#define TOPCENTRE_OFFSET %d\n\n", 1934 topcentre_r.offset2); 1935 std::fprintf(rdest, "\n )))\n\n"); 1936 std::fprintf(smldest, "\n ];\n\n"); 1937 std::fprintf(smldest, "val TOPCENTRE_SIZE = %d;\n", 1938 topcentre_r.table_size); 1939 std::fprintf(smldest, "val TOPCENTRE_MODULUS = %d;\n", 1940 topcentre_r.modulus2); 1941 std::fprintf(smldest, "val TOPCENTRE_OFFSET = %d;\n\n", 1942 topcentre_r.offset2); 1943 std::fprintf(dest, "const uint32_t variant_table[%d][6] = \n{", 1944 variant_r.table_size); 1945 std::fprintf(rdest, 1946 "#eval (setq variant_hash!* (list_to_metric_table '\n ("); 1947 std::fprintf(smldest, "val variant_hash = Vector.fromList\n" 1948 " (map Vector.fromList ["); 1949 for (i=0; i<variant_r.table_size; i++) 1950 { if (i != 0) 1951 { std::fprintf(dest, ","); 1952 std::fprintf(smldest, ","); 1953 } 1954 std::fprintf(dest, 1955 "\n {0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x}", 1956 variant_table[i][0], variant_table[i][1], 1957 variant_table[i][2], variant_table[i][3], 1958 variant_table[i][4], variant_table[i][5]); 1959 std::fprintf(rdest, 1960 "\n (0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x)", 1961 variant_table[i][0], variant_table[i][1], 1962 variant_table[i][2], variant_table[i][3], 1963 variant_table[i][4], 
variant_table[i][5]); 1964 std::fprintf(smldest, 1965 "\n [0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x]", 1966 variant_table[i][0], variant_table[i][1], 1967 variant_table[i][2], variant_table[i][3], 1968 variant_table[i][4], variant_table[i][5]); 1969 } 1970 std::fprintf(dest, "\n};\n\n"); 1971 std::fprintf(dest, "#define VARIANT_MODULUS %d\n", 1972 variant_r.modulus2); 1973 std::fprintf(dest, "#define VARIANT_OFFSET %d\n\n", 1974 variant_r.offset2); 1975 std::fprintf(rdest, "\n )))\n\n"); 1976 std::fprintf(smldest, "\n ]);\n\n"); 1977 std::fprintf(smldest, "val VARIANT_SIZE = %d;\n", 1978 variant_r.table_size); 1979 std::fprintf(smldest, "val VARIANT_MODULUS = %d;\n", 1980 variant_r.modulus2); 1981 std::fprintf(smldest, "val VARIANT_OFFSET = %d;\n\n", 1982 variant_r.offset2); 1983 std::fprintf(dest, "const uint32_t extension_table[%d][11] = \n{", 1984 extension_r.table_size); 1985 std::fprintf(rdest, 1986 "#eval (setq extension_hash!* (list_to_metric_table '\n ("); 1987 std::fprintf(smldest, "val extension_hash = Vector.fromList\n" 1988 " (map Vector.fromList ["); 1989 for (i=0; i<extension_r.table_size; i++) 1990 { if (i != 0) 1991 { std::fprintf(dest, ","); 1992 std::fprintf(smldest, ","); 1993 } 1994 std::fprintf(dest, 1995 "\n {0x%.8x,\n 0x%.8x, 0x%.8x,\n 0x%.8x, 0x%.8x,\n" 1996 " 0x%.8x, 0x%.8x,\n 0x%.8x, 0x%.8x,\n" 1997 " 0x%.8x, 0x%.8x}", 1998 extension_table[i][0], extension_table[i][1], 1999 extension_table[i][2], extension_table[i][3], 2000 extension_table[i][4], extension_table[i][5], 2001 extension_table[i][6], extension_table[i][7], 2002 extension_table[i][8], extension_table[i][9], 2003 extension_table[i][10]); 2004 std::fprintf(rdest, 2005 "\n (0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x\n" 2006 " 0x%.8x 0x%.8x 0x%.8x 0x%.8x 0x%.8x)", 2007 extension_table[i][0], extension_table[i][1], 2008 extension_table[i][2], extension_table[i][3], 2009 extension_table[i][4], extension_table[i][5], 2010 extension_table[i][6], extension_table[i][7], 2011 
extension_table[i][8], extension_table[i][9], 2012 extension_table[i][10]); 2013 std::fprintf(smldest, 2014 "\n [0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x,\n" 2015 " 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x, 0x%.8x]", 2016 extension_table[i][0], extension_table[i][1], 2017 extension_table[i][2], extension_table[i][3], 2018 extension_table[i][4], extension_table[i][5], 2019 extension_table[i][6], extension_table[i][7], 2020 extension_table[i][8], extension_table[i][9], 2021 extension_table[i][10]); 2022 } 2023 std::fprintf(dest, "\n};\n\n"); 2024 std::fprintf(dest, "#define EXTENSION_MODULUS %d\n", 2025 extension_r.modulus2); 2026 std::fprintf(dest, "#define EXTENSION_OFFSET %d\n\n", 2027 extension_r.offset2); 2028 std::fprintf(rdest, "\n )))\n\n"); 2029 std::fprintf(smldest, "\n ]);\n\n"); 2030 std::fprintf(smldest, "val EXTENSION_SIZE = %d;\n", 2031 extension_r.table_size); 2032 std::fprintf(smldest, "val EXTENSION_MODULUS = %d;\n", 2033 extension_r.modulus2); 2034 std::fprintf(smldest, "val EXTENSION_OFFSET = %d;\n\n", 2035 extension_r.offset2); 2036 std::fprintf(dest, "const int16_t fontkern[] = \n{"); 2037 std::fprintf(rdest, "#eval (setq fontkern!* (list_to_vec16 '\n ("); 2038 std::fprintf(smldest, "val fontkern = Vector.fromList ["); 2039 // SML version not sorted yet... 
2040 for (i=0; i<F_end; i++) 2041 { int w = std::fprintf(dest, "\n %d", fontkern[i]); 2042 if (i != F_end-1) std::fprintf(dest, ","); 2043 else std::fprintf(dest, " "); 2044 while (++w < 16) std::fprintf(dest, " "); 2045 w = std::fprintf(rdest, "\n %d ", fontkern[i]); 2046 while (++w < 16) std::fprintf(rdest, " "); 2047 std::fprintf(dest, "// %s", fontnames[i]); 2048 if (fontkern[i] < 0) 2049 w = std::fprintf(smldest, "\n ~%d", -fontkern[i]); 2050 else w = std::fprintf(smldest, "\n %d", fontkern[i]); 2051 if (i != F_end-1) std::fprintf(smldest, ","); 2052 else std::fprintf(smldest, " "); 2053 while (++w < 16) std::fprintf(smldest, " "); 2054 std::fprintf(smldest, "(* %s ", fontnames[i]); 2055 if (i != F_end-2 && 2056 fontkern[i+1] != fontkern[i]) 2057 std::fprintf(dest, " [%d items]", fontkern[i+1]-fontkern[i]); 2058 std::fprintf(rdest, "%% %s", fontnames[i]); 2059 if (i != F_end-2 && 2060 fontkern[i+1] != fontkern[i]) 2061 std::fprintf(rdest, " [%d items]", fontkern[i+1]-fontkern[i]); 2062 if (i != F_end-2 && 2063 fontkern[i+1] != fontkern[i]) 2064 std::fprintf(smldest, " [%d items] ", fontkern[i+1]-fontkern[i]); 2065 std::fprintf(smldest, "*)"); 2066 } 2067 std::fprintf(dest, "\n};\n\n"); 2068 std::fprintf(rdest, "\n )))\n\n"); 2069 std::fprintf(smldest, "\n ];\n\n"); 2070 std::fprintf(dest, "const uint32_t kerntable[] = \n{"); 2071 std::fprintf(rdest, 2072 "#eval (setq kerntable!* (list_to_vec32 '\n ("); 2073 std::fprintf(smldest, "val kerntable = Vector.fromList ["); 2074 for (i=0; i<kernp; i++) 2075 { std::fprintf(dest, "\n 0x%.8" PRIx32, kerntable[i]); 2076 if (i != kernp-1) std::fprintf(dest, ","); 2077 else std::fprintf(dest, " "); 2078 std::fprintf(rdest, "\n 0x%.8" PRIx32 " ", kerntable[i]); 2079 std::fprintf(smldest, "\n 0x%.8" PRIx32, kerntable[i]); 2080 if (i != kernp-1) std::fprintf(smldest, ","); 2081 else std::fprintf(smldest, " "); 2082 if ((kerntable[i] & IS_LIGATURE) != 0) 2083 std::fprintf(dest, " // [%d:%d] %s + %s ligature #%d (%s)", 2084 i, 
i-fontkern[ktfontn[i]], 2085 ktstart[i], ktfollow[i], 2086 kerntable[i]>>23, ktfont[i]); 2087 else 2088 std::fprintf(dest, " // [%d:%d] %s + %s : %d (%s)", 2089 i, i-fontkern[ktfontn[i]], 2090 ktstart[i], ktfollow[i], 2091 ktadjustment[i], ktfont[i]); 2092 if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(dest, " ;;"); 2093 if ((kerntable[i] & IS_LIGATURE) != 0) 2094 std::fprintf(rdest, " %% [%d:%d] %s + %s ligature #%d (%s)", 2095 i, i-fontkern[ktfontn[i]], 2096 ktstart[i], ktfollow[i], 2097 kerntable[i]>>23, ktfont[i]); 2098 else 2099 std::fprintf(rdest, " %% [%d:%d] %s + %s : %d (%s)", 2100 i, i-fontkern[ktfontn[i]], 2101 ktstart[i], ktfollow[i], 2102 ktadjustment[i], ktfont[i]); 2103 if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(rdest, " ;;"); 2104 if ((kerntable[i] & IS_LIGATURE) != 0) 2105 std::fprintf(smldest, " (* [%d:%d] %s + %s ligature #%d (%s) *)", 2106 i, i-fontkern[ktfontn[i]], 2107 ktstart[i], ktfollow[i], 2108 kerntable[i]>>23, ktfont[i]); 2109 else 2110 std::fprintf(smldest, " (* [%d:%d] %s + %s : %d (%s) *)", 2111 i, i-fontkern[ktfontn[i]], 2112 ktstart[i], ktfollow[i], 2113 ktadjustment[i], ktfont[i]); 2114 if ((kerntable[i] & IS_BLOCKEND) != 0) std::fprintf(smldest, 2115 " (*;;*)"); 2116 } 2117 std::fprintf(dest, "\n};\n\n"); 2118 std::fprintf(rdest, "\n )))\n\n"); 2119 std::fprintf(smldest, "\n ];\n\n"); 2120 std::fprintf(dest, "const uint32_t ligaturetable[] = \n{"); 2121 std::fprintf(rdest, 2122 "#eval (setq ligaturetable!* (list_to_vec32 '\n ("); 2123 std::fprintf(smldest, "val ligaturetable = Vector.fromList ["); 2124 for (i=0; i<ligp; i++) 2125 { int l = std::fprintf(dest, "\n %" PRId32, ligtable[i]); 2126 if (i != ligp-1) std::fprintf(dest, ","); 2127 else std::fprintf(dest, " "); 2128 while (++l < 12) std::fprintf(dest, " "); 2129 l = std::fprintf(rdest, "\n %" PRId32 " ", ligtable[i]); 2130 while (++l < 12) std::fprintf(rdest, " "); 2131 l = std::fprintf(smldest, "\n %" PRId32, ligtable[i]); 2132 if (i != ligp-1) 
std::fprintf(smldest, ","); 2133 else std::fprintf(smldest, " "); 2134 while (++l < 12) std::fprintf(smldest, " "); 2135 std::fprintf(dest, " // [%d] %s + %s => %s (%s)", 2136 i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]); 2137 std::fprintf(rdest, " %% [%d] %s + %s => %s (%s)", 2138 i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]); 2139 std::fprintf(smldest, " (* [%d] %s + %s => %s (%s) *)", 2140 i, ltfirst[i], ltfollow[i], ltname[i], ltfont[i]); 2141 } 2142 std::fprintf(dest, "\n};\n\n"); 2143 std::fprintf(rdest, "\n )))\n\n"); 2144 std::fprintf(dest, "// end of charmetrics.h\n"); 2145 std::fprintf(smldest, "\n ];\n\n"); 2146 std::fprintf(rdest, 2147 "%% The use of #eval means that the metrics above have been defined at\n"); 2148 std::fprintf(rdest, 2149 "%% parse time. I now need to ensure that they will be available even\n"); 2150 std::fprintf(rdest, 2151 "%% when this code is passed through the compiler and hence everything\n"); 2152 std::fprintf(rdest, 2153 "%% goes via a FASL file. 
The slighly curious macro here should achieve\n"); 2154 std::fprintf(rdest, "%% that.\n"); 2155 std::fprintf(rdest, "\n"); 2156 std::fprintf(rdest, 2157 "symbolic macro procedure get_character_metrics !*unused!*;\n"); 2158 std::fprintf(rdest, " list('progn,\n"); 2159 std::fprintf(rdest, 2160 " list('setq, 'metrics_hash!*, mkquote metrics_hash!*),\n"); 2161 std::fprintf(rdest, 2162 " list('setq, 'fontkern!*, mkquote fontkern!*),\n"); 2163 std::fprintf(rdest, 2164 " list('setq, 'kerntable!*, mkquote kerntable!*),\n"); 2165 std::fprintf(rdest, 2166 " list('setq, 'ligaturetable!*, mkquote ligaturetable!*),\n"); 2167 std::fprintf(rdest, " \"character metrics established\");\n"); 2168 std::fprintf(rdest, "\n"); 2169 std::fprintf(rdest, 2170 "%% The call to the macro here expands into four simple assignments.\n"); 2171 std::fprintf(rdest, "symbolic get_character_metrics();\n"); 2172 std::fprintf(rdest, "\n"); 2173 std::fprintf(rdest, 2174 "fluid '(c_width c_llx c_lly c_urx c_ury c_kerninfo);\n"); 2175 std::fprintf(rdest, "\n"); 2176 std::fprintf(rdest, 2177 "%% This code looks up a font/codepoint pair in the tables and returns\n"); 2178 std::fprintf(rdest, 2179 "%% a character width (escapement) and a bounding box. 
It leaves behind\n"); 2180 std::fprintf(rdest, 2181 "%% c_kerninfo - and index into a kern and ligature table.\n"); 2182 std::fprintf(rdest, "\n"); 2183 std::fprintf(rdest, 2184 "symbolic procedure lookupchar(fontnum, codepoint);\n"); 2185 std::fprintf(rdest, " begin\n"); 2186 std::fprintf(rdest, 2187 " scalar v, h1, h2, w, whi, wlo, fullkey, key;\n"); 2188 std::fprintf(rdest, "%% pack codes into fewer bits\n"); 2189 std::fprintf(rdest, " if fontnum < 2 then <<\n"); 2190 std::fprintf(rdest, 2191 " if land(codepoint, 0xd800) = 0xd800 then codepoint := 0xffff\n"); 2192 std::fprintf(rdest, " else if codepoint >= 0x10000 then <<\n"); 2193 std::fprintf(rdest, 2194 " if codepoint < 0x10800 then codepoint := 0xd800 + land(codepoint, 0xfff)\n"); 2195 std::fprintf(rdest, " else codepoint := 0xffff >> >>\n"); 2196 std::fprintf(rdest, 2197 " else if codepoint >= 0x4000 and codepoint < 0x8000 then codepoint := 0xffff\n"); 2198 std::fprintf(rdest, 2199 " else if codepoint >= 0x1d000 and codepoint < 0x1e000 then\n"); 2200 std::fprintf(rdest, 2201 " codepoint = 0x4000 + land(codepoint, 0xfff)\n"); 2202 std::fprintf(rdest, 2203 " else if codepoint >= 0x108000 and codepoint < 0x109000 then\n"); 2204 std::fprintf(rdest, 2205 " codepoint = 0x5000 + land(codepoint, 0xfff)\n"); 2206 std::fprintf(rdest, 2207 " else if codepoint >= 0x10000 then codepoint := 0xffff;\n"); 2208 std::fprintf(rdest, 2209 " fullkey := lshift(fontnum, 16) + codepoint\n"); 2210 std::fprintf(rdest, " key := lshift(fullkey, -2);\n"); 2211 // I REALLY want the key to be positive here! 
2212 std::fprintf(rdest, " h1 := remainder(key, %d);\n", 2213 main_r.table_size); 2214 std::fprintf(rdest, " %% Hash table probe 1.\n"); 2215 std::fprintf(rdest, 2216 " v := land(getv32(w := getv(metrics_hash!*, h1), 0), 0x7ffff);\n"); 2217 std::fprintf(rdest, " if not (v = key) then <<\n"); 2218 std::fprintf(rdest, " h2 := remainder(key, %d) + %d;\n", 2219 main_r.modulus2, main_r.offset2); 2220 std::fprintf(rdest, " %% Hash table probe 2.\n"); 2221 std::fprintf(rdest, 2222 " v := land(getv32(w := getv(metrics_hash!*, h2), 0), 0x7ffff);\n"); 2223 std::fprintf(rdest, " if not (v = key) then <<\n"); 2224 std::fprintf(rdest, " h1 := h1 + h2;\n"); 2225 std::fprintf(rdest, " if h1 >= %d then h1 := h1 - %d;\n", 2226 main_r.table_size, main_r.table_size); 2227 std::fprintf(rdest, " %% Hash table probe 3.\n"); 2228 std::fprintf(rdest, 2229 " v := land(getv32(w := getv(metrics_hash!*, h1), 0), 0x7ffff);\n"); 2230 std::fprintf(rdest, 2231 " if not (v = key) then return nil >> >>;\n"); 2232 std::fprintf(rdest, " v := 2*land(fullkey, 3);\n"); 2233 std::fprintf(rdest, " wlo := getv32(w, v+2);\n"); 2234 std::fprintf(rdest, 2235 " if wlo = 0 then return nil; %% in hash table but no character here.\n"); 2236 std::fprintf(rdest, " whi := getv32(w, v+3);\n"); 2237 std::fprintf(rdest, 2238 " c_width := land(lshift(whi, -19), 0x1fff);\n"); 2239 std::fprintf(rdest, 2240 " c_llx := land(lshift(whi, -6), 0x1fff) - 3000;\n"); 2241 std::fprintf(rdest, " c_lly := land(lshift(wlo, -26), 0x3f) +\n"); 2242 std::fprintf(rdest, 2243 " land(lshift(whi, 6), 0xfc0) - 1000;\n"); 2244 std::fprintf(rdest, 2245 " c_urx := land(lshift(wlo, -13), 0x1fff) - 500;\n"); 2246 std::fprintf(rdest, " c_ury := land(wlo, 0x1fff) - 1000;\n"); 2247 std::fprintf(rdest, 2248 " if v = 0 then c_kerninfo := land(lshift(getv32(w, 0), -19), 0x7ff)\n"); 2249 std::fprintf(rdest, 2250 " else if v = 2 then c_kerninfo := land(lshift(getv32(w, 0), -30), 0x3) +\n"); 2251 std::fprintf(rdest, 2252 " land(lshift(getv32(w, 1), 2), 
0x7fc)\n"); 2253 std::fprintf(rdest, 2254 " else if v = 4 then c_kerninfo := land(lshift(getv32(w, 1), -9), 0x7ff)\n"); 2255 std::fprintf(rdest, 2256 " else c_kerninfo := land(lshift(getv32(w, 1), -20), 0x7ff);\n"); 2257 std::fprintf(rdest, " if not zerop c_kerninfo then\n"); 2258 std::fprintf(rdest, 2259 " c_kerninfo := c_kerninfo + getv16(fontkern!*, fontnum);\n"); 2260 std::fprintf(rdest, " return t\n"); 2261 std::fprintf(rdest, " end;\n"); 2262 std::fprintf(rdest, "\n"); 2263 std::fprintf(rdest, 2264 "symbolic procedure lookupkernadjustment codepoint;\n"); 2265 std::fprintf(rdest, " begin\n"); 2266 std::fprintf(rdest, " scalar i, w;\n"); 2267 std::fprintf(rdest, 2268 " if zerop (i := c_kerninfo) then return 0;\n"); 2269 std::fprintf(rdest, " a: w := getv32(kerntable!*, i);\n"); 2270 std::fprintf(rdest, " if land(w, 0x001fffff) = codepoint and\n"); 2271 std::fprintf(rdest, " zerop land(w, 0x00200000) then <<\n"); 2272 std::fprintf(rdest, " w := land(lshift(w, -23), 0x1ff);\n"); 2273 std::fprintf(rdest, 2274 " if not zerop land(w, 0x100) then w := w - 0x200;\n"); 2275 std::fprintf(rdest, " return w >>\n"); 2276 std::fprintf(rdest, 2277 " else if not zerop land(w, 0x00400000) then return 0;\n"); 2278 std::fprintf(rdest, " i := add1 i;\n"); 2279 std::fprintf(rdest, " go to a\n"); 2280 std::fprintf(rdest, " end;\n"); 2281 std::fprintf(rdest, "\n"); 2282 std::fprintf(rdest, "symbolic procedure lookupligature codepoint;\n"); 2283 std::fprintf(rdest, " begin\n"); 2284 std::fprintf(rdest, " scalar i, w;\n"); 2285 std::fprintf(rdest, 2286 " if zerop (i := c_kerninfo) then return nil;\n"); 2287 std::fprintf(rdest, " a: w := getv32(kerntable!*, i);\n"); 2288 std::fprintf(rdest, " if land(w, 0x001fffff) = codepoint and\n"); 2289 std::fprintf(rdest, " not zerop land(w, 0x00200000) then\n"); 2290 std::fprintf(rdest, 2291 " return getv32(ligaturetable!*, land(lshift(w, -23), 0x1ff))\n"); 2292 std::fprintf(rdest, 2293 " else if not zerop land(w, 0x00400000) then return 
nil;\n"); 2294 std::fprintf(rdest, " i := add1 i;\n"); 2295 std::fprintf(rdest, " go to a\n"); 2296 std::fprintf(rdest, " end;\n"); 2297 std::fprintf(rdest, "\n"); 2298 std::fprintf(rdest, "symbolic procedure accentposition key;\n"); 2299 std::fprintf(rdest, " begin\n"); 2300 std::fprintf(rdest, " scalar h1, h2, v, w;\n"); 2301 std::fprintf(rdest, " h1 := remainder(key, %d);\n", 2302 topcentre_r.table_size); 2303 std::fprintf(rdest, " %% Hash table probe 1.\n"); 2304 std::fprintf(rdest, 2305 " v := land(w := getv32(topcentre_hash!*, h1), 0x1fffff);\n"); 2306 std::fprintf(rdest, " if not (v = key) then <<\n"); 2307 std::fprintf(rdest, " h2 := remainder(key, %d) + %d;\n", 2308 topcentre_r.modulus2, topcentre_r.offset2); 2309 std::fprintf(rdest, " %% Hash table probe 2.\n"); 2310 std::fprintf(rdest, 2311 " v := land(w := getv32(topcentre_hash!*, h2), 0x1fffff);\n"); 2312 std::fprintf(rdest, " if not (v = key) then return 0 >>;\n"); 2313 std::fprintf(rdest, " return lshift(w, -21)\n"); 2314 std::fprintf(rdest, " end;\n"); 2315 std::fprintf(rdest, "\n"); 2316 std::fprintf(rdest, "end;\n\n"); 2317 std::fprintf(rdest, 2318 "%% Note that variants must be passed a codepoint and direction flag\n"); 2319 std::fprintf(rdest, "symbolic procedure variants key;\n"); 2320 std::fprintf(rdest, " begin\n"); 2321 std::fprintf(rdest, " scalar h1, h2, h3, v, w;\n"); 2322 std::fprintf(rdest, " h1 := remainder(key, %d);\n", 2323 variant_r.table_size); 2324 std::fprintf(rdest, " %% Hash table probe 1.\n"); 2325 std::fprintf(rdest, 2326 " v := getv32(w := getv(variant_hash!*, h1), 0);\n"); 2327 std::fprintf(rdest, " if not (v = key) then <<\n"); 2328 std::fprintf(rdest, " h2 := remainder(key, %d) + %d;\n", 2329 variant_r.modulus2, variant_r.offset2); 2330 std::fprintf(rdest, " %% Hash table probe 2.\n"); 2331 std::fprintf(rdest, 2332 " v := getv32(w := getv(variant_hash!*, h2), 0);\n"); 2333 std::fprintf(rdest, " if not (v = key) then <<\n"); 2334 std::fprintf(rdest, " h3 := remainder(h1 + 
h2, %d);\n", 2335 variant_r.table_size); 2336 std::fprintf(rdest, " %% Hash table probe 3.\n"); 2337 std::fprintf(rdest, 2338 " v := getv32(w := getv(variant_hash!*, h3), 0);\n"); 2339 std::fprintf(rdest, 2340 " if not (v = key) then return nil >> >>;\n"); 2341 std::fprintf(rdest, " return w\n"); 2342 std::fprintf(rdest, " end;\n"); 2343 std::fprintf(rdest, "\n"); 2344 std::fprintf(rdest, "symbolic procedure extension key;\n"); 2345 std::fprintf(rdest, " begin\n"); 2346 std::fprintf(rdest, " scalar h1, h2, h3, v, w;\n"); 2347 std::fprintf(rdest, " h1 := remainder(key, %d);\n", 2348 extension_r.table_size); 2349 std::fprintf(rdest, " %% Hash table probe 1.\n"); 2350 std::fprintf(rdest, 2351 " v := getv32(w := getv(extension_hash!*, h1), 0);\n"); 2352 std::fprintf(rdest, " if not (v = key) then <<\n"); 2353 std::fprintf(rdest, " h2 := remainder(key, %d) + %d;\n", 2354 extension_r.modulus2, extension_r.offset2); 2355 std::fprintf(rdest, " %% Hash table probe 2.\n"); 2356 std::fprintf(rdest, 2357 " v := getv32(w := getv(extension_hash!*, h2), 0);\n"); 2358 std::fprintf(rdest, " if not (v = key) then <<\n"); 2359 std::fprintf(rdest, " h3 := remainder(h1 + h2, %d);\n", 2360 extension_r.table_size); 2361 std::fprintf(rdest, " %% Hash table probe 3.\n"); 2362 std::fprintf(rdest, 2363 " v := getv32(w := getv(extension_hash!*, h3), 0);\n"); 2364 std::fprintf(rdest, 2365 " if not (v = key) then return nil >> >>;\n"); 2366 std::fprintf(rdest, " return w\n"); 2367 std::fprintf(rdest, " end;\n"); 2368 std::fprintf(rdest, "\n"); 2369 std::fprintf(rdest, "end;\n\n"); 2370 std::fprintf(rdest, "%% end of charmetrics.red\n"); 2371 std::fprintf(smldest, "(* end of charmetrics.sml *)\n"); 2372 std::fclose(dest); 2373 std::fclose(rdest); 2374 } 2375 #endif // DUMMY 2376 } 2377 2378 #else // CREATE 2379 // 2380 // I am putting the code that decodes and accesses metric information 2381 // in the same file as the program that reads raw ".afm" files and 2382 // packs the information for me. 
I hope that this keeps coding and decoding 2383 // together so they will remian in step. 2384 // 2385 2386 // lookupchar takes a font and a codepoint and returns true if there is 2387 // information about the character and false if not. If it returns true 2388 // it will have left information about the character in a number of 2389 // variables that can then be inspected. 2390 2391 #define CHAR_METRICS_TABLE_SIZE (sizeof(charmetrics)/sizeof(charmetrics[0])) 2392 2393 int c_width, c_llx, c_lly, c_urx, c_ury, c_kerninfo; 2394 2395 int lookupchar(int fontnum, int codepoint) 2396 { int fullkey = pack_character(fontnum, codepoint); // 21-bit key 2397 int key = fullkey >> 2; // because my hash table has line-size 4 2398 int v, h1, h2; 2399 uint64_t w; 2400 h1 = key % CHAR_METRICS_TABLE_SIZE; 2401 v = static_cast<int>(charmetrics[h1][0]) & 0x7ffff; 2402 if (v != key) 2403 { h2 = (key % CHAR_METRICS_MODULUS) + CHAR_METRICS_OFFSET; 2404 v = static_cast<int>(charmetrics[h2][0]) & 0x7ffff; 2405 if (v != key) 2406 { h1 += h2; 2407 if (h1 >= CHAR_METRICS_TABLE_SIZE) h1 -= CHAR_METRICS_TABLE_SIZE; 2408 v = static_cast<int>(charmetrics[h1][0]) & 0x7ffff; 2409 if (v != key) return 0; 2410 } 2411 } 2412 w = charmetrics[h1][1+(fullkey&3)]; 2413 // Even though the hash line exists this entry in it may be unused. 2414 if (w == 0) return 0; 2415 c_width = static_cast<int>(w >> 51) & 0x1fff; 2416 c_llx = (static_cast<int>(w >> 38) & 0x1fff) - 3000; 2417 c_lly = (static_cast<int>(w >> 26) & 0x0fff) - 1000; 2418 c_urx = (static_cast<int>(w >> 13) & 0x1fff) - 500; 2419 c_ury = (static_cast<int>(w) & 0x1fff) - 1000; 2420 // Based on the font and 11 bits of information from the hash table I will 2421 // set up a pointer into kerntable. The interpretation of that will be 2422 // considered in the function lookupkernandligature. If the 11 bit field 2423 // contains zero then there is neither kern nor ligature information 2424 // associated with this character. 
2425 v = static_cast<int>(charmetrics[h1][0] >> (19+11* 2426 (fullkey&3))) & 0x7ff; 2427 if (v != 0) v += fontkern[fontnum]; 2428 // c_kerninfo will be left zero if there is no information, otherwise an 2429 // index into a table. 2430 c_kerninfo = v; 2431 return 1; 2432 } 2433 2434 // Having looked up a single character I may now need to check for 2435 // ligature or kerning information. I will do that by passing the 2436 // codepoint of the successor character (which must be in the same 2437 // font). Thus the complete sequence will be 2438 // lookupchar(font_number, codepoint_for_first_character); 2439 // lookupkernandligature(codepoint_for_second_character); 2440 // This returns an int32_t where the bottom 21 bits are a codepoint for 2441 // a character that can replace the two that were specified. For instance 2442 // this can return a single ligature "fi" is the two input codepoints are 2443 // for "f" and "i", or it could return first "ff" for two "f" characters 2444 // that are adjacent, and then "ffi" for the resulting "ff" if followed 2445 // by "i". Those bits will be returned as zero if no ligature is available 2446 // (and that will almost always be the case). 2447 // The top 9 bits of the 32-bit word will be a signed value for a 2448 // kerning adjustment to the spacing between a pair of characters, relative 2449 // to 1000 as the notional height of the character cell. This is again 2450 // 0 if no adjustment is called for. As one of the more extreme examples, 2451 // in one font if an "A" is followed by a "V" the space between them can be 2452 // decreased by 109 units, while when a "T" is followed by a "W" an extra 2453 // 41 units (and perhaps less obviously an "L" followd by "-" (hyphen) 2454 // calls for even more extra space than that. 2455 // 2456 // I provide variants that collect just kern or just ligature information. 
2457 2458 int32_t lookupkernandligature(int codepoint) 2459 { int32_t r = 0; 2460 uint32_t w; 2461 int i; 2462 if ((i = c_kerninfo) == 0) return 2463 0; // No info based on current start. 2464 // The worst cases I can see in my fonts is the kern information for "W" 2465 // in STIX-Regular where around 50 characters following "W" get their spacing 2466 // adjusted. That gives some impression of the most extreme number of 2467 // times this loop will be traversed. For many characters there will be no 2468 // kern information at all, and when there is any it will usually be 2469 // pretty limited so average costs here should end up low. 2470 do 2471 { w = kerntable[i++]; 2472 // The kern table contains a sequence of 32-bit words. The low 21 bits of 2473 // each is a codepoint being the second character of a pair. The next two 2474 // bits are flags. One indicates whether the word is documenting kern or 2475 // ligature information. The other marks the final word of information 2476 // relating to a lead character. That leaves 9 bits. For kern information 2477 // that is a 9-bit signed spacing adjustment. For ligatures it is a 2478 // 9 bit unsigned index into a table of codepoints giving the single 2479 // character to be used to replace the initial pair. Note that one pair 2480 // of characters can (and often well!) have both kern and ligature information 2481 // which is why the return value here can hand back both and why searching 2482 // continues through all information about the relevant pair. It would be a 2483 // MESS if the tables included two entries for the same character pair but 2484 // yielding different results! I ought to police that while creating the 2485 // tables. 2486 // Because w is an unsigned value I do not need to mask the result of the 2487 // right shift. 
2488 if ((w & 0x001fffff) == codepoint) 2489 { if ((w & IS_LIGATURE) != 0) r |= ligaturetable[w >> 23]; 2490 else r |= (w & 0xff800000); 2491 } 2492 } 2493 while ((w & IS_BLOCKEND) == 0); 2494 return r; 2495 } 2496 2497 // Much the same as the above but ONLY looks for kern information and returns 2498 // its result as a simple integer. In case kern information is found this 2499 // is just slightly faster than using the more general method. 2500 2501 int32_t lookupkernadjustment(int codepoint) 2502 { int32_t w; 2503 int i; 2504 if ((i = c_kerninfo) == 0) return 2505 0; // No info based on current start. 2506 do 2507 { w = kerntable[i++]; 2508 // The shift right in the return statement relies on w being a signed 2509 // value and on signed shifts being arithmetic. 2510 if ((w & 0x001fffff) == codepoint && 2511 (w & IS_LIGATURE) == 0) return (w >> 23); 2512 } 2513 while ((w & IS_BLOCKEND) == 0); 2514 return 0; 2515 } 2516 2517 // Much the same as the above but ONLY looks for ligature information. 2518 2519 int32_t lookupligature(int codepoint) 2520 { uint32_t w; 2521 int i; 2522 if ((i = c_kerninfo) == 0) return 2523 0; // No info based on current start. 2524 do 2525 { w = kerntable[i++]; 2526 // I made w unsigned so that the shift right returned an unsigned index. 2527 if ((w & 0x001fffff) == codepoint && 2528 (w & IS_LIGATURE) != 0) return ligaturetable[w >> 23]; 2529 } 2530 while ((w & IS_BLOCKEND) == 0); 2531 return 0; 2532 } 2533 2534 #define TOPCENTRE_TABLE_SIZE (sizeof(topcentre)/sizeof(topcentre[0])) 2535 2536 // accentposition only applies to characters in STIXMath. It gives a horizontal 2537 // offset to be used when positioning an accent above a character. I *believe* 2538 // the intent is to use both the position information from the base character 2539 // and the accent and line them up... The code here returns 0 if no special 2540 // information is available. 
2541 2542 int accentposition(int code) 2543 { int hash1 = code % TOPCENTRE_TABLE_SIZE, hash2; 2544 int32_t r; 2545 if (((r = topcentre[hash1]) & 0x001fffff) == code) return (( 2546 int32_t)r)>>21; 2547 hash2 = (code % TOPCENTRE_MODULUS) + TOPCENTRE_OFFSET; 2548 if (((r = topcentre[hash2]) & 0x001fffff) == code) return (( 2549 int32_t)r)>>21; 2550 else return 0; 2551 } 2552 2553 #define VARIANT_TABLE_SIZE (sizeof(variant_table)/sizeof(variant_table[0])) 2554 2555 // Some characters have variants that represent gradually larger versions 2556 // of the same thing. A good example can be seen in the variations on 2557 // parenthesis, brackets and braces. There can also be horizontal size 2558 // varients such as wide overbars and circumflex accents that may be used to 2559 // go above wide items of various sorts. The code passed to character_variants 2560 // is the code point of the base character plus 0x00200000 if a horizontal 2561 // expansion is needed. The result is nullptr if nothing is available, or a 2562 // pointer to a block of 6 words otherwise. If this pointer is r, then r[0] 2563 // is the base character passed, but then r[1] tp r[5] are gradually larger 2564 // versions, or U+0000 when no further large versions are available. 2565 2566 const uint32_t *character_variants(int code) 2567 { int hash1 = code % VARIANT_TABLE_SIZE, hash2, hash3; 2568 int32_t r; 2569 if (variant_table[hash1][0] == code) return &variant_table[hash1][0]; 2570 hash2 = (code % VARIANT_MODULUS) + VARIANT_OFFSET; 2571 if (variant_table[hash2][0] == code) return &variant_table[hash2][0]; 2572 hash3 = (hash1 + hash2) % VARIANT_TABLE_SIZE; 2573 if (variant_table[hash3][0] == code) return &variant_table[hash3][0]; 2574 return nullptr; 2575 } 2576 2577 #define EXTENSION_TABLE_SIZE (sizeof(extension_table)/sizeof(extension_table[0])) 2578 2579 // A characters such as "{" is associated with (up to) 5 other characters that 2580 // can be placed together to ranfer a huge version of it. 
This retrieves 2581 // a table showing how to do that. 2582 2583 const uint32_t *character_extension(int code) 2584 { int hash1 = code % EXTENSION_TABLE_SIZE, hash2, hash3; 2585 int32_t r; 2586 if (extension_table[hash1][0] == code) return 2587 &extension_table[hash1][0]; 2588 hash2 = (code % EXTENSION_MODULUS) + EXTENSION_OFFSET; 2589 if (extension_table[hash2][0] == code) return 2590 &extension_table[hash2][0]; 2591 hash3 = (hash1 + hash2) % EXTENSION_TABLE_SIZE; 2592 if (extension_table[hash3][0] == code) return 2593 &extension_table[hash3][0]; 2594 return nullptr; 2595 } 2596 2597 #ifdef TEST 2598 // If TEST is defined then this code will try some very minimal tests. 2599 // Expected output is 2600 // 2601 // Hash table size was 10057 2602 // Second modulus, offset 8729 (1108) 2603 // "e": width 444 BB 25 -10 424 460 (630) 2604 // "f": width 333 BB 20 0 383 683 (636) 2605 // "g": width 500 BB 28 -218 470 460 (663) 2606 // "h": width 500 BB 9 0 487 683 (0) 2607 // "i": width 278 BB 16 0 253 683 (0) 2608 // "j": width 278 BB -70 -218 194 683 (0) 2609 // "k": width 500 BB 7 0 505 683 (0) 2610 // "l": width 278 BB 19 0 257 683 (669) 2611 // "m": width 778 BB 16 0 775 460 (701) 2612 // Kern/ligature data for sequence f-i is 14 64257 2613 // Kern/ligature data for sequence f-l is 44 64258 2614 // Top accent shift A=361 combining circumflex=-230 2615 // Paren sizes = U+000028, U+000028, U+1081e2, U+10824e, U+108287, U+1082bf 2616 // For { bottom U+10821e 0 600 1005 0 2617 // lower extender U+10821f 600 500 1010 1 2618 // middle piece U+10821d 200 200 1010 0 2619 // upper extender U+10821f 500 600 1010 1 2620 // top piece U+10821c 600 0 1005 0 2621 // For | bottom U+00007c 0 600 1380 0 2622 // extender U+00007c 600 0 1380 1 2623 // unused U+000000 0 0 0 0 2624 // unused U+000000 0 0 0 0 2625 // unused U+000000 0 0 0 0 2626 // 2627 // 2628 // The Kerm/ligature lines say that if in font STIX-Regular an "f" is 2629 // followed by an "i" then either the two may have their 
spacing adjusted 2630 // by 14 units or the pair may be replaced by the character at codepoint 2631 // 64257 (which is "fi")... and similarly for "f" followed by "l". The output 2632 // higher up tells us that in this font there are no kerning involving 2633 // a "j" followed by something else, while the (xxx) values are offsets 2634 // into a table of kerning information. "BB" is for "Bounding Box" and the 2635 // four numbers are for lower-left-x, lower-left-y, upper-right-x and 2636 // upper-right-y in that order. 2637 // The lower block is from the maths tables showing codepoints for a range 2638 // of sizes of left parentheses, and for ways to build up huge "{" and "|" 2639 // symbols. 2640 2641 int main(int argc, char *argv[]) 2642 { int i, r; 2643 const uint32_t *p; 2644 std::printf("====== Test program starting ======\n"); 2645 std::printf("Hash table size was %d\n", 2646 static_cast<int>(CHAR_METRICS_TABLE_SIZE)); 2647 std::printf("Second modulus, offset %d (%d)\n", 2648 static_cast<int>(CHAR_METRICS_MODULUS), 2649 static_cast<int>(CHAR_METRICS_OFFSET)); 2650 for (i='e'; i<'n'; i++) 2651 { r = lookupchar(F_Regular, i); 2652 if (r) std::printf("\"%c\": width %d BB %d %d %d %d (%d)\n", 2653 i, c_width, c_llx, c_lly, c_urx, c_ury, c_kerninfo); 2654 else std::printf("\"%c\" char not found\n", i); 2655 std::fflush(stdout); 2656 } 2657 if (!lookupchar(F_Regular, 2658 'f')) std::printf("Character \"f\" not found\n"); 2659 else 2660 { int32_t k = lookupkernandligature('i'); 2661 std::printf("Kern/ligature data for sequence f-i is %d %d\n", 2662 static_cast<int>(k >> 23), static_cast<int>(k & 0x001fffff)); 2663 std::fflush(stdout); 2664 k = lookupkernandligature('l'); 2665 std::printf("Kern/ligature data for sequence f-l is %d %d\n", 2666 static_cast<int>(k >> 23), static_cast<int>(k & 0x001fffff)); 2667 std::fflush(stdout); 2668 } 2669 std::printf("Top accent shift A=%d combining circumflex=%d\n", 2670 accentposition('A'), accentposition(770)); 2671 p = 
character_variants('('); 2672 if (p == nullptr) std::printf("Failed to find paren sizes\n"); 2673 else std::printf("Paren sizes = U+%.6x, U+%.6x, U+%.6x, U+%.6x, U+%.6x, U+%.6x\n", 2674 p[0], p[1], p[2], p[3], p[4], p[5]); 2675 p = character_extension('{'); 2676 if (p == nullptr) 2677 std::printf("Failed to find left brace extension data\n"); 2678 else 2679 { std::printf("For { bottom U+%.6x %d %d %d %d\n", 2680 p[1] & 0x001fffff, p[1]>>21, 2681 p[2] & 0x0000ffff, (p[2]>>16) & 0x00007fff, 2682 p[2]>>31); 2683 std::printf("lower extender U+%.6x %d %d %d %d\n", 2684 p[3] & 0x001fffff, p[3]>>21, 2685 p[4] & 0x0000ffff, (p[4]>>16) & 0x00007fff, 2686 p[4]>>31); 2687 std::printf("middle piece U+%.6x %d %d %d %d\n", 2688 p[5] & 0x001fffff, p[5]>>21, 2689 p[6] & 0x0000ffff, (p[6]>>16) & 0x00007fff, 2690 p[6]>>31); 2691 std::printf("upper extender U+%.6x %d %d %d %d\n", 2692 p[7] & 0x001fffff, p[7]>>21, 2693 p[8] & 0x0000ffff, (p[8]>>16) & 0x00007fff, 2694 p[8]>>31); 2695 std::printf("top piece U+%.6x %d %d %d %d\n", 2696 p[9] & 0x001fffff, p[9]>>21, 2697 p[10] & 0x0000ffff, (p[10]>>16) & 0x00007fff, 2698 p[10]>>31); 2699 } 2700 p = character_extension('|'); 2701 if (p == nullptr) 2702 std::printf("Failed to find vertical bar extension data\n"); 2703 else 2704 { std::printf("For | bottom U+%.6x %d %d %d %d\n", 2705 p[1] & 0x001fffff, p[1]>>21, 2706 p[2] & 0x00007fff, (p[2]>>15) & 0x0000ffff, 2707 p[2]>>31); 2708 std::printf("extender U+%.6x %d %d %d %d\n", 2709 p[3] & 0x001fffff, p[3]>>21, 2710 p[4] & 0x00007fff, (p[4]>>15) & 0x0000ffff, 2711 p[4]>>31); 2712 std::printf("unused U+%.6x %d %d %d %d\n", 2713 p[5] & 0x001fffff, p[5]>>21, 2714 p[6] & 0x00007fff, (p[6]>>15) & 0x0000ffff, 2715 p[6]>>31); 2716 std::printf("unused U+%.6x %d %d %d %d\n", 2717 p[7] & 0x001fffff, p[7]>>21, 2718 p[8] & 0x00007fff, (p[8]>>15) & 0x0000ffff, 2719 p[8]>>31); 2720 std::printf("unused U+%.6x %d %d %d %d\n", 2721 p[9] & 0x001fffff, p[9]>>21, 2722 p[10] & 0x00007fff, (p[10]>>15) & 0x0000ffff, 
2723 p[10]>>31); 2724 } 2725 return 0; 2726 } 2727 2728 #endif // TEST 2729 #endif // CREATE 2730 2731 // end of charmetrics.cpp 2732