1(* charmetrics1.sml *) 2 3(* This decodes stuff from the "charmetrics.sml" hash tables to give 4 * information about characters in the STIX (and cmuntt and odokai) 5 * fonts. This is a component of ACN adjustments to the code here so that 6 * use is made of Unicode fonts not the original TeX ones... 7 *) 8 9(* To perform boolean operations on 32-bit integers I need to turn things 10 into LargeWord items. This looks clumsy and maybe something simpler 11 would be OK (eg maybe I do not need to go via LargeInt) but of this 12 works I will feel OK. At least this seems to work on Poly/ML - but 13 in general SML may not provide very good guarantees about the width 14 of integers that it supports. 15 *) 16 17fun andb(a, b) = 18 Int.fromLarge ( 19 LargeWord.toLargeInt ( 20 LargeWord.andb ( 21 LargeWord.fromLargeInt ( 22 LargeInt.toLarge a), 23 LargeWord.fromLargeInt ( 24 LargeInt.toLarge b)))); 25 26fun orb(a, b) = 27 Int.fromLarge ( 28 LargeWord.toLargeInt ( 29 LargeWord.orb ( 30 LargeWord.fromLargeInt ( 31 LargeInt.toLarge a), 32 LargeWord.fromLargeInt ( 33 LargeInt.toLarge b)))); 34 35fun notb a = 36 Int.fromLarge ( 37 LargeWord.toLargeInt ( 38 LargeWord.notb ( 39 LargeWord.fromLargeInt ( 40 LargeInt.toLarge a)))); 41 42fun <<(a, n) = 43 Int.fromLarge ( 44 LargeWord.toLargeInt ( 45 LargeWord.<< ( 46 LargeWord.fromLargeInt ( 47 LargeInt.toLarge a), 48 Word.fromInt n))); 49 50fun >>(a, n) = 51 Int.fromLarge ( 52 LargeWord.toLargeInt ( 53 LargeWord.>> ( 54 LargeWord.fromLargeInt ( 55 LargeInt.toLarge a), 56 Word.fromInt n))); 57 58fun getv v n = Vector.sub(v, n); 59 60(* The hash table access code here has to be keyed to cod ein a file 61 * called "charmetrics.cpp" in the CSL part of the Reduce source code. 62 * The stuff there accesses raw font metric information (which is in turn 63 * extracted from the .otf font files via a collection of messy steps) and 64 * picks hash parameters that let me end up with especially compact 65 * metric tables. Well even in the form used here the tables consume 66 * quite a lot of space, but I believe they represent a good balance 67 * between spaec saving and compact representation given that the Unicode 68 * fonts concerned have many thousands of characters (specifically I have 69 * over 30,000 codepoints with measurements). 70 *) 71 72fun lookupchar fontnum codepoint = 73 let 74 val cp1 = 75(* I first map the font code and (21-bit) codepoint so that codepoints use 76 only 16 bits, with certain invalid or unused ranges mapped onto the 77 value 0xffff (which does not represent a valid character). The messy 78 set of tests here are mostly present because in a hash table I need to 79 store keys (as well as values) and if I did not compress the keys 80 somewhat I would be very tight on bits when fitting my table into 81 a collection of 32-bit words. 82 *) 83 if fontnum < 2 then 84 if andb(codepoint, 0xd800) = 0xd800 then 0xffff 85 else if codepoint >= 0x10000 then 86 if codepoint < 0x10800 then 0xd800 + andb(codepoint, 0xfff) 87 else 0xffff 88 else codepoint 89 else if codepoint >= 0x4000 andalso codepoint < 0x8000 then 0xffff 90 else if codepoint >= 0x1d8000 andalso codepoint < 0x1d9000 then 91 0x5000 + andb(codepoint, 0xfff) 92 else if codepoint >= 0x10000 then 0xffff 93 else codepoint 94(* I combine the (reduced) codempoint with the font number to get a key *) 95 val fullkey = <<(fontnum, 16) + cp1 96 val key = >>(fullkey, 2) 97(* My first hash location is merely the key reduced modulo the hash table 98 size. This is clearly rather cheap and simplistic, but turns out to 99 be good enough. 100 *) 101 val h1 = key mod hashsize 102 val w = getv metrics_hash h1; 103 val v = andb((getv w 0), 0x7ffff) 104 in 105(* If I find my character at its first choice location can unpick information 106 and the sub-function lookupchar1 does that. 107 *) 108 if v = key then lookupchar1 fullkey w fontnum 109 else let 110(* A second hash function is an offset version of the key modulo a different 111 number. These two numbers were chosen following an exhausive search to 112 find values that led to high hash table occupancy. 113 *) 114 val h2 = (key mod CHAR_METRICS_MODULUS) + CHAR_METRICS_OFFSET; 115 val w = getv metrics_hash h2; 116 val v = andb((getv w 0), 0x7ffff) 117 in 118 if v = key then lookupchar1 fullkey w fontnum 119 else let 120(* A third (and final) hash function is simply the sum of the previous two, 121 obviously reduced modulo the table size. 122 *) 123 val h3 = (h1 + h2) mod hashsize; 124 val w = getv metrics_hash h3; 125 val v = andb((getv w 0), 0x7ffff) 126 in 127 if v = key then lookupchar1 fullkey w fontnum 128(* If the character was not present I will return NONE *) 129 else NONE 130 end 131 end 132 end 133 134and lookupchar1 fullkey row fontnum = 135 let 136 val v = 2*andb(fullkey, 3) 137 val wlo = getv row (v+2) 138 in 139 if wlo = 0 then NONE 140 else 141 let 142 val whi = getv row (v+3) 143 val width = andb(>>(whi, 19), 0x1fff) 144 val llx = andb(>>(whi, 6), 0x1fff) - 3000 145 val lly = andb(>>(wlo, 26), 0x3f) + 146 andb(<<(whi, 6), 0xfc0) - 1000 147 val urx = andb(>>(wlo, 13), 0x1fff) - 500 148 val ury = andb(wlo, 0x1fff) - 1000 149 val ki = 150 if v = 0 then andb(>>(getv row 0, 19), 0x7ff) 151 else if v = 2 then andb(>>(getv row 0, 30), 0x3) + 152 andb(<<(getv row 1, 2), 0x7fc) 153 else if v = 4 then andb(>>(getv row 1, 9), 0x7ff) 154 else (* v = 6 *) andb(>>(getv row 1, 20), 0x7ff); 155 val kerninfo = 156 if ki = 0 then 0 157 else ki + (getv fontkern fontnum) 158 in 159 SOME (width, llx, lly, urx, ury, kerninfo) 160 end 161 end; 162 163 164(* Given the output from lookupchar you can then the following with a 165 second codepoint (which must be in the same font) to find an adjustment 166 to the space between the two characters so that they are properly 167 kerned. The initial metric information provides an index into the 168 main kern table, and the code then does a linear search starting from 169 there to seek information relevant to the succesor character. This 170 is sort of reasonable because any one character only has a modest 171 number of successor thet kern with it. The table in fact contains both 172 kern and ligature information, and a bit flags a particular entry to 173 note which sort of adjustment is being recorded in any given word. 174 *) 175 176fun lookupkernadjustment NONE cp = 0 177 | lookupkernadjustment (SOME (width, llx, lly, urx, ury, kerninfo)) cp = 178 if kerninfo = 0 then 0 179 else let 180 fun seek i = 181 let 182 val w = getv kerntable i 183 in 184 if andb(w, 0x001fffff) = cp andalso 185 andb(w, 0x00200000) = 0 then 186 let 187 val w1 = andb(>>(w, 23), 0x1ff) 188 in 189 if andb(w1, 0x100) = 0 then w1 190 else w1 - 0x200 191 end 192 else if andb(w, 0x00400000) <> 0 then 0 193 else seek (i + 1) 194 end 195 in 196 seek kerninfo 197 end; 198 199(* For instance if you have just looked up "f" and you now go 200 (lookupligature ... "i") you should get back SOME ("fi") where the 201 result is the numeric codepoint for an f-i-ligature. If no ligature 202 is available you get back NONE. 203 *) 204 205fun lookupligature NONE cp = NONE 206 | lookupligature (SOME (width, llx, lly, urx, ury, kerninfo)) cp = 207 if kerninfo = 0 then NONE 208 else let 209 fun seek i = 210 let 211 val w = getv kerntable i 212 in 213 if andb(w, 0x001fffff) = cp andalso 214 andb(w, 0x00200000) <> 0 then SOME(andb(>>(w, 23), 0x1ff)) 215 else if andb(w, 0x00400000) <> 0 then NONE 216 else seek (i + 1) 217 end 218 in 219 seek kerninfo 220 end; 221 222(* On Original TeX accent positions above characters were adjusted using 223 information that was stored in the kern table. Here I have a separate 224 table for accent offsets, arranged as a hash table that uses at worst 225 two probes. This returns 0 if there us no adjustment needed (or 226 know adjustment know about). Note that the values in the table 227 are treated as 32-bit signed numbers. This information is only 228 present in the STIXMath font (of the fonts I am using) and so I do 229 not worry about other fonts here! 230 *) 231 232fun extend11 n = 233 if andb(n, 0x400) = 0 then n 234 else n - 0x800; 235 236fun accentposition key = 237 let 238 val h1 = key mod TOPCENTRE_SIZE 239 val w = getv topcentre_hash h1 240 val v = andb(w, 0x1fffff) 241 in 242 if v = key then extend11(>>(w, 21)) 243 else 244 let 245 val h2 = key mod TOPCENTRE_MODULUS + TOPCENTRE_OFFSET 246 val w = getv topcentre_hash h2 247 val v = andb(w, 0x1fffffff) 248 in 249 if v = key then extend11(>>(w, 21)) 250 else 0 251 end 252 end; 253 254(* The the STIXMath font there are tables for "variants" and for 255 "extended characters". The first copes with single glyphs that 256 render as a range of sized of (sor instance) parentheses. The 257 latter gives information about a set of characters that can be used 258 to build up seriously over-sized delimiters as in 259 / top hook 260 | top extension 261 < middle part 262 | lower extension 263 \ lower hook 264 At present I do not really understand the intent of all the 265 data present and associated with the parts! As with accent 266 positions the data I have here only applies in the STIXMath font. 267 (variants cp) returns a Vector of length 6 where each element 268 is either a codepoint or zero (or NONE if no variants are 269 available at all). 270*) 271 272fun variants key = 273 let 274 val h1 = key mod VARIANT_SIZE 275 val w = getv variant_hash h1 276 val v = getv w 0 277 in 278 if v = key then SOME w 279 else 280 let 281 val h2 = key mod VARIANT_MODULUS + VARIANT_OFFSET 282 val w = getv variant_hash h2 283 val v = getv w 0 284 in 285 if v = key then SOME w 286 else 287 let 288 val h3 = (h1 + h2) mod VARIANT_SIZE 289 val w = getv variant_hash h1 290 val v = getv w 0 291 in 292 if v = key then SOME w 293 else NONE 294 end 295 end 296 end; 297 298(* Same sort of idea but for extensions and you get back a longer vector *) 299 300fun extension key = 301 let 302 val h1 = key mod EXTENSION_SIZE 303 val w = getv extension_hash h1 304 val v = getv w 0 305 in 306 if v = key then SOME w 307 else 308 let 309 val h2 = key mod EXTENSION_MODULUS + EXTENSION_OFFSET 310 val w = getv extension_hash h2 311 val v = getv w 0 312 in 313 if v = key then SOME w 314 else 315 let 316 val h3 = (h1 + h2) mod EXTENSION_SIZE 317 val w = getv extension_hash h1 318 val v = getv w 0 319 in 320 if v = key then SOME w 321 else NONE 322 end 323 end 324 end; 325 326 327(* end of Metrics.sml *) 328 329 330