1# This software is Copyright (c) 2012-2018 magnum, and it is hereby 2# released to the general public under the following terms: 3# Redistribution and use in source and binary forms, with or without 4# modification, are permitted. 5# 6# Generic implementation of "dumb" exhaustive search of FULL Unicode. 7# Default is to try *all* allocated characters in Unicode v11 (there's 8# 137,046 of them). Even if a fast format can exhaust two characters in one 9# hour, three characters would take 12 years... 10# 11# Note that these modes will handle --max-len differently than normal: They 12# will consider number of characters as opposed to number of bytes. This 13# means you can naturally just use e.g. --max-len=3 for generating all 14# three-character candidates (which may be up to 12 bytes each). 15# 16# Also note that for UTF-16 formats, the resulting plaintext size within the 17# format will be up to four bytes (two 16-bit words) due to use of surrogates 18# for characters above U+FFFF. This means a format which normally handles up 19# to 27 characters may be limited to only 13 characters, worst case. 20[List.External:Dumb32] 21int maxlength; // Maximum password length to try 22int last; // Last character position, zero-based 23int lastid; // Character index in the last position 24int id[0x7f]; // Current character indices for other positions 25int charset[0x22000], c0; // Characters 26 27void init() 28{ 29 int minlength; 30 int i, c; 31 32 # Trigger UTF-32 handling in External mode 33 utf32 = 1; 34 35 if (req_minlen) 36 minlength = req_minlen; 37 else 38 minlength = 1; 39 if (req_maxlen) 40 maxlength = req_maxlen; 41 else 42 maxlength = 2; 43 44/* 45 * This defines the character set. This is auto-generated from UnicodeData.txt 46 * and we skip control characters. 47 */ 48 i = 0; 49// 0000..007F; Basic Latin 50 c = 0x20; // from SPACE 51 while (c <= 0x7e) // ..to TILDE 52 charset[i++] = c++; 53// 0080..00FF; Latin-1 Supplement 54 c = 0xa0; // from NO-BREAK SPACE 55 while (c <= 0xff) // ..to LATIN SMALL LETTER Y WITH DIAERESIS 56 charset[i++] = c++; 57// 0100..017F; Latin Extended-A 58 c = 0x100; // from LATIN CAPITAL LETTER A WITH MACRON 59 while (c <= 0x17f) // ..to LATIN SMALL LETTER LONG S 60 charset[i++] = c++; 61// 0180..024F; Latin Extended-B 62 c = 0x180; // from LATIN SMALL LETTER B WITH STROKE 63 while (c <= 0x24f) // ..to LATIN SMALL LETTER Y WITH STROKE 64 charset[i++] = c++; 65// 0250..02AF; IPA Extensions 66 c = 0x250; // from LATIN SMALL LETTER TURNED A 67 while (c <= 0x2af) // ..to LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL 68 charset[i++] = c++; 69// 02B0..02FF; Spacing Modifier Letters 70 c = 0x2b0; // from MODIFIER LETTER SMALL H 71 while (c <= 0x2ff) // ..to MODIFIER LETTER LOW LEFT ARROW 72 charset[i++] = c++; 73// 0300..036F; Combining Diacritical Marks 74 c = 0x300; // from COMBINING GRAVE ACCENT 75 while (c <= 0x36f) // ..to COMBINING LATIN SMALL LETTER X 76 charset[i++] = c++; 77// 0370..03FF; Greek and Coptic 78 c = 0x370; // from GREEK CAPITAL LETTER HETA 79 while (c <= 0x377) // ..to GREEK SMALL LETTER PAMPHYLIAN DIGAMMA 80 charset[i++] = c++; 81 c = 0x37a; // from GREEK YPOGEGRAMMENI 82 while (c <= 0x37f) // ..to GREEK CAPITAL LETTER YOT 83 charset[i++] = c++; 84 c = 0x384; // from GREEK TONOS 85 while (c <= 0x38a) // ..to GREEK CAPITAL LETTER IOTA WITH TONOS 86 charset[i++] = c++; 87 c = 0x38e; // from GREEK CAPITAL LETTER UPSILON WITH TONOS 88 while (c <= 0x3a1) // ..to GREEK CAPITAL LETTER RHO 89 charset[i++] = c++; 90 c = 0x3a3; // from GREEK CAPITAL LETTER SIGMA 91 while (c <= 0x3ff) // ..to GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 92 charset[i++] = c++; 93// 0400..04FF; Cyrillic 94 c = 0x400; // from CYRILLIC CAPITAL LETTER IE WITH GRAVE 95 while (c <= 0x4ff) // ..to CYRILLIC SMALL LETTER HA WITH STROKE 96 charset[i++] = c++; 97// 0500..052F; Cyrillic Supplement 98 c = 0x500; // from CYRILLIC CAPITAL LETTER KOMI DE 99 while (c <= 0x52f) // ..to CYRILLIC SMALL LETTER EL WITH DESCENDER 100 charset[i++] = c++; 101// 0530..058F; Armenian 102 c = 0x531; // from ARMENIAN CAPITAL LETTER AYB 103 while (c <= 0x556) // ..to ARMENIAN CAPITAL LETTER FEH 104 charset[i++] = c++; 105 c = 0x559; // from ARMENIAN MODIFIER LETTER LEFT HALF RING 106 while (c <= 0x58a) // ..to ARMENIAN HYPHEN 107 charset[i++] = c++; 108 charset[i++] = 0x58d; // RIGHT-FACING ARMENIAN ETERNITY SIGN 109 charset[i++] = 0x58f; // ARMENIAN DRAM SIGN 110// 0590..05FF; Hebrew 111 c = 0x591; // from HEBREW ACCENT ETNAHTA 112 while (c <= 0x5c7) // ..to HEBREW POINT QAMATS QATAN 113 charset[i++] = c++; 114 c = 0x5d0; // from HEBREW LETTER ALEF 115 while (c <= 0x5ea) // ..to HEBREW LETTER TAV 116 charset[i++] = c++; 117 c = 0x5ef; // from HEBREW YOD TRIANGLE 118 while (c <= 0x5f4) // ..to HEBREW PUNCTUATION GERSHAYIM 119 charset[i++] = c++; 120// 0600..06FF; Arabic 121 c = 0x600; // from ARABIC NUMBER SIGN 122 while (c <= 0x61c) // ..to ARABIC LETTER MARK 123 charset[i++] = c++; 124 c = 0x61e; // from ARABIC TRIPLE DOT PUNCTUATION MARK 125 while (c <= 0x6ff) // ..to ARABIC LETTER HEH WITH INVERTED V 126 charset[i++] = c++; 127// 0700..074F; Syriac 128 c = 0x700; // from SYRIAC END OF PARAGRAPH 129 while (c <= 0x70d) // ..to SYRIAC HARKLEAN ASTERISCUS 130 charset[i++] = c++; 131 c = 0x70f; // from SYRIAC ABBREVIATION MARK 132 while (c <= 0x74a) // ..to SYRIAC BARREKH 133 charset[i++] = c++; 134 charset[i++] = 0x74d; // SYRIAC LETTER SOGDIAN ZHAIN 135 charset[i++] = 0x74f; // SYRIAC LETTER SOGDIAN FE 136// 0750..077F; Arabic Supplement 137 c = 0x750; // from ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW 138 while (c <= 0x77f) // ..to ARABIC LETTER KAF WITH TWO DOTS ABOVE 139 charset[i++] = c++; 140// 0780..07BF; Thaana 141 c = 0x780; // from THAANA LETTER HAA 142 while (c <= 0x7b1) // ..to THAANA LETTER NAA 143 charset[i++] = c++; 144// 07C0..07FF; NKo 145 c = 0x7c0; // from NKO DIGIT ZERO 146 while (c <= 0x7fa) // ..to NKO LAJANYALAN 147 charset[i++] = c++; 148 charset[i++] = 0x7fd; // NKO DANTAYALAN 149 charset[i++] = 0x7ff; // NKO TAMAN SIGN 150// 0800..083F; Samaritan 151 c = 0x800; // from SAMARITAN LETTER ALAF 152 while (c <= 0x82d) // ..to SAMARITAN MARK NEQUDAA 153 charset[i++] = c++; 154 c = 0x830; // from SAMARITAN PUNCTUATION NEQUDAA 155 while (c <= 0x83e) // ..to SAMARITAN PUNCTUATION ANNAAU 156 charset[i++] = c++; 157// 0840..085F; Mandaic 158 c = 0x840; // from MANDAIC LETTER HALQA 159 while (c <= 0x85b) // ..to MANDAIC GEMINATION MARK 160 charset[i++] = c++; 161 charset[i++] = 0x85e; // MANDAIC PUNCTUATION 162// 0860..086F; Syriac Supplement 163 c = 0x860; // from SYRIAC LETTER MALAYALAM NGA 164 while (c <= 0x86a) // ..to SYRIAC LETTER MALAYALAM SSA 165 charset[i++] = c++; 166// 08A0..08FF; Arabic Extended-A 167 c = 0x8a0; // from ARABIC LETTER BEH WITH SMALL V BELOW 168 while (c <= 0x8b4) // ..to ARABIC LETTER KAF WITH DOT BELOW 169 charset[i++] = c++; 170 c = 0x8b6; // from ARABIC LETTER BEH WITH SMALL MEEM ABOVE 171 while (c <= 0x8bd) // ..to ARABIC LETTER AFRICAN NOON 172 charset[i++] = c++; 173 c = 0x8d3; // from ARABIC SMALL LOW WAW 174 while (c <= 0x8ff) // ..to ARABIC MARK SIDEWAYS NOON GHUNNA 175 charset[i++] = c++; 176// 0900..097F; Devanagari 177 c = 0x900; // from DEVANAGARI SIGN INVERTED CANDRABINDU 178 while (c <= 0x97f) // ..to DEVANAGARI LETTER BBA 179 charset[i++] = c++; 180// 0980..09FF; Bengali 181 c = 0x980; // from BENGALI ANJI 182 while (c <= 0x983) // ..to BENGALI SIGN VISARGA 183 charset[i++] = c++; 184 c = 0x985; // from BENGALI LETTER A 185 while (c <= 0x98c) // ..to BENGALI LETTER VOCALIC L 186 charset[i++] = c++; 187 charset[i++] = 0x98f; // BENGALI LETTER E 188 charset[i++] = 0x990; // BENGALI LETTER AI 189 c = 0x993; // from BENGALI LETTER O 190 while (c <= 0x9a8) // ..to BENGALI LETTER NA 191 charset[i++] = c++; 192 c = 0x9aa; // from BENGALI LETTER PA 193 while (c <= 0x9b0) // ..to BENGALI LETTER RA 194 charset[i++] = c++; 195 c = 0x9b6; // from BENGALI LETTER SHA 196 while (c <= 0x9b9) // ..to BENGALI LETTER HA 197 charset[i++] = c++; 198 c = 0x9bc; // from BENGALI SIGN NUKTA 199 while (c <= 0x9c4) // ..to BENGALI VOWEL SIGN VOCALIC RR 200 charset[i++] = c++; 201 charset[i++] = 0x9c7; // BENGALI VOWEL SIGN E 202 charset[i++] = 0x9c8; // BENGALI VOWEL SIGN AI 203 c = 0x9cb; // from BENGALI VOWEL SIGN O 204 while (c <= 0x9ce) // ..to BENGALI LETTER KHANDA TA 205 charset[i++] = c++; 206 charset[i++] = 0x9dc; // BENGALI LETTER RRA 207 charset[i++] = 0x9dd; // BENGALI LETTER RHA 208 c = 0x9df; // from BENGALI LETTER YYA 209 while (c <= 0x9e3) // ..to BENGALI VOWEL SIGN VOCALIC LL 210 charset[i++] = c++; 211 c = 0x9e6; // from BENGALI DIGIT ZERO 212 while (c <= 0x9fe) // ..to BENGALI SANDHI MARK 213 charset[i++] = c++; 214// 0A00..0A7F; Gurmukhi 215 charset[i++] = 0xa01; // GURMUKHI SIGN ADAK BINDI 216 charset[i++] = 0xa03; // GURMUKHI SIGN VISARGA 217 c = 0xa05; // from GURMUKHI LETTER A 218 while (c <= 0xa0a) // ..to GURMUKHI LETTER UU 219 charset[i++] = c++; 220 charset[i++] = 0xa0f; // GURMUKHI LETTER EE 221 charset[i++] = 0xa10; // GURMUKHI LETTER AI 222 c = 0xa13; // from GURMUKHI LETTER OO 223 while (c <= 0xa28) // ..to GURMUKHI LETTER NA 224 charset[i++] = c++; 225 c = 0xa2a; // from GURMUKHI LETTER PA 226 while (c <= 0xa30) // ..to GURMUKHI LETTER RA 227 charset[i++] = c++; 228 charset[i++] = 0xa32; // GURMUKHI LETTER LA 229 charset[i++] = 0xa33; // GURMUKHI LETTER LLA 230 charset[i++] = 0xa35; // GURMUKHI LETTER VA 231 charset[i++] = 0xa36; // GURMUKHI LETTER SHA 232 charset[i++] = 0xa38; // GURMUKHI LETTER SA 233 charset[i++] = 0xa39; // GURMUKHI LETTER HA 234 c = 0xa3e; // from GURMUKHI VOWEL SIGN AA 235 while (c <= 0xa42) // ..to GURMUKHI VOWEL SIGN UU 236 charset[i++] = c++; 237 charset[i++] = 0xa47; // GURMUKHI VOWEL SIGN EE 238 charset[i++] = 0xa48; // GURMUKHI VOWEL SIGN AI 239 charset[i++] = 0xa4b; // GURMUKHI VOWEL SIGN OO 240 charset[i++] = 0xa4d; // GURMUKHI SIGN VIRAMA 241 c = 0xa59; // from GURMUKHI LETTER KHHA 242 while (c <= 0xa5c) // ..to GURMUKHI LETTER RRA 243 charset[i++] = c++; 244 c = 0xa66; // from GURMUKHI DIGIT ZERO 245 while (c <= 0xa76) // ..to GURMUKHI ABBREVIATION SIGN 246 charset[i++] = c++; 247// 0A80..0AFF; Gujarati 248 charset[i++] = 0xa81; // GUJARATI SIGN CANDRABINDU 249 charset[i++] = 0xa83; // GUJARATI SIGN VISARGA 250 c = 0xa85; // from GUJARATI LETTER A 251 while (c <= 0xa8d) // ..to GUJARATI VOWEL CANDRA E 252 charset[i++] = c++; 253 charset[i++] = 0xa8f; // GUJARATI LETTER E 254 charset[i++] = 0xa91; // GUJARATI VOWEL CANDRA O 255 c = 0xa93; // from GUJARATI LETTER O 256 while (c <= 0xaa8) // ..to GUJARATI LETTER NA 257 charset[i++] = c++; 258 c = 0xaaa; // from GUJARATI LETTER PA 259 while (c <= 0xab0) // ..to GUJARATI LETTER RA 260 charset[i++] = c++; 261 charset[i++] = 0xab2; // GUJARATI LETTER LA 262 charset[i++] = 0xab3; // GUJARATI LETTER LLA 263 c = 0xab5; // from GUJARATI LETTER VA 264 while (c <= 0xab9) // ..to GUJARATI LETTER HA 265 charset[i++] = c++; 266 c = 0xabc; // from GUJARATI SIGN NUKTA 267 while (c <= 0xac5) // ..to GUJARATI VOWEL SIGN CANDRA E 268 charset[i++] = c++; 269 charset[i++] = 0xac7; // GUJARATI VOWEL SIGN E 270 charset[i++] = 0xac9; // GUJARATI VOWEL SIGN CANDRA O 271 charset[i++] = 0xacb; // GUJARATI VOWEL SIGN O 272 charset[i++] = 0xacd; // GUJARATI SIGN VIRAMA 273 c = 0xae0; // from GUJARATI LETTER VOCALIC RR 274 while (c <= 0xae3) // ..to GUJARATI VOWEL SIGN VOCALIC LL 275 charset[i++] = c++; 276 c = 0xae6; // from GUJARATI DIGIT ZERO 277 while (c <= 0xaf1) // ..to GUJARATI RUPEE SIGN 278 charset[i++] = c++; 279 c = 0xaf9; // from GUJARATI LETTER ZHA 280 while (c <= 0xaff) // ..to GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE 281 charset[i++] = c++; 282// 0B00..0B7F; Oriya 283 charset[i++] = 0xb01; // ORIYA SIGN CANDRABINDU 284 charset[i++] = 0xb03; // ORIYA SIGN VISARGA 285 c = 0xb05; // from ORIYA LETTER A 286 while (c <= 0xb0c) // ..to ORIYA LETTER VOCALIC L 287 charset[i++] = c++; 288 charset[i++] = 0xb0f; // ORIYA LETTER E 289 charset[i++] = 0xb10; // ORIYA LETTER AI 290 c = 0xb13; // from ORIYA LETTER O 291 while (c <= 0xb28) // ..to ORIYA LETTER NA 292 charset[i++] = c++; 293 c = 0xb2a; // from ORIYA LETTER PA 294 while (c <= 0xb30) // ..to ORIYA LETTER RA 295 charset[i++] = c++; 296 charset[i++] = 0xb32; // ORIYA LETTER LA 297 charset[i++] = 0xb33; // ORIYA LETTER LLA 298 c = 0xb35; // from ORIYA LETTER VA 299 while (c <= 0xb39) // ..to ORIYA LETTER HA 300 charset[i++] = c++; 301 c = 0xb3c; // from ORIYA SIGN NUKTA 302 while (c <= 0xb44) // ..to ORIYA VOWEL SIGN VOCALIC RR 303 charset[i++] = c++; 304 charset[i++] = 0xb47; // ORIYA VOWEL SIGN E 305 charset[i++] = 0xb48; // ORIYA VOWEL SIGN AI 306 charset[i++] = 0xb4b; // ORIYA VOWEL SIGN O 307 charset[i++] = 0xb4d; // ORIYA SIGN VIRAMA 308 charset[i++] = 0xb56; // ORIYA AI LENGTH MARK 309 charset[i++] = 0xb57; // ORIYA AU LENGTH MARK 310 charset[i++] = 0xb5c; // ORIYA LETTER RRA 311 charset[i++] = 0xb5d; // ORIYA LETTER RHA 312 c = 0xb5f; // from ORIYA LETTER YYA 313 while (c <= 0xb63) // ..to ORIYA VOWEL SIGN VOCALIC LL 314 charset[i++] = c++; 315 c = 0xb66; // from ORIYA DIGIT ZERO 316 while (c <= 0xb77) // ..to ORIYA FRACTION THREE SIXTEENTHS 317 charset[i++] = c++; 318// 0B80..0BFF; Tamil 319 charset[i++] = 0xb82; // TAMIL SIGN ANUSVARA 320 charset[i++] = 0xb83; // TAMIL SIGN VISARGA 321 c = 0xb85; // from TAMIL LETTER A 322 while (c <= 0xb8a) // ..to TAMIL LETTER UU 323 charset[i++] = c++; 324 charset[i++] = 0xb8e; // TAMIL LETTER E 325 charset[i++] = 0xb90; // TAMIL LETTER AI 326 c = 0xb92; // from TAMIL LETTER O 327 while (c <= 0xb95) // ..to TAMIL LETTER KA 328 charset[i++] = c++; 329 charset[i++] = 0xb99; // TAMIL LETTER NGA 330 charset[i++] = 0xb9a; // TAMIL LETTER CA 331 charset[i++] = 0xb9e; // TAMIL LETTER NYA 332 charset[i++] = 0xb9f; // TAMIL LETTER TTA 333 charset[i++] = 0xba3; // TAMIL LETTER NNA 334 charset[i++] = 0xba4; // TAMIL LETTER TA 335 charset[i++] = 0xba8; // TAMIL LETTER NA 336 charset[i++] = 0xbaa; // TAMIL LETTER PA 337 c = 0xbae; // from TAMIL LETTER MA 338 while (c <= 0xbb9) // ..to TAMIL LETTER HA 339 charset[i++] = c++; 340 c = 0xbbe; // from TAMIL VOWEL SIGN AA 341 while (c <= 0xbc2) // ..to TAMIL VOWEL SIGN UU 342 charset[i++] = c++; 343 charset[i++] = 0xbc6; // TAMIL VOWEL SIGN E 344 charset[i++] = 0xbc8; // TAMIL VOWEL SIGN AI 345 c = 0xbca; // from TAMIL VOWEL SIGN O 346 while (c <= 0xbcd) // ..to TAMIL SIGN VIRAMA 347 charset[i++] = c++; 348 c = 0xbe6; // from TAMIL DIGIT ZERO 349 while (c <= 0xbfa) // ..to TAMIL NUMBER SIGN 350 charset[i++] = c++; 351// 0C00..0C7F; Telugu 352 c = 0xc00; // from TELUGU SIGN COMBINING CANDRABINDU ABOVE 353 while (c <= 0xc0c) // ..to TELUGU LETTER VOCALIC L 354 charset[i++] = c++; 355 charset[i++] = 0xc0e; // TELUGU LETTER E 356 charset[i++] = 0xc10; // TELUGU LETTER AI 357 c = 0xc12; // from TELUGU LETTER O 358 while (c <= 0xc28) // ..to TELUGU LETTER NA 359 charset[i++] = c++; 360 c = 0xc2a; // from TELUGU LETTER PA 361 while (c <= 0xc39) // ..to TELUGU LETTER HA 362 charset[i++] = c++; 363 c = 0xc3d; // from TELUGU SIGN AVAGRAHA 364 while (c <= 0xc44) // ..to TELUGU VOWEL SIGN VOCALIC RR 365 charset[i++] = c++; 366 charset[i++] = 0xc46; // TELUGU VOWEL SIGN E 367 charset[i++] = 0xc48; // TELUGU VOWEL SIGN AI 368 c = 0xc4a; // from TELUGU VOWEL SIGN O 369 while (c <= 0xc4d) // ..to TELUGU SIGN VIRAMA 370 charset[i++] = c++; 371 charset[i++] = 0xc55; // TELUGU LENGTH MARK 372 charset[i++] = 0xc56; // TELUGU AI LENGTH MARK 373 charset[i++] = 0xc58; // TELUGU LETTER TSA 374 charset[i++] = 0xc5a; // TELUGU LETTER RRRA 375 c = 0xc60; // from TELUGU LETTER VOCALIC RR 376 while (c <= 0xc63) // ..to TELUGU VOWEL SIGN VOCALIC LL 377 charset[i++] = c++; 378 c = 0xc66; // from TELUGU DIGIT ZERO 379 while (c <= 0xc6f) // ..to TELUGU DIGIT NINE 380 charset[i++] = c++; 381 c = 0xc78; // from TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR 382 while (c <= 0xc7f) // ..to TELUGU SIGN TUUMU 383 charset[i++] = c++; 384// 0C80..0CFF; Kannada 385 c = 0xc80; // from KANNADA SIGN SPACING CANDRABINDU 386 while (c <= 0xc8c) // ..to KANNADA LETTER VOCALIC L 387 charset[i++] = c++; 388 charset[i++] = 0xc8e; // KANNADA LETTER E 389 charset[i++] = 0xc90; // KANNADA LETTER AI 390 c = 0xc92; // from KANNADA LETTER O 391 while (c <= 0xca8) // ..to KANNADA LETTER NA 392 charset[i++] = c++; 393 c = 0xcaa; // from KANNADA LETTER PA 394 while (c <= 0xcb3) // ..to KANNADA LETTER LLA 395 charset[i++] = c++; 396 c = 0xcb5; // from KANNADA LETTER VA 397 while (c <= 0xcb9) // ..to KANNADA LETTER HA 398 charset[i++] = c++; 399 c = 0xcbc; // from KANNADA SIGN NUKTA 400 while (c <= 0xcc4) // ..to KANNADA VOWEL SIGN VOCALIC RR 401 charset[i++] = c++; 402 charset[i++] = 0xcc6; // KANNADA VOWEL SIGN E 403 charset[i++] = 0xcc8; // KANNADA VOWEL SIGN AI 404 c = 0xcca; // from KANNADA VOWEL SIGN O 405 while (c <= 0xccd) // ..to KANNADA SIGN VIRAMA 406 charset[i++] = c++; 407 charset[i++] = 0xcd5; // KANNADA LENGTH MARK 408 charset[i++] = 0xcd6; // KANNADA AI LENGTH MARK 409 c = 0xce0; // from KANNADA LETTER VOCALIC RR 410 while (c <= 0xce3) // ..to KANNADA VOWEL SIGN VOCALIC LL 411 charset[i++] = c++; 412 c = 0xce6; // from KANNADA DIGIT ZERO 413 while (c <= 0xcef) // ..to KANNADA DIGIT NINE 414 charset[i++] = c++; 415 charset[i++] = 0xcf1; // KANNADA SIGN JIHVAMULIYA 416 charset[i++] = 0xcf2; // KANNADA SIGN UPADHMANIYA 417// 0D00..0D7F; Malayalam 418 c = 0xd00; // from MALAYALAM SIGN COMBINING ANUSVARA ABOVE 419 while (c <= 0xd03) // ..to MALAYALAM SIGN VISARGA 420 charset[i++] = c++; 421 c = 0xd05; // from MALAYALAM LETTER A 422 while (c <= 0xd0c) // ..to MALAYALAM LETTER VOCALIC L 423 charset[i++] = c++; 424 charset[i++] = 0xd0e; // MALAYALAM LETTER E 425 charset[i++] = 0xd10; // MALAYALAM LETTER AI 426 c = 0xd12; // from MALAYALAM LETTER O 427 while (c <= 0xd44) // ..to MALAYALAM VOWEL SIGN VOCALIC RR 428 charset[i++] = c++; 429 charset[i++] = 0xd46; // MALAYALAM VOWEL SIGN E 430 charset[i++] = 0xd48; // MALAYALAM VOWEL SIGN AI 431 c = 0xd4a; // from MALAYALAM VOWEL SIGN O 432 while (c <= 0xd4f) // ..to MALAYALAM SIGN PARA 433 charset[i++] = c++; 434 c = 0xd54; // from MALAYALAM LETTER CHILLU M 435 while (c <= 0xd63) // ..to MALAYALAM VOWEL SIGN VOCALIC LL 436 charset[i++] = c++; 437 c = 0xd66; // from MALAYALAM DIGIT ZERO 438 while (c <= 0xd7f) // ..to MALAYALAM LETTER CHILLU K 439 charset[i++] = c++; 440// 0D80..0DFF; Sinhala 441 charset[i++] = 0xd82; // SINHALA SIGN ANUSVARAYA 442 charset[i++] = 0xd83; // SINHALA SIGN VISARGAYA 443 c = 0xd85; // from SINHALA LETTER AYANNA 444 while (c <= 0xd96) // ..to SINHALA LETTER AUYANNA 445 charset[i++] = c++; 446 c = 0xd9a; // from SINHALA LETTER ALPAPRAANA KAYANNA 447 while (c <= 0xdb1) // ..to SINHALA LETTER DANTAJA NAYANNA 448 charset[i++] = c++; 449 c = 0xdb3; // from SINHALA LETTER SANYAKA DAYANNA 450 while (c <= 0xdbb) // ..to SINHALA LETTER RAYANNA 451 charset[i++] = c++; 452 c = 0xdc0; // from SINHALA LETTER VAYANNA 453 while (c <= 0xdc6) // ..to SINHALA LETTER FAYANNA 454 charset[i++] = c++; 455 c = 0xdcf; // from SINHALA VOWEL SIGN AELA-PILLA 456 while (c <= 0xdd4) // ..to SINHALA VOWEL SIGN KETTI PAA-PILLA 457 charset[i++] = c++; 458 c = 0xdd8; // from SINHALA VOWEL SIGN GAETTA-PILLA 459 while (c <= 0xddf) // ..to SINHALA VOWEL SIGN GAYANUKITTA 460 charset[i++] = c++; 461 c = 0xde6; // from SINHALA LITH DIGIT ZERO 462 while (c <= 0xdef) // ..to SINHALA LITH DIGIT NINE 463 charset[i++] = c++; 464 charset[i++] = 0xdf2; // SINHALA VOWEL SIGN DIGA GAETTA-PILLA 465 charset[i++] = 0xdf4; // SINHALA PUNCTUATION KUNDDALIYA 466// 0E00..0E7F; Thai 467 c = 0xe01; // from THAI CHARACTER KO KAI 468 while (c <= 0xe3a) // ..to THAI CHARACTER PHINTHU 469 charset[i++] = c++; 470 c = 0xe3f; // from THAI CURRENCY SYMBOL BAHT 471 while (c <= 0xe5b) // ..to THAI CHARACTER KHOMUT 472 charset[i++] = c++; 473// 0E80..0EFF; Lao 474 charset[i++] = 0xe81; // LAO LETTER KO 475 charset[i++] = 0xe82; // LAO LETTER KHO SUNG 476 charset[i++] = 0xe87; // LAO LETTER NGO 477 charset[i++] = 0xe88; // LAO LETTER CO 478 c = 0xe94; // from LAO LETTER DO 479 while (c <= 0xe97) // ..to LAO LETTER THO TAM 480 charset[i++] = c++; 481 c = 0xe99; // from LAO LETTER NO 482 while (c <= 0xe9f) // ..to LAO LETTER FO SUNG 483 charset[i++] = c++; 484 charset[i++] = 0xea1; // LAO LETTER MO 485 charset[i++] = 0xea3; // LAO LETTER LO LING 486 charset[i++] = 0xeaa; // LAO LETTER SO SUNG 487 charset[i++] = 0xeab; // LAO LETTER HO SUNG 488 c = 0xead; // from LAO LETTER O 489 while (c <= 0xeb9) // ..to LAO VOWEL SIGN UU 490 charset[i++] = c++; 491 charset[i++] = 0xebb; // LAO VOWEL SIGN MAI KON 492 charset[i++] = 0xebd; // LAO SEMIVOWEL SIGN NYO 493 c = 0xec0; // from LAO VOWEL SIGN E 494 while (c <= 0xec4) // ..to LAO VOWEL SIGN AI 495 charset[i++] = c++; 496 c = 0xec8; // from LAO TONE MAI EK 497 while (c <= 0xecd) // ..to LAO NIGGAHITA 498 charset[i++] = c++; 499 c = 0xed0; // from LAO DIGIT ZERO 500 while (c <= 0xed9) // ..to LAO DIGIT NINE 501 charset[i++] = c++; 502 c = 0xedc; // from LAO HO NO 503 while (c <= 0xedf) // ..to LAO LETTER KHMU NYO 504 charset[i++] = c++; 505// 0F00..0FFF; Tibetan 506 c = 0xf00; // from TIBETAN SYLLABLE OM 507 while (c <= 0xf47) // ..to TIBETAN LETTER JA 508 charset[i++] = c++; 509 c = 0xf49; // from TIBETAN LETTER NYA 510 while (c <= 0xf6c) // ..to TIBETAN LETTER RRA 511 charset[i++] = c++; 512 c = 0xf71; // from TIBETAN VOWEL SIGN AA 513 while (c <= 0xf97) // ..to TIBETAN SUBJOINED LETTER JA 514 charset[i++] = c++; 515 c = 0xf99; // from TIBETAN SUBJOINED LETTER NYA 516 while (c <= 0xfbc) // ..to TIBETAN SUBJOINED LETTER FIXED-FORM RA 517 charset[i++] = c++; 518 c = 0xfbe; // from TIBETAN KU RU KHA 519 while (c <= 0xfcc) // ..to TIBETAN SYMBOL NOR BU BZHI -KHYIL 520 charset[i++] = c++; 521 c = 0xfce; // from TIBETAN SIGN RDEL NAG RDEL DKAR 522 while (c <= 0xfda) // ..to TIBETAN MARK TRAILING MCHAN RTAGS 523 charset[i++] = c++; 524// 1000..109F; Myanmar 525 c = 0x1000; // from MYANMAR LETTER KA 526 while (c <= 0x109f) // ..to MYANMAR SYMBOL SHAN EXCLAMATION 527 charset[i++] = c++; 528// 10A0..10FF; Georgian 529 c = 0x10a0; // from GEORGIAN CAPITAL LETTER AN 530 while (c <= 0x10c5) // ..to GEORGIAN CAPITAL LETTER HOE 531 charset[i++] = c++; 532 c = 0x10d0; // from GEORGIAN LETTER AN 533 while (c <= 0x10ff) // ..to GEORGIAN LETTER LABIAL SIGN 534 charset[i++] = c++; 535// 1100..11FF; Hangul Jamo 536 c = 0x1100; // from HANGUL CHOSEONG KIYEOK 537 while (c <= 0x11ff) // ..to HANGUL JONGSEONG SSANGNIEUN 538 charset[i++] = c++; 539// 1200..137F; Ethiopic 540 c = 0x1200; // from ETHIOPIC SYLLABLE HA 541 while (c <= 0x1248) // ..to ETHIOPIC SYLLABLE QWA 542 charset[i++] = c++; 543 c = 0x124a; // from ETHIOPIC SYLLABLE QWI 544 while (c <= 0x124d) // ..to ETHIOPIC SYLLABLE QWE 545 charset[i++] = c++; 546 c = 0x1250; // from ETHIOPIC SYLLABLE QHA 547 while (c <= 0x1256) // ..to ETHIOPIC SYLLABLE QHO 548 charset[i++] = c++; 549 c = 0x125a; // from ETHIOPIC SYLLABLE QHWI 550 while (c <= 0x125d) // ..to ETHIOPIC SYLLABLE QHWE 551 charset[i++] = c++; 552 c = 0x1260; // from ETHIOPIC SYLLABLE BA 553 while (c <= 0x1288) // ..to ETHIOPIC SYLLABLE XWA 554 charset[i++] = c++; 555 c = 0x128a; // from ETHIOPIC SYLLABLE XWI 556 while (c <= 0x128d) // ..to ETHIOPIC SYLLABLE XWE 557 charset[i++] = c++; 558 c = 0x1290; // from ETHIOPIC SYLLABLE NA 559 while (c <= 0x12b0) // ..to ETHIOPIC SYLLABLE KWA 560 charset[i++] = c++; 561 c = 0x12b2; // from ETHIOPIC SYLLABLE KWI 562 while (c <= 0x12b5) // ..to ETHIOPIC SYLLABLE KWE 563 charset[i++] = c++; 564 c = 0x12b8; // from ETHIOPIC SYLLABLE KXA 565 while (c <= 0x12be) // ..to ETHIOPIC SYLLABLE KXO 566 charset[i++] = c++; 567 c = 0x12c2; // from ETHIOPIC SYLLABLE KXWI 568 while (c <= 0x12c5) // ..to ETHIOPIC SYLLABLE KXWE 569 charset[i++] = c++; 570 c = 0x12c8; // from ETHIOPIC SYLLABLE WA 571 while (c <= 0x12d6) // ..to ETHIOPIC SYLLABLE PHARYNGEAL O 572 charset[i++] = c++; 573 c = 0x12d8; // from ETHIOPIC SYLLABLE ZA 574 while (c <= 0x1310) // ..to ETHIOPIC SYLLABLE GWA 575 charset[i++] = c++; 576 c = 0x1312; // from ETHIOPIC SYLLABLE GWI 577 while (c <= 0x1315) // ..to ETHIOPIC SYLLABLE GWE 578 charset[i++] = c++; 579 c = 0x1318; // from ETHIOPIC SYLLABLE GGA 580 while (c <= 0x135a) // ..to ETHIOPIC SYLLABLE FYA 581 charset[i++] = c++; 582 c = 0x135d; // from ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK 583 while (c <= 0x137c) // ..to ETHIOPIC NUMBER TEN THOUSAND 584 charset[i++] = c++; 585// 1380..139F; Ethiopic Supplement 586 c = 0x1380; // from ETHIOPIC SYLLABLE SEBATBEIT MWA 587 while (c <= 0x1399) // ..to ETHIOPIC TONAL MARK KURT 588 charset[i++] = c++; 589// 13A0..13FF; Cherokee 590 c = 0x13a0; // from CHEROKEE LETTER A 591 while (c <= 0x13f5) // ..to CHEROKEE LETTER MV 592 charset[i++] = c++; 593 c = 0x13f8; // from CHEROKEE SMALL LETTER YE 594 while (c <= 0x13fd) // ..to CHEROKEE SMALL LETTER MV 595 charset[i++] = c++; 596// 1400..167F; Unified Canadian Aboriginal Syllabics 597 c = 0x1400; // from CANADIAN SYLLABICS HYPHEN 598 while (c <= 0x167f) // ..to CANADIAN SYLLABICS BLACKFOOT W 599 charset[i++] = c++; 600// 1680..169F; Ogham 601 c = 0x1680; // from OGHAM SPACE MARK 602 while (c <= 0x169c) // ..to OGHAM REVERSED FEATHER MARK 603 charset[i++] = c++; 604// 16A0..16FF; Runic 605 c = 0x16a0; // from RUNIC LETTER FEHU FEOH FE F 606 while (c <= 0x16f8) // ..to RUNIC LETTER FRANKS CASKET AESC 607 charset[i++] = c++; 608// 1700..171F; Tagalog 609 c = 0x1700; // from TAGALOG LETTER A 610 while (c <= 0x170c) // ..to TAGALOG LETTER YA 611 charset[i++] = c++; 612 c = 0x170e; // from TAGALOG LETTER LA 613 while (c <= 0x1714) // ..to TAGALOG SIGN VIRAMA 614 charset[i++] = c++; 615// 1720..173F; Hanunoo 616 c = 0x1720; // from HANUNOO LETTER A 617 while (c <= 0x1736) // ..to PHILIPPINE DOUBLE PUNCTUATION 618 charset[i++] = c++; 619// 1740..175F; Buhid 620 c = 0x1740; // from BUHID LETTER A 621 while (c <= 0x1753) // ..to BUHID VOWEL SIGN U 622 charset[i++] = c++; 623// 1760..177F; Tagbanwa 624 c = 0x1760; // from TAGBANWA LETTER A 625 while (c <= 0x176c) // ..to TAGBANWA LETTER YA 626 charset[i++] = c++; 627 charset[i++] = 0x176e; // TAGBANWA LETTER LA 628 charset[i++] = 0x1770; // TAGBANWA LETTER SA 629 charset[i++] = 0x1772; // TAGBANWA VOWEL SIGN I 630 charset[i++] = 0x1773; // TAGBANWA VOWEL SIGN U 631// 1780..17FF; Khmer 632 c = 0x1780; // from KHMER LETTER KA 633 while (c <= 0x17dd) // ..to KHMER SIGN ATTHACAN 634 charset[i++] = c++; 635 c = 0x17e0; // from KHMER DIGIT ZERO 636 while (c <= 0x17e9) // ..to KHMER DIGIT NINE 637 charset[i++] = c++; 638 c = 0x17f0; // from KHMER SYMBOL LEK ATTAK SON 639 while (c <= 0x17f9) // ..to KHMER SYMBOL LEK ATTAK PRAM-BUON 640 charset[i++] = c++; 641// 1800..18AF; Mongolian 642 c = 0x1800; // from MONGOLIAN BIRGA 643 while (c <= 0x180e) // ..to MONGOLIAN VOWEL SEPARATOR 644 charset[i++] = c++; 645 c = 0x1810; // from MONGOLIAN DIGIT ZERO 646 while (c <= 0x1819) // ..to MONGOLIAN DIGIT NINE 647 charset[i++] = c++; 648 c = 0x1820; // from MONGOLIAN LETTER A 649 while (c <= 0x1878) // ..to MONGOLIAN LETTER CHA WITH TWO DOTS 650 charset[i++] = c++; 651 c = 0x1880; // from MONGOLIAN LETTER ALI GALI ANUSVARA ONE 652 while (c <= 0x18aa) // ..to MONGOLIAN LETTER MANCHU ALI GALI LHA 653 charset[i++] = c++; 654// 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 655 c = 0x18b0; // from CANADIAN SYLLABICS OY 656 while (c <= 0x18f5) // ..to CANADIAN SYLLABICS CARRIER DENTAL S 657 charset[i++] = c++; 658// 1900..194F; Limbu 659 c = 0x1900; // from LIMBU VOWEL-CARRIER LETTER 660 while (c <= 0x191e) // ..to LIMBU LETTER TRA 661 charset[i++] = c++; 662 c = 0x1920; // from LIMBU VOWEL SIGN A 663 while (c <= 0x192b) // ..to LIMBU SUBJOINED LETTER WA 664 charset[i++] = c++; 665 c = 0x1930; // from LIMBU SMALL LETTER KA 666 while (c <= 0x193b) // ..to LIMBU SIGN SA-I 667 charset[i++] = c++; 668 c = 0x1944; // from LIMBU EXCLAMATION MARK 669 while (c <= 0x194f) // ..to LIMBU DIGIT NINE 670 charset[i++] = c++; 671// 1950..197F; Tai Le 672 c = 0x1950; // from TAI LE LETTER KA 673 while (c <= 0x196d) // ..to TAI LE LETTER AI 674 charset[i++] = c++; 675 c = 0x1970; // from TAI LE LETTER TONE-2 676 while (c <= 0x1974) // ..to TAI LE LETTER TONE-6 677 charset[i++] = c++; 678// 1980..19DF; New Tai Lue 679 c = 0x1980; // from NEW TAI LUE LETTER HIGH QA 680 while (c <= 0x19ab) // ..to NEW TAI LUE LETTER LOW SUA 681 charset[i++] = c++; 682 c = 0x19b0; // from NEW TAI LUE VOWEL SIGN VOWEL SHORTENER 683 while (c <= 0x19c9) // ..to NEW TAI LUE TONE MARK-2 684 charset[i++] = c++; 685 c = 0x19d0; // from NEW TAI LUE DIGIT ZERO 686 while (c <= 0x19da) // ..to NEW TAI LUE THAM DIGIT ONE 687 charset[i++] = c++; 688 charset[i++] = 0x19de; // NEW TAI LUE SIGN LAE 689 charset[i++] = 0x19df; // NEW TAI LUE SIGN LAEV 690// 19E0..19FF; Khmer Symbols 691 c = 0x19e0; // from KHMER SYMBOL PATHAMASAT 692 while (c <= 0x19ff) // ..to KHMER SYMBOL DAP-PRAM ROC 693 charset[i++] = c++; 694// 1A00..1A1F; Buginese 695 c = 0x1a00; // from BUGINESE LETTER KA 696 while (c <= 0x1a1b) // ..to BUGINESE VOWEL SIGN AE 697 charset[i++] = c++; 698 charset[i++] = 0x1a1e; // BUGINESE PALLAWA 699 charset[i++] = 0x1a1f; // BUGINESE END OF SECTION 700// 1A20..1AAF; Tai Tham 701 c = 0x1a20; // from TAI THAM LETTER HIGH KA 702 while (c <= 0x1a5e) // ..to TAI THAM CONSONANT SIGN SA 703 charset[i++] = c++; 704 c = 0x1a60; // from TAI THAM SIGN SAKOT 705 while (c <= 0x1a7c) // ..to TAI THAM SIGN KHUEN-LUE KARAN 706 charset[i++] = c++; 707 c = 0x1a7f; // from TAI THAM COMBINING CRYPTOGRAMMIC DOT 708 while (c <= 0x1a89) // ..to TAI THAM HORA DIGIT NINE 709 charset[i++] = c++; 710 c = 0x1a90; // from TAI THAM THAM DIGIT ZERO 711 while (c <= 0x1a99) // ..to TAI THAM THAM DIGIT NINE 712 charset[i++] = c++; 713 c = 0x1aa0; // from TAI THAM SIGN WIANG 714 while (c <= 0x1aad) // ..to TAI THAM SIGN CAANG 715 charset[i++] = c++; 716// 1AB0..1AFF; Combining Diacritical Marks Extended 717 c = 0x1ab0; // from COMBINING DOUBLED CIRCUMFLEX ACCENT 718 while (c <= 0x1abe) // ..to COMBINING PARENTHESES OVERLAY 719 charset[i++] = c++; 720// 1B00..1B7F; Balinese 721 c = 0x1b00; // from BALINESE SIGN ULU RICEM 722 while (c <= 0x1b4b) // ..to BALINESE LETTER ASYURA SASAK 723 charset[i++] = c++; 724 c = 0x1b50; // from BALINESE DIGIT ZERO 725 while (c <= 0x1b7c) // ..to BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING 726 charset[i++] = c++; 727// 1B80..1BBF; Sundanese 728 c = 0x1b80; // from SUNDANESE SIGN PANYECEK 729 while (c <= 0x1bbf) // ..to SUNDANESE LETTER FINAL M 730 charset[i++] = c++; 731// 1BC0..1BFF; Batak 732 c = 0x1bc0; // from BATAK LETTER A 733 while (c <= 0x1bf3) // ..to BATAK PANONGONAN 734 charset[i++] = c++; 735 c = 0x1bfc; // from BATAK SYMBOL BINDU NA METEK 736 while (c <= 0x1bff) // ..to BATAK SYMBOL BINDU PANGOLAT 737 charset[i++] = c++; 738// 1C00..1C4F; Lepcha 739 c = 0x1c00; // from LEPCHA LETTER KA 740 while (c <= 0x1c37) // ..to LEPCHA SIGN NUKTA 741 charset[i++] = c++; 742 c = 0x1c3b; // from LEPCHA PUNCTUATION TA-ROL 743 while (c <= 0x1c49) // ..to LEPCHA DIGIT NINE 744 charset[i++] = c++; 745 charset[i++] = 0x1c4d; // LEPCHA LETTER TTA 746 charset[i++] = 0x1c4f; // LEPCHA LETTER DDA 747// 1C50..1C7F; Ol Chiki 748 c = 0x1c50; // from OL CHIKI DIGIT ZERO 749 while (c <= 0x1c7f) // ..to OL CHIKI PUNCTUATION DOUBLE MUCAAD 750 charset[i++] = c++; 751// 1C80..1C8F; Cyrillic Extended-C 752 c = 0x1c80; // from CYRILLIC SMALL LETTER ROUNDED VE 753 while (c <= 0x1c88) // ..to CYRILLIC SMALL LETTER UNBLENDED UK 754 charset[i++] = c++; 755// 1C90..1CBF; Georgian Extended 756 c = 0x1c90; // from GEORGIAN MTAVRULI CAPITAL LETTER AN 757 while (c <= 0x1cba) // ..to GEORGIAN MTAVRULI CAPITAL LETTER AIN 758 charset[i++] = c++; 759 charset[i++] = 0x1cbd; // GEORGIAN MTAVRULI CAPITAL LETTER AEN 760 charset[i++] = 0x1cbf; // GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 761// 1CC0..1CCF; Sundanese Supplement 762 c = 0x1cc0; // from SUNDANESE PUNCTUATION BINDU SURYA 763 while (c <= 0x1cc7) // ..to SUNDANESE PUNCTUATION BINDU BA SATANGA 764 charset[i++] = c++; 765// 1CD0..1CFF; Vedic Extensions 766 c = 0x1cd0; // from VEDIC TONE KARSHANA 767 while (c <= 0x1cf9) // ..to VEDIC TONE DOUBLE RING ABOVE 768 charset[i++] = c++; 769// 1D00..1D7F; Phonetic Extensions 770 c = 0x1d00; // from LATIN LETTER SMALL CAPITAL A 771 while (c <= 0x1d7f) // ..to LATIN SMALL LETTER UPSILON WITH STROKE 772 charset[i++] = c++; 773// 1D80..1DBF; Phonetic Extensions Supplement 774 c = 0x1d80; // from LATIN SMALL LETTER B WITH PALATAL HOOK 775 while (c <= 0x1dbf) // ..to MODIFIER LETTER SMALL THETA 776 charset[i++] = c++; 777// 1DC0..1DFF; Combining Diacritical Marks Supplement 778 c = 0x1dc0; // from COMBINING DOTTED GRAVE ACCENT 779 while (c <= 0x1df9) // ..to COMBINING WIDE INVERTED BRIDGE BELOW 780 charset[i++] = c++; 781 c = 0x1dfb; // from COMBINING DELETION MARK 782 while (c <= 0x1dff) // ..to COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 783 charset[i++] = c++; 784// 1E00..1EFF; Latin Extended Additional 785 c = 0x1e00; // from LATIN CAPITAL LETTER A WITH RING BELOW 786 while (c <= 0x1eff) // ..to LATIN SMALL LETTER Y WITH LOOP 787 charset[i++] = c++; 788// 1F00..1FFF; Greek Extended 789 c = 0x1f00; // from GREEK SMALL LETTER ALPHA WITH PSILI 790 while (c <= 0x1f15) // ..to GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 791 charset[i++] = c++; 792 c = 0x1f18; // from GREEK CAPITAL LETTER EPSILON WITH PSILI 793 while (c <= 0x1f1d) // ..to GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 794 charset[i++] = c++; 795 c = 0x1f20; // from GREEK SMALL LETTER ETA WITH PSILI 796 while (c <= 0x1f45) // ..to GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 797 charset[i++] = c++; 798 c = 0x1f48; // from GREEK CAPITAL LETTER OMICRON WITH PSILI 799 while (c <= 0x1f4d) // ..to GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 800 charset[i++] = c++; 801 c = 0x1f50; // from GREEK SMALL LETTER UPSILON WITH PSILI 802 while (c <= 0x1f57) // ..to GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 803 charset[i++] = c++; 804 c = 0x1f5f; // from GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 805 while (c <= 0x1f7d) // ..to GREEK SMALL LETTER OMEGA WITH OXIA 806 charset[i++] = c++; 807 c = 0x1f80; // from GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 808 while (c <= 0x1fb4) // ..to GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 809 charset[i++] = c++; 810 c = 0x1fb6; // from GREEK SMALL LETTER ALPHA WITH PERISPOMENI 811 while (c <= 0x1fc4) // ..to GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 812 charset[i++] = c++; 813 c = 0x1fc6; // from GREEK SMALL LETTER ETA WITH PERISPOMENI 814 while (c <= 0x1fd3) // ..to GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 815 charset[i++] = c++; 816 c = 0x1fd6; // from GREEK SMALL LETTER IOTA WITH PERISPOMENI 817 while (c <= 0x1fdb) // ..to GREEK CAPITAL LETTER IOTA WITH OXIA 818 charset[i++] = c++; 819 c = 0x1fdd; // from GREEK DASIA AND VARIA 820 while (c <= 0x1fef) // ..to GREEK VARIA 821 charset[i++] = c++; 822 charset[i++] = 0x1ff2; // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 823 charset[i++] = 0x1ff4; // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 824 c = 0x1ff6; // from GREEK SMALL LETTER OMEGA WITH PERISPOMENI 825 while (c <= 0x1ffe) // ..to GREEK DASIA 826 charset[i++] = c++; 827// 2000..206F; General Punctuation 828 c = 0x2000; // from EN QUAD 829 while (c <= 0x2064) // ..to INVISIBLE PLUS 830 charset[i++] = c++; 831 c = 0x2066; // from LEFT-TO-RIGHT ISOLATE 832 while (c <= 0x206f) // ..to NOMINAL DIGIT SHAPES 833 charset[i++] = c++; 834// 2070..209F; Superscripts and Subscripts 835 charset[i++] = 0x2070; // SUPERSCRIPT ZERO 836 charset[i++] = 0x2071; // SUPERSCRIPT LATIN SMALL LETTER I 837 c = 0x2074; // from SUPERSCRIPT FOUR 838 while (c <= 0x208e) // ..to SUBSCRIPT RIGHT PARENTHESIS 839 charset[i++] = c++; 840 c = 0x2090; // from LATIN SUBSCRIPT SMALL LETTER A 841 while (c <= 0x209c) // ..to LATIN SUBSCRIPT SMALL LETTER T 842 charset[i++] = c++; 843// 20A0..20CF; Currency Symbols 844 c = 0x20a0; // from EURO-CURRENCY SIGN 845 while (c <= 0x20bf) // ..to BITCOIN SIGN 846 charset[i++] = c++; 847// 20D0..20FF; Combining Diacritical Marks for Symbols 848 c = 0x20d0; // from COMBINING LEFT HARPOON ABOVE 849 while (c <= 0x20f0) // ..to COMBINING ASTERISK ABOVE 850 charset[i++] = c++; 851// 2100..214F; Letterlike Symbols 852 c = 0x2100; // from ACCOUNT OF 853 while (c <= 0x214f) // ..to SYMBOL FOR SAMARITAN SOURCE 854 charset[i++] = c++; 855// 2150..218F; Number Forms 856 c = 0x2150; // from VULGAR FRACTION ONE SEVENTH 857 while (c <= 0x218b) // ..to TURNED DIGIT THREE 858 charset[i++] = c++; 859// 2190..21FF; Arrows 860 c = 0x2190; // from LEFTWARDS ARROW 861 while (c <= 0x21ff) // ..to LEFT RIGHT OPEN-HEADED ARROW 862 charset[i++] = c++; 863// 2200..22FF; Mathematical Operators 864 c = 0x2200; // from FOR ALL 865 while (c <= 0x22ff) // ..to Z NOTATION BAG MEMBERSHIP 866 charset[i++] = c++; 867// 2300..23FF; Miscellaneous Technical 868 c = 0x2300; // from DIAMETER SIGN 869 while (c <= 0x23ff) // ..to OBSERVER EYE SYMBOL 870 charset[i++] = c++; 871// 2400..243F; Control Pictures 872 c = 0x2400; // from SYMBOL FOR NULL 873 while (c <= 0x2426) // ..to SYMBOL FOR SUBSTITUTE FORM TWO 874 charset[i++] = c++; 875// 2440..245F; Optical Character Recognition 876 c = 0x2440; // from OCR HOOK 877 while (c <= 0x244a) // ..to OCR DOUBLE BACKSLASH 878 charset[i++] = c++; 879// 2460..24FF; Enclosed Alphanumerics 880 c = 0x2460; // from CIRCLED DIGIT ONE 881 while (c <= 0x24ff) // ..to NEGATIVE CIRCLED DIGIT ZERO 882 charset[i++] = c++; 883// 2500..257F; Box Drawing 884 c = 0x2500; // from BOX DRAWINGS LIGHT HORIZONTAL 885 while (c <= 0x257f) // ..to BOX DRAWINGS HEAVY UP AND LIGHT DOWN 886 charset[i++] = c++; 887// 2580..259F; Block Elements 888 c = 0x2580; // from UPPER HALF BLOCK 889 while (c <= 0x259f) // ..to QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT 890 charset[i++] = c++; 891// 25A0..25FF; Geometric Shapes 892 c = 0x25a0; // from BLACK SQUARE 893 while (c <= 0x25ff) // ..to LOWER RIGHT TRIANGLE 894 charset[i++] = c++; 895// 2600..26FF; Miscellaneous Symbols 896 c = 0x2600; // from BLACK SUN WITH RAYS 897 while (c <= 0x26ff) // ..to WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE 898 charset[i++] = c++; 899// 2700..27BF; Dingbats 900 c = 0x2700; // from BLACK SAFETY SCISSORS 901 while (c <= 0x27bf) // ..to DOUBLE CURLY LOOP 902 charset[i++] = c++; 903// 27C0..27EF; Miscellaneous Mathematical Symbols-A 904 c = 0x27c0; // from THREE DIMENSIONAL ANGLE 905 while (c <= 0x27ef) // ..to MATHEMATICAL RIGHT FLATTENED PARENTHESIS 906 charset[i++] = c++; 907// 27F0..27FF; Supplemental Arrows-A 908 c = 0x27f0; // from UPWARDS QUADRUPLE ARROW 909 while (c <= 0x27ff) // ..to LONG RIGHTWARDS SQUIGGLE ARROW 910 charset[i++] = c++; 911// 2800..28FF; Braille Patterns 912 c = 0x2800; // from BRAILLE PATTERN BLANK 913 while (c <= 0x28ff) // ..to BRAILLE PATTERN DOTS-12345678 914 charset[i++] = c++; 915// 2900..297F; Supplemental Arrows-B 916 c = 0x2900; // from RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE 917 while (c <= 0x297f) // ..to DOWN FISH TAIL 918 charset[i++] = c++; 919// 2980..29FF; Miscellaneous Mathematical Symbols-B 920 c = 0x2980; // from TRIPLE VERTICAL BAR DELIMITER 921 while (c <= 0x29ff) // ..to MINY 922 charset[i++] = c++; 923// 2A00..2AFF; Supplemental Mathematical Operators 924 c = 0x2a00; // from N-ARY CIRCLED DOT OPERATOR 925 while (c <= 0x2aff) // ..to N-ARY WHITE VERTICAL BAR 926 charset[i++] = c++; 927// 2B00..2BFF; Miscellaneous Symbols and Arrows 928 c = 0x2b00; // from NORTH EAST WHITE ARROW 929 while (c <= 0x2b73) // ..to DOWNWARDS TRIANGLE-HEADED ARROW TO BAR 930 charset[i++] = c++; 931 c = 0x2b76; // from NORTH WEST TRIANGLE-HEADED ARROW TO BAR 932 while (c <= 0x2b95) // ..to RIGHTWARDS BLACK ARROW 933 charset[i++] = c++; 934 c = 0x2b98; // from THREE-D TOP-LIGHTED LEFTWARDS EQUILATERAL ARROWHEAD 935 while (c <= 0x2bc8) // ..to BLACK MEDIUM RIGHT-POINTING TRIANGLE CENTRED 936 charset[i++] = c++; 937 c = 0x2bca; // from TOP HALF BLACK CIRCLE 938 while (c <= 0x2bfe) // ..to REVERSED RIGHT ANGLE 939 charset[i++] = c++; 940// 2C00..2C5F; Glagolitic 941 c = 0x2c00; // from GLAGOLITIC CAPITAL LETTER AZU 942 while (c <= 0x2c2e) // ..to GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 943 charset[i++] = c++; 944 c = 0x2c30; // from GLAGOLITIC SMALL LETTER AZU 945 while (c <= 0x2c5e) // ..to GLAGOLITIC SMALL LETTER LATINATE MYSLITE 946 charset[i++] = c++; 947// 2C60..2C7F; Latin Extended-C 948 c = 0x2c60; // from LATIN CAPITAL LETTER L WITH DOUBLE BAR 949 while (c <= 0x2c7f) // ..to LATIN CAPITAL LETTER Z WITH SWASH TAIL 950 charset[i++] = c++; 951// 2C80..2CFF; Coptic 952 c = 0x2c80; // from COPTIC CAPITAL LETTER ALFA 953 while (c <= 0x2cf3) // ..to COPTIC SMALL LETTER BOHAIRIC KHEI 954 charset[i++] = c++; 955 c = 0x2cf9; // from COPTIC OLD NUBIAN FULL STOP 956 while (c <= 0x2cff) // ..to COPTIC MORPHOLOGICAL DIVIDER 957 charset[i++] = c++; 958// 2D00..2D2F; Georgian Supplement 959 c = 0x2d00; // from GEORGIAN SMALL LETTER AN 960 while (c <= 0x2d25) // ..to GEORGIAN SMALL LETTER HOE 961 charset[i++] = c++; 962 c = 0x2d27; // from GEORGIAN SMALL LETTER YN 963 while (c <= 0x2d2d) // ..to GEORGIAN SMALL LETTER AEN 964 charset[i++] = c++; 965// 2D30..2D7F; Tifinagh 966 c = 0x2d30; // from TIFINAGH LETTER YA 967 while (c <= 0x2d67) // ..to TIFINAGH LETTER YO 968 charset[i++] = c++; 969 charset[i++] = 0x2d6f; // TIFINAGH MODIFIER LETTER LABIALIZATION MARK 970 charset[i++] = 0x2d70; // TIFINAGH SEPARATOR MARK 971 charset[i++] = 0x2d7f; // TIFINAGH CONSONANT JOINER 972// 2D80..2DDF; Ethiopic Extended 973 c = 0x2d80; // from ETHIOPIC SYLLABLE LOA 974 while (c <= 0x2d96) // ..to ETHIOPIC SYLLABLE GGWE 975 charset[i++] = c++; 976 c = 0x2da0; // from ETHIOPIC SYLLABLE SSA 977 while (c <= 0x2da6) // ..to ETHIOPIC SYLLABLE SSO 978 charset[i++] = c++; 979 c = 0x2da8; // from ETHIOPIC SYLLABLE CCA 980 while (c <= 0x2dae) // ..to ETHIOPIC SYLLABLE CCO 981 charset[i++] = c++; 982 c = 0x2db0; // from ETHIOPIC SYLLABLE ZZA 983 while (c <= 0x2db6) // ..to ETHIOPIC SYLLABLE ZZO 984 charset[i++] = c++; 985 c = 0x2db8; // from ETHIOPIC SYLLABLE CCHA 986 while (c <= 0x2dbe) // ..to ETHIOPIC SYLLABLE CCHO 987 charset[i++] = c++; 988 c = 0x2dc0; // from ETHIOPIC SYLLABLE QYA 989 while (c <= 0x2dc6) // ..to ETHIOPIC SYLLABLE QYO 990 charset[i++] = c++; 991 c = 0x2dc8; // from ETHIOPIC SYLLABLE KYA 992 while (c <= 0x2dce) // ..to ETHIOPIC SYLLABLE KYO 993 charset[i++] = c++; 994 c = 0x2dd0; // from ETHIOPIC SYLLABLE XYA 995 while (c <= 0x2dd6) // ..to ETHIOPIC SYLLABLE XYO 996 charset[i++] = c++; 997 c = 0x2dd8; // from ETHIOPIC SYLLABLE GYA 998 while (c <= 0x2dde) // ..to ETHIOPIC SYLLABLE GYO 999 charset[i++] = c++; 1000// 2DE0..2DFF; Cyrillic Extended-A 1001 c = 0x2de0; // from COMBINING CYRILLIC LETTER BE 1002 while (c <= 0x2dff) // ..to COMBINING CYRILLIC LETTER IOTIFIED BIG YUS 1003 charset[i++] = c++; 1004// 2E00..2E7F; Supplemental Punctuation 1005 c = 0x2e00; // from RIGHT ANGLE SUBSTITUTION MARKER 1006 while (c <= 0x2e4e) // ..to PUNCTUS ELEVATUS MARK 1007 charset[i++] = c++; 1008// 2E80..2EFF; CJK Radicals Supplement 1009 c = 0x2e80; // from CJK RADICAL REPEAT 1010 while (c <= 0x2e99) // ..to CJK RADICAL RAP 1011 charset[i++] = c++; 1012 c = 0x2e9b; // from CJK RADICAL CHOKE 1013 while (c <= 0x2ef3) // ..to CJK RADICAL C-SIMPLIFIED TURTLE 1014 charset[i++] = c++; 1015// 2F00..2FDF; Kangxi Radicals 1016 c = 0x2f00; // from KANGXI RADICAL ONE 1017 while (c <= 0x2fd5) // ..to KANGXI RADICAL FLUTE 1018 charset[i++] = c++; 1019// 2FF0..2FFF; Ideographic Description Characters 1020 c = 0x2ff0; // from IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT 1021 while (c <= 0x2ffb) // ..to IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID 1022 charset[i++] = c++; 1023// 3000..303F; CJK Symbols and Punctuation 1024 c = 0x3000; // from IDEOGRAPHIC SPACE 1025 while (c <= 0x303f) // ..to IDEOGRAPHIC HALF FILL SPACE 1026 charset[i++] = c++; 1027// 3040..309F; Hiragana 1028 c = 0x3041; // from HIRAGANA LETTER SMALL A 1029 while (c <= 0x3096) // ..to HIRAGANA LETTER SMALL KE 1030 charset[i++] = c++; 1031 c = 0x3099; // from COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 1032 while (c <= 0x309f) // ..to HIRAGANA DIGRAPH YORI 1033 charset[i++] = c++; 1034// 30A0..30FF; Katakana 1035 c = 0x30a0; // from KATAKANA-HIRAGANA DOUBLE HYPHEN 1036 while (c <= 0x30ff) // ..to KATAKANA DIGRAPH KOTO 1037 charset[i++] = c++; 1038// 3100..312F; Bopomofo 1039 c = 0x3105; // from BOPOMOFO LETTER B 1040 while (c <= 0x312f) // ..to BOPOMOFO LETTER NN 1041 charset[i++] = c++; 1042// 3130..318F; Hangul Compatibility Jamo 1043 c = 0x3131; // from HANGUL LETTER KIYEOK 1044 while (c <= 0x318e) // ..to HANGUL LETTER ARAEAE 1045 charset[i++] = c++; 1046// 3190..319F; Kanbun 1047 c = 0x3190; // from IDEOGRAPHIC ANNOTATION LINKING MARK 1048 while (c <= 0x319f) // ..to IDEOGRAPHIC ANNOTATION MAN MARK 1049 charset[i++] = c++; 1050// 31A0..31BF; Bopomofo Extended 1051 c = 0x31a0; // from BOPOMOFO LETTER BU 1052 while (c <= 0x31ba) // ..to BOPOMOFO LETTER ZY 1053 charset[i++] = c++; 1054// 31C0..31EF; CJK Strokes 1055 c = 0x31c0; // from CJK STROKE T 1056 while (c <= 0x31e3) // ..to CJK STROKE Q 1057 charset[i++] = c++; 1058// 31F0..31FF; Katakana Phonetic Extensions 1059 c = 0x31f0; // from KATAKANA LETTER SMALL KU 1060 while (c <= 0x31ff) // ..to KATAKANA LETTER SMALL RO 1061 charset[i++] = c++; 1062// 3200..32FF; Enclosed CJK Letters and Months 1063 c = 0x3200; // from PARENTHESIZED HANGUL KIYEOK 1064 while (c <= 0x321e) // ..to PARENTHESIZED KOREAN CHARACTER O HU 1065 charset[i++] = c++; 1066 c = 0x3220; // from PARENTHESIZED IDEOGRAPH ONE 1067 while (c <= 0x32fe) // ..to CIRCLED KATAKANA WO 1068 charset[i++] = c++; 1069// 3300..33FF; CJK Compatibility 1070 c = 0x3300; // from SQUARE APAATO 1071 while (c <= 0x33ff) // ..to SQUARE GAL 1072 charset[i++] = c++; 1073// 3400..4DBF; CJK Unified Ideographs Extension A 1074 c = 0x3400; // from <CJK Ideograph Extension A, First> 1075 while (c <= 0x4db5) // ..to <CJK Ideograph Extension A, Last> 1076 charset[i++] = c++; 1077// 4DC0..4DFF; Yijing Hexagram Symbols 1078 c = 0x4dc0; // from HEXAGRAM FOR THE CREATIVE HEAVEN 1079 while (c <= 0x4dff) // ..to HEXAGRAM FOR BEFORE COMPLETION 1080 charset[i++] = c++; 1081// 4E00..9FFF; CJK Unified Ideographs 1082 c = 0x4e00; // from <CJK Ideograph, First> 1083 while (c <= 0x9fef) // ..to <CJK Ideograph, Last> 1084 charset[i++] = c++; 1085// A000..A48F; Yi Syllables 1086 c = 0xa000; // from YI SYLLABLE IT 1087 while (c <= 0xa48c) // ..to YI SYLLABLE YYR 1088 charset[i++] = c++; 1089// A490..A4CF; Yi Radicals 1090 c = 0xa490; // from YI RADICAL QOT 1091 while (c <= 0xa4c6) // ..to YI RADICAL KE 1092 charset[i++] = c++; 1093// A4D0..A4FF; Lisu 1094 c = 0xa4d0; // from LISU LETTER BA 1095 while (c <= 0xa4ff) // ..to LISU PUNCTUATION FULL STOP 1096 charset[i++] = c++; 1097// A500..A63F; Vai 1098 c = 0xa500; // from VAI SYLLABLE EE 1099 while (c <= 0xa62b) // ..to VAI SYLLABLE NDOLE DO 1100 charset[i++] = c++; 1101// A640..A69F; Cyrillic Extended-B 1102 c = 0xa640; // from CYRILLIC CAPITAL LETTER ZEMLYA 1103 while (c <= 0xa69f) // ..to COMBINING CYRILLIC LETTER IOTIFIED E 1104 charset[i++] = c++; 1105// A6A0..A6FF; Bamum 1106 c = 0xa6a0; // from BAMUM LETTER A 1107 while (c <= 0xa6f7) // ..to BAMUM QUESTION MARK 1108 charset[i++] = c++; 1109// A700..A71F; Modifier Tone Letters 1110 c = 0xa700; // from MODIFIER LETTER CHINESE TONE YIN PING 1111 while (c <= 0xa71f) // ..to MODIFIER LETTER LOW INVERTED EXCLAMATION MARK 1112 charset[i++] = c++; 1113// A720..A7FF; Latin Extended-D 1114 c = 0xa720; // from MODIFIER LETTER STRESS AND HIGH TONE 1115 while (c <= 0xa7b9) // ..to LATIN SMALL LETTER U WITH STROKE 1116 charset[i++] = c++; 1117 c = 0xa7f7; // from LATIN EPIGRAPHIC LETTER SIDEWAYS I 1118 while (c <= 0xa7ff) // ..to LATIN EPIGRAPHIC LETTER ARCHAIC M 1119 charset[i++] = c++; 1120// A800..A82F; Syloti Nagri 1121 c = 0xa800; // from SYLOTI NAGRI LETTER A 1122 while (c <= 0xa82b) // ..to SYLOTI NAGRI POETRY MARK-4 1123 charset[i++] = c++; 1124// A830..A83F; Common Indic Number Forms 1125 c = 0xa830; // from NORTH INDIC FRACTION ONE QUARTER 1126 while (c <= 0xa839) // ..to NORTH INDIC QUANTITY MARK 1127 charset[i++] = c++; 1128// A840..A87F; Phags-pa 1129 c = 0xa840; // from PHAGS-PA LETTER KA 1130 while (c <= 0xa877) // ..to PHAGS-PA MARK DOUBLE SHAD 1131 charset[i++] = c++; 1132// A880..A8DF; Saurashtra 1133 c = 0xa880; // from SAURASHTRA SIGN ANUSVARA 1134 while (c <= 0xa8c5) // ..to SAURASHTRA SIGN CANDRABINDU 1135 charset[i++] = c++; 1136 c = 0xa8ce; // from SAURASHTRA DANDA 1137 while (c <= 0xa8d9) // ..to SAURASHTRA DIGIT NINE 1138 charset[i++] = c++; 1139// A8E0..A8FF; Devanagari Extended 1140 c = 0xa8e0; // from COMBINING DEVANAGARI DIGIT ZERO 1141 while (c <= 0xa8ff) // ..to DEVANAGARI VOWEL SIGN AY 1142 charset[i++] = c++; 1143// A900..A92F; Kayah Li 1144 c = 0xa900; // from KAYAH LI DIGIT ZERO 1145 while (c <= 0xa92f) // ..to KAYAH LI SIGN SHYA 1146 charset[i++] = c++; 1147// A930..A95F; Rejang 1148 c = 0xa930; // from REJANG LETTER KA 1149 while (c <= 0xa953) // ..to REJANG VIRAMA 1150 charset[i++] = c++; 1151 charset[i++] = 0xa95f; // REJANG SECTION MARK 1152// A960..A97F; Hangul Jamo Extended-A 1153 c = 0xa960; // from HANGUL CHOSEONG TIKEUT-MIEUM 1154 while (c <= 0xa97c) // ..to HANGUL CHOSEONG SSANGYEORINHIEUH 1155 charset[i++] = c++; 1156// A980..A9DF; Javanese 1157 c = 0xa980; // from JAVANESE SIGN PANYANGGA 1158 while (c <= 0xa9cd) // ..to JAVANESE TURNED PADA PISELEH 1159 charset[i++] = c++; 1160 c = 0xa9cf; // from JAVANESE PANGRANGKEP 1161 while (c <= 0xa9d9) // ..to JAVANESE DIGIT NINE 1162 charset[i++] = c++; 1163 charset[i++] = 0xa9de; // JAVANESE PADA TIRTA TUMETES 1164 charset[i++] = 0xa9df; // JAVANESE PADA ISEN-ISEN 1165// A9E0..A9FF; Myanmar Extended-B 1166 c = 0xa9e0; // from MYANMAR LETTER SHAN GHA 1167 while (c <= 0xa9fe) // ..to MYANMAR LETTER TAI LAING BHA 1168 charset[i++] = c++; 1169// AA00..AA5F; Cham 1170 c = 0xaa00; // from CHAM LETTER A 1171 while (c <= 0xaa36) // ..to CHAM CONSONANT SIGN WA 1172 charset[i++] = c++; 1173 c = 0xaa40; // from CHAM LETTER FINAL K 1174 while (c <= 0xaa4d) // ..to CHAM CONSONANT SIGN FINAL H 1175 charset[i++] = c++; 1176 c = 0xaa50; // from CHAM DIGIT ZERO 1177 while (c <= 0xaa59) // ..to CHAM DIGIT NINE 1178 charset[i++] = c++; 1179 c = 0xaa5c; // from CHAM PUNCTUATION SPIRAL 1180 while (c <= 0xaa5f) // ..to CHAM PUNCTUATION TRIPLE DANDA 1181 charset[i++] = c++; 1182// AA60..AA7F; Myanmar Extended-A 1183 c = 0xaa60; // from MYANMAR LETTER KHAMTI GA 1184 while (c <= 0xaa7f) // ..to MYANMAR LETTER SHWE PALAUNG SHA 1185 charset[i++] = c++; 1186// AA80..AADF; Tai Viet 1187 c = 0xaa80; // from TAI VIET LETTER LOW KO 1188 while (c <= 0xaac2) // ..to TAI VIET TONE MAI SONG 1189 charset[i++] = c++; 1190 c = 0xaadb; // from TAI VIET SYMBOL KON 1191 while (c <= 0xaadf) // ..to TAI VIET SYMBOL KOI KOI 1192 charset[i++] = c++; 1193// AAE0..AAFF; Meetei Mayek Extensions 1194 c = 0xaae0; // from MEETEI MAYEK LETTER E 1195 while (c <= 0xaaf6) // ..to MEETEI MAYEK VIRAMA 1196 charset[i++] = c++; 1197// AB00..AB2F; Ethiopic Extended-A 1198 c = 0xab01; // from ETHIOPIC SYLLABLE TTHU 1199 while (c <= 0xab06) // ..to ETHIOPIC SYLLABLE TTHO 1200 charset[i++] = c++; 1201 c = 0xab09; // from ETHIOPIC SYLLABLE DDHU 1202 while (c <= 0xab0e) // ..to ETHIOPIC SYLLABLE DDHO 1203 charset[i++] = c++; 1204 c = 0xab11; // from ETHIOPIC SYLLABLE DZU 1205 while (c <= 0xab16) // ..to ETHIOPIC SYLLABLE DZO 1206 charset[i++] = c++; 1207 c = 0xab20; // from ETHIOPIC SYLLABLE CCHHA 1208 while (c <= 0xab26) // ..to ETHIOPIC SYLLABLE CCHHO 1209 charset[i++] = c++; 1210 c = 0xab28; // from ETHIOPIC SYLLABLE BBA 1211 while (c <= 0xab2e) // ..to ETHIOPIC SYLLABLE BBO 1212 charset[i++] = c++; 1213// AB30..AB6F; Latin Extended-E 1214 c = 0xab30; // from LATIN SMALL LETTER BARRED ALPHA 1215 while (c <= 0xab65) // ..to GREEK LETTER SMALL CAPITAL OMEGA 1216 charset[i++] = c++; 1217// AB70..ABBF; Cherokee Supplement 1218 c = 0xab70; // from CHEROKEE SMALL LETTER A 1219 while (c <= 0xabbf) // ..to CHEROKEE SMALL LETTER YA 1220 charset[i++] = c++; 1221// ABC0..ABFF; Meetei Mayek 1222 c = 0xabc0; // from MEETEI MAYEK LETTER KOK 1223 while (c <= 0xabed) // ..to MEETEI MAYEK APUN IYEK 1224 charset[i++] = c++; 1225 c = 0xabf0; // from MEETEI MAYEK DIGIT ZERO 1226 while (c <= 0xabf9) // ..to MEETEI MAYEK DIGIT NINE 1227 charset[i++] = c++; 1228// AC00..D7AF; Hangul Syllables 1229 c = 0xac00; // from <Hangul Syllable, First> 1230 while (c <= 0xd7a3) // ..to <Hangul Syllable, Last> 1231 charset[i++] = c++; 1232// D7B0..D7FF; Hangul Jamo Extended-B 1233 c = 0xd7b0; // from HANGUL JUNGSEONG O-YEO 1234 while (c <= 0xd7c6) // ..to HANGUL JUNGSEONG ARAEA-E 1235 charset[i++] = c++; 1236 c = 0xd7cb; // from HANGUL JONGSEONG NIEUN-RIEUL 1237 while (c <= 0xd7fb) // ..to HANGUL JONGSEONG PHIEUPH-THIEUTH 1238 charset[i++] = c++; 1239// D800..DB7F; High Surrogates 1240// DB80..DBFF; High Private Use Surrogates 1241// DC00..DFFF; Low Surrogates 1242// E000..F8FF; Private Use Area 1243// F900..FAFF; CJK Compatibility Ideographs 1244 c = 0xf900; // from CJK COMPATIBILITY IDEOGRAPH-F900 1245 while (c <= 0xfa6d) // ..to CJK COMPATIBILITY IDEOGRAPH-FA6D 1246 charset[i++] = c++; 1247 c = 0xfa70; // from CJK COMPATIBILITY IDEOGRAPH-FA70 1248 while (c <= 0xfad9) // ..to CJK COMPATIBILITY IDEOGRAPH-FAD9 1249 charset[i++] = c++; 1250// FB00..FB4F; Alphabetic Presentation Forms 1251 c = 0xfb00; // from LATIN SMALL LIGATURE FF 1252 while (c <= 0xfb06) // ..to LATIN SMALL LIGATURE ST 1253 charset[i++] = c++; 1254 c = 0xfb13; // from ARMENIAN SMALL LIGATURE MEN NOW 1255 while (c <= 0xfb17) // ..to ARMENIAN SMALL LIGATURE MEN XEH 1256 charset[i++] = c++; 1257 c = 0xfb1d; // from HEBREW LETTER YOD WITH HIRIQ 1258 while (c <= 0xfb36) // ..to HEBREW LETTER ZAYIN WITH DAGESH 1259 charset[i++] = c++; 1260 c = 0xfb38; // from HEBREW LETTER TET WITH DAGESH 1261 while (c <= 0xfb3c) // ..to HEBREW LETTER LAMED WITH DAGESH 1262 charset[i++] = c++; 1263 charset[i++] = 0xfb40; // HEBREW LETTER NUN WITH DAGESH 1264 charset[i++] = 0xfb41; // HEBREW LETTER SAMEKH WITH DAGESH 1265 charset[i++] = 0xfb43; // HEBREW LETTER FINAL PE WITH DAGESH 1266 charset[i++] = 0xfb44; // HEBREW LETTER PE WITH DAGESH 1267 c = 0xfb46; // from HEBREW LETTER TSADI WITH DAGESH 1268 while (c <= 0xfb4f) // ..to HEBREW LIGATURE ALEF LAMED 1269 charset[i++] = c++; 1270// FB50..FDFF; Arabic Presentation Forms-A 1271 c = 0xfb50; // from ARABIC LETTER ALEF WASLA ISOLATED FORM 1272 while (c <= 0xfbc1) // ..to ARABIC SYMBOL SMALL TAH BELOW 1273 charset[i++] = c++; 1274 c = 0xfbd3; // from ARABIC LETTER NG ISOLATED FORM 1275 while (c <= 0xfd3f) // ..to ORNATE RIGHT PARENTHESIS 1276 charset[i++] = c++; 1277 c = 0xfd50; // from ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM 1278 while (c <= 0xfd8f) // ..to ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM 1279 charset[i++] = c++; 1280 c = 0xfd92; // from ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM 1281 while (c <= 0xfdc7) // ..to ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM 1282 charset[i++] = c++; 1283 c = 0xfdf0; // from ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM 1284 while (c <= 0xfdfd) // ..to ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM 1285 charset[i++] = c++; 1286// FE00..FE0F; Variation Selectors 1287 c = 0xfe00; // from VARIATION SELECTOR-1 1288 while (c <= 0xfe0f) // ..to VARIATION SELECTOR-16 1289 charset[i++] = c++; 1290// FE10..FE1F; Vertical Forms 1291 c = 0xfe10; // from PRESENTATION FORM FOR VERTICAL COMMA 1292 while (c <= 0xfe19) // ..to PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 1293 charset[i++] = c++; 1294// FE20..FE2F; Combining Half Marks 1295 c = 0xfe20; // from COMBINING LIGATURE LEFT HALF 1296 while (c <= 0xfe2f) // ..to COMBINING CYRILLIC TITLO RIGHT HALF 1297 charset[i++] = c++; 1298// FE30..FE4F; CJK Compatibility Forms 1299 c = 0xfe30; // from PRESENTATION FORM FOR VERTICAL TWO DOT LEADER 1300 while (c <= 0xfe4f) // ..to WAVY LOW LINE 1301 charset[i++] = c++; 1302// FE50..FE6F; Small Form Variants 1303 charset[i++] = 0xfe50; // SMALL COMMA 1304 charset[i++] = 0xfe52; // SMALL FULL STOP 1305 c = 0xfe54; // from SMALL SEMICOLON 1306 while (c <= 0xfe66) // ..to SMALL EQUALS SIGN 1307 charset[i++] = c++; 1308 c = 0xfe68; // from SMALL REVERSE SOLIDUS 1309 while (c <= 0xfe6b) // ..to SMALL COMMERCIAL AT 1310 charset[i++] = c++; 1311// FE70..FEFF; Arabic Presentation Forms-B 1312 c = 0xfe70; // from ARABIC FATHATAN ISOLATED FORM 1313 while (c <= 0xfe74) // ..to ARABIC KASRATAN ISOLATED FORM 1314 charset[i++] = c++; 1315 c = 0xfe76; // from ARABIC FATHA ISOLATED FORM 1316 while (c <= 0xfefc) // ..to ARABIC LIGATURE LAM WITH ALEF FINAL FORM 1317 charset[i++] = c++; 1318 charset[i++] = 0xfeff; // ZERO WIDTH NO-BREAK SPACE 1319// FF00..FFEF; Halfwidth and Fullwidth Forms 1320 c = 0xff01; // from FULLWIDTH EXCLAMATION MARK 1321 while (c <= 0xffbe) // ..to HALFWIDTH HANGUL LETTER HIEUH 1322 charset[i++] = c++; 1323 c = 0xffc2; // from HALFWIDTH HANGUL LETTER A 1324 while (c <= 0xffc7) // ..to HALFWIDTH HANGUL LETTER E 1325 charset[i++] = c++; 1326 c = 0xffca; // from HALFWIDTH HANGUL LETTER YEO 1327 while (c <= 0xffcf) // ..to HALFWIDTH HANGUL LETTER OE 1328 charset[i++] = c++; 1329 c = 0xffd2; // from HALFWIDTH HANGUL LETTER YO 1330 while (c <= 0xffd7) // ..to HALFWIDTH HANGUL LETTER YU 1331 charset[i++] = c++; 1332 charset[i++] = 0xffda; // HALFWIDTH HANGUL LETTER EU 1333 charset[i++] = 0xffdc; // HALFWIDTH HANGUL LETTER I 1334 c = 0xffe0; // from FULLWIDTH CENT SIGN 1335 while (c <= 0xffe6) // ..to FULLWIDTH WON SIGN 1336 charset[i++] = c++; 1337 c = 0xffe8; // from HALFWIDTH FORMS LIGHT VERTICAL 1338 while (c <= 0xffee) // ..to HALFWIDTH WHITE CIRCLE 1339 charset[i++] = c++; 1340// FFF0..FFFF; Specials 1341 c = 0xfff9; // from INTERLINEAR ANNOTATION ANCHOR 1342 while (c <= 0xfffd) // ..to REPLACEMENT CHARACTER 1343 charset[i++] = c++; 1344// 10000..1007F; Linear B Syllabary 1345 c = 0x10000; // from LINEAR B SYLLABLE B008 A 1346 while (c <= 0x1000b) // ..to LINEAR B SYLLABLE B046 JE 1347 charset[i++] = c++; 1348 c = 0x1000d; // from LINEAR B SYLLABLE B036 JO 1349 while (c <= 0x10026) // ..to LINEAR B SYLLABLE B032 QO 1350 charset[i++] = c++; 1351 c = 0x10028; // from LINEAR B SYLLABLE B060 RA 1352 while (c <= 0x1003a) // ..to LINEAR B SYLLABLE B042 WO 1353 charset[i++] = c++; 1354 charset[i++] = 0x1003c; // LINEAR B SYLLABLE B017 ZA 1355 charset[i++] = 0x1003d; // LINEAR B SYLLABLE B074 ZE 1356 c = 0x1003f; // from LINEAR B SYLLABLE B020 ZO 1357 while (c <= 0x1004d) // ..to LINEAR B SYLLABLE B091 TWO 1358 charset[i++] = c++; 1359 c = 0x10050; // from LINEAR B SYMBOL B018 1360 while (c <= 0x1005d) // ..to LINEAR B SYMBOL B089 1361 charset[i++] = c++; 1362// 10080..100FF; Linear B Ideograms 1363 c = 0x10080; // from LINEAR B IDEOGRAM B100 MAN 1364 while (c <= 0x100fa) // ..to LINEAR B IDEOGRAM VESSEL B305 1365 charset[i++] = c++; 1366// 10100..1013F; Aegean Numbers 1367 charset[i++] = 0x10100; // AEGEAN WORD SEPARATOR LINE 1368 charset[i++] = 0x10102; // AEGEAN CHECK MARK 1369 c = 0x10107; // from AEGEAN NUMBER ONE 1370 while (c <= 0x10133) // ..to AEGEAN NUMBER NINETY THOUSAND 1371 charset[i++] = c++; 1372 c = 0x10137; // from AEGEAN WEIGHT BASE UNIT 1373 while (c <= 0x1013f) // ..to AEGEAN MEASURE THIRD SUBUNIT 1374 charset[i++] = c++; 1375// 10140..1018F; Ancient Greek Numbers 1376 c = 0x10140; // from GREEK ACROPHONIC ATTIC ONE QUARTER 1377 while (c <= 0x1018e) // ..to NOMISMA SIGN 1378 charset[i++] = c++; 1379// 10190..101CF; Ancient Symbols 1380 c = 0x10190; // from ROMAN SEXTANS SIGN 1381 while (c <= 0x1019b) // ..to ROMAN CENTURIAL SIGN 1382 charset[i++] = c++; 1383 charset[i++] = 0x101a0; // GREEK SYMBOL TAU RHO 1384// 101D0..101FF; Phaistos Disc 1385 c = 0x101d0; // from PHAISTOS DISC SIGN PEDESTRIAN 1386 while (c <= 0x101fd) // ..to PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE 1387 charset[i++] = c++; 1388// 10280..1029F; Lycian 1389 c = 0x10280; // from LYCIAN LETTER A 1390 while (c <= 0x1029c) // ..to LYCIAN LETTER X 1391 charset[i++] = c++; 1392// 102A0..102DF; Carian 1393 c = 0x102a0; // from CARIAN LETTER A 1394 while (c <= 0x102d0) // ..to CARIAN LETTER UUU3 1395 charset[i++] = c++; 1396// 102E0..102FF; Coptic Epact Numbers 1397 c = 0x102e0; // from COPTIC EPACT THOUSANDS MARK 1398 while (c <= 0x102fb) // ..to COPTIC EPACT NUMBER NINE HUNDRED 1399 charset[i++] = c++; 1400// 10300..1032F; Old Italic 1401 c = 0x10300; // from OLD ITALIC LETTER A 1402 while (c <= 0x10323) // ..to OLD ITALIC NUMERAL FIFTY 1403 charset[i++] = c++; 1404 charset[i++] = 0x1032d; // OLD ITALIC LETTER YE 1405 charset[i++] = 0x1032f; // OLD ITALIC LETTER SOUTHERN TSE 1406// 10330..1034F; Gothic 1407 c = 0x10330; // from GOTHIC LETTER AHSA 1408 while (c <= 0x1034a) // ..to GOTHIC LETTER NINE HUNDRED 1409 charset[i++] = c++; 1410// 10350..1037F; Old Permic 1411 c = 0x10350; // from OLD PERMIC LETTER AN 1412 while (c <= 0x1037a) // ..to COMBINING OLD PERMIC LETTER SII 1413 charset[i++] = c++; 1414// 10380..1039F; Ugaritic 1415 c = 0x10380; // from UGARITIC LETTER ALPA 1416 while (c <= 0x1039d) // ..to UGARITIC LETTER SSU 1417 charset[i++] = c++; 1418 charset[i++] = 0x1039f; // UGARITIC WORD DIVIDER 1419// 103A0..103DF; Old Persian 1420 c = 0x103a0; // from OLD PERSIAN SIGN A 1421 while (c <= 0x103c3) // ..to OLD PERSIAN SIGN HA 1422 charset[i++] = c++; 1423 c = 0x103c8; // from OLD PERSIAN SIGN AURAMAZDAA 1424 while (c <= 0x103d5) // ..to OLD PERSIAN NUMBER HUNDRED 1425 charset[i++] = c++; 1426// 10400..1044F; Deseret 1427 c = 0x10400; // from DESERET CAPITAL LETTER LONG I 1428 while (c <= 0x1044f) // ..to DESERET SMALL LETTER EW 1429 charset[i++] = c++; 1430// 10450..1047F; Shavian 1431 c = 0x10450; // from SHAVIAN LETTER PEEP 1432 while (c <= 0x1047f) // ..to SHAVIAN LETTER YEW 1433 charset[i++] = c++; 1434// 10480..104AF; Osmanya 1435 c = 0x10480; // from OSMANYA LETTER ALEF 1436 while (c <= 0x1049d) // ..to OSMANYA LETTER OO 1437 charset[i++] = c++; 1438 c = 0x104a0; // from OSMANYA DIGIT ZERO 1439 while (c <= 0x104a9) // ..to OSMANYA DIGIT NINE 1440 charset[i++] = c++; 1441// 104B0..104FF; Osage 1442 c = 0x104b0; // from OSAGE CAPITAL LETTER A 1443 while (c <= 0x104d3) // ..to OSAGE CAPITAL LETTER ZHA 1444 charset[i++] = c++; 1445 c = 0x104d8; // from OSAGE SMALL LETTER A 1446 while (c <= 0x104fb) // ..to OSAGE SMALL LETTER ZHA 1447 charset[i++] = c++; 1448// 10500..1052F; Elbasan 1449 c = 0x10500; // from ELBASAN LETTER A 1450 while (c <= 0x10527) // ..to ELBASAN LETTER KHE 1451 charset[i++] = c++; 1452// 10530..1056F; Caucasian Albanian 1453 c = 0x10530; // from CAUCASIAN ALBANIAN LETTER ALT 1454 while (c <= 0x10563) // ..to CAUCASIAN ALBANIAN LETTER KIW 1455 charset[i++] = c++; 1456 charset[i++] = 0x1056f; // CAUCASIAN ALBANIAN CITATION MARK 1457// 10600..1077F; Linear A 1458 c = 0x10600; // from LINEAR A SIGN AB001 1459 while (c <= 0x10736) // ..to LINEAR A SIGN A664 1460 charset[i++] = c++; 1461 c = 0x10740; // from LINEAR A SIGN A701 A 1462 while (c <= 0x10755) // ..to LINEAR A SIGN A732 JE 1463 charset[i++] = c++; 1464 c = 0x10760; // from LINEAR A SIGN A800 1465 while (c <= 0x10767) // ..to LINEAR A SIGN A807 1466 charset[i++] = c++; 1467// 10800..1083F; Cypriot Syllabary 1468 c = 0x10800; // from CYPRIOT SYLLABLE A 1469 while (c <= 0x10805) // ..to CYPRIOT SYLLABLE JA 1470 charset[i++] = c++; 1471 c = 0x1080a; // from CYPRIOT SYLLABLE KA 1472 while (c <= 0x10835) // ..to CYPRIOT SYLLABLE WO 1473 charset[i++] = c++; 1474 charset[i++] = 0x10837; // CYPRIOT SYLLABLE XA 1475 charset[i++] = 0x10838; // CYPRIOT SYLLABLE XE 1476 c = 0x1083c; // from CYPRIOT SYLLABLE ZA 1477 while (c <= 0x1083f) // ..to CYPRIOT SYLLABLE ZO 1478 charset[i++] = c++; 1479// 10840..1085F; Imperial Aramaic 1480 c = 0x10840; // from IMPERIAL ARAMAIC LETTER ALEPH 1481 while (c <= 0x10855) // ..to IMPERIAL ARAMAIC LETTER TAW 1482 charset[i++] = c++; 1483 c = 0x10857; // from IMPERIAL ARAMAIC SECTION SIGN 1484 while (c <= 0x1085f) // ..to IMPERIAL ARAMAIC NUMBER TEN THOUSAND 1485 charset[i++] = c++; 1486// 10860..1087F; Palmyrene 1487 c = 0x10860; // from PALMYRENE LETTER ALEPH 1488 while (c <= 0x1087f) // ..to PALMYRENE NUMBER TWENTY 1489 charset[i++] = c++; 1490// 10880..108AF; Nabataean 1491 c = 0x10880; // from NABATAEAN LETTER FINAL ALEPH 1492 while (c <= 0x1089e) // ..to NABATAEAN LETTER TAW 1493 charset[i++] = c++; 1494 c = 0x108a7; // from NABATAEAN NUMBER ONE 1495 while (c <= 0x108af) // ..to NABATAEAN NUMBER ONE HUNDRED 1496 charset[i++] = c++; 1497// 108E0..108FF; Hatran 1498 c = 0x108e0; // from HATRAN LETTER ALEPH 1499 while (c <= 0x108f2) // ..to HATRAN LETTER QOPH 1500 charset[i++] = c++; 1501 charset[i++] = 0x108f4; // HATRAN LETTER SHIN 1502 charset[i++] = 0x108f5; // HATRAN LETTER TAW 1503 c = 0x108fb; // from HATRAN NUMBER ONE 1504 while (c <= 0x108ff) // ..to HATRAN NUMBER ONE HUNDRED 1505 charset[i++] = c++; 1506// 10900..1091F; Phoenician 1507 c = 0x10900; // from PHOENICIAN LETTER ALF 1508 while (c <= 0x1091b) // ..to PHOENICIAN NUMBER THREE 1509 charset[i++] = c++; 1510 charset[i++] = 0x1091f; // PHOENICIAN WORD SEPARATOR 1511// 10920..1093F; Lydian 1512 c = 0x10920; // from LYDIAN LETTER A 1513 while (c <= 0x10939) // ..to LYDIAN LETTER C 1514 charset[i++] = c++; 1515 charset[i++] = 0x1093f; // LYDIAN TRIANGULAR MARK 1516// 10980..1099F; Meroitic Hieroglyphs 1517 c = 0x10980; // from MEROITIC HIEROGLYPHIC LETTER A 1518 while (c <= 0x1099f) // ..to MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 1519 charset[i++] = c++; 1520// 109A0..109FF; Meroitic Cursive 1521 c = 0x109a0; // from MEROITIC CURSIVE LETTER A 1522 while (c <= 0x109b7) // ..to MEROITIC CURSIVE LETTER DA 1523 charset[i++] = c++; 1524 c = 0x109bc; // from MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS 1525 while (c <= 0x109cf) // ..to MEROITIC CURSIVE NUMBER SEVENTY 1526 charset[i++] = c++; 1527 c = 0x109d2; // from MEROITIC CURSIVE NUMBER ONE HUNDRED 1528 while (c <= 0x109ff) // ..to MEROITIC CURSIVE FRACTION TEN TWELFTHS 1529 charset[i++] = c++; 1530// 10A00..10A5F; Kharoshthi 1531 c = 0x10a00; // from KHAROSHTHI LETTER A 1532 while (c <= 0x10a03) // ..to KHAROSHTHI VOWEL SIGN VOCALIC R 1533 charset[i++] = c++; 1534 charset[i++] = 0x10a05; // KHAROSHTHI VOWEL SIGN E 1535 charset[i++] = 0x10a06; // KHAROSHTHI VOWEL SIGN O 1536 c = 0x10a0c; // from KHAROSHTHI VOWEL LENGTH MARK 1537 while (c <= 0x10a13) // ..to KHAROSHTHI LETTER GHA 1538 charset[i++] = c++; 1539 charset[i++] = 0x10a15; // KHAROSHTHI LETTER CA 1540 charset[i++] = 0x10a17; // KHAROSHTHI LETTER JA 1541 c = 0x10a19; // from KHAROSHTHI LETTER NYA 1542 while (c <= 0x10a35) // ..to KHAROSHTHI LETTER VHA 1543 charset[i++] = c++; 1544 charset[i++] = 0x10a38; // KHAROSHTHI SIGN BAR ABOVE 1545 charset[i++] = 0x10a3a; // KHAROSHTHI SIGN DOT BELOW 1546 c = 0x10a3f; // from KHAROSHTHI VIRAMA 1547 while (c <= 0x10a48) // ..to KHAROSHTHI FRACTION ONE HALF 1548 charset[i++] = c++; 1549 c = 0x10a50; // from KHAROSHTHI PUNCTUATION DOT 1550 while (c <= 0x10a58) // ..to KHAROSHTHI PUNCTUATION LINES 1551 charset[i++] = c++; 1552// 10A60..10A7F; Old South Arabian 1553 c = 0x10a60; // from OLD SOUTH ARABIAN LETTER HE 1554 while (c <= 0x10a7f) // ..to OLD SOUTH ARABIAN NUMERIC INDICATOR 1555 charset[i++] = c++; 1556// 10A80..10A9F; Old North Arabian 1557 c = 0x10a80; // from OLD NORTH ARABIAN LETTER HEH 1558 while (c <= 0x10a9f) // ..to OLD NORTH ARABIAN NUMBER TWENTY 1559 charset[i++] = c++; 1560// 10AC0..10AFF; Manichaean 1561 c = 0x10ac0; // from MANICHAEAN LETTER ALEPH 1562 while (c <= 0x10ae6) // ..to MANICHAEAN ABBREVIATION MARK BELOW 1563 charset[i++] = c++; 1564 c = 0x10aeb; // from MANICHAEAN NUMBER ONE 1565 while (c <= 0x10af6) // ..to MANICHAEAN PUNCTUATION LINE FILLER 1566 charset[i++] = c++; 1567// 10B00..10B3F; Avestan 1568 c = 0x10b00; // from AVESTAN LETTER A 1569 while (c <= 0x10b35) // ..to AVESTAN LETTER HE 1570 charset[i++] = c++; 1571 c = 0x10b39; // from AVESTAN ABBREVIATION MARK 1572 while (c <= 0x10b3f) // ..to LARGE ONE RING OVER TWO RINGS PUNCTUATION 1573 charset[i++] = c++; 1574// 10B40..10B5F; Inscriptional Parthian 1575 c = 0x10b40; // from INSCRIPTIONAL PARTHIAN LETTER ALEPH 1576 while (c <= 0x10b55) // ..to INSCRIPTIONAL PARTHIAN LETTER TAW 1577 charset[i++] = c++; 1578 c = 0x10b58; // from INSCRIPTIONAL PARTHIAN NUMBER ONE 1579 while (c <= 0x10b5f) // ..to INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND 1580 charset[i++] = c++; 1581// 10B60..10B7F; Inscriptional Pahlavi 1582 c = 0x10b60; // from INSCRIPTIONAL PAHLAVI LETTER ALEPH 1583 while (c <= 0x10b72) // ..to INSCRIPTIONAL PAHLAVI LETTER TAW 1584 charset[i++] = c++; 1585 c = 0x10b78; // from INSCRIPTIONAL PAHLAVI NUMBER ONE 1586 while (c <= 0x10b7f) // ..to INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND 1587 charset[i++] = c++; 1588// 10B80..10BAF; Psalter Pahlavi 1589 c = 0x10b80; // from PSALTER PAHLAVI LETTER ALEPH 1590 while (c <= 0x10b91) // ..to PSALTER PAHLAVI LETTER TAW 1591 charset[i++] = c++; 1592 c = 0x10b99; // from PSALTER PAHLAVI SECTION MARK 1593 while (c <= 0x10b9c) // ..to PSALTER PAHLAVI FOUR DOTS WITH DOT 1594 charset[i++] = c++; 1595 c = 0x10ba9; // from PSALTER PAHLAVI NUMBER ONE 1596 while (c <= 0x10baf) // ..to PSALTER PAHLAVI NUMBER ONE HUNDRED 1597 charset[i++] = c++; 1598// 10C00..10C4F; Old Turkic 1599 c = 0x10c00; // from OLD TURKIC LETTER ORKHON A 1600 while (c <= 0x10c48) // ..to OLD TURKIC LETTER ORKHON BASH 1601 charset[i++] = c++; 1602// 10C80..10CFF; Old Hungarian 1603 c = 0x10c80; // from OLD HUNGARIAN CAPITAL LETTER A 1604 while (c <= 0x10cb2) // ..to OLD HUNGARIAN CAPITAL LETTER US 1605 charset[i++] = c++; 1606 c = 0x10cc0; // from OLD HUNGARIAN SMALL LETTER A 1607 while (c <= 0x10cf2) // ..to OLD HUNGARIAN SMALL LETTER US 1608 charset[i++] = c++; 1609 c = 0x10cfa; // from OLD HUNGARIAN NUMBER ONE 1610 while (c <= 0x10cff) // ..to OLD HUNGARIAN NUMBER ONE THOUSAND 1611 charset[i++] = c++; 1612// 10D00..10D3F; Hanifi Rohingya 1613 c = 0x10d00; // from HANIFI ROHINGYA LETTER A 1614 while (c <= 0x10d27) // ..to HANIFI ROHINGYA SIGN TASSI 1615 charset[i++] = c++; 1616 c = 0x10d30; // from HANIFI ROHINGYA DIGIT ZERO 1617 while (c <= 0x10d39) // ..to HANIFI ROHINGYA DIGIT NINE 1618 charset[i++] = c++; 1619// 10E60..10E7F; Rumi Numeral Symbols 1620 c = 0x10e60; // from RUMI DIGIT ONE 1621 while (c <= 0x10e7e) // ..to RUMI FRACTION TWO THIRDS 1622 charset[i++] = c++; 1623// 10F00..10F2F; Old Sogdian 1624 c = 0x10f00; // from OLD SOGDIAN LETTER ALEPH 1625 while (c <= 0x10f27) // ..to OLD SOGDIAN LIGATURE AYIN-DALETH 1626 charset[i++] = c++; 1627// 10F30..10F6F; Sogdian 1628 c = 0x10f30; // from SOGDIAN LETTER ALEPH 1629 while (c <= 0x10f59) // ..to SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT 1630 charset[i++] = c++; 1631// 11000..1107F; Brahmi 1632 c = 0x11000; // from BRAHMI SIGN CANDRABINDU 1633 while (c <= 0x1104d) // ..to BRAHMI PUNCTUATION LOTUS 1634 charset[i++] = c++; 1635 c = 0x11052; // from BRAHMI NUMBER ONE 1636 while (c <= 0x1106f) // ..to BRAHMI DIGIT NINE 1637 charset[i++] = c++; 1638 charset[i++] = 0x1107f; // BRAHMI NUMBER JOINER 1639// 11080..110CF; Kaithi 1640 c = 0x11080; // from KAITHI SIGN CANDRABINDU 1641 while (c <= 0x110c1) // ..to KAITHI DOUBLE DANDA 1642 charset[i++] = c++; 1643 charset[i++] = 0x110cd; // KAITHI NUMBER SIGN ABOVE 1644// 110D0..110FF; Sora Sompeng 1645 c = 0x110d0; // from SORA SOMPENG LETTER SAH 1646 while (c <= 0x110e8) // ..to SORA SOMPENG LETTER MAE 1647 charset[i++] = c++; 1648 c = 0x110f0; // from SORA SOMPENG DIGIT ZERO 1649 while (c <= 0x110f9) // ..to SORA SOMPENG DIGIT NINE 1650 charset[i++] = c++; 1651// 11100..1114F; Chakma 1652 c = 0x11100; // from CHAKMA SIGN CANDRABINDU 1653 while (c <= 0x11134) // ..to CHAKMA MAAYYAA 1654 charset[i++] = c++; 1655 c = 0x11136; // from CHAKMA DIGIT ZERO 1656 while (c <= 0x11146) // ..to CHAKMA VOWEL SIGN EI 1657 charset[i++] = c++; 1658// 11150..1117F; Mahajani 1659 c = 0x11150; // from MAHAJANI LETTER A 1660 while (c <= 0x11176) // ..to MAHAJANI LIGATURE SHRI 1661 charset[i++] = c++; 1662// 11180..111DF; Sharada 1663 c = 0x11180; // from SHARADA SIGN CANDRABINDU 1664 while (c <= 0x111cd) // ..to SHARADA SUTRA MARK 1665 charset[i++] = c++; 1666 c = 0x111d0; // from SHARADA DIGIT ZERO 1667 while (c <= 0x111df) // ..to SHARADA SECTION MARK-2 1668 charset[i++] = c++; 1669// 111E0..111FF; Sinhala Archaic Numbers 1670 c = 0x111e1; // from SINHALA ARCHAIC DIGIT ONE 1671 while (c <= 0x111f4) // ..to SINHALA ARCHAIC NUMBER ONE THOUSAND 1672 charset[i++] = c++; 1673// 11200..1124F; Khojki 1674 c = 0x11200; // from KHOJKI LETTER A 1675 while (c <= 0x11211) // ..to KHOJKI LETTER JJA 1676 charset[i++] = c++; 1677 c = 0x11213; // from KHOJKI LETTER NYA 1678 while (c <= 0x1123e) // ..to KHOJKI SIGN SUKUN 1679 charset[i++] = c++; 1680// 11280..112AF; Multani 1681 c = 0x11280; // from MULTANI LETTER A 1682 while (c <= 0x11286) // ..to MULTANI LETTER GA 1683 charset[i++] = c++; 1684 c = 0x1128a; // from MULTANI LETTER CA 1685 while (c <= 0x1128d) // ..to MULTANI LETTER JJA 1686 charset[i++] = c++; 1687 c = 0x1128f; // from MULTANI LETTER NYA 1688 while (c <= 0x1129d) // ..to MULTANI LETTER BA 1689 charset[i++] = c++; 1690 c = 0x1129f; // from MULTANI LETTER BHA 1691 while (c <= 0x112a9) // ..to MULTANI SECTION MARK 1692 charset[i++] = c++; 1693// 112B0..112FF; Khudawadi 1694 c = 0x112b0; // from KHUDAWADI LETTER A 1695 while (c <= 0x112ea) // ..to KHUDAWADI SIGN VIRAMA 1696 charset[i++] = c++; 1697 c = 0x112f0; // from KHUDAWADI DIGIT ZERO 1698 while (c <= 0x112f9) // ..to KHUDAWADI DIGIT NINE 1699 charset[i++] = c++; 1700// 11300..1137F; Grantha 1701 c = 0x11300; // from GRANTHA SIGN COMBINING ANUSVARA ABOVE 1702 while (c <= 0x11303) // ..to GRANTHA SIGN VISARGA 1703 charset[i++] = c++; 1704 c = 0x11305; // from GRANTHA LETTER A 1705 while (c <= 0x1130c) // ..to GRANTHA LETTER VOCALIC L 1706 charset[i++] = c++; 1707 charset[i++] = 0x1130f; // GRANTHA LETTER EE 1708 charset[i++] = 0x11310; // GRANTHA LETTER AI 1709 c = 0x11313; // from GRANTHA LETTER OO 1710 while (c <= 0x11328) // ..to GRANTHA LETTER NA 1711 charset[i++] = c++; 1712 c = 0x1132a; // from GRANTHA LETTER PA 1713 while (c <= 0x11330) // ..to GRANTHA LETTER RA 1714 charset[i++] = c++; 1715 charset[i++] = 0x11332; // GRANTHA LETTER LA 1716 charset[i++] = 0x11333; // GRANTHA LETTER LLA 1717 c = 0x11335; // from GRANTHA LETTER VA 1718 while (c <= 0x11339) // ..to GRANTHA LETTER HA 1719 charset[i++] = c++; 1720 c = 0x1133b; // from COMBINING BINDU BELOW 1721 while (c <= 0x11344) // ..to GRANTHA VOWEL SIGN VOCALIC RR 1722 charset[i++] = c++; 1723 charset[i++] = 0x11347; // GRANTHA VOWEL SIGN EE 1724 charset[i++] = 0x11348; // GRANTHA VOWEL SIGN AI 1725 charset[i++] = 0x1134b; // GRANTHA VOWEL SIGN OO 1726 charset[i++] = 0x1134d; // GRANTHA SIGN VIRAMA 1727 c = 0x1135d; // from GRANTHA SIGN PLUTA 1728 while (c <= 0x11363) // ..to GRANTHA VOWEL SIGN VOCALIC LL 1729 charset[i++] = c++; 1730 c = 0x11366; // from COMBINING GRANTHA DIGIT ZERO 1731 while (c <= 0x1136c) // ..to COMBINING GRANTHA DIGIT SIX 1732 charset[i++] = c++; 1733 c = 0x11370; // from COMBINING GRANTHA LETTER A 1734 while (c <= 0x11374) // ..to COMBINING GRANTHA LETTER PA 1735 charset[i++] = c++; 1736// 11400..1147F; Newa 1737 c = 0x11400; // from NEWA LETTER A 1738 while (c <= 0x11459) // ..to NEWA DIGIT NINE 1739 charset[i++] = c++; 1740 charset[i++] = 0x1145d; // NEWA INSERTION SIGN 1741 charset[i++] = 0x1145e; // NEWA SANDHI MARK 1742// 11480..114DF; Tirhuta 1743 c = 0x11480; // from TIRHUTA ANJI 1744 while (c <= 0x114c7) // ..to TIRHUTA OM 1745 charset[i++] = c++; 1746 c = 0x114d0; // from TIRHUTA DIGIT ZERO 1747 while (c <= 0x114d9) // ..to TIRHUTA DIGIT NINE 1748 charset[i++] = c++; 1749// 11580..115FF; Siddham 1750 c = 0x11580; // from SIDDHAM LETTER A 1751 while (c <= 0x115b5) // ..to SIDDHAM VOWEL SIGN VOCALIC RR 1752 charset[i++] = c++; 1753 c = 0x115b8; // from SIDDHAM VOWEL SIGN E 1754 while (c <= 0x115dd) // ..to SIDDHAM VOWEL SIGN ALTERNATE UU 1755 charset[i++] = c++; 1756// 11600..1165F; Modi 1757 c = 0x11600; // from MODI LETTER A 1758 while (c <= 0x11644) // ..to MODI SIGN HUVA 1759 charset[i++] = c++; 1760 c = 0x11650; // from MODI DIGIT ZERO 1761 while (c <= 0x11659) // ..to MODI DIGIT NINE 1762 charset[i++] = c++; 1763// 11660..1167F; Mongolian Supplement 1764 c = 0x11660; // from MONGOLIAN BIRGA WITH ORNAMENT 1765 while (c <= 0x1166c) // ..to MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 1766 charset[i++] = c++; 1767// 11680..116CF; Takri 1768 c = 0x11680; // from TAKRI LETTER A 1769 while (c <= 0x116b7) // ..to TAKRI SIGN NUKTA 1770 charset[i++] = c++; 1771 c = 0x116c0; // from TAKRI DIGIT ZERO 1772 while (c <= 0x116c9) // ..to TAKRI DIGIT NINE 1773 charset[i++] = c++; 1774// 11700..1173F; Ahom 1775 c = 0x11700; // from AHOM LETTER KA 1776 while (c <= 0x1171a) // ..to AHOM LETTER ALTERNATE BA 1777 charset[i++] = c++; 1778 c = 0x1171d; // from AHOM CONSONANT SIGN MEDIAL LA 1779 while (c <= 0x1172b) // ..to AHOM SIGN KILLER 1780 charset[i++] = c++; 1781 c = 0x11730; // from AHOM DIGIT ZERO 1782 while (c <= 0x1173f) // ..to AHOM SYMBOL VI 1783 charset[i++] = c++; 1784// 11800..1184F; Dogra 1785 c = 0x11800; // from DOGRA LETTER A 1786 while (c <= 0x1183b) // ..to DOGRA ABBREVIATION SIGN 1787 charset[i++] = c++; 1788// 118A0..118FF; Warang Citi 1789 c = 0x118a0; // from WARANG CITI CAPITAL LETTER NGAA 1790 while (c <= 0x118f2) // ..to WARANG CITI NUMBER NINETY 1791 charset[i++] = c++; 1792 charset[i++] = 0x118ff; // WARANG CITI OM 1793// 11A00..11A4F; Zanabazar Square 1794 c = 0x11a00; // from ZANABAZAR SQUARE LETTER A 1795 while (c <= 0x11a47) // ..to ZANABAZAR SQUARE SUBJOINER 1796 charset[i++] = c++; 1797// 11A50..11AAF; Soyombo 1798 c = 0x11a50; // from SOYOMBO LETTER A 1799 while (c <= 0x11a83) // ..to SOYOMBO LETTER KSSA 1800 charset[i++] = c++; 1801 c = 0x11a86; // from SOYOMBO CLUSTER-INITIAL LETTER RA 1802 while (c <= 0x11aa2) // ..to SOYOMBO TERMINAL MARK-2 1803 charset[i++] = c++; 1804// 11AC0..11AFF; Pau Cin Hau 1805 c = 0x11ac0; // from PAU CIN HAU LETTER PA 1806 while (c <= 0x11af8) // ..to PAU CIN HAU GLOTTAL STOP FINAL 1807 charset[i++] = c++; 1808// 11C00..11C6F; Bhaiksuki 1809 c = 0x11c00; // from BHAIKSUKI LETTER A 1810 while (c <= 0x11c08) // ..to BHAIKSUKI LETTER VOCALIC L 1811 charset[i++] = c++; 1812 c = 0x11c0a; // from BHAIKSUKI LETTER E 1813 while (c <= 0x11c36) // ..to BHAIKSUKI VOWEL SIGN VOCALIC L 1814 charset[i++] = c++; 1815 c = 0x11c38; // from BHAIKSUKI VOWEL SIGN E 1816 while (c <= 0x11c45) // ..to BHAIKSUKI GAP FILLER-2 1817 charset[i++] = c++; 1818 c = 0x11c50; // from BHAIKSUKI DIGIT ZERO 1819 while (c <= 0x11c6c) // ..to BHAIKSUKI HUNDREDS UNIT MARK 1820 charset[i++] = c++; 1821// 11C70..11CBF; Marchen 1822 c = 0x11c70; // from MARCHEN HEAD MARK 1823 while (c <= 0x11c8f) // ..to MARCHEN LETTER A 1824 charset[i++] = c++; 1825 c = 0x11c92; // from MARCHEN SUBJOINED LETTER KA 1826 while (c <= 0x11ca7) // ..to MARCHEN SUBJOINED LETTER ZA 1827 charset[i++] = c++; 1828 c = 0x11ca9; // from MARCHEN SUBJOINED LETTER YA 1829 while (c <= 0x11cb6) // ..to MARCHEN SIGN CANDRABINDU 1830 charset[i++] = c++; 1831// 11D00..11D5F; Masaram Gondi 1832 c = 0x11d00; // from MASARAM GONDI LETTER A 1833 while (c <= 0x11d06) // ..to MASARAM GONDI LETTER E 1834 charset[i++] = c++; 1835 charset[i++] = 0x11d08; // MASARAM GONDI LETTER AI 1836 charset[i++] = 0x11d09; // MASARAM GONDI LETTER O 1837 c = 0x11d0b; // from MASARAM GONDI LETTER AU 1838 while (c <= 0x11d36) // ..to MASARAM GONDI VOWEL SIGN VOCALIC R 1839 charset[i++] = c++; 1840 charset[i++] = 0x11d3c; // MASARAM GONDI VOWEL SIGN AI 1841 charset[i++] = 0x11d3d; // MASARAM GONDI VOWEL SIGN O 1842 c = 0x11d3f; // from MASARAM GONDI VOWEL SIGN AU 1843 while (c <= 0x11d47) // ..to MASARAM GONDI RA-KARA 1844 charset[i++] = c++; 1845 c = 0x11d50; // from MASARAM GONDI DIGIT ZERO 1846 while (c <= 0x11d59) // ..to MASARAM GONDI DIGIT NINE 1847 charset[i++] = c++; 1848// 11D60..11DAF; Gunjala Gondi 1849 c = 0x11d60; // from GUNJALA GONDI LETTER A 1850 while (c <= 0x11d65) // ..to GUNJALA GONDI LETTER UU 1851 charset[i++] = c++; 1852 charset[i++] = 0x11d67; // GUNJALA GONDI LETTER EE 1853 charset[i++] = 0x11d68; // GUNJALA GONDI LETTER AI 1854 c = 0x11d6a; // from GUNJALA GONDI LETTER OO 1855 while (c <= 0x11d8e) // ..to GUNJALA GONDI VOWEL SIGN UU 1856 charset[i++] = c++; 1857 charset[i++] = 0x11d90; // GUNJALA GONDI VOWEL SIGN EE 1858 charset[i++] = 0x11d91; // GUNJALA GONDI VOWEL SIGN AI 1859 c = 0x11d93; // from GUNJALA GONDI VOWEL SIGN OO 1860 while (c <= 0x11d98) // ..to GUNJALA GONDI OM 1861 charset[i++] = c++; 1862 c = 0x11da0; // from GUNJALA GONDI DIGIT ZERO 1863 while (c <= 0x11da9) // ..to GUNJALA GONDI DIGIT NINE 1864 charset[i++] = c++; 1865// 11EE0..11EFF; Makasar 1866 c = 0x11ee0; // from MAKASAR LETTER KA 1867 while (c <= 0x11ef8) // ..to MAKASAR END OF SECTION 1868 charset[i++] = c++; 1869// 12000..123FF; Cuneiform 1870 c = 0x12000; // from CUNEIFORM SIGN A 1871 while (c <= 0x12399) // ..to CUNEIFORM SIGN U U 1872 charset[i++] = c++; 1873// 12400..1247F; Cuneiform Numbers and Punctuation 1874 c = 0x12400; // from CUNEIFORM NUMERIC SIGN TWO ASH 1875 while (c <= 0x1246e) // ..to CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM 1876 charset[i++] = c++; 1877 c = 0x12470; // from CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER 1878 while (c <= 0x12474) // ..to CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON 1879 charset[i++] = c++; 1880// 12480..1254F; Early Dynastic Cuneiform 1881 c = 0x12480; // from CUNEIFORM SIGN AB TIMES NUN TENU 1882 while (c <= 0x12543) // ..to CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU 1883 charset[i++] = c++; 1884// 13000..1342F; Egyptian Hieroglyphs 1885 c = 0x13000; // from EGYPTIAN HIEROGLYPH A001 1886 while (c <= 0x1342e) // ..to EGYPTIAN HIEROGLYPH AA032 1887 charset[i++] = c++; 1888// 14400..1467F; Anatolian Hieroglyphs 1889 c = 0x14400; // from ANATOLIAN HIEROGLYPH A001 1890 while (c <= 0x14646) // ..to ANATOLIAN HIEROGLYPH A530 1891 charset[i++] = c++; 1892// 16800..16A3F; Bamum Supplement 1893 c = 0x16800; // from BAMUM LETTER PHASE-A NGKUE MFON 1894 while (c <= 0x16a38) // ..to BAMUM LETTER PHASE-F VUEQ 1895 charset[i++] = c++; 1896// 16A40..16A6F; Mro 1897 c = 0x16a40; // from MRO LETTER TA 1898 while (c <= 0x16a5e) // ..to MRO LETTER TEK 1899 charset[i++] = c++; 1900 c = 0x16a60; // from MRO DIGIT ZERO 1901 while (c <= 0x16a69) // ..to MRO DIGIT NINE 1902 charset[i++] = c++; 1903 charset[i++] = 0x16a6e; // MRO DANDA 1904 charset[i++] = 0x16a6f; // MRO DOUBLE DANDA 1905// 16AD0..16AFF; Bassa Vah 1906 c = 0x16ad0; // from BASSA VAH LETTER ENNI 1907 while (c <= 0x16aed) // ..to BASSA VAH LETTER I 1908 charset[i++] = c++; 1909 c = 0x16af0; // from BASSA VAH COMBINING HIGH TONE 1910 while (c <= 0x16af5) // ..to BASSA VAH FULL STOP 1911 charset[i++] = c++; 1912// 16B00..16B8F; Pahawh Hmong 1913 c = 0x16b00; // from PAHAWH HMONG VOWEL KEEB 1914 while (c <= 0x16b45) // ..to PAHAWH HMONG SIGN CIM TSOV ROG 1915 charset[i++] = c++; 1916 c = 0x16b50; // from PAHAWH HMONG DIGIT ZERO 1917 while (c <= 0x16b59) // ..to PAHAWH HMONG DIGIT NINE 1918 charset[i++] = c++; 1919 c = 0x16b5b; // from PAHAWH HMONG NUMBER TENS 1920 while (c <= 0x16b61) // ..to PAHAWH HMONG NUMBER TRILLIONS 1921 charset[i++] = c++; 1922 c = 0x16b63; // from PAHAWH HMONG SIGN VOS LUB 1923 while (c <= 0x16b77) // ..to PAHAWH HMONG SIGN CIM NRES TOS 1924 charset[i++] = c++; 1925 c = 0x16b7d; // from PAHAWH HMONG CLAN SIGN TSHEEJ 1926 while (c <= 0x16b8f) // ..to PAHAWH HMONG CLAN SIGN VWJ 1927 charset[i++] = c++; 1928// 16E40..16E9F; Medefaidrin 1929 c = 0x16e40; // from MEDEFAIDRIN CAPITAL LETTER M 1930 while (c <= 0x16e9a) // ..to MEDEFAIDRIN EXCLAMATION OH 1931 charset[i++] = c++; 1932// 16F00..16F9F; Miao 1933 c = 0x16f00; // from MIAO LETTER PA 1934 while (c <= 0x16f44) // ..to MIAO LETTER HHA 1935 charset[i++] = c++; 1936 c = 0x16f50; // from MIAO LETTER NASALIZATION 1937 while (c <= 0x16f7e) // ..to MIAO VOWEL SIGN NG 1938 charset[i++] = c++; 1939 c = 0x16f8f; // from MIAO TONE RIGHT 1940 while (c <= 0x16f9f) // ..to MIAO LETTER REFORMED TONE-8 1941 charset[i++] = c++; 1942// 16FE0..16FFF; Ideographic Symbols and Punctuation 1943 charset[i++] = 0x16fe0; // TANGUT ITERATION MARK 1944 charset[i++] = 0x16fe1; // NUSHU ITERATION MARK 1945// 17000..187FF; Tangut 1946 c = 0x17000; // from <Tangut Ideograph, First> 1947 while (c <= 0x187f1) // ..to <Tangut Ideograph, Last> 1948 charset[i++] = c++; 1949// 18800..18AFF; Tangut Components 1950 c = 0x18800; // from TANGUT COMPONENT-001 1951 while (c <= 0x18af2) // ..to TANGUT COMPONENT-755 1952 charset[i++] = c++; 1953// 1B000..1B0FF; Kana Supplement 1954 c = 0x1b000; // from KATAKANA LETTER ARCHAIC E 1955 while (c <= 0x1b0ff) // ..to HENTAIGANA LETTER RE-2 1956 charset[i++] = c++; 1957// 1B100..1B12F; Kana Extended-A 1958 c = 0x1b100; // from HENTAIGANA LETTER RE-3 1959 while (c <= 0x1b11e) // ..to HENTAIGANA LETTER N-MU-MO-2 1960 charset[i++] = c++; 1961// 1B170..1B2FF; Nushu 1962 c = 0x1b170; // from NUSHU CHARACTER-1B170 1963 while (c <= 0x1b2fb) // ..to NUSHU CHARACTER-1B2FB 1964 charset[i++] = c++; 1965// 1BC00..1BC9F; Duployan 1966 c = 0x1bc00; // from DUPLOYAN LETTER H 1967 while (c <= 0x1bc6a) // ..to DUPLOYAN LETTER VOCALIC M 1968 charset[i++] = c++; 1969 c = 0x1bc70; // from DUPLOYAN AFFIX LEFT HORIZONTAL SECANT 1970 while (c <= 0x1bc7c) // ..to DUPLOYAN AFFIX ATTACHED TANGENT HOOK 1971 charset[i++] = c++; 1972 c = 0x1bc80; // from DUPLOYAN AFFIX HIGH ACUTE 1973 while (c <= 0x1bc88) // ..to DUPLOYAN AFFIX HIGH VERTICAL 1974 charset[i++] = c++; 1975 c = 0x1bc90; // from DUPLOYAN AFFIX LOW ACUTE 1976 while (c <= 0x1bc99) // ..to DUPLOYAN AFFIX LOW ARROW 1977 charset[i++] = c++; 1978 c = 0x1bc9c; // from DUPLOYAN SIGN O WITH CROSS 1979 while (c <= 0x1bc9f) // ..to DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1980 charset[i++] = c++; 1981// 1BCA0..1BCAF; Shorthand Format Controls 1982 c = 0x1bca0; // from SHORTHAND FORMAT LETTER OVERLAP 1983 while (c <= 0x1bca3) // ..to SHORTHAND FORMAT UP STEP 1984 charset[i++] = c++; 1985// 1D000..1D0FF; Byzantine Musical Symbols 1986 c = 0x1d000; // from BYZANTINE MUSICAL SYMBOL PSILI 1987 while (c <= 0x1d0f5) // ..to BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1988 charset[i++] = c++; 1989// 1D100..1D1FF; Musical Symbols 1990 c = 0x1d100; // from MUSICAL SYMBOL SINGLE BARLINE 1991 while (c <= 0x1d126) // ..to MUSICAL SYMBOL DRUM CLEF-2 1992 charset[i++] = c++; 1993 c = 0x1d129; // from MUSICAL SYMBOL MULTIPLE MEASURE REST 1994 while (c <= 0x1d1e8) // ..to MUSICAL SYMBOL KIEVAN FLAT SIGN 1995 charset[i++] = c++; 1996// 1D200..1D24F; Ancient Greek Musical Notation 1997 c = 0x1d200; // from GREEK VOCAL NOTATION SYMBOL-1 1998 while (c <= 0x1d245) // ..to GREEK MUSICAL LEIMMA 1999 charset[i++] = c++; 2000// 1D2E0..1D2FF; Mayan Numerals 2001 c = 0x1d2e0; // from MAYAN NUMERAL ZERO 2002 while (c <= 0x1d2f3) // ..to MAYAN NUMERAL NINETEEN 2003 charset[i++] = c++; 2004// 1D300..1D35F; Tai Xuan Jing Symbols 2005 c = 0x1d300; // from MONOGRAM FOR EARTH 2006 while (c <= 0x1d356) // ..to TETRAGRAM FOR FOSTERING 2007 charset[i++] = c++; 2008// 1D360..1D37F; Counting Rod Numerals 2009 c = 0x1d360; // from COUNTING ROD UNIT DIGIT ONE 2010 while (c <= 0x1d378) // ..to TALLY MARK FIVE 2011 charset[i++] = c++; 2012// 1D400..1D7FF; Mathematical Alphanumeric Symbols 2013 c = 0x1d400; // from MATHEMATICAL BOLD CAPITAL A 2014 while (c <= 0x1d454) // ..to MATHEMATICAL ITALIC SMALL G 2015 charset[i++] = c++; 2016 c = 0x1d456; // from MATHEMATICAL ITALIC SMALL I 2017 while (c <= 0x1d49c) // ..to MATHEMATICAL SCRIPT CAPITAL A 2018 charset[i++] = c++; 2019 charset[i++] = 0x1d49e; // MATHEMATICAL SCRIPT CAPITAL C 2020 charset[i++] = 0x1d49f; // MATHEMATICAL SCRIPT CAPITAL D 2021 charset[i++] = 0x1d4a5; // MATHEMATICAL SCRIPT CAPITAL J 2022 charset[i++] = 0x1d4a6; // MATHEMATICAL SCRIPT CAPITAL K 2023 c = 0x1d4a9; // from MATHEMATICAL SCRIPT CAPITAL N 2024 while (c <= 0x1d4ac) // ..to MATHEMATICAL SCRIPT CAPITAL Q 2025 charset[i++] = c++; 2026 c = 0x1d4ae; // from MATHEMATICAL SCRIPT CAPITAL S 2027 while (c <= 0x1d4b9) // ..to MATHEMATICAL SCRIPT SMALL D 2028 charset[i++] = c++; 2029 c = 0x1d4bd; // from MATHEMATICAL SCRIPT SMALL H 2030 while (c <= 0x1d4c3) // ..to MATHEMATICAL SCRIPT SMALL N 2031 charset[i++] = c++; 2032 c = 0x1d4c5; // from MATHEMATICAL SCRIPT SMALL P 2033 while (c <= 0x1d505) // ..to MATHEMATICAL FRAKTUR CAPITAL B 2034 charset[i++] = c++; 2035 c = 0x1d507; // from MATHEMATICAL FRAKTUR CAPITAL D 2036 while (c <= 0x1d50a) // ..to MATHEMATICAL FRAKTUR CAPITAL G 2037 charset[i++] = c++; 2038 c = 0x1d50d; // from MATHEMATICAL FRAKTUR CAPITAL J 2039 while (c <= 0x1d514) // ..to MATHEMATICAL FRAKTUR CAPITAL Q 2040 charset[i++] = c++; 2041 c = 0x1d516; // from MATHEMATICAL FRAKTUR CAPITAL S 2042 while (c <= 0x1d51c) // ..to MATHEMATICAL FRAKTUR CAPITAL Y 2043 charset[i++] = c++; 2044 c = 0x1d51e; // from MATHEMATICAL FRAKTUR SMALL A 2045 while (c <= 0x1d539) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL B 2046 charset[i++] = c++; 2047 c = 0x1d53b; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL D 2048 while (c <= 0x1d53e) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL G 2049 charset[i++] = c++; 2050 c = 0x1d540; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL I 2051 while (c <= 0x1d544) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL M 2052 charset[i++] = c++; 2053 c = 0x1d54a; // from MATHEMATICAL DOUBLE-STRUCK CAPITAL S 2054 while (c <= 0x1d550) // ..to MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 2055 charset[i++] = c++; 2056 c = 0x1d552; // from MATHEMATICAL DOUBLE-STRUCK SMALL A 2057 while (c <= 0x1d6a5) // ..to MATHEMATICAL ITALIC SMALL DOTLESS J 2058 charset[i++] = c++; 2059 c = 0x1d6a8; // from MATHEMATICAL BOLD CAPITAL ALPHA 2060 while (c <= 0x1d7cb) // ..to MATHEMATICAL BOLD SMALL DIGAMMA 2061 charset[i++] = c++; 2062 c = 0x1d7ce; // from MATHEMATICAL BOLD DIGIT ZERO 2063 while (c <= 0x1d7ff) // ..to MATHEMATICAL MONOSPACE DIGIT NINE 2064 charset[i++] = c++; 2065// 1D800..1DAAF; Sutton SignWriting 2066 c = 0x1d800; // from SIGNWRITING HAND-FIST INDEX 2067 while (c <= 0x1da8b) // ..to SIGNWRITING PARENTHESIS 2068 charset[i++] = c++; 2069 c = 0x1da9b; // from SIGNWRITING FILL MODIFIER-2 2070 while (c <= 0x1da9f) // ..to SIGNWRITING FILL MODIFIER-6 2071 charset[i++] = c++; 2072 c = 0x1daa1; // from SIGNWRITING ROTATION MODIFIER-2 2073 while (c <= 0x1daaf) // ..to SIGNWRITING ROTATION MODIFIER-16 2074 charset[i++] = c++; 2075// 1E000..1E02F; Glagolitic Supplement 2076 c = 0x1e000; // from COMBINING GLAGOLITIC LETTER AZU 2077 while (c <= 0x1e006) // ..to COMBINING GLAGOLITIC LETTER ZHIVETE 2078 charset[i++] = c++; 2079 c = 0x1e008; // from COMBINING GLAGOLITIC LETTER ZEMLJA 2080 while (c <= 0x1e018) // ..to COMBINING GLAGOLITIC LETTER HERU 2081 charset[i++] = c++; 2082 c = 0x1e01b; // from COMBINING GLAGOLITIC LETTER SHTA 2083 while (c <= 0x1e021) // ..to COMBINING GLAGOLITIC LETTER YATI 2084 charset[i++] = c++; 2085 charset[i++] = 0x1e023; // COMBINING GLAGOLITIC LETTER YU 2086 charset[i++] = 0x1e024; // COMBINING GLAGOLITIC LETTER SMALL YUS 2087 c = 0x1e026; // from COMBINING GLAGOLITIC LETTER YO 2088 while (c <= 0x1e02a) // ..to COMBINING GLAGOLITIC LETTER FITA 2089 charset[i++] = c++; 2090// 1E800..1E8DF; Mende Kikakui 2091 c = 0x1e800; // from MENDE KIKAKUI SYLLABLE M001 KI 2092 while (c <= 0x1e8c4) // ..to MENDE KIKAKUI SYLLABLE M060 NYON 2093 charset[i++] = c++; 2094 c = 0x1e8c7; // from MENDE KIKAKUI DIGIT ONE 2095 while (c <= 0x1e8d6) // ..to MENDE KIKAKUI COMBINING NUMBER MILLIONS 2096 charset[i++] = c++; 2097// 1E900..1E95F; Adlam 2098 c = 0x1e900; // from ADLAM CAPITAL LETTER ALIF 2099 while (c <= 0x1e94a) // ..to ADLAM NUKTA 2100 charset[i++] = c++; 2101 c = 0x1e950; // from ADLAM DIGIT ZERO 2102 while (c <= 0x1e959) // ..to ADLAM DIGIT NINE 2103 charset[i++] = c++; 2104 charset[i++] = 0x1e95e; // ADLAM INITIAL EXCLAMATION MARK 2105 charset[i++] = 0x1e95f; // ADLAM INITIAL QUESTION MARK 2106// 1EC70..1ECBF; Indic Siyaq Numbers 2107 c = 0x1ec71; // from INDIC SIYAQ NUMBER ONE 2108 while (c <= 0x1ecb4) // ..to INDIC SIYAQ ALTERNATE LAKH MARK 2109 charset[i++] = c++; 2110// 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 2111 c = 0x1ee00; // from ARABIC MATHEMATICAL ALEF 2112 while (c <= 0x1ee03) // ..to ARABIC MATHEMATICAL DAL 2113 charset[i++] = c++; 2114 c = 0x1ee05; // from ARABIC MATHEMATICAL WAW 2115 while (c <= 0x1ee1f) // ..to ARABIC MATHEMATICAL DOTLESS QAF 2116 charset[i++] = c++; 2117 charset[i++] = 0x1ee21; // ARABIC MATHEMATICAL INITIAL BEH 2118 charset[i++] = 0x1ee22; // ARABIC MATHEMATICAL INITIAL JEEM 2119 c = 0x1ee29; // from ARABIC MATHEMATICAL INITIAL YEH 2120 while (c <= 0x1ee32) // ..to ARABIC MATHEMATICAL INITIAL QAF 2121 charset[i++] = c++; 2122 c = 0x1ee34; // from ARABIC MATHEMATICAL INITIAL SHEEN 2123 while (c <= 0x1ee37) // ..to ARABIC MATHEMATICAL INITIAL KHAH 2124 charset[i++] = c++; 2125 charset[i++] = 0x1ee4d; // ARABIC MATHEMATICAL TAILED NOON 2126 charset[i++] = 0x1ee4f; // ARABIC MATHEMATICAL TAILED AIN 2127 charset[i++] = 0x1ee51; // ARABIC MATHEMATICAL TAILED SAD 2128 charset[i++] = 0x1ee52; // ARABIC MATHEMATICAL TAILED QAF 2129 charset[i++] = 0x1ee61; // ARABIC MATHEMATICAL STRETCHED BEH 2130 charset[i++] = 0x1ee62; // ARABIC MATHEMATICAL STRETCHED JEEM 2131 c = 0x1ee67; // from ARABIC MATHEMATICAL STRETCHED HAH 2132 while (c <= 0x1ee6a) // ..to ARABIC MATHEMATICAL STRETCHED KAF 2133 charset[i++] = c++; 2134 c = 0x1ee6c; // from ARABIC MATHEMATICAL STRETCHED MEEM 2135 while (c <= 0x1ee72) // ..to ARABIC MATHEMATICAL STRETCHED QAF 2136 charset[i++] = c++; 2137 c = 0x1ee74; // from ARABIC MATHEMATICAL STRETCHED SHEEN 2138 while (c <= 0x1ee77) // ..to ARABIC MATHEMATICAL STRETCHED KHAH 2139 charset[i++] = c++; 2140 c = 0x1ee79; // from ARABIC MATHEMATICAL STRETCHED DAD 2141 while (c <= 0x1ee7c) // ..to ARABIC MATHEMATICAL STRETCHED DOTLESS BEH 2142 charset[i++] = c++; 2143 c = 0x1ee80; // from ARABIC MATHEMATICAL LOOPED ALEF 2144 while (c <= 0x1ee89) // ..to ARABIC MATHEMATICAL LOOPED YEH 2145 charset[i++] = c++; 2146 c = 0x1ee8b; // from ARABIC MATHEMATICAL LOOPED LAM 2147 while (c <= 0x1ee9b) // ..to ARABIC MATHEMATICAL LOOPED GHAIN 2148 charset[i++] = c++; 2149 charset[i++] = 0x1eea1; // ARABIC MATHEMATICAL DOUBLE-STRUCK BEH 2150 charset[i++] = 0x1eea3; // ARABIC MATHEMATICAL DOUBLE-STRUCK DAL 2151 c = 0x1eea5; // from ARABIC MATHEMATICAL DOUBLE-STRUCK WAW 2152 while (c <= 0x1eea9) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 2153 charset[i++] = c++; 2154 c = 0x1eeab; // from ARABIC MATHEMATICAL DOUBLE-STRUCK LAM 2155 while (c <= 0x1eebb) // ..to ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 2156 charset[i++] = c++; 2157 charset[i++] = 0x1eef0; // ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL 2158 charset[i++] = 0x1eef1; // ARABIC MATHEMATICAL OPERATOR HAH WITH DAL 2159// 1F000..1F02F; Mahjong Tiles 2160 c = 0x1f000; // from MAHJONG TILE EAST WIND 2161 while (c <= 0x1f02b) // ..to MAHJONG TILE BACK 2162 charset[i++] = c++; 2163// 1F030..1F09F; Domino Tiles 2164 c = 0x1f030; // from DOMINO TILE HORIZONTAL BACK 2165 while (c <= 0x1f093) // ..to DOMINO TILE VERTICAL-06-06 2166 charset[i++] = c++; 2167// 1F0A0..1F0FF; Playing Cards 2168 c = 0x1f0a0; // from PLAYING CARD BACK 2169 while (c <= 0x1f0ae) // ..to PLAYING CARD KING OF SPADES 2170 charset[i++] = c++; 2171 c = 0x1f0b1; // from PLAYING CARD ACE OF HEARTS 2172 while (c <= 0x1f0bf) // ..to PLAYING CARD RED JOKER 2173 charset[i++] = c++; 2174 c = 0x1f0c1; // from PLAYING CARD ACE OF DIAMONDS 2175 while (c <= 0x1f0cf) // ..to PLAYING CARD BLACK JOKER 2176 charset[i++] = c++; 2177 c = 0x1f0d1; // from PLAYING CARD ACE OF CLUBS 2178 while (c <= 0x1f0f5) // ..to PLAYING CARD TRUMP-21 2179 charset[i++] = c++; 2180// 1F100..1F1FF; Enclosed Alphanumeric Supplement 2181 c = 0x1f100; // from DIGIT ZERO FULL STOP 2182 while (c <= 0x1f10c) // ..to DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 2183 charset[i++] = c++; 2184 c = 0x1f110; // from PARENTHESIZED LATIN CAPITAL LETTER A 2185 while (c <= 0x1f16b) // ..to RAISED MD SIGN 2186 charset[i++] = c++; 2187 c = 0x1f170; // from NEGATIVE SQUARED LATIN CAPITAL LETTER A 2188 while (c <= 0x1f1ac) // ..to SQUARED VOD 2189 charset[i++] = c++; 2190 c = 0x1f1e6; // from REGIONAL INDICATOR SYMBOL LETTER A 2191 while (c <= 0x1f1ff) // ..to REGIONAL INDICATOR SYMBOL LETTER Z 2192 charset[i++] = c++; 2193// 1F200..1F2FF; Enclosed Ideographic Supplement 2194 charset[i++] = 0x1f200; // SQUARE HIRAGANA HOKA 2195 charset[i++] = 0x1f202; // SQUARED KATAKANA SA 2196 c = 0x1f210; // from SQUARED CJK UNIFIED IDEOGRAPH-624B 2197 while (c <= 0x1f23b) // ..to SQUARED CJK UNIFIED IDEOGRAPH-914D 2198 charset[i++] = c++; 2199 c = 0x1f240; // from TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C 2200 while (c <= 0x1f248) // ..to TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 2201 charset[i++] = c++; 2202 charset[i++] = 0x1f250; // CIRCLED IDEOGRAPH ADVANTAGE 2203 charset[i++] = 0x1f251; // CIRCLED IDEOGRAPH ACCEPT 2204 c = 0x1f260; // from ROUNDED SYMBOL FOR FU 2205 while (c <= 0x1f265) // ..to ROUNDED SYMBOL FOR CAI 2206 charset[i++] = c++; 2207// 1F300..1F5FF; Miscellaneous Symbols and Pictographs 2208 c = 0x1f300; // from CYCLONE 2209 while (c <= 0x1f5ff) // ..to MOYAI 2210 charset[i++] = c++; 2211// 1F600..1F64F; Emoticons 2212 c = 0x1f600; // from GRINNING FACE 2213 while (c <= 0x1f64f) // ..to PERSON WITH FOLDED HANDS 2214 charset[i++] = c++; 2215// 1F650..1F67F; Ornamental Dingbats 2216 c = 0x1f650; // from NORTH WEST POINTING LEAF 2217 while (c <= 0x1f67f) // ..to REVERSE CHECKER BOARD 2218 charset[i++] = c++; 2219// 1F680..1F6FF; Transport and Map Symbols 2220 c = 0x1f680; // from ROCKET 2221 while (c <= 0x1f6d4) // ..to PAGODA 2222 charset[i++] = c++; 2223 c = 0x1f6e0; // from HAMMER AND WRENCH 2224 while (c <= 0x1f6ec) // ..to AIRPLANE ARRIVING 2225 charset[i++] = c++; 2226 c = 0x1f6f0; // from SATELLITE 2227 while (c <= 0x1f6f9) // ..to SKATEBOARD 2228 charset[i++] = c++; 2229// 1F700..1F77F; Alchemical Symbols 2230 c = 0x1f700; // from ALCHEMICAL SYMBOL FOR QUINTESSENCE 2231 while (c <= 0x1f773) // ..to ALCHEMICAL SYMBOL FOR HALF OUNCE 2232 charset[i++] = c++; 2233// 1F780..1F7FF; Geometric Shapes Extended 2234 c = 0x1f780; // from BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE 2235 while (c <= 0x1f7d8) // ..to NEGATIVE CIRCLED SQUARE 2236 charset[i++] = c++; 2237// 1F800..1F8FF; Supplemental Arrows-C 2238 c = 0x1f800; // from LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD 2239 while (c <= 0x1f80b) // ..to DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD 2240 charset[i++] = c++; 2241 c = 0x1f810; // from LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD 2242 while (c <= 0x1f847) // ..to DOWNWARDS HEAVY ARROW 2243 charset[i++] = c++; 2244 c = 0x1f850; // from LEFTWARDS SANS-SERIF ARROW 2245 while (c <= 0x1f859) // ..to UP DOWN SANS-SERIF ARROW 2246 charset[i++] = c++; 2247 c = 0x1f860; // from WIDE-HEADED LEFTWARDS LIGHT BARB ARROW 2248 while (c <= 0x1f887) // ..to WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 2249 charset[i++] = c++; 2250 c = 0x1f890; // from LEFTWARDS TRIANGLE ARROWHEAD 2251 while (c <= 0x1f8ad) // ..to WHITE ARROW SHAFT WIDTH TWO THIRDS 2252 charset[i++] = c++; 2253// 1F900..1F9FF; Supplemental Symbols and Pictographs 2254 c = 0x1f900; // from CIRCLED CROSS FORMEE WITH FOUR DOTS 2255 while (c <= 0x1f90b) // ..to DOWNWARD FACING NOTCHED HOOK WITH DOT 2256 charset[i++] = c++; 2257 c = 0x1f910; // from ZIPPER-MOUTH FACE 2258 while (c <= 0x1f93e) // ..to HANDBALL 2259 charset[i++] = c++; 2260 c = 0x1f940; // from WILTED FLOWER 2261 while (c <= 0x1f970) // ..to SMILING FACE WITH SMILING EYES AND THREE HEARTS 2262 charset[i++] = c++; 2263 c = 0x1f973; // from FACE WITH PARTY HORN AND PARTY HAT 2264 while (c <= 0x1f976) // ..to FREEZING FACE 2265 charset[i++] = c++; 2266 c = 0x1f97c; // from LAB COAT 2267 while (c <= 0x1f9a2) // ..to SWAN 2268 charset[i++] = c++; 2269 c = 0x1f9b0; // from EMOJI COMPONENT RED HAIR 2270 while (c <= 0x1f9b9) // ..to SUPERVILLAIN 2271 charset[i++] = c++; 2272 charset[i++] = 0x1f9c0; // CHEESE WEDGE 2273 charset[i++] = 0x1f9c2; // SALT SHAKER 2274 c = 0x1f9d0; // from FACE WITH MONOCLE 2275 while (c <= 0x1f9ff) // ..to NAZAR AMULET 2276 charset[i++] = c++; 2277// 1FA00..1FA6F; Chess Symbols 2278 c = 0x1fa60; // from XIANGQI RED GENERAL 2279 while (c <= 0x1fa6d) // ..to XIANGQI BLACK SOLDIER 2280 charset[i++] = c++; 2281// 20000..2A6DF; CJK Unified Ideographs Extension B 2282 c = 0x20000; // from <CJK Ideograph Extension B, First> 2283 while (c <= 0x2a6d6) // ..to <CJK Ideograph Extension B, Last> 2284 charset[i++] = c++; 2285// 2A700..2B73F; CJK Unified Ideographs Extension C 2286 c = 0x2a700; // from <CJK Ideograph Extension C, First> 2287 while (c <= 0x2b734) // ..to <CJK Ideograph Extension C, Last> 2288 charset[i++] = c++; 2289// 2B740..2B81F; CJK Unified Ideographs Extension D 2290 c = 0x2b740; // from <CJK Ideograph Extension D, First> 2291 while (c <= 0x2b81d) // ..to <CJK Ideograph Extension D, Last> 2292 charset[i++] = c++; 2293// 2B820..2CEAF; CJK Unified Ideographs Extension E 2294 c = 0x2b820; // from <CJK Ideograph Extension E, First> 2295 while (c <= 0x2cea1) // ..to <CJK Ideograph Extension E, Last> 2296 charset[i++] = c++; 2297// 2CEB0..2EBEF; CJK Unified Ideographs Extension F 2298 c = 0x2ceb0; // from <CJK Ideograph Extension F, First> 2299 while (c <= 0x2ebe0) // ..to <CJK Ideograph Extension F, Last> 2300 charset[i++] = c++; 2301// 2F800..2FA1F; CJK Compatibility Ideographs Supplement 2302 c = 0x2f800; // from CJK COMPATIBILITY IDEOGRAPH-2F800 2303 while (c <= 0x2fa1d) // ..to CJK COMPATIBILITY IDEOGRAPH-2FA1D 2304 charset[i++] = c++; 2305// E0000..E007F; Tags 2306 c = 0xe0020; // from TAG SPACE 2307 while (c <= 0xe007f) // ..to CANCEL TAG 2308 charset[i++] = c++; 2309// E0100..E01EF; Variation Selectors Supplement 2310// F0000..FFFFF; Supplementary Private Use Area-A 2311// 100000..10FFFF; Supplementary Private Use Area-B 2312 2313/* Zero-terminate it, and cache the first character */ 2314 charset[i] = 0; 2315 c0 = charset[0]; 2316 2317 last = minlength - 1; 2318 i = 0; 2319 while (i <= last) { 2320 id[i] = 0; 2321 word[i++] = c0; 2322 } 2323 lastid = -1; 2324 word[i] = 0; 2325 2326/* We must init word with dummy data, it doesn't get set until filter() */ 2327 word = 1; 2328} 2329 2330void generate() 2331{ 2332 int i; 2333 2334/* Handle the typical case specially */ 2335 if (word[last] = charset[++lastid]) return; 2336 2337 lastid = 0; 2338 word[i = last] = c0; 2339 while (i--) { // Have a preceding position? 2340 if (word[i] = charset[++id[i]]) return; 2341 id[i] = 0; 2342 word[i] = c0; 2343 } 2344 2345 if (++last < maxlength) { // Next length? 2346 id[last] = lastid = 0; 2347 word[last] = c0; 2348 word[last + 1] = 0; 2349 } else // We're done 2350 word = 0; 2351} 2352 2353void restore() 2354{ 2355 int i, c; 2356 2357 /* Calculate the current length and infer the character indices */ 2358 last = 0; 2359 while (c = word[last]) { 2360 i = 0; while (charset[i] != c && charset[i]) i++; 2361 if (!charset[i]) i = 0; // Not found 2362 id[last++] = i; 2363 } 2364 lastid = id[--last]; 2365} 2366