package unidata

// East Asian Width property values, numbered sequentially from 0 via iota.
// The trailing comment on each constant gives the property value name and
// its abbreviation as used in Unicode Standard Annex #11.
const (
	WidthAmbiguous = uint8(iota) // Ambiguous, A
	WidthFullWidth               // Fullwidth, F
	WidthHalfWidth               // Halfwidth, H
	WidthNarrow                  // Narrow, Na
	WidthNeutral                 // Neutral (Not East Asian), N
	WidthWide                    // Wide, W
)

// WidthNames maps each Width* constant to a short lower-case name.
var WidthNames = map[uint8]string{
	WidthAmbiguous: "ambiguous",
	WidthFullWidth: "full",
	WidthHalfWidth: "half",
	WidthNarrow: "narrow",
	WidthNeutral: "neutral",
	WidthWide: "wide",
}

// General category values, numbered sequentially from 0 via iota; CatUnknown
// is the zero value.
//
// The grouping categories (LC, L, M, N, P, S, Z, C) are ordinary sequential
// values like every other entry. The "Lu | Ll | Lt" notation in their
// comments lists the member categories of the group; it is not a bitwise OR
// of the constants.
//
// http://www.unicode.org/reports/tr44/#General_Category_Values
const (
	CatUnknown = uint8(iota)
	CatUppercaseLetter      // Lu – an uppercase letter
	CatLowercaseLetter      // Ll – a lowercase letter
	CatTitlecaseLetter      // Lt – a digraphic character, with first part uppercase
	CatCasedLetter          // LC – Lu | Ll | Lt
	CatModifierLetter       // Lm – a modifier letter
	CatOtherLetter          // Lo – other letters, including syllables and ideographs
	CatLetter               // L – Lu | Ll | Lt | Lm | Lo
	CatNonspacingMark       // Mn – a nonspacing combining mark (zero advance width)
	CatSpacingMark          // Mc – a spacing combining mark (positive advance width)
	CatEnclosingMark        // Me – an enclosing combining mark
	CatMark                 // M – Mn | Mc | Me
	CatDecimalNumber        // Nd – a decimal digit
	CatLetterNumber         // Nl – a letterlike numeric character
	CatOtherNumber          // No – a numeric character of other type
	CatNumber               // N – Nd | Nl | No
	CatConnectorPunctuation // Pc – a connecting punctuation mark, like a tie
	CatDashPunctuation      // Pd – a dash or hyphen punctuation mark
	CatOpenPunctuation      // Ps – an opening punctuation mark (of a pair)
	CatClosePunctuation     // Pe – a closing punctuation mark (of a pair)
	CatInitialPunctuation   // Pi – an initial quotation mark
	CatFinalPunctuation     // Pf – a final quotation mark
	CatOtherPunctuation     // Po – a punctuation mark of other type
	CatPunctuation          // P – Pc | Pd | Ps | Pe | Pi | Pf | Po
	CatMathSymbol           // Sm – a symbol of mathematical use
	CatCurrencySymbol       // Sc – a currency sign
	CatModifierSymbol       // Sk – a non-letterlike modifier symbol
	CatOtherSymbol          // So – a symbol of other type
	CatSymbol               // S – Sm | Sc | Sk | So
	CatSpaceSeparator       // Zs – a space character (of various non-zero widths)
	CatLineSeparator        // Zl – U+2028 LINE SEPARATOR only
	CatParagraphSeparator   // Zp – U+2029 PARAGRAPH SEPARATOR only
	CatSeparator            // Z – Zs | Zl | Zp
	CatControl              // Cc – a C0 or C1 control code
	CatFormat               // Cf – a format control character
	CatSurrogate            // Cs – a surrogate code point
	CatPrivateUse           // Co – a private-use character
	CatUnassigned           // Cn – a reserved unassigned code point or a noncharacter
	CatOther                // C – Cc | Cf | Cs | Co | Cn
)

// Planes maps a plane name to the first and last code point it covers.
//
// Two entries span more than one 0x10000-sized plane: "Unassigned" covers
// 0x40000–0xDFFFF and "Supplementary Private Use Area planes" covers
// 0xF0000–0x10FFFF.
var Planes = map[string][2]rune{
	"Basic Multilingual Plane": {0, 0xFFFF},
	"Supplementary Multilingual Plane": {0x10000, 0x1FFFF},
	"Supplementary Ideographic Plane": {0x20000, 0x2FFFF},
	"Tertiary Ideographic Plane": {0x30000, 0x3FFFF},
	"Unassigned": {0x40000, 0xDFFFF},
	"Supplementary Special-purpose Plane": {0xE0000, 0xEFFFF},
	"Supplementary Private Use Area planes": {0xF0000, 0x10FFFF},
}

// TODO: generate this from the data file:
// https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
var (
	// Blocks maps a block name to the first and last code point of that
	// block. Code points that fall between two listed blocks have no entry.
	Blocks = map[string][2]rune{
		"Basic Latin": {0x0000, 0x007F},
		"Latin-1 Supplement": {0x0080, 0x00FF},
		"Latin Extended-A": {0x0100, 0x017F},
		"Latin Extended-B": {0x0180, 0x024F},
		"IPA Extensions": {0x0250, 0x02AF},
		"Spacing Modifier Letters": {0x02B0, 0x02FF},
		"Combining Diacritical Marks": {0x0300, 0x036F},
		"Greek and Coptic": {0x0370, 0x03FF},
		"Cyrillic": {0x0400, 0x04FF},
		"Cyrillic Supplement": {0x0500, 0x052F},
		"Armenian": {0x0530, 0x058F},
		"Hebrew": {0x0590, 0x05FF},
		"Arabic": {0x0600, 0x06FF},
		"Syriac": {0x0700, 0x074F},
		"Arabic Supplement": {0x0750, 0x077F},
		"Thaana": {0x0780, 0x07BF},
		"NKo": {0x07C0, 0x07FF},
		"Samaritan": {0x0800, 0x083F},
		"Mandaic": {0x0840, 0x085F},
		"Syriac Supplement": {0x0860, 0x086F},
		"Arabic Extended-A": {0x08A0, 0x08FF},
		"Devanagari": {0x0900, 0x097F},
		"Bengali": {0x0980, 0x09FF},
		"Gurmukhi": {0x0A00, 0x0A7F},
		"Gujarati": {0x0A80, 0x0AFF},
		"Oriya": {0x0B00, 0x0B7F},
		"Tamil": {0x0B80, 0x0BFF},
		"Telugu": {0x0C00, 0x0C7F},
		"Kannada": {0x0C80, 0x0CFF},
		"Malayalam": {0x0D00, 0x0D7F},
		"Sinhala": {0x0D80, 0x0DFF},
		"Thai": {0x0E00, 0x0E7F},
		"Lao": {0x0E80, 0x0EFF},
		"Tibetan": {0x0F00, 0x0FFF},
		"Myanmar": {0x1000, 0x109F},
		"Georgian": {0x10A0, 0x10FF},
		"Hangul Jamo": {0x1100, 0x11FF},
		"Ethiopic": {0x1200, 0x137F},
		"Ethiopic Supplement": {0x1380, 0x139F},
		"Cherokee": {0x13A0, 0x13FF},
		"Unified Canadian Aboriginal Syllabics": {0x1400, 0x167F},
		"Ogham": {0x1680, 0x169F},
		"Runic": {0x16A0, 0x16FF},
		"Tagalog": {0x1700, 0x171F},
		"Hanunoo": {0x1720, 0x173F},
		"Buhid": {0x1740, 0x175F},
		"Tagbanwa": {0x1760, 0x177F},
		"Khmer": {0x1780, 0x17FF},
		"Mongolian": {0x1800, 0x18AF},
		"Unified Canadian Aboriginal Syllabics Extended": {0x18B0, 0x18FF},
		"Limbu": {0x1900, 0x194F},
		"Tai Le": {0x1950, 0x197F},
		"New Tai Lue": {0x1980, 0x19DF},
		"Khmer Symbols": {0x19E0, 0x19FF},
		"Buginese": {0x1A00, 0x1A1F},
		"Tai Tham": {0x1A20, 0x1AAF},
		"Combining Diacritical Marks Extended": {0x1AB0, 0x1AFF},
		"Balinese": {0x1B00, 0x1B7F},
		"Sundanese": {0x1B80, 0x1BBF},
		"Batak": {0x1BC0, 0x1BFF},
		"Lepcha": {0x1C00, 0x1C4F},
		"Ol Chiki": {0x1C50, 0x1C7F},
		"Cyrillic Extended-C": {0x1C80, 0x1C8F},
		"Georgian Extended": {0x1C90, 0x1CBF},
		"Sundanese Supplement": {0x1CC0, 0x1CCF},
		"Vedic Extensions": {0x1CD0, 0x1CFF},
		"Phonetic Extensions": {0x1D00, 0x1D7F},
		"Phonetic Extensions Supplement": {0x1D80, 0x1DBF},
		"Combining Diacritical Marks Supplement": {0x1DC0, 0x1DFF},
		"Latin Extended Additional": {0x1E00, 0x1EFF},
		"Greek Extended": {0x1F00, 0x1FFF},
		"General Punctuation": {0x2000, 0x206F},
		"Superscripts and Subscripts": {0x2070, 0x209F},
		"Currency Symbols": {0x20A0, 0x20CF},
		"Combining Diacritical Marks for Symbols": {0x20D0, 0x20FF},
		"Letterlike Symbols": {0x2100, 0x214F},
		"Number Forms": {0x2150, 0x218F},
		"Arrows": {0x2190, 0x21FF},
		"Mathematical Operators": {0x2200, 0x22FF},
		"Miscellaneous Technical": {0x2300, 0x23FF},
		"Control Pictures": {0x2400, 0x243F},
		"Optical Character Recognition": {0x2440, 0x245F},
		"Enclosed Alphanumerics": {0x2460, 0x24FF},
		"Box Drawing": {0x2500, 0x257F},
		"Block Elements": {0x2580, 0x259F},
		"Geometric Shapes": {0x25A0, 0x25FF},
		"Miscellaneous Symbols": {0x2600, 0x26FF},
		"Dingbats": {0x2700, 0x27BF},
		"Miscellaneous Mathematical Symbols-A": {0x27C0, 0x27EF},
		"Supplemental Arrows-A": {0x27F0, 0x27FF},
		"Braille Patterns": {0x2800, 0x28FF},
		"Supplemental Arrows-B": {0x2900, 0x297F},
		"Miscellaneous Mathematical Symbols-B": {0x2980, 0x29FF},
		"Supplemental Mathematical Operators": {0x2A00, 0x2AFF},
		"Miscellaneous Symbols and Arrows": {0x2B00, 0x2BFF},
		"Glagolitic": {0x2C00, 0x2C5F},
		"Latin Extended-C": {0x2C60, 0x2C7F},
		"Coptic": {0x2C80, 0x2CFF},
		"Georgian Supplement": {0x2D00, 0x2D2F},
		"Tifinagh": {0x2D30, 0x2D7F},
		"Ethiopic Extended": {0x2D80, 0x2DDF},
		"Cyrillic Extended-A": {0x2DE0, 0x2DFF},
		"Supplemental Punctuation": {0x2E00, 0x2E7F},
		"CJK Radicals Supplement": {0x2E80, 0x2EFF},
		"Kangxi Radicals": {0x2F00, 0x2FDF},
		"Ideographic Description Characters": {0x2FF0, 0x2FFF},
		"CJK Symbols and Punctuation": {0x3000, 0x303F},
		"Hiragana": {0x3040, 0x309F},
		"Katakana": {0x30A0, 0x30FF},
		"Bopomofo": {0x3100, 0x312F},
		"Hangul Compatibility Jamo": {0x3130, 0x318F},
		"Kanbun": {0x3190, 0x319F},
		"Bopomofo Extended": {0x31A0, 0x31BF},
		"CJK Strokes": {0x31C0, 0x31EF},
		"Katakana Phonetic Extensions": {0x31F0, 0x31FF},
		"Enclosed CJK Letters and Months": {0x3200, 0x32FF},
		"CJK Compatibility": {0x3300, 0x33FF},
		"CJK Unified Ideographs Extension A": {0x3400, 0x4DBF},
		"Yijing Hexagram Symbols": {0x4DC0, 0x4DFF},
		"CJK Unified Ideographs": {0x4E00, 0x9FFF},
		"Yi Syllables": {0xA000, 0xA48F},
		"Yi Radicals": {0xA490, 0xA4CF},
		"Lisu": {0xA4D0, 0xA4FF},
		"Vai": {0xA500, 0xA63F},
		"Cyrillic Extended-B": {0xA640, 0xA69F},
		"Bamum": {0xA6A0, 0xA6FF},
		"Modifier Tone Letters": {0xA700, 0xA71F},
		"Latin Extended-D": {0xA720, 0xA7FF},
		"Syloti Nagri": {0xA800, 0xA82F},
		"Common Indic Number Forms": {0xA830, 0xA83F},
		"Phags-pa": {0xA840, 0xA87F},
		"Saurashtra": {0xA880, 0xA8DF},
		"Devanagari Extended": {0xA8E0, 0xA8FF},
		"Kayah Li": {0xA900, 0xA92F},
		"Rejang": {0xA930, 0xA95F},
		"Hangul Jamo Extended-A": {0xA960, 0xA97F},
		"Javanese": {0xA980, 0xA9DF},
		"Myanmar Extended-B": {0xA9E0, 0xA9FF},
		"Cham": {0xAA00, 0xAA5F},
		"Myanmar Extended-A": {0xAA60, 0xAA7F},
		"Tai Viet": {0xAA80, 0xAADF},
		"Meetei Mayek Extensions": {0xAAE0, 0xAAFF},
		"Ethiopic Extended-A": {0xAB00, 0xAB2F},
		"Latin Extended-E": {0xAB30, 0xAB6F},
		"Cherokee Supplement": {0xAB70, 0xABBF},
		"Meetei Mayek": {0xABC0, 0xABFF},
		"Hangul Syllables": {0xAC00, 0xD7AF},
		"Hangul Jamo Extended-B": {0xD7B0, 0xD7FF},
		"High Surrogates": {0xD800, 0xDB7F},
		"High Private Use Surrogates": {0xDB80, 0xDBFF},
		"Low Surrogates": {0xDC00, 0xDFFF},
		"Private Use Area": {0xE000, 0xF8FF},
		"CJK Compatibility Ideographs": {0xF900, 0xFAFF},
		"Alphabetic Presentation Forms": {0xFB00, 0xFB4F},
		"Arabic Presentation Forms-A": {0xFB50, 0xFDFF},
		"Variation Selectors": {0xFE00, 0xFE0F},
		"Vertical Forms": {0xFE10, 0xFE1F},
		"Combining Half Marks": {0xFE20, 0xFE2F},
		"CJK Compatibility Forms": {0xFE30, 0xFE4F},
		"Small Form Variants": {0xFE50, 0xFE6F},
		"Arabic Presentation Forms-B": {0xFE70, 0xFEFF},
		"Halfwidth and Fullwidth Forms": {0xFF00, 0xFFEF},
		"Specials": {0xFFF0, 0xFFFF},
		"Linear B Syllabary": {0x10000, 0x1007F},
		"Linear B Ideograms": {0x10080, 0x100FF},
		"Aegean Numbers": {0x10100, 0x1013F},
		"Ancient Greek Numbers": {0x10140, 0x1018F},
		"Ancient Symbols": {0x10190, 0x101CF},
		"Phaistos Disc": {0x101D0, 0x101FF},
		"Lycian": {0x10280, 0x1029F},
		"Carian": {0x102A0, 0x102DF},
		"Coptic Epact Numbers": {0x102E0, 0x102FF},
		"Old Italic": {0x10300, 0x1032F},
		"Gothic": {0x10330, 0x1034F},
		"Old Permic": {0x10350, 0x1037F},
		"Ugaritic": {0x10380, 0x1039F},
		"Old Persian": {0x103A0, 0x103DF},
		"Deseret": {0x10400, 0x1044F},
		"Shavian": {0x10450, 0x1047F},
		"Osmanya": {0x10480, 0x104AF},
		"Osage": {0x104B0, 0x104FF},
		"Elbasan": {0x10500, 0x1052F},
		"Caucasian Albanian": {0x10530, 0x1056F},
		"Linear A": {0x10600, 0x1077F},
		"Cypriot Syllabary": {0x10800, 0x1083F},
		"Imperial Aramaic": {0x10840, 0x1085F},
		"Palmyrene": {0x10860, 0x1087F},
		"Nabataean": {0x10880, 0x108AF},
		"Hatran": {0x108E0, 0x108FF},
		"Phoenician": {0x10900, 0x1091F},
		"Lydian": {0x10920, 0x1093F},
		"Meroitic Hieroglyphs": {0x10980, 0x1099F},
		"Meroitic Cursive": {0x109A0, 0x109FF},
		"Kharoshthi": {0x10A00, 0x10A5F},
		"Old South Arabian": {0x10A60, 0x10A7F},
		"Old North Arabian": {0x10A80, 0x10A9F},
		"Manichaean": {0x10AC0, 0x10AFF},
		"Avestan": {0x10B00, 0x10B3F},
		"Inscriptional Parthian": {0x10B40, 0x10B5F},
		"Inscriptional Pahlavi": {0x10B60, 0x10B7F},
		"Psalter Pahlavi": {0x10B80, 0x10BAF},
		"Old Turkic": {0x10C00, 0x10C4F},
		"Old Hungarian": {0x10C80, 0x10CFF},
		"Hanifi Rohingya": {0x10D00, 0x10D3F},
		"Rumi Numeral Symbols": {0x10E60, 0x10E7F},
		"Old Sogdian": {0x10F00, 0x10F2F},
		"Sogdian": {0x10F30, 0x10F6F},
		"Elymaic": {0x10FE0, 0x10FFF},
		"Brahmi": {0x11000, 0x1107F},
		"Kaithi": {0x11080, 0x110CF},
		"Sora Sompeng": {0x110D0, 0x110FF},
		"Chakma": {0x11100, 0x1114F},
		"Mahajani": {0x11150, 0x1117F},
		"Sharada": {0x11180, 0x111DF},
		"Sinhala Archaic Numbers": {0x111E0, 0x111FF},
		"Khojki": {0x11200, 0x1124F},
		"Multani": {0x11280, 0x112AF},
		"Khudawadi": {0x112B0, 0x112FF},
		"Grantha": {0x11300, 0x1137F},
		"Newa": {0x11400, 0x1147F},
		"Tirhuta": {0x11480, 0x114DF},
		"Siddham": {0x11580, 0x115FF},
		"Modi": {0x11600, 0x1165F},
		"Mongolian Supplement": {0x11660, 0x1167F},
		"Takri": {0x11680, 0x116CF},
		"Ahom": {0x11700, 0x1173F},
		"Dogra": {0x11800, 0x1184F},
		"Warang Citi": {0x118A0, 0x118FF},
		"Nandinagari": {0x119A0, 0x119FF},
		"Zanabazar Square": {0x11A00, 0x11A4F},
		"Soyombo": {0x11A50, 0x11AAF},
		"Pau Cin Hau": {0x11AC0, 0x11AFF},
		"Bhaiksuki": {0x11C00, 0x11C6F},
		"Marchen": {0x11C70, 0x11CBF},
		"Masaram Gondi": {0x11D00, 0x11D5F},
		"Gunjala Gondi": {0x11D60, 0x11DAF},
		"Makasar": {0x11EE0, 0x11EFF},
		"Tamil Supplement": {0x11FC0, 0x11FFF},
		"Cuneiform": {0x12000, 0x123FF},
		"Cuneiform Numbers and Punctuation": {0x12400, 0x1247F},
		"Early Dynastic Cuneiform": {0x12480, 0x1254F},
		"Egyptian Hieroglyphs": {0x13000, 0x1342F},
		"Egyptian Hieroglyph Format Controls": {0x13430, 0x1343F},
		"Anatolian Hieroglyphs": {0x14400, 0x1467F},
		"Bamum Supplement": {0x16800, 0x16A3F},
		"Mro": {0x16A40, 0x16A6F},
		"Bassa Vah": {0x16AD0, 0x16AFF},
		"Pahawh Hmong": {0x16B00, 0x16B8F},
		"Medefaidrin": {0x16E40, 0x16E9F},
		"Miao": {0x16F00, 0x16F9F},
		"Ideographic Symbols and Punctuation": {0x16FE0, 0x16FFF},
		"Tangut": {0x17000, 0x187FF},
		"Tangut Components": {0x18800, 0x18AFF},
		"Kana Supplement": {0x1B000, 0x1B0FF},
		"Kana Extended-A": {0x1B100, 0x1B12F},
		"Small Kana Extension": {0x1B130, 0x1B16F},
		"Nushu": {0x1B170, 0x1B2FF},
		"Duployan": {0x1BC00, 0x1BC9F},
		"Shorthand Format Controls": {0x1BCA0, 0x1BCAF},
		"Byzantine Musical Symbols": {0x1D000, 0x1D0FF},
		"Musical Symbols": {0x1D100, 0x1D1FF},
		"Ancient Greek Musical Notation": {0x1D200, 0x1D24F},
		"Mayan Numerals": {0x1D2E0, 0x1D2FF},
		"Tai Xuan Jing Symbols": {0x1D300, 0x1D35F},
		"Counting Rod Numerals": {0x1D360, 0x1D37F},
		"Mathematical Alphanumeric Symbols": {0x1D400, 0x1D7FF},
		"Sutton SignWriting": {0x1D800, 0x1DAAF},
		"Glagolitic Supplement": {0x1E000, 0x1E02F},
		"Nyiakeng Puachue Hmong": {0x1E100, 0x1E14F},
		"Wancho": {0x1E2C0, 0x1E2FF},
		"Mende Kikakui": {0x1E800, 0x1E8DF},
		"Adlam": {0x1E900, 0x1E95F},
		"Indic Siyaq Numbers": {0x1EC70, 0x1ECBF},
		"Ottoman Siyaq Numbers": {0x1ED00, 0x1ED4F},
		"Arabic Mathematical Alphabetic Symbols": {0x1EE00, 0x1EEFF},
		"Mahjong Tiles": {0x1F000, 0x1F02F},
		"Domino Tiles": {0x1F030, 0x1F09F},
		"Playing Cards": {0x1F0A0, 0x1F0FF},
		"Enclosed Alphanumeric Supplement": {0x1F100, 0x1F1FF},
		"Enclosed Ideographic Supplement": {0x1F200, 0x1F2FF},
		"Miscellaneous Symbols and Pictographs": {0x1F300, 0x1F5FF},
		"Emoticons": {0x1F600, 0x1F64F},
		"Ornamental Dingbats": {0x1F650, 0x1F67F},
		"Transport and Map Symbols": {0x1F680, 0x1F6FF},
		"Alchemical Symbols": {0x1F700, 0x1F77F},
		"Geometric Shapes Extended": {0x1F780, 0x1F7FF},
		"Supplemental Arrows-C": {0x1F800, 0x1F8FF},
		"Supplemental Symbols and Pictographs": {0x1F900, 0x1F9FF},
		"Chess Symbols": {0x1FA00, 0x1FA6F},
		"Symbols and Pictographs Extended-A": {0x1FA70, 0x1FAFF},
		"CJK Unified Ideographs Extension B": {0x20000, 0x2A6DF},
		"CJK Unified Ideographs Extension C": {0x2A700, 0x2B73F},
		"CJK Unified Ideographs Extension D": {0x2B740, 0x2B81F},
		"CJK Unified Ideographs Extension E": {0x2B820, 0x2CEAF},
		"CJK Unified Ideographs Extension F": {0x2CEB0, 0x2EBEF},
		"CJK Compatibility Ideographs Supplement": {0x2F800, 0x2FA1F},
		"Tags": {0xE0000, 0xE007F},
		"Variation Selectors Supplement": {0xE0100, 0xE01EF},
		"Supplementary Private Use Area-A": {0xF0000, 0xFFFFF},
		"Supplementary Private Use Area-B": {0x100000, 0x10FFFF},
	}

	// Blockmap is a reverse lookup into Blocks. It starts empty and is
	// filled by init below: each key is CanonicalCategory(name) and each
	// value is the original block name as it appears in Blocks.
	Blockmap = make(map[string]string)
)

// init fills Blockmap with one entry per key of Blocks.
//
// CanonicalCategory is defined outside this file; presumably it normalises
// case and separators so lookups can be lenient — confirm against its
// definition. If two block names normalise to the same key, whichever is
// visited last wins, and Go's map iteration order is unspecified.
func init() {
	for k := range Blocks {
		Blockmap[CanonicalCategory(k)] = k
	}
}

var (
	// Catmap maps the accepted spellings of a general category to its Cat*
	// constant: the two-letter abbreviation as written in the Unicode data,
	// the same abbreviation in lower case, the full name in lower case with
	// underscores, and the full name with the underscores removed.
	//
	// CatUnknown has no spelling here, so a missing key yields the zero
	// value, which equals CatUnknown.
	Catmap = map[string]uint8{
		// Short-hand.
		"Lu": CatUppercaseLetter,
		"Ll": CatLowercaseLetter,
		"Lt": CatTitlecaseLetter,
		"LC": CatCasedLetter,
		"Lm": CatModifierLetter,
		"Lo": CatOtherLetter,
		"L": CatLetter,
		"Mn": CatNonspacingMark,
		"Mc": CatSpacingMark,
		"Me": CatEnclosingMark,
		"M": CatMark,
		"Nd": CatDecimalNumber,
		"Nl": CatLetterNumber,
		"No": CatOtherNumber,
		"N": CatNumber,
		"Pc": CatConnectorPunctuation,
		"Pd": CatDashPunctuation,
		"Ps": CatOpenPunctuation,
		"Pe": CatClosePunctuation,
		"Pi": CatInitialPunctuation,
		"Pf": CatFinalPunctuation,
		"Po": CatOtherPunctuation,
		"P": CatPunctuation,
		"Sm": CatMathSymbol,
		"Sc": CatCurrencySymbol,
		"Sk": CatModifierSymbol,
		"So": CatOtherSymbol,
		"S": CatSymbol,
		"Zs": CatSpaceSeparator,
		"Zl": CatLineSeparator,
		"Zp": CatParagraphSeparator,
		"Z": CatSeparator,
		"Cc": CatControl,
		"Cf": CatFormat,
		"Cs": CatSurrogate,
		"Co": CatPrivateUse,
		"Cn": CatUnassigned,
		"C": CatOther,

		// Lower-case shorthand.
		"lu": CatUppercaseLetter,
		"ll": CatLowercaseLetter,
		"lt": CatTitlecaseLetter,
		"lc": CatCasedLetter,
		"lm": CatModifierLetter,
		"lo": CatOtherLetter,
		"l": CatLetter,
		"mn": CatNonspacingMark,
		"mc": CatSpacingMark,
		"me": CatEnclosingMark,
		"m": CatMark,
		"nd": CatDecimalNumber,
		"nl": CatLetterNumber,
		"no": CatOtherNumber,
		"n": CatNumber,
		"pc": CatConnectorPunctuation,
		"pd": CatDashPunctuation,
		"ps": CatOpenPunctuation,
		"pe": CatClosePunctuation,
		"pi": CatInitialPunctuation,
		"pf": CatFinalPunctuation,
		"po": CatOtherPunctuation,
		"p": CatPunctuation,
		"sm": CatMathSymbol,
		"sc": CatCurrencySymbol,
		"sk": CatModifierSymbol,
		"so": CatOtherSymbol,
		"s": CatSymbol,
		"zs": CatSpaceSeparator,
		"zl": CatLineSeparator,
		"zp": CatParagraphSeparator,
		"z": CatSeparator,
		"cc": CatControl,
		"cf": CatFormat,
		"cs": CatSurrogate,
		"co": CatPrivateUse,
		"cn": CatUnassigned,
		"c": CatOther,

		// Full names, underscores.
		"uppercase_letter": CatUppercaseLetter,
		"lowercase_letter": CatLowercaseLetter,
		"titlecase_letter": CatTitlecaseLetter,
		"cased_letter": CatCasedLetter,
		"modifier_letter": CatModifierLetter,
		"other_letter": CatOtherLetter,
		"letter": CatLetter,
		"nonspacing_mark": CatNonspacingMark,
		"spacing_mark": CatSpacingMark,
		"enclosing_mark": CatEnclosingMark,
		"mark": CatMark,
		"decimal_number": CatDecimalNumber,
		"letter_number": CatLetterNumber,
		"other_number": CatOtherNumber,
		"number": CatNumber,
		"connector_punctuation": CatConnectorPunctuation,
		"dash_punctuation": CatDashPunctuation,
		"open_punctuation": CatOpenPunctuation,
		"close_punctuation": CatClosePunctuation,
		"initial_punctuation": CatInitialPunctuation,
		"final_punctuation": CatFinalPunctuation,
		"other_punctuation": CatOtherPunctuation,
		"punctuation": CatPunctuation,
		"math_symbol": CatMathSymbol,
		"currency_symbol": CatCurrencySymbol,
		"modifier_symbol": CatModifierSymbol,
		"other_symbol": CatOtherSymbol,
		"symbol": CatSymbol,
		"space_separator": CatSpaceSeparator,
		"line_separator": CatLineSeparator,
		"paragraph_separator": CatParagraphSeparator,
		"separator": CatSeparator,
		"control": CatControl,
		"format": CatFormat,
		"surrogate": CatSurrogate,
		"private_use": CatPrivateUse,
		"unassigned": CatUnassigned,
		"other": CatOther,

		// Without underscore.
		// Single-word names ("letter", "mark", "control", ...) are left out
		// of this group: they contain no underscore, so they would repeat
		// keys from the group above, and a map literal may not contain
		// duplicate constant keys.
		"uppercaseletter": CatUppercaseLetter,
		"lowercaseletter": CatLowercaseLetter,
		"titlecaseletter": CatTitlecaseLetter,
		"casedletter": CatCasedLetter,
		"modifierletter": CatModifierLetter,
		"otherletter": CatOtherLetter,
		"nonspacingmark": CatNonspacingMark,
		"spacingmark": CatSpacingMark,
		"enclosingmark": CatEnclosingMark,
		"decimalnumber": CatDecimalNumber,
		"letternumber": CatLetterNumber,
		"othernumber": CatOtherNumber,
		"connectorpunctuation": CatConnectorPunctuation,
		"dashpunctuation": CatDashPunctuation,
		"openpunctuation": CatOpenPunctuation,
		"closepunctuation": CatClosePunctuation,
		"initialpunctuation": CatInitialPunctuation,
		"finalpunctuation": CatFinalPunctuation,
		"otherpunctuation": CatOtherPunctuation,
		"mathsymbol": CatMathSymbol,
		"currencysymbol": CatCurrencySymbol,
		"modifiersymbol": CatModifierSymbol,
		"othersymbol": CatOtherSymbol,
		"spaceseparator": CatSpaceSeparator,
		"lineseparator": CatLineSeparator,
		"paragraphseparator": CatParagraphSeparator,
		"privateuse": CatPrivateUse,
	}

	// Catnames maps each Cat* constant to its full name, capitalised and
	// joined with underscores. CatUnknown has no entry, so looking it up
	// yields the empty string.
	Catnames = map[uint8]string{
		CatUppercaseLetter: "Uppercase_Letter",
		CatLowercaseLetter: "Lowercase_Letter",
		CatTitlecaseLetter: "Titlecase_Letter",
		CatCasedLetter: "Cased_Letter",
		CatModifierLetter: "Modifier_Letter",
		CatOtherLetter: "Other_Letter",
		CatLetter: "Letter",
		CatNonspacingMark: "Nonspacing_Mark",
		CatSpacingMark: "Spacing_Mark",
		CatEnclosingMark: "Enclosing_Mark",
		CatMark: "Mark",
		CatDecimalNumber: "Decimal_Number",
		CatLetterNumber: "Letter_Number",
		CatOtherNumber: "Other_Number",
		CatNumber: "Number",
		CatConnectorPunctuation: "Connector_Punctuation",
		CatDashPunctuation: "Dash_Punctuation",
		CatOpenPunctuation: "Open_Punctuation",
		CatClosePunctuation: "Close_Punctuation",
		CatInitialPunctuation: "Initial_Punctuation",
		CatFinalPunctuation: "Final_Punctuation",
		CatOtherPunctuation: "Other_Punctuation",
		CatPunctuation: "Punctuation",
		CatMathSymbol: "Math_Symbol",
		CatCurrencySymbol: "Currency_Symbol",
		CatModifierSymbol: "Modifier_Symbol",
		CatOtherSymbol: "Other_Symbol",
		CatSymbol: "Symbol",
		CatSpaceSeparator: "Space_Separator",
		CatLineSeparator: "Line_Separator",
		CatParagraphSeparator: "Paragraph_Separator",
		CatSeparator: "Separator",
		CatControl: "Control",
		CatFormat: "Format",
		CatSurrogate: "Surrogate",
		CatPrivateUse: "Private_Use",
		CatUnassigned: "Unassigned",
		CatOther: "Other",
	}
)

var (
	// ranges lists code point ranges as {first, last} pairs. It has the same
	// number of entries, in the same order, as rangeNames below, so the two
	// appear to be parallel slices (rangeNames[i] labels ranges[i]) — confirm
	// against the code that consumes them. Keep both in step when editing.
	//
	// NOTE(review): several end points stop short of the enclosing block in
	// Blocks (e.g. 0x4DB5 vs 0x4DBF), which looks like the last assigned
	// code point rather than the block end. The Tangut end point 0x187F1 may
	// come from an older UnicodeData.txt than the one Blocks was taken from —
	// verify both tables track the same Unicode version.
	ranges = [][]rune{
		{0x3400, 0x4DB5},
		{0x4E00, 0x9FEF},
		{0xAC00, 0xD7A3},
		{0xD800, 0xDB7F},
		{0xDB80, 0xDBFF},
		{0xDC00, 0xDFFF},
		{0xE000, 0xF8FF},
		{0x17000, 0x187F1},
		{0x20000, 0x2A6D6},
		{0x2A700, 0x2B734},
		{0x2B740, 0x2B81D},
		{0x2B820, 0x2CEA1},
		{0x2CEB0, 0x2EBE0},
		{0xF0000, 0xFFFFD},
		{0x100000, 0x10FFFD},
	}

	// rangeNames holds one angle-bracketed label per entry of ranges, in the
	// same order. The labels presumably mirror the "<..., First>" /
	// "<..., Last>" range markers of UnicodeData.txt — confirm.
	rangeNames = []string{
		"<CJK Ideograph Extension A>",
		"<CJK Ideograph>",
		"<Hangul Syllable>",
		"<Non Private Use High Surrogate>",
		"<Private Use High Surrogate>",
		"<Low Surrogate>",
		"<Private Use>",
		"<Tangut Ideograph>",
		"<CJK Ideograph Extension B>",
		"<CJK Ideograph Extension C>",
		"<CJK Ideograph Extension D>",
		"<CJK Ideograph Extension E>",
		"<CJK Ideograph Extension F>",
		"<Plane 15 Private Use>",
		"<Plane 16 Private Use>",
	}
)