package unidata

// East Asian Width property values (Unicode Standard Annex #11). The letters
// after each name are the short aliases used in the Unicode data files; note
// that "Na" is Narrow and "N" is Neutral.
const (
	WidthAmbiguous = uint8(iota) // Ambiguous, A
	WidthFullWidth               // Fullwidth, F
	WidthHalfWidth               // Halfwidth, H
	WidthNarrow                  // Narrow, Na
	WidthNeutral                 // Neutral (Not East Asian), N
	WidthWide                    // Wide, W
)

12var WidthNames = map[uint8]string{
13	WidthAmbiguous: "ambiguous",
14	WidthFullWidth: "full",
15	WidthHalfWidth: "half",
16	WidthNarrow:    "narrow",
17	WidthNeutral:   "neutral",
18	WidthWide:      "wide",
19}
20
// General_Category values. Besides the two-letter categories this includes the
// grouping aliases (LC, L, M, N, P, S, Z, C), each of which stands for the
// union of categories listed in its comment. CatUnknown is the zero value.
//
// See http://www.unicode.org/reports/tr44/#General_Category_Values
const (
	CatUnknown              = uint8(iota)
	CatUppercaseLetter      // Lu – an uppercase letter
	CatLowercaseLetter      // Ll – a lowercase letter
	CatTitlecaseLetter      // Lt – a digraphic character, with first part uppercase
	CatCasedLetter          // LC – Lu | Ll | Lt
	CatModifierLetter       // Lm – a modifier letter
	CatOtherLetter          // Lo – other letters, including syllables and ideographs
	CatLetter               // L  – Lu | Ll | Lt | Lm | Lo
	CatNonspacingMark       // Mn – a nonspacing combining mark (zero advance width)
	CatSpacingMark          // Mc – a spacing combining mark (positive advance width)
	CatEnclosingMark        // Me – an enclosing combining mark
	CatMark                 // M  – Mn | Mc | Me
	CatDecimalNumber        // Nd – a decimal digit
	CatLetterNumber         // Nl – a letterlike numeric character
	CatOtherNumber          // No – a numeric character of other type
	CatNumber               // N  – Nd | Nl | No
	CatConnectorPunctuation // Pc – a connecting punctuation mark, like a tie
	CatDashPunctuation      // Pd – a dash or hyphen punctuation mark
	CatOpenPunctuation      // Ps – an opening punctuation mark (of a pair)
	CatClosePunctuation     // Pe – a closing punctuation mark (of a pair)
	CatInitialPunctuation   // Pi – an initial quotation mark
	CatFinalPunctuation     // Pf – a final quotation mark
	CatOtherPunctuation     // Po – a punctuation mark of other type
	CatPunctuation          // P  – Pc | Pd | Ps | Pe | Pi | Pf | Po
	CatMathSymbol           // Sm – a symbol of mathematical use
	CatCurrencySymbol       // Sc – a currency sign
	CatModifierSymbol       // Sk – a non-letterlike modifier symbol
	CatOtherSymbol          // So – a symbol of other type
	CatSymbol               // S  – Sm | Sc | Sk | So
	CatSpaceSeparator       // Zs – a space character (of various non-zero widths)
	CatLineSeparator        // Zl – U+2028 LINE SEPARATOR only
	CatParagraphSeparator   // Zp – U+2029 PARAGRAPH SEPARATOR only
	CatSeparator            // Z  – Zs | Zl | Zp
	CatControl              // Cc – a C0 or C1 control code
	CatFormat               // Cf – a format control character
	CatSurrogate            // Cs – a surrogate code point
	CatPrivateUse           // Co – a private-use character
	CatUnassigned           // Cn – a reserved unassigned code point or a noncharacter
	CatOther                // C  – Cc | Cf | Cs | Co | Cn
)

// Planes maps a plane name to its inclusive [first, last] code point range.
// "Unassigned" spans the ten planes from 0x40000 through 0xDFFFF, and the
// private use entry spans the final two planes (0xF0000 through 0x10FFFF).
var Planes = map[string][2]rune{
	"Basic Multilingual Plane":              {0, 0xFFFF},
	"Supplementary Multilingual Plane":      {0x10000, 0x1FFFF},
	"Supplementary Ideographic Plane":       {0x20000, 0x2FFFF},
	"Tertiary Ideographic Plane":            {0x30000, 0x3FFFF},
	"Unassigned":                            {0x40000, 0xDFFFF},
	"Supplementary Special-purpose Plane":   {0xE0000, 0xEFFFF},
	"Supplementary Private Use Area planes": {0xF0000, 0x10FFFF},
}

// TODO: generate this from the data file:
// https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt
var (
	// Blocks maps a Unicode block name to its inclusive [first, last] code
	// point range.
	Blocks = map[string][2]rune{
		"Basic Latin":                           {0x0000, 0x007F},
		"Latin-1 Supplement":                    {0x0080, 0x00FF},
		"Latin Extended-A":                      {0x0100, 0x017F},
		"Latin Extended-B":                      {0x0180, 0x024F},
		"IPA Extensions":                        {0x0250, 0x02AF},
		"Spacing Modifier Letters":              {0x02B0, 0x02FF},
		"Combining Diacritical Marks":           {0x0300, 0x036F},
		"Greek and Coptic":                      {0x0370, 0x03FF},
		"Cyrillic":                              {0x0400, 0x04FF},
		"Cyrillic Supplement":                   {0x0500, 0x052F},
		"Armenian":                              {0x0530, 0x058F},
		"Hebrew":                                {0x0590, 0x05FF},
		"Arabic":                                {0x0600, 0x06FF},
		"Syriac":                                {0x0700, 0x074F},
		"Arabic Supplement":                     {0x0750, 0x077F},
		"Thaana":                                {0x0780, 0x07BF},
		"NKo":                                   {0x07C0, 0x07FF},
		"Samaritan":                             {0x0800, 0x083F},
		"Mandaic":                               {0x0840, 0x085F},
		"Syriac Supplement":                     {0x0860, 0x086F},
		"Arabic Extended-A":                     {0x08A0, 0x08FF},
		"Devanagari":                            {0x0900, 0x097F},
		"Bengali":                               {0x0980, 0x09FF},
		"Gurmukhi":                              {0x0A00, 0x0A7F},
		"Gujarati":                              {0x0A80, 0x0AFF},
		"Oriya":                                 {0x0B00, 0x0B7F},
		"Tamil":                                 {0x0B80, 0x0BFF},
		"Telugu":                                {0x0C00, 0x0C7F},
		"Kannada":                               {0x0C80, 0x0CFF},
		"Malayalam":                             {0x0D00, 0x0D7F},
		"Sinhala":                               {0x0D80, 0x0DFF},
		"Thai":                                  {0x0E00, 0x0E7F},
		"Lao":                                   {0x0E80, 0x0EFF},
		"Tibetan":                               {0x0F00, 0x0FFF},
		"Myanmar":                               {0x1000, 0x109F},
		"Georgian":                              {0x10A0, 0x10FF},
		"Hangul Jamo":                           {0x1100, 0x11FF},
		"Ethiopic":                              {0x1200, 0x137F},
		"Ethiopic Supplement":                   {0x1380, 0x139F},
		"Cherokee":                              {0x13A0, 0x13FF},
		"Unified Canadian Aboriginal Syllabics": {0x1400, 0x167F},
		"Ogham":                                 {0x1680, 0x169F},
		"Runic":                                 {0x16A0, 0x16FF},
		"Tagalog":                               {0x1700, 0x171F},
		"Hanunoo":                               {0x1720, 0x173F},
		"Buhid":                                 {0x1740, 0x175F},
		"Tagbanwa":                              {0x1760, 0x177F},
		"Khmer":                                 {0x1780, 0x17FF},
		"Mongolian":                             {0x1800, 0x18AF},
		"Unified Canadian Aboriginal Syllabics Extended": {0x18B0, 0x18FF},
		"Limbu":                                  {0x1900, 0x194F},
		"Tai Le":                                 {0x1950, 0x197F},
		"New Tai Lue":                            {0x1980, 0x19DF},
		"Khmer Symbols":                          {0x19E0, 0x19FF},
		"Buginese":                               {0x1A00, 0x1A1F},
		"Tai Tham":                               {0x1A20, 0x1AAF},
		"Combining Diacritical Marks Extended":   {0x1AB0, 0x1AFF},
		"Balinese":                               {0x1B00, 0x1B7F},
		"Sundanese":                              {0x1B80, 0x1BBF},
		"Batak":                                  {0x1BC0, 0x1BFF},
		"Lepcha":                                 {0x1C00, 0x1C4F},
		"Ol Chiki":                               {0x1C50, 0x1C7F},
		"Cyrillic Extended-C":                    {0x1C80, 0x1C8F},
		"Georgian Extended":                      {0x1C90, 0x1CBF},
		"Sundanese Supplement":                   {0x1CC0, 0x1CCF},
		"Vedic Extensions":                       {0x1CD0, 0x1CFF},
		"Phonetic Extensions":                    {0x1D00, 0x1D7F},
		"Phonetic Extensions Supplement":         {0x1D80, 0x1DBF},
		"Combining Diacritical Marks Supplement": {0x1DC0, 0x1DFF},
		"Latin Extended Additional":              {0x1E00, 0x1EFF},
		"Greek Extended":                         {0x1F00, 0x1FFF},
		"General Punctuation":                    {0x2000, 0x206F},
		"Superscripts and Subscripts":            {0x2070, 0x209F},
		"Currency Symbols":                       {0x20A0, 0x20CF},
		"Combining Diacritical Marks for Symbols": {0x20D0, 0x20FF},
		"Letterlike Symbols":                      {0x2100, 0x214F},
		"Number Forms":                            {0x2150, 0x218F},
		"Arrows":                                  {0x2190, 0x21FF},
		"Mathematical Operators":                  {0x2200, 0x22FF},
		"Miscellaneous Technical":                 {0x2300, 0x23FF},
		"Control Pictures":                        {0x2400, 0x243F},
		"Optical Character Recognition":           {0x2440, 0x245F},
		"Enclosed Alphanumerics":                  {0x2460, 0x24FF},
		"Box Drawing":                             {0x2500, 0x257F},
		"Block Elements":                          {0x2580, 0x259F},
		"Geometric Shapes":                        {0x25A0, 0x25FF},
		"Miscellaneous Symbols":                   {0x2600, 0x26FF},
		"Dingbats":                                {0x2700, 0x27BF},
		"Miscellaneous Mathematical Symbols-A":    {0x27C0, 0x27EF},
		"Supplemental Arrows-A":                   {0x27F0, 0x27FF},
		"Braille Patterns":                        {0x2800, 0x28FF},
		"Supplemental Arrows-B":                   {0x2900, 0x297F},
		"Miscellaneous Mathematical Symbols-B":    {0x2980, 0x29FF},
		"Supplemental Mathematical Operators":     {0x2A00, 0x2AFF},
		"Miscellaneous Symbols and Arrows":        {0x2B00, 0x2BFF},
		"Glagolitic":                              {0x2C00, 0x2C5F},
		"Latin Extended-C":                        {0x2C60, 0x2C7F},
		"Coptic":                                  {0x2C80, 0x2CFF},
		"Georgian Supplement":                     {0x2D00, 0x2D2F},
		"Tifinagh":                                {0x2D30, 0x2D7F},
		"Ethiopic Extended":                       {0x2D80, 0x2DDF},
		"Cyrillic Extended-A":                     {0x2DE0, 0x2DFF},
		"Supplemental Punctuation":                {0x2E00, 0x2E7F},
		"CJK Radicals Supplement":                 {0x2E80, 0x2EFF},
		"Kangxi Radicals":                         {0x2F00, 0x2FDF},
		"Ideographic Description Characters":      {0x2FF0, 0x2FFF},
		"CJK Symbols and Punctuation":             {0x3000, 0x303F},
		"Hiragana":                                {0x3040, 0x309F},
		"Katakana":                                {0x30A0, 0x30FF},
		"Bopomofo":                                {0x3100, 0x312F},
		"Hangul Compatibility Jamo":               {0x3130, 0x318F},
		"Kanbun":                                  {0x3190, 0x319F},
		"Bopomofo Extended":                       {0x31A0, 0x31BF},
		"CJK Strokes":                             {0x31C0, 0x31EF},
		"Katakana Phonetic Extensions":            {0x31F0, 0x31FF},
		"Enclosed CJK Letters and Months":         {0x3200, 0x32FF},
		"CJK Compatibility":                       {0x3300, 0x33FF},
		"CJK Unified Ideographs Extension A":      {0x3400, 0x4DBF},
		"Yijing Hexagram Symbols":                 {0x4DC0, 0x4DFF},
		"CJK Unified Ideographs":                  {0x4E00, 0x9FFF},
		"Yi Syllables":                            {0xA000, 0xA48F},
		"Yi Radicals":                             {0xA490, 0xA4CF},
		"Lisu":                                    {0xA4D0, 0xA4FF},
		"Vai":                                     {0xA500, 0xA63F},
		"Cyrillic Extended-B":                     {0xA640, 0xA69F},
		"Bamum":                                   {0xA6A0, 0xA6FF},
		"Modifier Tone Letters":                   {0xA700, 0xA71F},
		"Latin Extended-D":                        {0xA720, 0xA7FF},
		"Syloti Nagri":                            {0xA800, 0xA82F},
		"Common Indic Number Forms":               {0xA830, 0xA83F},
		"Phags-pa":                                {0xA840, 0xA87F},
		"Saurashtra":                              {0xA880, 0xA8DF},
		"Devanagari Extended":                     {0xA8E0, 0xA8FF},
		"Kayah Li":                                {0xA900, 0xA92F},
		"Rejang":                                  {0xA930, 0xA95F},
		"Hangul Jamo Extended-A":                  {0xA960, 0xA97F},
		"Javanese":                                {0xA980, 0xA9DF},
		"Myanmar Extended-B":                      {0xA9E0, 0xA9FF},
		"Cham":                                    {0xAA00, 0xAA5F},
		"Myanmar Extended-A":                      {0xAA60, 0xAA7F},
		"Tai Viet":                                {0xAA80, 0xAADF},
		"Meetei Mayek Extensions":                 {0xAAE0, 0xAAFF},
		"Ethiopic Extended-A":                     {0xAB00, 0xAB2F},
		"Latin Extended-E":                        {0xAB30, 0xAB6F},
		"Cherokee Supplement":                     {0xAB70, 0xABBF},
		"Meetei Mayek":                            {0xABC0, 0xABFF},
		"Hangul Syllables":                        {0xAC00, 0xD7AF},
		"Hangul Jamo Extended-B":                  {0xD7B0, 0xD7FF},
		"High Surrogates":                         {0xD800, 0xDB7F},
		"High Private Use Surrogates":             {0xDB80, 0xDBFF},
		"Low Surrogates":                          {0xDC00, 0xDFFF},
		"Private Use Area":                        {0xE000, 0xF8FF},
		"CJK Compatibility Ideographs":            {0xF900, 0xFAFF},
		"Alphabetic Presentation Forms":           {0xFB00, 0xFB4F},
		"Arabic Presentation Forms-A":             {0xFB50, 0xFDFF},
		"Variation Selectors":                     {0xFE00, 0xFE0F},
		"Vertical Forms":                          {0xFE10, 0xFE1F},
		"Combining Half Marks":                    {0xFE20, 0xFE2F},
		"CJK Compatibility Forms":                 {0xFE30, 0xFE4F},
		"Small Form Variants":                     {0xFE50, 0xFE6F},
		"Arabic Presentation Forms-B":             {0xFE70, 0xFEFF},
		"Halfwidth and Fullwidth Forms":           {0xFF00, 0xFFEF},
		"Specials":                                {0xFFF0, 0xFFFF},
		"Linear B Syllabary":                      {0x10000, 0x1007F},
		"Linear B Ideograms":                      {0x10080, 0x100FF},
		"Aegean Numbers":                          {0x10100, 0x1013F},
		"Ancient Greek Numbers":                   {0x10140, 0x1018F},
		"Ancient Symbols":                         {0x10190, 0x101CF},
		"Phaistos Disc":                           {0x101D0, 0x101FF},
		"Lycian":                                  {0x10280, 0x1029F},
		"Carian":                                  {0x102A0, 0x102DF},
		"Coptic Epact Numbers":                    {0x102E0, 0x102FF},
		"Old Italic":                              {0x10300, 0x1032F},
		"Gothic":                                  {0x10330, 0x1034F},
		"Old Permic":                              {0x10350, 0x1037F},
		"Ugaritic":                                {0x10380, 0x1039F},
		"Old Persian":                             {0x103A0, 0x103DF},
		"Deseret":                                 {0x10400, 0x1044F},
		"Shavian":                                 {0x10450, 0x1047F},
		"Osmanya":                                 {0x10480, 0x104AF},
		"Osage":                                   {0x104B0, 0x104FF},
		"Elbasan":                                 {0x10500, 0x1052F},
		"Caucasian Albanian":                      {0x10530, 0x1056F},
		"Linear A":                                {0x10600, 0x1077F},
		"Cypriot Syllabary":                       {0x10800, 0x1083F},
		"Imperial Aramaic":                        {0x10840, 0x1085F},
		"Palmyrene":                               {0x10860, 0x1087F},
		"Nabataean":                               {0x10880, 0x108AF},
		"Hatran":                                  {0x108E0, 0x108FF},
		"Phoenician":                              {0x10900, 0x1091F},
		"Lydian":                                  {0x10920, 0x1093F},
		"Meroitic Hieroglyphs":                    {0x10980, 0x1099F},
		"Meroitic Cursive":                        {0x109A0, 0x109FF},
		"Kharoshthi":                              {0x10A00, 0x10A5F},
		"Old South Arabian":                       {0x10A60, 0x10A7F},
		"Old North Arabian":                       {0x10A80, 0x10A9F},
		"Manichaean":                              {0x10AC0, 0x10AFF},
		"Avestan":                                 {0x10B00, 0x10B3F},
		"Inscriptional Parthian":                  {0x10B40, 0x10B5F},
		"Inscriptional Pahlavi":                   {0x10B60, 0x10B7F},
		"Psalter Pahlavi":                         {0x10B80, 0x10BAF},
		"Old Turkic":                              {0x10C00, 0x10C4F},
		"Old Hungarian":                           {0x10C80, 0x10CFF},
		"Hanifi Rohingya":                         {0x10D00, 0x10D3F},
		"Rumi Numeral Symbols":                    {0x10E60, 0x10E7F},
		"Old Sogdian":                             {0x10F00, 0x10F2F},
		"Sogdian":                                 {0x10F30, 0x10F6F},
		"Elymaic":                                 {0x10FE0, 0x10FFF},
		"Brahmi":                                  {0x11000, 0x1107F},
		"Kaithi":                                  {0x11080, 0x110CF},
		"Sora Sompeng":                            {0x110D0, 0x110FF},
		"Chakma":                                  {0x11100, 0x1114F},
		"Mahajani":                                {0x11150, 0x1117F},
		"Sharada":                                 {0x11180, 0x111DF},
		"Sinhala Archaic Numbers":                 {0x111E0, 0x111FF},
		"Khojki":                                  {0x11200, 0x1124F},
		"Multani":                                 {0x11280, 0x112AF},
		"Khudawadi":                               {0x112B0, 0x112FF},
		"Grantha":                                 {0x11300, 0x1137F},
		"Newa":                                    {0x11400, 0x1147F},
		"Tirhuta":                                 {0x11480, 0x114DF},
		"Siddham":                                 {0x11580, 0x115FF},
		"Modi":                                    {0x11600, 0x1165F},
		"Mongolian Supplement":                    {0x11660, 0x1167F},
		"Takri":                                   {0x11680, 0x116CF},
		"Ahom":                                    {0x11700, 0x1173F},
		"Dogra":                                   {0x11800, 0x1184F},
		"Warang Citi":                             {0x118A0, 0x118FF},
		"Nandinagari":                             {0x119A0, 0x119FF},
		"Zanabazar Square":                        {0x11A00, 0x11A4F},
		"Soyombo":                                 {0x11A50, 0x11AAF},
		"Pau Cin Hau":                             {0x11AC0, 0x11AFF},
		"Bhaiksuki":                               {0x11C00, 0x11C6F},
		"Marchen":                                 {0x11C70, 0x11CBF},
		"Masaram Gondi":                           {0x11D00, 0x11D5F},
		"Gunjala Gondi":                           {0x11D60, 0x11DAF},
		"Makasar":                                 {0x11EE0, 0x11EFF},
		"Tamil Supplement":                        {0x11FC0, 0x11FFF},
		"Cuneiform":                               {0x12000, 0x123FF},
		"Cuneiform Numbers and Punctuation":       {0x12400, 0x1247F},
		"Early Dynastic Cuneiform":                {0x12480, 0x1254F},
		"Egyptian Hieroglyphs":                    {0x13000, 0x1342F},
		"Egyptian Hieroglyph Format Controls":     {0x13430, 0x1343F},
		"Anatolian Hieroglyphs":                   {0x14400, 0x1467F},
		"Bamum Supplement":                        {0x16800, 0x16A3F},
		"Mro":                                     {0x16A40, 0x16A6F},
		"Bassa Vah":                               {0x16AD0, 0x16AFF},
		"Pahawh Hmong":                            {0x16B00, 0x16B8F},
		"Medefaidrin":                             {0x16E40, 0x16E9F},
		"Miao":                                    {0x16F00, 0x16F9F},
		"Ideographic Symbols and Punctuation":     {0x16FE0, 0x16FFF},
		"Tangut":                                  {0x17000, 0x187FF},
		"Tangut Components":                       {0x18800, 0x18AFF},
		"Kana Supplement":                         {0x1B000, 0x1B0FF},
		"Kana Extended-A":                         {0x1B100, 0x1B12F},
		"Small Kana Extension":                    {0x1B130, 0x1B16F},
		"Nushu":                                   {0x1B170, 0x1B2FF},
		"Duployan":                                {0x1BC00, 0x1BC9F},
		"Shorthand Format Controls":               {0x1BCA0, 0x1BCAF},
		"Byzantine Musical Symbols":               {0x1D000, 0x1D0FF},
		"Musical Symbols":                         {0x1D100, 0x1D1FF},
		"Ancient Greek Musical Notation":          {0x1D200, 0x1D24F},
		"Mayan Numerals":                          {0x1D2E0, 0x1D2FF},
		"Tai Xuan Jing Symbols":                   {0x1D300, 0x1D35F},
		"Counting Rod Numerals":                   {0x1D360, 0x1D37F},
		"Mathematical Alphanumeric Symbols":       {0x1D400, 0x1D7FF},
		"Sutton SignWriting":                      {0x1D800, 0x1DAAF},
		"Glagolitic Supplement":                   {0x1E000, 0x1E02F},
		"Nyiakeng Puachue Hmong":                  {0x1E100, 0x1E14F},
		"Wancho":                                  {0x1E2C0, 0x1E2FF},
		"Mende Kikakui":                           {0x1E800, 0x1E8DF},
		"Adlam":                                   {0x1E900, 0x1E95F},
		"Indic Siyaq Numbers":                     {0x1EC70, 0x1ECBF},
		"Ottoman Siyaq Numbers":                   {0x1ED00, 0x1ED4F},
		"Arabic Mathematical Alphabetic Symbols":  {0x1EE00, 0x1EEFF},
		"Mahjong Tiles":                           {0x1F000, 0x1F02F},
		"Domino Tiles":                            {0x1F030, 0x1F09F},
		"Playing Cards":                           {0x1F0A0, 0x1F0FF},
		"Enclosed Alphanumeric Supplement":        {0x1F100, 0x1F1FF},
		"Enclosed Ideographic Supplement":         {0x1F200, 0x1F2FF},
		"Miscellaneous Symbols and Pictographs":   {0x1F300, 0x1F5FF},
		"Emoticons":                               {0x1F600, 0x1F64F},
		"Ornamental Dingbats":                     {0x1F650, 0x1F67F},
		"Transport and Map Symbols":               {0x1F680, 0x1F6FF},
		"Alchemical Symbols":                      {0x1F700, 0x1F77F},
		"Geometric Shapes Extended":               {0x1F780, 0x1F7FF},
		"Supplemental Arrows-C":                   {0x1F800, 0x1F8FF},
		"Supplemental Symbols and Pictographs":    {0x1F900, 0x1F9FF},
		"Chess Symbols":                           {0x1FA00, 0x1FA6F},
		"Symbols and Pictographs Extended-A":      {0x1FA70, 0x1FAFF},
		"CJK Unified Ideographs Extension B":      {0x20000, 0x2A6DF},
		"CJK Unified Ideographs Extension C":      {0x2A700, 0x2B73F},
		"CJK Unified Ideographs Extension D":      {0x2B740, 0x2B81F},
		"CJK Unified Ideographs Extension E":      {0x2B820, 0x2CEAF},
		"CJK Unified Ideographs Extension F":      {0x2CEB0, 0x2EBEF},
		"CJK Compatibility Ideographs Supplement": {0x2F800, 0x2FA1F},
		"Tags":                             {0xE0000, 0xE007F},
		"Variation Selectors Supplement":   {0xE0100, 0xE01EF},
		"Supplementary Private Use Area-A": {0xF0000, 0xFFFFF},
		"Supplementary Private Use Area-B": {0x100000, 0x10FFFF},
	}

	// Blockmap maps the CanonicalCategory form of a block name to the name as
	// spelled in Blocks. It starts out empty and is filled in by init.
	Blockmap = make(map[string]string)
)

383func init() {
384	for k := range Blocks {
385		Blockmap[CanonicalCategory(k)] = k
386	}
387}
388
389var (
390	Catmap = map[string]uint8{
391		// Short-hand.
392		"Lu": CatUppercaseLetter,
393		"Ll": CatLowercaseLetter,
394		"Lt": CatTitlecaseLetter,
395		"LC": CatCasedLetter,
396		"Lm": CatModifierLetter,
397		"Lo": CatOtherLetter,
398		"L":  CatLetter,
399		"Mn": CatNonspacingMark,
400		"Mc": CatSpacingMark,
401		"Me": CatEnclosingMark,
402		"M":  CatMark,
403		"Nd": CatDecimalNumber,
404		"Nl": CatLetterNumber,
405		"No": CatOtherNumber,
406		"N":  CatNumber,
407		"Pc": CatConnectorPunctuation,
408		"Pd": CatDashPunctuation,
409		"Ps": CatOpenPunctuation,
410		"Pe": CatClosePunctuation,
411		"Pi": CatInitialPunctuation,
412		"Pf": CatFinalPunctuation,
413		"Po": CatOtherPunctuation,
414		"P":  CatPunctuation,
415		"Sm": CatMathSymbol,
416		"Sc": CatCurrencySymbol,
417		"Sk": CatModifierSymbol,
418		"So": CatOtherSymbol,
419		"S":  CatSymbol,
420		"Zs": CatSpaceSeparator,
421		"Zl": CatLineSeparator,
422		"Zp": CatParagraphSeparator,
423		"Z":  CatSeparator,
424		"Cc": CatControl,
425		"Cf": CatFormat,
426		"Cs": CatSurrogate,
427		"Co": CatPrivateUse,
428		"Cn": CatUnassigned,
429		"C":  CatOther,
430
431		// Lower-case shorthand.
432		"lu": CatUppercaseLetter,
433		"ll": CatLowercaseLetter,
434		"lt": CatTitlecaseLetter,
435		"lc": CatCasedLetter,
436		"lm": CatModifierLetter,
437		"lo": CatOtherLetter,
438		"l":  CatLetter,
439		"mn": CatNonspacingMark,
440		"mc": CatSpacingMark,
441		"me": CatEnclosingMark,
442		"m":  CatMark,
443		"nd": CatDecimalNumber,
444		"nl": CatLetterNumber,
445		"no": CatOtherNumber,
446		"n":  CatNumber,
447		"pc": CatConnectorPunctuation,
448		"pd": CatDashPunctuation,
449		"ps": CatOpenPunctuation,
450		"pe": CatClosePunctuation,
451		"pi": CatInitialPunctuation,
452		"pf": CatFinalPunctuation,
453		"po": CatOtherPunctuation,
454		"p":  CatPunctuation,
455		"sm": CatMathSymbol,
456		"sc": CatCurrencySymbol,
457		"sk": CatModifierSymbol,
458		"so": CatOtherSymbol,
459		"s":  CatSymbol,
460		"zs": CatSpaceSeparator,
461		"zl": CatLineSeparator,
462		"zp": CatParagraphSeparator,
463		"z":  CatSeparator,
464		"cc": CatControl,
465		"cf": CatFormat,
466		"cs": CatSurrogate,
467		"co": CatPrivateUse,
468		"cn": CatUnassigned,
469		"c":  CatOther,
470
471		// Full names, underscores.
472		"uppercase_letter":      CatUppercaseLetter,
473		"lowercase_letter":      CatLowercaseLetter,
474		"titlecase_letter":      CatTitlecaseLetter,
475		"cased_letter":          CatCasedLetter,
476		"modifier_letter":       CatModifierLetter,
477		"other_letter":          CatOtherLetter,
478		"letter":                CatLetter,
479		"nonspacing_mark":       CatNonspacingMark,
480		"spacing_mark":          CatSpacingMark,
481		"enclosing_mark":        CatEnclosingMark,
482		"mark":                  CatMark,
483		"decimal_number":        CatDecimalNumber,
484		"letter_number":         CatLetterNumber,
485		"other_number":          CatOtherNumber,
486		"number":                CatNumber,
487		"connector_punctuation": CatConnectorPunctuation,
488		"dash_punctuation":      CatDashPunctuation,
489		"open_punctuation":      CatOpenPunctuation,
490		"close_punctuation":     CatClosePunctuation,
491		"initial_punctuation":   CatInitialPunctuation,
492		"final_punctuation":     CatFinalPunctuation,
493		"other_punctuation":     CatOtherPunctuation,
494		"punctuation":           CatPunctuation,
495		"math_symbol":           CatMathSymbol,
496		"currency_symbol":       CatCurrencySymbol,
497		"modifier_symbol":       CatModifierSymbol,
498		"other_symbol":          CatOtherSymbol,
499		"symbol":                CatSymbol,
500		"space_separator":       CatSpaceSeparator,
501		"line_separator":        CatLineSeparator,
502		"paragraph_separator":   CatParagraphSeparator,
503		"separator":             CatSeparator,
504		"control":               CatControl,
505		"format":                CatFormat,
506		"surrogate":             CatSurrogate,
507		"private_use":           CatPrivateUse,
508		"unassigned":            CatUnassigned,
509		"other":                 CatOther,
510
511		// Without underscore.
512		"uppercaseletter":      CatUppercaseLetter,
513		"lowercaseletter":      CatLowercaseLetter,
514		"titlecaseletter":      CatTitlecaseLetter,
515		"casedletter":          CatCasedLetter,
516		"modifierletter":       CatModifierLetter,
517		"otherletter":          CatOtherLetter,
518		"nonspacingmark":       CatNonspacingMark,
519		"spacingmark":          CatSpacingMark,
520		"enclosingmark":        CatEnclosingMark,
521		"decimalnumber":        CatDecimalNumber,
522		"letternumber":         CatLetterNumber,
523		"othernumber":          CatOtherNumber,
524		"connectorpunctuation": CatConnectorPunctuation,
525		"dashpunctuation":      CatDashPunctuation,
526		"openpunctuation":      CatOpenPunctuation,
527		"closepunctuation":     CatClosePunctuation,
528		"initialpunctuation":   CatInitialPunctuation,
529		"finalpunctuation":     CatFinalPunctuation,
530		"otherpunctuation":     CatOtherPunctuation,
531		"mathsymbol":           CatMathSymbol,
532		"currencysymbol":       CatCurrencySymbol,
533		"modifiersymbol":       CatModifierSymbol,
534		"othersymbol":          CatOtherSymbol,
535		"spaceseparator":       CatSpaceSeparator,
536		"lineseparator":        CatLineSeparator,
537		"paragraphseparator":   CatParagraphSeparator,
538		"privateuse":           CatPrivateUse,
539	}
540
541	Catnames = map[uint8]string{
542		CatUppercaseLetter:      "Uppercase_Letter",
543		CatLowercaseLetter:      "Lowercase_Letter",
544		CatTitlecaseLetter:      "Titlecase_Letter",
545		CatCasedLetter:          "Cased_Letter",
546		CatModifierLetter:       "Modifier_Letter",
547		CatOtherLetter:          "Other_Letter",
548		CatLetter:               "Letter",
549		CatNonspacingMark:       "Nonspacing_Mark",
550		CatSpacingMark:          "Spacing_Mark",
551		CatEnclosingMark:        "Enclosing_Mark",
552		CatMark:                 "Mark",
553		CatDecimalNumber:        "Decimal_Number",
554		CatLetterNumber:         "Letter_Number",
555		CatOtherNumber:          "Other_Number",
556		CatNumber:               "Number",
557		CatConnectorPunctuation: "Connector_Punctuation",
558		CatDashPunctuation:      "Dash_Punctuation",
559		CatOpenPunctuation:      "Open_Punctuation",
560		CatClosePunctuation:     "Close_Punctuation",
561		CatInitialPunctuation:   "Initial_Punctuation",
562		CatFinalPunctuation:     "Final_Punctuation",
563		CatOtherPunctuation:     "Other_Punctuation",
564		CatPunctuation:          "Punctuation",
565		CatMathSymbol:           "Math_Symbol",
566		CatCurrencySymbol:       "Currency_Symbol",
567		CatModifierSymbol:       "Modifier_Symbol",
568		CatOtherSymbol:          "Other_Symbol",
569		CatSymbol:               "Symbol",
570		CatSpaceSeparator:       "Space_Separator",
571		CatLineSeparator:        "Line_Separator",
572		CatParagraphSeparator:   "Paragraph_Separator",
573		CatSeparator:            "Separator",
574		CatControl:              "Control",
575		CatFormat:               "Format",
576		CatSurrogate:            "Surrogate",
577		CatPrivateUse:           "Private_Use",
578		CatUnassigned:           "Unassigned",
579		CatOther:                "Other",
580	}
581)
582
var (
	// ranges lists inclusive [first, last] code point ranges; rangeNames holds
	// the name for the range at the same index, so the two slices must be
	// kept the same length and in the same order.
	// NOTE(review): presumably these mirror the "<..., First>" / "<..., Last>"
	// pairs in UnicodeData.txt — confirm against whatever consumes them.
	// NOTE(review): the Tangut end 0x187F1 looks like the Unicode 11 value,
	// while Blocks lists Unicode 12 blocks — confirm the intended data version.
	ranges = [][]rune{
		{0x3400, 0x4DB5},
		{0x4E00, 0x9FEF},
		{0xAC00, 0xD7A3},
		{0xD800, 0xDB7F},
		{0xDB80, 0xDBFF},
		{0xDC00, 0xDFFF},
		{0xE000, 0xF8FF},
		{0x17000, 0x187F1},
		{0x20000, 0x2A6D6},
		{0x2A700, 0x2B734},
		{0x2B740, 0x2B81D},
		{0x2B820, 0x2CEA1},
		{0x2CEB0, 0x2EBE0},
		{0xF0000, 0xFFFFD},
		{0x100000, 0x10FFFD},
	}

	// rangeNames gives the angle-bracketed name for the entry of ranges at the
	// same index.
	rangeNames = []string{
		"<CJK Ideograph Extension A>",
		"<CJK Ideograph>",
		"<Hangul Syllable>",
		"<Non Private Use High Surrogate>",
		"<Private Use High Surrogate>",
		"<Low Surrogate>",
		"<Private Use>",
		"<Tangut Ideograph>",
		"<CJK Ideograph Extension B>",
		"<CJK Ideograph Extension C>",
		"<CJK Ideograph Extension D>",
		"<CJK Ideograph Extension E>",
		"<CJK Ideograph Extension F>",
		"<Plane 15 Private Use>",
		"<Plane 16 Private Use>",
	}
)
