1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package unicode_test
6
7import (
8	"testing"
9	. "unicode"
10)
11
// T is one entry of a test table: a rune paired with the name of the range
// table it is checked against.
type T struct {
	rune   rune   // code point under test
	script string // key into Scripts, Categories or Properties, depending on the table
}
16
// inTest pairs script names with a rune that must belong to that script.
// TestScripts also reports every key of Scripts that has no entry here.
//
// Hand-chosen tests from Unicode 5.1.0, 6.0.0, 6.2.0, 6.3.0, 7.0.0, 8.0.0,
// 9.0.0, 10.0.0, mostly to discover when new scripts and categories arise.
// If this test fails, add the missing scripts to the test and add entries
// of the form
//     pkg unicode, var <new script> *RangeTable
// to api/next.txt.
var inTest = []T{
	{0x11711, "Ahom"},
	{0x1e900, "Adlam"},
	{0x14646, "Anatolian_Hieroglyphs"},
	{0x06e2, "Arabic"},
	{0x0567, "Armenian"},
	{0x10b20, "Avestan"},
	{0x11c00, "Bhaiksuki"},
	{0x1b37, "Balinese"},
	{0xa6af, "Bamum"},
	{0x16ada, "Bassa_Vah"},
	{0x1be1, "Batak"},
	{0x09c2, "Bengali"},
	{0x3115, "Bopomofo"},
	{0x282d, "Braille"},
	{0x1a1a, "Buginese"},
	{0x1747, "Buhid"},
	{0x11011, "Brahmi"},
	{0x156d, "Canadian_Aboriginal"},
	{0x102a9, "Carian"},
	{0x10563, "Caucasian_Albanian"},
	{0x11111, "Chakma"},
	{0xaa4d, "Cham"},
	{0x13c2, "Cherokee"},
	{0x0020, "Common"},
	{0x1d4a5, "Common"},
	{0x2cfc, "Coptic"},
	{0x12420, "Cuneiform"},
	{0x1080c, "Cypriot"},
	{0xa663, "Cyrillic"},
	{0x10430, "Deseret"},
	{0x094a, "Devanagari"},
	{0x1BC00, "Duployan"},
	{0x13001, "Egyptian_Hieroglyphs"},
	{0x10500, "Elbasan"},
	{0x1271, "Ethiopic"},
	{0x10fc, "Georgian"},
	{0x2c40, "Glagolitic"},
	{0x10347, "Gothic"},
	{0x11303, "Grantha"},
	{0x03ae, "Greek"},
	{0x0abf, "Gujarati"},
	{0x0a24, "Gurmukhi"},
	{0x3028, "Han"},
	{0x11b8, "Hangul"},
	{0x1727, "Hanunoo"},
	{0x108FF, "Hatran"},
	{0x05a0, "Hebrew"},
	{0x3058, "Hiragana"},
	{0x10841, "Imperial_Aramaic"},
	{0x20e6, "Inherited"},
	{0x10b70, "Inscriptional_Pahlavi"},
	{0x10b5a, "Inscriptional_Parthian"},
	{0xa9d0, "Javanese"},
	{0x1109f, "Kaithi"},
	{0x0cbd, "Kannada"},
	{0x30a6, "Katakana"},
	{0xa928, "Kayah_Li"},
	{0x10a11, "Kharoshthi"},
	{0x17c6, "Khmer"},
	{0x11211, "Khojki"},
	{0x112df, "Khudawadi"},
	{0x0eaa, "Lao"},
	{0x1d79, "Latin"},
	{0x1c10, "Lepcha"},
	{0x1930, "Limbu"},
	{0x10755, "Linear_A"},
	{0x1003c, "Linear_B"},
	{0xa4e1, "Lisu"},
	{0x10290, "Lycian"},
	{0x10930, "Lydian"},
	{0x11173, "Mahajani"},
	{0x0d42, "Malayalam"},
	{0x0843, "Mandaic"},
	{0x10ac8, "Manichaean"},
	{0x11cB6, "Marchen"},
	{0x11d59, "Masaram_Gondi"},
	{0xabd0, "Meetei_Mayek"},
	{0x1e800, "Mende_Kikakui"},
	{0x1099f, "Meroitic_Hieroglyphs"},
	{0x109a0, "Meroitic_Cursive"},
	{0x16f00, "Miao"},
	{0x11611, "Modi"},
	{0x1822, "Mongolian"},
	{0x16a60, "Mro"},
	{0x11293, "Multani"},
	{0x104c, "Myanmar"},
	{0x10880, "Nabataean"},
	{0x11400, "Newa"},
	{0x19c3, "New_Tai_Lue"},
	{0x07f8, "Nko"},
	{0x1b170, "Nushu"},
	{0x169b, "Ogham"},
	{0x1c6a, "Ol_Chiki"},
	{0x10C80, "Old_Hungarian"},
	{0x10310, "Old_Italic"},
	{0x10a80, "Old_North_Arabian"},
	{0x10350, "Old_Permic"},
	{0x103c9, "Old_Persian"},
	{0x10a6f, "Old_South_Arabian"},
	{0x10c20, "Old_Turkic"},
	{0x0b3e, "Oriya"},
	{0x104d9, "Osage"},
	{0x10491, "Osmanya"},
	{0x16b2b, "Pahawh_Hmong"},
	{0x10876, "Palmyrene"},
	{0x11ACE, "Pau_Cin_Hau"},
	{0xa860, "Phags_Pa"},
	{0x10918, "Phoenician"},
	{0x10baf, "Psalter_Pahlavi"},
	{0xa949, "Rejang"},
	{0x16c0, "Runic"},
	{0x081d, "Samaritan"},
	{0xa892, "Saurashtra"},
	{0x111a0, "Sharada"},
	{0x10463, "Shavian"},
	{0x115c1, "Siddham"},
	{0x1D920, "SignWriting"},
	{0x0dbd, "Sinhala"},
	{0x110d0, "Sora_Sompeng"},
	{0x11a99, "Soyombo"},
	{0x1ba3, "Sundanese"},
	{0xa803, "Syloti_Nagri"},
	{0x070f, "Syriac"},
	{0x170f, "Tagalog"},
	{0x176f, "Tagbanwa"},
	{0x1972, "Tai_Le"},
	{0x1a62, "Tai_Tham"},
	{0xaadc, "Tai_Viet"},
	{0x116c9, "Takri"},
	{0x0bbf, "Tamil"},
	{0x17000, "Tangut"},
	{0x0c55, "Telugu"},
	{0x07a7, "Thaana"},
	{0x0e46, "Thai"},
	{0x0f36, "Tibetan"},
	{0x2d55, "Tifinagh"},
	{0x114d9, "Tirhuta"},
	{0x10388, "Ugaritic"},
	{0xa60e, "Vai"},
	{0x118ff, "Warang_Citi"},
	{0xa216, "Yi"},
	{0x11a0a, "Zanabazar_Square"},
}
168
169var outTest = []T{ // not really worth being thorough
170	{0x20, "Telugu"},
171}
172
// inCategoryTest pairs category names with a rune that must belong to that
// category. TestCategories also reports every key of Categories that has no
// entry here. The two-letter names come first, followed by the one-letter ones.
var inCategoryTest = []T{
	{0x0081, "Cc"},
	{0x200B, "Cf"},
	{0xf0000, "Co"},
	{0xdb80, "Cs"},
	{0x0236, "Ll"},
	{0x1d9d, "Lm"},
	{0x07cf, "Lo"},
	{0x1f8a, "Lt"},
	{0x03ff, "Lu"},
	{0x0bc1, "Mc"},
	{0x20df, "Me"},
	{0x07f0, "Mn"},
	{0x1bb2, "Nd"},
	{0x10147, "Nl"},
	{0x2478, "No"},
	{0xfe33, "Pc"},
	{0x2011, "Pd"},
	{0x301e, "Pe"},
	{0x2e03, "Pf"},
	{0x2e02, "Pi"},
	{0x0022, "Po"},
	{0x2770, "Ps"},
	{0x00a4, "Sc"},
	{0xa711, "Sk"},
	{0x25f9, "Sm"},
	{0x2108, "So"},
	{0x2028, "Zl"},
	{0x2029, "Zp"},
	{0x202f, "Zs"},
	// Unifieds: the one-letter category names.
	{0x04aa, "L"},
	{0x0009, "C"},
	{0x1712, "M"},
	{0x0031, "N"},
	{0x00bb, "P"},
	{0x00a2, "S"},
	{0x00a0, "Z"},
}
212
// inPropTest pairs property names with a rune that must have that property.
// TestProperties also reports every key of Properties that has no entry here.
var inPropTest = []T{
	{0x0046, "ASCII_Hex_Digit"},
	{0x200F, "Bidi_Control"},
	{0x2212, "Dash"},
	{0xE0001, "Deprecated"},
	{0x00B7, "Diacritic"},
	{0x30FE, "Extender"},
	{0xFF46, "Hex_Digit"},
	{0x2E17, "Hyphen"},
	{0x2FFB, "IDS_Binary_Operator"},
	{0x2FF3, "IDS_Trinary_Operator"},
	{0xFA6A, "Ideographic"},
	{0x200D, "Join_Control"},
	{0x0EC4, "Logical_Order_Exception"},
	{0x2FFFF, "Noncharacter_Code_Point"},
	{0x065E, "Other_Alphabetic"},
	{0x2065, "Other_Default_Ignorable_Code_Point"},
	{0x0BD7, "Other_Grapheme_Extend"},
	{0x0387, "Other_ID_Continue"},
	{0x212E, "Other_ID_Start"},
	{0x2094, "Other_Lowercase"},
	{0x2040, "Other_Math"},
	{0x216F, "Other_Uppercase"},
	{0x0027, "Pattern_Syntax"},
	{0x0020, "Pattern_White_Space"},
	{0x06DD, "Prepended_Concatenation_Mark"},
	{0x300D, "Quotation_Mark"},
	{0x2EF3, "Radical"},
	{0x1f1ff, "Regional_Indicator"},
	{0x061F, "STerm"}, // Deprecated alias of Sentence_Terminal
	{0x061F, "Sentence_Terminal"},
	{0x2071, "Soft_Dotted"},
	{0x003A, "Terminal_Punctuation"},
	{0x9FC3, "Unified_Ideograph"},
	{0xFE0F, "Variation_Selector"},
	{0x0020, "White_Space"},
}
250
251func TestScripts(t *testing.T) {
252	notTested := make(map[string]bool)
253	for k := range Scripts {
254		notTested[k] = true
255	}
256	for _, test := range inTest {
257		if _, ok := Scripts[test.script]; !ok {
258			t.Fatal(test.script, "not a known script")
259		}
260		if !Is(Scripts[test.script], test.rune) {
261			t.Errorf("IsScript(%U, %s) = false, want true", test.rune, test.script)
262		}
263		delete(notTested, test.script)
264	}
265	for _, test := range outTest {
266		if Is(Scripts[test.script], test.rune) {
267			t.Errorf("IsScript(%U, %s) = true, want false", test.rune, test.script)
268		}
269	}
270	for k := range notTested {
271		t.Error("script not tested:", k)
272	}
273}
274
275func TestCategories(t *testing.T) {
276	notTested := make(map[string]bool)
277	for k := range Categories {
278		notTested[k] = true
279	}
280	for _, test := range inCategoryTest {
281		if _, ok := Categories[test.script]; !ok {
282			t.Fatal(test.script, "not a known category")
283		}
284		if !Is(Categories[test.script], test.rune) {
285			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
286		}
287		delete(notTested, test.script)
288	}
289	for k := range notTested {
290		t.Error("category not tested:", k)
291	}
292}
293
294func TestProperties(t *testing.T) {
295	notTested := make(map[string]bool)
296	for k := range Properties {
297		notTested[k] = true
298	}
299	for _, test := range inPropTest {
300		if _, ok := Properties[test.script]; !ok {
301			t.Fatal(test.script, "not a known prop")
302		}
303		if !Is(Properties[test.script], test.rune) {
304			t.Errorf("IsCategory(%U, %s) = false, want true", test.rune, test.script)
305		}
306		delete(notTested, test.script)
307	}
308	for k := range notTested {
309		t.Error("property not tested:", k)
310	}
311}
312