1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 package org.antlr.v4.test.tool;
8 
9 import java.util.Map;
10 
11 import org.antlr.v4.unicode.UnicodeData;
12 import org.antlr.v4.runtime.misc.IntervalSet;
13 
14 import org.junit.Test;
15 import org.junit.Rule;
16 import org.junit.rules.ExpectedException;
17 
18 import static org.junit.Assert.assertFalse;
19 import static org.junit.Assert.assertEquals;
20 import static org.junit.Assert.assertTrue;
21 
22 public class TestUnicodeData {
23 	@Rule
24 	public ExpectedException thrown = ExpectedException.none();
25 
26 	@Test
testUnicodeGeneralCategoriesLatin()27 	public void testUnicodeGeneralCategoriesLatin() {
28 		assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('X'));
29 		assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('x'));
30 		assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('x'));
31 		assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('X'));
32 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains('X'));
33 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains('x'));
34 		assertTrue(UnicodeData.getPropertyCodePoints("N").contains('0'));
35 		assertTrue(UnicodeData.getPropertyCodePoints("Z").contains(' '));
36 	}
37 
38 	@Test
testUnicodeGeneralCategoriesBMP()39 	public void testUnicodeGeneralCategoriesBMP() {
40 		assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3A'));
41 		assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3B'));
42 		assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3B'));
43 		assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3A'));
44 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3A'));
45 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3B'));
46 		assertTrue(UnicodeData.getPropertyCodePoints("N").contains('\u1BB0'));
47 		assertFalse(UnicodeData.getPropertyCodePoints("N").contains('\u1E3A'));
48 		assertTrue(UnicodeData.getPropertyCodePoints("Z").contains('\u2028'));
49 		assertFalse(UnicodeData.getPropertyCodePoints("Z").contains('\u1E3A'));
50 	}
51 
52 	@Test
testUnicodeGeneralCategoriesSMP()53 	public void testUnicodeGeneralCategoriesSMP() {
54 		assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D5D4));
55 		assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D770));
56 		assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D770));
57 		assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D5D4));
58 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D5D4));
59 		assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D770));
60 		assertTrue(UnicodeData.getPropertyCodePoints("N").contains(0x11C50));
61 		assertFalse(UnicodeData.getPropertyCodePoints("N").contains(0x1D5D4));
62 	}
63 
64 	@Test
testUnicodeCategoryAliases()65 	public void testUnicodeCategoryAliases() {
66 		assertTrue(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('x'));
67 		assertFalse(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('X'));
68 		assertTrue(UnicodeData.getPropertyCodePoints("Letter").contains('x'));
69 		assertFalse(UnicodeData.getPropertyCodePoints("Letter").contains('0'));
70 		assertTrue(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains(0x20E2));
71 		assertFalse(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains('x'));
72 	}
73 
74 	@Test
testUnicodeBinaryProperties()75 	public void testUnicodeBinaryProperties() {
76 		assertTrue(UnicodeData.getPropertyCodePoints("Emoji").contains(0x1F4A9));
77 		assertFalse(UnicodeData.getPropertyCodePoints("Emoji").contains('X'));
78 		assertTrue(UnicodeData.getPropertyCodePoints("alnum").contains('9'));
79 		assertFalse(UnicodeData.getPropertyCodePoints("alnum").contains(0x1F4A9));
80 		assertTrue(UnicodeData.getPropertyCodePoints("Dash").contains('-'));
81 		assertTrue(UnicodeData.getPropertyCodePoints("Hex").contains('D'));
82 		assertFalse(UnicodeData.getPropertyCodePoints("Hex").contains('Q'));
83 	}
84 
85 	@Test
testUnicodeBinaryPropertyAliases()86 	public void testUnicodeBinaryPropertyAliases() {
87 		assertTrue(UnicodeData.getPropertyCodePoints("Ideo").contains('\u611B'));
88 		assertFalse(UnicodeData.getPropertyCodePoints("Ideo").contains('X'));
89 		assertTrue(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('\u0456'));
90 		assertFalse(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('X'));
91 		assertTrue(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('\uFFFF'));
92 		assertFalse(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('X'));
93 	}
94 
95 	@Test
testUnicodeScripts()96 	public void testUnicodeScripts() {
97 		assertTrue(UnicodeData.getPropertyCodePoints("Zyyy").contains('0'));
98 		assertTrue(UnicodeData.getPropertyCodePoints("Latn").contains('X'));
99 		assertTrue(UnicodeData.getPropertyCodePoints("Hani").contains(0x4E04));
100 		assertTrue(UnicodeData.getPropertyCodePoints("Cyrl").contains(0x0404));
101 	}
102 
103 	@Test
testUnicodeScriptEquals()104 	public void testUnicodeScriptEquals() {
105 		assertTrue(UnicodeData.getPropertyCodePoints("Script=Zyyy").contains('0'));
106 		assertTrue(UnicodeData.getPropertyCodePoints("Script=Latn").contains('X'));
107 		assertTrue(UnicodeData.getPropertyCodePoints("Script=Hani").contains(0x4E04));
108 		assertTrue(UnicodeData.getPropertyCodePoints("Script=Cyrl").contains(0x0404));
109 	}
110 
111 	@Test
testUnicodeScriptAliases()112 	public void testUnicodeScriptAliases() {
113 		assertTrue(UnicodeData.getPropertyCodePoints("Common").contains('0'));
114 		assertTrue(UnicodeData.getPropertyCodePoints("Latin").contains('X'));
115 		assertTrue(UnicodeData.getPropertyCodePoints("Han").contains(0x4E04));
116 		assertTrue(UnicodeData.getPropertyCodePoints("Cyrillic").contains(0x0404));
117 	}
118 
119 	@Test
testUnicodeBlocks()120 	public void testUnicodeBlocks() {
121 		assertTrue(UnicodeData.getPropertyCodePoints("InASCII").contains('0'));
122 		assertTrue(UnicodeData.getPropertyCodePoints("InCJK").contains(0x4E04));
123 		assertTrue(UnicodeData.getPropertyCodePoints("InCyrillic").contains(0x0404));
124 		assertTrue(UnicodeData.getPropertyCodePoints("InMisc_Pictographs").contains(0x1F4A9));
125 	}
126 
127 	@Test
testUnicodeBlockEquals()128 	public void testUnicodeBlockEquals() {
129 		assertTrue(UnicodeData.getPropertyCodePoints("Block=ASCII").contains('0'));
130 		assertTrue(UnicodeData.getPropertyCodePoints("Block=CJK").contains(0x4E04));
131 		assertTrue(UnicodeData.getPropertyCodePoints("Block=Cyrillic").contains(0x0404));
132 		assertTrue(UnicodeData.getPropertyCodePoints("Block=Misc_Pictographs").contains(0x1F4A9));
133 	}
134 
135 	@Test
testUnicodeBlockAliases()136 	public void testUnicodeBlockAliases() {
137 		assertTrue(UnicodeData.getPropertyCodePoints("InBasic_Latin").contains('0'));
138 		assertTrue(UnicodeData.getPropertyCodePoints("InMiscellaneous_Mathematical_Symbols_B").contains(0x29BE));
139 	}
140 
141 	@Test
testEnumeratedPropertyEquals()142 	public void testEnumeratedPropertyEquals() {
143 		assertTrue(
144 				"U+1F481 INFORMATION DESK PERSON is an emoji modifier base",
145 				UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481));
146 
147 		assertFalse(
148 				"U+1F47E ALIEN MONSTER is not an emoji modifier",
149 				UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F47E));
150 
151 		assertTrue(
152 				"U+0E33 THAI CHARACTER SARA AM is a spacing mark",
153 				UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481));
154 
155 		assertFalse(
156 				"U+1038 MYANMAR SIGN VISARGA is not a spacing mark",
157 				UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1038));
158 
159 		assertTrue(
160 				"U+00A1 INVERTED EXCLAMATION MARK has ambiguous East Asian Width",
161 				UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A1));
162 
163 		assertFalse(
164 				"U+00A2 CENT SIGN does not have ambiguous East Asian Width",
165 				UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A2));
166 
167 	}
168 
169         @Test
extendedPictographic()170         public void extendedPictographic() {
171 		assertTrue(
172 				"U+1F588 BLACK PUSHPIN is in Extended Pictographic",
173 				UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains(0x1F588));
174 		assertFalse(
175 				"0 is not in Extended Pictographic",
176 				UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains('0'));
177         }
178 
179         @Test
emojiPresentation()180         public void emojiPresentation() {
181 		assertTrue(
182 				"U+1F4A9 PILE OF POO is in EmojiPresentation=EmojiDefault",
183 				UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains(0x1F4A9));
184 		assertFalse(
185 				"0 is not in EmojiPresentation=EmojiDefault",
186 				UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('0'));
187 		assertFalse(
188 				"A is not in EmojiPresentation=EmojiDefault",
189 				UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('A'));
190 		assertFalse(
191 				"U+1F4A9 PILE OF POO is not in EmojiPresentation=TextDefault",
192 				UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains(0x1F4A9));
193 		assertTrue(
194 				"0 is in EmojiPresentation=TextDefault",
195 				UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('0'));
196 		assertFalse(
197 				"A is not in EmojiPresentation=TextDefault",
198 				UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('A'));
199         }
200 
201 	@Test
testPropertyCaseInsensitivity()202 	public void testPropertyCaseInsensitivity() {
203 		assertTrue(UnicodeData.getPropertyCodePoints("l").contains('x'));
204 		assertFalse(UnicodeData.getPropertyCodePoints("l").contains('0'));
205 		assertTrue(UnicodeData.getPropertyCodePoints("common").contains('0'));
206 		assertTrue(UnicodeData.getPropertyCodePoints("Alnum").contains('0'));
207 	}
208 
209 	@Test
testPropertyDashSameAsUnderscore()210 	public void testPropertyDashSameAsUnderscore() {
211 		assertTrue(UnicodeData.getPropertyCodePoints("InLatin-1").contains('\u00F0'));
212 	}
213 
214 	@Test
modifyingUnicodeDataShouldThrow()215 	public void modifyingUnicodeDataShouldThrow() {
216 		thrown.expect(IllegalStateException.class);
217 		thrown.expectMessage("can't alter readonly IntervalSet");
218 		UnicodeData.getPropertyCodePoints("L").add(0x12345);
219 	}
220 }
221