1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 package org.antlr.v4.test.tool; 8 9 import java.util.Map; 10 11 import org.antlr.v4.unicode.UnicodeData; 12 import org.antlr.v4.runtime.misc.IntervalSet; 13 14 import org.junit.Test; 15 import org.junit.Rule; 16 import org.junit.rules.ExpectedException; 17 18 import static org.junit.Assert.assertFalse; 19 import static org.junit.Assert.assertEquals; 20 import static org.junit.Assert.assertTrue; 21 22 public class TestUnicodeData { 23 @Rule 24 public ExpectedException thrown = ExpectedException.none(); 25 26 @Test testUnicodeGeneralCategoriesLatin()27 public void testUnicodeGeneralCategoriesLatin() { 28 assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('X')); 29 assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('x')); 30 assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('x')); 31 assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('X')); 32 assertTrue(UnicodeData.getPropertyCodePoints("L").contains('X')); 33 assertTrue(UnicodeData.getPropertyCodePoints("L").contains('x')); 34 assertTrue(UnicodeData.getPropertyCodePoints("N").contains('0')); 35 assertTrue(UnicodeData.getPropertyCodePoints("Z").contains(' ')); 36 } 37 38 @Test testUnicodeGeneralCategoriesBMP()39 public void testUnicodeGeneralCategoriesBMP() { 40 assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3A')); 41 assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains('\u1E3B')); 42 assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3B')); 43 assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains('\u1E3A')); 44 assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3A')); 45 assertTrue(UnicodeData.getPropertyCodePoints("L").contains('\u1E3B')); 46 assertTrue(UnicodeData.getPropertyCodePoints("N").contains('\u1BB0')); 47 assertFalse(UnicodeData.getPropertyCodePoints("N").contains('\u1E3A')); 48 assertTrue(UnicodeData.getPropertyCodePoints("Z").contains('\u2028')); 49 assertFalse(UnicodeData.getPropertyCodePoints("Z").contains('\u1E3A')); 50 } 51 52 @Test testUnicodeGeneralCategoriesSMP()53 public void testUnicodeGeneralCategoriesSMP() { 54 assertTrue(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D5D4)); 55 assertFalse(UnicodeData.getPropertyCodePoints("Lu").contains(0x1D770)); 56 assertTrue(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D770)); 57 assertFalse(UnicodeData.getPropertyCodePoints("Ll").contains(0x1D5D4)); 58 assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D5D4)); 59 assertTrue(UnicodeData.getPropertyCodePoints("L").contains(0x1D770)); 60 assertTrue(UnicodeData.getPropertyCodePoints("N").contains(0x11C50)); 61 assertFalse(UnicodeData.getPropertyCodePoints("N").contains(0x1D5D4)); 62 } 63 64 @Test testUnicodeCategoryAliases()65 public void testUnicodeCategoryAliases() { 66 assertTrue(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('x')); 67 assertFalse(UnicodeData.getPropertyCodePoints("Lowercase_Letter").contains('X')); 68 assertTrue(UnicodeData.getPropertyCodePoints("Letter").contains('x')); 69 assertFalse(UnicodeData.getPropertyCodePoints("Letter").contains('0')); 70 assertTrue(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains(0x20E2)); 71 assertFalse(UnicodeData.getPropertyCodePoints("Enclosing_Mark").contains('x')); 72 } 73 74 @Test testUnicodeBinaryProperties()75 public void testUnicodeBinaryProperties() { 76 assertTrue(UnicodeData.getPropertyCodePoints("Emoji").contains(0x1F4A9)); 77 assertFalse(UnicodeData.getPropertyCodePoints("Emoji").contains('X')); 78 assertTrue(UnicodeData.getPropertyCodePoints("alnum").contains('9')); 79 assertFalse(UnicodeData.getPropertyCodePoints("alnum").contains(0x1F4A9)); 80 assertTrue(UnicodeData.getPropertyCodePoints("Dash").contains('-')); 81 assertTrue(UnicodeData.getPropertyCodePoints("Hex").contains('D')); 82 assertFalse(UnicodeData.getPropertyCodePoints("Hex").contains('Q')); 83 } 84 85 @Test testUnicodeBinaryPropertyAliases()86 public void testUnicodeBinaryPropertyAliases() { 87 assertTrue(UnicodeData.getPropertyCodePoints("Ideo").contains('\u611B')); 88 assertFalse(UnicodeData.getPropertyCodePoints("Ideo").contains('X')); 89 assertTrue(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('\u0456')); 90 assertFalse(UnicodeData.getPropertyCodePoints("Soft_Dotted").contains('X')); 91 assertTrue(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('\uFFFF')); 92 assertFalse(UnicodeData.getPropertyCodePoints("Noncharacter_Code_Point").contains('X')); 93 } 94 95 @Test testUnicodeScripts()96 public void testUnicodeScripts() { 97 assertTrue(UnicodeData.getPropertyCodePoints("Zyyy").contains('0')); 98 assertTrue(UnicodeData.getPropertyCodePoints("Latn").contains('X')); 99 assertTrue(UnicodeData.getPropertyCodePoints("Hani").contains(0x4E04)); 100 assertTrue(UnicodeData.getPropertyCodePoints("Cyrl").contains(0x0404)); 101 } 102 103 @Test testUnicodeScriptEquals()104 public void testUnicodeScriptEquals() { 105 assertTrue(UnicodeData.getPropertyCodePoints("Script=Zyyy").contains('0')); 106 assertTrue(UnicodeData.getPropertyCodePoints("Script=Latn").contains('X')); 107 assertTrue(UnicodeData.getPropertyCodePoints("Script=Hani").contains(0x4E04)); 108 assertTrue(UnicodeData.getPropertyCodePoints("Script=Cyrl").contains(0x0404)); 109 } 110 111 @Test testUnicodeScriptAliases()112 public void testUnicodeScriptAliases() { 113 assertTrue(UnicodeData.getPropertyCodePoints("Common").contains('0')); 114 assertTrue(UnicodeData.getPropertyCodePoints("Latin").contains('X')); 115 assertTrue(UnicodeData.getPropertyCodePoints("Han").contains(0x4E04)); 116 assertTrue(UnicodeData.getPropertyCodePoints("Cyrillic").contains(0x0404)); 117 } 118 119 @Test testUnicodeBlocks()120 public void testUnicodeBlocks() { 121 assertTrue(UnicodeData.getPropertyCodePoints("InASCII").contains('0')); 122 assertTrue(UnicodeData.getPropertyCodePoints("InCJK").contains(0x4E04)); 123 assertTrue(UnicodeData.getPropertyCodePoints("InCyrillic").contains(0x0404)); 124 assertTrue(UnicodeData.getPropertyCodePoints("InMisc_Pictographs").contains(0x1F4A9)); 125 } 126 127 @Test testUnicodeBlockEquals()128 public void testUnicodeBlockEquals() { 129 assertTrue(UnicodeData.getPropertyCodePoints("Block=ASCII").contains('0')); 130 assertTrue(UnicodeData.getPropertyCodePoints("Block=CJK").contains(0x4E04)); 131 assertTrue(UnicodeData.getPropertyCodePoints("Block=Cyrillic").contains(0x0404)); 132 assertTrue(UnicodeData.getPropertyCodePoints("Block=Misc_Pictographs").contains(0x1F4A9)); 133 } 134 135 @Test testUnicodeBlockAliases()136 public void testUnicodeBlockAliases() { 137 assertTrue(UnicodeData.getPropertyCodePoints("InBasic_Latin").contains('0')); 138 assertTrue(UnicodeData.getPropertyCodePoints("InMiscellaneous_Mathematical_Symbols_B").contains(0x29BE)); 139 } 140 141 @Test testEnumeratedPropertyEquals()142 public void testEnumeratedPropertyEquals() { 143 assertTrue( 144 "U+1F481 INFORMATION DESK PERSON is an emoji modifier base", 145 UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481)); 146 147 assertFalse( 148 "U+1F47E ALIEN MONSTER is not an emoji modifier", 149 UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F47E)); 150 151 assertTrue( 152 "U+0E33 THAI CHARACTER SARA AM is a spacing mark", 153 UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1F481)); 154 155 assertFalse( 156 "U+1038 MYANMAR SIGN VISARGA is not a spacing mark", 157 UnicodeData.getPropertyCodePoints("Grapheme_Cluster_Break=E_Base").contains(0x1038)); 158 159 assertTrue( 160 "U+00A1 INVERTED EXCLAMATION MARK has ambiguous East Asian Width", 161 UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A1)); 162 163 assertFalse( 164 "U+00A2 CENT SIGN does not have ambiguous East Asian Width", 165 UnicodeData.getPropertyCodePoints("East_Asian_Width=Ambiguous").contains(0x00A2)); 166 167 } 168 169 @Test extendedPictographic()170 public void extendedPictographic() { 171 assertTrue( 172 "U+1F588 BLACK PUSHPIN is in Extended Pictographic", 173 UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains(0x1F588)); 174 assertFalse( 175 "0 is not in Extended Pictographic", 176 UnicodeData.getPropertyCodePoints("Extended_Pictographic").contains('0')); 177 } 178 179 @Test emojiPresentation()180 public void emojiPresentation() { 181 assertTrue( 182 "U+1F4A9 PILE OF POO is in EmojiPresentation=EmojiDefault", 183 UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains(0x1F4A9)); 184 assertFalse( 185 "0 is not in EmojiPresentation=EmojiDefault", 186 UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('0')); 187 assertFalse( 188 "A is not in EmojiPresentation=EmojiDefault", 189 UnicodeData.getPropertyCodePoints("EmojiPresentation=EmojiDefault").contains('A')); 190 assertFalse( 191 "U+1F4A9 PILE OF POO is not in EmojiPresentation=TextDefault", 192 UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains(0x1F4A9)); 193 assertTrue( 194 "0 is in EmojiPresentation=TextDefault", 195 UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('0')); 196 assertFalse( 197 "A is not in EmojiPresentation=TextDefault", 198 UnicodeData.getPropertyCodePoints("EmojiPresentation=TextDefault").contains('A')); 199 } 200 201 @Test testPropertyCaseInsensitivity()202 public void testPropertyCaseInsensitivity() { 203 assertTrue(UnicodeData.getPropertyCodePoints("l").contains('x')); 204 assertFalse(UnicodeData.getPropertyCodePoints("l").contains('0')); 205 assertTrue(UnicodeData.getPropertyCodePoints("common").contains('0')); 206 assertTrue(UnicodeData.getPropertyCodePoints("Alnum").contains('0')); 207 } 208 209 @Test testPropertyDashSameAsUnderscore()210 public void testPropertyDashSameAsUnderscore() { 211 assertTrue(UnicodeData.getPropertyCodePoints("InLatin-1").contains('\u00F0')); 212 } 213 214 @Test modifyingUnicodeDataShouldThrow()215 public void modifyingUnicodeDataShouldThrow() { 216 thrown.expect(IllegalStateException.class); 217 thrown.expectMessage("can't alter readonly IntervalSet"); 218 UnicodeData.getPropertyCodePoints("L").add(0x12345); 219 } 220 } 221