/*
 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD 3-clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

package org.antlr.v4.test.tool;

import org.antlr.v4.runtime.Token;
import org.antlr.v4.tool.Grammar;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;

import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;
import java.util.StringTokenizer;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * Verifies that the ANTLR tool assigns token types correctly: explicit token
 * references, {@code tokens {...}} sections, string-literal aliases shared
 * between parser and lexer, and literal escape handling.
 */
public class TestTokenTypeAssignment extends BaseJavaToolTest {

	/** Plain token references in a parser grammar become defined tokens. */
	@Test
	public void testParserSimpleTokens() throws Exception {
		Grammar g = new Grammar(
			"parser grammar t;\n"+
			"a : A | B;\n" +
			"b : C ;");
		String rules = "a, b";
		String tokenNames = "A, B, C";
		checkSymbols(g, rules, tokenNames);
	}

	/** Tokens declared in a {@code tokens} section are defined even if unreferenced. */
	@Test public void testParserTokensSection() throws Exception {
		Grammar g = new Grammar(
			"parser grammar t;\n" +
			"tokens {\n" +
			"  C,\n" +
			"  D" +
			"}\n"+
			"a : A | B;\n" +
			"b : C ;");
		String rules = "a, b";
		String tokenNames = "A, B, C, D";
		checkSymbols(g, rules, tokenNames);
	}

	/** Same as above, but for a standalone lexer grammar. */
	@Test public void testLexerTokensSection() throws Exception {
		LexerGrammar g = new LexerGrammar(
			"lexer grammar t;\n" +
			"tokens {\n" +
			"  C,\n" +
			"  D" +
			"}\n"+
			"A : 'a';\n" +
			"C : 'c' ;");
		String rules = "A, C";
		String tokenNames = "A, C, D";
		checkSymbols(g, rules, tokenNames);
	}

	/** String literals used in parser rules of a combined grammar become tokens. */
	@Test public void testCombinedGrammarLiterals() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : 'begin' b 'end';\n" +
			"b : C ';' ;\n" +
			"ID : 'a' ;\n" +
			"FOO : 'foo' ;\n" +  // "foo" is not a token name
			"C : 'c' ;\n");      // nor is 'c'
		String rules = "a, b";
		String tokenNames = "C, FOO, ID, 'begin', 'end', ';'";
		checkSymbols(g, rules, tokenNames);
	}

	/** A literal that appears in both parser and lexer maps to one token type. */
	@Test public void testLiteralInParserAndLexer() throws Exception {
		// 'x' is token and char in lexer rule
		Grammar g = new Grammar(
			"grammar t;\n" +
			"a : 'x' E ; \n" +
			"E: 'x' '0' ;\n");

		String literals = "['x']";
		String foundLiterals = g.stringLiteralToTypeMap.keySet().toString();
		assertEquals(literals, foundLiterals);

		foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString();
		assertEquals("['x']", foundLiterals); // pushed in lexer from parser

		String[] typeToTokenName = g.getTokenDisplayNames();
		Set<String> tokens = new LinkedHashSet<String>();
		for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
		assertEquals("[<INVALID>, 'x', E]", tokens.toString());
	}

	/** A semantic predicate on a lexer rule must not break literal aliasing. */
	@Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception {
		// 'x' is token and char in lexer rule
		Grammar g = new Grammar(
			"grammar t;\n" +
			"a : 'x' X ; \n" +
			"X: 'x' {true}?;\n"); // must match as alias even with pred

		assertEquals("{'x'=1}", g.stringLiteralToTypeMap.toString());
		assertEquals("{EOF=-1, X=1}", g.tokenNameToTypeMap.toString());

		// pushed in lexer from parser
		assertEquals("{'x'=1}", g.implicitLexer.stringLiteralToTypeMap.toString());
		assertEquals("{EOF=-1, X=1}", g.implicitLexer.tokenNameToTypeMap.toString());
	}

	/** A parser-side literal is aliased to the lexer rule with the same text. */
	@Test public void testCombinedGrammarWithRefToLiteralButNoTokenIDRef() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : 'a' ;\n" +
			"A : 'a' ;\n");
		String rules = "a";
		String tokenNames = "A, 'a'";
		checkSymbols(g, rules, tokenNames);
	}

	/** Literals inside a set ('a'|'b') must still resolve to their token aliases. */
	@Test public void testSetDoesNotMissTokenAliases() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : 'a'|'b' ;\n" +
			"A : 'a' ;\n" +
			"B : 'b' ;\n");
		String rules = "a";
		String tokenNames = "A, 'a', B, 'b'";
		checkSymbols(g, rules, tokenNames);
	}

	// T E S T  L I T E R A L  E S C A P E S

	/** Escaped chars are stored exactly as written in the grammar text. */
	@Test public void testParserCharLiteralWithEscape() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : '\\n';\n");
		Set<?> literals = g.stringLiteralToTypeMap.keySet();
		// must store literals how they appear in the antlr grammar
		assertEquals("'\\n'", literals.toArray()[0]);
	}

	/** BMP unicode escapes (\\uXXXX) are likewise stored verbatim. */
	@Test public void testParserCharLiteralWithBasicUnicodeEscape() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : '\\uABCD';\n");
		Set<?> literals = g.stringLiteralToTypeMap.keySet();
		// must store literals how they appear in the antlr grammar
		assertEquals("'\\uABCD'", literals.toArray()[0]);
	}

	/** Extended unicode escapes (\\u{...}) are likewise stored verbatim. */
	@Test public void testParserCharLiteralWithExtendedUnicodeEscape() throws Exception {
		Grammar g = new Grammar(
			"grammar t;\n"+
			"a : '\\u{1ABCD}';\n");
		Set<?> literals = g.stringLiteralToTypeMap.keySet();
		// must store literals how they appear in the antlr grammar
		assertEquals("'\\u{1ABCD}'", literals.toArray()[0]);
	}

	/**
	 * Asserts that {@code g} defines exactly the given rules and that each
	 * expected token name is assigned a valid type, with no extra user tokens.
	 *
	 * @param g                the grammar under test
	 * @param rulesStr         comma-separated expected rule names
	 * @param allValidTokensStr comma-separated expected token names/literals
	 */
	protected void checkSymbols(Grammar g,
								String rulesStr,
								String allValidTokensStr)
		throws Exception
	{
		// Collect display names for every defined token type; auto-generated
		// names (implicit literal tokens) are reported via their display name.
		String[] typeToTokenName = g.getTokenNames();
		Set<String> tokens = new HashSet<String>();
		for (int i = 0; i < typeToTokenName.length; i++) {
			String t = typeToTokenName[i];
			if ( t!=null ) {
				if (t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
					tokens.add(g.getTokenDisplayName(i));
				}
				else {
					tokens.add(t);
				}
			}
		}

		// make sure expected tokens are there
		StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");
		while ( st.hasMoreTokens() ) {
			String tokenName = st.nextToken();
			assertTrue("token "+tokenName+" expected, but was undefined",
					   g.getTokenType(tokenName) != Token.INVALID_TYPE);
			tokens.remove(tokenName);
		}
		// make sure there are not any others (other than <EOF> etc...)
		for (String tokenName : tokens) {
			assertTrue("unexpected token name "+tokenName,
					   g.getTokenType(tokenName) < Token.MIN_USER_TOKEN_TYPE);
		}

		// make sure all expected rules are there
		st = new StringTokenizer(rulesStr, ", ");
		int n = 0;
		while ( st.hasMoreTokens() ) {
			String ruleName = st.nextToken();
			assertNotNull("rule "+ruleName+" expected", g.getRule(ruleName));
			n++;
		}
		// make sure there are no extra rules
		assertEquals("number of rules mismatch; expecting "+n+"; found "+g.rules.size(),
					 n, g.rules.size());
	}

}