1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 package org.antlr.v4.test.tool;
8 
9 import org.antlr.v4.runtime.Token;
10 import org.antlr.v4.tool.Grammar;
11 import org.antlr.v4.tool.LexerGrammar;
12 import org.junit.Test;
13 
14 import java.util.HashSet;
15 import java.util.LinkedHashSet;
16 import java.util.Set;
17 import java.util.StringTokenizer;
18 
19 import static org.junit.Assert.assertEquals;
20 import static org.junit.Assert.assertNotNull;
21 import static org.junit.Assert.assertTrue;
22 
23 public class TestTokenTypeAssignment extends BaseJavaToolTest {
24 
25 	@Test
testParserSimpleTokens()26 		public void testParserSimpleTokens() throws Exception {
27 		Grammar g = new Grammar(
28 				"parser grammar t;\n"+
29 				"a : A | B;\n" +
30 				"b : C ;");
31 		String rules = "a, b";
32 		String tokenNames = "A, B, C";
33 		checkSymbols(g, rules, tokenNames);
34 	}
35 
testParserTokensSection()36 	@Test public void testParserTokensSection() throws Exception {
37 		Grammar g = new Grammar(
38 				"parser grammar t;\n" +
39 				"tokens {\n" +
40 				"  C,\n" +
41 				"  D" +
42 				"}\n"+
43 				"a : A | B;\n" +
44 				"b : C ;");
45 		String rules = "a, b";
46 		String tokenNames = "A, B, C, D";
47 		checkSymbols(g, rules, tokenNames);
48 	}
49 
testLexerTokensSection()50 	@Test public void testLexerTokensSection() throws Exception {
51 		LexerGrammar g = new LexerGrammar(
52 				"lexer grammar t;\n" +
53 				"tokens {\n" +
54 				"  C,\n" +
55 				"  D" +
56 				"}\n"+
57 				"A : 'a';\n" +
58 				"C : 'c' ;");
59 		String rules = "A, C";
60 		String tokenNames = "A, C, D";
61 		checkSymbols(g, rules, tokenNames);
62 	}
63 
testCombinedGrammarLiterals()64 	@Test public void testCombinedGrammarLiterals() throws Exception {
65 		Grammar g = new Grammar(
66 				"grammar t;\n"+
67 				"a : 'begin' b 'end';\n" +
68 				"b : C ';' ;\n" +
69 				"ID : 'a' ;\n" +
70 				"FOO : 'foo' ;\n" +  // "foo" is not a token name
71 				"C : 'c' ;\n");        // nor is 'c'
72 		String rules = "a, b";
73 		String tokenNames = "C, FOO, ID, 'begin', 'end', ';'";
74 		checkSymbols(g, rules, tokenNames);
75 	}
76 
testLiteralInParserAndLexer()77 	@Test public void testLiteralInParserAndLexer() throws Exception {
78 		// 'x' is token and char in lexer rule
79 		Grammar g = new Grammar(
80 				"grammar t;\n" +
81 				"a : 'x' E ; \n" +
82 				"E: 'x' '0' ;\n");
83 
84 		String literals = "['x']";
85 		String foundLiterals = g.stringLiteralToTypeMap.keySet().toString();
86 		assertEquals(literals, foundLiterals);
87 
88 		foundLiterals = g.implicitLexer.stringLiteralToTypeMap.keySet().toString();
89 		assertEquals("['x']", foundLiterals); // pushed in lexer from parser
90 
91 		String[] typeToTokenName = g.getTokenDisplayNames();
92 		Set<String> tokens = new LinkedHashSet<String>();
93 		for (String t : typeToTokenName) if ( t!=null ) tokens.add(t);
94 		assertEquals("[<INVALID>, 'x', E]", tokens.toString());
95 	}
96 
testPredDoesNotHideNameToLiteralMapInLexer()97 	@Test public void testPredDoesNotHideNameToLiteralMapInLexer() throws Exception {
98 		// 'x' is token and char in lexer rule
99 		Grammar g = new Grammar(
100 				"grammar t;\n" +
101 				"a : 'x' X ; \n" +
102 				"X: 'x' {true}?;\n"); // must match as alias even with pred
103 
104 		assertEquals("{'x'=1}", g.stringLiteralToTypeMap.toString());
105 		assertEquals("{EOF=-1, X=1}", g.tokenNameToTypeMap.toString());
106 
107 		// pushed in lexer from parser
108 		assertEquals("{'x'=1}", g.implicitLexer.stringLiteralToTypeMap.toString());
109 		assertEquals("{EOF=-1, X=1}", g.implicitLexer.tokenNameToTypeMap.toString());
110 	}
111 
testCombinedGrammarWithRefToLiteralButNoTokenIDRef()112 	@Test public void testCombinedGrammarWithRefToLiteralButNoTokenIDRef() throws Exception {
113 		Grammar g = new Grammar(
114 				"grammar t;\n"+
115 				"a : 'a' ;\n" +
116 				"A : 'a' ;\n");
117 		String rules = "a";
118 		String tokenNames = "A, 'a'";
119 		checkSymbols(g, rules, tokenNames);
120 	}
121 
testSetDoesNotMissTokenAliases()122 	@Test public void testSetDoesNotMissTokenAliases() throws Exception {
123 		Grammar g = new Grammar(
124 				"grammar t;\n"+
125 				"a : 'a'|'b' ;\n" +
126 				"A : 'a' ;\n" +
127 				"B : 'b' ;\n");
128 		String rules = "a";
129 		String tokenNames = "A, 'a', B, 'b'";
130 		checkSymbols(g, rules, tokenNames);
131 	}
132 
133 	// T E S T  L I T E R A L  E S C A P E S
134 
testParserCharLiteralWithEscape()135 	@Test public void testParserCharLiteralWithEscape() throws Exception {
136 		Grammar g = new Grammar(
137 				"grammar t;\n"+
138 				"a : '\\n';\n");
139 		Set<?> literals = g.stringLiteralToTypeMap.keySet();
140 		// must store literals how they appear in the antlr grammar
141 		assertEquals("'\\n'", literals.toArray()[0]);
142 	}
143 
testParserCharLiteralWithBasicUnicodeEscape()144 	@Test public void testParserCharLiteralWithBasicUnicodeEscape() throws Exception {
145 		Grammar g = new Grammar(
146 				"grammar t;\n"+
147 				"a : '\\uABCD';\n");
148 		Set<?> literals = g.stringLiteralToTypeMap.keySet();
149 		// must store literals how they appear in the antlr grammar
150 		assertEquals("'\\uABCD'", literals.toArray()[0]);
151 	}
152 
testParserCharLiteralWithExtendedUnicodeEscape()153 	@Test public void testParserCharLiteralWithExtendedUnicodeEscape() throws Exception {
154 		Grammar g = new Grammar(
155 				"grammar t;\n"+
156 				"a : '\\u{1ABCD}';\n");
157 		Set<?> literals = g.stringLiteralToTypeMap.keySet();
158 		// must store literals how they appear in the antlr grammar
159 		assertEquals("'\\u{1ABCD}'", literals.toArray()[0]);
160 	}
161 
checkSymbols(Grammar g, String rulesStr, String allValidTokensStr)162 	protected void checkSymbols(Grammar g,
163 								String rulesStr,
164 								String allValidTokensStr)
165 		throws Exception
166 	{
167 		String[] typeToTokenName = g.getTokenNames();
168 		Set<String> tokens = new HashSet<String>();
169 		for (int i = 0; i < typeToTokenName.length; i++) {
170 			String t = typeToTokenName[i];
171 			if ( t!=null ) {
172 				if (t.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX)) {
173 					tokens.add(g.getTokenDisplayName(i));
174 				}
175 				else {
176 					tokens.add(t);
177 				}
178 			}
179 		}
180 
181 		// make sure expected tokens are there
182 		StringTokenizer st = new StringTokenizer(allValidTokensStr, ", ");
183 		while ( st.hasMoreTokens() ) {
184 			String tokenName = st.nextToken();
185 			assertTrue("token "+tokenName+" expected, but was undefined",
186 					   g.getTokenType(tokenName) != Token.INVALID_TYPE);
187 			tokens.remove(tokenName);
188 		}
189 		// make sure there are not any others (other than <EOF> etc...)
190 		for (String tokenName : tokens) {
191 			assertTrue("unexpected token name "+tokenName,
192 					   g.getTokenType(tokenName) < Token.MIN_USER_TOKEN_TYPE);
193 		}
194 
195 		// make sure all expected rules are there
196 		st = new StringTokenizer(rulesStr, ", ");
197 		int n = 0;
198 		while ( st.hasMoreTokens() ) {
199 			String ruleName = st.nextToken();
200 			assertNotNull("rule "+ruleName+" expected", g.getRule(ruleName));
201 			n++;
202 		}
203 		//System.out.println("rules="+rules);
204 		// make sure there are no extra rules
205 		assertEquals("number of rules mismatch; expecting "+n+"; found "+g.rules.size(),
206 					 n, g.rules.size());
207 
208 	}
209 
210 }
211