1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 package org.antlr.v4.test.tool;
8 
9 import org.antlr.v4.runtime.CharStream;
10 import org.antlr.v4.runtime.CommonTokenFactory;
11 import org.antlr.v4.runtime.CommonTokenStream;
12 import org.antlr.v4.runtime.IntStream;
13 import org.antlr.v4.runtime.LexerInterpreter;
14 import org.antlr.v4.runtime.UnbufferedCharStream;
15 import org.antlr.v4.runtime.misc.Interval;
16 import org.antlr.v4.tool.LexerGrammar;
17 import org.junit.Before;
18 import org.junit.Test;
19 
20 import java.io.Reader;
21 import java.io.StringReader;
22 
23 import static org.junit.Assert.assertEquals;
24 
25 @SuppressWarnings("unused")
26 public class TestUnbufferedCharStream extends BaseJavaToolTest {
27 	@Before
28 	@Override
testSetUp()29 	public void testSetUp() throws Exception {
30 		super.testSetUp();
31 	}
32 
testNoChar()33 	@Test public void testNoChar() throws Exception {
34 		CharStream input = createStream("");
35 		assertEquals(IntStream.EOF, input.LA(1));
36 		assertEquals(IntStream.EOF, input.LA(2));
37 	}
38 
39 	/**
40 	 * The {@link IntStream} interface does not specify the behavior when the
41 	 * EOF symbol is consumed, but {@link UnbufferedCharStream} handles this
42 	 * particular case by throwing an {@link IllegalStateException}.
43 	 */
44 	@Test(expected = IllegalStateException.class)
testConsumeEOF()45 	public void testConsumeEOF() throws Exception {
46 		CharStream input = createStream("");
47 		assertEquals(IntStream.EOF, input.LA(1));
48 		input.consume();
49 		input.consume();
50 	}
51 
52 	@Test(expected = IllegalArgumentException.class)
testNegativeSeek()53 	public void testNegativeSeek() {
54 		CharStream input = createStream("");
55 		input.seek(-1);
56 	}
57 
58 	@Test
testSeekPastEOF()59 	public void testSeekPastEOF() {
60 		CharStream input = createStream("");
61 		assertEquals(0, input.index());
62 		input.seek(1);
63 		assertEquals(0, input.index());
64 	}
65 
66 	/**
67 	 * The {@link IntStream} interface does not specify the behavior when marks
68 	 * are not released in the reversed order they were created, but
69 	 * {@link UnbufferedCharStream} handles this case by throwing an
70 	 * {@link IllegalStateException}.
71 	 */
72 	@Test(expected = IllegalStateException.class)
testMarkReleaseOutOfOrder()73 	public void testMarkReleaseOutOfOrder() {
74 		CharStream input = createStream("");
75 		int m1 = input.mark();
76 		int m2 = input.mark();
77 		input.release(m1);
78 	}
79 
80 	/**
81 	 * The {@link IntStream} interface does not specify the behavior when a mark
82 	 * is released twice, but {@link UnbufferedCharStream} handles this case by
83 	 * throwing an {@link IllegalStateException}.
84 	 */
85 	@Test(expected = IllegalStateException.class)
testMarkReleasedTwice()86 	public void testMarkReleasedTwice() {
87 		CharStream input = createStream("");
88 		int m1 = input.mark();
89 		input.release(m1);
90 		input.release(m1);
91 	}
92 
93 	/**
94 	 * The {@link IntStream} interface does not specify the behavior when a mark
95 	 * is released twice, but {@link UnbufferedCharStream} handles this case by
96 	 * throwing an {@link IllegalStateException}.
97 	 */
98 	@Test(expected = IllegalStateException.class)
testNestedMarkReleasedTwice()99 	public void testNestedMarkReleasedTwice() {
100 		CharStream input = createStream("");
101 		int m1 = input.mark();
102 		int m2 = input.mark();
103 		input.release(m2);
104 		input.release(m2);
105 	}
106 
107 	/**
108 	 * It is not valid to pass a mark to {@link IntStream#seek}, but
109 	 * {@link UnbufferedCharStream} creates marks in such a way that this
110 	 * invalid usage results in an {@link IllegalArgumentException}.
111 	 */
112 	@Test(expected = IllegalArgumentException.class)
testMarkPassedToSeek()113 	public void testMarkPassedToSeek() {
114 		CharStream input = createStream("");
115 		int m1 = input.mark();
116 		input.seek(m1);
117 	}
118 
119 	@Test(expected = IllegalArgumentException.class)
testSeekBeforeBufferStart()120 	public void testSeekBeforeBufferStart() {
121 		CharStream input = createStream("xyz");
122 		input.consume();
123 		int m1 = input.mark();
124 		assertEquals(1, input.index());
125 		input.consume();
126 		input.seek(0);
127 	}
128 
129 	@Test(expected = UnsupportedOperationException.class)
testGetTextBeforeBufferStart()130 	public void testGetTextBeforeBufferStart() {
131 		CharStream input = createStream("xyz");
132 		input.consume();
133 		int m1 = input.mark();
134 		assertEquals(1, input.index());
135 		input.getText(new Interval(0, 1));
136 	}
137 
138 	@Test
testGetTextInMarkedRange()139 	public void testGetTextInMarkedRange() {
140 		CharStream input = createStream("xyz");
141 		input.consume();
142 		int m1 = input.mark();
143 		assertEquals(1, input.index());
144 		input.consume();
145 		input.consume();
146 		assertEquals("yz", input.getText(new Interval(1, 2)));
147 	}
148 
149 	@Test
testLastChar()150 	public void testLastChar() {
151 		CharStream input = createStream("abcdef");
152 
153 		input.consume();
154 		assertEquals('a', input.LA(-1));
155 
156 		int m1 = input.mark();
157 		input.consume();
158 		input.consume();
159 		input.consume();
160 		assertEquals('d', input.LA(-1));
161 
162 		input.seek(2);
163 		assertEquals('b', input.LA(-1));
164 
165 		input.release(m1);
166 		input.seek(3);
167 		assertEquals('c', input.LA(-1));
168 		// this special case is not required by the IntStream interface, but
169 		// UnbufferedCharStream allows it so we have to make sure the resulting
170 		// state is consistent
171 		input.seek(2);
172 		assertEquals('b', input.LA(-1));
173 	}
174 
test1Char()175 	@Test public void test1Char() throws Exception {
176 		TestingUnbufferedCharStream input = createStream("x");
177 		assertEquals('x', input.LA(1));
178 		input.consume();
179 		assertEquals(IntStream.EOF, input.LA(1));
180 		String r = input.getRemainingBuffer();
181 		assertEquals("\uFFFF", r); // shouldn't include x
182 		assertEquals("\uFFFF", input.getBuffer()); // whole buffer
183 	}
184 
test2Char()185 	@Test public void test2Char() throws Exception {
186 		TestingUnbufferedCharStream input = createStream("xy");
187 		assertEquals('x', input.LA(1));
188 		input.consume();
189 		assertEquals('y', input.LA(1));
190 		assertEquals("y", input.getRemainingBuffer()); // shouldn't include x
191 		assertEquals("y", input.getBuffer());
192 		input.consume();
193 		assertEquals(IntStream.EOF, input.LA(1));
194 		assertEquals("\uFFFF", input.getBuffer());
195 	}
196 
test2CharAhead()197     @Test public void test2CharAhead() throws Exception {
198    		CharStream input = createStream("xy");
199    		assertEquals('x', input.LA(1));
200    		assertEquals('y', input.LA(2));
201    		assertEquals(IntStream.EOF, input.LA(3));
202    	}
203 
testBufferExpand()204     @Test public void testBufferExpand() throws Exception {
205 		TestingUnbufferedCharStream input = createStream("01234", 2);
206    		assertEquals('0', input.LA(1));
207         assertEquals('1', input.LA(2));
208         assertEquals('2', input.LA(3));
209         assertEquals('3', input.LA(4));
210         assertEquals('4', input.LA(5));
211 		assertEquals("01234", input.getBuffer());
212    		assertEquals(IntStream.EOF, input.LA(6));
213    	}
214 
testBufferWrapSize1()215     @Test public void testBufferWrapSize1() throws Exception {
216    		CharStream input = createStream("01234", 1);
217         assertEquals('0', input.LA(1));
218         input.consume();
219         assertEquals('1', input.LA(1));
220         input.consume();
221         assertEquals('2', input.LA(1));
222         input.consume();
223         assertEquals('3', input.LA(1));
224         input.consume();
225         assertEquals('4', input.LA(1));
226         input.consume();
227    		assertEquals(IntStream.EOF, input.LA(1));
228    	}
229 
testBufferWrapSize2()230     @Test public void testBufferWrapSize2() throws Exception {
231    		CharStream input = createStream("01234", 2);
232         assertEquals('0', input.LA(1));
233         input.consume();
234         assertEquals('1', input.LA(1));
235         input.consume();
236         assertEquals('2', input.LA(1));
237         input.consume();
238         assertEquals('3', input.LA(1));
239         input.consume();
240         assertEquals('4', input.LA(1));
241         input.consume();
242    		assertEquals(IntStream.EOF, input.LA(1));
243    	}
244 
test1Mark()245 	@Test public void test1Mark() throws Exception {
246 		TestingUnbufferedCharStream input = createStream("xyz");
247 		int m = input.mark();
248 		assertEquals('x', input.LA(1));
249 		assertEquals('y', input.LA(2));
250 		assertEquals('z', input.LA(3));
251 		input.release(m);
252 		assertEquals(IntStream.EOF, input.LA(4));
253 		assertEquals("xyz\uFFFF", input.getBuffer());
254 	}
255 
test1MarkWithConsumesInSequence()256 	@Test public void test1MarkWithConsumesInSequence() throws Exception {
257 		TestingUnbufferedCharStream input = createStream("xyz");
258 		int m = input.mark();
259 		input.consume(); // x, moves to y
260 		input.consume(); // y
261 		input.consume(); // z, moves to EOF
262 		assertEquals(IntStream.EOF, input.LA(1));
263 		assertEquals("xyz\uFFFF", input.getBuffer());
264 		input.release(m); // wipes buffer
265 		assertEquals("\uFFFF", input.getBuffer());
266 	}
267 
test2Mark()268     @Test public void test2Mark() throws Exception {
269 		TestingUnbufferedCharStream input = createStream("xyz", 100);
270    		assertEquals('x', input.LA(1));
271         input.consume(); // reset buffer index (p) to 0
272         int m1 = input.mark();
273    		assertEquals('y', input.LA(1));
274         input.consume();
275         int m2 = input.mark();
276 		assertEquals("yz", input.getBuffer());
277         input.release(m2); // drop to 1 marker
278         input.consume();
279         input.release(m1); // shifts remaining char to beginning
280    		assertEquals(IntStream.EOF, input.LA(1));
281 		assertEquals("\uFFFF", input.getBuffer());
282    	}
283 
testAFewTokens()284     @Test public void testAFewTokens() throws Exception {
285         LexerGrammar g = new LexerGrammar(
286                 "lexer grammar t;\n"+
287 				"ID : 'a'..'z'+;\n" +
288 				"INT : '0'..'9'+;\n" +
289 				"SEMI : ';';\n" +
290 				"ASSIGN : '=';\n" +
291 				"PLUS : '+';\n" +
292 				"MULT : '*';\n" +
293 				"WS : ' '+;\n");
294         // Tokens: 012345678901234567
295         // Input:  x = 3 * 0 + 2 * 0;
296 		TestingUnbufferedCharStream input = createStream("x = 302 * 91 + 20234234 * 0;");
297         LexerInterpreter lexEngine = g.createLexerInterpreter(input);
298 		// copy text into tokens from char stream
299 		lexEngine.setTokenFactory(new CommonTokenFactory(true));
300 		CommonTokenStream tokens = new CommonTokenStream(lexEngine);
301         String result = tokens.LT(1).getText();
302         String expecting = "x";
303         assertEquals(expecting, result);
304 		tokens.fill();
305 		expecting =
306 			"[[@0,0:0='x',<1>,1:0], [@1,1:1=' ',<7>,1:1], [@2,2:2='=',<4>,1:2]," +
307 			" [@3,3:3=' ',<7>,1:3], [@4,4:6='302',<2>,1:4], [@5,7:7=' ',<7>,1:7]," +
308 			" [@6,8:8='*',<6>,1:8], [@7,9:9=' ',<7>,1:9], [@8,10:11='91',<2>,1:10]," +
309 			" [@9,12:12=' ',<7>,1:12], [@10,13:13='+',<5>,1:13], [@11,14:14=' ',<7>,1:14]," +
310 			" [@12,15:22='20234234',<2>,1:15], [@13,23:23=' ',<7>,1:23]," +
311 			" [@14,24:24='*',<6>,1:24], [@15,25:25=' ',<7>,1:25], [@16,26:26='0',<2>,1:26]," +
312 			" [@17,27:27=';',<3>,1:27], [@18,28:27='',<-1>,1:28]]";
313 		assertEquals(expecting, tokens.getTokens().toString());
314     }
315 
testUnicodeSMP()316 	@Test public void testUnicodeSMP() throws Exception {
317 		TestingUnbufferedCharStream input = createStream("\uD83C\uDF0E");
318 		assertEquals(0x1F30E, input.LA(1));
319 		assertEquals("\uD83C\uDF0E", input.getBuffer());
320 		input.consume();
321 		assertEquals(IntStream.EOF, input.LA(1));
322 		assertEquals("\uFFFF", input.getBuffer());
323 	}
324 
325 	@Test(expected = RuntimeException.class)
testDanglingHighSurrogateAtEOFThrows()326 	public void testDanglingHighSurrogateAtEOFThrows() throws Exception {
327 		createStream("\uD83C");
328 	}
329 
330 	@Test(expected = RuntimeException.class)
testDanglingHighSurrogateThrows()331 	public void testDanglingHighSurrogateThrows() throws Exception {
332 		createStream("\uD83C\u0123");
333 	}
334 
335 	@Test(expected = RuntimeException.class)
testDanglingLowSurrogateThrows()336 	public void testDanglingLowSurrogateThrows() throws Exception {
337 		createStream("\uDF0E");
338 	}
339 
createStream(String text)340 	protected static TestingUnbufferedCharStream createStream(String text) {
341 		return new TestingUnbufferedCharStream(new StringReader(text));
342 	}
343 
createStream(String text, int bufferSize)344 	protected static TestingUnbufferedCharStream createStream(String text, int bufferSize) {
345 		return new TestingUnbufferedCharStream(new StringReader(text), bufferSize);
346 	}
347 
348 	protected static class TestingUnbufferedCharStream extends UnbufferedCharStream {
349 
TestingUnbufferedCharStream(Reader input)350 		public TestingUnbufferedCharStream(Reader input) {
351 			super(input);
352 		}
353 
TestingUnbufferedCharStream(Reader input, int bufferSize)354 		public TestingUnbufferedCharStream(Reader input, int bufferSize) {
355 			super(input, bufferSize);
356 		}
357 
358 		/** For testing.  What's in moving window into data stream from
359 		 *  current index, LA(1) or data[p], to end of buffer?
360 		 */
getRemainingBuffer()361 		public String getRemainingBuffer() {
362 			if ( n==0 ) return "";
363 			int len = n;
364 			if (data[len-1] == IntStream.EOF) {
365 				// Don't pass -1 to new String().
366 				return new String(data,p,len-p-1) + "\uFFFF";
367 			} else {
368 				return new String(data,p,len-p);
369 			}
370 		}
371 
372 		/** For testing.  What's in moving window buffer into data stream.
373 		 *  From 0..p-1 have been consume.
374 		 */
getBuffer()375 		public String getBuffer() {
376 			if ( n==0 ) return "";
377 			int len = n;
378 			// Don't pass -1 to new String().
379 			if (data[len-1] == IntStream.EOF) {
380 				// Don't pass -1 to new String().
381 				return new String(data,0,len-1) + "\uFFFF";
382 			} else {
383 				return new String(data,0,len);
384 			}
385 		}
386 
387 	}
388 }
389