1 /*
2  * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
3  * Use of this file is governed by the BSD 3-clause license that
4  * can be found in the LICENSE.txt file in the project root.
5  */
6 
7 package org.antlr.v4.runtime.atn;
8 
9 import org.antlr.v4.runtime.CharStream;
10 import org.antlr.v4.runtime.IntStream;
11 import org.antlr.v4.runtime.Lexer;
12 import org.antlr.v4.runtime.dfa.DFA;
13 import org.antlr.v4.runtime.misc.MurmurHash;
14 
15 import java.util.Arrays;
16 
17 /**
18  * Represents an executor for a sequence of lexer actions which traversed during
19  * the matching operation of a lexer rule (token).
20  *
21  * <p>The executor tracks position information for position-dependent lexer actions
22  * efficiently, ensuring that actions appearing only at the end of the rule do
23  * not cause bloating of the {@link DFA} created for the lexer.</p>
24  *
25  * @author Sam Harwell
26  * @since 4.2
27  */
28 public class LexerActionExecutor {
29 
30 	private final LexerAction[] lexerActions;
31 	/**
32 	 * Caches the result of {@link #hashCode} since the hash code is an element
33 	 * of the performance-critical {@link LexerATNConfig#hashCode} operation.
34 	 */
35 	private final int hashCode;
36 
37 	/**
38 	 * Constructs an executor for a sequence of {@link LexerAction} actions.
39 	 * @param lexerActions The lexer actions to execute.
40 	 */
LexerActionExecutor(LexerAction[] lexerActions)41 	public LexerActionExecutor(LexerAction[] lexerActions) {
42 		this.lexerActions = lexerActions;
43 
44 		int hash = MurmurHash.initialize();
45 		for (LexerAction lexerAction : lexerActions) {
46 			hash = MurmurHash.update(hash, lexerAction);
47 		}
48 
49 		this.hashCode = MurmurHash.finish(hash, lexerActions.length);
50 	}
51 
52 	/**
53 	 * Creates a {@link LexerActionExecutor} which executes the actions for
54 	 * the input {@code lexerActionExecutor} followed by a specified
55 	 * {@code lexerAction}.
56 	 *
57 	 * @param lexerActionExecutor The executor for actions already traversed by
58 	 * the lexer while matching a token within a particular
59 	 * {@link LexerATNConfig}. If this is {@code null}, the method behaves as
60 	 * though it were an empty executor.
61 	 * @param lexerAction The lexer action to execute after the actions
62 	 * specified in {@code lexerActionExecutor}.
63 	 *
64 	 * @return A {@link LexerActionExecutor} for executing the combine actions
65 	 * of {@code lexerActionExecutor} and {@code lexerAction}.
66 	 */
append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)67 	public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction) {
68 		if (lexerActionExecutor == null) {
69 			return new LexerActionExecutor(new LexerAction[] { lexerAction });
70 		}
71 
72 		LexerAction[] lexerActions = Arrays.copyOf(lexerActionExecutor.lexerActions, lexerActionExecutor.lexerActions.length + 1);
73 		lexerActions[lexerActions.length - 1] = lexerAction;
74 		return new LexerActionExecutor(lexerActions);
75 	}
76 
77 	/**
78 	 * Creates a {@link LexerActionExecutor} which encodes the current offset
79 	 * for position-dependent lexer actions.
80 	 *
81 	 * <p>Normally, when the executor encounters lexer actions where
82 	 * {@link LexerAction#isPositionDependent} returns {@code true}, it calls
83 	 * {@link IntStream#seek} on the input {@link CharStream} to set the input
84 	 * position to the <em>end</em> of the current token. This behavior provides
85 	 * for efficient DFA representation of lexer actions which appear at the end
86 	 * of a lexer rule, even when the lexer rule matches a variable number of
87 	 * characters.</p>
88 	 *
89 	 * <p>Prior to traversing a match transition in the ATN, the current offset
90 	 * from the token start index is assigned to all position-dependent lexer
91 	 * actions which have not already been assigned a fixed offset. By storing
92 	 * the offsets relative to the token start index, the DFA representation of
93 	 * lexer actions which appear in the middle of tokens remains efficient due
94 	 * to sharing among tokens of the same length, regardless of their absolute
95 	 * position in the input stream.</p>
96 	 *
97 	 * <p>If the current executor already has offsets assigned to all
98 	 * position-dependent lexer actions, the method returns {@code this}.</p>
99 	 *
100 	 * @param offset The current offset to assign to all position-dependent
101 	 * lexer actions which do not already have offsets assigned.
102 	 *
103 	 * @return A {@link LexerActionExecutor} which stores input stream offsets
104 	 * for all position-dependent lexer actions.
105 	 */
fixOffsetBeforeMatch(int offset)106 	public LexerActionExecutor fixOffsetBeforeMatch(int offset) {
107 		LexerAction[] updatedLexerActions = null;
108 		for (int i = 0; i < lexerActions.length; i++) {
109 			if (lexerActions[i].isPositionDependent() && !(lexerActions[i] instanceof LexerIndexedCustomAction)) {
110 				if (updatedLexerActions == null) {
111 					updatedLexerActions = lexerActions.clone();
112 				}
113 
114 				updatedLexerActions[i] = new LexerIndexedCustomAction(offset, lexerActions[i]);
115 			}
116 		}
117 
118 		if (updatedLexerActions == null) {
119 			return this;
120 		}
121 
122 		return new LexerActionExecutor(updatedLexerActions);
123 	}
124 
125 	/**
126 	 * Gets the lexer actions to be executed by this executor.
127 	 * @return The lexer actions to be executed by this executor.
128 	 */
getLexerActions()129 	public LexerAction[] getLexerActions() {
130 		return lexerActions;
131 	}
132 
133 	/**
134 	 * Execute the actions encapsulated by this executor within the context of a
135 	 * particular {@link Lexer}.
136 	 *
137 	 * <p>This method calls {@link IntStream#seek} to set the position of the
138 	 * {@code input} {@link CharStream} prior to calling
139 	 * {@link LexerAction#execute} on a position-dependent action. Before the
140 	 * method returns, the input position will be restored to the same position
141 	 * it was in when the method was invoked.</p>
142 	 *
143 	 * @param lexer The lexer instance.
144 	 * @param input The input stream which is the source for the current token.
145 	 * When this method is called, the current {@link IntStream#index} for
146 	 * {@code input} should be the start of the following token, i.e. 1
147 	 * character past the end of the current token.
148 	 * @param startIndex The token start index. This value may be passed to
149 	 * {@link IntStream#seek} to set the {@code input} position to the beginning
150 	 * of the token.
151 	 */
execute(Lexer lexer, CharStream input, int startIndex)152 	public void execute(Lexer lexer, CharStream input, int startIndex) {
153 		boolean requiresSeek = false;
154 		int stopIndex = input.index();
155 		try {
156 			for (LexerAction lexerAction : lexerActions) {
157 				if (lexerAction instanceof LexerIndexedCustomAction) {
158 					int offset = ((LexerIndexedCustomAction)lexerAction).getOffset();
159 					input.seek(startIndex + offset);
160 					lexerAction = ((LexerIndexedCustomAction)lexerAction).getAction();
161 					requiresSeek = (startIndex + offset) != stopIndex;
162 				}
163 				else if (lexerAction.isPositionDependent()) {
164 					input.seek(stopIndex);
165 					requiresSeek = false;
166 				}
167 
168 				lexerAction.execute(lexer);
169 			}
170 		}
171 		finally {
172 			if (requiresSeek) {
173 				input.seek(stopIndex);
174 			}
175 		}
176 	}
177 
178 	@Override
hashCode()179 	public int hashCode() {
180 		return this.hashCode;
181 	}
182 
183 	@Override
equals(Object obj)184 	public boolean equals(Object obj) {
185 		if (obj == this) {
186 			return true;
187 		}
188 		else if (!(obj instanceof LexerActionExecutor)) {
189 			return false;
190 		}
191 
192 		LexerActionExecutor other = (LexerActionExecutor)obj;
193 		return hashCode == other.hashCode
194 			&& Arrays.equals(lexerActions, other.lexerActions);
195 	}
196 }
197