1 /* 2 * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. 3 * Use of this file is governed by the BSD 3-clause license that 4 * can be found in the LICENSE.txt file in the project root. 5 */ 6 7 package org.antlr.v4.runtime.atn; 8 9 import org.antlr.v4.runtime.CharStream; 10 import org.antlr.v4.runtime.IntStream; 11 import org.antlr.v4.runtime.Lexer; 12 import org.antlr.v4.runtime.dfa.DFA; 13 import org.antlr.v4.runtime.misc.MurmurHash; 14 15 import java.util.Arrays; 16 17 /** 18 * Represents an executor for a sequence of lexer actions which traversed during 19 * the matching operation of a lexer rule (token). 20 * 21 * <p>The executor tracks position information for position-dependent lexer actions 22 * efficiently, ensuring that actions appearing only at the end of the rule do 23 * not cause bloating of the {@link DFA} created for the lexer.</p> 24 * 25 * @author Sam Harwell 26 * @since 4.2 27 */ 28 public class LexerActionExecutor { 29 30 private final LexerAction[] lexerActions; 31 /** 32 * Caches the result of {@link #hashCode} since the hash code is an element 33 * of the performance-critical {@link LexerATNConfig#hashCode} operation. 34 */ 35 private final int hashCode; 36 37 /** 38 * Constructs an executor for a sequence of {@link LexerAction} actions. 39 * @param lexerActions The lexer actions to execute. 40 */ LexerActionExecutor(LexerAction[] lexerActions)41 public LexerActionExecutor(LexerAction[] lexerActions) { 42 this.lexerActions = lexerActions; 43 44 int hash = MurmurHash.initialize(); 45 for (LexerAction lexerAction : lexerActions) { 46 hash = MurmurHash.update(hash, lexerAction); 47 } 48 49 this.hashCode = MurmurHash.finish(hash, lexerActions.length); 50 } 51 52 /** 53 * Creates a {@link LexerActionExecutor} which executes the actions for 54 * the input {@code lexerActionExecutor} followed by a specified 55 * {@code lexerAction}. 56 * 57 * @param lexerActionExecutor The executor for actions already traversed by 58 * the lexer while matching a token within a particular 59 * {@link LexerATNConfig}. If this is {@code null}, the method behaves as 60 * though it were an empty executor. 61 * @param lexerAction The lexer action to execute after the actions 62 * specified in {@code lexerActionExecutor}. 63 * 64 * @return A {@link LexerActionExecutor} for executing the combine actions 65 * of {@code lexerActionExecutor} and {@code lexerAction}. 66 */ append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction)67 public static LexerActionExecutor append(LexerActionExecutor lexerActionExecutor, LexerAction lexerAction) { 68 if (lexerActionExecutor == null) { 69 return new LexerActionExecutor(new LexerAction[] { lexerAction }); 70 } 71 72 LexerAction[] lexerActions = Arrays.copyOf(lexerActionExecutor.lexerActions, lexerActionExecutor.lexerActions.length + 1); 73 lexerActions[lexerActions.length - 1] = lexerAction; 74 return new LexerActionExecutor(lexerActions); 75 } 76 77 /** 78 * Creates a {@link LexerActionExecutor} which encodes the current offset 79 * for position-dependent lexer actions. 80 * 81 * <p>Normally, when the executor encounters lexer actions where 82 * {@link LexerAction#isPositionDependent} returns {@code true}, it calls 83 * {@link IntStream#seek} on the input {@link CharStream} to set the input 84 * position to the <em>end</em> of the current token. This behavior provides 85 * for efficient DFA representation of lexer actions which appear at the end 86 * of a lexer rule, even when the lexer rule matches a variable number of 87 * characters.</p> 88 * 89 * <p>Prior to traversing a match transition in the ATN, the current offset 90 * from the token start index is assigned to all position-dependent lexer 91 * actions which have not already been assigned a fixed offset. By storing 92 * the offsets relative to the token start index, the DFA representation of 93 * lexer actions which appear in the middle of tokens remains efficient due 94 * to sharing among tokens of the same length, regardless of their absolute 95 * position in the input stream.</p> 96 * 97 * <p>If the current executor already has offsets assigned to all 98 * position-dependent lexer actions, the method returns {@code this}.</p> 99 * 100 * @param offset The current offset to assign to all position-dependent 101 * lexer actions which do not already have offsets assigned. 102 * 103 * @return A {@link LexerActionExecutor} which stores input stream offsets 104 * for all position-dependent lexer actions. 105 */ fixOffsetBeforeMatch(int offset)106 public LexerActionExecutor fixOffsetBeforeMatch(int offset) { 107 LexerAction[] updatedLexerActions = null; 108 for (int i = 0; i < lexerActions.length; i++) { 109 if (lexerActions[i].isPositionDependent() && !(lexerActions[i] instanceof LexerIndexedCustomAction)) { 110 if (updatedLexerActions == null) { 111 updatedLexerActions = lexerActions.clone(); 112 } 113 114 updatedLexerActions[i] = new LexerIndexedCustomAction(offset, lexerActions[i]); 115 } 116 } 117 118 if (updatedLexerActions == null) { 119 return this; 120 } 121 122 return new LexerActionExecutor(updatedLexerActions); 123 } 124 125 /** 126 * Gets the lexer actions to be executed by this executor. 127 * @return The lexer actions to be executed by this executor. 128 */ getLexerActions()129 public LexerAction[] getLexerActions() { 130 return lexerActions; 131 } 132 133 /** 134 * Execute the actions encapsulated by this executor within the context of a 135 * particular {@link Lexer}. 136 * 137 * <p>This method calls {@link IntStream#seek} to set the position of the 138 * {@code input} {@link CharStream} prior to calling 139 * {@link LexerAction#execute} on a position-dependent action. Before the 140 * method returns, the input position will be restored to the same position 141 * it was in when the method was invoked.</p> 142 * 143 * @param lexer The lexer instance. 144 * @param input The input stream which is the source for the current token. 145 * When this method is called, the current {@link IntStream#index} for 146 * {@code input} should be the start of the following token, i.e. 1 147 * character past the end of the current token. 148 * @param startIndex The token start index. This value may be passed to 149 * {@link IntStream#seek} to set the {@code input} position to the beginning 150 * of the token. 151 */ execute(Lexer lexer, CharStream input, int startIndex)152 public void execute(Lexer lexer, CharStream input, int startIndex) { 153 boolean requiresSeek = false; 154 int stopIndex = input.index(); 155 try { 156 for (LexerAction lexerAction : lexerActions) { 157 if (lexerAction instanceof LexerIndexedCustomAction) { 158 int offset = ((LexerIndexedCustomAction)lexerAction).getOffset(); 159 input.seek(startIndex + offset); 160 lexerAction = ((LexerIndexedCustomAction)lexerAction).getAction(); 161 requiresSeek = (startIndex + offset) != stopIndex; 162 } 163 else if (lexerAction.isPositionDependent()) { 164 input.seek(stopIndex); 165 requiresSeek = false; 166 } 167 168 lexerAction.execute(lexer); 169 } 170 } 171 finally { 172 if (requiresSeek) { 173 input.seek(stopIndex); 174 } 175 } 176 } 177 178 @Override hashCode()179 public int hashCode() { 180 return this.hashCode; 181 } 182 183 @Override equals(Object obj)184 public boolean equals(Object obj) { 185 if (obj == this) { 186 return true; 187 } 188 else if (!(obj instanceof LexerActionExecutor)) { 189 return false; 190 } 191 192 LexerActionExecutor other = (LexerActionExecutor)obj; 193 return hashCode == other.hashCode 194 && Arrays.equals(lexerActions, other.lexerActions); 195 } 196 } 197