1 /*
2 This file is part of the Grantlee template system.
3
4 Copyright (c) 2009,2010,2011 Stephen Kelly <steveire@gmail.com>
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either version
9 2.1 of the Licence, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library. If not, see <http://www.gnu.org/licenses/>.
18
19 */
20
21 #include "lexer_p.h"
22
23 using namespace Grantlee;
24
25 typedef State<TextProcessingMachine::Type> TextProcessingState;
26 typedef TextProcessingMachine::Transition TextProcessingTransition;
27
28 typedef LexerObject<TextProcessingState, NullTest, MarksClearer> ChurningState;
29 typedef LexerObject<TextProcessingState, NullTest, TokenFinalizer>
30 FinalizeTokenState;
31 typedef LexerObject<TextProcessingTransition, NullTest, TokenFinalizer>
32 EofHandler;
33 typedef LexerObject<TextProcessingTransition, NullTest,
34 TokenFinalizerWithTrimming>
35 EofHandlerWithTrimming;
36
37 typedef CharacterTransition<'{'> MaybeTemplateSyntaxHandler;
38
39 typedef CharacterTransition<'%', MarkStartSyntax> TagStartHandler;
40 typedef CharacterTransition<'#', MarkStartSyntax> CommentStartHandler;
41 typedef CharacterTransition<'%'> TagEndHandler;
42 typedef CharacterTransition<'#'> CommentEndHandler;
43 typedef CharacterTransition<'{', MarkStartSyntax> BeginValueHandler;
44 typedef CharacterTransition<'}'> MaybeEndValueHandler;
45 typedef CharacterTransition<'\n', MarkNewline> NewlineHandler;
46 typedef CharacterTransition<'}', MarkEndSyntax> EndTemplateSyntaxHandler;
47 typedef NegateCharacterTransition<'}'> NotEndTemplateSyntaxHandler;
48
49 typedef LexerObject<
50 TextProcessingTransition,
51 Negate<OrTest<CharacterTest<'{'>,
52 OrTest<CharacterTest<'#'>, CharacterTest<'%'>>>>>
53 NotBeginTemplateSyntaxHandler;
54
55 typedef LexerObject<
56 TextProcessingTransition,
57 Negate<OrTest<CharacterTest<'{'>,
58 OrTest<CharacterTest<'#'>,
59 OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>>
60 NotBeginTemplateSyntaxOrNewlineHandler;
61
62 typedef LexerObject<
63 TextProcessingTransition,
64 Negate<OrTest<CharacterTest<'#'>,
65 OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>
66 NotTagCommentOrNewlineHandler;
67
68 typedef LexerObject<TextProcessingTransition,
69 Negate<OrTest<IsSpace, CharacterTest<'{'>>>>
70 NonWhitespaceLineTextHandler;
71
72 typedef LexerObject<TextProcessingTransition,
73 AndTest<Negate<CharacterTest<'\n'>>, IsSpace>>
74 WhitespaceNonNewlineHandler;
75
76 typedef LexerObject<TextProcessingTransition,
77 Negate<OrTest<CharacterTest<'{'>, IsSpace>>, TokenFinalizer>
78 FinalizingLineTextHandler;
79
80 typedef CharacterTransition<'\n', TokenFinalizerWithTrimmingAndNewline>
81 SyntaxBoundaryNewlineHandler;
82 typedef CharacterTransition<'{', FinalizeAndMarkStartSyntax>
83 SyntaxBoundaryHandler;
84
85 template <typename Transition>
addTransition(TextProcessingState * source,Lexer * lexer,TextProcessingState * target)86 void addTransition(TextProcessingState *source, Lexer *lexer,
87 TextProcessingState *target)
88 {
89 auto tr = new Transition(lexer, source);
90 tr->setTargetState(target);
91 }
92
createMachine(Lexer * lexer,Lexer::TrimType type)93 TextProcessingMachine *createMachine(Lexer *lexer, Lexer::TrimType type)
94 {
95 auto machine = new TextProcessingMachine;
96
97 auto notFinished = new TextProcessingState(machine);
98 auto finished = new TextProcessingState(machine);
99 machine->setInitialState(notFinished);
100
101 auto processingText = new ChurningState(lexer, notFinished);
102 auto processingPostNewline = new TextProcessingState(notFinished);
103 auto processingBeginTemplateSyntax = new TextProcessingState(notFinished);
104 auto processingTag = new TextProcessingState(notFinished);
105 auto processingComment = new TextProcessingState(notFinished);
106 auto processingValue = new TextProcessingState(notFinished);
107 auto maybeProcessingValue = new TextProcessingState(notFinished);
108 auto processingEndTag = new TextProcessingState(notFinished);
109 auto processingEndComment = new TextProcessingState(notFinished);
110 auto processingEndValue = new TextProcessingState(notFinished);
111 TextProcessingState *processingPostTemplateSyntax;
112
113 if (type == Lexer::SmartTrim)
114 processingPostTemplateSyntax = new TextProcessingState(notFinished);
115 else
116 processingPostTemplateSyntax = new FinalizeTokenState(lexer, notFinished);
117 auto processingPostTemplateSyntaxWhitespace
118 = new TextProcessingState(notFinished);
119
120 if (type == Lexer::SmartTrim)
121 notFinished->setInitialState(processingPostNewline);
122 else
123 notFinished->setInitialState(processingText);
124
125 if (type == Lexer::SmartTrim) {
126 addTransition<NewlineHandler>(processingText, lexer, processingPostNewline);
127
128 addTransition<NewlineHandler>(processingPostNewline, lexer,
129 processingPostNewline);
130 addTransition<MaybeTemplateSyntaxHandler>(processingPostNewline, lexer,
131 processingBeginTemplateSyntax);
132 addTransition<NonWhitespaceLineTextHandler>(processingPostNewline, lexer,
133 processingText);
134 }
135 addTransition<MaybeTemplateSyntaxHandler>(processingText, lexer,
136 processingBeginTemplateSyntax);
137
138 addTransition<TagStartHandler>(processingBeginTemplateSyntax, lexer,
139 processingTag);
140 addTransition<CommentStartHandler>(processingBeginTemplateSyntax, lexer,
141 processingComment);
142 addTransition<BeginValueHandler>(processingBeginTemplateSyntax, lexer,
143 maybeProcessingValue);
144
145 if (type == Lexer::SmartTrim) {
146 addTransition<NotBeginTemplateSyntaxOrNewlineHandler>(
147 processingBeginTemplateSyntax, lexer, processingText);
148 addTransition<NewlineHandler>(processingBeginTemplateSyntax, lexer,
149 processingPostNewline);
150 } else {
151 addTransition<NotBeginTemplateSyntaxHandler>(processingBeginTemplateSyntax,
152 lexer, processingText);
153 }
154
155 addTransition<NewlineHandler>(processingTag, lexer,
156 type == Lexer::SmartTrim ? processingPostNewline
157 : processingText);
158 addTransition<TagEndHandler>(processingTag, lexer, processingEndTag);
159
160 addTransition<NewlineHandler>(processingComment, lexer,
161 type == Lexer::SmartTrim ? processingPostNewline
162 : processingText);
163 addTransition<CommentEndHandler>(processingComment, lexer,
164 processingEndComment);
165
166 addTransition<TagStartHandler>(maybeProcessingValue, lexer, processingTag);
167 addTransition<CommentStartHandler>(maybeProcessingValue, lexer,
168 processingComment);
169 addTransition<NotTagCommentOrNewlineHandler>(maybeProcessingValue, lexer,
170 processingValue);
171 addTransition<NewlineHandler>(maybeProcessingValue, lexer,
172 type == Lexer::SmartTrim ? processingPostNewline
173 : processingText);
174
175 addTransition<NewlineHandler>(processingValue, lexer,
176 type == Lexer::SmartTrim ? processingPostNewline
177 : processingText);
178 addTransition<MaybeEndValueHandler>(processingValue, lexer,
179 processingEndValue);
180
181 addTransition<NewlineHandler>(processingEndTag, lexer, processingPostNewline);
182 addTransition<NotEndTemplateSyntaxHandler>(processingEndTag, lexer,
183 processingTag);
184 addTransition<EndTemplateSyntaxHandler>(processingEndTag, lexer,
185 processingPostTemplateSyntax);
186
187 addTransition<NewlineHandler>(processingEndComment, lexer,
188 processingPostNewline);
189 addTransition<NotEndTemplateSyntaxHandler>(processingEndComment, lexer,
190 processingComment);
191 addTransition<EndTemplateSyntaxHandler>(processingEndComment, lexer,
192 processingPostTemplateSyntax);
193
194 addTransition<NewlineHandler>(processingEndValue, lexer,
195 processingPostNewline);
196 addTransition<NotEndTemplateSyntaxHandler>(processingEndValue, lexer,
197 processingValue);
198 addTransition<EndTemplateSyntaxHandler>(processingEndValue, lexer,
199 processingPostTemplateSyntax);
200
201 if (type != Lexer::SmartTrim) {
202 processingPostTemplateSyntax->setUnconditionalTransition(processingText);
203 } else {
204 addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntax,
205 lexer, processingPostNewline);
206 addTransition<WhitespaceNonNewlineHandler>(
207 processingPostTemplateSyntax, lexer,
208 processingPostTemplateSyntaxWhitespace);
209 addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntax,
210 lexer, processingText);
211 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntax, lexer,
212 processingBeginTemplateSyntax);
213
214 // NOTE: We only have to transition to this if there was whitespace
215 // before the opening tag. Maybe store that in an external state property?
216 // Actually, this may be a bug if we try to finalize with trimming and
217 // there is no leading whitespace.
218 addTransition<SyntaxBoundaryNewlineHandler>(
219 processingPostTemplateSyntaxWhitespace, lexer, processingPostNewline);
220 addTransition<FinalizingLineTextHandler>(
221 processingPostTemplateSyntaxWhitespace, lexer, processingText);
222 addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntaxWhitespace,
223 lexer, processingBeginTemplateSyntax);
224 }
225
226 {
227 auto handler = new EofHandler(lexer, notFinished);
228 handler->setTargetState(finished);
229 notFinished->setEndTransition(handler);
230 }
231
232 if (type == Lexer::SmartTrim) {
233 {
234 auto handler = new EofHandlerWithTrimming(
235 lexer, processingPostTemplateSyntaxWhitespace);
236 handler->setTargetState(finished);
237 processingPostTemplateSyntaxWhitespace->setEndTransition(handler);
238 }
239 {
240 auto handler
241 = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntax);
242 handler->setTargetState(finished);
243 processingPostTemplateSyntax->setEndTransition(handler);
244 }
245 }
246 return machine;
247 }
248
Lexer(const QString & templateString)249 Lexer::Lexer(const QString &templateString) : m_templateString(templateString)
250 {
251 }
252
~Lexer()253 Lexer::~Lexer() {}
254
clearMarkers()255 void Lexer::clearMarkers()
256 {
257 m_startSyntaxPosition = -1;
258 m_endSyntaxPosition = -1;
259 m_newlinePosition = -1;
260 }
261
reset()262 void Lexer::reset()
263 {
264 m_tokenList.clear();
265 m_lineCount = 0;
266 m_upto = 0;
267 m_processedUpto = 0;
268 clearMarkers();
269 }
270
tokenize(TrimType type)271 QList<Token> Lexer::tokenize(TrimType type)
272 {
273 auto machine = createMachine(this, type);
274
275 machine->start();
276
277 auto it = m_templateString.constBegin();
278 const auto end = m_templateString.constEnd();
279
280 reset();
281 for (; it != end; ++it, ++m_upto)
282 machine->processCharacter(it);
283
284 machine->finished();
285
286 machine->stop();
287
288 delete machine;
289
290 return m_tokenList;
291 }
292
markStartSyntax()293 void Lexer::markStartSyntax() { m_startSyntaxPosition = m_upto; }
294
markEndSyntax()295 void Lexer::markEndSyntax() { m_endSyntaxPosition = m_upto + 1; }
296
markNewline()297 void Lexer::markNewline()
298 {
299 m_newlinePosition = m_upto;
300 ++m_lineCount;
301 }
302
finalizeToken()303 void Lexer::finalizeToken()
304 {
305 auto nextPosition = m_upto;
306 const auto validSyntax = m_endSyntaxPosition > m_startSyntaxPosition
307 && (m_startSyntaxPosition >= m_processedUpto);
308
309 if (validSyntax) {
310 Q_ASSERT(m_startSyntaxPosition >= 0);
311 nextPosition = m_startSyntaxPosition - 1;
312 }
313 finalizeToken(nextPosition, validSyntax);
314 }
315
finalizeTokenWithTrimmedWhitespace()316 void Lexer::finalizeTokenWithTrimmedWhitespace()
317 {
318 auto nextPosition = m_upto;
319 // We know this to be true because the state machine has already guaranteed
320 // it. This method is only called from transition and state actions which
321 // occur after valid syntax.
322 // TODO Investigate performance and other implications of changing the state
323 // machine to assure similar in finalizeToken()
324 Q_ASSERT(m_endSyntaxPosition > m_startSyntaxPosition);
325
326 Q_ASSERT(m_startSyntaxPosition >= 0);
327 if (m_newlinePosition >= 0 && m_newlinePosition >= m_processedUpto)
328 nextPosition = qMin(m_startSyntaxPosition - 1, m_newlinePosition);
329 else
330 nextPosition = m_startSyntaxPosition - 1;
331 finalizeToken(nextPosition, true);
332 }
333
finalizeToken(int nextPosition,bool processSyntax)334 void Lexer::finalizeToken(int nextPosition, bool processSyntax)
335 {
336 {
337 Token token;
338 token.content
339 = m_templateString.mid(m_processedUpto, nextPosition - m_processedUpto);
340 token.tokenType = TextToken;
341 token.linenumber = m_lineCount;
342 m_tokenList.append(token);
343 }
344
345 m_processedUpto = nextPosition;
346
347 if (!processSyntax)
348 return;
349
350 m_processedUpto = m_endSyntaxPosition;
351
352 const auto differentiator
353 = *(m_templateString.constData() + m_startSyntaxPosition);
354 if (differentiator == QLatin1Char('#'))
355 return;
356
357 Token syntaxToken;
358 syntaxToken.content
359 = m_templateString
360 .mid(m_startSyntaxPosition + 1,
361 m_endSyntaxPosition - m_startSyntaxPosition - 3)
362 .trimmed();
363 syntaxToken.linenumber = m_lineCount;
364
365 if (differentiator == QLatin1Char('{')) {
366 syntaxToken.tokenType = VariableToken;
367 } else {
368 Q_ASSERT(differentiator == QLatin1Char('%'));
369 syntaxToken.tokenType = BlockToken;
370 }
371 m_tokenList.append(syntaxToken);
372 }
373