1 /*
2   This file is part of the Grantlee template system.
3 
4   Copyright (c) 2009,2010,2011 Stephen Kelly <steveire@gmail.com>
5 
6   This library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either version
9   2.1 of the Licence, or (at your option) any later version.
10 
11   This library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15 
16   You should have received a copy of the GNU Lesser General Public
17   License along with this library.  If not, see <http://www.gnu.org/licenses/>.
18 
19 */
20 
21 #include "lexer_p.h"
22 
23 using namespace Grantlee;
24 
25 typedef State<TextProcessingMachine::Type> TextProcessingState;
26 typedef TextProcessingMachine::Transition TextProcessingTransition;
27 
28 typedef LexerObject<TextProcessingState, NullTest, MarksClearer> ChurningState;
29 typedef LexerObject<TextProcessingState, NullTest, TokenFinalizer>
30     FinalizeTokenState;
31 typedef LexerObject<TextProcessingTransition, NullTest, TokenFinalizer>
32     EofHandler;
33 typedef LexerObject<TextProcessingTransition, NullTest,
34                     TokenFinalizerWithTrimming>
35     EofHandlerWithTrimming;
36 
37 typedef CharacterTransition<'{'> MaybeTemplateSyntaxHandler;
38 
39 typedef CharacterTransition<'%', MarkStartSyntax> TagStartHandler;
40 typedef CharacterTransition<'#', MarkStartSyntax> CommentStartHandler;
41 typedef CharacterTransition<'%'> TagEndHandler;
42 typedef CharacterTransition<'#'> CommentEndHandler;
43 typedef CharacterTransition<'{', MarkStartSyntax> BeginValueHandler;
44 typedef CharacterTransition<'}'> MaybeEndValueHandler;
45 typedef CharacterTransition<'\n', MarkNewline> NewlineHandler;
46 typedef CharacterTransition<'}', MarkEndSyntax> EndTemplateSyntaxHandler;
47 typedef NegateCharacterTransition<'}'> NotEndTemplateSyntaxHandler;
48 
49 typedef LexerObject<
50     TextProcessingTransition,
51     Negate<OrTest<CharacterTest<'{'>,
52                   OrTest<CharacterTest<'#'>, CharacterTest<'%'>>>>>
53     NotBeginTemplateSyntaxHandler;
54 
55 typedef LexerObject<
56     TextProcessingTransition,
57     Negate<OrTest<CharacterTest<'{'>,
58                   OrTest<CharacterTest<'#'>,
59                          OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>>
60     NotBeginTemplateSyntaxOrNewlineHandler;
61 
62 typedef LexerObject<
63     TextProcessingTransition,
64     Negate<OrTest<CharacterTest<'#'>,
65                   OrTest<CharacterTest<'%'>, CharacterTest<'\n'>>>>>
66     NotTagCommentOrNewlineHandler;
67 
68 typedef LexerObject<TextProcessingTransition,
69                     Negate<OrTest<IsSpace, CharacterTest<'{'>>>>
70     NonWhitespaceLineTextHandler;
71 
72 typedef LexerObject<TextProcessingTransition,
73                     AndTest<Negate<CharacterTest<'\n'>>, IsSpace>>
74     WhitespaceNonNewlineHandler;
75 
76 typedef LexerObject<TextProcessingTransition,
77                     Negate<OrTest<CharacterTest<'{'>, IsSpace>>, TokenFinalizer>
78     FinalizingLineTextHandler;
79 
80 typedef CharacterTransition<'\n', TokenFinalizerWithTrimmingAndNewline>
81     SyntaxBoundaryNewlineHandler;
82 typedef CharacterTransition<'{', FinalizeAndMarkStartSyntax>
83     SyntaxBoundaryHandler;
84 
85 template <typename Transition>
addTransition(TextProcessingState * source,Lexer * lexer,TextProcessingState * target)86 void addTransition(TextProcessingState *source, Lexer *lexer,
87                    TextProcessingState *target)
88 {
89   auto tr = new Transition(lexer, source);
90   tr->setTargetState(target);
91 }
92 
createMachine(Lexer * lexer,Lexer::TrimType type)93 TextProcessingMachine *createMachine(Lexer *lexer, Lexer::TrimType type)
94 {
95   auto machine = new TextProcessingMachine;
96 
97   auto notFinished = new TextProcessingState(machine);
98   auto finished = new TextProcessingState(machine);
99   machine->setInitialState(notFinished);
100 
101   auto processingText = new ChurningState(lexer, notFinished);
102   auto processingPostNewline = new TextProcessingState(notFinished);
103   auto processingBeginTemplateSyntax = new TextProcessingState(notFinished);
104   auto processingTag = new TextProcessingState(notFinished);
105   auto processingComment = new TextProcessingState(notFinished);
106   auto processingValue = new TextProcessingState(notFinished);
107   auto maybeProcessingValue = new TextProcessingState(notFinished);
108   auto processingEndTag = new TextProcessingState(notFinished);
109   auto processingEndComment = new TextProcessingState(notFinished);
110   auto processingEndValue = new TextProcessingState(notFinished);
111   TextProcessingState *processingPostTemplateSyntax;
112 
113   if (type == Lexer::SmartTrim)
114     processingPostTemplateSyntax = new TextProcessingState(notFinished);
115   else
116     processingPostTemplateSyntax = new FinalizeTokenState(lexer, notFinished);
117   auto processingPostTemplateSyntaxWhitespace
118       = new TextProcessingState(notFinished);
119 
120   if (type == Lexer::SmartTrim)
121     notFinished->setInitialState(processingPostNewline);
122   else
123     notFinished->setInitialState(processingText);
124 
125   if (type == Lexer::SmartTrim) {
126     addTransition<NewlineHandler>(processingText, lexer, processingPostNewline);
127 
128     addTransition<NewlineHandler>(processingPostNewline, lexer,
129                                   processingPostNewline);
130     addTransition<MaybeTemplateSyntaxHandler>(processingPostNewline, lexer,
131                                               processingBeginTemplateSyntax);
132     addTransition<NonWhitespaceLineTextHandler>(processingPostNewline, lexer,
133                                                 processingText);
134   }
135   addTransition<MaybeTemplateSyntaxHandler>(processingText, lexer,
136                                             processingBeginTemplateSyntax);
137 
138   addTransition<TagStartHandler>(processingBeginTemplateSyntax, lexer,
139                                  processingTag);
140   addTransition<CommentStartHandler>(processingBeginTemplateSyntax, lexer,
141                                      processingComment);
142   addTransition<BeginValueHandler>(processingBeginTemplateSyntax, lexer,
143                                    maybeProcessingValue);
144 
145   if (type == Lexer::SmartTrim) {
146     addTransition<NotBeginTemplateSyntaxOrNewlineHandler>(
147         processingBeginTemplateSyntax, lexer, processingText);
148     addTransition<NewlineHandler>(processingBeginTemplateSyntax, lexer,
149                                   processingPostNewline);
150   } else {
151     addTransition<NotBeginTemplateSyntaxHandler>(processingBeginTemplateSyntax,
152                                                  lexer, processingText);
153   }
154 
155   addTransition<NewlineHandler>(processingTag, lexer,
156                                 type == Lexer::SmartTrim ? processingPostNewline
157                                                          : processingText);
158   addTransition<TagEndHandler>(processingTag, lexer, processingEndTag);
159 
160   addTransition<NewlineHandler>(processingComment, lexer,
161                                 type == Lexer::SmartTrim ? processingPostNewline
162                                                          : processingText);
163   addTransition<CommentEndHandler>(processingComment, lexer,
164                                    processingEndComment);
165 
166   addTransition<TagStartHandler>(maybeProcessingValue, lexer, processingTag);
167   addTransition<CommentStartHandler>(maybeProcessingValue, lexer,
168                                      processingComment);
169   addTransition<NotTagCommentOrNewlineHandler>(maybeProcessingValue, lexer,
170                                                processingValue);
171   addTransition<NewlineHandler>(maybeProcessingValue, lexer,
172                                 type == Lexer::SmartTrim ? processingPostNewline
173                                                          : processingText);
174 
175   addTransition<NewlineHandler>(processingValue, lexer,
176                                 type == Lexer::SmartTrim ? processingPostNewline
177                                                          : processingText);
178   addTransition<MaybeEndValueHandler>(processingValue, lexer,
179                                       processingEndValue);
180 
181   addTransition<NewlineHandler>(processingEndTag, lexer, processingPostNewline);
182   addTransition<NotEndTemplateSyntaxHandler>(processingEndTag, lexer,
183                                              processingTag);
184   addTransition<EndTemplateSyntaxHandler>(processingEndTag, lexer,
185                                           processingPostTemplateSyntax);
186 
187   addTransition<NewlineHandler>(processingEndComment, lexer,
188                                 processingPostNewline);
189   addTransition<NotEndTemplateSyntaxHandler>(processingEndComment, lexer,
190                                              processingComment);
191   addTransition<EndTemplateSyntaxHandler>(processingEndComment, lexer,
192                                           processingPostTemplateSyntax);
193 
194   addTransition<NewlineHandler>(processingEndValue, lexer,
195                                 processingPostNewline);
196   addTransition<NotEndTemplateSyntaxHandler>(processingEndValue, lexer,
197                                              processingValue);
198   addTransition<EndTemplateSyntaxHandler>(processingEndValue, lexer,
199                                           processingPostTemplateSyntax);
200 
201   if (type != Lexer::SmartTrim) {
202     processingPostTemplateSyntax->setUnconditionalTransition(processingText);
203   } else {
204     addTransition<SyntaxBoundaryNewlineHandler>(processingPostTemplateSyntax,
205                                                 lexer, processingPostNewline);
206     addTransition<WhitespaceNonNewlineHandler>(
207         processingPostTemplateSyntax, lexer,
208         processingPostTemplateSyntaxWhitespace);
209     addTransition<FinalizingLineTextHandler>(processingPostTemplateSyntax,
210                                              lexer, processingText);
211     addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntax, lexer,
212                                          processingBeginTemplateSyntax);
213 
214     // NOTE: We only have to transition to this if there was whitespace
215     // before the opening tag. Maybe store that in an external state property?
216     // Actually, this may be a bug if we try to finalize with trimming and
217     // there is no leading whitespace.
218     addTransition<SyntaxBoundaryNewlineHandler>(
219         processingPostTemplateSyntaxWhitespace, lexer, processingPostNewline);
220     addTransition<FinalizingLineTextHandler>(
221         processingPostTemplateSyntaxWhitespace, lexer, processingText);
222     addTransition<SyntaxBoundaryHandler>(processingPostTemplateSyntaxWhitespace,
223                                          lexer, processingBeginTemplateSyntax);
224   }
225 
226   {
227     auto handler = new EofHandler(lexer, notFinished);
228     handler->setTargetState(finished);
229     notFinished->setEndTransition(handler);
230   }
231 
232   if (type == Lexer::SmartTrim) {
233     {
234       auto handler = new EofHandlerWithTrimming(
235           lexer, processingPostTemplateSyntaxWhitespace);
236       handler->setTargetState(finished);
237       processingPostTemplateSyntaxWhitespace->setEndTransition(handler);
238     }
239     {
240       auto handler
241           = new EofHandlerWithTrimming(lexer, processingPostTemplateSyntax);
242       handler->setTargetState(finished);
243       processingPostTemplateSyntax->setEndTransition(handler);
244     }
245   }
246   return machine;
247 }
248 
Lexer(const QString & templateString)249 Lexer::Lexer(const QString &templateString) : m_templateString(templateString)
250 {
251 }
252 
~Lexer()253 Lexer::~Lexer() {}
254 
clearMarkers()255 void Lexer::clearMarkers()
256 {
257   m_startSyntaxPosition = -1;
258   m_endSyntaxPosition = -1;
259   m_newlinePosition = -1;
260 }
261 
reset()262 void Lexer::reset()
263 {
264   m_tokenList.clear();
265   m_lineCount = 0;
266   m_upto = 0;
267   m_processedUpto = 0;
268   clearMarkers();
269 }
270 
tokenize(TrimType type)271 QList<Token> Lexer::tokenize(TrimType type)
272 {
273   auto machine = createMachine(this, type);
274 
275   machine->start();
276 
277   auto it = m_templateString.constBegin();
278   const auto end = m_templateString.constEnd();
279 
280   reset();
281   for (; it != end; ++it, ++m_upto)
282     machine->processCharacter(it);
283 
284   machine->finished();
285 
286   machine->stop();
287 
288   delete machine;
289 
290   return m_tokenList;
291 }
292 
markStartSyntax()293 void Lexer::markStartSyntax() { m_startSyntaxPosition = m_upto; }
294 
markEndSyntax()295 void Lexer::markEndSyntax() { m_endSyntaxPosition = m_upto + 1; }
296 
markNewline()297 void Lexer::markNewline()
298 {
299   m_newlinePosition = m_upto;
300   ++m_lineCount;
301 }
302 
finalizeToken()303 void Lexer::finalizeToken()
304 {
305   auto nextPosition = m_upto;
306   const auto validSyntax = m_endSyntaxPosition > m_startSyntaxPosition
307                            && (m_startSyntaxPosition >= m_processedUpto);
308 
309   if (validSyntax) {
310     Q_ASSERT(m_startSyntaxPosition >= 0);
311     nextPosition = m_startSyntaxPosition - 1;
312   }
313   finalizeToken(nextPosition, validSyntax);
314 }
315 
finalizeTokenWithTrimmedWhitespace()316 void Lexer::finalizeTokenWithTrimmedWhitespace()
317 {
318   auto nextPosition = m_upto;
319   // We know this to be true because the state machine has already guaranteed
320   // it. This method is only called from transition and state actions which
321   // occur after valid syntax.
322   // TODO Investigate performance and other implications of changing the state
323   // machine to assure similar in finalizeToken()
324   Q_ASSERT(m_endSyntaxPosition > m_startSyntaxPosition);
325 
326   Q_ASSERT(m_startSyntaxPosition >= 0);
327   if (m_newlinePosition >= 0 && m_newlinePosition >= m_processedUpto)
328     nextPosition = qMin(m_startSyntaxPosition - 1, m_newlinePosition);
329   else
330     nextPosition = m_startSyntaxPosition - 1;
331   finalizeToken(nextPosition, true);
332 }
333 
finalizeToken(int nextPosition,bool processSyntax)334 void Lexer::finalizeToken(int nextPosition, bool processSyntax)
335 {
336   {
337     Token token;
338     token.content
339         = m_templateString.mid(m_processedUpto, nextPosition - m_processedUpto);
340     token.tokenType = TextToken;
341     token.linenumber = m_lineCount;
342     m_tokenList.append(token);
343   }
344 
345   m_processedUpto = nextPosition;
346 
347   if (!processSyntax)
348     return;
349 
350   m_processedUpto = m_endSyntaxPosition;
351 
352   const auto differentiator
353       = *(m_templateString.constData() + m_startSyntaxPosition);
354   if (differentiator == QLatin1Char('#'))
355     return;
356 
357   Token syntaxToken;
358   syntaxToken.content
359       = m_templateString
360             .mid(m_startSyntaxPosition + 1,
361                  m_endSyntaxPosition - m_startSyntaxPosition - 3)
362             .trimmed();
363   syntaxToken.linenumber = m_lineCount;
364 
365   if (differentiator == QLatin1Char('{')) {
366     syntaxToken.tokenType = VariableToken;
367   } else {
368     Q_ASSERT(differentiator == QLatin1Char('%'));
369     syntaxToken.tokenType = BlockToken;
370   }
371   m_tokenList.append(syntaxToken);
372 }
373