/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */
21 
22 #ifndef Lexer_h
23 #define Lexer_h
24 
25 #include "Lookup.h"
26 #include "ParserArena.h"
27 #include "SourceCode.h"
28 #include <wtf/ASCIICType.h>
29 #include <wtf/SegmentedVector.h>
30 #include <wtf/Vector.h>
31 #include <wtf/unicode/Unicode.h>
32 
33 namespace JSC {
34 
35     class RegExp;
36 
37     class Lexer : public Noncopyable {
38     public:
39         // Character manipulation functions.
40         static bool isWhiteSpace(int character);
41         static bool isLineTerminator(int character);
42         static unsigned char convertHex(int c1, int c2);
43         static UChar convertUnicode(int c1, int c2, int c3, int c4);
44 
45         // Functions to set up parsing.
46         void setCode(const SourceCode&, ParserArena&);
setIsReparsing()47         void setIsReparsing() { m_isReparsing = true; }
48 
49         // Functions for the parser itself.
50         int lex(void* lvalp, void* llocp);
lineNumber()51         int lineNumber() const { return m_lineNumber; }
prevTerminator()52         bool prevTerminator() const { return m_terminator; }
53         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
54         bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
55         bool skipRegExp();
56 
57         // Functions for use after parsing.
sawError()58         bool sawError() const { return m_error; }
59         void clear();
60 
61     private:
62         friend class JSGlobalData;
63 
64         Lexer(JSGlobalData*);
65         ~Lexer();
66 
67         void shift1();
68         void shift2();
69         void shift3();
70         void shift4();
71         void shiftLineTerminator();
72 
73         void record8(int);
74         void record16(int);
75         void record16(UChar);
76 
77         void copyCodeWithoutBOMs();
78 
79         int currentOffset() const;
80         const UChar* currentCharacter() const;
81 
82         const Identifier* makeIdentifier(const UChar* characters, size_t length);
83 
84         bool lastTokenWasRestrKeyword() const;
85 
86         static const size_t initialReadBufferCapacity = 32;
87 
88         int m_lineNumber;
89 
90         Vector<char> m_buffer8;
91         Vector<UChar> m_buffer16;
92         bool m_terminator;
93         bool m_delimited; // encountered delimiter like "'" and "}" on last run
94         int m_lastToken;
95 
96         const SourceCode* m_source;
97         const UChar* m_code;
98         const UChar* m_codeStart;
99         const UChar* m_codeEnd;
100         bool m_isReparsing;
101         bool m_atLineStart;
102         bool m_error;
103 
104         // current and following unicode characters (int to allow for -1 for end-of-file marker)
105         int m_current;
106         int m_next1;
107         int m_next2;
108         int m_next3;
109 
110         IdentifierArena* m_arena;
111 
112         JSGlobalData* m_globalData;
113 
114         const HashTable m_keywordTable;
115 
116         Vector<UChar> m_codeWithoutBOMs;
117     };
118 
isWhiteSpace(int ch)119     inline bool Lexer::isWhiteSpace(int ch)
120     {
121         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
122     }
123 
isLineTerminator(int ch)124     inline bool Lexer::isLineTerminator(int ch)
125     {
126         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
127     }
128 
convertHex(int c1,int c2)129     inline unsigned char Lexer::convertHex(int c1, int c2)
130     {
131         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
132     }
133 
convertUnicode(int c1,int c2,int c3,int c4)134     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
135     {
136         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
137     }
138 
139     // A bridge for yacc from the C world to the C++ world.
jscyylex(void * lvalp,void * llocp,void * globalData)140     inline int jscyylex(void* lvalp, void* llocp, void* globalData)
141     {
142         return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
143     }
144 
145 } // namespace JSC
146 
147 #endif // Lexer_h
148