1 /*
2 ===========================================================================
3
4 Doom 3 GPL Source Code
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
6
7 This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
8
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code. If not, see <http://www.gnu.org/licenses/>.
21
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code. If not, please request a copy in writing from id Software at the address below.
23
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
25
26 ===========================================================================
27 */
28
29 #ifndef __LEXER_H__
30 #define __LEXER_H__
31
32 #include "idlib/Token.h"
33
34 /*
35 ===============================================================================
36
37 Lexicographical parser
38
39 Does not use memory allocation during parsing. The lexer uses no
40 memory allocation if a source is loaded with LoadMemory().
41 However, idToken may still allocate memory for large strings.
42
43 A number directly following the escape character '\' in a string is
44 assumed to be in decimal format instead of octal. Binary numbers of
45 the form 0b.. or 0B.. can also be used.
46
47 ===============================================================================
48 */
49
50 // lexer flags
51 typedef enum {
52 LEXFL_NOERRORS = BIT(0), // don't print any errors
53 LEXFL_NOWARNINGS = BIT(1), // don't print any warnings
54 LEXFL_NOFATALERRORS = BIT(2), // errors aren't fatal
55 LEXFL_NOSTRINGCONCAT = BIT(3), // multiple strings seperated by whitespaces are not concatenated
56 LEXFL_NOSTRINGESCAPECHARS = BIT(4), // no escape characters inside strings
57 LEXFL_NODOLLARPRECOMPILE = BIT(5), // don't use the $ sign for precompilation
58 LEXFL_NOBASEINCLUDES = BIT(6), // don't include files embraced with < >
59 LEXFL_ALLOWPATHNAMES = BIT(7), // allow path seperators in names
60 LEXFL_ALLOWNUMBERNAMES = BIT(8), // allow names to start with a number
61 LEXFL_ALLOWIPADDRESSES = BIT(9), // allow ip addresses to be parsed as numbers
62 LEXFL_ALLOWFLOATEXCEPTIONS = BIT(10), // allow float exceptions like 1.#INF or 1.#IND to be parsed
63 LEXFL_ALLOWMULTICHARLITERALS = BIT(11), // allow multi character literals
64 LEXFL_ALLOWBACKSLASHSTRINGCONCAT = BIT(12), // allow multiple strings seperated by '\' to be concatenated
65 LEXFL_ONLYSTRINGS = BIT(13) // parse as whitespace deliminated strings (quoted strings keep quotes)
66 } lexerFlags_t;
67
// punctuation ids: consecutive values 1..52, kept as preprocessor macros

// ids 1-4
#define P_RSHIFT_ASSIGN				1
#define P_LSHIFT_ASSIGN				2
#define P_PARMS						3
#define P_PRECOMPMERGE				4

// ids 5-10: logic operators
#define P_LOGIC_AND					5
#define P_LOGIC_OR					6
#define P_LOGIC_GEQ					7
#define P_LOGIC_LEQ					8
#define P_LOGIC_EQ					9
#define P_LOGIC_UNEQ				10

// ids 11-17: arithmetic assignments, increment and decrement
#define P_MUL_ASSIGN				11
#define P_DIV_ASSIGN				12
#define P_MOD_ASSIGN				13
#define P_ADD_ASSIGN				14
#define P_SUB_ASSIGN				15
#define P_INC						16
#define P_DEC						17

// ids 18-22: binary assignments and shifts
#define P_BIN_AND_ASSIGN			18
#define P_BIN_OR_ASSIGN				19
#define P_BIN_XOR_ASSIGN			20
#define P_RSHIFT					21
#define P_LSHIFT					22

// ids 23-31
#define P_POINTERREF				23
#define P_CPP1						24
#define P_CPP2						25
#define P_MUL						26
#define P_DIV						27
#define P_MOD						28
#define P_ADD						29
#define P_SUB						30
#define P_ASSIGN					31

// ids 32-35: binary operators
#define P_BIN_AND					32
#define P_BIN_OR					33
#define P_BIN_XOR					34
#define P_BIN_NOT					35

// ids 36-38
#define P_LOGIC_NOT					36
#define P_LOGIC_GREATER				37
#define P_LOGIC_LESS				38

// ids 39-43
#define P_REF						39
#define P_COMMA						40
#define P_SEMICOLON					41
#define P_COLON						42
#define P_QUESTIONMARK				43

// ids 44-50: brackets and backslash
#define P_PARENTHESESOPEN			44
#define P_PARENTHESESCLOSE			45
#define P_BRACEOPEN					46
#define P_BRACECLOSE				47
#define P_SQBRACKETOPEN				48
#define P_SQBRACKETCLOSE			49
#define P_BACKSLASH					50

// ids 51-52
#define P_PRECOMP					51
#define P_DOLLAR					52
130
// a single punctuation entry: the punctuation text paired with its id
struct punctuation_s
{
	const char *p;	// punctuation character(s)
	int n;			// punctuation id
};

typedef struct punctuation_s punctuation_t;
137
138
139 class idLexer {
140
141 friend class idParser;
142
143 public:
144 // constructor
145 idLexer();
146 idLexer( int flags );
147 idLexer( const char *filename, int flags = 0, bool OSPath = false );
148 idLexer( const char *ptr, int length, const char *name, int flags = 0 );
149 // destructor
150 ~idLexer();
151 // load a script from the given file at the given offset with the given length
152 int LoadFile( const char *filename, bool OSPath = false );
153 // load a script from the given memory with the given length and a specified line offset,
154 // so source strings extracted from a file can still refer to proper line numbers in the file
155 // NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0'
156 int LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 );
157 // free the script
158 void FreeSource( void );
159 // returns true if a script is loaded
IsLoaded(void)160 int IsLoaded( void ) { return idLexer::loaded; };
161 // read a token
162 int ReadToken( idToken *token );
163 // expect a certain token, reads the token when available
164 int ExpectTokenString( const char *string );
165 // expect a certain token type
166 int ExpectTokenType( int type, int subtype, idToken *token );
167 // expect a token
168 int ExpectAnyToken( idToken *token );
169 // returns true when the token is available
170 int CheckTokenString( const char *string );
171 // returns true an reads the token when a token with the given type is available
172 int CheckTokenType( int type, int subtype, idToken *token );
173 // returns true if the next token equals the given string but does not remove the token from the source
174 int PeekTokenString( const char *string );
175 // returns true if the next token equals the given type but does not remove the token from the source
176 int PeekTokenType( int type, int subtype, idToken *token );
177 // skip tokens until the given token string is read
178 int SkipUntilString( const char *string );
179 // skip the rest of the current line
180 int SkipRestOfLine( void );
181 // skip the braced section
182 int SkipBracedSection( bool parseFirstBrace = true );
183 // unread the given token
184 void UnreadToken( const idToken *token );
185 // read a token only if on the same line
186 int ReadTokenOnLine( idToken *token );
187
188 //Returns the rest of the current line
189 const char* ReadRestOfLine(idStr& out);
190
191 // read a signed integer
192 int ParseInt( void );
193 // read a boolean
194 bool ParseBool( void );
195 // read a floating point number. If errorFlag is NULL, a non-numeric token will
196 // issue an Error(). If it isn't NULL, it will issue a Warning() and set *errorFlag = true
197 float ParseFloat( bool *errorFlag = NULL );
198 // parse matrices with floats
199 int Parse1DMatrix( int x, float *m );
200 int Parse2DMatrix( int y, int x, float *m );
201 int Parse3DMatrix( int z, int y, int x, float *m );
202 // parse a braced section into a string
203 const char * ParseBracedSection( idStr &out );
204 // parse a braced section into a string, maintaining indents and newlines
205 const char * ParseBracedSectionExact ( idStr &out, int tabs = -1 );
206 // parse the rest of the line
207 const char * ParseRestOfLine( idStr &out );
208 // retrieves the white space characters before the last read token
209 int GetLastWhiteSpace( idStr &whiteSpace ) const;
210 // returns start index into text buffer of last white space
211 int GetLastWhiteSpaceStart( void ) const;
212 // returns end index into text buffer of last white space
213 int GetLastWhiteSpaceEnd( void ) const;
214 // set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example
215 void SetPunctuations( const punctuation_t *p );
216 // returns a pointer to the punctuation with the given id
217 const char * GetPunctuationFromId( int id );
218 // get the id for the given punctuation
219 int GetPunctuationId( const char *p );
220 // set lexer flags
221 void SetFlags( int flags );
222 // get lexer flags
223 int GetFlags( void );
224 // reset the lexer
225 void Reset( void );
226 // returns true if at the end of the file
227 int EndOfFile( void );
228 // returns the current filename
229 const char * GetFileName( void );
230 // get offset in script
231 const int GetFileOffset( void );
232 // get file time
233 const ID_TIME_T GetFileTime( void );
234 // returns the current line number
235 const int GetLineNum( void );
236 // print an error message
237 void Error( const char *str, ... ) id_attribute((format(printf,2,3)));
238 // print a warning message
239 void Warning( const char *str, ... ) id_attribute((format(printf,2,3)));
240 // returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set
241 bool HadError( void ) const;
242
243 // set the base folder to load files from
244 static void SetBaseFolder( const char *path );
245
246 private:
247 int loaded; // set when a script file is loaded from file or memory
248 idStr filename; // file name of the script
249 int allocated; // true if buffer memory was allocated
250 const char * buffer; // buffer containing the script
251 const char * script_p; // current pointer in the script
252 const char * end_p; // pointer to the end of the script
253 const char * lastScript_p; // script pointer before reading token
254 const char * whiteSpaceStart_p; // start of last white space
255 const char * whiteSpaceEnd_p; // end of last white space
256 ID_TIME_T fileTime; // file time
257 int length; // length of the script in bytes
258 int line; // current line in script
259 int lastline; // line before reading token
260 int tokenavailable; // set by unreadToken
261 int flags; // several script flags
262 const punctuation_t *punctuations; // the punctuations used in the script
263 int * punctuationtable; // ASCII table with punctuations
264 int * nextpunctuation; // next punctuation in chain
265 idToken token; // available token
266 idLexer * next; // next script in a chain
267 bool hadError; // set by idLexer::Error, even if the error is supressed
268
269 static char baseFolder[ 256 ]; // base folder to load files from
270
271 private:
272 void CreatePunctuationTable( const punctuation_t *punctuations );
273 int ReadWhiteSpace( void );
274 int ReadEscapeCharacter( char *ch );
275 int ReadString( idToken *token, int quote );
276 int ReadName( idToken *token );
277 int ReadNumber( idToken *token );
278 int ReadPunctuation( idToken *token );
279 int ReadPrimitive( idToken *token );
280 int CheckString( const char *str ) const;
281 int NumLinesCrossed( void );
282 };
283
GetFileName(void)284 ID_INLINE const char *idLexer::GetFileName( void ) {
285 return idLexer::filename;
286 }
287
GetFileOffset(void)288 ID_INLINE const int idLexer::GetFileOffset( void ) {
289 return idLexer::script_p - idLexer::buffer;
290 }
291
GetFileTime(void)292 ID_INLINE const ID_TIME_T idLexer::GetFileTime( void ) {
293 return idLexer::fileTime;
294 }
295
GetLineNum(void)296 ID_INLINE const int idLexer::GetLineNum( void ) {
297 return idLexer::line;
298 }
299
SetFlags(int flags)300 ID_INLINE void idLexer::SetFlags( int flags ) {
301 idLexer::flags = flags;
302 }
303
GetFlags(void)304 ID_INLINE int idLexer::GetFlags( void ) {
305 return idLexer::flags;
306 }
307
308 #endif /* !__LEXER_H__ */
309