1 /*
2 ===========================================================================
3 
4 Doom 3 GPL Source Code
5 Copyright (C) 1999-2011 id Software LLC, a ZeniMax Media company.
6 
7 This file is part of the Doom 3 GPL Source Code ("Doom 3 Source Code").
8 
9 Doom 3 Source Code is free software: you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation, either version 3 of the License, or
12 (at your option) any later version.
13 
14 Doom 3 Source Code is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with Doom 3 Source Code.  If not, see <http://www.gnu.org/licenses/>.
21 
22 In addition, the Doom 3 Source Code is also subject to certain additional terms. You should have received a copy of these additional terms immediately following the terms and conditions of the GNU General Public License which accompanied the Doom 3 Source Code.  If not, please request a copy in writing from id Software at the address below.
23 
24 If you have questions concerning this license or the applicable additional terms, you may contact in writing id Software LLC, c/o ZeniMax Media Inc., Suite 120, Rockville, Maryland 20850 USA.
25 
26 ===========================================================================
27 */
28 
29 #ifndef __LEXER_H__
30 #define __LEXER_H__
31 
32 #include "idlib/Token.h"
33 
/*
===============================================================================

	Lexicographical parser

	Does not use memory allocation during parsing. The lexer uses no
	memory allocation if a source is loaded with LoadMemory().
	However, idToken may still allocate memory for large strings.

	A number directly following the escape character '\' in a string is
	assumed to be in decimal format instead of octal. Binary numbers of
	the form 0b.. or 0B.. can also be used.

===============================================================================
*/
49 
50 // lexer flags
51 typedef enum {
52 	LEXFL_NOERRORS						= BIT(0),	// don't print any errors
53 	LEXFL_NOWARNINGS					= BIT(1),	// don't print any warnings
54 	LEXFL_NOFATALERRORS					= BIT(2),	// errors aren't fatal
55 	LEXFL_NOSTRINGCONCAT				= BIT(3),	// multiple strings seperated by whitespaces are not concatenated
56 	LEXFL_NOSTRINGESCAPECHARS			= BIT(4),	// no escape characters inside strings
57 	LEXFL_NODOLLARPRECOMPILE			= BIT(5),	// don't use the $ sign for precompilation
58 	LEXFL_NOBASEINCLUDES				= BIT(6),	// don't include files embraced with < >
59 	LEXFL_ALLOWPATHNAMES				= BIT(7),	// allow path seperators in names
60 	LEXFL_ALLOWNUMBERNAMES				= BIT(8),	// allow names to start with a number
61 	LEXFL_ALLOWIPADDRESSES				= BIT(9),	// allow ip addresses to be parsed as numbers
62 	LEXFL_ALLOWFLOATEXCEPTIONS			= BIT(10),	// allow float exceptions like 1.#INF or 1.#IND to be parsed
63 	LEXFL_ALLOWMULTICHARLITERALS		= BIT(11),	// allow multi character literals
64 	LEXFL_ALLOWBACKSLASHSTRINGCONCAT	= BIT(12),	// allow multiple strings seperated by '\' to be concatenated
65 	LEXFL_ONLYSTRINGS					= BIT(13)	// parse as whitespace deliminated strings (quoted strings keep quotes)
66 } lexerFlags_t;
67 
// punctuation ids
// Consecutive ids 1..52. The id is the value stored in punctuation_t::n; the
// matching character sequences live in the punctuation table handed to
// idLexer::SetPunctuations (see default_punctuations for an example).
// These stay preprocessor macros so they remain usable in #if expressions.
#define P_RSHIFT_ASSIGN				1
#define P_LSHIFT_ASSIGN				2
#define P_PARMS						3
#define P_PRECOMPMERGE				4

#define P_LOGIC_AND					5
#define P_LOGIC_OR					6
#define P_LOGIC_GEQ					7
#define P_LOGIC_LEQ					8
#define P_LOGIC_EQ					9
#define P_LOGIC_UNEQ				10

#define P_MUL_ASSIGN				11
#define P_DIV_ASSIGN				12
#define P_MOD_ASSIGN				13
#define P_ADD_ASSIGN				14
#define P_SUB_ASSIGN				15
#define P_INC						16
#define P_DEC						17

#define P_BIN_AND_ASSIGN			18
#define P_BIN_OR_ASSIGN				19
#define P_BIN_XOR_ASSIGN			20
#define P_RSHIFT					21
#define P_LSHIFT					22

#define P_POINTERREF				23
#define P_CPP1						24
#define P_CPP2						25
#define P_MUL						26
#define P_DIV						27
#define P_MOD						28
#define P_ADD						29
#define P_SUB						30
#define P_ASSIGN					31

#define P_BIN_AND					32
#define P_BIN_OR					33
#define P_BIN_XOR					34
#define P_BIN_NOT					35

#define P_LOGIC_NOT					36
#define P_LOGIC_GREATER				37
#define P_LOGIC_LESS				38

#define P_REF						39
#define P_COMMA						40
#define P_SEMICOLON					41
#define P_COLON						42
#define P_QUESTIONMARK				43

#define P_PARENTHESESOPEN			44
#define P_PARENTHESESCLOSE			45
#define P_BRACEOPEN					46
#define P_BRACECLOSE				47
#define P_SQBRACKETOPEN				48
#define P_SQBRACKETCLOSE			49
#define P_BACKSLASH					50

#define P_PRECOMP					51
#define P_DOLLAR					52
130 
// punctuation
// One entry of a punctuation table (see idLexer::SetPunctuations).
// Kept as a plain aggregate so tables can be written with brace initializers.
typedef struct punctuation_s
{
	const char *p;						// punctuation character(s)
	int n;							// punctuation id (see the P_* defines)
} punctuation_t;
137 
138 
139 class idLexer {
140 
141 	friend class idParser;
142 
143 public:
144 					// constructor
145 					idLexer();
146 					idLexer( int flags );
147 					idLexer( const char *filename, int flags = 0, bool OSPath = false );
148 					idLexer( const char *ptr, int length, const char *name, int flags = 0 );
149 					// destructor
150 					~idLexer();
151 					// load a script from the given file at the given offset with the given length
152 	int				LoadFile( const char *filename, bool OSPath = false );
153 					// load a script from the given memory with the given length and a specified line offset,
154 					// so source strings extracted from a file can still refer to proper line numbers in the file
155 					// NOTE: the ptr is expected to point at a valid C string: ptr[length] == '\0'
156 	int				LoadMemory( const char *ptr, int length, const char *name, int startLine = 1 );
157 					// free the script
158 	void			FreeSource( void );
159 					// returns true if a script is loaded
IsLoaded(void)160 	int				IsLoaded( void ) { return idLexer::loaded; };
161 					// read a token
162 	int				ReadToken( idToken *token );
163 					// expect a certain token, reads the token when available
164 	int				ExpectTokenString( const char *string );
165 					// expect a certain token type
166 	int				ExpectTokenType( int type, int subtype, idToken *token );
167 					// expect a token
168 	int				ExpectAnyToken( idToken *token );
169 					// returns true when the token is available
170 	int				CheckTokenString( const char *string );
171 					// returns true an reads the token when a token with the given type is available
172 	int				CheckTokenType( int type, int subtype, idToken *token );
173 					// returns true if the next token equals the given string but does not remove the token from the source
174 	int				PeekTokenString( const char *string );
175 					// returns true if the next token equals the given type but does not remove the token from the source
176 	int				PeekTokenType( int type, int subtype, idToken *token );
177 					// skip tokens until the given token string is read
178 	int				SkipUntilString( const char *string );
179 					// skip the rest of the current line
180 	int				SkipRestOfLine( void );
181 					// skip the braced section
182 	int				SkipBracedSection( bool parseFirstBrace = true );
183 					// unread the given token
184 	void			UnreadToken( const idToken *token );
185 					// read a token only if on the same line
186 	int				ReadTokenOnLine( idToken *token );
187 
188 					//Returns the rest of the current line
189 	const char*		ReadRestOfLine(idStr& out);
190 
191 					// read a signed integer
192 	int				ParseInt( void );
193 					// read a boolean
194 	bool			ParseBool( void );
195 					// read a floating point number.  If errorFlag is NULL, a non-numeric token will
196 					// issue an Error().  If it isn't NULL, it will issue a Warning() and set *errorFlag = true
197 	float			ParseFloat( bool *errorFlag = NULL );
198 					// parse matrices with floats
199 	int				Parse1DMatrix( int x, float *m );
200 	int				Parse2DMatrix( int y, int x, float *m );
201 	int				Parse3DMatrix( int z, int y, int x, float *m );
202 					// parse a braced section into a string
203 	const char *	ParseBracedSection( idStr &out );
204 					// parse a braced section into a string, maintaining indents and newlines
205 	const char *	ParseBracedSectionExact ( idStr &out, int tabs = -1 );
206 					// parse the rest of the line
207 	const char *	ParseRestOfLine( idStr &out );
208 					// retrieves the white space characters before the last read token
209 	int				GetLastWhiteSpace( idStr &whiteSpace ) const;
210 					// returns start index into text buffer of last white space
211 	int				GetLastWhiteSpaceStart( void ) const;
212 					// returns end index into text buffer of last white space
213 	int				GetLastWhiteSpaceEnd( void ) const;
214 					// set an array with punctuations, NULL restores default C/C++ set, see default_punctuations for an example
215 	void			SetPunctuations( const punctuation_t *p );
216 					// returns a pointer to the punctuation with the given id
217 	const char *	GetPunctuationFromId( int id );
218 					// get the id for the given punctuation
219 	int				GetPunctuationId( const char *p );
220 					// set lexer flags
221 	void			SetFlags( int flags );
222 					// get lexer flags
223 	int				GetFlags( void );
224 					// reset the lexer
225 	void			Reset( void );
226 					// returns true if at the end of the file
227 	int				EndOfFile( void );
228 					// returns the current filename
229 	const char *	GetFileName( void );
230 					// get offset in script
231 	const int		GetFileOffset( void );
232 					// get file time
233 	const ID_TIME_T	GetFileTime( void );
234 					// returns the current line number
235 	const int		GetLineNum( void );
236 					// print an error message
237 	void			Error( const char *str, ... ) id_attribute((format(printf,2,3)));
238 					// print a warning message
239 	void			Warning( const char *str, ... ) id_attribute((format(printf,2,3)));
240 					// returns true if Error() was called with LEXFL_NOFATALERRORS or LEXFL_NOERRORS set
241 	bool			HadError( void ) const;
242 
243 					// set the base folder to load files from
244 	static void		SetBaseFolder( const char *path );
245 
246 private:
247 	int				loaded;					// set when a script file is loaded from file or memory
248 	idStr			filename;				// file name of the script
249 	int				allocated;				// true if buffer memory was allocated
250 	const char *	buffer;					// buffer containing the script
251 	const char *	script_p;				// current pointer in the script
252 	const char *	end_p;					// pointer to the end of the script
253 	const char *	lastScript_p;			// script pointer before reading token
254 	const char *	whiteSpaceStart_p;		// start of last white space
255 	const char *	whiteSpaceEnd_p;		// end of last white space
256 	ID_TIME_T			fileTime;				// file time
257 	int				length;					// length of the script in bytes
258 	int				line;					// current line in script
259 	int				lastline;				// line before reading token
260 	int				tokenavailable;			// set by unreadToken
261 	int				flags;					// several script flags
262 	const punctuation_t *punctuations;		// the punctuations used in the script
263 	int *			punctuationtable;		// ASCII table with punctuations
264 	int *			nextpunctuation;		// next punctuation in chain
265 	idToken			token;					// available token
266 	idLexer *		next;					// next script in a chain
267 	bool			hadError;				// set by idLexer::Error, even if the error is supressed
268 
269 	static char		baseFolder[ 256 ];		// base folder to load files from
270 
271 private:
272 	void			CreatePunctuationTable( const punctuation_t *punctuations );
273 	int				ReadWhiteSpace( void );
274 	int				ReadEscapeCharacter( char *ch );
275 	int				ReadString( idToken *token, int quote );
276 	int				ReadName( idToken *token );
277 	int				ReadNumber( idToken *token );
278 	int				ReadPunctuation( idToken *token );
279 	int				ReadPrimitive( idToken *token );
280 	int				CheckString( const char *str ) const;
281 	int				NumLinesCrossed( void );
282 };
283 
GetFileName(void)284 ID_INLINE const char *idLexer::GetFileName( void ) {
285 	return idLexer::filename;
286 }
287 
GetFileOffset(void)288 ID_INLINE const int idLexer::GetFileOffset( void ) {
289 	return idLexer::script_p - idLexer::buffer;
290 }
291 
GetFileTime(void)292 ID_INLINE const ID_TIME_T idLexer::GetFileTime( void ) {
293 	return idLexer::fileTime;
294 }
295 
GetLineNum(void)296 ID_INLINE const int idLexer::GetLineNum( void ) {
297 	return idLexer::line;
298 }
299 
SetFlags(int flags)300 ID_INLINE void idLexer::SetFlags( int flags ) {
301 	idLexer::flags = flags;
302 }
303 
GetFlags(void)304 ID_INLINE int idLexer::GetFlags( void ) {
305 	return idLexer::flags;
306 }
307 
308 #endif /* !__LEXER_H__ */
309