/*
 * This source file originally came from OGRE v1.7.2 - http://www.ogre3d.org/
 * with some tweaks as part of 0 A.D.
 * All changes are released under the original license, as follows:
 */

/*
-----------------------------------------------------------------------------
This source file is part of OGRE
    (Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.ogre3d.org/

Copyright (c) 2000-2009 Torus Knot Software Ltd

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
-----------------------------------------------------------------------------
*/

#ifndef INCLUDED_CPREPROCESSOR
#define INCLUDED_CPREPROCESSOR

// The inline code below uses size_t, NULL, free() and memcmp() unqualified,
// so pull in the C headers that declare them in the global namespace.
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/**
 * This is a simplistic C/C++-like preprocessor.
 * It takes a non-zero-terminated string on input and outputs a
 * non-zero-terminated string buffer.
 *
 * This preprocessor was designed specifically for GLSL shaders, so
 * if you want to use it for other purposes you might want to check
 * if the feature set it provides is enough for you.
 *
 * Here's a list of supported features:
 * <ul>
 * <li>Fast memory allocation-less operation (mostly).</li>
 * <li>Line continuation (backslash-newline) is swallowed.</li>
 * <li>Line numbering is fully preserved by inserting empty lines where
 *     required. This is crucial if, say, the GLSL compiler reports an error
 *     with a line number.</li>
 * <li>\#define: Parametrized and non-parametrized macros. Invoking a macro with
 *     fewer arguments than it takes assigns empty values to missing arguments.</li>
 * <li>\#undef: Forget defined macros</li>
 * <li>\#ifdef/\#ifndef/\#else/\#endif: Conditional suppression of parts of code.</li>
 * <li>\#if: Supports numeric expression of any complexity, also supports the
 *     defined() pseudo-function.</li>
 * </ul>
 */
class CPreprocessor
{
    /**
     * An input token.
     *
     * For performance reasons most tokens will point to portions of the
     * input stream, so no unneeded memory allocation is done. However,
     * in some cases we must allocate different memory for token storage,
     * in this case this is signalled by setting the Allocated member
     * to non-zero in which case the destructor will know that it must
     * free memory on object destruction.
     *
     * Again for performance reasons we use malloc/realloc/free here because
     * C++-style new[] lacks the realloc() counterpart.
     *
     * @note
     *     Copy construction and copy assignment TRANSFER ownership of an
     *     allocated buffer from the source token to the destination: the
     *     source keeps its pointer but its Allocated flag is cleared, so
     *     only the destination frees the buffer.
     */
    class Token
    {
    public:
        enum Kind
        {
            TK_EOS,          // End of input stream
            TK_ERROR,        // An error has been encountered
            TK_WHITESPACE,   // A whitespace span (but not newline)
            TK_NEWLINE,      // A single newline (CR & LF)
            TK_LINECONT,     // Line continuation ('\' followed by LF)
            TK_NUMBER,       // A number
            TK_KEYWORD,      // A keyword
            TK_PUNCTUATION,  // A punctuation character
            TK_DIRECTIVE,    // A preprocessor directive
            TK_STRING,       // A string
            TK_COMMENT,      // A block comment
            TK_LINECOMMENT,  // A line comment
            TK_TEXT,         // An unparsed text (cannot be returned from GetToken())
        };

        /// Token type
        Kind Type;
        /// Non-zero if the string was allocated (and must be freed).
        /// Mutable so that copying from a const token can clear the
        /// source's ownership flag.
        mutable size_t Allocated;
        union
        {
            /// A pointer somewhere into the input buffer
            const char *String;
            /// A memory-allocated string
            char *Buffer;
        };
        /// Token length in bytes
        size_t Length;

        /// Create an empty TK_ERROR token that owns no memory
        Token () : Type (TK_ERROR), Allocated (0), String (NULL), Length (0)
        { }

        /// Create an empty token of the given kind that owns no memory
        Token (Kind iType) : Type (iType), Allocated (0), String (NULL), Length (0)
        { }

        /// Create a non-owning token referring to iLength bytes at iString
        Token (Kind iType, const char *iString, size_t iLength) :
            Type (iType), Allocated (0), String (iString), Length (iLength)
        { }

        /// Copy constructor; takes over ownership of iOther's buffer (if any)
        Token (const Token &iOther) :
            Type (iOther.Type), Allocated (iOther.Allocated),
            String (iOther.String), Length (iOther.Length)
        {
            // Not a true copy: the source gives up ownership so the
            // buffer is freed exactly once (by this token).
            iOther.Allocated = 0;
        }

        /// Free the buffer if this token owns one
        ~Token ()
        { if (Allocated) free (Buffer); }

        /// Assignment operator; takes over ownership of iOther's buffer (if any)
        Token &operator = (const Token &iOther)
        {
            // Self-assignment must be a no-op: otherwise we would free our
            // own buffer and keep pointing at the released memory.
            if (this == &iOther)
                return *this;

            if (Allocated) free (Buffer);
            Type = iOther.Type;
            Allocated = iOther.Allocated;
            iOther.Allocated = 0; // ownership moves to this token
            String = iOther.String;
            Length = iOther.Length;
            return *this;
        }

        /// Append a string to this token
        void Append (const char *iString, size_t iLength);

        /// Append a token to this token
        void Append (const Token &iOther);

        /// Append given number of newlines to this token
        void AppendNL (int iCount);

        /// Count number of newlines in this token
        int CountNL ();

        /// Get the numeric value of the token
        bool GetValue (long &oValue) const;

        /// Set the numeric value of the token
        void SetValue (long iValue);

        /// Test two tokens for equality (same length and same bytes;
        /// the token Type is not compared)
        bool operator == (const Token &iOther) const
        {
            if (iOther.Length != Length)
                return false;
            // Empty tokens may carry a NULL String; do not hand that to memcmp.
            return Length == 0 || memcmp (String, iOther.String, Length) == 0;
        }
    };

    /**
     * A macro definition.
     *
     * NOTE(review): the destructor deletes Args and the whole Next chain,
     * but copying is not disabled; a copied Macro would delete both twice.
     */
    class Macro
    {
    public:
        /// Macro name
        Token Name;
        /// Number of arguments
        int NumArgs;
        /// The names of the arguments
        Token *Args;
        /// The macro value
        Token Value;
        /// Unparsed macro body (keeps the whole raw unparsed macro body)
        Token Body;
        /// Next macro in chained list
        Macro *Next;
        /// A pointer to function implementation (if macro is really a func)
        Token (*ExpandFunc) (CPreprocessor *iParent, int iNumArgs, Token *iArgs);
        /// true if macro expansion is in progress
        bool Expanding;

        /// Create a macro named iName with no arguments, value or successor
        Macro (const Token &iName) :
            Name (iName), NumArgs (0), Args (NULL), Next (NULL),
            ExpandFunc (NULL), Expanding (false)
        { }

        /// Release the argument array and every following macro in the chain
        ~Macro ()
        { delete [] Args; delete Next; }

        /// Expand the macro value (will not work for functions)
        Token Expand (int iNumArgs, Token *iArgs, Macro *iMacros);
    };

    friend class CPreprocessor::Macro;

    /// The current source text input
    const char *Source;
    /// The end of the source text
    const char *SourceEnd;
    /// Current line number
    int Line;
    /// True if we are at beginning of line
    bool BOL;
    /// A stack of 32 booleans packed into one value :)
    unsigned EnableOutput;
    /// The list of macros defined so far
    Macro *MacroList;

    /**
     * Private constructor to re-parse a single token.
     */
    CPreprocessor (const Token &iToken, int iLine);

    /**
     * Stateless tokenizer: Parse the input text and return the next token.
     * @param iExpand
     *     If true, macros will be expanded to their values
     * @return
     *     The next token from the input stream
     */
    Token GetToken (bool iExpand);

    /**
     * Handle a preprocessor directive.
     * @param iToken
     *     The whole preprocessor directive line (until EOL)
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     The last input token that was not processed.
     */
    Token HandleDirective (Token &iToken, int iLine);

    /**
     * Handle a \#define directive.
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleDefine (Token &iBody, int iLine);

    /**
     * Undefine a previously defined macro
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleUnDef (Token &iBody, int iLine);

    /**
     * Handle an \#ifdef directive.
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleIfDef (Token &iBody, int iLine);

    /**
     * Handle an \#if directive.
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleIf (Token &iBody, int iLine);

    /**
     * Handle an \#else directive.
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleElse (Token &iBody, int iLine);

    /**
     * Handle an \#endif directive.
     * @param iBody
     *     The body of the directive (everything after the directive
     *     until end of line).
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if everything went ok, false if not
     */
    bool HandleEndIf (Token &iBody, int iLine);

    /**
     * Get a single function argument until next ',' or ')'.
     * @param oArg
     *     The argument is returned in this variable.
     * @param iExpand
     *     If false, parameters are not expanded and no expressions are
     *     allowed; only a single keyword is expected per argument.
     * @return
     *     The first unhandled token after argument.
     */
    Token GetArgument (Token &oArg, bool iExpand);

    /**
     * Get all the arguments of a macro: '(' arg1 { ',' arg2 { ',' ... }} ')'
     * @param oNumArgs
     *     Number of parsed arguments is stored into this variable.
     * @param oArgs
     *     This is set to a pointer to an array of parsed arguments.
     * @param iExpand
     *     If false, parameters are not expanded and no expressions are
     *     allowed; only a single keyword is expected per argument.
     */
    Token GetArguments (int &oNumArgs, Token *&oArgs, bool iExpand);

    /**
     * Parse an expression, compute it and return the result.
     * @param oResult
     *     A token containing the result of expression
     * @param iLine
     *     The line at which the expression starts (for error reports)
     * @param iOpPriority
     *     Operator priority (at which operator we will stop if
     *     proceeding recursively -- used internally. Parser stops
     *     when it encounters an operator with higher or equal priority).
     * @return
     *     The last unhandled token after the expression
     */
    Token GetExpression (Token &oResult, int iLine, int iOpPriority = 0);

    /**
     * Get the numeric value of a token.
     * If the token was produced by expanding a macro, we will get
     * a TEXT token which can contain a whole expression; in this
     * case we will call GetExpression to parse it. Otherwise we
     * just call the token's GetValue() method.
     * @param iToken
     *     The token to get the numeric value of
     * @param oValue
     *     The variable to put the value into
     * @param iLine
     *     The line where the directive begins (for error reports)
     * @return
     *     true if ok, false if not
     */
    bool GetValue (const Token &iToken, long &oValue, int iLine);

    /**
     * Expand the given macro, if it exists.
     * If macro has arguments, they are collected from source stream.
     * @param iToken
     *     A KEYWORD token containing the (possible) macro name.
     * @return
     *     The expanded token or iToken if it is not a macro
     */
    Token ExpandMacro (const Token &iToken);

    /**
     * Check if a macro is defined, and if so, return it
     * @param iToken
     *     Macro name
     * @return
     *     The macro object or NULL if a macro with this name does not exist
     */
    Macro *IsDefined (const Token &iToken);

    /**
     * The implementation of the defined() preprocessor function
     * @param iParent
     *     The parent preprocessor object
     * @param iNumArgs
     *     Number of arguments
     * @param iArgs
     *     The arguments themselves
     * @return
     *     The return value encapsulated in a token
     */
    static Token ExpandDefined (CPreprocessor *iParent, int iNumArgs, Token *iArgs);

    /**
     * Parse the input string and return a token containing the whole output.
     * @param iSource
     *     The source text enclosed in a token
     * @return
     *     The output text enclosed in a token
     */
    Token Parse (const Token &iSource);

    /**
     * Call the error handler
     * @param iLine
     *     The line at which the error happened.
     * @param iError
     *     The error string.
     * @param iToken
     *     If not NULL contains the erroneous token
     */
    void Error (int iLine, const char *iError, const Token *iToken = NULL);

public:
    /**
     * Create an empty preprocessor object.
     *
     * Every data member gets a defined value so that nothing (in particular
     * ErrorData, which is documented as being passed to the error handler)
     * is left indeterminate if the user never assigns it.
     * NOTE(review): Line/BOL/EnableOutput are set to a "start of input,
     * output enabled" state; presumably Parse() re-initialises them --
     * confirm against the implementation file.
     */
    CPreprocessor () :
        Source (NULL), SourceEnd (NULL), Line (1), BOL (true),
        EnableOutput (1), MacroList (NULL), ErrorData (NULL)
    { }

    /// Destroy the preprocessor object
    virtual ~CPreprocessor ();

    /**
     * Define a macro without parameters.
     * @param iMacroName
     *     The name of the defined macro
     * @param iMacroNameLen
     *     The length of the name of the defined macro
     * @param iMacroValue
     *     The value of the defined macro
     * @param iMacroValueLen
     *     The length of the value of the defined macro
     */
    void Define (const char *iMacroName, size_t iMacroNameLen,
                 const char *iMacroValue, size_t iMacroValueLen);

    /**
     * Define a numerical macro.
     * @param iMacroName
     *     The name of the defined macro
     * @param iMacroNameLen
     *     The length of the name of the defined macro
     * @param iMacroValue
     *     The value of the defined macro
     */
    void Define (const char *iMacroName, size_t iMacroNameLen, long iMacroValue);

    /**
     * Define a macro without parameters.
     * @param iMacroName
     *     The name of the defined macro
     * @param iMacroValue
     *     The value of the defined macro
     */
    void Define (const char *iMacroName, const char *iMacroValue);

    /**
     * Define a numerical macro.
     * @param iMacroName
     *     The name of the defined macro
     * @param iMacroValue
     *     The value of the defined macro
     */
    void Define (const char *iMacroName, long iMacroValue);

    /**
     * Undefine a macro.
     * @param iMacroName
     *     The name of the macro to undefine
     * @param iMacroNameLen
     *     The length of the name of the macro to undefine
     * @return
     *     true if the macro has been undefined, false if macro doesn't exist
     */
    bool Undef (const char *iMacroName, size_t iMacroNameLen);

    /**
     * Parse the input string and return a newly-allocated output string.
     * @note
     *     The returned preprocessed string is NOT zero-terminated
     *     (just like the input string).
     * @param iSource
     *     The source text
     * @param iLength
     *     The length of the source text in characters
     * @param oLength
     *     The length of the output string.
     * @return
     *     The output from preprocessor, allocated with malloc().
     *     The parser can actually allocate more than needed for performance
     *     reasons, but this should not be a problem unless you will want
     *     to store the returned pointer for long time in which case you
     *     might want to realloc() it.
     *     If an error has been encountered, the function returns NULL.
     *     In some cases the function may return an unallocated address
     *     that's *inside* the source buffer. You must free() the result
     *     string only if the returned address is not inside the source text.
     */
    char *Parse (const char *iSource, size_t iLength, size_t &oLength);

    /**
     * An error handler function type.
     * The default implementation just drops a note to stderr and
     * then the parser ends, returning NULL.
     * @param iData
     *     User-specific pointer from the corresponding CPreprocessor object.
     * @param iLine
     *     The line at which the error happened.
     * @param iError
     *     The error string.
     * @param iToken
     *     If not NULL contains the erroneous token
     * @param iTokenLen
     *     The length of iToken. iToken is never zero-terminated!
     */
    typedef void (*ErrorHandlerFunc) (
        void *iData, int iLine, const char *iError,
        const char *iToken, size_t iTokenLen);

    /**
     * A pointer to the preprocessor's error handler.
     * You can assign the address of your own function to this variable
     * and implement your own error handling (e.g. throwing an exception etc).
     */
    static ErrorHandlerFunc ErrorHandler;

    /// User-specific storage, passed to Error()
    void *ErrorData;
};

#endif // INCLUDED_CPREPROCESSOR