1 /*
2  * This source file originally came from OGRE v1.7.2 - http://www.ogre3d.org/
3  * with some tweaks as part of 0 A.D.
4  * All changes are released under the original license, as follows:
5  */
6 
7 /*
8 -----------------------------------------------------------------------------
9 This source file is part of OGRE
10     (Object-oriented Graphics Rendering Engine)
11 For the latest info, see http://www.ogre3d.org/
12 
13 Copyright (c) 2000-2009 Torus Knot Software Ltd
14 
15 Permission is hereby granted, free of charge, to any person obtaining a copy
16 of this software and associated documentation files (the "Software"), to deal
17 in the Software without restriction, including without limitation the rights
18 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19 copies of the Software, and to permit persons to whom the Software is
20 furnished to do so, subject to the following conditions:
21 
22 The above copyright notice and this permission notice shall be included in
23 all copies or substantial portions of the Software.
24 
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
31 THE SOFTWARE.
32 -----------------------------------------------------------------------------
33 */
34 
35 #ifndef INCLUDED_CPREPROCESSOR
36 #define INCLUDED_CPREPROCESSOR
37 
38 /**
 * This is a simplistic C/C++-like preprocessor.
 * It takes a non-zero-terminated string as input and outputs a
 * non-zero-terminated string buffer.
42  *
43  * This preprocessor was designed specifically for GLSL shaders, so
44  * if you want to use it for other purposes you might want to check
45  * if the feature set it provides is enough for you.
46  *
47  * Here's a list of supported features:
48  * <ul>
49  * <li>Fast memory allocation-less operation (mostly).</li>
50  * <li>Line continuation (backslash-newline) is swallowed.</li>
51  * <li>Line numeration is fully preserved by inserting empty lines where
52  *     required. This is crucial if, say, GLSL compiler reports you an error
53  *     with a line number.</li>
 * <li>\#define: Parametrized and non-parametrized macros. Invoking a macro with
 *     fewer arguments than it takes assigns empty values to missing arguments.</li>
56  * <li>\#undef: Forget defined macros</li>
57  * <li>\#ifdef/\#ifndef/\#else/\#endif: Conditional suppression of parts of code.</li>
58  * <li>\#if: Supports numeric expression of any complexity, also supports the
59  *     defined() pseudo-function.</li>
60  * </ul>
61  */
62 class CPreprocessor
63 {
64     /**
65      * A input token.
66      *
67      * For performance reasons most tokens will point to portions of the
68      * input stream, so no unneeded memory allocation is done. However,
69      * in some cases we must allocate different memory for token storage,
70      * in this case this is signalled by setting the Allocated member
71      * to non-zero in which case the destructor will know that it must
72      * free memory on object destruction.
73      *
74      * Again for performance reasons we use malloc/realloc/free here because
75      * C++-style new[] lacks the realloc() counterpart.
76      */
77     class Token
78     {
79     public:
80         enum Kind
81         {
82             TK_EOS,          // End of input stream
83             TK_ERROR,        // An error has been encountered
84             TK_WHITESPACE,   // A whitespace span (but not newline)
85             TK_NEWLINE,      // A single newline (CR & LF)
86             TK_LINECONT,     // Line continuation ('\' followed by LF)
87             TK_NUMBER,       // A number
88             TK_KEYWORD,      // A keyword
89             TK_PUNCTUATION,  // A punctuation character
90             TK_DIRECTIVE,    // A preprocessor directive
91             TK_STRING,       // A string
92             TK_COMMENT,      // A block comment
93             TK_LINECOMMENT,  // A line comment
94             TK_TEXT,         // An unparsed text (cannot be returned from GetToken())
95         };
96 
97         /// Token type
98         Kind Type;
99         /// True if string was allocated (and must be freed)
100         mutable size_t Allocated;
101         union
102         {
103             /// A pointer somewhere into the input buffer
104             const char *String;
105             /// A memory-allocated string
106             char *Buffer;
107         };
108         /// Token length in bytes
109         size_t Length;
110 
Token()111         Token () : Type (TK_ERROR), Allocated (0), String (NULL), Length (0)
112         { }
113 
Token(Kind iType)114         Token (Kind iType) : Type (iType), Allocated (0), String (NULL), Length (0)
115         { }
116 
Token(Kind iType,const char * iString,size_t iLength)117         Token (Kind iType, const char *iString, size_t iLength) :
118             Type (iType), Allocated (0), String (iString), Length (iLength)
119         { }
120 
Token(const Token & iOther)121         Token (const Token &iOther)
122         {
123             Type = iOther.Type;
124             Allocated = iOther.Allocated;
125             iOther.Allocated = 0; // !!! not quite correct but effective
126             String = iOther.String;
127             Length = iOther.Length;
128         }
129 
~Token()130         ~Token ()
131         { if (Allocated) free (Buffer); }
132 
133         /// Assignment operator
134         Token &operator = (const Token &iOther)
135         {
136             if (Allocated) free (Buffer);
137             Type = iOther.Type;
138             Allocated = iOther.Allocated;
139             iOther.Allocated = 0; // !!! not quite correct but effective
140             String = iOther.String;
141             Length = iOther.Length;
142             return *this;
143         }
144 
145         /// Append a string to this token
146         void Append (const char *iString, size_t iLength);
147 
148         /// Append a token to this token
149         void Append (const Token &iOther);
150 
151         /// Append given number of newlines to this token
152         void AppendNL (int iCount);
153 
154         /// Count number of newlines in this token
155         int CountNL ();
156 
157         /// Get the numeric value of the token
158         bool GetValue (long &oValue) const;
159 
160         /// Set the numeric value of the token
161         void SetValue (long iValue);
162 
163         /// Test two tokens for equality
164         bool operator == (const Token &iOther)
165         {
166             if (iOther.Length != Length)
167                 return false;
168             return (memcmp (String, iOther.String, Length) == 0);
169         }
170     };
171 
172     /// A macro definition
173     class Macro
174     {
175     public:
176         /// Macro name
177         Token Name;
178         /// Number of arguments
179         int NumArgs;
180         /// The names of the arguments
181         Token *Args;
182         /// The macro value
183         Token Value;
184         /// Unparsed macro body (keeps the whole raw unparsed macro body)
185         Token Body;
186         /// Next macro in chained list
187         Macro *Next;
188         /// A pointer to function implementation (if macro is really a func)
189         Token (*ExpandFunc) (CPreprocessor *iParent, int iNumArgs, Token *iArgs);
190         /// true if macro expansion is in progress
191         bool Expanding;
192 
Macro(const Token & iName)193         Macro (const Token &iName) :
194             Name (iName), NumArgs (0), Args (NULL), Next (NULL),
195             ExpandFunc (NULL), Expanding (false)
196         { }
197 
~Macro()198         ~Macro ()
199         { delete [] Args; delete Next; }
200 
201         /// Expand the macro value (will not work for functions)
202         Token Expand (int iNumArgs, Token *iArgs, Macro *iMacros);
203     };
204 
205     friend class CPreprocessor::Macro;
206 
207     /// The current source text input
208     const char *Source;
209     /// The end of the source text
210     const char *SourceEnd;
211     /// Current line number
212     int Line;
213     /// True if we are at beginning of line
214     bool BOL;
215     /// A stack of 32 booleans packed into one value :)
216     unsigned EnableOutput;
217     /// The list of macros defined so far
218     Macro *MacroList;
219 
220     /**
221      * Private constructor to re-parse a single token.
222      */
223     CPreprocessor (const Token &iToken, int iLine);
224 
225     /**
226      * Stateless tokenizer: Parse the input text and return the next token.
227      * @param iExpand
228      *     If true, macros will be expanded to their values
229      * @return
230      *     The next token from the input stream
231      */
232     Token GetToken (bool iExpand);
233 
234     /**
235      * Handle a preprocessor directive.
236      * @param iToken
237      *     The whole preprocessor directive line (until EOL)
238      * @param iLine
239      *     The line where the directive begins (for error reports)
240      * @return
     *     The last input token that was not processed.
242      */
243     Token HandleDirective (Token &iToken, int iLine);
244 
245     /**
246      * Handle a \#define directive.
247      * @param iBody
248      *     The body of the directive (everything after the directive
249      *     until end of line).
250      * @param iLine
251      *     The line where the directive begins (for error reports)
252      * @return
253      *     true if everything went ok, false if not
254      */
255     bool HandleDefine (Token &iBody, int iLine);
256 
257     /**
258      * Undefine a previously defined macro
259      * @param iBody
260      *     The body of the directive (everything after the directive
261      *     until end of line).
262      * @param iLine
263      *     The line where the directive begins (for error reports)
264      * @return
265      *     true if everything went ok, false if not
266      */
267     bool HandleUnDef (Token &iBody, int iLine);
268 
269     /**
270      * Handle an \#ifdef directive.
271      * @param iBody
272      *     The body of the directive (everything after the directive
273      *     until end of line).
274      * @param iLine
275      *     The line where the directive begins (for error reports)
276      * @return
277      *     true if everything went ok, false if not
278      */
279     bool HandleIfDef (Token &iBody, int iLine);
280 
281     /**
282      * Handle an \#if directive.
283      * @param iBody
284      *     The body of the directive (everything after the directive
285      *     until end of line).
286      * @param iLine
287      *     The line where the directive begins (for error reports)
288      * @return
289      *     true if everything went ok, false if not
290      */
291     bool HandleIf (Token &iBody, int iLine);
292 
293     /**
294      * Handle an \#else directive.
295      * @param iBody
296      *     The body of the directive (everything after the directive
297      *     until end of line).
298      * @param iLine
299      *     The line where the directive begins (for error reports)
300      * @return
301      *     true if everything went ok, false if not
302      */
303     bool HandleElse (Token &iBody, int iLine);
304 
305     /**
306      * Handle an \#endif directive.
307      * @param iBody
308      *     The body of the directive (everything after the directive
309      *     until end of line).
310      * @param iLine
311      *     The line where the directive begins (for error reports)
312      * @return
313      *     true if everything went ok, false if not
314      */
315     bool HandleEndIf (Token &iBody, int iLine);
316 
317     /**
318      * Get a single function argument until next ',' or ')'.
319      * @param oArg
320      *     The argument is returned in this variable.
321      * @param iExpand
322      *     If false, parameters are not expanded and no expressions are
323      *     allowed; only a single keyword is expected per argument.
324      * @return
325      *     The first unhandled token after argument.
326      */
327     Token GetArgument (Token &oArg, bool iExpand);
328 
329     /**
330      * Get all the arguments of a macro: '(' arg1 { ',' arg2 { ',' ... }} ')'
331      * @param oNumArgs
332      *     Number of parsed arguments is stored into this variable.
333      * @param oArgs
334      *     This is set to a pointer to an array of parsed arguments.
335      * @param iExpand
336      *     If false, parameters are not expanded and no expressions are
337      *     allowed; only a single keyword is expected per argument.
338      */
339     Token GetArguments (int &oNumArgs, Token *&oArgs, bool iExpand);
340 
341     /**
342      * Parse an expression, compute it and return the result.
343      * @param oResult
344      *     A token containing the result of expression
345      * @param iLine
346      *     The line at which the expression starts (for error reports)
347      * @param iOpPriority
     *     Operator priority at which we will stop if proceeding
     *     recursively (used internally). The parser stops when it
     *     encounters an operator with higher or equal priority.
351      * @return
352      *     The last unhandled token after the expression
353      */
354     Token GetExpression (Token &oResult, int iLine, int iOpPriority = 0);
355 
356     /**
357      * Get the numeric value of a token.
358      * If the token was produced by expanding a macro, we will get
     * a TEXT token which can contain a whole expression; in this
360      * case we will call GetExpression to parse it. Otherwise we
361      * just call the token's GetValue() method.
362      * @param iToken
363      *     The token to get the numeric value of
364      * @param oValue
365      *     The variable to put the value into
366      * @param iLine
367      *     The line where the directive begins (for error reports)
368      * @return
369      *     true if ok, false if not
370      */
371     bool GetValue (const Token &iToken, long &oValue, int iLine);
372 
373     /**
374      * Expand the given macro, if it exists.
375      * If macro has arguments, they are collected from source stream.
376      * @param iToken
377      *     A KEYWORD token containing the (possible) macro name.
378      * @return
379      *     The expanded token or iToken if it is not a macro
380      */
381     Token ExpandMacro (const Token &iToken);
382 
383     /**
384      * Check if a macro is defined, and if so, return it
385      * @param iToken
386      *     Macro name
387      * @return
388      *     The macro object or NULL if a macro with this name does not exist
389      */
390     Macro *IsDefined (const Token &iToken);
391 
392     /**
393      * The implementation of the defined() preprocessor function
394      * @param iParent
395      *     The parent preprocessor object
396      * @param iNumArgs
397      *     Number of arguments
398      * @param iArgs
399      *     The arguments themselves
400      * @return
401      *     The return value encapsulated in a token
402      */
403     static Token ExpandDefined (CPreprocessor *iParent, int iNumArgs, Token *iArgs);
404 
405     /**
406      * Parse the input string and return a token containing the whole output.
407      * @param iSource
408      *     The source text enclosed in a token
409      * @return
410      *     The output text enclosed in a token
411      */
412     Token Parse (const Token &iSource);
413 
414     /**
415      * Call the error handler
416      * @param iLine
417      *     The line at which the error happened.
418      * @param iError
419      *     The error string.
420      * @param iToken
421      *     If not NULL contains the erroneous token
422      */
423     void Error (int iLine, const char *iError, const Token *iToken = NULL);
424 
425 public:
426     /// Create an empty preprocessor object
CPreprocessor()427     CPreprocessor () : MacroList (NULL)
428     { }
429 
430     /// Destroy the preprocessor object
431     virtual ~CPreprocessor ();
432 
433     /**
434      * Define a macro without parameters.
435      * @param iMacroName
436      *     The name of the defined macro
437      * @param iMacroNameLen
438      *     The length of the name of the defined macro
439      * @param iMacroValue
440      *     The value of the defined macro
441      * @param iMacroValueLen
442      *     The length of the value of the defined macro
443      */
444     void Define (const char *iMacroName, size_t iMacroNameLen,
445                  const char *iMacroValue, size_t iMacroValueLen);
446 
447     /**
448      * Define a numerical macro.
449      * @param iMacroName
450      *     The name of the defined macro
451      * @param iMacroNameLen
452      *     The length of the name of the defined macro
453      * @param iMacroValue
454      *     The value of the defined macro
455      */
456     void Define (const char *iMacroName, size_t iMacroNameLen, long iMacroValue);
457 
458     /**
459      * Define a macro without parameters.
460      * @param iMacroName
461      *     The name of the defined macro
462      * @param iMacroValue
463      *     The value of the defined macro
464      */
465     void Define (const char *iMacroName, const char *iMacroValue);
466 
467     /**
468      * Define a numerical macro.
469      * @param iMacroName
470      *     The name of the defined macro
471      * @param iMacroValue
472      *     The value of the defined macro
473      */
474     void Define (const char *iMacroName, long iMacroValue);
475 
476     /**
477      * Undefine a macro.
478      * @param iMacroName
479      *     The name of the macro to undefine
480      * @param iMacroNameLen
481      *     The length of the name of the macro to undefine
482      * @return
483      *     true if the macro has been undefined, false if macro doesn't exist
484      */
485     bool Undef (const char *iMacroName, size_t iMacroNameLen);
486 
487     /**
488      * Parse the input string and return a newly-allocated output string.
489      * @note
490      *     The returned preprocessed string is NOT zero-terminated
491      *     (just like the input string).
492      * @param iSource
493      *     The source text
494      * @param iLength
495      *     The length of the source text in characters
496      * @param oLength
497      *     The length of the output string.
498      * @return
499      *     The output from preprocessor, allocated with malloc().
500      *     The parser can actually allocate more than needed for performance
501      *     reasons, but this should not be a problem unless you will want
502      *     to store the returned pointer for long time in which case you
503      *     might want to realloc() it.
504      *     If an error has been encountered, the function returns NULL.
505      *     In some cases the function may return an unallocated address
506      *     that's *inside* the source buffer. You must free() the result
507      *     string only if the returned address is not inside the source text.
508      */
509     char *Parse (const char *iSource, size_t iLength, size_t &oLength);
510 
511     /**
512      * An error handler function type.
513      * The default implementation just drops a note to stderr and
514      * then the parser ends, returning NULL.
515      * @param iData
516      *     User-specific pointer from the corresponding CPreprocessor object.
517      * @param iLine
518      *     The line at which the error happened.
519      * @param iError
520      *     The error string.
521      * @param iToken
522      *     If not NULL contains the erroneous token
523      * @param iTokenLen
524      *     The length of iToken. iToken is never zero-terminated!
525      */
526     typedef void (*ErrorHandlerFunc) (
527         void *iData, int iLine, const char *iError,
528         const char *iToken, size_t iTokenLen);
529 
530     /**
531      * A pointer to the preprocessor's error handler.
532      * You can assign the address of your own function to this variable
533      * and implement your own error handling (e.g. throwing an exception etc).
534      */
535     static ErrorHandlerFunc ErrorHandler;
536 
537     /// User-specific storage, passed to Error()
538     void *ErrorData;
539 };
540 
541 #endif // INCLUDED_CPREPROCESSOR
542