//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
140b57cec5SDimitry Andric #define LLVM_LIB_TABLEGEN_TGLEXER_H
150b57cec5SDimitry Andric 
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric namespace llvm {
// Forward declarations.  The lexer declarations below only mention these
// types as a constructor parameter (ArrayRef), a reference member (SourceMgr)
// and a const-reference parameter (Twine), so their full definitions are not
// pulled into this header.
template <typename T> class ArrayRef;
class SourceMgr;
class Twine;
300b57cec5SDimitry Andric 
namespace tgtok {
/// Kinds of tokens known to the TableGen lexer.
///
/// Several groups of enumerators are bracketed by *_FIRST / *_LAST aliases
/// (object start tokens, bang operators, string valued tokens).  The range
/// predicates declared after the enum rely on each group being contiguous,
/// so a new enumerator must be added inside its group and the *_LAST alias
/// updated when the new enumerator becomes the last one.
enum TokKind {
  // Markers
  Eof,
  Error,

  // Tokens with no info.
  minus,     // -
  plus,      // +
  l_square,  // [
  r_square,  // ]
  l_brace,   // {
  r_brace,   // }
  l_paren,   // (
  r_paren,   // )
  less,      // <
  greater,   // >
  colon,     // :
  semi,      // ;
  comma,     // ,
  dot,       // .
  equal,     // =
  question,  // ?
  paste,     // #
  dotdotdot, // ...

  // Boolean literals.
  TrueVal,
  FalseVal,

  // Integer value.
  IntVal,

  // Binary constant.  Note that these are sized according to the number of
  // bits given.
  BinaryIntVal,

  // Preprocessing tokens for internal usage by the lexer.
  // They are never returned as a result of Lex().
  Ifdef,
  Ifndef,
  Else,
  Endif,
  Define,

  // Reserved keywords. ('ElseKW' is named to distinguish it from the
  // existing 'Else' that means the preprocessor #else.)
  Bit,
  Bits,
  Code,
  Dag,
  ElseKW,
  FalseKW,
  Field,
  In,
  Include,
  Int,
  List,
  String,
  Then,
  TrueKW,

  // Object start tokens.
  OBJECT_START_FIRST,
  Assert = OBJECT_START_FIRST,
  Class,
  Def,
  Defm,
  Defset,
  Defvar,
  Dump,
  Foreach,
  If,
  Let,
  MultiClass,
  OBJECT_START_LAST = MultiClass,

  // Bang operators.
  BANG_OPERATOR_FIRST,
  XConcat = BANG_OPERATOR_FIRST,
  XADD,
  XSUB,
  XMUL,
  XDIV,
  XNOT,
  XLOG2,
  XAND,
  XOR,
  XXOR,
  XSRA,
  XSRL,
  XSHL,
  XListConcat,
  XListSplat,
  XStrConcat,
  XInterleave,
  XSubstr,
  XFind,
  XCast,
  XSubst,
  XForEach,
  XFilter,
  XFoldl,
  XHead,
  XTail,
  XSize,
  XEmpty,
  XIf,
  XCond,
  XEq,
  XIsA,
  XDag,
  XNe,
  XLe,
  XLt,
  XGe,
  XGt,
  XSetDagOp,
  XGetDagOp,
  XExists,
  XListRemove,
  XToLower,
  XToUpper,
  XRange,
  XGetDagArg,
  XGetDagName,
  XSetDagArg,
  XSetDagName,
  XRepr,
  BANG_OPERATOR_LAST = XRepr,

  // String valued tokens.
  STRING_VALUE_FIRST,
  Id = STRING_VALUE_FIRST,
  StrVal,
  VarName,
  CodeFragment,
  STRING_VALUE_LAST = CodeFragment,
};

/// isBangOperator - Return true if this is a bang operator, i.e. \p Kind lies
/// in the inclusive range [BANG_OPERATOR_FIRST, BANG_OPERATOR_LAST].
static inline constexpr bool isBangOperator(tgtok::TokKind Kind) {
  return tgtok::BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;
}

/// isObjectStart - Return true if this is a valid first token for a statement,
/// i.e. \p Kind lies in the inclusive range
/// [OBJECT_START_FIRST, OBJECT_START_LAST].
static inline constexpr bool isObjectStart(tgtok::TokKind Kind) {
  return tgtok::OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;
}

/// isStringValue - Return true if this is a string value, i.e. \p Kind lies in
/// the inclusive range [STRING_VALUE_FIRST, STRING_VALUE_LAST].
static inline constexpr bool isStringValue(tgtok::TokKind Kind) {
  return tgtok::STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;
}
} // namespace tgtok
1865f757f3fSDimitry Andric 
1870b57cec5SDimitry Andric /// TGLexer - TableGen Lexer class.
1880b57cec5SDimitry Andric class TGLexer {
1890b57cec5SDimitry Andric   SourceMgr &SrcMgr;
1900b57cec5SDimitry Andric 
191480093f4SDimitry Andric   const char *CurPtr = nullptr;
1920b57cec5SDimitry Andric   StringRef CurBuf;
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric   // Information about the current token.
195480093f4SDimitry Andric   const char *TokStart = nullptr;
196480093f4SDimitry Andric   tgtok::TokKind CurCode = tgtok::TokKind::Eof;
197e8d8bef9SDimitry Andric   std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
198e8d8bef9SDimitry Andric   int64_t CurIntVal = 0; // This is valid for IntVal.
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric   /// CurBuffer - This is the current buffer index we're lexing from as managed
2010b57cec5SDimitry Andric   /// by the SourceMgr object.
202480093f4SDimitry Andric   unsigned CurBuffer = 0;
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric public:
205480093f4SDimitry Andric   typedef std::set<std::string> DependenciesSetTy;
206480093f4SDimitry Andric 
2070b57cec5SDimitry Andric private:
2080b57cec5SDimitry Andric   /// Dependencies - This is the list of all included files.
209480093f4SDimitry Andric   DependenciesSetTy Dependencies;
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric public:
2120b57cec5SDimitry Andric   TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
2130b57cec5SDimitry Andric 
Lex()2140b57cec5SDimitry Andric   tgtok::TokKind Lex() {
2150b57cec5SDimitry Andric     return CurCode = LexToken(CurPtr == CurBuf.begin());
2160b57cec5SDimitry Andric   }
2170b57cec5SDimitry Andric 
getDependencies()218480093f4SDimitry Andric   const DependenciesSetTy &getDependencies() const {
2190b57cec5SDimitry Andric     return Dependencies;
2200b57cec5SDimitry Andric   }
2210b57cec5SDimitry Andric 
getCode()2220b57cec5SDimitry Andric   tgtok::TokKind getCode() const { return CurCode; }
2230b57cec5SDimitry Andric 
getCurStrVal()2240b57cec5SDimitry Andric   const std::string &getCurStrVal() const {
2255f757f3fSDimitry Andric     assert(tgtok::isStringValue(CurCode) &&
2260b57cec5SDimitry Andric            "This token doesn't have a string value");
2270b57cec5SDimitry Andric     return CurStrVal;
2280b57cec5SDimitry Andric   }
getCurIntVal()2290b57cec5SDimitry Andric   int64_t getCurIntVal() const {
2300b57cec5SDimitry Andric     assert(CurCode == tgtok::IntVal && "This token isn't an integer");
2310b57cec5SDimitry Andric     return CurIntVal;
2320b57cec5SDimitry Andric   }
getCurBinaryIntVal()2330b57cec5SDimitry Andric   std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
2340b57cec5SDimitry Andric     assert(CurCode == tgtok::BinaryIntVal &&
2350b57cec5SDimitry Andric            "This token isn't a binary integer");
2360b57cec5SDimitry Andric     return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
2370b57cec5SDimitry Andric   }
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   SMLoc getLoc() const;
240bdd1243dSDimitry Andric   SMRange getLocRange() const;
2410b57cec5SDimitry Andric 
2420b57cec5SDimitry Andric private:
2430b57cec5SDimitry Andric   /// LexToken - Read the next token and return its code.
2440b57cec5SDimitry Andric   tgtok::TokKind LexToken(bool FileOrLineStart = false);
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
2470b57cec5SDimitry Andric   tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   int getNextChar();
2500b57cec5SDimitry Andric   int peekNextChar(int Index) const;
2510b57cec5SDimitry Andric   void SkipBCPLComment();
2520b57cec5SDimitry Andric   bool SkipCComment();
2530b57cec5SDimitry Andric   tgtok::TokKind LexIdentifier();
2540b57cec5SDimitry Andric   bool LexInclude();
2550b57cec5SDimitry Andric   tgtok::TokKind LexString();
2560b57cec5SDimitry Andric   tgtok::TokKind LexVarName();
2570b57cec5SDimitry Andric   tgtok::TokKind LexNumber();
2580b57cec5SDimitry Andric   tgtok::TokKind LexBracket();
2590b57cec5SDimitry Andric   tgtok::TokKind LexExclaim();
2600b57cec5SDimitry Andric 
2610b57cec5SDimitry Andric   // Process EOF encountered in LexToken().
2620b57cec5SDimitry Andric   // If EOF is met in an include file, then the method will update
2630b57cec5SDimitry Andric   // CurPtr, CurBuf and preprocessing include stack, and return true.
2640b57cec5SDimitry Andric   // If EOF is met in the top-level file, then the method will
2650b57cec5SDimitry Andric   // update and check the preprocessing include stack, and return false.
2660b57cec5SDimitry Andric   bool processEOF();
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric   // *** Structures and methods for preprocessing support ***
2690b57cec5SDimitry Andric 
2700b57cec5SDimitry Andric   // A set of macro names that are defined either via command line or
2710b57cec5SDimitry Andric   // by using:
2720b57cec5SDimitry Andric   //     #define NAME
2730b57cec5SDimitry Andric   StringSet<> DefinedMacros;
2740b57cec5SDimitry Andric 
2750b57cec5SDimitry Andric   // Each of #ifdef and #else directives has a descriptor associated
2760b57cec5SDimitry Andric   // with it.
2770b57cec5SDimitry Andric   //
2780b57cec5SDimitry Andric   // An ordered list of preprocessing controls defined by #ifdef/#else
2790b57cec5SDimitry Andric   // directives that are in effect currently is called preprocessing
2800b57cec5SDimitry Andric   // control stack.  It is represented as a vector of PreprocessorControlDesc's.
2810b57cec5SDimitry Andric   //
2820b57cec5SDimitry Andric   // The control stack is updated according to the following rules:
2830b57cec5SDimitry Andric   //
2840b57cec5SDimitry Andric   // For each #ifdef we add an element to the control stack.
2850b57cec5SDimitry Andric   // For each #else we replace the top element with a descriptor
2860b57cec5SDimitry Andric   // with an inverted IsDefined value.
2870b57cec5SDimitry Andric   // For each #endif we pop the top element from the control stack.
2880b57cec5SDimitry Andric   //
2890b57cec5SDimitry Andric   // When CurPtr reaches the current buffer's end, the control stack
2900b57cec5SDimitry Andric   // must be empty, i.e. #ifdef and the corresponding #endif
2910b57cec5SDimitry Andric   // must be located in the same file.
2920b57cec5SDimitry Andric   struct PreprocessorControlDesc {
2930b57cec5SDimitry Andric     // Either tgtok::Ifdef or tgtok::Else.
2940b57cec5SDimitry Andric     tgtok::TokKind Kind;
2950b57cec5SDimitry Andric 
2960b57cec5SDimitry Andric     // True, if the condition for this directive is true, false - otherwise.
2970b57cec5SDimitry Andric     // Examples:
2980b57cec5SDimitry Andric     //     #ifdef NAME       : true, if NAME is defined, false - otherwise.
2990b57cec5SDimitry Andric     //     ...
3000b57cec5SDimitry Andric     //     #else             : false, if NAME is defined, true - otherwise.
3010b57cec5SDimitry Andric     bool IsDefined;
3020b57cec5SDimitry Andric 
3030b57cec5SDimitry Andric     // Pointer into CurBuf to the beginning of the preprocessing directive
3040b57cec5SDimitry Andric     // word, e.g.:
3050b57cec5SDimitry Andric     //     #ifdef NAME
3060b57cec5SDimitry Andric     //      ^ - SrcPos
3070b57cec5SDimitry Andric     SMLoc SrcPos;
3080b57cec5SDimitry Andric   };
3090b57cec5SDimitry Andric 
3100b57cec5SDimitry Andric   // We want to disallow code like this:
3110b57cec5SDimitry Andric   //     file1.td:
3120b57cec5SDimitry Andric   //         #define NAME
3130b57cec5SDimitry Andric   //         #ifdef NAME
3140b57cec5SDimitry Andric   //         include "file2.td"
3150b57cec5SDimitry Andric   //     EOF
3160b57cec5SDimitry Andric   //     file2.td:
3170b57cec5SDimitry Andric   //         #endif
3180b57cec5SDimitry Andric   //     EOF
3190b57cec5SDimitry Andric   //
3200b57cec5SDimitry Andric   // To do this, we clear the preprocessing control stack on entry
3210b57cec5SDimitry Andric   // to each of the included file.  PrepIncludeStack is used to store
3220b57cec5SDimitry Andric   // preprocessing control stacks for the current file and all its
3230b57cec5SDimitry Andric   // parent files.  The back() element is the preprocessing control
3240b57cec5SDimitry Andric   // stack for the current file.
3250b57cec5SDimitry Andric   std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
3260b57cec5SDimitry Andric       PrepIncludeStack;
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   // Validate that the current preprocessing control stack is empty,
3290b57cec5SDimitry Andric   // since we are about to exit a file, and pop the include stack.
3300b57cec5SDimitry Andric   //
3310b57cec5SDimitry Andric   // If IncludeStackMustBeEmpty is true, the include stack must be empty
3320b57cec5SDimitry Andric   // after the popping, otherwise, the include stack must not be empty
3330b57cec5SDimitry Andric   // after the popping.  Basically, the include stack must be empty
3340b57cec5SDimitry Andric   // only if we exit the "top-level" file (i.e. finish lexing).
3350b57cec5SDimitry Andric   //
3360b57cec5SDimitry Andric   // The method returns false, if the current preprocessing control stack
3370b57cec5SDimitry Andric   // is not empty (e.g. there is an unterminated #ifdef/#else),
3380b57cec5SDimitry Andric   // true - otherwise.
3390b57cec5SDimitry Andric   bool prepExitInclude(bool IncludeStackMustBeEmpty);
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric   // Look ahead for a preprocessing directive starting from CurPtr.  The caller
3420b57cec5SDimitry Andric   // must only call this method, if *(CurPtr - 1) is '#'.  If the method matches
3430b57cec5SDimitry Andric   // a preprocessing directive word followed by a whitespace, then it returns
3440b57cec5SDimitry Andric   // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
3450b57cec5SDimitry Andric   //
3460b57cec5SDimitry Andric   // CurPtr is not adjusted by this method.
3470b57cec5SDimitry Andric   tgtok::TokKind prepIsDirective() const;
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric   // Given a preprocessing token kind, adjusts CurPtr to the end
3500b57cec5SDimitry Andric   // of the preprocessing directive word.  Returns true, unless
3510b57cec5SDimitry Andric   // an unsupported token kind is passed in.
3520b57cec5SDimitry Andric   //
3530b57cec5SDimitry Andric   // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
3540b57cec5SDimitry Andric   // to avoid adjusting CurPtr before we are sure that '#' is followed
3550b57cec5SDimitry Andric   // by a preprocessing directive.  If it is not, then we fall back to
3560b57cec5SDimitry Andric   // tgtok::paste interpretation of '#'.
3570b57cec5SDimitry Andric   bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
3580b57cec5SDimitry Andric 
3590b57cec5SDimitry Andric   // The main "exit" point from the token parsing to preprocessor.
3600b57cec5SDimitry Andric   //
3610b57cec5SDimitry Andric   // The method is called for CurPtr, when prepIsDirective() returns
3620b57cec5SDimitry Andric   // true.  The first parameter matches the result of prepIsDirective(),
3630b57cec5SDimitry Andric   // denoting the actual preprocessor directive to be processed.
3640b57cec5SDimitry Andric   //
3650b57cec5SDimitry Andric   // If the preprocessing directive disables the tokens processing, e.g.:
3660b57cec5SDimitry Andric   //     #ifdef NAME // NAME is undefined
3670b57cec5SDimitry Andric   // then lexPreprocessor() enters the lines-skipping mode.
3680b57cec5SDimitry Andric   // In this mode, it does not parse any tokens, because the code under
3690b57cec5SDimitry Andric   // the #ifdef may not even be a correct tablegen code.  The preprocessor
3700b57cec5SDimitry Andric   // looks for lines containing other preprocessing directives, which
3710b57cec5SDimitry Andric   // may be prepended with whitespaces and C-style comments.  If the line
3720b57cec5SDimitry Andric   // does not contain a preprocessing directive, it is skipped completely.
3730b57cec5SDimitry Andric   // Otherwise, the preprocessing directive is processed by recursively
3740b57cec5SDimitry Andric   // calling lexPreprocessor().  The processing of the encountered
3750b57cec5SDimitry Andric   // preprocessing directives includes updating preprocessing control stack
3760b57cec5SDimitry Andric   // and adding new macros into DefinedMacros set.
3770b57cec5SDimitry Andric   //
3780b57cec5SDimitry Andric   // The second parameter controls whether lexPreprocessor() is called from
3790b57cec5SDimitry Andric   // LexToken() (true) or recursively from lexPreprocessor() (false).
3800b57cec5SDimitry Andric   //
3810b57cec5SDimitry Andric   // If ReturnNextLiveToken is true, the method returns the next
3820b57cec5SDimitry Andric   // LEX token following the current directive or following the end
3830b57cec5SDimitry Andric   // of the disabled preprocessing region corresponding to this directive.
3840b57cec5SDimitry Andric   // If ReturnNextLiveToken is false, the method returns the first parameter,
3850b57cec5SDimitry Andric   // unless there were errors encountered in the disabled preprocessing
3860b57cec5SDimitry Andric   // region - in this case, it returns tgtok::Error.
3870b57cec5SDimitry Andric   tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
3880b57cec5SDimitry Andric                                  bool ReturnNextLiveToken = true);
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   // Worker method for lexPreprocessor() to skip lines after some
3910b57cec5SDimitry Andric   // preprocessing directive up to the buffer end or to the directive
3920b57cec5SDimitry Andric   // that re-enables token processing.  The method returns true
3930b57cec5SDimitry Andric   // upon processing the next directive that re-enables tokens
3940b57cec5SDimitry Andric   // processing.  False is returned if an error was encountered.
3950b57cec5SDimitry Andric   //
3960b57cec5SDimitry Andric   // Note that prepSkipRegion() calls lexPreprocessor() to process
3970b57cec5SDimitry Andric   // encountered preprocessing directives.  In this case, the second
3980b57cec5SDimitry Andric   // parameter to lexPreprocessor() is set to false.  Being passed
3990b57cec5SDimitry Andric   // false ReturnNextLiveToken, lexPreprocessor() must never call
4000b57cec5SDimitry Andric   // prepSkipRegion().  We assert this by passing ReturnNextLiveToken
4010b57cec5SDimitry Andric   // to prepSkipRegion() and checking that it is never set to false.
4020b57cec5SDimitry Andric   bool prepSkipRegion(bool MustNeverBeFalse);
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric   // Lex name of the macro after either #ifdef or #define.  We could have used
4050b57cec5SDimitry Andric   // LexIdentifier(), but it has special handling of "include" word, which
4060b57cec5SDimitry Andric   // could result in awkward diagnostic errors.  Consider:
4070b57cec5SDimitry Andric   // ----
4080b57cec5SDimitry Andric   // #ifdef include
4090b57cec5SDimitry Andric   // class ...
4100b57cec5SDimitry Andric   // ----
4110b57cec5SDimitry Andric   // LexIdentifier() will engage LexInclude(), which will complain about
4120b57cec5SDimitry Andric   // missing file with name "class".  Instead, prepLexMacroName() will treat
4130b57cec5SDimitry Andric   // "include" as a normal macro name.
4140b57cec5SDimitry Andric   //
4150b57cec5SDimitry Andric   // On entry, CurPtr points to the end of a preprocessing directive word.
4160b57cec5SDimitry Andric   // The method allows for whitespaces between the preprocessing directive
4170b57cec5SDimitry Andric   // and the macro name.  The allowed whitespaces are ' ' and '\t'.
4180b57cec5SDimitry Andric   //
4190b57cec5SDimitry Andric   // If the first non-whitespace symbol after the preprocessing directive
4200b57cec5SDimitry Andric   // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
4210b57cec5SDimitry Andric   // the method updates TokStart to the position of the first non-whitespace
4220b57cec5SDimitry Andric   // symbol, sets CurPtr to the position of the macro name's last symbol,
4230b57cec5SDimitry Andric   // and returns a string reference to the macro name.  Otherwise,
4240b57cec5SDimitry Andric   // TokStart is set to the first non-whitespace symbol after the preprocessing
4250b57cec5SDimitry Andric   // directive, and the method returns an empty string reference.
4260b57cec5SDimitry Andric   //
4270b57cec5SDimitry Andric   // In all cases, TokStart may be used to point to the word following
4280b57cec5SDimitry Andric   // the preprocessing directive.
4290b57cec5SDimitry Andric   StringRef prepLexMacroName();
4300b57cec5SDimitry Andric 
4310b57cec5SDimitry Andric   // Skip any whitespaces starting from CurPtr.  The method is used
4320b57cec5SDimitry Andric   // only in the lines-skipping mode to find the first non-whitespace
4330b57cec5SDimitry Andric   // symbol after or at CurPtr.  Allowed whitespaces are ' ', '\t', '\n'
4340b57cec5SDimitry Andric   // and '\r'.  The method skips C-style comments as well, because
4350b57cec5SDimitry Andric   // it is used to find the beginning of the preprocessing directive.
4360b57cec5SDimitry Andric   // If we do not handle C-style comments the following code would
4370b57cec5SDimitry Andric   // result in incorrect detection of a preprocessing directive:
4380b57cec5SDimitry Andric   //     /*
4390b57cec5SDimitry Andric   //     #ifdef NAME
4400b57cec5SDimitry Andric   //     */
4410b57cec5SDimitry Andric   // As long as we skip C-style comments, the following code is correctly
4420b57cec5SDimitry Andric   // recognized as a preprocessing directive:
4430b57cec5SDimitry Andric   //     /* first line comment
4440b57cec5SDimitry Andric   //        second line comment */ #ifdef NAME
4450b57cec5SDimitry Andric   //
4460b57cec5SDimitry Andric   // The method returns true upon reaching the first non-whitespace symbol
4470b57cec5SDimitry Andric   // or EOF, CurPtr is set to point to this symbol.  The method returns false,
44881ad6265SDimitry Andric   // if an error occurred during skipping of a C-style comment.
4490b57cec5SDimitry Andric   bool prepSkipLineBegin();
4500b57cec5SDimitry Andric 
4510b57cec5SDimitry Andric   // Skip any whitespaces or comments after a preprocessing directive.
4520b57cec5SDimitry Andric   // The method returns true upon reaching either end of the line
4530b57cec5SDimitry Andric   // or end of the file.  If there is a multiline C-style comment
4540b57cec5SDimitry Andric   // after the preprocessing directive, the method skips
4550b57cec5SDimitry Andric   // the comment, so the final CurPtr may point to one of the next lines.
45681ad6265SDimitry Andric   // The method returns false, if an error occurred during skipping
4570b57cec5SDimitry Andric   // C- or C++-style comment, or a non-whitespace symbol appears
4580b57cec5SDimitry Andric   // after the preprocessing directive.
4590b57cec5SDimitry Andric   //
4600b57cec5SDimitry Andric   // The method maybe called both during lines-skipping and tokens
4610b57cec5SDimitry Andric   // processing.  It actually verifies that only whitespaces or/and
4620b57cec5SDimitry Andric   // comments follow a preprocessing directive.
4630b57cec5SDimitry Andric   //
4640b57cec5SDimitry Andric   // After the execution of this mehod, CurPtr points either to new line
4650b57cec5SDimitry Andric   // symbol, buffer end or non-whitespace symbol following the preprocesing
4660b57cec5SDimitry Andric   // directive.
4670b57cec5SDimitry Andric   bool prepSkipDirectiveEnd();
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // Skip all symbols to the end of the line/file.
4700b57cec5SDimitry Andric   // The method adjusts CurPtr, so that it points to either new line
4710b57cec5SDimitry Andric   // symbol in the current line or the buffer end.
4720b57cec5SDimitry Andric   void prepSkipToLineEnd();
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric   // Return true, if the current preprocessor control stack is such that
4750b57cec5SDimitry Andric   // we should allow lexer to process the next token, false - otherwise.
4760b57cec5SDimitry Andric   //
4770b57cec5SDimitry Andric   // In particular, the method returns true, if all the #ifdef/#else
4780b57cec5SDimitry Andric   // controls on the stack have their IsDefined member set to true.
4790b57cec5SDimitry Andric   bool prepIsProcessingEnabled();
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric   // Report an error, if we reach EOF with non-empty preprocessing control
4820b57cec5SDimitry Andric   // stack.  This means there is no matching #endif for the previous
4830b57cec5SDimitry Andric   // #ifdef/#else.
4840b57cec5SDimitry Andric   void prepReportPreprocessorStackError();
4850b57cec5SDimitry Andric };
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric } // end namespace llvm
4880b57cec5SDimitry Andric 
4890b57cec5SDimitry Andric #endif
490