//===- TGLexer.h - Lexer for TableGen Files ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This class represents the Lexer for tablegen files.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #ifndef LLVM_LIB_TABLEGEN_TGLEXER_H
140b57cec5SDimitry Andric #define LLVM_LIB_TABLEGEN_TGLEXER_H
150b57cec5SDimitry Andric
160b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
170b57cec5SDimitry Andric #include "llvm/ADT/StringSet.h"
180b57cec5SDimitry Andric #include "llvm/Support/DataTypes.h"
190b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h"
200b57cec5SDimitry Andric #include <cassert>
210b57cec5SDimitry Andric #include <memory>
22480093f4SDimitry Andric #include <set>
230b57cec5SDimitry Andric #include <string>
245ffd83dbSDimitry Andric #include <vector>
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric namespace llvm {
// Forward declarations. These types only appear in declarations or behind
// references in this header, so their full definitions are not required here.
template <typename T> class ArrayRef;
class SourceMgr;
class Twine;
300b57cec5SDimitry Andric
namespace tgtok {
/// Kinds of token known to the TableGen lexer.
///
/// The enumerator order is significant: each *_FIRST / *_LAST pair aliases the
/// first and last enumerator of a contiguous group, and the is*() predicates
/// below classify a token with a plain range comparison. A new enumerator
/// must therefore be added inside the group it belongs to, and the matching
/// *_LAST alias updated if it becomes the last one.
enum TokKind {
  // Markers
  Eof,
  Error,

  // Tokens with no info.
  minus,     // -
  plus,      // +
  l_square,  // [
  r_square,  // ]
  l_brace,   // {
  r_brace,   // }
  l_paren,   // (
  r_paren,   // )
  less,      // <
  greater,   // >
  colon,     // :
  semi,      // ;
  comma,     // ,
  dot,       // .
  equal,     // =
  question,  // ?
  paste,     // #
  dotdotdot, // ...

  // Boolean literals.
  TrueVal,
  FalseVal,

  // Integer value.
  IntVal,

  // Binary constant.  Note that these are sized according to the number of
  // bits given.
  BinaryIntVal,

  // Preprocessing tokens for internal usage by the lexer.
  // They are never returned as a result of Lex().
  Ifdef,
  Ifndef,
  Else,
  Endif,
  Define,

  // Reserved keywords. ('ElseKW' is named to distinguish it from the
  // existing 'Else' that means the preprocessor #else.)
  Bit,
  Bits,
  Code,
  Dag,
  ElseKW,
  FalseKW,
  Field,
  In,
  Include,
  Int,
  List,
  String,
  Then,
  TrueKW,

  // Object start tokens.
  OBJECT_START_FIRST,
  Assert = OBJECT_START_FIRST,
  Class,
  Def,
  Defm,
  Defset,
  Defvar,
  Dump,
  Foreach,
  If,
  Let,
  MultiClass,
  OBJECT_START_LAST = MultiClass,

  // Bang operators.
  BANG_OPERATOR_FIRST,
  XConcat = BANG_OPERATOR_FIRST,
  XADD,
  XSUB,
  XMUL,
  XDIV,
  XNOT,
  XLOG2,
  XAND,
  XOR,
  XXOR,
  XSRA,
  XSRL,
  XSHL,
  XListConcat,
  XListSplat,
  XStrConcat,
  XInterleave,
  XSubstr,
  XFind,
  XCast,
  XSubst,
  XForEach,
  XFilter,
  XFoldl,
  XHead,
  XTail,
  XSize,
  XEmpty,
  XIf,
  XCond,
  XEq,
  XIsA,
  XDag,
  XNe,
  XLe,
  XLt,
  XGe,
  XGt,
  XSetDagOp,
  XGetDagOp,
  XExists,
  XListRemove,
  XToLower,
  XToUpper,
  XRange,
  XGetDagArg,
  XGetDagName,
  XSetDagArg,
  XSetDagName,
  XRepr,
  BANG_OPERATOR_LAST = XRepr,

  // String valued tokens.
  STRING_VALUE_FIRST,
  Id = STRING_VALUE_FIRST,
  StrVal,
  VarName,
  CodeFragment,
  STRING_VALUE_LAST = CodeFragment,
};

/// isBangOperator - Return true if this is a bang operator, i.e. \p Kind lies
/// in the inclusive range [BANG_OPERATOR_FIRST, BANG_OPERATOR_LAST].
static inline bool isBangOperator(tgtok::TokKind Kind) {
  return BANG_OPERATOR_FIRST <= Kind && Kind <= BANG_OPERATOR_LAST;
}

/// isObjectStart - Return true if this is a valid first token for a statement,
/// i.e. \p Kind lies in the inclusive range
/// [OBJECT_START_FIRST, OBJECT_START_LAST].
static inline bool isObjectStart(tgtok::TokKind Kind) {
  return OBJECT_START_FIRST <= Kind && Kind <= OBJECT_START_LAST;
}

/// isStringValue - Return true if this is a string value, i.e. \p Kind lies in
/// the inclusive range [STRING_VALUE_FIRST, STRING_VALUE_LAST].
static inline bool isStringValue(tgtok::TokKind Kind) {
  return STRING_VALUE_FIRST <= Kind && Kind <= STRING_VALUE_LAST;
}
} // namespace tgtok
1865f757f3fSDimitry Andric
1870b57cec5SDimitry Andric /// TGLexer - TableGen Lexer class.
1880b57cec5SDimitry Andric class TGLexer {
1890b57cec5SDimitry Andric SourceMgr &SrcMgr;
1900b57cec5SDimitry Andric
191480093f4SDimitry Andric const char *CurPtr = nullptr;
1920b57cec5SDimitry Andric StringRef CurBuf;
1930b57cec5SDimitry Andric
1940b57cec5SDimitry Andric // Information about the current token.
195480093f4SDimitry Andric const char *TokStart = nullptr;
196480093f4SDimitry Andric tgtok::TokKind CurCode = tgtok::TokKind::Eof;
197e8d8bef9SDimitry Andric std::string CurStrVal; // This is valid for Id, StrVal, VarName, CodeFragment
198e8d8bef9SDimitry Andric int64_t CurIntVal = 0; // This is valid for IntVal.
1990b57cec5SDimitry Andric
2000b57cec5SDimitry Andric /// CurBuffer - This is the current buffer index we're lexing from as managed
2010b57cec5SDimitry Andric /// by the SourceMgr object.
202480093f4SDimitry Andric unsigned CurBuffer = 0;
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric public:
205480093f4SDimitry Andric typedef std::set<std::string> DependenciesSetTy;
206480093f4SDimitry Andric
2070b57cec5SDimitry Andric private:
2080b57cec5SDimitry Andric /// Dependencies - This is the list of all included files.
209480093f4SDimitry Andric DependenciesSetTy Dependencies;
2100b57cec5SDimitry Andric
2110b57cec5SDimitry Andric public:
2120b57cec5SDimitry Andric TGLexer(SourceMgr &SrcMgr, ArrayRef<std::string> Macros);
2130b57cec5SDimitry Andric
Lex()2140b57cec5SDimitry Andric tgtok::TokKind Lex() {
2150b57cec5SDimitry Andric return CurCode = LexToken(CurPtr == CurBuf.begin());
2160b57cec5SDimitry Andric }
2170b57cec5SDimitry Andric
getDependencies()218480093f4SDimitry Andric const DependenciesSetTy &getDependencies() const {
2190b57cec5SDimitry Andric return Dependencies;
2200b57cec5SDimitry Andric }
2210b57cec5SDimitry Andric
getCode()2220b57cec5SDimitry Andric tgtok::TokKind getCode() const { return CurCode; }
2230b57cec5SDimitry Andric
getCurStrVal()2240b57cec5SDimitry Andric const std::string &getCurStrVal() const {
2255f757f3fSDimitry Andric assert(tgtok::isStringValue(CurCode) &&
2260b57cec5SDimitry Andric "This token doesn't have a string value");
2270b57cec5SDimitry Andric return CurStrVal;
2280b57cec5SDimitry Andric }
getCurIntVal()2290b57cec5SDimitry Andric int64_t getCurIntVal() const {
2300b57cec5SDimitry Andric assert(CurCode == tgtok::IntVal && "This token isn't an integer");
2310b57cec5SDimitry Andric return CurIntVal;
2320b57cec5SDimitry Andric }
getCurBinaryIntVal()2330b57cec5SDimitry Andric std::pair<int64_t, unsigned> getCurBinaryIntVal() const {
2340b57cec5SDimitry Andric assert(CurCode == tgtok::BinaryIntVal &&
2350b57cec5SDimitry Andric "This token isn't a binary integer");
2360b57cec5SDimitry Andric return std::make_pair(CurIntVal, (CurPtr - TokStart)-2);
2370b57cec5SDimitry Andric }
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric SMLoc getLoc() const;
240bdd1243dSDimitry Andric SMRange getLocRange() const;
2410b57cec5SDimitry Andric
2420b57cec5SDimitry Andric private:
2430b57cec5SDimitry Andric /// LexToken - Read the next token and return its code.
2440b57cec5SDimitry Andric tgtok::TokKind LexToken(bool FileOrLineStart = false);
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric tgtok::TokKind ReturnError(SMLoc Loc, const Twine &Msg);
2470b57cec5SDimitry Andric tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
2480b57cec5SDimitry Andric
2490b57cec5SDimitry Andric int getNextChar();
2500b57cec5SDimitry Andric int peekNextChar(int Index) const;
2510b57cec5SDimitry Andric void SkipBCPLComment();
2520b57cec5SDimitry Andric bool SkipCComment();
2530b57cec5SDimitry Andric tgtok::TokKind LexIdentifier();
2540b57cec5SDimitry Andric bool LexInclude();
2550b57cec5SDimitry Andric tgtok::TokKind LexString();
2560b57cec5SDimitry Andric tgtok::TokKind LexVarName();
2570b57cec5SDimitry Andric tgtok::TokKind LexNumber();
2580b57cec5SDimitry Andric tgtok::TokKind LexBracket();
2590b57cec5SDimitry Andric tgtok::TokKind LexExclaim();
2600b57cec5SDimitry Andric
2610b57cec5SDimitry Andric // Process EOF encountered in LexToken().
2620b57cec5SDimitry Andric // If EOF is met in an include file, then the method will update
2630b57cec5SDimitry Andric // CurPtr, CurBuf and preprocessing include stack, and return true.
2640b57cec5SDimitry Andric // If EOF is met in the top-level file, then the method will
2650b57cec5SDimitry Andric // update and check the preprocessing include stack, and return false.
2660b57cec5SDimitry Andric bool processEOF();
2670b57cec5SDimitry Andric
2680b57cec5SDimitry Andric // *** Structures and methods for preprocessing support ***
2690b57cec5SDimitry Andric
2700b57cec5SDimitry Andric // A set of macro names that are defined either via command line or
2710b57cec5SDimitry Andric // by using:
2720b57cec5SDimitry Andric // #define NAME
2730b57cec5SDimitry Andric StringSet<> DefinedMacros;
2740b57cec5SDimitry Andric
2750b57cec5SDimitry Andric // Each of #ifdef and #else directives has a descriptor associated
2760b57cec5SDimitry Andric // with it.
2770b57cec5SDimitry Andric //
2780b57cec5SDimitry Andric // An ordered list of preprocessing controls defined by #ifdef/#else
2790b57cec5SDimitry Andric // directives that are in effect currently is called preprocessing
2800b57cec5SDimitry Andric // control stack. It is represented as a vector of PreprocessorControlDesc's.
2810b57cec5SDimitry Andric //
2820b57cec5SDimitry Andric // The control stack is updated according to the following rules:
2830b57cec5SDimitry Andric //
2840b57cec5SDimitry Andric // For each #ifdef we add an element to the control stack.
2850b57cec5SDimitry Andric // For each #else we replace the top element with a descriptor
2860b57cec5SDimitry Andric // with an inverted IsDefined value.
2870b57cec5SDimitry Andric // For each #endif we pop the top element from the control stack.
2880b57cec5SDimitry Andric //
2890b57cec5SDimitry Andric // When CurPtr reaches the current buffer's end, the control stack
2900b57cec5SDimitry Andric // must be empty, i.e. #ifdef and the corresponding #endif
2910b57cec5SDimitry Andric // must be located in the same file.
2920b57cec5SDimitry Andric struct PreprocessorControlDesc {
2930b57cec5SDimitry Andric // Either tgtok::Ifdef or tgtok::Else.
2940b57cec5SDimitry Andric tgtok::TokKind Kind;
2950b57cec5SDimitry Andric
2960b57cec5SDimitry Andric // True, if the condition for this directive is true, false - otherwise.
2970b57cec5SDimitry Andric // Examples:
2980b57cec5SDimitry Andric // #ifdef NAME : true, if NAME is defined, false - otherwise.
2990b57cec5SDimitry Andric // ...
3000b57cec5SDimitry Andric // #else : false, if NAME is defined, true - otherwise.
3010b57cec5SDimitry Andric bool IsDefined;
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric // Pointer into CurBuf to the beginning of the preprocessing directive
3040b57cec5SDimitry Andric // word, e.g.:
3050b57cec5SDimitry Andric // #ifdef NAME
3060b57cec5SDimitry Andric // ^ - SrcPos
3070b57cec5SDimitry Andric SMLoc SrcPos;
3080b57cec5SDimitry Andric };
3090b57cec5SDimitry Andric
3100b57cec5SDimitry Andric // We want to disallow code like this:
3110b57cec5SDimitry Andric // file1.td:
3120b57cec5SDimitry Andric // #define NAME
3130b57cec5SDimitry Andric // #ifdef NAME
3140b57cec5SDimitry Andric // include "file2.td"
3150b57cec5SDimitry Andric // EOF
3160b57cec5SDimitry Andric // file2.td:
3170b57cec5SDimitry Andric // #endif
3180b57cec5SDimitry Andric // EOF
3190b57cec5SDimitry Andric //
3200b57cec5SDimitry Andric // To do this, we clear the preprocessing control stack on entry
3210b57cec5SDimitry Andric // to each of the included file. PrepIncludeStack is used to store
3220b57cec5SDimitry Andric // preprocessing control stacks for the current file and all its
3230b57cec5SDimitry Andric // parent files. The back() element is the preprocessing control
3240b57cec5SDimitry Andric // stack for the current file.
3250b57cec5SDimitry Andric std::vector<std::unique_ptr<std::vector<PreprocessorControlDesc>>>
3260b57cec5SDimitry Andric PrepIncludeStack;
3270b57cec5SDimitry Andric
3280b57cec5SDimitry Andric // Validate that the current preprocessing control stack is empty,
3290b57cec5SDimitry Andric // since we are about to exit a file, and pop the include stack.
3300b57cec5SDimitry Andric //
3310b57cec5SDimitry Andric // If IncludeStackMustBeEmpty is true, the include stack must be empty
3320b57cec5SDimitry Andric // after the popping, otherwise, the include stack must not be empty
3330b57cec5SDimitry Andric // after the popping. Basically, the include stack must be empty
3340b57cec5SDimitry Andric // only if we exit the "top-level" file (i.e. finish lexing).
3350b57cec5SDimitry Andric //
3360b57cec5SDimitry Andric // The method returns false, if the current preprocessing control stack
3370b57cec5SDimitry Andric // is not empty (e.g. there is an unterminated #ifdef/#else),
3380b57cec5SDimitry Andric // true - otherwise.
3390b57cec5SDimitry Andric bool prepExitInclude(bool IncludeStackMustBeEmpty);
3400b57cec5SDimitry Andric
3410b57cec5SDimitry Andric // Look ahead for a preprocessing directive starting from CurPtr. The caller
3420b57cec5SDimitry Andric // must only call this method, if *(CurPtr - 1) is '#'. If the method matches
3430b57cec5SDimitry Andric // a preprocessing directive word followed by a whitespace, then it returns
3440b57cec5SDimitry Andric // one of the internal token kinds, i.e. Ifdef, Else, Endif, Define.
3450b57cec5SDimitry Andric //
3460b57cec5SDimitry Andric // CurPtr is not adjusted by this method.
3470b57cec5SDimitry Andric tgtok::TokKind prepIsDirective() const;
3480b57cec5SDimitry Andric
3490b57cec5SDimitry Andric // Given a preprocessing token kind, adjusts CurPtr to the end
3500b57cec5SDimitry Andric // of the preprocessing directive word. Returns true, unless
3510b57cec5SDimitry Andric // an unsupported token kind is passed in.
3520b57cec5SDimitry Andric //
3530b57cec5SDimitry Andric // We use look-ahead prepIsDirective() and prepEatPreprocessorDirective()
3540b57cec5SDimitry Andric // to avoid adjusting CurPtr before we are sure that '#' is followed
3550b57cec5SDimitry Andric // by a preprocessing directive. If it is not, then we fall back to
3560b57cec5SDimitry Andric // tgtok::paste interpretation of '#'.
3570b57cec5SDimitry Andric bool prepEatPreprocessorDirective(tgtok::TokKind Kind);
3580b57cec5SDimitry Andric
3590b57cec5SDimitry Andric // The main "exit" point from the token parsing to preprocessor.
3600b57cec5SDimitry Andric //
3610b57cec5SDimitry Andric // The method is called for CurPtr, when prepIsDirective() returns
3620b57cec5SDimitry Andric // true. The first parameter matches the result of prepIsDirective(),
3630b57cec5SDimitry Andric // denoting the actual preprocessor directive to be processed.
3640b57cec5SDimitry Andric //
3650b57cec5SDimitry Andric // If the preprocessing directive disables the tokens processing, e.g.:
3660b57cec5SDimitry Andric // #ifdef NAME // NAME is undefined
3670b57cec5SDimitry Andric // then lexPreprocessor() enters the lines-skipping mode.
3680b57cec5SDimitry Andric // In this mode, it does not parse any tokens, because the code under
3690b57cec5SDimitry Andric // the #ifdef may not even be a correct tablegen code. The preprocessor
3700b57cec5SDimitry Andric // looks for lines containing other preprocessing directives, which
3710b57cec5SDimitry Andric // may be prepended with whitespaces and C-style comments. If the line
3720b57cec5SDimitry Andric // does not contain a preprocessing directive, it is skipped completely.
3730b57cec5SDimitry Andric // Otherwise, the preprocessing directive is processed by recursively
3740b57cec5SDimitry Andric // calling lexPreprocessor(). The processing of the encountered
3750b57cec5SDimitry Andric // preprocessing directives includes updating preprocessing control stack
3760b57cec5SDimitry Andric // and adding new macros into DefinedMacros set.
3770b57cec5SDimitry Andric //
3780b57cec5SDimitry Andric // The second parameter controls whether lexPreprocessor() is called from
3790b57cec5SDimitry Andric // LexToken() (true) or recursively from lexPreprocessor() (false).
3800b57cec5SDimitry Andric //
3810b57cec5SDimitry Andric // If ReturnNextLiveToken is true, the method returns the next
3820b57cec5SDimitry Andric // LEX token following the current directive or following the end
3830b57cec5SDimitry Andric // of the disabled preprocessing region corresponding to this directive.
3840b57cec5SDimitry Andric // If ReturnNextLiveToken is false, the method returns the first parameter,
3850b57cec5SDimitry Andric // unless there were errors encountered in the disabled preprocessing
3860b57cec5SDimitry Andric // region - in this case, it returns tgtok::Error.
3870b57cec5SDimitry Andric tgtok::TokKind lexPreprocessor(tgtok::TokKind Kind,
3880b57cec5SDimitry Andric bool ReturnNextLiveToken = true);
3890b57cec5SDimitry Andric
3900b57cec5SDimitry Andric // Worker method for lexPreprocessor() to skip lines after some
3910b57cec5SDimitry Andric // preprocessing directive up to the buffer end or to the directive
3920b57cec5SDimitry Andric // that re-enables token processing. The method returns true
3930b57cec5SDimitry Andric // upon processing the next directive that re-enables tokens
3940b57cec5SDimitry Andric // processing. False is returned if an error was encountered.
3950b57cec5SDimitry Andric //
3960b57cec5SDimitry Andric // Note that prepSkipRegion() calls lexPreprocessor() to process
3970b57cec5SDimitry Andric // encountered preprocessing directives. In this case, the second
3980b57cec5SDimitry Andric // parameter to lexPreprocessor() is set to false. Being passed
3990b57cec5SDimitry Andric // false ReturnNextLiveToken, lexPreprocessor() must never call
4000b57cec5SDimitry Andric // prepSkipRegion(). We assert this by passing ReturnNextLiveToken
4010b57cec5SDimitry Andric // to prepSkipRegion() and checking that it is never set to false.
4020b57cec5SDimitry Andric bool prepSkipRegion(bool MustNeverBeFalse);
4030b57cec5SDimitry Andric
4040b57cec5SDimitry Andric // Lex name of the macro after either #ifdef or #define. We could have used
4050b57cec5SDimitry Andric // LexIdentifier(), but it has special handling of "include" word, which
4060b57cec5SDimitry Andric // could result in awkward diagnostic errors. Consider:
4070b57cec5SDimitry Andric // ----
4080b57cec5SDimitry Andric // #ifdef include
4090b57cec5SDimitry Andric // class ...
4100b57cec5SDimitry Andric // ----
4110b57cec5SDimitry Andric // LexIdentifier() will engage LexInclude(), which will complain about
4120b57cec5SDimitry Andric // missing file with name "class". Instead, prepLexMacroName() will treat
4130b57cec5SDimitry Andric // "include" as a normal macro name.
4140b57cec5SDimitry Andric //
4150b57cec5SDimitry Andric // On entry, CurPtr points to the end of a preprocessing directive word.
4160b57cec5SDimitry Andric // The method allows for whitespaces between the preprocessing directive
4170b57cec5SDimitry Andric // and the macro name. The allowed whitespaces are ' ' and '\t'.
4180b57cec5SDimitry Andric //
4190b57cec5SDimitry Andric // If the first non-whitespace symbol after the preprocessing directive
4200b57cec5SDimitry Andric // is a valid start symbol for an identifier (i.e. [a-zA-Z_]), then
4210b57cec5SDimitry Andric // the method updates TokStart to the position of the first non-whitespace
4220b57cec5SDimitry Andric // symbol, sets CurPtr to the position of the macro name's last symbol,
4230b57cec5SDimitry Andric // and returns a string reference to the macro name. Otherwise,
4240b57cec5SDimitry Andric // TokStart is set to the first non-whitespace symbol after the preprocessing
4250b57cec5SDimitry Andric // directive, and the method returns an empty string reference.
4260b57cec5SDimitry Andric //
4270b57cec5SDimitry Andric // In all cases, TokStart may be used to point to the word following
4280b57cec5SDimitry Andric // the preprocessing directive.
4290b57cec5SDimitry Andric StringRef prepLexMacroName();
4300b57cec5SDimitry Andric
4310b57cec5SDimitry Andric // Skip any whitespaces starting from CurPtr. The method is used
4320b57cec5SDimitry Andric // only in the lines-skipping mode to find the first non-whitespace
4330b57cec5SDimitry Andric // symbol after or at CurPtr. Allowed whitespaces are ' ', '\t', '\n'
4340b57cec5SDimitry Andric // and '\r'. The method skips C-style comments as well, because
4350b57cec5SDimitry Andric // it is used to find the beginning of the preprocessing directive.
4360b57cec5SDimitry Andric // If we do not handle C-style comments the following code would
4370b57cec5SDimitry Andric // result in incorrect detection of a preprocessing directive:
4380b57cec5SDimitry Andric // /*
4390b57cec5SDimitry Andric // #ifdef NAME
4400b57cec5SDimitry Andric // */
4410b57cec5SDimitry Andric // As long as we skip C-style comments, the following code is correctly
4420b57cec5SDimitry Andric // recognized as a preprocessing directive:
4430b57cec5SDimitry Andric // /* first line comment
4440b57cec5SDimitry Andric // second line comment */ #ifdef NAME
4450b57cec5SDimitry Andric //
4460b57cec5SDimitry Andric // The method returns true upon reaching the first non-whitespace symbol
4470b57cec5SDimitry Andric // or EOF, CurPtr is set to point to this symbol. The method returns false,
44881ad6265SDimitry Andric // if an error occurred during skipping of a C-style comment.
4490b57cec5SDimitry Andric bool prepSkipLineBegin();
4500b57cec5SDimitry Andric
4510b57cec5SDimitry Andric // Skip any whitespaces or comments after a preprocessing directive.
4520b57cec5SDimitry Andric // The method returns true upon reaching either end of the line
4530b57cec5SDimitry Andric // or end of the file. If there is a multiline C-style comment
4540b57cec5SDimitry Andric // after the preprocessing directive, the method skips
4550b57cec5SDimitry Andric // the comment, so the final CurPtr may point to one of the next lines.
45681ad6265SDimitry Andric // The method returns false, if an error occurred during skipping
4570b57cec5SDimitry Andric // C- or C++-style comment, or a non-whitespace symbol appears
4580b57cec5SDimitry Andric // after the preprocessing directive.
4590b57cec5SDimitry Andric //
4600b57cec5SDimitry Andric // The method maybe called both during lines-skipping and tokens
4610b57cec5SDimitry Andric // processing. It actually verifies that only whitespaces or/and
4620b57cec5SDimitry Andric // comments follow a preprocessing directive.
4630b57cec5SDimitry Andric //
4640b57cec5SDimitry Andric // After the execution of this mehod, CurPtr points either to new line
4650b57cec5SDimitry Andric // symbol, buffer end or non-whitespace symbol following the preprocesing
4660b57cec5SDimitry Andric // directive.
4670b57cec5SDimitry Andric bool prepSkipDirectiveEnd();
4680b57cec5SDimitry Andric
4690b57cec5SDimitry Andric // Skip all symbols to the end of the line/file.
4700b57cec5SDimitry Andric // The method adjusts CurPtr, so that it points to either new line
4710b57cec5SDimitry Andric // symbol in the current line or the buffer end.
4720b57cec5SDimitry Andric void prepSkipToLineEnd();
4730b57cec5SDimitry Andric
4740b57cec5SDimitry Andric // Return true, if the current preprocessor control stack is such that
4750b57cec5SDimitry Andric // we should allow lexer to process the next token, false - otherwise.
4760b57cec5SDimitry Andric //
4770b57cec5SDimitry Andric // In particular, the method returns true, if all the #ifdef/#else
4780b57cec5SDimitry Andric // controls on the stack have their IsDefined member set to true.
4790b57cec5SDimitry Andric bool prepIsProcessingEnabled();
4800b57cec5SDimitry Andric
4810b57cec5SDimitry Andric // Report an error, if we reach EOF with non-empty preprocessing control
4820b57cec5SDimitry Andric // stack. This means there is no matching #endif for the previous
4830b57cec5SDimitry Andric // #ifdef/#else.
4840b57cec5SDimitry Andric void prepReportPreprocessorStackError();
4850b57cec5SDimitry Andric };
4860b57cec5SDimitry Andric
4870b57cec5SDimitry Andric } // end namespace llvm
4880b57cec5SDimitry Andric
4890b57cec5SDimitry Andric #endif
490