10b57cec5SDimitry Andric //===--- RewriteMacros.cpp - Rewrite macros into their expansions ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This code rewrites macro invocations into their expansions.  This gives you
100b57cec5SDimitry Andric // a macro expanded file that retains comments and #includes.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "clang/Rewrite/Frontend/Rewriters.h"
150b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
160b57cec5SDimitry Andric #include "clang/Lex/Preprocessor.h"
170b57cec5SDimitry Andric #include "clang/Rewrite/Core/Rewriter.h"
180b57cec5SDimitry Andric #include "llvm/Support/Path.h"
190b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
200b57cec5SDimitry Andric #include <cstdio>
210b57cec5SDimitry Andric #include <memory>
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric using namespace clang;
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric /// isSameToken - Return true if the two specified tokens start have the same
260b57cec5SDimitry Andric /// content.
isSameToken(Token & RawTok,Token & PPTok)270b57cec5SDimitry Andric static bool isSameToken(Token &RawTok, Token &PPTok) {
280b57cec5SDimitry Andric   // If two tokens have the same kind and the same identifier info, they are
290b57cec5SDimitry Andric   // obviously the same.
300b57cec5SDimitry Andric   if (PPTok.getKind() == RawTok.getKind() &&
310b57cec5SDimitry Andric       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
320b57cec5SDimitry Andric     return true;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric   // Otherwise, if they are different but have the same identifier info, they
350b57cec5SDimitry Andric   // are also considered to be the same.  This allows keywords and raw lexed
360b57cec5SDimitry Andric   // identifiers with the same name to be treated the same.
370b57cec5SDimitry Andric   if (PPTok.getIdentifierInfo() &&
380b57cec5SDimitry Andric       PPTok.getIdentifierInfo() == RawTok.getIdentifierInfo())
390b57cec5SDimitry Andric     return true;
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric   return false;
420b57cec5SDimitry Andric }
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric /// GetNextRawTok - Return the next raw token in the stream, skipping over
460b57cec5SDimitry Andric /// comments if ReturnComment is false.
GetNextRawTok(const std::vector<Token> & RawTokens,unsigned & CurTok,bool ReturnComment)470b57cec5SDimitry Andric static const Token &GetNextRawTok(const std::vector<Token> &RawTokens,
480b57cec5SDimitry Andric                                   unsigned &CurTok, bool ReturnComment) {
490b57cec5SDimitry Andric   assert(CurTok < RawTokens.size() && "Overran eof!");
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric   // If the client doesn't want comments and we have one, skip it.
520b57cec5SDimitry Andric   if (!ReturnComment && RawTokens[CurTok].is(tok::comment))
530b57cec5SDimitry Andric     ++CurTok;
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric   return RawTokens[CurTok++];
560b57cec5SDimitry Andric }
570b57cec5SDimitry Andric 
580b57cec5SDimitry Andric 
590b57cec5SDimitry Andric /// LexRawTokensFromMainFile - Lets all the raw tokens from the main file into
600b57cec5SDimitry Andric /// the specified vector.
LexRawTokensFromMainFile(Preprocessor & PP,std::vector<Token> & RawTokens)610b57cec5SDimitry Andric static void LexRawTokensFromMainFile(Preprocessor &PP,
620b57cec5SDimitry Andric                                      std::vector<Token> &RawTokens) {
630b57cec5SDimitry Andric   SourceManager &SM = PP.getSourceManager();
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric   // Create a lexer to lex all the tokens of the main file in raw mode.  Even
660b57cec5SDimitry Andric   // though it is in raw mode, it will not return comments.
670b57cec5SDimitry Andric   llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(SM.getMainFileID());
680b57cec5SDimitry Andric   Lexer RawLex(SM.getMainFileID(), FromFile, SM, PP.getLangOpts());
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric   // Switch on comment lexing because we really do want them.
710b57cec5SDimitry Andric   RawLex.SetCommentRetentionState(true);
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   Token RawTok;
740b57cec5SDimitry Andric   do {
750b57cec5SDimitry Andric     RawLex.LexFromRawLexer(RawTok);
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric     // If we have an identifier with no identifier info for our raw token, look
780b57cec5SDimitry Andric     // up the identifier info.  This is important for equality comparison of
790b57cec5SDimitry Andric     // identifier tokens.
800b57cec5SDimitry Andric     if (RawTok.is(tok::raw_identifier))
810b57cec5SDimitry Andric       PP.LookUpIdentifierInfo(RawTok);
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric     RawTokens.push_back(RawTok);
840b57cec5SDimitry Andric   } while (RawTok.isNot(tok::eof));
850b57cec5SDimitry Andric }
860b57cec5SDimitry Andric 
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric /// RewriteMacrosInInput - Implement -rewrite-macros mode.
RewriteMacrosInInput(Preprocessor & PP,raw_ostream * OS)890b57cec5SDimitry Andric void clang::RewriteMacrosInInput(Preprocessor &PP, raw_ostream *OS) {
900b57cec5SDimitry Andric   SourceManager &SM = PP.getSourceManager();
910b57cec5SDimitry Andric 
920b57cec5SDimitry Andric   Rewriter Rewrite;
930b57cec5SDimitry Andric   Rewrite.setSourceMgr(SM, PP.getLangOpts());
940b57cec5SDimitry Andric   RewriteBuffer &RB = Rewrite.getEditBuffer(SM.getMainFileID());
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   std::vector<Token> RawTokens;
970b57cec5SDimitry Andric   LexRawTokensFromMainFile(PP, RawTokens);
980b57cec5SDimitry Andric   unsigned CurRawTok = 0;
990b57cec5SDimitry Andric   Token RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
1000b57cec5SDimitry Andric 
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   // Get the first preprocessing token.
1030b57cec5SDimitry Andric   PP.EnterMainSourceFile();
1040b57cec5SDimitry Andric   Token PPTok;
1050b57cec5SDimitry Andric   PP.Lex(PPTok);
1060b57cec5SDimitry Andric 
1070b57cec5SDimitry Andric   // Preprocess the input file in parallel with raw lexing the main file. Ignore
1080b57cec5SDimitry Andric   // all tokens that are preprocessed from a file other than the main file (e.g.
1090b57cec5SDimitry Andric   // a header).  If we see tokens that are in the preprocessed file but not the
1100b57cec5SDimitry Andric   // lexed file, we have a macro expansion.  If we see tokens in the lexed file
1110b57cec5SDimitry Andric   // that aren't in the preprocessed view, we have macros that expand to no
1120b57cec5SDimitry Andric   // tokens, or macro arguments etc.
1130b57cec5SDimitry Andric   while (RawTok.isNot(tok::eof) || PPTok.isNot(tok::eof)) {
1140b57cec5SDimitry Andric     SourceLocation PPLoc = SM.getExpansionLoc(PPTok.getLocation());
1150b57cec5SDimitry Andric 
1160b57cec5SDimitry Andric     // If PPTok is from a different source file, ignore it.
1170b57cec5SDimitry Andric     if (!SM.isWrittenInMainFile(PPLoc)) {
1180b57cec5SDimitry Andric       PP.Lex(PPTok);
1190b57cec5SDimitry Andric       continue;
1200b57cec5SDimitry Andric     }
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric     // If the raw file hits a preprocessor directive, they will be extra tokens
1230b57cec5SDimitry Andric     // in the raw file that don't exist in the preprocsesed file.  However, we
1240b57cec5SDimitry Andric     // choose to preserve them in the output file and otherwise handle them
1250b57cec5SDimitry Andric     // specially.
1260b57cec5SDimitry Andric     if (RawTok.is(tok::hash) && RawTok.isAtStartOfLine()) {
1270b57cec5SDimitry Andric       // If this is a #warning directive or #pragma mark (GNU extensions),
1280b57cec5SDimitry Andric       // comment the line out.
1290b57cec5SDimitry Andric       if (RawTokens[CurRawTok].is(tok::identifier)) {
1300b57cec5SDimitry Andric         const IdentifierInfo *II = RawTokens[CurRawTok].getIdentifierInfo();
1310b57cec5SDimitry Andric         if (II->getName() == "warning") {
1320b57cec5SDimitry Andric           // Comment out #warning.
1330b57cec5SDimitry Andric           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
1340b57cec5SDimitry Andric         } else if (II->getName() == "pragma" &&
1350b57cec5SDimitry Andric                    RawTokens[CurRawTok+1].is(tok::identifier) &&
1360b57cec5SDimitry Andric                    (RawTokens[CurRawTok+1].getIdentifierInfo()->getName() ==
1370b57cec5SDimitry Andric                     "mark")) {
1380b57cec5SDimitry Andric           // Comment out #pragma mark.
1390b57cec5SDimitry Andric           RB.InsertTextAfter(SM.getFileOffset(RawTok.getLocation()), "//");
1400b57cec5SDimitry Andric         }
1410b57cec5SDimitry Andric       }
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric       // Otherwise, if this is a #include or some other directive, just leave it
1440b57cec5SDimitry Andric       // in the file by skipping over the line.
1450b57cec5SDimitry Andric       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
1460b57cec5SDimitry Andric       while (!RawTok.isAtStartOfLine() && RawTok.isNot(tok::eof))
1470b57cec5SDimitry Andric         RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
1480b57cec5SDimitry Andric       continue;
1490b57cec5SDimitry Andric     }
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric     // Okay, both tokens are from the same file.  Get their offsets from the
1520b57cec5SDimitry Andric     // start of the file.
1530b57cec5SDimitry Andric     unsigned PPOffs = SM.getFileOffset(PPLoc);
1540b57cec5SDimitry Andric     unsigned RawOffs = SM.getFileOffset(RawTok.getLocation());
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric     // If the offsets are the same and the token kind is the same, ignore them.
1570b57cec5SDimitry Andric     if (PPOffs == RawOffs && isSameToken(RawTok, PPTok)) {
1580b57cec5SDimitry Andric       RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
1590b57cec5SDimitry Andric       PP.Lex(PPTok);
1600b57cec5SDimitry Andric       continue;
1610b57cec5SDimitry Andric     }
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric     // If the PP token is farther along than the raw token, something was
1640b57cec5SDimitry Andric     // deleted.  Comment out the raw token.
1650b57cec5SDimitry Andric     if (RawOffs <= PPOffs) {
1660b57cec5SDimitry Andric       // Comment out a whole run of tokens instead of bracketing each one with
1670b57cec5SDimitry Andric       // comments.  Add a leading space if RawTok didn't have one.
1680b57cec5SDimitry Andric       bool HasSpace = RawTok.hasLeadingSpace();
1690b57cec5SDimitry Andric       RB.InsertTextAfter(RawOffs, &" /*"[HasSpace]);
1700b57cec5SDimitry Andric       unsigned EndPos;
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric       do {
1730b57cec5SDimitry Andric         EndPos = RawOffs+RawTok.getLength();
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric         RawTok = GetNextRawTok(RawTokens, CurRawTok, true);
1760b57cec5SDimitry Andric         RawOffs = SM.getFileOffset(RawTok.getLocation());
1770b57cec5SDimitry Andric 
1780b57cec5SDimitry Andric         if (RawTok.is(tok::comment)) {
1790b57cec5SDimitry Andric           // Skip past the comment.
1800b57cec5SDimitry Andric           RawTok = GetNextRawTok(RawTokens, CurRawTok, false);
1810b57cec5SDimitry Andric           break;
1820b57cec5SDimitry Andric         }
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric       } while (RawOffs <= PPOffs && !RawTok.isAtStartOfLine() &&
1850b57cec5SDimitry Andric                (PPOffs != RawOffs || !isSameToken(RawTok, PPTok)));
1860b57cec5SDimitry Andric 
1870b57cec5SDimitry Andric       RB.InsertTextBefore(EndPos, "*/");
1880b57cec5SDimitry Andric       continue;
1890b57cec5SDimitry Andric     }
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric     // Otherwise, there was a replacement an expansion.  Insert the new token
1920b57cec5SDimitry Andric     // in the output buffer.  Insert the whole run of new tokens at once to get
1930b57cec5SDimitry Andric     // them in the right order.
1940b57cec5SDimitry Andric     unsigned InsertPos = PPOffs;
1950b57cec5SDimitry Andric     std::string Expansion;
1960b57cec5SDimitry Andric     while (PPOffs < RawOffs) {
1970b57cec5SDimitry Andric       Expansion += ' ' + PP.getSpelling(PPTok);
1980b57cec5SDimitry Andric       PP.Lex(PPTok);
1990b57cec5SDimitry Andric       PPLoc = SM.getExpansionLoc(PPTok.getLocation());
2000b57cec5SDimitry Andric       PPOffs = SM.getFileOffset(PPLoc);
2010b57cec5SDimitry Andric     }
2020b57cec5SDimitry Andric     Expansion += ' ';
2030b57cec5SDimitry Andric     RB.InsertTextBefore(InsertPos, Expansion);
2040b57cec5SDimitry Andric   }
2050b57cec5SDimitry Andric 
2060b57cec5SDimitry Andric   // Get the buffer corresponding to MainFileID.  If we haven't changed it, then
2070b57cec5SDimitry Andric   // we are done.
2080b57cec5SDimitry Andric   if (const RewriteBuffer *RewriteBuf =
2090b57cec5SDimitry Andric       Rewrite.getRewriteBufferFor(SM.getMainFileID())) {
2100b57cec5SDimitry Andric     //printf("Changed:\n");
2110b57cec5SDimitry Andric     *OS << std::string(RewriteBuf->begin(), RewriteBuf->end());
2120b57cec5SDimitry Andric   } else {
2130b57cec5SDimitry Andric     fprintf(stderr, "No changes\n");
2140b57cec5SDimitry Andric   }
2150b57cec5SDimitry Andric   OS->flush();
2160b57cec5SDimitry Andric }
217