1 //===--- extra/module-map-checker/CoverageChecker.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class that validates a module map by checking that
10 // all headers in the corresponding directories are accounted for.
11 //
12 // This class uses a previously loaded module map object.
13 // Starting at the module map file directory, or just the include
14 // paths, if specified, it will collect the names of all the files it
15 // considers headers (no extension, .h, or .inc--if you need more, modify the
16 // ModularizeUtilities::isHeader function).
17 //  It then compares the headers against those referenced
18 // in the module map, either explicitly named, or implicitly named via an
19 // umbrella directory or umbrella file, as parsed by the ModuleMap object.
20 // If headers are found which are not referenced or covered by an umbrella
21 // directory or file, warning messages will be produced, and the doChecks
22 // function will return an error code of 1.  Other errors result in an error
23 // code of 2. If no problems are found, an error code of 0 is returned.
24 //
25 // Note that in the case of umbrella headers, this tool invokes the compiler
26 // to preprocess the file, and uses a callback to collect the header files
27 // included by the umbrella header or any of its nested includes.  If any
28 // front end options are needed for these compiler invocations, these are
29 // to be passed in via the CommandLine parameter.
30 //
// Warning messages have the form:
32 //
33 //  warning: module.modulemap does not account for file: Level3A.h
34 //
35 // Note that for the case of the module map referencing a file that does
36 // not exist, the module map parser in Clang will (at the time of this
37 // writing) display an error message.
38 //
39 // Potential problems with this program:
40 //
41 // 1. Might need a better header matching mechanism, or extensions to the
42 //    canonical file format used.
43 //
44 // 2. It might need to support additional header file extensions.
45 //
46 // Future directions:
47 //
48 // 1. Add an option to fix the problems found, writing a new module map.
49 //    Include an extra option to add unaccounted-for headers as excluded.
50 //
51 //===----------------------------------------------------------------------===//
52 
53 #include "ModularizeUtilities.h"
54 #include "clang/AST/ASTConsumer.h"
55 #include "CoverageChecker.h"
56 #include "clang/AST/ASTContext.h"
57 #include "clang/AST/RecursiveASTVisitor.h"
58 #include "clang/Basic/SourceManager.h"
59 #include "clang/Driver/Options.h"
60 #include "clang/Frontend/CompilerInstance.h"
61 #include "clang/Frontend/FrontendAction.h"
62 #include "clang/Frontend/FrontendActions.h"
63 #include "clang/Lex/PPCallbacks.h"
64 #include "clang/Lex/Preprocessor.h"
65 #include "clang/Tooling/CompilationDatabase.h"
66 #include "clang/Tooling/Tooling.h"
67 #include "llvm/Option/Option.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/FileSystem.h"
70 #include "llvm/Support/Path.h"
71 #include "llvm/Support/raw_ostream.h"
72 
73 using namespace Modularize;
74 using namespace clang;
75 using namespace clang::driver;
76 using namespace clang::driver::options;
77 using namespace clang::tooling;
78 namespace cl = llvm::cl;
79 namespace sys = llvm::sys;
80 
81 // Preprocessor callbacks.
82 // We basically just collect include files.
83 class CoverageCheckerCallbacks : public PPCallbacks {
84 public:
CoverageCheckerCallbacks(CoverageChecker & Checker)85   CoverageCheckerCallbacks(CoverageChecker &Checker) : Checker(Checker) {}
~CoverageCheckerCallbacks()86   ~CoverageCheckerCallbacks() override {}
87 
88   // Include directive callback.
InclusionDirective(SourceLocation HashLoc,const Token & IncludeTok,StringRef FileName,bool IsAngled,CharSourceRange FilenameRange,const FileEntry * File,StringRef SearchPath,StringRef RelativePath,const Module * Imported,SrcMgr::CharacteristicKind FileType)89   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
90                           StringRef FileName, bool IsAngled,
91                           CharSourceRange FilenameRange, const FileEntry *File,
92                           StringRef SearchPath, StringRef RelativePath,
93                           const Module *Imported,
94                           SrcMgr::CharacteristicKind FileType) override {
95     Checker.collectUmbrellaHeaderHeader(File->getName());
96   }
97 
98 private:
99   CoverageChecker &Checker;
100 };
101 
102 // Frontend action stuff:
103 
104 // Consumer is responsible for setting up the callbacks.
105 class CoverageCheckerConsumer : public ASTConsumer {
106 public:
CoverageCheckerConsumer(CoverageChecker & Checker,Preprocessor & PP)107   CoverageCheckerConsumer(CoverageChecker &Checker, Preprocessor &PP) {
108     // PP takes ownership.
109     PP.addPPCallbacks(std::make_unique<CoverageCheckerCallbacks>(Checker));
110   }
111 };
112 
113 class CoverageCheckerAction : public SyntaxOnlyAction {
114 public:
CoverageCheckerAction(CoverageChecker & Checker)115   CoverageCheckerAction(CoverageChecker &Checker) : Checker(Checker) {}
116 
117 protected:
CreateASTConsumer(CompilerInstance & CI,StringRef InFile)118   std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
119     StringRef InFile) override {
120     return std::make_unique<CoverageCheckerConsumer>(Checker,
121       CI.getPreprocessor());
122   }
123 
124 private:
125   CoverageChecker &Checker;
126 };
127 
128 class CoverageCheckerFrontendActionFactory : public FrontendActionFactory {
129 public:
CoverageCheckerFrontendActionFactory(CoverageChecker & Checker)130   CoverageCheckerFrontendActionFactory(CoverageChecker &Checker)
131     : Checker(Checker) {}
132 
create()133   std::unique_ptr<FrontendAction> create() override {
134     return std::make_unique<CoverageCheckerAction>(Checker);
135   }
136 
137 private:
138   CoverageChecker &Checker;
139 };
140 
141 // CoverageChecker class implementation.
142 
143 // Constructor.
CoverageChecker(StringRef ModuleMapPath,std::vector<std::string> & IncludePaths,ArrayRef<std::string> CommandLine,clang::ModuleMap * ModuleMap)144 CoverageChecker::CoverageChecker(StringRef ModuleMapPath,
145     std::vector<std::string> &IncludePaths,
146     ArrayRef<std::string> CommandLine,
147     clang::ModuleMap *ModuleMap)
148   : ModuleMapPath(ModuleMapPath), IncludePaths(IncludePaths),
149     CommandLine(CommandLine),
150     ModMap(ModuleMap) {}
151 
152 // Create instance of CoverageChecker, to simplify setting up
153 // subordinate objects.
createCoverageChecker(StringRef ModuleMapPath,std::vector<std::string> & IncludePaths,ArrayRef<std::string> CommandLine,clang::ModuleMap * ModuleMap)154 std::unique_ptr<CoverageChecker> CoverageChecker::createCoverageChecker(
155     StringRef ModuleMapPath, std::vector<std::string> &IncludePaths,
156     ArrayRef<std::string> CommandLine, clang::ModuleMap *ModuleMap) {
157 
158   return std::make_unique<CoverageChecker>(ModuleMapPath, IncludePaths,
159                                             CommandLine, ModuleMap);
160 }
161 
162 // Do checks.
163 // Starting from the directory of the module.modulemap file,
164 // Find all header files, optionally looking only at files
165 // covered by the include path options, and compare against
166 // the headers referenced by the module.modulemap file.
167 // Display warnings for unaccounted-for header files.
168 // Returns error_code of 0 if there were no errors or warnings, 1 if there
169 //   were warnings, 2 if any other problem, such as if a bad
170 //   module map path argument was specified.
doChecks()171 std::error_code CoverageChecker::doChecks() {
172   std::error_code returnValue;
173 
174   // Collect the headers referenced in the modules.
175   collectModuleHeaders();
176 
177   // Collect the file system headers.
178   if (!collectFileSystemHeaders())
179     return std::error_code(2, std::generic_category());
180 
181   // Do the checks.  These save the problematic file names.
182   findUnaccountedForHeaders();
183 
184   // Check for warnings.
185   if (!UnaccountedForHeaders.empty())
186     returnValue = std::error_code(1, std::generic_category());
187 
188   return returnValue;
189 }
190 
191 // The following functions are called by doChecks.
192 
193 // Collect module headers.
194 // Walks the modules and collects referenced headers into
195 // ModuleMapHeadersSet.
collectModuleHeaders()196 void CoverageChecker::collectModuleHeaders() {
197   for (ModuleMap::module_iterator I = ModMap->module_begin(),
198     E = ModMap->module_end();
199     I != E; ++I) {
200     collectModuleHeaders(*I->second);
201   }
202 }
203 
204 // Collect referenced headers from one module.
205 // Collects the headers referenced in the given module into
206 // ModuleMapHeadersSet.
207 // FIXME: Doesn't collect files from umbrella header.
collectModuleHeaders(const Module & Mod)208 bool CoverageChecker::collectModuleHeaders(const Module &Mod) {
209 
210   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
211     // Collect umbrella header.
212     ModuleMapHeadersSet.insert(ModularizeUtilities::getCanonicalPath(
213       UmbrellaHeader->getName()));
214     // Preprocess umbrella header and collect the headers it references.
215     if (!collectUmbrellaHeaderHeaders(UmbrellaHeader->getName()))
216       return false;
217   }
218   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
219     // Collect headers in umbrella directory.
220     if (!collectUmbrellaHeaders(UmbrellaDir->getName()))
221       return false;
222   }
223 
224   for (auto &HeaderKind : Mod.Headers)
225     for (auto &Header : HeaderKind)
226       ModuleMapHeadersSet.insert(ModularizeUtilities::getCanonicalPath(
227         Header.Entry->getName()));
228 
229   for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
230        MI != MIEnd; ++MI)
231     collectModuleHeaders(**MI);
232 
233   return true;
234 }
235 
236 // Collect headers from an umbrella directory.
collectUmbrellaHeaders(StringRef UmbrellaDirName)237 bool CoverageChecker::collectUmbrellaHeaders(StringRef UmbrellaDirName) {
238   // Initialize directory name.
239   SmallString<256> Directory(ModuleMapDirectory);
240   if (UmbrellaDirName.size())
241     sys::path::append(Directory, UmbrellaDirName);
242   if (Directory.size() == 0)
243     Directory = ".";
244   // Walk the directory.
245   std::error_code EC;
246   for (sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
247     I.increment(EC)) {
248     if (EC)
249       return false;
250     std::string File(I->path());
251     llvm::ErrorOr<sys::fs::basic_file_status> Status = I->status();
252     if (!Status)
253       return false;
254     sys::fs::file_type Type = Status->type();
255     // If the file is a directory, ignore the name and recurse.
256     if (Type == sys::fs::file_type::directory_file) {
257       if (!collectUmbrellaHeaders(File))
258         return false;
259       continue;
260     }
261     // If the file does not have a common header extension, ignore it.
262     if (!ModularizeUtilities::isHeader(File))
263       continue;
264     // Save header name.
265     ModuleMapHeadersSet.insert(ModularizeUtilities::getCanonicalPath(File));
266   }
267   return true;
268 }
269 
270 // Collect headers referenced from an umbrella file.
271 bool
collectUmbrellaHeaderHeaders(StringRef UmbrellaHeaderName)272 CoverageChecker::collectUmbrellaHeaderHeaders(StringRef UmbrellaHeaderName) {
273 
274   SmallString<256> PathBuf(ModuleMapDirectory);
275 
276   // If directory is empty, it's the current directory.
277   if (ModuleMapDirectory.length() == 0)
278     sys::fs::current_path(PathBuf);
279 
280   // Create the compilation database.
281   std::unique_ptr<CompilationDatabase> Compilations;
282   Compilations.reset(new FixedCompilationDatabase(Twine(PathBuf), CommandLine));
283 
284   std::vector<std::string> HeaderPath;
285   HeaderPath.push_back(std::string(UmbrellaHeaderName));
286 
287   // Create the tool and run the compilation.
288   ClangTool Tool(*Compilations, HeaderPath);
289   int HadErrors = Tool.run(new CoverageCheckerFrontendActionFactory(*this));
290 
291   // If we had errors, exit early.
292   return !HadErrors;
293 }
294 
295 // Called from CoverageCheckerCallbacks to track a header included
296 // from an umbrella header.
collectUmbrellaHeaderHeader(StringRef HeaderName)297 void CoverageChecker::collectUmbrellaHeaderHeader(StringRef HeaderName) {
298 
299   SmallString<256> PathBuf(ModuleMapDirectory);
300   // If directory is empty, it's the current directory.
301   if (ModuleMapDirectory.length() == 0)
302     sys::fs::current_path(PathBuf);
303   // HeaderName will have an absolute path, so if it's the module map
304   // directory, we remove it, also skipping trailing separator.
305   if (HeaderName.startswith(PathBuf))
306     HeaderName = HeaderName.substr(PathBuf.size() + 1);
307   // Save header name.
308   ModuleMapHeadersSet.insert(ModularizeUtilities::getCanonicalPath(HeaderName));
309 }
310 
311 // Collect file system header files.
312 // This function scans the file system for header files,
313 // starting at the directory of the module.modulemap file,
314 // optionally filtering out all but the files covered by
315 // the include path options.
316 // Returns true if no errors.
collectFileSystemHeaders()317 bool CoverageChecker::collectFileSystemHeaders() {
318 
319   // Get directory containing the module.modulemap file.
320   // Might be relative to current directory, absolute, or empty.
321   ModuleMapDirectory = ModularizeUtilities::getDirectoryFromPath(ModuleMapPath);
322 
323   // If no include paths specified, we do the whole tree starting
324   // at the module.modulemap directory.
325   if (IncludePaths.size() == 0) {
326     if (!collectFileSystemHeaders(StringRef("")))
327       return false;
328   }
329   else {
330     // Otherwise we only look at the sub-trees specified by the
331     // include paths.
332     for (std::vector<std::string>::const_iterator I = IncludePaths.begin(),
333       E = IncludePaths.end();
334       I != E; ++I) {
335       if (!collectFileSystemHeaders(*I))
336         return false;
337     }
338   }
339 
340   // Sort it, because different file systems might order the file differently.
341   llvm::sort(FileSystemHeaders);
342 
343   return true;
344 }
345 
346 // Collect file system header files from the given path.
347 // This function scans the file system for header files,
348 // starting at the given directory, which is assumed to be
349 // relative to the directory of the module.modulemap file.
350 // \returns True if no errors.
collectFileSystemHeaders(StringRef IncludePath)351 bool CoverageChecker::collectFileSystemHeaders(StringRef IncludePath) {
352 
353   // Initialize directory name.
354   SmallString<256> Directory(ModuleMapDirectory);
355   if (IncludePath.size())
356     sys::path::append(Directory, IncludePath);
357   if (Directory.size() == 0)
358     Directory = ".";
359   if (IncludePath.startswith("/") || IncludePath.startswith("\\") ||
360     ((IncludePath.size() >= 2) && (IncludePath[1] == ':'))) {
361     llvm::errs() << "error: Include path \"" << IncludePath
362       << "\" is not relative to the module map file.\n";
363     return false;
364   }
365 
366   // Recursively walk the directory tree.
367   std::error_code EC;
368   int Count = 0;
369   for (sys::fs::recursive_directory_iterator I(Directory.str(), EC), E; I != E;
370     I.increment(EC)) {
371     if (EC)
372       return false;
373     //std::string file(I->path());
374     StringRef file(I->path());
375     llvm::ErrorOr<sys::fs::basic_file_status> Status = I->status();
376     if (!Status)
377       return false;
378     sys::fs::file_type type = Status->type();
379     // If the file is a directory, ignore the name (but still recurses).
380     if (type == sys::fs::file_type::directory_file)
381       continue;
382     // Assume directories or files starting with '.' are private and not to
383     // be considered.
384     if ((file.find("\\.") != StringRef::npos) ||
385         (file.find("/.") != StringRef::npos))
386       continue;
387     // If the file does not have a common header extension, ignore it.
388     if (!ModularizeUtilities::isHeader(file))
389       continue;
390     // Save header name.
391     FileSystemHeaders.push_back(ModularizeUtilities::getCanonicalPath(file));
392     Count++;
393   }
394   if (Count == 0) {
395     llvm::errs() << "warning: No headers found in include path: \""
396       << IncludePath << "\"\n";
397   }
398   return true;
399 }
400 
401 // Find headers unaccounted-for in module map.
402 // This function compares the list of collected header files
403 // against those referenced in the module map.  Display
404 // warnings for unaccounted-for header files.
405 // Save unaccounted-for file list for possible.
406 // fixing action.
407 // FIXME: There probably needs to be some canonalization
408 // of file names so that header path can be correctly
409 // matched.  Also, a map could be used for the headers
410 // referenced in the module, but
findUnaccountedForHeaders()411 void CoverageChecker::findUnaccountedForHeaders() {
412   // Walk over file system headers.
413   for (std::vector<std::string>::const_iterator I = FileSystemHeaders.begin(),
414     E = FileSystemHeaders.end();
415     I != E; ++I) {
416     // Look for header in module map.
417     if (ModuleMapHeadersSet.insert(*I).second) {
418       UnaccountedForHeaders.push_back(*I);
419       llvm::errs() << "warning: " << ModuleMapPath
420         << " does not account for file: " << *I << "\n";
421     }
422   }
423 }
424