1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class for loading and validating a module map or
10 // header list by checking that all headers in the corresponding directories
11 // are accounted for.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Driver/Options.h"
17 #include "clang/Frontend/CompilerInstance.h"
18 #include "clang/Frontend/FrontendActions.h"
19 #include "CoverageChecker.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/Support/FileUtilities.h"
22 #include "llvm/Support/MemoryBuffer.h"
23 #include "llvm/Support/Path.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include "ModularizeUtilities.h"
26 
27 using namespace clang;
28 using namespace llvm;
29 using namespace Modularize;
30 
31 namespace {
32 // Subclass TargetOptions so we can construct it inline with
33 // the minimal option, the triple.
34 class ModuleMapTargetOptions : public clang::TargetOptions {
35 public:
ModuleMapTargetOptions()36   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37 };
38 } // namespace
39 
40 // ModularizeUtilities class implementation.
41 
42 // Constructor.
ModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)43 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
44                                          llvm::StringRef Prefix,
45                                          llvm::StringRef ProblemFilesListPath)
46     : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
47       ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
48       MissingHeaderCount(0),
49       // Init clang stuff needed for loading the module map and preprocessing.
50       LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51       DiagnosticOpts(new DiagnosticOptions()),
52       DC(llvm::errs(), DiagnosticOpts.get()),
53       Diagnostics(
54           new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
55       TargetOpts(new ModuleMapTargetOptions()),
56       Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
57       FileMgr(new FileManager(FileSystemOpts)),
58       SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
59       HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
60                                   *SourceMgr, *Diagnostics, *LangOpts,
61                                   Target.get())) {}
62 
63 // Create instance of ModularizeUtilities, to simplify setting up
64 // subordinate objects.
createModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)65 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
66     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
67     llvm::StringRef ProblemFilesListPath) {
68 
69   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
70 }
71 
72 // Load all header lists and dependencies.
loadAllHeaderListsAndDependencies()73 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74   // For each input file.
75   for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
76     llvm::StringRef InputPath = *I;
77     // If it's a module map.
78     if (InputPath.endswith(".modulemap")) {
79       // Load the module map.
80       if (std::error_code EC = loadModuleMap(InputPath))
81         return EC;
82     }
83     else {
84       // Else we assume it's a header list and load it.
85       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
86         errs() << "modularize: error: Unable to get header list '" << InputPath
87           << "': " << EC.message() << '\n';
88         return EC;
89       }
90     }
91   }
92   // If we have a problem files list.
93   if (ProblemFilesPath.size() != 0) {
94     // Load problem files list.
95     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
96       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
97         << "': " << EC.message() << '\n';
98       return EC;
99     }
100   }
101   return std::error_code();
102 }
103 
104 // Do coverage checks.
105 // For each loaded module map, do header coverage check.
106 // Starting from the directory of the module.map file,
107 // Find all header files, optionally looking only at files
108 // covered by the include path options, and compare against
109 // the headers referenced by the module.map file.
110 // Display warnings for unaccounted-for header files.
111 // Returns 0 if there were no errors or warnings, 1 if there
112 // were warnings, 2 if any other problem, such as a bad
113 // module map path argument was specified.
doCoverageCheck(std::vector<std::string> & IncludePaths,llvm::ArrayRef<std::string> CommandLine)114 std::error_code ModularizeUtilities::doCoverageCheck(
115     std::vector<std::string> &IncludePaths,
116     llvm::ArrayRef<std::string> CommandLine) {
117   int ModuleMapCount = ModuleMaps.size();
118   int ModuleMapIndex;
119   std::error_code EC;
120   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
121     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
122     auto Checker = CoverageChecker::createCoverageChecker(
123         InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
124         ModMap.get());
125     std::error_code LocalEC = Checker->doChecks();
126     if (LocalEC.value() > 0)
127       EC = LocalEC;
128   }
129   return EC;
130 }
131 
132 // Load single header list and dependencies.
loadSingleHeaderListsAndDependencies(llvm::StringRef InputPath)133 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
134     llvm::StringRef InputPath) {
135 
136   // By default, use the path component of the list file name.
137   SmallString<256> HeaderDirectory(InputPath);
138   llvm::sys::path::remove_filename(HeaderDirectory);
139   SmallString<256> CurrentDirectory;
140   llvm::sys::fs::current_path(CurrentDirectory);
141 
142   // Get the prefix if we have one.
143   if (HeaderPrefix.size() != 0)
144     HeaderDirectory = HeaderPrefix;
145 
146   // Read the header list file into a buffer.
147   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
148     MemoryBuffer::getFile(InputPath);
149   if (std::error_code EC = listBuffer.getError())
150     return EC;
151 
152   // Parse the header list into strings.
153   SmallVector<StringRef, 32> Strings;
154   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
155 
156   // Collect the header file names from the string list.
157   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
158     E = Strings.end();
159     I != E; ++I) {
160     StringRef Line = I->trim();
161     // Ignore comments and empty lines.
162     if (Line.empty() || (Line[0] == '#'))
163       continue;
164     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
165     SmallString<256> HeaderFileName;
166     // Prepend header file name prefix if it's not absolute.
167     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
168       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
169     else {
170       if (HeaderDirectory.size() != 0)
171         HeaderFileName = HeaderDirectory;
172       else
173         HeaderFileName = CurrentDirectory;
174       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
175       llvm::sys::path::native(HeaderFileName);
176     }
177     // Handle optional dependencies.
178     DependentsVector Dependents;
179     SmallVector<StringRef, 4> DependentsList;
180     TargetAndDependents.second.split(DependentsList, " ", -1, false);
181     int Count = DependentsList.size();
182     for (int Index = 0; Index < Count; ++Index) {
183       SmallString<256> Dependent;
184       if (llvm::sys::path::is_absolute(DependentsList[Index]))
185         Dependent = DependentsList[Index];
186       else {
187         if (HeaderDirectory.size() != 0)
188           Dependent = HeaderDirectory;
189         else
190           Dependent = CurrentDirectory;
191         llvm::sys::path::append(Dependent, DependentsList[Index]);
192       }
193       llvm::sys::path::native(Dependent);
194       Dependents.push_back(getCanonicalPath(Dependent.str()));
195     }
196     // Get canonical form.
197     HeaderFileName = getCanonicalPath(HeaderFileName);
198     // Save the resulting header file path and dependencies.
199     HeaderFileNames.push_back(std::string(HeaderFileName.str()));
200     Dependencies[HeaderFileName.str()] = Dependents;
201   }
202   return std::error_code();
203 }
204 
205 // Load problem header list.
loadProblemHeaderList(llvm::StringRef InputPath)206 std::error_code ModularizeUtilities::loadProblemHeaderList(
207   llvm::StringRef InputPath) {
208 
209   // By default, use the path component of the list file name.
210   SmallString<256> HeaderDirectory(InputPath);
211   llvm::sys::path::remove_filename(HeaderDirectory);
212   SmallString<256> CurrentDirectory;
213   llvm::sys::fs::current_path(CurrentDirectory);
214 
215   // Get the prefix if we have one.
216   if (HeaderPrefix.size() != 0)
217     HeaderDirectory = HeaderPrefix;
218 
219   // Read the header list file into a buffer.
220   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
221     MemoryBuffer::getFile(InputPath);
222   if (std::error_code EC = listBuffer.getError())
223     return EC;
224 
225   // Parse the header list into strings.
226   SmallVector<StringRef, 32> Strings;
227   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
228 
229   // Collect the header file names from the string list.
230   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
231     E = Strings.end();
232     I != E; ++I) {
233     StringRef Line = I->trim();
234     // Ignore comments and empty lines.
235     if (Line.empty() || (Line[0] == '#'))
236       continue;
237     SmallString<256> HeaderFileName;
238     // Prepend header file name prefix if it's not absolute.
239     if (llvm::sys::path::is_absolute(Line))
240       llvm::sys::path::native(Line, HeaderFileName);
241     else {
242       if (HeaderDirectory.size() != 0)
243         HeaderFileName = HeaderDirectory;
244       else
245         HeaderFileName = CurrentDirectory;
246       llvm::sys::path::append(HeaderFileName, Line);
247       llvm::sys::path::native(HeaderFileName);
248     }
249     // Get canonical form.
250     HeaderFileName = getCanonicalPath(HeaderFileName);
251     // Save the resulting header file path.
252     ProblemFileNames.push_back(std::string(HeaderFileName.str()));
253   }
254   return std::error_code();
255 }
256 
257 // Load single module map and extract header file list.
loadModuleMap(llvm::StringRef InputPath)258 std::error_code ModularizeUtilities::loadModuleMap(
259     llvm::StringRef InputPath) {
260   // Get file entry for module.modulemap file.
261   auto ModuleMapEntryOrErr =
262     SourceMgr->getFileManager().getFile(InputPath);
263 
264   // return error if not found.
265   if (!ModuleMapEntryOrErr) {
266     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
267     return ModuleMapEntryOrErr.getError();
268   }
269   const FileEntry *ModuleMapEntry = *ModuleMapEntryOrErr;
270 
271   // Because the module map parser uses a ForwardingDiagnosticConsumer,
272   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
273   DC.BeginSourceFile(*LangOpts, nullptr);
274 
275   // Figure out the home directory for the module map file.
276   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
277   StringRef DirName(Dir->getName());
278   if (llvm::sys::path::filename(DirName) == "Modules") {
279     DirName = llvm::sys::path::parent_path(DirName);
280     if (DirName.endswith(".framework")) {
281       if (auto DirEntry = FileMgr->getDirectory(DirName))
282         Dir = *DirEntry;
283       else
284         Dir = nullptr;
285     }
286     // FIXME: This assert can fail if there's a race between the above check
287     // and the removal of the directory.
288     assert(Dir && "parent must exist");
289   }
290 
291   std::unique_ptr<ModuleMap> ModMap;
292   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
293     Target.get(), *HeaderInfo));
294 
295   // Parse module.modulemap file into module map.
296   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
297     return std::error_code(1, std::generic_category());
298   }
299 
300   // Do matching end call.
301   DC.EndSourceFile();
302 
303   // Reset missing header count.
304   MissingHeaderCount = 0;
305 
306   if (!collectModuleMapHeaders(ModMap.get()))
307     return std::error_code(1, std::generic_category());
308 
309   // Save module map.
310   ModuleMaps.push_back(std::move(ModMap));
311 
312   // Indicate we are using module maps.
313   HasModuleMap = true;
314 
315   // Return code of 1 for missing headers.
316   if (MissingHeaderCount)
317     return std::error_code(1, std::generic_category());
318 
319   return std::error_code();
320 }
321 
322 // Collect module map headers.
323 // Walks the modules and collects referenced headers into
324 // HeaderFileNames.
collectModuleMapHeaders(clang::ModuleMap * ModMap)325 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
326   for (ModuleMap::module_iterator I = ModMap->module_begin(),
327     E = ModMap->module_end();
328     I != E; ++I) {
329     if (!collectModuleHeaders(*I->second))
330       return false;
331   }
332   return true;
333 }
334 
335 // Collect referenced headers from one module.
336 // Collects the headers referenced in the given module into
337 // HeaderFileNames.
collectModuleHeaders(const clang::Module & Mod)338 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
339 
340   // Ignore explicit modules because they often have dependencies
341   // we can't know.
342   if (Mod.IsExplicit)
343     return true;
344 
345   // Treat headers in umbrella directory as dependencies.
346   DependentsVector UmbrellaDependents;
347 
348   // Recursively do submodules.
349   for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
350        MI != MIEnd; ++MI)
351     collectModuleHeaders(**MI);
352 
353   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
354     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
355     // Collect umbrella header.
356     HeaderFileNames.push_back(HeaderPath);
357 
358     // FUTURE: When needed, umbrella header header collection goes here.
359   }
360   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
361     // If there normal headers, assume these are umbrellas and skip collection.
362     if (Mod.Headers->size() == 0) {
363       // Collect headers in umbrella directory.
364       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
365         return false;
366     }
367   }
368 
369   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370   // assuming they are marked as such either because of unsuitability for
371   // modules or because they are meant to be included by another header,
372   // and thus should be ignored by modularize.
373 
374   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
375 
376   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
377     DependentsVector NormalDependents;
378     // Collect normal header.
379     const clang::Module::Header &Header(
380       Mod.Headers[clang::Module::HK_Normal][Index]);
381     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
382     HeaderFileNames.push_back(HeaderPath);
383   }
384 
385   int MissingCountThisModule = Mod.MissingHeaders.size();
386 
387   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
388     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
389     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
390     errs() << Loc.printToString(*SourceMgr)
391       << ": error : Header not found: " << MissingFile << "\n";
392   }
393 
394   MissingHeaderCount += MissingCountThisModule;
395 
396   return true;
397 }
398 
399 // Collect headers from an umbrella directory.
collectUmbrellaHeaders(StringRef UmbrellaDirName,DependentsVector & Dependents)400 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
401   DependentsVector &Dependents) {
402   // Initialize directory name.
403   SmallString<256> Directory(UmbrellaDirName);
404   // Walk the directory.
405   std::error_code EC;
406   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
407     I.increment(EC)) {
408     if (EC)
409       return false;
410     std::string File(I->path());
411     llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
412     if (!Status)
413       return false;
414     llvm::sys::fs::file_type Type = Status->type();
415     // If the file is a directory, ignore the name and recurse.
416     if (Type == llvm::sys::fs::file_type::directory_file) {
417       if (!collectUmbrellaHeaders(File, Dependents))
418         return false;
419       continue;
420     }
421     // If the file does not have a common header extension, ignore it.
422     if (!isHeader(File))
423       continue;
424     // Save header name.
425     std::string HeaderPath = getCanonicalPath(File);
426     Dependents.push_back(HeaderPath);
427   }
428   return true;
429 }
430 
431 // Replace .. embedded in path for purposes of having
432 // a canonical path.
replaceDotDot(StringRef Path)433 static std::string replaceDotDot(StringRef Path) {
434   SmallString<128> Buffer;
435   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
436     E = llvm::sys::path::end(Path);
437   while (B != E) {
438     if (B->compare(".") == 0) {
439     }
440     else if (B->compare("..") == 0)
441       llvm::sys::path::remove_filename(Buffer);
442     else
443       llvm::sys::path::append(Buffer, *B);
444     ++B;
445   }
446   if (Path.endswith("/") || Path.endswith("\\"))
447     Buffer.append(1, Path.back());
448   return Buffer.c_str();
449 }
450 
451 // Convert header path to canonical form.
452 // The canonical form is basically just use forward slashes, and remove "./".
453 // \param FilePath The file path, relative to the module map directory.
454 // \returns The file path in canonical form.
getCanonicalPath(StringRef FilePath)455 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
456   std::string Tmp(replaceDotDot(FilePath));
457   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
458   StringRef Tmp2(Tmp);
459   if (Tmp2.startswith("./"))
460     Tmp = std::string(Tmp2.substr(2));
461   return Tmp;
462 }
463 
464 // Check for header file extension.
465 // If the file extension is .h, .inc, or missing, it's
466 // assumed to be a header.
467 // \param FileName The file name.  Must not be a directory.
468 // \returns true if it has a header extension or no extension.
isHeader(StringRef FileName)469 bool ModularizeUtilities::isHeader(StringRef FileName) {
470   StringRef Extension = llvm::sys::path::extension(FileName);
471   if (Extension.size() == 0)
472     return true;
473   if (Extension.equals_lower(".h"))
474     return true;
475   if (Extension.equals_lower(".inc"))
476     return true;
477   return false;
478 }
479 
480 // Get directory path component from file path.
481 // \returns the component of the given path, which will be
482 // relative if the given path is relative, absolute if the
483 // given path is absolute, or "." if the path has no leading
484 // path component.
getDirectoryFromPath(StringRef Path)485 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
486   SmallString<256> Directory(Path);
487   sys::path::remove_filename(Directory);
488   if (Directory.size() == 0)
489     return ".";
490   return std::string(Directory.str());
491 }
492 
493 // Add unique problem file.
494 // Also standardizes the path.
addUniqueProblemFile(std::string FilePath)495 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
496   FilePath = getCanonicalPath(FilePath);
497   // Don't add if already present.
498   for(auto &TestFilePath : ProblemFileNames) {
499     if (TestFilePath == FilePath)
500       return;
501   }
502   ProblemFileNames.push_back(FilePath);
503 }
504 
505 // Add file with no compile errors.
506 // Also standardizes the path.
addNoCompileErrorsFile(std::string FilePath)507 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
508   FilePath = getCanonicalPath(FilePath);
509   GoodFileNames.push_back(FilePath);
510 }
511 
512 // List problem files.
displayProblemFiles()513 void ModularizeUtilities::displayProblemFiles() {
514   errs() << "\nThese are the files with possible errors:\n\n";
515   for (auto &ProblemFile : ProblemFileNames) {
516     errs() << ProblemFile << "\n";
517   }
518 }
519 
520 // List files with no problems.
displayGoodFiles()521 void ModularizeUtilities::displayGoodFiles() {
522   errs() << "\nThese are the files with no detected errors:\n\n";
523   for (auto &GoodFile : HeaderFileNames) {
524     bool Good = true;
525     for (auto &ProblemFile : ProblemFileNames) {
526       if (ProblemFile == GoodFile) {
527         Good = false;
528         break;
529       }
530     }
531     if (Good)
532       errs() << GoodFile << "\n";
533   }
534 }
535 
536 // List files with problem files commented out.
displayCombinedFiles()537 void ModularizeUtilities::displayCombinedFiles() {
538   errs() <<
539     "\nThese are the combined files, with problem files preceded by #:\n\n";
540   for (auto &File : HeaderFileNames) {
541     bool Good = true;
542     for (auto &ProblemFile : ProblemFileNames) {
543       if (ProblemFile == File) {
544         Good = false;
545         break;
546       }
547     }
548     errs() << (Good ? "" : "#") << File << "\n";
549   }
550 }
551