1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27 
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31 
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
ModuleMapTargetOptions()37   ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40 
41 // ModularizeUtilities class implementation.
42 
43 // Constructor.
ModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
45                                          llvm::StringRef Prefix,
46                                          llvm::StringRef ProblemFilesListPath)
47     : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
48       ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
49       MissingHeaderCount(0),
50       // Init clang stuff needed for loading the module map and preprocessing.
51       LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
52       DiagnosticOpts(new DiagnosticOptions()),
53       DC(llvm::errs(), DiagnosticOpts.get()),
54       Diagnostics(
55           new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
56       TargetOpts(new ModuleMapTargetOptions()),
57       Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
58       FileMgr(new FileManager(FileSystemOpts)),
59       SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
60       HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
61                                   *SourceMgr, *Diagnostics, *LangOpts,
62                                   Target.get())) {}
63 
64 // Create instance of ModularizeUtilities, to simplify setting up
65 // subordinate objects.
createModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
67     std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
68     llvm::StringRef ProblemFilesListPath) {
69 
70   return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
71 }
72 
73 // Load all header lists and dependencies.
loadAllHeaderListsAndDependencies()74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
75   // For each input file.
76   for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
77     llvm::StringRef InputPath = *I;
78     // If it's a module map.
79     if (InputPath.endswith(".modulemap")) {
80       // Load the module map.
81       if (std::error_code EC = loadModuleMap(InputPath))
82         return EC;
83     }
84     else {
85       // Else we assume it's a header list and load it.
86       if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
87         errs() << "modularize: error: Unable to get header list '" << InputPath
88           << "': " << EC.message() << '\n';
89         return EC;
90       }
91     }
92   }
93   // If we have a problem files list.
94   if (ProblemFilesPath.size() != 0) {
95     // Load problem files list.
96     if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
97       errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
98         << "': " << EC.message() << '\n';
99       return EC;
100     }
101   }
102   return std::error_code();
103 }
104 
105 // Do coverage checks.
106 // For each loaded module map, do header coverage check.
107 // Starting from the directory of the module.map file,
108 // Find all header files, optionally looking only at files
109 // covered by the include path options, and compare against
110 // the headers referenced by the module.map file.
111 // Display warnings for unaccounted-for header files.
112 // Returns 0 if there were no errors or warnings, 1 if there
113 // were warnings, 2 if any other problem, such as a bad
114 // module map path argument was specified.
doCoverageCheck(std::vector<std::string> & IncludePaths,llvm::ArrayRef<std::string> CommandLine)115 std::error_code ModularizeUtilities::doCoverageCheck(
116     std::vector<std::string> &IncludePaths,
117     llvm::ArrayRef<std::string> CommandLine) {
118   int ModuleMapCount = ModuleMaps.size();
119   int ModuleMapIndex;
120   std::error_code EC;
121   for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
122     std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
123     auto Checker = CoverageChecker::createCoverageChecker(
124         InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
125         ModMap.get());
126     std::error_code LocalEC = Checker->doChecks();
127     if (LocalEC.value() > 0)
128       EC = LocalEC;
129   }
130   return EC;
131 }
132 
133 // Load single header list and dependencies.
loadSingleHeaderListsAndDependencies(llvm::StringRef InputPath)134 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
135     llvm::StringRef InputPath) {
136 
137   // By default, use the path component of the list file name.
138   SmallString<256> HeaderDirectory(InputPath);
139   llvm::sys::path::remove_filename(HeaderDirectory);
140   SmallString<256> CurrentDirectory;
141   llvm::sys::fs::current_path(CurrentDirectory);
142 
143   // Get the prefix if we have one.
144   if (HeaderPrefix.size() != 0)
145     HeaderDirectory = HeaderPrefix;
146 
147   // Read the header list file into a buffer.
148   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
149     MemoryBuffer::getFile(InputPath);
150   if (std::error_code EC = listBuffer.getError())
151     return EC;
152 
153   // Parse the header list into strings.
154   SmallVector<StringRef, 32> Strings;
155   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
156 
157   // Collect the header file names from the string list.
158   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
159     E = Strings.end();
160     I != E; ++I) {
161     StringRef Line = I->trim();
162     // Ignore comments and empty lines.
163     if (Line.empty() || (Line[0] == '#'))
164       continue;
165     std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
166     SmallString<256> HeaderFileName;
167     // Prepend header file name prefix if it's not absolute.
168     if (llvm::sys::path::is_absolute(TargetAndDependents.first))
169       llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
170     else {
171       if (HeaderDirectory.size() != 0)
172         HeaderFileName = HeaderDirectory;
173       else
174         HeaderFileName = CurrentDirectory;
175       llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
176       llvm::sys::path::native(HeaderFileName);
177     }
178     // Handle optional dependencies.
179     DependentsVector Dependents;
180     SmallVector<StringRef, 4> DependentsList;
181     TargetAndDependents.second.split(DependentsList, " ", -1, false);
182     int Count = DependentsList.size();
183     for (int Index = 0; Index < Count; ++Index) {
184       SmallString<256> Dependent;
185       if (llvm::sys::path::is_absolute(DependentsList[Index]))
186         Dependent = DependentsList[Index];
187       else {
188         if (HeaderDirectory.size() != 0)
189           Dependent = HeaderDirectory;
190         else
191           Dependent = CurrentDirectory;
192         llvm::sys::path::append(Dependent, DependentsList[Index]);
193       }
194       llvm::sys::path::native(Dependent);
195       Dependents.push_back(getCanonicalPath(Dependent.str()));
196     }
197     // Get canonical form.
198     HeaderFileName = getCanonicalPath(HeaderFileName);
199     // Save the resulting header file path and dependencies.
200     HeaderFileNames.push_back(HeaderFileName.str());
201     Dependencies[HeaderFileName.str()] = Dependents;
202   }
203   return std::error_code();
204 }
205 
206 // Load problem header list.
loadProblemHeaderList(llvm::StringRef InputPath)207 std::error_code ModularizeUtilities::loadProblemHeaderList(
208   llvm::StringRef InputPath) {
209 
210   // By default, use the path component of the list file name.
211   SmallString<256> HeaderDirectory(InputPath);
212   llvm::sys::path::remove_filename(HeaderDirectory);
213   SmallString<256> CurrentDirectory;
214   llvm::sys::fs::current_path(CurrentDirectory);
215 
216   // Get the prefix if we have one.
217   if (HeaderPrefix.size() != 0)
218     HeaderDirectory = HeaderPrefix;
219 
220   // Read the header list file into a buffer.
221   ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
222     MemoryBuffer::getFile(InputPath);
223   if (std::error_code EC = listBuffer.getError())
224     return EC;
225 
226   // Parse the header list into strings.
227   SmallVector<StringRef, 32> Strings;
228   listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
229 
230   // Collect the header file names from the string list.
231   for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
232     E = Strings.end();
233     I != E; ++I) {
234     StringRef Line = I->trim();
235     // Ignore comments and empty lines.
236     if (Line.empty() || (Line[0] == '#'))
237       continue;
238     SmallString<256> HeaderFileName;
239     // Prepend header file name prefix if it's not absolute.
240     if (llvm::sys::path::is_absolute(Line))
241       llvm::sys::path::native(Line, HeaderFileName);
242     else {
243       if (HeaderDirectory.size() != 0)
244         HeaderFileName = HeaderDirectory;
245       else
246         HeaderFileName = CurrentDirectory;
247       llvm::sys::path::append(HeaderFileName, Line);
248       llvm::sys::path::native(HeaderFileName);
249     }
250     // Get canonical form.
251     HeaderFileName = getCanonicalPath(HeaderFileName);
252     // Save the resulting header file path.
253     ProblemFileNames.push_back(HeaderFileName.str());
254   }
255   return std::error_code();
256 }
257 
258 // Load single module map and extract header file list.
loadModuleMap(llvm::StringRef InputPath)259 std::error_code ModularizeUtilities::loadModuleMap(
260     llvm::StringRef InputPath) {
261   // Get file entry for module.modulemap file.
262   const FileEntry *ModuleMapEntry =
263     SourceMgr->getFileManager().getFile(InputPath);
264 
265   // return error if not found.
266   if (!ModuleMapEntry) {
267     llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
268     return std::error_code(1, std::generic_category());
269   }
270 
271   // Because the module map parser uses a ForwardingDiagnosticConsumer,
272   // which doesn't forward the BeginSourceFile call, we do it explicitly here.
273   DC.BeginSourceFile(*LangOpts, nullptr);
274 
275   // Figure out the home directory for the module map file.
276   const DirectoryEntry *Dir = ModuleMapEntry->getDir();
277   StringRef DirName(Dir->getName());
278   if (llvm::sys::path::filename(DirName) == "Modules") {
279     DirName = llvm::sys::path::parent_path(DirName);
280     if (DirName.endswith(".framework"))
281       Dir = FileMgr->getDirectory(DirName);
282     // FIXME: This assert can fail if there's a race between the above check
283     // and the removal of the directory.
284     assert(Dir && "parent must exist");
285   }
286 
287   std::unique_ptr<ModuleMap> ModMap;
288   ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
289     Target.get(), *HeaderInfo));
290 
291   // Parse module.modulemap file into module map.
292   if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
293     return std::error_code(1, std::generic_category());
294   }
295 
296   // Do matching end call.
297   DC.EndSourceFile();
298 
299   // Reset missing header count.
300   MissingHeaderCount = 0;
301 
302   if (!collectModuleMapHeaders(ModMap.get()))
303     return std::error_code(1, std::generic_category());
304 
305   // Save module map.
306   ModuleMaps.push_back(std::move(ModMap));
307 
308   // Indicate we are using module maps.
309   HasModuleMap = true;
310 
311   // Return code of 1 for missing headers.
312   if (MissingHeaderCount)
313     return std::error_code(1, std::generic_category());
314 
315   return std::error_code();
316 }
317 
318 // Collect module map headers.
319 // Walks the modules and collects referenced headers into
320 // HeaderFileNames.
collectModuleMapHeaders(clang::ModuleMap * ModMap)321 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
322   for (ModuleMap::module_iterator I = ModMap->module_begin(),
323     E = ModMap->module_end();
324     I != E; ++I) {
325     if (!collectModuleHeaders(*I->second))
326       return false;
327   }
328   return true;
329 }
330 
331 // Collect referenced headers from one module.
332 // Collects the headers referenced in the given module into
333 // HeaderFileNames.
collectModuleHeaders(const clang::Module & Mod)334 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
335 
336   // Ignore explicit modules because they often have dependencies
337   // we can't know.
338   if (Mod.IsExplicit)
339     return true;
340 
341   // Treat headers in umbrella directory as dependencies.
342   DependentsVector UmbrellaDependents;
343 
344   // Recursively do submodules.
345   for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
346        MI != MIEnd; ++MI)
347     collectModuleHeaders(**MI);
348 
349   if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
350     std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
351     // Collect umbrella header.
352     HeaderFileNames.push_back(HeaderPath);
353 
354     // FUTURE: When needed, umbrella header header collection goes here.
355   }
356   else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
357     // If there normal headers, assume these are umbrellas and skip collection.
358     if (Mod.Headers->size() == 0) {
359       // Collect headers in umbrella directory.
360       if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
361         return false;
362     }
363   }
364 
365   // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
366   // assuming they are marked as such either because of unsuitability for
367   // modules or because they are meant to be included by another header,
368   // and thus should be ignored by modularize.
369 
370   int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
371 
372   for (int Index = 0; Index < NormalHeaderCount; ++Index) {
373     DependentsVector NormalDependents;
374     // Collect normal header.
375     const clang::Module::Header &Header(
376       Mod.Headers[clang::Module::HK_Normal][Index]);
377     std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
378     HeaderFileNames.push_back(HeaderPath);
379   }
380 
381   int MissingCountThisModule = Mod.MissingHeaders.size();
382 
383   for (int Index = 0; Index < MissingCountThisModule; ++Index) {
384     std::string MissingFile = Mod.MissingHeaders[Index].FileName;
385     SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
386     errs() << Loc.printToString(*SourceMgr)
387       << ": error : Header not found: " << MissingFile << "\n";
388   }
389 
390   MissingHeaderCount += MissingCountThisModule;
391 
392   return true;
393 }
394 
395 // Collect headers from an umbrella directory.
collectUmbrellaHeaders(StringRef UmbrellaDirName,DependentsVector & Dependents)396 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
397   DependentsVector &Dependents) {
398   // Initialize directory name.
399   SmallString<256> Directory(UmbrellaDirName);
400   // Walk the directory.
401   std::error_code EC;
402   for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
403     I.increment(EC)) {
404     if (EC)
405       return false;
406     std::string File(I->path());
407     llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
408     if (!Status)
409       return false;
410     llvm::sys::fs::file_type Type = Status->type();
411     // If the file is a directory, ignore the name and recurse.
412     if (Type == llvm::sys::fs::file_type::directory_file) {
413       if (!collectUmbrellaHeaders(File, Dependents))
414         return false;
415       continue;
416     }
417     // If the file does not have a common header extension, ignore it.
418     if (!isHeader(File))
419       continue;
420     // Save header name.
421     std::string HeaderPath = getCanonicalPath(File);
422     Dependents.push_back(HeaderPath);
423   }
424   return true;
425 }
426 
427 // Replace .. embedded in path for purposes of having
428 // a canonical path.
replaceDotDot(StringRef Path)429 static std::string replaceDotDot(StringRef Path) {
430   SmallString<128> Buffer;
431   llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
432     E = llvm::sys::path::end(Path);
433   while (B != E) {
434     if (B->compare(".") == 0) {
435     }
436     else if (B->compare("..") == 0)
437       llvm::sys::path::remove_filename(Buffer);
438     else
439       llvm::sys::path::append(Buffer, *B);
440     ++B;
441   }
442   if (Path.endswith("/") || Path.endswith("\\"))
443     Buffer.append(1, Path.back());
444   return Buffer.c_str();
445 }
446 
447 // Convert header path to canonical form.
448 // The canonical form is basically just use forward slashes, and remove "./".
449 // \param FilePath The file path, relative to the module map directory.
450 // \returns The file path in canonical form.
getCanonicalPath(StringRef FilePath)451 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
452   std::string Tmp(replaceDotDot(FilePath));
453   std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
454   StringRef Tmp2(Tmp);
455   if (Tmp2.startswith("./"))
456     Tmp = Tmp2.substr(2);
457   return Tmp;
458 }
459 
460 // Check for header file extension.
461 // If the file extension is .h, .inc, or missing, it's
462 // assumed to be a header.
463 // \param FileName The file name.  Must not be a directory.
464 // \returns true if it has a header extension or no extension.
isHeader(StringRef FileName)465 bool ModularizeUtilities::isHeader(StringRef FileName) {
466   StringRef Extension = llvm::sys::path::extension(FileName);
467   if (Extension.size() == 0)
468     return true;
469   if (Extension.equals_lower(".h"))
470     return true;
471   if (Extension.equals_lower(".inc"))
472     return true;
473   return false;
474 }
475 
476 // Get directory path component from file path.
477 // \returns the component of the given path, which will be
478 // relative if the given path is relative, absolute if the
479 // given path is absolute, or "." if the path has no leading
480 // path component.
getDirectoryFromPath(StringRef Path)481 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
482   SmallString<256> Directory(Path);
483   sys::path::remove_filename(Directory);
484   if (Directory.size() == 0)
485     return ".";
486   return Directory.str();
487 }
488 
489 // Add unique problem file.
490 // Also standardizes the path.
addUniqueProblemFile(std::string FilePath)491 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
492   FilePath = getCanonicalPath(FilePath);
493   // Don't add if already present.
494   for(auto &TestFilePath : ProblemFileNames) {
495     if (TestFilePath == FilePath)
496       return;
497   }
498   ProblemFileNames.push_back(FilePath);
499 }
500 
501 // Add file with no compile errors.
502 // Also standardizes the path.
addNoCompileErrorsFile(std::string FilePath)503 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
504   FilePath = getCanonicalPath(FilePath);
505   GoodFileNames.push_back(FilePath);
506 }
507 
508 // List problem files.
displayProblemFiles()509 void ModularizeUtilities::displayProblemFiles() {
510   errs() << "\nThese are the files with possible errors:\n\n";
511   for (auto &ProblemFile : ProblemFileNames) {
512     errs() << ProblemFile << "\n";
513   }
514 }
515 
516 // List files with no problems.
displayGoodFiles()517 void ModularizeUtilities::displayGoodFiles() {
518   errs() << "\nThese are the files with no detected errors:\n\n";
519   for (auto &GoodFile : HeaderFileNames) {
520     bool Good = true;
521     for (auto &ProblemFile : ProblemFileNames) {
522       if (ProblemFile == GoodFile) {
523         Good = false;
524         break;
525       }
526     }
527     if (Good)
528       errs() << GoodFile << "\n";
529   }
530 }
531 
532 // List files with problem files commented out.
displayCombinedFiles()533 void ModularizeUtilities::displayCombinedFiles() {
534   errs() <<
535     "\nThese are the combined files, with problem files preceded by #:\n\n";
536   for (auto &File : HeaderFileNames) {
537     bool Good = true;
538     for (auto &ProblemFile : ProblemFileNames) {
539       if (ProblemFile == File) {
540         Good = false;
541         break;
542       }
543     }
544     errs() << (Good ? "" : "#") << File << "\n";
545   }
546 }
547