1 //===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class for loading and validating a module map or
11 // header list by checking that all headers in the corresponding directories
12 // are accounted for.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Driver/Options.h"
18 #include "clang/Frontend/CompilerInstance.h"
19 #include "clang/Frontend/FrontendActions.h"
20 #include "CoverageChecker.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/Support/FileUtilities.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/Path.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "ModularizeUtilities.h"
27
28 using namespace clang;
29 using namespace llvm;
30 using namespace Modularize;
31
32 namespace {
33 // Subclass TargetOptions so we can construct it inline with
34 // the minimal option, the triple.
35 class ModuleMapTargetOptions : public clang::TargetOptions {
36 public:
ModuleMapTargetOptions()37 ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
38 };
39 } // namespace
40
41 // ModularizeUtilities class implementation.
42
43 // Constructor.
ModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)44 ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
45 llvm::StringRef Prefix,
46 llvm::StringRef ProblemFilesListPath)
47 : InputFilePaths(InputPaths), HeaderPrefix(Prefix),
48 ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
49 MissingHeaderCount(0),
50 // Init clang stuff needed for loading the module map and preprocessing.
51 LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
52 DiagnosticOpts(new DiagnosticOptions()),
53 DC(llvm::errs(), DiagnosticOpts.get()),
54 Diagnostics(
55 new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
56 TargetOpts(new ModuleMapTargetOptions()),
57 Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
58 FileMgr(new FileManager(FileSystemOpts)),
59 SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
60 HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
61 *SourceMgr, *Diagnostics, *LangOpts,
62 Target.get())) {}
63
64 // Create instance of ModularizeUtilities, to simplify setting up
65 // subordinate objects.
createModularizeUtilities(std::vector<std::string> & InputPaths,llvm::StringRef Prefix,llvm::StringRef ProblemFilesListPath)66 ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
67 std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
68 llvm::StringRef ProblemFilesListPath) {
69
70 return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
71 }
72
73 // Load all header lists and dependencies.
loadAllHeaderListsAndDependencies()74 std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
75 // For each input file.
76 for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
77 llvm::StringRef InputPath = *I;
78 // If it's a module map.
79 if (InputPath.endswith(".modulemap")) {
80 // Load the module map.
81 if (std::error_code EC = loadModuleMap(InputPath))
82 return EC;
83 }
84 else {
85 // Else we assume it's a header list and load it.
86 if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
87 errs() << "modularize: error: Unable to get header list '" << InputPath
88 << "': " << EC.message() << '\n';
89 return EC;
90 }
91 }
92 }
93 // If we have a problem files list.
94 if (ProblemFilesPath.size() != 0) {
95 // Load problem files list.
96 if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
97 errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
98 << "': " << EC.message() << '\n';
99 return EC;
100 }
101 }
102 return std::error_code();
103 }
104
105 // Do coverage checks.
106 // For each loaded module map, do header coverage check.
107 // Starting from the directory of the module.map file,
108 // Find all header files, optionally looking only at files
109 // covered by the include path options, and compare against
110 // the headers referenced by the module.map file.
111 // Display warnings for unaccounted-for header files.
112 // Returns 0 if there were no errors or warnings, 1 if there
113 // were warnings, 2 if any other problem, such as a bad
114 // module map path argument was specified.
doCoverageCheck(std::vector<std::string> & IncludePaths,llvm::ArrayRef<std::string> CommandLine)115 std::error_code ModularizeUtilities::doCoverageCheck(
116 std::vector<std::string> &IncludePaths,
117 llvm::ArrayRef<std::string> CommandLine) {
118 int ModuleMapCount = ModuleMaps.size();
119 int ModuleMapIndex;
120 std::error_code EC;
121 for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
122 std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
123 auto Checker = CoverageChecker::createCoverageChecker(
124 InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
125 ModMap.get());
126 std::error_code LocalEC = Checker->doChecks();
127 if (LocalEC.value() > 0)
128 EC = LocalEC;
129 }
130 return EC;
131 }
132
133 // Load single header list and dependencies.
loadSingleHeaderListsAndDependencies(llvm::StringRef InputPath)134 std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
135 llvm::StringRef InputPath) {
136
137 // By default, use the path component of the list file name.
138 SmallString<256> HeaderDirectory(InputPath);
139 llvm::sys::path::remove_filename(HeaderDirectory);
140 SmallString<256> CurrentDirectory;
141 llvm::sys::fs::current_path(CurrentDirectory);
142
143 // Get the prefix if we have one.
144 if (HeaderPrefix.size() != 0)
145 HeaderDirectory = HeaderPrefix;
146
147 // Read the header list file into a buffer.
148 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
149 MemoryBuffer::getFile(InputPath);
150 if (std::error_code EC = listBuffer.getError())
151 return EC;
152
153 // Parse the header list into strings.
154 SmallVector<StringRef, 32> Strings;
155 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
156
157 // Collect the header file names from the string list.
158 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
159 E = Strings.end();
160 I != E; ++I) {
161 StringRef Line = I->trim();
162 // Ignore comments and empty lines.
163 if (Line.empty() || (Line[0] == '#'))
164 continue;
165 std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
166 SmallString<256> HeaderFileName;
167 // Prepend header file name prefix if it's not absolute.
168 if (llvm::sys::path::is_absolute(TargetAndDependents.first))
169 llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
170 else {
171 if (HeaderDirectory.size() != 0)
172 HeaderFileName = HeaderDirectory;
173 else
174 HeaderFileName = CurrentDirectory;
175 llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
176 llvm::sys::path::native(HeaderFileName);
177 }
178 // Handle optional dependencies.
179 DependentsVector Dependents;
180 SmallVector<StringRef, 4> DependentsList;
181 TargetAndDependents.second.split(DependentsList, " ", -1, false);
182 int Count = DependentsList.size();
183 for (int Index = 0; Index < Count; ++Index) {
184 SmallString<256> Dependent;
185 if (llvm::sys::path::is_absolute(DependentsList[Index]))
186 Dependent = DependentsList[Index];
187 else {
188 if (HeaderDirectory.size() != 0)
189 Dependent = HeaderDirectory;
190 else
191 Dependent = CurrentDirectory;
192 llvm::sys::path::append(Dependent, DependentsList[Index]);
193 }
194 llvm::sys::path::native(Dependent);
195 Dependents.push_back(getCanonicalPath(Dependent.str()));
196 }
197 // Get canonical form.
198 HeaderFileName = getCanonicalPath(HeaderFileName);
199 // Save the resulting header file path and dependencies.
200 HeaderFileNames.push_back(HeaderFileName.str());
201 Dependencies[HeaderFileName.str()] = Dependents;
202 }
203 return std::error_code();
204 }
205
206 // Load problem header list.
loadProblemHeaderList(llvm::StringRef InputPath)207 std::error_code ModularizeUtilities::loadProblemHeaderList(
208 llvm::StringRef InputPath) {
209
210 // By default, use the path component of the list file name.
211 SmallString<256> HeaderDirectory(InputPath);
212 llvm::sys::path::remove_filename(HeaderDirectory);
213 SmallString<256> CurrentDirectory;
214 llvm::sys::fs::current_path(CurrentDirectory);
215
216 // Get the prefix if we have one.
217 if (HeaderPrefix.size() != 0)
218 HeaderDirectory = HeaderPrefix;
219
220 // Read the header list file into a buffer.
221 ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
222 MemoryBuffer::getFile(InputPath);
223 if (std::error_code EC = listBuffer.getError())
224 return EC;
225
226 // Parse the header list into strings.
227 SmallVector<StringRef, 32> Strings;
228 listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
229
230 // Collect the header file names from the string list.
231 for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
232 E = Strings.end();
233 I != E; ++I) {
234 StringRef Line = I->trim();
235 // Ignore comments and empty lines.
236 if (Line.empty() || (Line[0] == '#'))
237 continue;
238 SmallString<256> HeaderFileName;
239 // Prepend header file name prefix if it's not absolute.
240 if (llvm::sys::path::is_absolute(Line))
241 llvm::sys::path::native(Line, HeaderFileName);
242 else {
243 if (HeaderDirectory.size() != 0)
244 HeaderFileName = HeaderDirectory;
245 else
246 HeaderFileName = CurrentDirectory;
247 llvm::sys::path::append(HeaderFileName, Line);
248 llvm::sys::path::native(HeaderFileName);
249 }
250 // Get canonical form.
251 HeaderFileName = getCanonicalPath(HeaderFileName);
252 // Save the resulting header file path.
253 ProblemFileNames.push_back(HeaderFileName.str());
254 }
255 return std::error_code();
256 }
257
258 // Load single module map and extract header file list.
loadModuleMap(llvm::StringRef InputPath)259 std::error_code ModularizeUtilities::loadModuleMap(
260 llvm::StringRef InputPath) {
261 // Get file entry for module.modulemap file.
262 const FileEntry *ModuleMapEntry =
263 SourceMgr->getFileManager().getFile(InputPath);
264
265 // return error if not found.
266 if (!ModuleMapEntry) {
267 llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
268 return std::error_code(1, std::generic_category());
269 }
270
271 // Because the module map parser uses a ForwardingDiagnosticConsumer,
272 // which doesn't forward the BeginSourceFile call, we do it explicitly here.
273 DC.BeginSourceFile(*LangOpts, nullptr);
274
275 // Figure out the home directory for the module map file.
276 const DirectoryEntry *Dir = ModuleMapEntry->getDir();
277 StringRef DirName(Dir->getName());
278 if (llvm::sys::path::filename(DirName) == "Modules") {
279 DirName = llvm::sys::path::parent_path(DirName);
280 if (DirName.endswith(".framework"))
281 Dir = FileMgr->getDirectory(DirName);
282 // FIXME: This assert can fail if there's a race between the above check
283 // and the removal of the directory.
284 assert(Dir && "parent must exist");
285 }
286
287 std::unique_ptr<ModuleMap> ModMap;
288 ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
289 Target.get(), *HeaderInfo));
290
291 // Parse module.modulemap file into module map.
292 if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
293 return std::error_code(1, std::generic_category());
294 }
295
296 // Do matching end call.
297 DC.EndSourceFile();
298
299 // Reset missing header count.
300 MissingHeaderCount = 0;
301
302 if (!collectModuleMapHeaders(ModMap.get()))
303 return std::error_code(1, std::generic_category());
304
305 // Save module map.
306 ModuleMaps.push_back(std::move(ModMap));
307
308 // Indicate we are using module maps.
309 HasModuleMap = true;
310
311 // Return code of 1 for missing headers.
312 if (MissingHeaderCount)
313 return std::error_code(1, std::generic_category());
314
315 return std::error_code();
316 }
317
318 // Collect module map headers.
319 // Walks the modules and collects referenced headers into
320 // HeaderFileNames.
collectModuleMapHeaders(clang::ModuleMap * ModMap)321 bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
322 for (ModuleMap::module_iterator I = ModMap->module_begin(),
323 E = ModMap->module_end();
324 I != E; ++I) {
325 if (!collectModuleHeaders(*I->second))
326 return false;
327 }
328 return true;
329 }
330
331 // Collect referenced headers from one module.
332 // Collects the headers referenced in the given module into
333 // HeaderFileNames.
collectModuleHeaders(const clang::Module & Mod)334 bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
335
336 // Ignore explicit modules because they often have dependencies
337 // we can't know.
338 if (Mod.IsExplicit)
339 return true;
340
341 // Treat headers in umbrella directory as dependencies.
342 DependentsVector UmbrellaDependents;
343
344 // Recursively do submodules.
345 for (auto MI = Mod.submodule_begin(), MIEnd = Mod.submodule_end();
346 MI != MIEnd; ++MI)
347 collectModuleHeaders(**MI);
348
349 if (const FileEntry *UmbrellaHeader = Mod.getUmbrellaHeader().Entry) {
350 std::string HeaderPath = getCanonicalPath(UmbrellaHeader->getName());
351 // Collect umbrella header.
352 HeaderFileNames.push_back(HeaderPath);
353
354 // FUTURE: When needed, umbrella header header collection goes here.
355 }
356 else if (const DirectoryEntry *UmbrellaDir = Mod.getUmbrellaDir().Entry) {
357 // If there normal headers, assume these are umbrellas and skip collection.
358 if (Mod.Headers->size() == 0) {
359 // Collect headers in umbrella directory.
360 if (!collectUmbrellaHeaders(UmbrellaDir->getName(), UmbrellaDependents))
361 return false;
362 }
363 }
364
365 // We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
366 // assuming they are marked as such either because of unsuitability for
367 // modules or because they are meant to be included by another header,
368 // and thus should be ignored by modularize.
369
370 int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
371
372 for (int Index = 0; Index < NormalHeaderCount; ++Index) {
373 DependentsVector NormalDependents;
374 // Collect normal header.
375 const clang::Module::Header &Header(
376 Mod.Headers[clang::Module::HK_Normal][Index]);
377 std::string HeaderPath = getCanonicalPath(Header.Entry->getName());
378 HeaderFileNames.push_back(HeaderPath);
379 }
380
381 int MissingCountThisModule = Mod.MissingHeaders.size();
382
383 for (int Index = 0; Index < MissingCountThisModule; ++Index) {
384 std::string MissingFile = Mod.MissingHeaders[Index].FileName;
385 SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
386 errs() << Loc.printToString(*SourceMgr)
387 << ": error : Header not found: " << MissingFile << "\n";
388 }
389
390 MissingHeaderCount += MissingCountThisModule;
391
392 return true;
393 }
394
395 // Collect headers from an umbrella directory.
collectUmbrellaHeaders(StringRef UmbrellaDirName,DependentsVector & Dependents)396 bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
397 DependentsVector &Dependents) {
398 // Initialize directory name.
399 SmallString<256> Directory(UmbrellaDirName);
400 // Walk the directory.
401 std::error_code EC;
402 for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
403 I.increment(EC)) {
404 if (EC)
405 return false;
406 std::string File(I->path());
407 llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
408 if (!Status)
409 return false;
410 llvm::sys::fs::file_type Type = Status->type();
411 // If the file is a directory, ignore the name and recurse.
412 if (Type == llvm::sys::fs::file_type::directory_file) {
413 if (!collectUmbrellaHeaders(File, Dependents))
414 return false;
415 continue;
416 }
417 // If the file does not have a common header extension, ignore it.
418 if (!isHeader(File))
419 continue;
420 // Save header name.
421 std::string HeaderPath = getCanonicalPath(File);
422 Dependents.push_back(HeaderPath);
423 }
424 return true;
425 }
426
427 // Replace .. embedded in path for purposes of having
428 // a canonical path.
replaceDotDot(StringRef Path)429 static std::string replaceDotDot(StringRef Path) {
430 SmallString<128> Buffer;
431 llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
432 E = llvm::sys::path::end(Path);
433 while (B != E) {
434 if (B->compare(".") == 0) {
435 }
436 else if (B->compare("..") == 0)
437 llvm::sys::path::remove_filename(Buffer);
438 else
439 llvm::sys::path::append(Buffer, *B);
440 ++B;
441 }
442 if (Path.endswith("/") || Path.endswith("\\"))
443 Buffer.append(1, Path.back());
444 return Buffer.c_str();
445 }
446
447 // Convert header path to canonical form.
448 // The canonical form is basically just use forward slashes, and remove "./".
449 // \param FilePath The file path, relative to the module map directory.
450 // \returns The file path in canonical form.
getCanonicalPath(StringRef FilePath)451 std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
452 std::string Tmp(replaceDotDot(FilePath));
453 std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
454 StringRef Tmp2(Tmp);
455 if (Tmp2.startswith("./"))
456 Tmp = Tmp2.substr(2);
457 return Tmp;
458 }
459
460 // Check for header file extension.
461 // If the file extension is .h, .inc, or missing, it's
462 // assumed to be a header.
463 // \param FileName The file name. Must not be a directory.
464 // \returns true if it has a header extension or no extension.
isHeader(StringRef FileName)465 bool ModularizeUtilities::isHeader(StringRef FileName) {
466 StringRef Extension = llvm::sys::path::extension(FileName);
467 if (Extension.size() == 0)
468 return true;
469 if (Extension.equals_lower(".h"))
470 return true;
471 if (Extension.equals_lower(".inc"))
472 return true;
473 return false;
474 }
475
476 // Get directory path component from file path.
477 // \returns the component of the given path, which will be
478 // relative if the given path is relative, absolute if the
479 // given path is absolute, or "." if the path has no leading
480 // path component.
getDirectoryFromPath(StringRef Path)481 std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
482 SmallString<256> Directory(Path);
483 sys::path::remove_filename(Directory);
484 if (Directory.size() == 0)
485 return ".";
486 return Directory.str();
487 }
488
489 // Add unique problem file.
490 // Also standardizes the path.
addUniqueProblemFile(std::string FilePath)491 void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
492 FilePath = getCanonicalPath(FilePath);
493 // Don't add if already present.
494 for(auto &TestFilePath : ProblemFileNames) {
495 if (TestFilePath == FilePath)
496 return;
497 }
498 ProblemFileNames.push_back(FilePath);
499 }
500
501 // Add file with no compile errors.
502 // Also standardizes the path.
addNoCompileErrorsFile(std::string FilePath)503 void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
504 FilePath = getCanonicalPath(FilePath);
505 GoodFileNames.push_back(FilePath);
506 }
507
508 // List problem files.
displayProblemFiles()509 void ModularizeUtilities::displayProblemFiles() {
510 errs() << "\nThese are the files with possible errors:\n\n";
511 for (auto &ProblemFile : ProblemFileNames) {
512 errs() << ProblemFile << "\n";
513 }
514 }
515
516 // List files with no problems.
displayGoodFiles()517 void ModularizeUtilities::displayGoodFiles() {
518 errs() << "\nThese are the files with no detected errors:\n\n";
519 for (auto &GoodFile : HeaderFileNames) {
520 bool Good = true;
521 for (auto &ProblemFile : ProblemFileNames) {
522 if (ProblemFile == GoodFile) {
523 Good = false;
524 break;
525 }
526 }
527 if (Good)
528 errs() << GoodFile << "\n";
529 }
530 }
531
532 // List files with problem files commented out.
displayCombinedFiles()533 void ModularizeUtilities::displayCombinedFiles() {
534 errs() <<
535 "\nThese are the combined files, with problem files preceded by #:\n\n";
536 for (auto &File : HeaderFileNames) {
537 bool Good = true;
538 for (auto &ProblemFile : ProblemFileNames) {
539 if (ProblemFile == File) {
540 Good = false;
541 break;
542 }
543 }
544 errs() << (Good ? "" : "#") << File << "\n";
545 }
546 }
547