llvm-project
548 строк · 19.2 Кб
//===--- extra/modularize/ModularizeUtilities.cpp -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a class for loading and validating a module map or
// header list by checking that all headers in the corresponding directories
// are accounted for.
//
//===----------------------------------------------------------------------===//
14
15#include "clang/Basic/SourceManager.h"
16#include "clang/Driver/Options.h"
17#include "clang/Frontend/CompilerInstance.h"
18#include "clang/Frontend/FrontendActions.h"
19#include "CoverageChecker.h"
20#include "llvm/ADT/SmallString.h"
21#include "llvm/Support/FileUtilities.h"
22#include "llvm/Support/MemoryBuffer.h"
23#include "llvm/Support/Path.h"
24#include "llvm/Support/raw_ostream.h"
25#include "ModularizeUtilities.h"
26
27using namespace clang;
28using namespace llvm;
29using namespace Modularize;
30
31namespace {
32// Subclass TargetOptions so we can construct it inline with
33// the minimal option, the triple.
34class ModuleMapTargetOptions : public clang::TargetOptions {
35public:
36ModuleMapTargetOptions() { Triple = llvm::sys::getDefaultTargetTriple(); }
37};
38} // namespace
39
// ModularizeUtilities class implementation.
41
42// Constructor.
43ModularizeUtilities::ModularizeUtilities(std::vector<std::string> &InputPaths,
44llvm::StringRef Prefix,
45llvm::StringRef ProblemFilesListPath)
46: InputFilePaths(InputPaths), HeaderPrefix(Prefix),
47ProblemFilesPath(ProblemFilesListPath), HasModuleMap(false),
48MissingHeaderCount(0),
49// Init clang stuff needed for loading the module map and preprocessing.
50LangOpts(new LangOptions()), DiagIDs(new DiagnosticIDs()),
51DiagnosticOpts(new DiagnosticOptions()),
52DC(llvm::errs(), DiagnosticOpts.get()),
53Diagnostics(
54new DiagnosticsEngine(DiagIDs, DiagnosticOpts.get(), &DC, false)),
55TargetOpts(new ModuleMapTargetOptions()),
56Target(TargetInfo::CreateTargetInfo(*Diagnostics, TargetOpts)),
57FileMgr(new FileManager(FileSystemOpts)),
58SourceMgr(new SourceManager(*Diagnostics, *FileMgr, false)),
59HeaderInfo(new HeaderSearch(std::make_shared<HeaderSearchOptions>(),
60*SourceMgr, *Diagnostics, *LangOpts,
61Target.get())) {}
62
63// Create instance of ModularizeUtilities, to simplify setting up
64// subordinate objects.
65ModularizeUtilities *ModularizeUtilities::createModularizeUtilities(
66std::vector<std::string> &InputPaths, llvm::StringRef Prefix,
67llvm::StringRef ProblemFilesListPath) {
68
69return new ModularizeUtilities(InputPaths, Prefix, ProblemFilesListPath);
70}
71
72// Load all header lists and dependencies.
73std::error_code ModularizeUtilities::loadAllHeaderListsAndDependencies() {
74// For each input file.
75for (auto I = InputFilePaths.begin(), E = InputFilePaths.end(); I != E; ++I) {
76llvm::StringRef InputPath = *I;
77// If it's a module map.
78if (InputPath.ends_with(".modulemap")) {
79// Load the module map.
80if (std::error_code EC = loadModuleMap(InputPath))
81return EC;
82} else {
83// Else we assume it's a header list and load it.
84if (std::error_code EC = loadSingleHeaderListsAndDependencies(InputPath)) {
85errs() << "modularize: error: Unable to get header list '" << InputPath
86<< "': " << EC.message() << '\n';
87return EC;
88}
89}
90}
91// If we have a problem files list.
92if (ProblemFilesPath.size() != 0) {
93// Load problem files list.
94if (std::error_code EC = loadProblemHeaderList(ProblemFilesPath)) {
95errs() << "modularize: error: Unable to get problem header list '" << ProblemFilesPath
96<< "': " << EC.message() << '\n';
97return EC;
98}
99}
100return std::error_code();
101}
102
103// Do coverage checks.
104// For each loaded module map, do header coverage check.
105// Starting from the directory of the module.modulemap file,
106// Find all header files, optionally looking only at files
107// covered by the include path options, and compare against
108// the headers referenced by the module.modulemap file.
109// Display warnings for unaccounted-for header files.
110// Returns 0 if there were no errors or warnings, 1 if there
111// were warnings, 2 if any other problem, such as a bad
112// module map path argument was specified.
113std::error_code ModularizeUtilities::doCoverageCheck(
114std::vector<std::string> &IncludePaths,
115llvm::ArrayRef<std::string> CommandLine) {
116int ModuleMapCount = ModuleMaps.size();
117int ModuleMapIndex;
118std::error_code EC;
119for (ModuleMapIndex = 0; ModuleMapIndex < ModuleMapCount; ++ModuleMapIndex) {
120std::unique_ptr<clang::ModuleMap> &ModMap = ModuleMaps[ModuleMapIndex];
121auto Checker = CoverageChecker::createCoverageChecker(
122InputFilePaths[ModuleMapIndex], IncludePaths, CommandLine,
123ModMap.get());
124std::error_code LocalEC = Checker->doChecks();
125if (LocalEC.value() > 0)
126EC = LocalEC;
127}
128return EC;
129}
130
131// Load single header list and dependencies.
132std::error_code ModularizeUtilities::loadSingleHeaderListsAndDependencies(
133llvm::StringRef InputPath) {
134
135// By default, use the path component of the list file name.
136SmallString<256> HeaderDirectory(InputPath);
137llvm::sys::path::remove_filename(HeaderDirectory);
138SmallString<256> CurrentDirectory;
139llvm::sys::fs::current_path(CurrentDirectory);
140
141// Get the prefix if we have one.
142if (HeaderPrefix.size() != 0)
143HeaderDirectory = HeaderPrefix;
144
145// Read the header list file into a buffer.
146ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
147MemoryBuffer::getFile(InputPath);
148if (std::error_code EC = listBuffer.getError())
149return EC;
150
151// Parse the header list into strings.
152SmallVector<StringRef, 32> Strings;
153listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
154
155// Collect the header file names from the string list.
156for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
157E = Strings.end();
158I != E; ++I) {
159StringRef Line = I->trim();
160// Ignore comments and empty lines.
161if (Line.empty() || (Line[0] == '#'))
162continue;
163std::pair<StringRef, StringRef> TargetAndDependents = Line.split(':');
164SmallString<256> HeaderFileName;
165// Prepend header file name prefix if it's not absolute.
166if (llvm::sys::path::is_absolute(TargetAndDependents.first))
167llvm::sys::path::native(TargetAndDependents.first, HeaderFileName);
168else {
169if (HeaderDirectory.size() != 0)
170HeaderFileName = HeaderDirectory;
171else
172HeaderFileName = CurrentDirectory;
173llvm::sys::path::append(HeaderFileName, TargetAndDependents.first);
174llvm::sys::path::native(HeaderFileName);
175}
176// Handle optional dependencies.
177DependentsVector Dependents;
178SmallVector<StringRef, 4> DependentsList;
179TargetAndDependents.second.split(DependentsList, " ", -1, false);
180int Count = DependentsList.size();
181for (int Index = 0; Index < Count; ++Index) {
182SmallString<256> Dependent;
183if (llvm::sys::path::is_absolute(DependentsList[Index]))
184Dependent = DependentsList[Index];
185else {
186if (HeaderDirectory.size() != 0)
187Dependent = HeaderDirectory;
188else
189Dependent = CurrentDirectory;
190llvm::sys::path::append(Dependent, DependentsList[Index]);
191}
192llvm::sys::path::native(Dependent);
193Dependents.push_back(getCanonicalPath(Dependent.str()));
194}
195// Get canonical form.
196HeaderFileName = getCanonicalPath(HeaderFileName);
197// Save the resulting header file path and dependencies.
198HeaderFileNames.push_back(std::string(HeaderFileName));
199Dependencies[HeaderFileName.str()] = Dependents;
200}
201return std::error_code();
202}
203
204// Load problem header list.
205std::error_code ModularizeUtilities::loadProblemHeaderList(
206llvm::StringRef InputPath) {
207
208// By default, use the path component of the list file name.
209SmallString<256> HeaderDirectory(InputPath);
210llvm::sys::path::remove_filename(HeaderDirectory);
211SmallString<256> CurrentDirectory;
212llvm::sys::fs::current_path(CurrentDirectory);
213
214// Get the prefix if we have one.
215if (HeaderPrefix.size() != 0)
216HeaderDirectory = HeaderPrefix;
217
218// Read the header list file into a buffer.
219ErrorOr<std::unique_ptr<MemoryBuffer>> listBuffer =
220MemoryBuffer::getFile(InputPath);
221if (std::error_code EC = listBuffer.getError())
222return EC;
223
224// Parse the header list into strings.
225SmallVector<StringRef, 32> Strings;
226listBuffer.get()->getBuffer().split(Strings, "\n", -1, false);
227
228// Collect the header file names from the string list.
229for (SmallVectorImpl<StringRef>::iterator I = Strings.begin(),
230E = Strings.end();
231I != E; ++I) {
232StringRef Line = I->trim();
233// Ignore comments and empty lines.
234if (Line.empty() || (Line[0] == '#'))
235continue;
236SmallString<256> HeaderFileName;
237// Prepend header file name prefix if it's not absolute.
238if (llvm::sys::path::is_absolute(Line))
239llvm::sys::path::native(Line, HeaderFileName);
240else {
241if (HeaderDirectory.size() != 0)
242HeaderFileName = HeaderDirectory;
243else
244HeaderFileName = CurrentDirectory;
245llvm::sys::path::append(HeaderFileName, Line);
246llvm::sys::path::native(HeaderFileName);
247}
248// Get canonical form.
249HeaderFileName = getCanonicalPath(HeaderFileName);
250// Save the resulting header file path.
251ProblemFileNames.push_back(std::string(HeaderFileName));
252}
253return std::error_code();
254}
255
256// Load single module map and extract header file list.
257std::error_code ModularizeUtilities::loadModuleMap(
258llvm::StringRef InputPath) {
259// Get file entry for module.modulemap file.
260auto ModuleMapEntryOrErr = SourceMgr->getFileManager().getFileRef(InputPath);
261
262// return error if not found.
263if (!ModuleMapEntryOrErr) {
264llvm::errs() << "error: File \"" << InputPath << "\" not found.\n";
265return errorToErrorCode(ModuleMapEntryOrErr.takeError());
266}
267FileEntryRef ModuleMapEntry = *ModuleMapEntryOrErr;
268
269// Because the module map parser uses a ForwardingDiagnosticConsumer,
270// which doesn't forward the BeginSourceFile call, we do it explicitly here.
271DC.BeginSourceFile(*LangOpts, nullptr);
272
273// Figure out the home directory for the module map file.
274DirectoryEntryRef Dir = ModuleMapEntry.getDir();
275StringRef DirName(Dir.getName());
276if (llvm::sys::path::filename(DirName) == "Modules") {
277DirName = llvm::sys::path::parent_path(DirName);
278if (DirName.ends_with(".framework")) {
279auto FrameworkDirOrErr = FileMgr->getDirectoryRef(DirName);
280if (!FrameworkDirOrErr) {
281// This can happen if there's a race between the above check and the
282// removal of the directory.
283return errorToErrorCode(FrameworkDirOrErr.takeError());
284}
285Dir = *FrameworkDirOrErr;
286}
287}
288
289std::unique_ptr<ModuleMap> ModMap;
290ModMap.reset(new ModuleMap(*SourceMgr, *Diagnostics, *LangOpts,
291Target.get(), *HeaderInfo));
292
293// Parse module.modulemap file into module map.
294if (ModMap->parseModuleMapFile(ModuleMapEntry, false, Dir)) {
295return std::error_code(1, std::generic_category());
296}
297
298// Do matching end call.
299DC.EndSourceFile();
300
301// Reset missing header count.
302MissingHeaderCount = 0;
303
304if (!collectModuleMapHeaders(ModMap.get()))
305return std::error_code(1, std::generic_category());
306
307// Save module map.
308ModuleMaps.push_back(std::move(ModMap));
309
310// Indicate we are using module maps.
311HasModuleMap = true;
312
313// Return code of 1 for missing headers.
314if (MissingHeaderCount)
315return std::error_code(1, std::generic_category());
316
317return std::error_code();
318}
319
320// Collect module map headers.
321// Walks the modules and collects referenced headers into
322// HeaderFileNames.
323bool ModularizeUtilities::collectModuleMapHeaders(clang::ModuleMap *ModMap) {
324SmallVector<std::pair<StringRef, const clang::Module *>, 0> Vec;
325for (auto &M : ModMap->modules())
326Vec.emplace_back(M.first(), M.second);
327llvm::sort(Vec, llvm::less_first());
328for (auto &I : Vec)
329if (!collectModuleHeaders(*I.second))
330return false;
331return true;
332}
333
334// Collect referenced headers from one module.
335// Collects the headers referenced in the given module into
336// HeaderFileNames.
337bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
338
339// Ignore explicit modules because they often have dependencies
340// we can't know.
341if (Mod.IsExplicit)
342return true;
343
344// Treat headers in umbrella directory as dependencies.
345DependentsVector UmbrellaDependents;
346
347// Recursively do submodules.
348for (auto *Submodule : Mod.submodules())
349collectModuleHeaders(*Submodule);
350
351if (std::optional<clang::Module::Header> UmbrellaHeader =
352Mod.getUmbrellaHeaderAsWritten()) {
353std::string HeaderPath = getCanonicalPath(UmbrellaHeader->Entry.getName());
354// Collect umbrella header.
355HeaderFileNames.push_back(HeaderPath);
356
357// FUTURE: When needed, umbrella header header collection goes here.
358} else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
359Mod.getUmbrellaDirAsWritten()) {
360// If there normal headers, assume these are umbrellas and skip collection.
361if (Mod.Headers->size() == 0) {
362// Collect headers in umbrella directory.
363if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(),
364UmbrellaDependents))
365return false;
366}
367}
368
369// We ignore HK_Private, HK_Textual, HK_PrivateTextual, and HK_Excluded,
370// assuming they are marked as such either because of unsuitability for
371// modules or because they are meant to be included by another header,
372// and thus should be ignored by modularize.
373
374int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
375
376for (int Index = 0; Index < NormalHeaderCount; ++Index) {
377DependentsVector NormalDependents;
378// Collect normal header.
379const clang::Module::Header &Header(
380Mod.Headers[clang::Module::HK_Normal][Index]);
381std::string HeaderPath = getCanonicalPath(Header.Entry.getName());
382HeaderFileNames.push_back(HeaderPath);
383}
384
385int MissingCountThisModule = Mod.MissingHeaders.size();
386
387for (int Index = 0; Index < MissingCountThisModule; ++Index) {
388std::string MissingFile = Mod.MissingHeaders[Index].FileName;
389SourceLocation Loc = Mod.MissingHeaders[Index].FileNameLoc;
390errs() << Loc.printToString(*SourceMgr)
391<< ": error : Header not found: " << MissingFile << "\n";
392}
393
394MissingHeaderCount += MissingCountThisModule;
395
396return true;
397}
398
399// Collect headers from an umbrella directory.
400bool ModularizeUtilities::collectUmbrellaHeaders(StringRef UmbrellaDirName,
401DependentsVector &Dependents) {
402// Initialize directory name.
403SmallString<256> Directory(UmbrellaDirName);
404// Walk the directory.
405std::error_code EC;
406for (llvm::sys::fs::directory_iterator I(Directory.str(), EC), E; I != E;
407I.increment(EC)) {
408if (EC)
409return false;
410std::string File(I->path());
411llvm::ErrorOr<llvm::sys::fs::basic_file_status> Status = I->status();
412if (!Status)
413return false;
414llvm::sys::fs::file_type Type = Status->type();
415// If the file is a directory, ignore the name and recurse.
416if (Type == llvm::sys::fs::file_type::directory_file) {
417if (!collectUmbrellaHeaders(File, Dependents))
418return false;
419continue;
420}
421// If the file does not have a common header extension, ignore it.
422if (!isHeader(File))
423continue;
424// Save header name.
425std::string HeaderPath = getCanonicalPath(File);
426Dependents.push_back(HeaderPath);
427}
428return true;
429}
430
431// Replace .. embedded in path for purposes of having
432// a canonical path.
433static std::string replaceDotDot(StringRef Path) {
434SmallString<128> Buffer;
435llvm::sys::path::const_iterator B = llvm::sys::path::begin(Path),
436E = llvm::sys::path::end(Path);
437while (B != E) {
438if (*B == "..")
439llvm::sys::path::remove_filename(Buffer);
440else if (*B != ".")
441llvm::sys::path::append(Buffer, *B);
442++B;
443}
444if (Path.ends_with("/") || Path.ends_with("\\"))
445Buffer.append(1, Path.back());
446return Buffer.c_str();
447}
448
449// Convert header path to canonical form.
450// The canonical form is basically just use forward slashes, and remove "./".
451// \param FilePath The file path, relative to the module map directory.
452// \returns The file path in canonical form.
453std::string ModularizeUtilities::getCanonicalPath(StringRef FilePath) {
454std::string Tmp(replaceDotDot(FilePath));
455std::replace(Tmp.begin(), Tmp.end(), '\\', '/');
456StringRef Tmp2(Tmp);
457if (Tmp2.starts_with("./"))
458Tmp = std::string(Tmp2.substr(2));
459return Tmp;
460}
461
462// Check for header file extension.
463// If the file extension is .h, .inc, or missing, it's
464// assumed to be a header.
465// \param FileName The file name. Must not be a directory.
466// \returns true if it has a header extension or no extension.
467bool ModularizeUtilities::isHeader(StringRef FileName) {
468StringRef Extension = llvm::sys::path::extension(FileName);
469if (Extension.size() == 0)
470return true;
471if (Extension.equals_insensitive(".h"))
472return true;
473if (Extension.equals_insensitive(".inc"))
474return true;
475return false;
476}
477
478// Get directory path component from file path.
479// \returns the component of the given path, which will be
480// relative if the given path is relative, absolute if the
481// given path is absolute, or "." if the path has no leading
482// path component.
483std::string ModularizeUtilities::getDirectoryFromPath(StringRef Path) {
484SmallString<256> Directory(Path);
485sys::path::remove_filename(Directory);
486if (Directory.size() == 0)
487return ".";
488return std::string(Directory);
489}
490
491// Add unique problem file.
492// Also standardizes the path.
493void ModularizeUtilities::addUniqueProblemFile(std::string FilePath) {
494FilePath = getCanonicalPath(FilePath);
495// Don't add if already present.
496for(auto &TestFilePath : ProblemFileNames) {
497if (TestFilePath == FilePath)
498return;
499}
500ProblemFileNames.push_back(FilePath);
501}
502
503// Add file with no compile errors.
504// Also standardizes the path.
505void ModularizeUtilities::addNoCompileErrorsFile(std::string FilePath) {
506FilePath = getCanonicalPath(FilePath);
507GoodFileNames.push_back(FilePath);
508}
509
510// List problem files.
511void ModularizeUtilities::displayProblemFiles() {
512errs() << "\nThese are the files with possible errors:\n\n";
513for (auto &ProblemFile : ProblemFileNames) {
514errs() << ProblemFile << "\n";
515}
516}
517
518// List files with no problems.
519void ModularizeUtilities::displayGoodFiles() {
520errs() << "\nThese are the files with no detected errors:\n\n";
521for (auto &GoodFile : HeaderFileNames) {
522bool Good = true;
523for (auto &ProblemFile : ProblemFileNames) {
524if (ProblemFile == GoodFile) {
525Good = false;
526break;
527}
528}
529if (Good)
530errs() << GoodFile << "\n";
531}
532}
533
534// List files with problem files commented out.
535void ModularizeUtilities::displayCombinedFiles() {
536errs() <<
537"\nThese are the combined files, with problem files preceded by #:\n\n";
538for (auto &File : HeaderFileNames) {
539bool Good = true;
540for (auto &ProblemFile : ProblemFileNames) {
541if (ProblemFile == File) {
542Good = false;
543break;
544}
545}
546errs() << (Good ? "" : "#") << File << "\n";
547}
548}
549