Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[clang module] Current Working Directory Pruning #124786

Merged
merged 5 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,18 @@ enum class ScanningOptimizations {
HeaderSearch = 1,

/// Remove warnings from system modules.
SystemWarnings = 2,
SystemWarnings = (1 << 1),

/// Remove unused -ivfsoverlay arguments.
VFS = 4,
VFS = (1 << 2),

/// Canonicalize -D and -U options.
Macros = 8,
Macros = (1 << 3),

DSS_LAST_BITMASK_ENUM(Macros),
/// Ignore the compiler's working directory if it is safe.
IgnoreCWD = (1 << 4),

DSS_LAST_BITMASK_ENUM(IgnoreCWD),
Default = All
};

Expand Down
96 changes: 92 additions & 4 deletions clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
}
}

static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
// Check if the command line input uses relative paths.
// It is not safe to ignore the current working directory if any of the
// command line inputs use relative paths.
#define IF_RELATIVE_RETURN_FALSE(PATH) \
do { \
if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \
return false; \
} while (0)

#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \
do { \
if (llvm::any_of(PATHS, [](const auto &P) { \
return !P.empty() && !llvm::sys::path::is_absolute(P); \
})) \
return false; \
} while (0)

// Header search paths.
const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
for (auto &Entry : HeaderSearchOpts.UserEntries)
if (Entry.IgnoreSysRoot)
IF_RELATIVE_RETURN_FALSE(Entry.Path);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
E = HeaderSearchOpts.PrebuiltModuleFiles.end();
I != E;) {
auto Current = I++;
IF_RELATIVE_RETURN_FALSE(Current->second);
}
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);

// Preprocessor options.
const auto &PPOpts = CI.getPreprocessorOpts();
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);

// Frontend options.
const auto &FrontendOpts = CI.getFrontendOpts();
for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
if (Input.isBuffer())
continue; // FIXME: Can this happen when parsing command-line?

IF_RELATIVE_RETURN_FALSE(Input.getFile());
}
IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);

// Filesystem options.
const auto &FileSystemOpts = CI.getFileSystemOpts();
IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);

// Codegen options.
const auto &CodeGenOpts = CI.getCodeGenOpts();
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);

// Sanitizer options.
IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);

// Coverage mappings.
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);

// Dependency output options.
for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
IF_RELATIVE_RETURN_FALSE(ExtraDep.first);

return true;
}

static std::string getModuleContextHash(const ModuleDeps &MD,
const CowCompilerInvocation &CI,
bool EagerLoadModules,
bool EagerLoadModules, bool IgnoreCWD,
llvm::vfs::FileSystem &VFS) {
llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
HashBuilder;
Expand All @@ -410,8 +492,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
HashBuilder.add(getClangFullRepositoryVersion());
HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
if (CWD)
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
if (CWD && !IgnoreCWD)
HashBuilder.add(*CWD);
else
FSOpts.WorkingDir.clear();

// Hash the BuildInvocation without any input files.
SmallString<0> ArgVec;
Expand Down Expand Up @@ -443,8 +528,11 @@ static std::string getModuleContextHash(const ModuleDeps &MD,

void ModuleDepCollector::associateWithContextHash(
const CowCompilerInvocation &CI, ModuleDeps &Deps) {
Deps.ID.ContextHash = getModuleContextHash(
Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
isSafeToIgnoreCWD(CI);
Deps.ID.ContextHash =
getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
ScanInstance.getVirtualFileSystem());
bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
(void)Inserted;
assert(Inserted && "duplicate module mapping");
Expand Down
188 changes: 188 additions & 0 deletions clang/test/ClangScanDeps/modules-context-hash-cwd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Test current directory pruning when computing the context hash.

// REQUIRES: shell

// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
// It is not a typo to use cdb1.json for result2. We intend to use the same
// compilation database, but different clang-scan-deps optimize-args options.
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
// RUN: cat %t/result0.json %t/result5.json | FileCheck %s

//--- cdb0.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

//--- cdb1.json.in
[{
"directory": "DIR/a",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

// cdb2 is skipped because we reuse cdb1.

//--- cdb3.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
"file": "DIR/tu.c"
}]

//--- cdb4.json.in
[{
"directory": "DIR/a/",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
"file": "DIR/tu.c"
}]

//--- cdb5.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

//--- include/module.modulemap
module mod {
header "mod.h"
}

//--- include/mod.h

//--- tu.c
#include "mod.h"

// Check that result0 and result1/result5 compute the same hash with
// optimization on. The only difference between result0 and result1/result5 is
// the compiler's working directory.
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK: "context-hash": "[[HASH:.*]]",
// CHECK: }
// CHECK: "translation-units": [
// CHECK: {
// CHECK: "commands": [
// CHECK: {
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
// CHECK-NEXT: "clang-module-deps": [
// CHECK-NEXT: {
// CHECK-NEXT: "context-hash": "[[HASH]]",
// CHECK-NEXT: "module-name": "mod"
// CHECK: }
// CHECK: ],
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK: "context-hash": "[[HASH]]",
// CHECK: }
// CHECK: "translation-units": [
// CHECK: {
// CHECK: "commands": [
// CHECK: {
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
// CHECK-NEXT: "clang-module-deps": [
// CHECK-NEXT: {
// CHECK-NEXT: "context-hash": "[[HASH]]",
// CHECK-NEXT: "module-name": "mod"
// CHECK: }
// CHECK: ],

// Check that result0 and result2 compute different hashes because
// the working directory optimization is turned off for result2.
// SKIPOPT: {
// SKIPOPT-NEXT: "modules": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "clang-module-deps": [],
// SKIPOPT: "context-hash": "[[HASH0:.*]]",
// SKIPOPT: }
// SKIPOPT: "translation-units": [
// SKIPOPT: {
// SKIPOPT: "commands": [
// SKIPOPT: {
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
// SKIPOPT-NEXT: "clang-module-deps": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "context-hash": "[[HASH0]]",
// SKIPOPT-NEXT: "module-name": "mod"
// SKIPOPT: }
// SKIPOPT: ],
// SKIPOPT: {
// SKIPOPT-NEXT: "modules": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "clang-module-deps": [],
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
// SKIPOPT: "context-hash": "[[HASH2:.*]]",
// SKIPOPT: }
// SKIPOPT: "translation-units": [
// SKIPOPT: {
// SKIPOPT: "commands": [
// SKIPOPT: {
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
// SKIPOPT-NEXT: "clang-module-deps": [
// SKIPOPT-NEXT: {
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
// SKIPOPT-NEXT: "context-hash": "[[HASH2]]"
// SKIPOPT-NEXT: "module-name": "mod"
// SKIPOPT: }
// SKIPOPT: ],

// Check that result3 and result4 contain different hashes because
// both pass the same relative path on the command line, and
// they are produced using different compiler working directories.
// RELPATH: {
// RELPATH-NEXT: "modules": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "clang-module-deps": [],
// RELPATH: "context-hash": "[[HASH3:.*]]",
// RELPATH: }
// RELPATH: "translation-units": [
// RELPATH: {
// RELPATH: "commands": [
// RELPATH: {
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
// RELPATH-NEXT: "clang-module-deps": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "context-hash": "[[HASH3]]",
// RELPATH-NEXT: "module-name": "mod"
// RELPATH: }
// RELPATH: ],
// RELPATH: {
// RELPATH-NEXT: "modules": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "clang-module-deps": [],
// RELPATH-NOT: "context-hash": "[[HASH3]]",
// RELPATH: "context-hash": "[[HASH4:.*]]",
// RELPATH: }
// RELPATH: "translation-units": [
// RELPATH: {
// RELPATH: "commands": [
// RELPATH: {
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
// RELPATH-NEXT: "clang-module-deps": [
// RELPATH-NEXT: {
// RELPATH-NOT: "context-hash": "[[HASH3]]",
// RELPATH-NEXT: "context-hash": "[[HASH4]]"
// RELPATH-NEXT: "module-name": "mod"
// RELPATH: }
// RELPATH: ],

2 changes: 1 addition & 1 deletion clang/test/ClangScanDeps/working-dir.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db
// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t

// Check that there are two separate modules hashes. One for each working dir.
Expand Down
2 changes: 2 additions & 0 deletions clang/tools/clang-scan-deps/ClangScanDeps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ static void ParseArgs(int argc, char **argv) {
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
.Case("vfs", ScanningOptimizations::VFS)
.Case("canonicalize-macros", ScanningOptimizations::Macros)
.Case("ignore-current-working-dir",
ScanningOptimizations::IgnoreCWD)
.Case("all", ScanningOptimizations::All)
.Default(std::nullopt);
if (!Optimization) {
Expand Down