Skip to content

Commit

Permalink
[clang module] Current Working Directory Pruning (llvm#124786)
Browse files Browse the repository at this point in the history
When computing the context hash, `clang` always includes the compiler's
working directory. As a result, even when the only difference
between two compilations is the working directory, different module
variants are generated. These variants are redundant. This PR implements
an optimization that ignores the working directory when computing the
context hash whenever it is safe to do so.

Specifically, `clang` checks if it is safe to ignore the working
directory in `isSafeToIgnoreCWD`. The check involves going through
compile command options to see if any paths specified are relative. A
path is considered relative here when it is not empty and
`llvm::sys::path::is_absolute` returns false for it. If none of the paths
examined is relative, `clang` considers it safe to ignore the
current working directory and leaves the working directory out
when computing the context hash.

(cherry picked from commit 54acda2)
  • Loading branch information
qiongsiwu committed Feb 5, 2025
1 parent ebb8966 commit 27d85de
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,18 @@ enum class ScanningOptimizations {
HeaderSearch = 1,

/// Remove warnings from system modules.
SystemWarnings = 2,
SystemWarnings = (1 << 1),

/// Remove unused -ivfsoverlay arguments.
VFS = 4,
VFS = (1 << 2),

/// Canonicalize -D and -U options.
Macros = 8,
Macros = (1 << 3),

DSS_LAST_BITMASK_ENUM(Macros),
/// Ignore the compiler's working directory if it is safe.
IgnoreCWD = (1 << 4),

DSS_LAST_BITMASK_ENUM(IgnoreCWD),
Default = All,
FullIncludeTreeIrrelevant = HeaderSearch | VFS,
};
Expand Down
97 changes: 92 additions & 5 deletions clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -423,9 +423,91 @@ void ModuleDepCollector::applyDiscoveredDependencies(CompilerInvocation &CI) {
}
}

static bool isSafeToIgnoreCWD(const CowCompilerInvocation &CI) {
// Check if the command line input uses relative paths.
// It is not safe to ignore the current working directory if any of the
// command line inputs use relative paths.
#define IF_RELATIVE_RETURN_FALSE(PATH) \
do { \
if (!PATH.empty() && !llvm::sys::path::is_absolute(PATH)) \
return false; \
} while (0)

#define IF_ANY_RELATIVE_RETURN_FALSE(PATHS) \
do { \
if (llvm::any_of(PATHS, [](const auto &P) { \
return !P.empty() && !llvm::sys::path::is_absolute(P); \
})) \
return false; \
} while (0)

// Header search paths.
const auto &HeaderSearchOpts = CI.getHeaderSearchOpts();
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.Sysroot);
for (auto &Entry : HeaderSearchOpts.UserEntries)
if (Entry.IgnoreSysRoot)
IF_RELATIVE_RETURN_FALSE(Entry.Path);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ResourceDir);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleCachePath);
IF_RELATIVE_RETURN_FALSE(HeaderSearchOpts.ModuleUserBuildPath);
for (auto I = HeaderSearchOpts.PrebuiltModuleFiles.begin(),
E = HeaderSearchOpts.PrebuiltModuleFiles.end();
I != E;) {
auto Current = I++;
IF_RELATIVE_RETURN_FALSE(Current->second);
}
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.PrebuiltModulePaths);
IF_ANY_RELATIVE_RETURN_FALSE(HeaderSearchOpts.VFSOverlayFiles);

// Preprocessor options.
const auto &PPOpts = CI.getPreprocessorOpts();
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.MacroIncludes);
IF_ANY_RELATIVE_RETURN_FALSE(PPOpts.Includes);
IF_RELATIVE_RETURN_FALSE(PPOpts.ImplicitPCHInclude);

// Frontend options.
const auto &FrontendOpts = CI.getFrontendOpts();
for (const FrontendInputFile &Input : FrontendOpts.Inputs) {
if (Input.isBuffer())
continue; // FIXME: Can this happen when parsing command-line?

IF_RELATIVE_RETURN_FALSE(Input.getFile());
}
IF_RELATIVE_RETURN_FALSE(FrontendOpts.CodeCompletionAt.FileName);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleMapFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModuleFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ModulesEmbedFiles);
IF_ANY_RELATIVE_RETURN_FALSE(FrontendOpts.ASTMergeFiles);
IF_RELATIVE_RETURN_FALSE(FrontendOpts.OverrideRecordLayoutsFile);
IF_RELATIVE_RETURN_FALSE(FrontendOpts.StatsFile);

// Filesystem options.
const auto &FileSystemOpts = CI.getFileSystemOpts();
IF_RELATIVE_RETURN_FALSE(FileSystemOpts.WorkingDir);

// Codegen options.
const auto &CodeGenOpts = CI.getCodeGenOpts();
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.DebugCompilationDir);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.CoverageCompilationDir);

// Sanitizer options.
IF_ANY_RELATIVE_RETURN_FALSE(CI.getLangOpts().NoSanitizeFiles);

// Coverage mappings.
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileInstrumentUsePath);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.SampleProfileFile);
IF_RELATIVE_RETURN_FALSE(CodeGenOpts.ProfileRemappingFile);

// Dependency output options.
for (auto &ExtraDep : CI.getDependencyOutputOpts().ExtraDeps)
IF_RELATIVE_RETURN_FALSE(ExtraDep.first);

return true;
}

static std::string getModuleContextHash(const ModuleDeps &MD,
const CowCompilerInvocation &CI,
bool EagerLoadModules,
bool EagerLoadModules, bool IgnoreCWD,
llvm::vfs::FileSystem &VFS) {
llvm::HashBuilder<llvm::TruncatedBLAKE3<16>, llvm::endianness::native>
HashBuilder;
Expand All @@ -452,12 +534,14 @@ static std::string getModuleContextHash(const ModuleDeps &MD,
HashBuilder.add(getClangFullRepositoryVersion());
HashBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR);
llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory();
if (CWD)
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
if (CWD && !IgnoreCWD)
HashBuilder.add(*CWD);
else
FSOpts.WorkingDir.clear();

// Save and restore options that should not affect the hash, e.g. the exact
// contents of input files, or prefix mappings.
auto &FSOpts = const_cast<FileSystemOptions &>(CI.getFileSystemOpts());
auto &FEOpts = const_cast<FrontendOptions &>(CI.getFrontendOpts());
auto &CASOpts = const_cast<CASOptions &>(CI.getCASOpts());
llvm::SaveAndRestore RestoreCASFSRootID(FSOpts.CASFileSystemRootID, {});
Expand Down Expand Up @@ -511,8 +595,11 @@ static void checkCompileCacheKeyMatch(cas::ObjectStore &CAS,

void ModuleDepCollector::associateWithContextHash(
const CowCompilerInvocation &CI, ModuleDeps &Deps) {
Deps.ID.ContextHash = getModuleContextHash(
Deps, CI, EagerLoadModules, ScanInstance.getVirtualFileSystem());
bool IgnoreCWD = any(OptimizeArgs & ScanningOptimizations::IgnoreCWD) &&
isSafeToIgnoreCWD(CI);
Deps.ID.ContextHash =
getModuleContextHash(Deps, CI, EagerLoadModules, IgnoreCWD,
ScanInstance.getVirtualFileSystem());
bool Inserted = ModuleDepsByID.insert({Deps.ID, &Deps}).second;
(void)Inserted;
assert(Inserted && "duplicate module mapping");
Expand Down
188 changes: 188 additions & 0 deletions clang/test/ClangScanDeps/modules-context-hash-cwd.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
// Test current directory pruning when computing the context hash.

// REQUIRES: shell

// RUN: rm -rf %t
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/cdb0.json.in > %t/cdb0.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb1.json.in > %t/cdb1.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb3.json.in > %t/cdb3.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb4.json.in > %t/cdb4.json
// RUN: sed -e "s|DIR|%/t|g" %t/cdb5.json.in > %t/cdb5.json
// RUN: clang-scan-deps -compilation-database %t/cdb0.json -format experimental-full > %t/result0.json
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full > %t/result1.json
// It is not a typo to use cdb1.json for result2. We intend to use the same
// compilation database, but different clang-scan-deps optimize-args options.
// RUN: clang-scan-deps -compilation-database %t/cdb1.json -format experimental-full -optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/result2.json
// RUN: clang-scan-deps -compilation-database %t/cdb3.json -format experimental-full > %t/result3.json
// RUN: clang-scan-deps -compilation-database %t/cdb4.json -format experimental-full > %t/result4.json
// RUN: clang-scan-deps -compilation-database %t/cdb5.json -format experimental-full > %t/result5.json
// RUN: cat %t/result0.json %t/result1.json | FileCheck %s
// RUN: cat %t/result0.json %t/result2.json | FileCheck %s -check-prefix=SKIPOPT
// RUN: cat %t/result3.json %t/result4.json | FileCheck %s -check-prefix=RELPATH
// RUN: cat %t/result0.json %t/result5.json | FileCheck %s

//--- cdb0.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

//--- cdb1.json.in
[{
"directory": "DIR/a",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

// cdb2 is skipped because we reuse cdb1.

//--- cdb3.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
"file": "DIR/tu.c"
}]

//--- cdb4.json.in
[{
"directory": "DIR/a/",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -fprebuilt-module-path=.././module -IDIR/include/ -o DIR/tu.o ",
"file": "DIR/tu.c"
}]

//--- cdb5.json.in
[{
"directory": "DIR",
"command": "clang -c DIR/tu.c -fmodules -fmodules-cache-path=DIR/cache -IDIR/include/ -Xclang -working-directory=DIR/a/ -o DIR/tu.o",
"file": "DIR/tu.c"
}]

//--- include/module.modulemap
module mod {
header "mod.h"
}

//--- include/mod.h

//--- tu.c
#include "mod.h"

// Check that result0 and result1/result5 compute the same hash with
// optimization on. The only difference between result0 and result1/result5 is
// the compiler's working directory.
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK: "context-hash": "[[HASH:.*]]",
// CHECK: }
// CHECK: "translation-units": [
// CHECK: {
// CHECK: "commands": [
// CHECK: {
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
// CHECK-NEXT: "clang-module-deps": [
// CHECK-NEXT: {
// CHECK-NEXT: "context-hash": "[[HASH]]",
// CHECK-NEXT: "module-name": "mod"
// CHECK: }
// CHECK: ],
// CHECK: {
// CHECK-NEXT: "modules": [
// CHECK-NEXT: {
// CHECK-NEXT: "clang-module-deps": [],
// CHECK: "context-hash": "[[HASH]]",
// CHECK: }
// CHECK: "translation-units": [
// CHECK: {
// CHECK: "commands": [
// CHECK: {
// CHECK-NEXT: "clang-context-hash": "{{.*}}",
// CHECK-NEXT: "clang-module-deps": [
// CHECK-NEXT: {
// CHECK-NEXT: "context-hash": "[[HASH]]",
// CHECK-NEXT: "module-name": "mod"
// CHECK: }
// CHECK: ],

// Check that result0 and result2 compute different hashes because
// the working directory optimization is turned off for result2.
// SKIPOPT: {
// SKIPOPT-NEXT: "modules": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "clang-module-deps": [],
// SKIPOPT: "context-hash": "[[HASH0:.*]]",
// SKIPOPT: }
// SKIPOPT: "translation-units": [
// SKIPOPT: {
// SKIPOPT: "commands": [
// SKIPOPT: {
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
// SKIPOPT-NEXT: "clang-module-deps": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "context-hash": "[[HASH0]]",
// SKIPOPT-NEXT: "module-name": "mod"
// SKIPOPT: }
// SKIPOPT: ],
// SKIPOPT: {
// SKIPOPT-NEXT: "modules": [
// SKIPOPT-NEXT: {
// SKIPOPT-NEXT: "clang-module-deps": [],
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
// SKIPOPT: "context-hash": "[[HASH2:.*]]",
// SKIPOPT: }
// SKIPOPT: "translation-units": [
// SKIPOPT: {
// SKIPOPT: "commands": [
// SKIPOPT: {
// SKIPOPT-NEXT: "clang-context-hash": "{{.*}}",
// SKIPOPT-NEXT: "clang-module-deps": [
// SKIPOPT-NEXT: {
// SKIPOPT-NOT: "context-hash": "[[HASH0]]",
// SKIPOPT-NEXT: "context-hash": "[[HASH2]]"
// SKIPOPT-NEXT: "module-name": "mod"
// SKIPOPT: }
// SKIPOPT: ],

// Check that result3 and result4 contain different hashes because
// both have the same relative path as a command line input, and
// they are produced using different compiler working directories.
// RELPATH: {
// RELPATH-NEXT: "modules": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "clang-module-deps": [],
// RELPATH: "context-hash": "[[HASH3:.*]]",
// RELPATH: }
// RELPATH: "translation-units": [
// RELPATH: {
// RELPATH: "commands": [
// RELPATH: {
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
// RELPATH-NEXT: "clang-module-deps": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "context-hash": "[[HASH3]]",
// RELPATH-NEXT: "module-name": "mod"
// RELPATH: }
// RELPATH: ],
// RELPATH: {
// RELPATH-NEXT: "modules": [
// RELPATH-NEXT: {
// RELPATH-NEXT: "clang-module-deps": [],
// RELPATH-NOT: "context-hash": "[[HASH3]]",
// RELPATH: "context-hash": "[[HASH4:.*]]",
// RELPATH: }
// RELPATH: "translation-units": [
// RELPATH: {
// RELPATH: "commands": [
// RELPATH: {
// RELPATH-NEXT: "clang-context-hash": "{{.*}}",
// RELPATH-NEXT: "clang-module-deps": [
// RELPATH-NEXT: {
// RELPATH-NOT: "context-hash": "[[HASH3]]",
// RELPATH-NEXT: "context-hash": "[[HASH4]]"
// RELPATH-NEXT: "module-name": "mod"
// RELPATH: }
// RELPATH: ],

2 changes: 1 addition & 1 deletion clang/test/ClangScanDeps/working-dir.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// RUN: split-file %s %t
// RUN: sed -e "s|DIR|%/t|g" %t/build/compile-commands.json.in > %t/build/compile-commands.json
// RUN: clang-scan-deps -compilation-database %t/build/compile-commands.json \
// RUN: -j 1 -format experimental-full --optimize-args=all > %t/deps.db
// RUN: -j 1 -format experimental-full --optimize-args=header-search,system-warnings,vfs,canonicalize-macros > %t/deps.db
// RUN: cat %t/deps.db | sed 's:\\\\\?:/:g' | FileCheck %s -DPREFIX=%/t

// Check that there are two separate modules hashes. One for each working dir.
Expand Down
2 changes: 2 additions & 0 deletions clang/tools/clang-scan-deps/ClangScanDeps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ static void ParseArgs(int argc, char **argv) {
.Case("system-warnings", ScanningOptimizations::SystemWarnings)
.Case("vfs", ScanningOptimizations::VFS)
.Case("canonicalize-macros", ScanningOptimizations::Macros)
.Case("ignore-current-working-dir",
ScanningOptimizations::IgnoreCWD)
.Case("all", ScanningOptimizations::All)
.Default(std::nullopt);
if (!Optimization) {
Expand Down

0 comments on commit 27d85de

Please sign in to comment.