From bee907ea2ef9e9b0db89ce078233d0456b972c4a Mon Sep 17 00:00:00 2001 From: Laszlo Nagy Date: Fri, 24 Apr 2026 09:57:18 +0000 Subject: [PATCH] fix(intercept): apply review findings for masquerade-wrapper handling Follow-up to d7305ba. Addresses issues raised in an independent review of the wrapper-recursion fix. Behaviour - resolve_program_path now also filters masquerade wrappers when the supplied CC/CXX/... value is an absolute path or a relative path with a directory component. Before, CC=/usr/lib64/ccache/gcc (or CC=./ccache-dir/gcc) bypassed the filter entirely and Bear would store the ccache symlink in the wrapper config, recreating the exact loop the filter is meant to prevent. When the supplied path IS a masquerade wrapper, resolution falls back to the basename via PATH past the masquerade dir; if no real compiler is found, resolution returns None and the caller logs that it is skipping the compiler. - resolve_past_masquerade_wrappers now emits a WARN when PATH is exhausted after excluding one or more masquerade dirs, naming the compiler and the dir(s). The acceptance criterion about logging "names the compiler and the detected wrapper" was otherwise only served by the generic "could not resolve to an executable on PATH" warning. Performance - is_masquerade_wrapper short-circuits with symlink_metadata before calling canonicalize. Without this, every executable in every PATH directory paid a real canonicalize syscall during PATH discovery; masquerade targets are always symlinks, so the non-symlink path is common and should be cheap. Tests - New unit test resolve_program_path_falls_back_past_masquerade_for_absolute_cc covers CC=/abs/path/to/masquerade/gcc with a real compiler elsewhere on PATH. - The integration test wrapper_mode_survives_masquerade_wrapper_in_path replaces the .contains(".bear") substring check with a Path::starts_with on the exact wrapper directory -- the loose substring check could false-positive on any path that happens to contain ".bear". Docs - Requirement wording about detection rewritten: drops the prescriptive "read_link iteratively, not canonicalize" language (the /usr/bin/gcc -> gcc-13 rationale applied to compiler registration, not masquerade detection), and spells out that canonicalize must NOT be used for the registration path. - Striked the "nested compiler invocation" Testing scenario from this requirement -- that guarantee belongs to interception-wrapper-mechanism and is preserved here by not modifying the child's PATH. Added a short note pointing there. Co-Authored-By: Claude Opus 4.7 (1M context) --- bear/src/intercept/environment.rs | 125 ++++++++++++++---- integration-tests/tests/cases/intercept.rs | 11 +- .../interception-wrapper-recursion.md | 37 ++++-- 3 files changed, 134 insertions(+), 39 deletions(-) diff --git a/bear/src/intercept/environment.rs b/bear/src/intercept/environment.rs index 61daf91b..6f0a72c4 100644 --- a/bear/src/intercept/environment.rs +++ b/bear/src/intercept/environment.rs @@ -243,12 +243,15 @@ impl BuildEnvironment { /// Resolves a program env var value to an absolute executable path. /// /// Handles three cases: - /// - Absolute path: returned as-is (not canonicalized) - /// - Relative path (contains directory component): joined with cwd + /// - Absolute path: returned as-is (not canonicalized); if the path is a + /// masquerade wrapper, falls back to resolving the basename via PATH. + /// - Relative path (contains directory component): joined with cwd; same + /// masquerade fallback applies. /// - Bare name: resolved via PATH, skipping masquerade wrapper - /// directories (ccache, distcc, icecc, colorgcc, buildcache) + /// directories (ccache, distcc, icecc, colorgcc, buildcache). /// - /// Returns `None` if the program cannot be found. + /// Returns `None` if the program cannot be found or every resolution + /// lands on a masquerade wrapper with no real compiler past it. fn resolve_program_path(context: &context::Context, value: &str) -> Option { let name = value.trim(); if name.is_empty() { @@ -256,22 +259,36 @@ impl BuildEnvironment { } let path = PathBuf::from(name); + let search_path = context.path().map(|(_, p)| p).unwrap_or_else(|| context.confstr_path.clone()); - // Absolute path: pass through as-is. Do not canonicalize because that - // resolves symlinks, which can change the filename (e.g. /usr/bin/gcc -> - // /usr/bin/x86_64-linux-gnu-gcc-13) and break wrapper name matching. - if path.is_absolute() { - return Some(path); - } + // Absolute path, or relative path with a directory component. We do + // not canonicalize here because that resolves symlinks and can change + // the filename (e.g. /usr/bin/gcc -> /usr/bin/x86_64-linux-gnu-gcc-13), + // breaking wrapper name matching. If the supplied path IS a masquerade + // wrapper, fall back to resolving the basename via PATH so we do not + // register a wrapper that loops (see interception-wrapper-recursion). + let supplied: Option = if path.is_absolute() { + Some(path) + } else if path.parent().is_some_and(|p| !p.as_os_str().is_empty()) { + Some(context.current_directory.join(&path)) + } else { + None + }; - // Relative path with directory component (e.g. "./gcc" or "tools/gcc"): - // resolve against cwd to make it absolute. - if path.parent().is_some_and(|p| !p.as_os_str().is_empty()) { - return Some(context.current_directory.join(&path)); + if let Some(candidate) = supplied { + if !is_masquerade_wrapper(&candidate) { + return Some(candidate); + } + let basename = candidate.file_name().and_then(|n| n.to_str())?; + log::info!( + "resolve: {} is a masquerade wrapper; retrying basename '{}' via PATH", + candidate.display(), + basename, + ); + return resolve_past_masquerade_wrappers(basename, &search_path, &context.current_directory); } // Bare name: resolve via PATH, skipping masquerade wrappers. - let search_path = context.path().map(|(_, p)| p).unwrap_or_else(|| context.confstr_path.clone()); resolve_past_masquerade_wrappers(name, &search_path, &context.current_directory) } @@ -446,17 +463,21 @@ const MASQUERADE_WRAPPERS: &[&str] = &["ccache", "distcc", "icecc", "colorgcc", /// Checks whether `path` is a symlink whose ultimate target's filename is one /// of the known masquerade wrappers. /// -/// Uses `canonicalize` to follow the symlink chain because we only inspect -/// the basename of the final target; the canonicalised path itself is -/// discarded. This must not be used when registering a compiler, since -/// canonicalisation would change the name (e.g. `/usr/bin/gcc` -> -/// `/usr/bin/gcc-13`) and break wrapper lookup. +/// Short-circuits on non-symlinks so iterating every executable in a PATH +/// directory stays cheap. For symlinks, uses `canonicalize` to follow the +/// chain because only the basename of the final target is inspected; the +/// canonicalised path itself is discarded. Callers must not use this helper +/// when registering a compiler -- canonicalisation would change the name +/// (e.g. `/usr/bin/gcc` -> `/usr/bin/gcc-13`) and break wrapper lookup. fn is_masquerade_wrapper(path: &Path) -> bool { + let Ok(meta) = std::fs::symlink_metadata(path) else { return false }; + if !meta.file_type().is_symlink() { + return false; + } let Ok(target) = std::fs::canonicalize(path) else { return false }; let Some(name) = target.file_name().and_then(|n| n.to_str()) else { return false }; let stem = name.strip_suffix(".exe").or_else(|| name.strip_suffix(".EXE")).unwrap_or(name); - let stem_lower = stem.to_ascii_lowercase(); - MASQUERADE_WRAPPERS.iter().any(|w| *w == stem_lower) + MASQUERADE_WRAPPERS.iter().any(|w| w.eq_ignore_ascii_case(stem)) } /// Resolves a bare program name via PATH, transparently skipping masquerade @@ -472,7 +493,17 @@ fn resolve_past_masquerade_wrappers(name: &str, search_path: &str, cwd: &Path) - let mut excluded: Vec = Vec::new(); loop { let current = filter_out_paths(search_path, &excluded); - let found = which::which_in(name, Some(current.as_str()), cwd).ok()?; + let found = match which::which_in(name, Some(current.as_str()), cwd) { + Ok(path) => path, + Err(_) => { + if !excluded.is_empty() { + let dirs = + excluded.iter().map(|p| p.display().to_string()).collect::>().join(", "); + log::warn!("resolve: no real '{}' on PATH past masquerade dir(s): {}", name, dirs,); + } + return None; + } + }; if !is_masquerade_wrapper(&found) { return Some(found); @@ -1382,4 +1413,52 @@ mod test { assert_eq!(found, real_gcc); } } + + /// An explicit CC=/path/to/ccache-masquerade/gcc must not be stored + /// verbatim in the wrapper config -- that would recreate the recursion + /// the masquerade filter is meant to prevent. + // Requirements: interception-wrapper-recursion + #[cfg(unix)] + #[test] + fn resolve_program_path_falls_back_past_masquerade_for_absolute_cc() { + use std::os::unix::fs::PermissionsExt; + use tempfile::TempDir; + + let dir = TempDir::new().unwrap(); + let ccache_bin = dir.path().join("ccache"); + std::fs::write(&ccache_bin, "#!/bin/sh\n").unwrap(); + std::fs::set_permissions(&ccache_bin, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let masq_dir = dir.path().join("masq"); + std::fs::create_dir_all(&masq_dir).unwrap(); + let masq_gcc = masq_dir.join("gcc"); + std::os::unix::fs::symlink(&ccache_bin, &masq_gcc).unwrap(); + + let real_dir = dir.path().join("real"); + std::fs::create_dir_all(&real_dir).unwrap(); + let real_gcc = real_dir.join("gcc"); + std::fs::write(&real_gcc, "#!/bin/sh\n").unwrap(); + std::fs::set_permissions(&real_gcc, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let mut environment = HashMap::new(); + environment.insert( + KEY_OS__PATH.to_string(), + std::env::join_paths([&masq_dir, &real_dir]).unwrap().into_string().unwrap(), + ); + + let context = crate::context::Context { + current_executable: PathBuf::from("/unused"), + current_directory: dir.path().to_path_buf(), + environment, + preload_supported: true, + confstr_path: String::new(), + }; + + let resolved = BuildEnvironment::resolve_program_path(&context, masq_gcc.to_str().unwrap()); + assert_eq!( + resolved.as_deref(), + Some(real_gcc.as_path()), + "absolute CC pointing at a masquerade symlink must fall back to the real compiler on PATH", + ); + } } diff --git a/integration-tests/tests/cases/intercept.rs b/integration-tests/tests/cases/intercept.rs index 2873103a..bb8b2c12 100644 --- a/integration-tests/tests/cases/intercept.rs +++ b/integration-tests/tests/cases/intercept.rs @@ -792,7 +792,10 @@ intercept: db.assert_count(1)?; // Recorded compiler must be a real compiler, not the ccache symlink and - // not Bear's own wrapper in `.bear/`. + // not Bear's own wrapper in `.bear/`. `starts_with` on the exact wrapper + // dir is the tight check (a substring match would false-positive on any + // path that happens to contain `.bear`). + let wrapper_dir = env.test_dir().join(".bear"); for entry in db.entries() { let argv = entry.get("arguments").and_then(|v| v.as_array()); let compiler = argv @@ -800,7 +803,11 @@ intercept: .and_then(|v| v.as_str()) .expect("compilation db entry must have argv[0]"); assert!(!compiler.contains("ccache"), "compilation db must not reference ccache: {compiler}"); - assert!(!compiler.contains(".bear"), "compilation db must not reference Bear wrapper: {compiler}"); + assert!( + !std::path::Path::new(compiler).starts_with(&wrapper_dir), + "compilation db must not reference Bear wrapper at {}: {compiler}", + wrapper_dir.display(), + ); } Ok(()) diff --git a/requirements/interception-wrapper-recursion.md b/requirements/interception-wrapper-recursion.md index 88085070..2d9d923f 100644 --- a/requirements/interception-wrapper-recursion.md +++ b/requirements/interception-wrapper-recursion.md @@ -87,12 +87,22 @@ For each compiler that Bear resolves during wrapper setup (from `CC`/`CXX`/... env vars or PATH discovery), Bear classifies the resolved binary as a masquerade wrapper by: -1. Reading the file as a symbolic link (`read_link`, followed - iteratively -- not `canonicalize`, which resolves too aggressively - and would hide, for example, `/usr/bin/gcc -> gcc-13`). -2. Taking the final target's file name and comparing it, lowercased, - against a fixed set of known wrapper names: `ccache`, `distcc`, - `icecc`, `colorgcc`, `buildcache`. +1. Short-circuiting on non-symlinks so iterating every executable + on PATH stays cheap. +2. For symlinks, following the chain to its ultimate target. Only + the final target's basename is inspected; the resolved path + itself is discarded. Implementation uses `std::fs::canonicalize`; + iterative `read_link` would work equivalently. +3. Comparing that basename, ASCII-case-insensitive, against a fixed + set of known wrapper names: `ccache`, `distcc`, `icecc`, + `colorgcc`, `buildcache`. + +Note that Bear must NOT canonicalise when *registering* a compiler, +because that would change the name (e.g. `/usr/bin/gcc` -> +`/usr/bin/gcc-13`) and break wrapper lookup. The classification +helper is used only to decide whether to keep looking; the path +stored in the wrapper config is whatever PATH resolution returned +past the masquerade dirs. If the match succeeds, the directory containing the resolved binary is flagged as a masquerade directory. The resolution retries with that @@ -158,18 +168,17 @@ Given a compiler that exists only as a masquerade symlink on PATH (no real compiler past it): > When Bear resolves it, -> then Bear logs a warning naming the compiler and the detected -> wrapper, +> then Bear logs a warning naming the compiler and the masquerade +> dir(s) it excluded, > and does not register a `.bear/` wrapper for it, > and the build uses the compiler directly without Bear interception > for that name. -Given a nested compiler invocation (a compiler-driver calls another -bare-name compiler from the child process): - -> When the child invokes `cc -c foo.c`, -> then `.bear/cc` is still first on PATH in the grandchild process, -> so the invocation is intercepted. +Nested compiler invocations (a compiler driver spawning another +bare-name compiler in a grandchild process) must still be +intercepted; that guarantee is not specific to masquerade handling +and is covered by `interception-wrapper-mechanism`. This +requirement preserves it by not modifying the child's PATH. ### CI coverage