diff --git a/bear/src/intercept/environment.rs b/bear/src/intercept/environment.rs index 61daf91b..6f0a72c4 100644 --- a/bear/src/intercept/environment.rs +++ b/bear/src/intercept/environment.rs @@ -243,12 +243,15 @@ impl BuildEnvironment { /// Resolves a program env var value to an absolute executable path. /// /// Handles three cases: - /// - Absolute path: returned as-is (not canonicalized) - /// - Relative path (contains directory component): joined with cwd + /// - Absolute path: returned as-is (not canonicalized); if the path is a + /// masquerade wrapper, falls back to resolving the basename via PATH. + /// - Relative path (contains directory component): joined with cwd; same + /// masquerade fallback applies. /// - Bare name: resolved via PATH, skipping masquerade wrapper - /// directories (ccache, distcc, icecc, colorgcc, buildcache) + /// directories (ccache, distcc, icecc, colorgcc, buildcache). /// - /// Returns `None` if the program cannot be found. + /// Returns `None` if the program cannot be found or every resolution + /// lands on a masquerade wrapper with no real compiler past it. fn resolve_program_path(context: &context::Context, value: &str) -> Option { let name = value.trim(); if name.is_empty() { @@ -256,22 +259,36 @@ impl BuildEnvironment { } let path = PathBuf::from(name); + let search_path = context.path().map(|(_, p)| p).unwrap_or_else(|| context.confstr_path.clone()); - // Absolute path: pass through as-is. Do not canonicalize because that - // resolves symlinks, which can change the filename (e.g. /usr/bin/gcc -> - // /usr/bin/x86_64-linux-gnu-gcc-13) and break wrapper name matching. - if path.is_absolute() { - return Some(path); - } + // Absolute path, or relative path with a directory component. We do + // not canonicalize here because that resolves symlinks and can change + // the filename (e.g. /usr/bin/gcc -> /usr/bin/x86_64-linux-gnu-gcc-13), + // breaking wrapper name matching. If the supplied path IS a masquerade + // wrapper, fall back to resolving the basename via PATH so we do not + // register a wrapper that loops (see interception-wrapper-recursion). + let supplied: Option = if path.is_absolute() { + Some(path) + } else if path.parent().is_some_and(|p| !p.as_os_str().is_empty()) { + Some(context.current_directory.join(&path)) + } else { + None + }; - // Relative path with directory component (e.g. "./gcc" or "tools/gcc"): - // resolve against cwd to make it absolute. - if path.parent().is_some_and(|p| !p.as_os_str().is_empty()) { - return Some(context.current_directory.join(&path)); + if let Some(candidate) = supplied { + if !is_masquerade_wrapper(&candidate) { + return Some(candidate); + } + let basename = candidate.file_name().and_then(|n| n.to_str())?; + log::info!( + "resolve: {} is a masquerade wrapper; retrying basename '{}' via PATH", + candidate.display(), + basename, + ); + return resolve_past_masquerade_wrappers(basename, &search_path, &context.current_directory); } // Bare name: resolve via PATH, skipping masquerade wrappers. - let search_path = context.path().map(|(_, p)| p).unwrap_or_else(|| context.confstr_path.clone()); resolve_past_masquerade_wrappers(name, &search_path, &context.current_directory) } @@ -446,17 +463,21 @@ const MASQUERADE_WRAPPERS: &[&str] = &["ccache", "distcc", "icecc", "colorgcc", /// Checks whether `path` is a symlink whose ultimate target's filename is one /// of the known masquerade wrappers. /// -/// Uses `canonicalize` to follow the symlink chain because we only inspect -/// the basename of the final target; the canonicalised path itself is -/// discarded. This must not be used when registering a compiler, since -/// canonicalisation would change the name (e.g. `/usr/bin/gcc` -> -/// `/usr/bin/gcc-13`) and break wrapper lookup. +/// Short-circuits on non-symlinks so iterating every executable in a PATH +/// directory stays cheap. For symlinks, uses `canonicalize` to follow the +/// chain because only the basename of the final target is inspected; the +/// canonicalised path itself is discarded. Callers must not use this helper +/// when registering a compiler -- canonicalisation would change the name +/// (e.g. `/usr/bin/gcc` -> `/usr/bin/gcc-13`) and break wrapper lookup. fn is_masquerade_wrapper(path: &Path) -> bool { + let Ok(meta) = std::fs::symlink_metadata(path) else { return false }; + if !meta.file_type().is_symlink() { + return false; + } let Ok(target) = std::fs::canonicalize(path) else { return false }; let Some(name) = target.file_name().and_then(|n| n.to_str()) else { return false }; let stem = name.strip_suffix(".exe").or_else(|| name.strip_suffix(".EXE")).unwrap_or(name); - let stem_lower = stem.to_ascii_lowercase(); - MASQUERADE_WRAPPERS.iter().any(|w| *w == stem_lower) + MASQUERADE_WRAPPERS.iter().any(|w| w.eq_ignore_ascii_case(stem)) } /// Resolves a bare program name via PATH, transparently skipping masquerade @@ -472,7 +493,17 @@ fn resolve_past_masquerade_wrappers(name: &str, search_path: &str, cwd: &Path) - let mut excluded: Vec = Vec::new(); loop { let current = filter_out_paths(search_path, &excluded); - let found = which::which_in(name, Some(current.as_str()), cwd).ok()?; + let found = match which::which_in(name, Some(current.as_str()), cwd) { + Ok(path) => path, + Err(_) => { + if !excluded.is_empty() { + let dirs = + excluded.iter().map(|p| p.display().to_string()).collect::>().join(", "); + log::warn!("resolve: no real '{}' on PATH past masquerade dir(s): {}", name, dirs,); + } + return None; + } + }; if !is_masquerade_wrapper(&found) { return Some(found); @@ -1382,4 +1413,52 @@ mod test { assert_eq!(found, real_gcc); } } + + /// An explicit CC=/path/to/ccache-masquerade/gcc must not be stored + /// verbatim in the wrapper config -- that would recreate the recursion + /// the masquerade filter is meant to prevent. + // Requirements: interception-wrapper-recursion + #[cfg(unix)] + #[test] + fn resolve_program_path_falls_back_past_masquerade_for_absolute_cc() { + use std::os::unix::fs::PermissionsExt; + use tempfile::TempDir; + + let dir = TempDir::new().unwrap(); + let ccache_bin = dir.path().join("ccache"); + std::fs::write(&ccache_bin, "#!/bin/sh\n").unwrap(); + std::fs::set_permissions(&ccache_bin, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let masq_dir = dir.path().join("masq"); + std::fs::create_dir_all(&masq_dir).unwrap(); + let masq_gcc = masq_dir.join("gcc"); + std::os::unix::fs::symlink(&ccache_bin, &masq_gcc).unwrap(); + + let real_dir = dir.path().join("real"); + std::fs::create_dir_all(&real_dir).unwrap(); + let real_gcc = real_dir.join("gcc"); + std::fs::write(&real_gcc, "#!/bin/sh\n").unwrap(); + std::fs::set_permissions(&real_gcc, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let mut environment = HashMap::new(); + environment.insert( + KEY_OS__PATH.to_string(), + std::env::join_paths([&masq_dir, &real_dir]).unwrap().into_string().unwrap(), + ); + + let context = crate::context::Context { + current_executable: PathBuf::from("/unused"), + current_directory: dir.path().to_path_buf(), + environment, + preload_supported: true, + confstr_path: String::new(), + }; + + let resolved = BuildEnvironment::resolve_program_path(&context, masq_gcc.to_str().unwrap()); + assert_eq!( + resolved.as_deref(), + Some(real_gcc.as_path()), + "absolute CC pointing at a masquerade symlink must fall back to the real compiler on PATH", + ); + } } diff --git a/integration-tests/tests/cases/intercept.rs b/integration-tests/tests/cases/intercept.rs index 2873103a..bb8b2c12 100644 --- a/integration-tests/tests/cases/intercept.rs +++ b/integration-tests/tests/cases/intercept.rs @@ -792,7 +792,10 @@ intercept: db.assert_count(1)?; // Recorded compiler must be a real compiler, not the ccache symlink and - // not Bear's own wrapper in `.bear/`. + // not Bear's own wrapper in `.bear/`. `starts_with` on the exact wrapper + // dir is the tight check (a substring match would false-positive on any + // path that happens to contain `.bear`). + let wrapper_dir = env.test_dir().join(".bear"); for entry in db.entries() { let argv = entry.get("arguments").and_then(|v| v.as_array()); let compiler = argv @@ -800,7 +803,11 @@ intercept: .and_then(|v| v.as_str()) .expect("compilation db entry must have argv[0]"); assert!(!compiler.contains("ccache"), "compilation db must not reference ccache: {compiler}"); - assert!(!compiler.contains(".bear"), "compilation db must not reference Bear wrapper: {compiler}"); + assert!( + !std::path::Path::new(compiler).starts_with(&wrapper_dir), + "compilation db must not reference Bear wrapper at {}: {compiler}", + wrapper_dir.display(), + ); } Ok(()) diff --git a/requirements/interception-wrapper-recursion.md b/requirements/interception-wrapper-recursion.md index 88085070..2d9d923f 100644 --- a/requirements/interception-wrapper-recursion.md +++ b/requirements/interception-wrapper-recursion.md @@ -87,12 +87,22 @@ For each compiler that Bear resolves during wrapper setup (from `CC`/`CXX`/... env vars or PATH discovery), Bear classifies the resolved binary as a masquerade wrapper by: -1. Reading the file as a symbolic link (`read_link`, followed - iteratively -- not `canonicalize`, which resolves too aggressively - and would hide, for example, `/usr/bin/gcc -> gcc-13`). -2. Taking the final target's file name and comparing it, lowercased, - against a fixed set of known wrapper names: `ccache`, `distcc`, - `icecc`, `colorgcc`, `buildcache`. +1. Short-circuiting on non-symlinks so iterating every executable + on PATH stays cheap. +2. For symlinks, following the chain to its ultimate target. Only + the final target's basename is inspected; the resolved path + itself is discarded. Implementation uses `std::fs::canonicalize`; + iterative `read_link` would work equivalently. +3. Comparing that basename, ASCII-case-insensitive, against a fixed + set of known wrapper names: `ccache`, `distcc`, `icecc`, + `colorgcc`, `buildcache`. + +Note that Bear must NOT canonicalise when *registering* a compiler, +because that would change the name (e.g. `/usr/bin/gcc` -> +`/usr/bin/gcc-13`) and break wrapper lookup. The classification +helper is used only to decide whether to keep looking; the path +stored in the wrapper config is whatever PATH resolution returned +past the masquerade dirs. If the match succeeds, the directory containing the resolved binary is flagged as a masquerade directory. The resolution retries with that @@ -158,18 +168,17 @@ Given a compiler that exists only as a masquerade symlink on PATH (no real compiler past it): > When Bear resolves it, -> then Bear logs a warning naming the compiler and the detected -> wrapper, +> then Bear logs a warning naming the compiler and the masquerade +> dir(s) it excluded, > and does not register a `.bear/` wrapper for it, > and the build uses the compiler directly without Bear interception > for that name. -Given a nested compiler invocation (a compiler-driver calls another -bare-name compiler from the child process): - -> When the child invokes `cc -c foo.c`, -> then `.bear/cc` is still first on PATH in the grandchild process, -> so the invocation is intercepted. +Nested compiler invocations (a compiler driver spawning another +bare-name compiler in a grandchild process) must still be +intercepted; that guarantee is not specific to masquerade handling +and is covered by `interception-wrapper-mechanism`. This +requirement preserves it by not modifying the child's PATH. ### CI coverage