diff --git a/.github/workflows/build_rust.yml b/.github/workflows/build_rust.yml deleted file mode 100644 index bcc211a9..00000000 --- a/.github/workflows/build_rust.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: rust CI - -on: - push: - pull_request: - -env: - CARGO_TERM_COLOR: always - -jobs: - lint: - name: Lint - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - run: rustup component add clippy && rustup update stable && rustup default stable - - run: cd rust && cargo clippy - compile: - name: Compile - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - run: rustup update stable && rustup default stable - - run: cd rust && cargo check --verbose - test: - name: Test - strategy: - matrix: - os: - - ubuntu-latest - - windows-latest - - macOS-latest - toolchain: - - stable - - beta - - nightly - runs-on: ${{ matrix.os }} - needs: [compile] - steps: - - uses: actions/checkout@v3 - - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} - - run: cd rust && cargo build --verbose - - run: cd rust && cargo test --verbose - diff --git a/rust/Cargo.toml b/rust/Cargo.toml deleted file mode 100644 index a1eaa9ce..00000000 --- a/rust/Cargo.toml +++ /dev/null @@ -1,34 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -[workspace] -members = [ - "bear" -] -resolver = "2" - -[workspace.dependencies] -thiserror = "2.0" -anyhow = "1.0" -serde = { version = "1.0", default-features = false, features = ["derive"] } -serde_json = { version = "1.0", default-features = false, features = ["std"] } -serde_yml = "0.0" -clap = { version = "4.5", default-features = false, features = ["std", "cargo", "help", "usage", "suggestions"] } -chrono = { version = "0.4", default-features = false, features = ["std", "clock"] } -log = { version = "0.4", default-features = false, features = ["std"] } -env_logger = { version = "0.11", default-features = false, features = ["humantime"]} -rand = { version = "0.9", default-features = false, features = ["std", "thread_rng"] } -path-absolutize = "3.1" -directories = "6.0" -nom = { version = "7.1", default-features = false, features = ["std"] } -regex = "1.9" -shell-words = "1.1" -tempfile = "3.13" -signal-hook = { version = "0.3", default-features = false } - -[workspace.package] -version = "4.0.0" -authors = ["László Nagy "] -repository = "https://github.com/rizsotto/Bear" -homepage = "https://github.com/rizsotto/Bear" -license = "GPL-3" -edition = "2021" diff --git a/rust/README.md b/rust/README.md deleted file mode 100644 index 6980203c..00000000 --- a/rust/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# What's this? - -This is a rust rewrite of the current master branch of this project. - -# Why? - -The current master branch is written in C++ and is not very well written. -I want to rewrite it in rust to make it more maintainable and easier to work with. - -## What's wrong with the current codebase? - -- The idea of disabling exception handling and using Rust-like result values is sound, - but the implementation could be improved. -- The use of CMake as a build tool has caused several issues, - including poor handling of third-party libraries and subprojects. -- Some dependencies are problematic: - - Not all of them are available on all platforms. - - Updating them can be challenging. - -## What are the benefits of rewriting the project in Rust? - -- Easy porting of the project to other platforms, including Windows -- Improved maintainability through the use of third-party libraries - and better development tooling - -# How? - -The `3.x` version will be the last version of the C++ codebase. -The `4.x` version will be the first version of the rust codebase. - -The `master` branch will be kept as the main release branch. -And the rust codebase will be developed on the `master` branch, -but it will be kept in a separate directory. - -# When? - -I will work on this project in my free time (as before). diff --git a/rust/bear/Cargo.toml b/rust/bear/Cargo.toml deleted file mode 100644 index 70539df6..00000000 --- a/rust/bear/Cargo.toml +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -[package] -name = "bear" -description = "Bear is a tool that generates a compilation database for clang tooling." -keywords = ["clang", "clang-tooling", "compilation-database"] -version.workspace = true -authors.workspace = true -repository.workspace = true -homepage.workspace = true -license.workspace = true -edition.workspace = true - -[lib] -name = "bear" -path = "src/lib.rs" - -[[bin]] -name = "bear" -path = "src/bin/bear.rs" - -[[bin]] -name = "wrapper" -path = "src/bin/wrapper.rs" - -[dependencies] -thiserror.workspace = true -anyhow.workspace = true -serde.workspace = true -serde_json.workspace = true -serde_yml.workspace = true -clap.workspace = true -directories.workspace = true -chrono.workspace = true -log.workspace = true -env_logger.workspace = true -path-absolutize.workspace = true -shell-words.workspace = true -nom.workspace = true -regex.workspace = true -rand.workspace = true -tempfile.workspace = true -signal-hook.workspace = true - -[profile.release] -strip = true -lto = true -opt-level = 3 -codegen-units = 1 \ No newline at end of file diff --git a/rust/bear/build.rs b/rust/bear/build.rs deleted file mode 100644 index 6da3e653..00000000 --- a/rust/bear/build.rs +++ /dev/null @@ -1,6 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -fn main() { - println!("cargo:rustc-env=WRAPPER_EXECUTABLE_PATH=/usr/libexec/bear/wrapper"); - println!("cargo:rustc-env=PRELOAD_LIBRARY_PATH=/usr/libexec/bear/$LIB/libexec.so"); -} diff --git a/rust/bear/src/args.rs b/rust/bear/src/args.rs deleted file mode 100644 index 62ed87c0..00000000 --- a/rust/bear/src/args.rs +++ /dev/null @@ -1,373 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! This module contains the command line interface of the application. -//! -//! The command line parsing is implemented using the `clap` library. -//! The module is defining types to represent a structured form of the -//! program invocation. The `Arguments` type is used to represent all -//! possible invocations of the program. - -use anyhow::anyhow; -use clap::{arg, command, ArgAction, ArgMatches, Command}; - -/// Common constants used in the module. -const MODE_INTERCEPT_SUBCOMMAND: &str = "intercept"; -const MODE_SEMANTIC_SUBCOMMAND: &str = "semantic"; -const DEFAULT_OUTPUT_FILE: &str = "compile_commands.json"; -const DEFAULT_EVENT_FILE: &str = "events.json"; - -/// Represents the command line arguments of the application. -#[derive(Debug, PartialEq)] -pub struct Arguments { - // The path of the configuration file. - pub config: Option, - // The mode of the application. - pub mode: Mode, -} - -/// Represents the mode of the application. -#[derive(Debug, PartialEq)] -pub enum Mode { - Intercept { - input: BuildCommand, - output: BuildEvents, - }, - Semantic { - input: BuildEvents, - output: BuildSemantic, - }, - Combined { - input: BuildCommand, - output: BuildSemantic, - }, -} - -/// Represents the execution of a command. -#[derive(Debug, PartialEq)] -pub struct BuildCommand { - pub arguments: Vec, -} - -#[derive(Debug, PartialEq)] -pub struct BuildSemantic { - pub file_name: String, - pub append: bool, -} - -#[derive(Debug, PartialEq)] -pub struct BuildEvents { - pub file_name: String, -} - -impl TryFrom for Arguments { - type Error = anyhow::Error; - - fn try_from(matches: ArgMatches) -> Result { - let config = matches.get_one::("config").map(String::to_string); - - match matches.subcommand() { - Some((MODE_INTERCEPT_SUBCOMMAND, intercept_matches)) => { - let input = BuildCommand::try_from(intercept_matches)?; - let output = intercept_matches - .get_one::("output") - .map(String::to_string) - .expect("output is defaulted"); - - // let output = BuildEvents::try_from(intercept_matches)?; - let mode = Mode::Intercept { - input, - output: BuildEvents { file_name: output }, - }; - let arguments = Arguments { config, mode }; - Ok(arguments) - } - Some((MODE_SEMANTIC_SUBCOMMAND, semantic_matches)) => { - let input = semantic_matches - .get_one::("input") - .map(String::to_string) - .expect("input is defaulted"); - - let output = BuildSemantic::try_from(semantic_matches)?; - let mode = Mode::Semantic { - input: BuildEvents { file_name: input }, - output, - }; - let arguments = Arguments { config, mode }; - Ok(arguments) - } - None => { - let input = BuildCommand::try_from(&matches)?; - let output = BuildSemantic::try_from(&matches)?; - let mode = Mode::Combined { input, output }; - let arguments = Arguments { config, mode }; - Ok(arguments) - } - _ => Err(anyhow!("unrecognized subcommand")), - } - } -} - -impl TryFrom<&ArgMatches> for BuildCommand { - type Error = anyhow::Error; - - fn try_from(matches: &ArgMatches) -> Result { - let arguments = matches - .get_many("COMMAND") - .expect("missing build command") - .cloned() - .collect(); - Ok(BuildCommand { arguments }) - } -} - -impl TryFrom<&ArgMatches> for BuildSemantic { - type Error = anyhow::Error; - - fn try_from(matches: &ArgMatches) -> Result { - let file_name = matches - .get_one::("output") - .map(String::to_string) - .expect("output is defaulted"); - let append = *matches.get_one::("append").unwrap_or(&false); - Ok(BuildSemantic { file_name, append }) - } -} - -/// Represents the command line interface of the application. -/// -/// This describes how the user can interact with the application. -/// The different modes of the application are represented as subcommands. -/// The application can be run in intercept mode, semantic mode, or the -/// default mode where both intercept and semantic are executed. -pub fn cli() -> Command { - command!() - .subcommand_required(false) - .subcommand_negates_reqs(true) - .subcommand_precedence_over_arg(true) - .arg_required_else_help(true) - .args(&[ - arg!(-v --verbose ... "Sets the level of verbosity").action(ArgAction::Count), - arg!(-c --config "Path of the config file"), - ]) - .subcommand( - Command::new(MODE_INTERCEPT_SUBCOMMAND) - .about("intercepts command execution") - .args(&[ - arg!( "Build command") - .action(ArgAction::Append) - .value_terminator("--") - .num_args(1..) - .last(true) - .required(true), - arg!(-o --output "Path of the event file") - .default_value(DEFAULT_EVENT_FILE) - .hide_default_value(false), - ]) - .arg_required_else_help(true), - ) - .subcommand( - Command::new(MODE_SEMANTIC_SUBCOMMAND) - .about("detect semantics of command executions") - .args(&[ - arg!(-i --input "Path of the event file") - .default_value(DEFAULT_EVENT_FILE) - .hide_default_value(false), - arg!(-o --output "Path of the result file") - .default_value(DEFAULT_OUTPUT_FILE) - .hide_default_value(false), - arg!(-a --append "Append result to an existing output file") - .action(ArgAction::SetTrue), - ]) - .arg_required_else_help(false), - ) - .args(&[ - arg!( "Build command") - .action(ArgAction::Append) - .value_terminator("--") - .num_args(1..) - .last(true) - .required(true), - arg!(-o --output "Path of the result file") - .default_value(DEFAULT_OUTPUT_FILE) - .hide_default_value(false), - arg!(-a --append "Append result to an existing output file").action(ArgAction::SetTrue), - ]) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::vec_of_strings; - - #[test] - fn test_intercept_call() { - let execution = vec![ - "bear", - "-c", - "~/bear.yaml", - "intercept", - "-o", - "custom.json", - "--", - "make", - "all", - ]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: Some("~/bear.yaml".to_string()), - mode: Mode::Intercept { - input: BuildCommand { - arguments: vec_of_strings!["make", "all"] - }, - output: BuildEvents { - file_name: "custom.json".to_string() - }, - }, - } - ); - } - - #[test] - fn test_intercept_defaults() { - let execution = vec!["bear", "intercept", "--", "make", "all"]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: None, - mode: Mode::Intercept { - input: BuildCommand { - arguments: vec_of_strings!["make", "all"] - }, - output: BuildEvents { - file_name: "events.json".to_string() - }, - }, - } - ); - } - - #[test] - fn test_semantic_call() { - let execution = vec![ - "bear", - "-c", - "~/bear.yaml", - "semantic", - "-i", - "custom.json", - "-o", - "result.json", - "-a", - ]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: Some("~/bear.yaml".to_string()), - mode: Mode::Semantic { - input: BuildEvents { - file_name: "custom.json".to_string() - }, - output: BuildSemantic { - file_name: "result.json".to_string(), - append: true - }, - }, - } - ); - } - - #[test] - fn test_semantic_defaults() { - let execution = vec!["bear", "semantic"]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: None, - mode: Mode::Semantic { - input: BuildEvents { - file_name: "events.json".to_string() - }, - output: BuildSemantic { - file_name: "compile_commands.json".to_string(), - append: false - }, - }, - } - ); - } - - #[test] - fn test_all_call() { - let execution = vec![ - "bear", - "-c", - "~/bear.yaml", - "-o", - "result.json", - "-a", - "--", - "make", - "all", - ]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: Some("~/bear.yaml".to_string()), - mode: Mode::Combined { - input: BuildCommand { - arguments: vec_of_strings!["make", "all"] - }, - output: BuildSemantic { - file_name: "result.json".to_string(), - append: true - }, - }, - } - ); - } - - #[test] - fn test_all_defaults() { - let execution = vec!["bear", "--", "make", "all"]; - - let matches = cli().get_matches_from(execution); - let arguments = Arguments::try_from(matches).unwrap(); - - assert_eq!( - arguments, - Arguments { - config: None, - mode: Mode::Combined { - input: BuildCommand { - arguments: vec_of_strings!["make", "all"] - }, - output: BuildSemantic { - file_name: "compile_commands.json".to_string(), - append: false - }, - }, - } - ); - } -} diff --git a/rust/bear/src/bin/bear.rs b/rust/bear/src/bin/bear.rs deleted file mode 100644 index 14378dea..00000000 --- a/rust/bear/src/bin/bear.rs +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use bear::modes::{Combined, Intercept, Mode, Semantic}; -use bear::{args, config}; -use std::env; -use std::process::ExitCode; - -/// Driver function of the application. -fn main() -> anyhow::Result { - // Initialize the logging system. - env_logger::init(); - // Get the package name and version from Cargo - let pkg_name = env!("CARGO_PKG_NAME"); - let pkg_version = env!("CARGO_PKG_VERSION"); - log::debug!("{} v{}", pkg_name, pkg_version); - - // Parse the command line arguments. - let matches = args::cli().get_matches(); - let arguments = args::Arguments::try_from(matches)?; - - // Print the arguments. - log::debug!("Arguments: {:?}", arguments); - // Load the configuration. - let configuration = config::Main::load(&arguments.config)?; - log::debug!("Configuration: {:?}", configuration); - - // Run the application. - let application = Application::configure(arguments, configuration)?; - let result = application.run(); - log::debug!("Exit code: {:?}", result); - - Ok(result) -} - -/// Represent the application state. -enum Application { - Intercept(Intercept), - Semantic(Semantic), - Combined(Combined), -} - -impl Application { - /// Configure the application based on the command line arguments and the configuration. - /// - /// Trying to validate the configuration and the arguments, while creating the application - /// state that will be used by the `run` method. Trying to catch problems early before - /// the actual execution of the application. - fn configure(args: args::Arguments, config: config::Main) -> anyhow::Result { - match args.mode { - args::Mode::Intercept { input, output } => { - log::debug!("Mode: intercept"); - Intercept::from(input, output, config).map(Application::Intercept) - } - args::Mode::Semantic { input, output } => { - log::debug!("Mode: semantic analysis"); - Semantic::from(input, output, config).map(Application::Semantic) - } - args::Mode::Combined { input, output } => { - log::debug!("Mode: intercept and semantic analysis"); - Combined::from(input, output, config).map(Application::Combined) - } - } - } - - fn run(self) -> ExitCode { - let status = match self { - Application::Intercept(intercept) => intercept.run(), - Application::Semantic(semantic) => semantic.run(), - Application::Combined(all) => all.run(), - }; - status.unwrap_or_else(|error| { - log::error!("Bear: {}", error); - ExitCode::FAILURE - }) - } -} diff --git a/rust/bear/src/bin/wrapper.rs b/rust/bear/src/bin/wrapper.rs deleted file mode 100644 index 5dfb4207..00000000 --- a/rust/bear/src/bin/wrapper.rs +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! This module implements a wrapper around an arbitrary executable. -//! -//! The wrapper is used to intercept the execution of the executable and -//! report it to a remote server. The wrapper is named after the executable -//! via a soft link (or a hard copy on platforms where soft links are not -//! supported). The wrapper process is called instead of the original executable. -//! This is arranged by the process that supervise the build process. -//! The build supervisor creates a directory with soft links and place -//! that directory at the beginning of the PATH variable. Which guarantees -//! that the wrapper is called instead of the original executable. -//! -//! The wrapper reads the PATH variable and finds the next executable with -//! the same name as the wrapper. It reports the execution of the real -//! executable and then calls the real executable with the same arguments. - -extern crate core; - -use anyhow::{Context, Result}; -use bear::intercept::supervise::supervise; -use bear::intercept::tcp::ReporterOnTcp; -use bear::intercept::Reporter; -use bear::intercept::KEY_DESTINATION; -use bear::intercept::{Event, Execution, ProcessId}; -use std::path::{Path, PathBuf}; - -/// Implementation of the wrapper process. -/// -/// The process exit code is the same as the executed process exit code. -/// Besides the functionality described in the module documentation, the -/// wrapper process logs the execution and the relevant steps leading to -/// the execution. -fn main() -> Result<()> { - env_logger::init(); - // Find out what is the executable name the execution was started with - let executable = file_name_from_arguments()?; - log::info!("Executable as called: {:?}", executable); - // Read the PATH variable and find the next executable with the same name - let real_executable = next_in_path(&executable)?; - log::info!("Executable to call: {:?}", real_executable); - - // Reporting failures shall not fail the execution. - match into_execution(&real_executable).and_then(report) { - Ok(_) => log::info!("Execution reported"), - Err(e) => log::error!("Execution reporting failed: {}", e), - } - - // Execute the real executable with the same arguments - let mut command = std::process::Command::new(real_executable); - let exit_status = supervise(command.args(std::env::args().skip(1)))?; - log::info!("Execution finished with status: {:?}", exit_status); - // Return the child process status code - std::process::exit(exit_status.code().unwrap_or(1)); -} - -/// Get the file name of the executable from the arguments. -/// -/// Since the executable will be called via soft link, the first argument -/// will be the name of the soft link. This function returns the file name -/// of the soft link. -fn file_name_from_arguments() -> Result { - std::env::args() - .next() - .ok_or_else(|| anyhow::anyhow!("Cannot get first argument")) - .and_then(|arg| match PathBuf::from(arg).file_name() { - Some(file_name) => Ok(PathBuf::from(file_name)), - None => Err(anyhow::anyhow!( - "Cannot get the file name from the argument" - )), - }) -} - -/// Find the next executable in the PATH variable. -/// -/// The function reads the PATH variable and tries to find the next executable -/// with the same name as the given executable. It returns the path to the -/// executable. -fn next_in_path(target: &Path) -> Result { - let path = std::env::var("PATH")?; - log::debug!("PATH: {}", path); - // The `current_exe` is a canonical path to the current executable. - let current_exe = std::env::current_exe()?; - - path.split(':') - .map(|dir| Path::new(dir).join(target)) - .filter(|path| path.is_file()) - .find(|path| { - // We need to compare it with the real path of the candidate executable to avoid - // calling the same executable again. - let real_path = match path.canonicalize() { - Ok(path) => path, - Err(_) => return false, - }; - real_path != current_exe - }) - .ok_or_else(|| anyhow::anyhow!("Cannot find the real executable")) -} - -fn report(execution: Execution) -> Result<()> { - let event = Event { - pid: ProcessId(std::process::id()), - execution, - }; - - // Get the reporter address from the environment - std::env::var(KEY_DESTINATION) - .with_context(|| format!("${} is missing from the environment", KEY_DESTINATION)) - // Create a new reporter - .and_then(ReporterOnTcp::new) - .with_context(|| "Cannot create TCP execution reporter") - // Report the execution - .and_then(|reporter| reporter.report(event)) - .with_context(|| "Sending execution failed") -} - -fn into_execution(path_buf: &Path) -> Result { - Ok(Execution { - executable: path_buf.to_path_buf(), - arguments: std::env::args().collect(), - working_dir: std::env::current_dir()?, - environment: std::env::vars().collect(), - }) -} diff --git a/rust/bear/src/config.rs b/rust/bear/src/config.rs deleted file mode 100644 index 301150fd..00000000 --- a/rust/bear/src/config.rs +++ /dev/null @@ -1,1392 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! This module defines the configuration of the application. -//! -//! The configuration is either loaded from a file or used with the default -//! values, which are defined in the code. The configuration exposes the main -//! logical steps that the application will follow. -//! -//! The configuration file syntax is based on the YAML format. -//! The default configuration file name is `bear.yml`. -//! -//! The configuration file location is searched in the following order: -//! - The current working directory. -//! - The local configuration directory of the user. -//! - The configuration directory of the user. -//! - The local configuration directory of the application. -//! - The configuration directory of the application. -//! -//! The configuration file content is validated against the schema version, -//! syntax and semantic constraints. If the configuration file is invalid, -//! the application will exit with an error message explaining the issue. -//! -//! ```yaml -//! schema: 4.0 -//! -//! intercept: -//! mode: wrapper -//! directory: /tmp -//! executables: -//! - /usr/bin/cc -//! - /usr/bin/c++ -//! - /usr/bin/clang -//! - /usr/bin/clang++ -//! output: -//! specification: clang -//! compilers: -//! - path: /usr/local/bin/cc -//! ignore: always -//! - path: /usr/bin/cc -//! ignore: never -//! - path: /usr/bin/c++ -//! ignore: conditional -//! arguments: -//! match: -//! - -### -//! - path: /usr/bin/clang -//! ignore: never -//! arguments: -//! add: -//! - -DDEBUG -//! remove: -//! - -Wall -//! - path: /usr/bin/clang++ -//! arguments: -//! remove: -//! - -Wall -//! sources: -//! only_existing_files: true -//! paths: -//! - path: /opt/project/sources -//! ignore: never -//! - path: /opt/project/tests -//! ignore: always -//! duplicates: -//! by_fields: -//! - file -//! - directory -//! format: -//! command_as_array: true -//! drop_output_field: false -//! paths_as: canonical -//! ``` -//! -//! ```yaml -//! schema: 4.0 -//! -//! intercept: -//! mode: preload -//! output: -//! specification: bear -//! ``` - -use std::fs::OpenOptions; -use std::path::{Path, PathBuf}; - -use crate::config::validation::Validate; -use anyhow::{Context, Result}; -use directories::{BaseDirs, ProjectDirs}; -use log::{debug, info}; -use serde::{Deserialize, Serialize}; - -const SUPPORTED_SCHEMA_VERSION: &str = "4.0"; -const PRELOAD_LIBRARY_PATH: &str = env!("PRELOAD_LIBRARY_PATH"); -const WRAPPER_EXECUTABLE_PATH: &str = env!("WRAPPER_EXECUTABLE_PATH"); - -/// Represents the application configuration. -#[derive(Debug, PartialEq, Deserialize, Serialize)] -pub struct Main { - #[serde(deserialize_with = "validate_schema_version")] - pub schema: String, - #[serde(default)] - pub intercept: Intercept, - #[serde(default)] - pub output: Output, -} - -impl Main { - /// Loads the configuration from the specified file or the default locations. - /// - /// If the configuration file is specified, it will be used. Otherwise, the default locations - /// will be searched for the configuration file. If the configuration file is not found, the - /// default configuration will be returned. - pub fn load(file: &Option) -> Result { - if let Some(path) = file { - // If the configuration file is specified, use it. - let config_file_path = PathBuf::from(path); - Self::from_file(config_file_path.as_path()) - } else { - // Otherwise, try to find the configuration file in the default locations. - let locations = Self::file_locations(); - for location in locations { - debug!("Checking configuration file: {}", location.display()); - if location.exists() { - return Self::from_file(location.as_path()); - } - } - // If the configuration file is not found, return the default configuration. - debug!("Configuration file not found. Using the default configuration."); - Ok(Self::default()) - } - } - - /// The default locations where the configuration file can be found. - /// - /// The locations are searched in the following order: - /// - The current working directory. - /// - The local configuration directory of the user. - /// - The configuration directory of the user. - /// - The local configuration directory of the application. - /// - The configuration directory of the application. - fn file_locations() -> Vec { - let mut locations = Vec::new(); - - if let Ok(current_dir) = std::env::current_dir() { - locations.push(current_dir); - } - if let Some(base_dirs) = BaseDirs::new() { - locations.push(base_dirs.config_local_dir().to_path_buf()); - locations.push(base_dirs.config_dir().to_path_buf()); - } - - if let Some(proj_dirs) = ProjectDirs::from("com.github", "rizsotto", "Bear") { - locations.push(proj_dirs.config_local_dir().to_path_buf()); - locations.push(proj_dirs.config_dir().to_path_buf()); - } - // filter out duplicate elements from the list - locations.dedup(); - // append the default configuration file name to the locations - locations.iter().map(|p| p.join("bear.yml")).collect() - } - - /// Loads the configuration from the specified file. - pub fn from_file(file: &Path) -> Result { - info!("Loading configuration file: {}", file.display()); - - let reader = OpenOptions::new() - .read(true) - .open(file) - .with_context(|| format!("Failed to open configuration file: {:?}", file))?; - - let content: Self = Self::from_reader(reader) - .with_context(|| format!("Failed to parse configuration from file: {:?}", file))?; - - content.validate() - } - - /// Define the deserialization format of the config file. - fn from_reader(rdr: R) -> serde_yml::Result - where - R: std::io::Read, - T: serde::de::DeserializeOwned, - { - serde_yml::from_reader(rdr) - } -} - -impl Default for Main { - fn default() -> Self { - Main { - schema: String::from(SUPPORTED_SCHEMA_VERSION), - intercept: Intercept::default(), - output: Output::default(), - } - } -} - -/// Intercept configuration is either a wrapper or a preload mode. -/// -/// In wrapper mode, the compiler is wrapped with a script that intercepts the compiler calls. -/// The configuration for that is capturing the directory where the wrapper scripts are stored -/// and the list of executables to wrap. -/// -/// In preload mode, the compiler is intercepted by a shared library that is preloaded before -/// the compiler is executed. The configuration for that is the path to the shared library. -#[derive(Debug, PartialEq, Deserialize, Serialize)] -#[serde(tag = "mode")] -pub enum Intercept { - #[serde(rename = "wrapper")] - Wrapper { - #[serde(default = "default_wrapper_executable")] - path: PathBuf, - #[serde(default = "default_wrapper_directory")] - directory: PathBuf, - executables: Vec, - }, - #[serde(rename = "preload")] - Preload { - #[serde(default = "default_preload_library")] - path: PathBuf, - }, -} - -/// The default intercept mode is varying based on the target operating system. -impl Default for Intercept { - #[cfg(any( - target_os = "linux", - target_os = "freebsd", - target_os = "netbsd", - target_os = "openbsd", - target_os = "dragonfly" - ))] - fn default() -> Self { - Intercept::Preload { - path: default_preload_library(), - } - } - - #[cfg(any(target_os = "macos", target_os = "ios"))] - fn default() -> Self { - Intercept::Wrapper { - path: default_wrapper_executable(), - directory: default_wrapper_directory(), - executables: vec![ - PathBuf::from("/usr/bin/cc"), - PathBuf::from("/usr/bin/c++"), - PathBuf::from("/usr/bin/clang"), - PathBuf::from("/usr/bin/clang++"), - ], - } - } - - #[cfg(target_os = "windows")] - fn default() -> Self { - Intercept::Wrapper { - path: default_wrapper_executable(), - directory: default_wrapper_directory(), - executables: vec![ - PathBuf::from("C:\\msys64\\mingw64\\bin\\gcc.exe"), - PathBuf::from("C:\\msys64\\mingw64\\bin\\g++.exe"), - ], - } - } -} - -/// Output configuration is used to customize the output format. -/// -/// Allow to customize the output format of the compiler calls. -/// -/// - Clang: Output the compiler calls in the clang project defined "JSON compilation database" -/// format. (The format is used by clang tooling and other tools based on that library.) -/// - Semantic: Output the compiler calls in the semantic format. (The format is not defined yet.) -#[derive(Debug, PartialEq, Deserialize, Serialize)] -#[serde(tag = "specification")] -pub enum Output { - #[serde(rename = "clang")] - Clang { - #[serde(default)] - compilers: Vec, - #[serde(default)] - sources: SourceFilter, - #[serde(default)] - duplicates: DuplicateFilter, - #[serde(default)] - format: Format, - }, - #[serde(rename = "bear")] - Semantic {}, -} - -/// The default output is the clang format. -impl Default for Output { - fn default() -> Self { - Output::Clang { - compilers: vec![], - sources: SourceFilter::default(), - duplicates: DuplicateFilter::default(), - format: Format::default(), - } - } -} - -/// Represents instructions to transform the compiler calls. -/// -/// Allow to transform the compiler calls by adding or removing arguments. -/// It also can instruct to filter out the compiler call from the output. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub struct Compiler { - pub path: PathBuf, - #[serde(default)] - pub ignore: IgnoreOrConsider, - #[serde(default)] - pub arguments: Arguments, -} - -/// Represents instructions to ignore the compiler call. -/// -/// The meaning of the possible values are: -/// - Always: Always ignore the compiler call. -/// - Never: Never ignore the compiler call. (Default) -/// - Conditional: Ignore the compiler call if the arguments match. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub enum IgnoreOrConsider { - #[serde(rename = "always", alias = "true")] - Always, - #[serde(rename = "never", alias = "false")] - Never, - #[serde(rename = "conditional")] - Conditional, -} - -/// The default ignore mode is never ignore. -impl Default for IgnoreOrConsider { - fn default() -> Self { - IgnoreOrConsider::Never - } -} - -/// Argument lists to match, add or remove. -/// -/// The `match` field is used to specify the arguments to match. Can be used only with the -/// conditional ignore mode. -/// -/// The `add` or `remove` fields are used to specify the arguments to add or remove. These can be -/// used with the conditional or never ignore mode. -#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)] -pub struct Arguments { - #[serde(default, rename = "match")] - pub match_: Vec, - #[serde(default)] - pub add: Vec, - #[serde(default)] - pub remove: Vec, -} - -/// Source filter configuration is used to filter the compiler calls based on the source files. -/// -/// Allow to filter the compiler calls based on the source files. -/// -/// - Include only existing files: can be true or false. -/// - List of directories to include or exclude. -/// (The order of these entries will imply the order of evaluation.) -#[derive(Clone, Debug, Default, PartialEq, Deserialize, Serialize)] -pub struct SourceFilter { - #[serde(default = "default_disabled")] - pub only_existing_files: bool, - #[serde(default)] - pub paths: Vec, -} - -/// Directory filter configuration is used to filter the compiler calls based on -/// the source file location. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub struct DirectoryFilter { - pub path: PathBuf, - pub ignore: Ignore, -} - -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub enum Ignore { - #[serde(rename = "always", alias = "true")] - Always, - #[serde(rename = "never", alias = "false")] - Never, -} - -/// Duplicate filter configuration is used to filter the duplicate compiler calls. -/// -/// - By fields: Specify the fields of the JSON compilation database record to detect duplicates. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub struct DuplicateFilter { - pub by_fields: Vec, -} - -impl Default for DuplicateFilter { - fn default() -> Self { - DuplicateFilter { - by_fields: vec![OutputFields::File, OutputFields::Arguments], - } - } -} - -/// Represent the fields of the JSON compilation database record. -#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize, Serialize)] -pub enum OutputFields { - #[serde(rename = "directory")] - Directory, - #[serde(rename = "file")] - File, - #[serde(rename = "arguments")] - Arguments, - #[serde(rename = "output")] - Output, -} - -/// Format configuration of the JSON compilation database. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub struct Format { - #[serde(default = "default_enabled")] - pub command_as_array: bool, - #[serde(default = "default_disabled")] - pub drop_output_field: bool, - #[serde(default)] - pub paths_as: PathFormat, -} - -impl Default for Format { - fn default() -> Self { - Format { - command_as_array: true, - drop_output_field: false, - paths_as: PathFormat::default(), - } - } -} - -/// Path format configuration describes how the paths should be formatted. -#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] -pub enum PathFormat { - #[serde(rename = "original", alias = "is")] - Original, - #[serde(rename = "absolute")] - Absolute, - #[serde(rename = "canonical")] - Canonical, -} - -/// The default path format is the original path. -impl Default for PathFormat { - fn default() -> Self { - PathFormat::Original - } -} - -fn default_disabled() -> bool { - false -} - -fn default_enabled() -> bool { - true -} - -/// The default directory where the wrapper executables will be stored. -fn default_wrapper_directory() -> PathBuf { - std::env::temp_dir() -} - -/// The default path to the wrapper executable. -fn default_wrapper_executable() -> PathBuf { - PathBuf::from(WRAPPER_EXECUTABLE_PATH) -} - -/// The default path to the shared library that will be preloaded. -fn default_preload_library() -> PathBuf { - PathBuf::from(PRELOAD_LIBRARY_PATH) -} - -// Custom deserialization function to validate the schema version -fn validate_schema_version<'de, D>(deserializer: D) -> std::result::Result -where - D: serde::Deserializer<'de>, -{ - let schema: String = Deserialize::deserialize(deserializer)?; - if schema != SUPPORTED_SCHEMA_VERSION { - use serde::de::Error; - Err(D::Error::custom(format!( - "Unsupported schema version: {}. Expected: {}", - schema, SUPPORTED_SCHEMA_VERSION - ))) - } else { - Ok(schema) - } -} - -#[cfg(test)] -mod test { - use super::*; - use crate::{vec_of_pathbuf, vec_of_strings}; - - #[test] - fn test_wrapper_config() { - let content: &[u8] = br#" - schema: 4.0 - - intercept: - mode: wrapper - directory: /tmp - executables: - - /usr/bin/cc - - /usr/bin/c++ - - /usr/bin/clang - - /usr/bin/clang++ - output: - specification: clang - compilers: - - path: /usr/local/bin/cc - ignore: always - - path: /usr/bin/cc - ignore: never - - path: /usr/bin/c++ - ignore: conditional - arguments: - match: - - -### - - path: /usr/bin/clang - ignore: never - arguments: - add: - - -DDEBUG - remove: - - -Wall - - path: /usr/bin/clang++ - arguments: - remove: - - -Wall - sources: - only_existing_files: true - paths: - - path: /opt/project/sources - ignore: never - - path: /opt/project/tests - ignore: always - duplicates: - by_fields: - - file - - directory - format: - command_as_array: true - drop_output_field: false - paths_as: canonical - "#; - - let result = Main::from_reader(content).unwrap(); - - let expected = Main { - intercept: Intercept::Wrapper { - path: default_wrapper_executable(), - directory: PathBuf::from("/tmp"), - executables: vec_of_pathbuf![ - "/usr/bin/cc", - "/usr/bin/c++", - "/usr/bin/clang", - "/usr/bin/clang++" - ], - }, - output: Output::Clang { - compilers: vec![ - Compiler { - path: PathBuf::from("/usr/local/bin/cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/c++"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec_of_strings!["-###"], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/clang"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - add: vec_of_strings!["-DDEBUG"], - remove: vec_of_strings!["-Wall"], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/clang++"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - remove: vec_of_strings!["-Wall"], - ..Default::default() - }, - }, - ], - sources: SourceFilter { - only_existing_files: true, - paths: vec![ - DirectoryFilter { - path: PathBuf::from("/opt/project/sources"), - ignore: Ignore::Never, - }, - DirectoryFilter { - path: PathBuf::from("/opt/project/tests"), - ignore: Ignore::Always, - }, - ], - }, - duplicates: DuplicateFilter { - by_fields: vec![OutputFields::File, OutputFields::Directory], - }, - format: Format { - command_as_array: true, - drop_output_field: false, - paths_as: PathFormat::Canonical, - }, - }, - schema: String::from("4.0"), - }; - - assert_eq!(expected, result); - } - - #[test] - fn test_incomplete_wrapper_config() { - let content: &[u8] = br#" - schema: 4.0 - - intercept: - mode: wrapper - executables: - - /usr/bin/cc - - /usr/bin/c++ - output: - specification: clang - sources: - only_existing_files: true - duplicates: - by_fields: - - file - - directory - format: - command_as_array: true - "#; - - let result = Main::from_reader(content).unwrap(); - - let expected = Main { - intercept: Intercept::Wrapper { - path: default_wrapper_executable(), - directory: default_wrapper_directory(), - executables: vec_of_pathbuf!["/usr/bin/cc", "/usr/bin/c++"], - }, - output: Output::Clang { - compilers: vec![], - sources: SourceFilter { - only_existing_files: true, - paths: vec![], - }, - duplicates: DuplicateFilter { - by_fields: vec![OutputFields::File, OutputFields::Directory], - }, - format: Format { - command_as_array: true, - drop_output_field: false, - paths_as: PathFormat::Original, - }, - }, - schema: String::from("4.0"), - }; - - assert_eq!(expected, result); - } - - #[test] - fn test_preload_config() { - let content: &[u8] = br#" - schema: 4.0 - - intercept: - mode: preload - path: /usr/local/lib/libexec.so - output: - specification: bear - "#; - - let result = Main::from_reader(content).unwrap(); - - let expected = Main { - intercept: Intercept::Preload { - path: PathBuf::from("/usr/local/lib/libexec.so"), - }, - output: Output::Semantic {}, - schema: String::from("4.0"), - }; - - assert_eq!(expected, result); - } - - #[test] - fn test_incomplete_preload_config() { - let content: &[u8] = br#" - schema: 4.0 - - intercept: - mode: preload - output: - specification: clang - compilers: - - path: /usr/local/bin/cc - - path: /usr/local/bin/c++ - - path: /usr/local/bin/clang - ignore: always - - path: /usr/local/bin/clang++ - ignore: always - sources: - only_existing_files: false - duplicates: - by_fields: - - file - format: - command_as_array: true - drop_output_field: true - use_absolute_path: false - "#; - - let result = Main::from_reader(content).unwrap(); - - let expected = Main { - intercept: Intercept::Preload { - path: default_preload_library(), - }, - output: Output::Clang { - compilers: vec![ - Compiler { - path: PathBuf::from("/usr/local/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/local/bin/c++"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/local/bin/clang"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/local/bin/clang++"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - ], - sources: SourceFilter { - only_existing_files: false, - paths: vec![], - }, - duplicates: DuplicateFilter { - by_fields: vec![OutputFields::File], - }, - format: Format { - command_as_array: true, - drop_output_field: true, - paths_as: PathFormat::Original, - }, - }, - schema: String::from("4.0"), - }; - - assert_eq!(expected, result); - } - - #[test] - fn test_default_config() { - let result = Main::default(); - - let expected = Main { - intercept: Intercept::default(), - output: Output::Clang { - compilers: vec![], - sources: SourceFilter::default(), - duplicates: DuplicateFilter::default(), - format: Format::default(), - }, - schema: String::from(SUPPORTED_SCHEMA_VERSION), - }; - - assert_eq!(expected, result); - } - - #[test] - fn test_invalid_schema_version() { - let content: &[u8] = br#" - schema: 3.0 - - intercept: - mode: wrapper - directory: /tmp - executables: - - /usr/bin/gcc - - /usr/bin/g++ - "#; - - let result: serde_yml::Result
= Main::from_reader(content); - - assert!(result.is_err()); - - let message = result.unwrap_err().to_string(); - assert_eq!( - "Unsupported schema version: 3.0. Expected: 4.0 at line 2 column 9", - message - ); - } - - #[test] - fn test_failing_config() { - let content: &[u8] = br#"{ - "output": { - "format": { - "command_as_array": false - }, - "content": { - "duplicates": "files" - } - } - }"#; - - let result: serde_yml::Result
= Main::from_reader(content); - - assert!(result.is_err()); - } -} - -mod validation { - //! This module defines the validation logic for the configuration. - - use anyhow::Result; - use std::collections::HashSet; - use std::path::{Path, PathBuf}; - - use crate::config::{ - Arguments, Compiler, DuplicateFilter, IgnoreOrConsider, Intercept, Main, Output, - SourceFilter, - }; - - /// A trait to validate the configuration and return a valid instance. - pub trait Validate { - fn validate(self) -> Result - where - Self: Sized; - } - - impl Validate for Main { - /// Validate the configuration of the main configuration. - fn validate(self) -> Result { - let intercept = self.intercept.validate()?; - let output = self.output.validate()?; - - Ok(Main { - schema: self.schema, - intercept, - output, - }) - } - } - - impl Validate for Intercept { - /// Validate the configuration of the intercept mode. - fn validate(self) -> Result { - match &self { - Intercept::Wrapper { - path, - directory, - executables, - } => { - if is_empty_path(path) { - anyhow::bail!("The wrapper path cannot be empty."); - } - if is_empty_path(directory) { - anyhow::bail!("The wrapper directory cannot be empty."); - } - for executable in executables { - if is_empty_path(executable) { - anyhow::bail!("The executable path cannot be empty."); - } - } - Ok(self) - } - Intercept::Preload { path } => { - if is_empty_path(path) { - anyhow::bail!("The preload library path cannot be empty."); - } - Ok(self) - } - } - } - } - - impl Validate for Output { - /// Validate the configuration of the output writer. - fn validate(self) -> Result { - match self { - Output::Clang { - compilers, - sources, - duplicates, - format, - } => { - let compilers = compilers.validate()?; - let sources = sources.validate()?; - let duplicates = duplicates.validate()?; - Ok(Output::Clang { - compilers, - sources, - duplicates, - format, - }) - } - Output::Semantic {} => Ok(Output::Semantic {}), - } - } - } - - impl Validate for Vec { - /// Validate the configuration of the compiler list. - /// - /// The validation is done on the individual compiler configuration. - /// Duplicate paths are allowed in the list. But the instruction to ignore the - /// compiler should be the end of the list. - fn validate(self) -> Result { - let mut validated_compilers = Vec::new(); - let mut grouped_compilers: std::collections::HashMap> = - std::collections::HashMap::new(); - - // Group compilers by their path - for compiler in self { - grouped_compilers - .entry(compiler.path.clone()) - .or_default() - .push(compiler); - } - - // Validate each group - for (path, group) in grouped_compilers { - let mut has_always = false; - let mut has_conditional = false; - let mut has_never = false; - - for compiler in group { - match compiler.ignore { - IgnoreOrConsider::Always | IgnoreOrConsider::Conditional if has_never => { - anyhow::bail!("Invalid configuration: 'Always' or 'Conditional' can't be used after 'Never' for path {:?}", path); - } - IgnoreOrConsider::Never | IgnoreOrConsider::Conditional if has_always => { - anyhow::bail!("Invalid configuration: 'Never' or 'Conditional' can't be used after 'Always' for path {:?}", path); - } - IgnoreOrConsider::Never if has_conditional => { - anyhow::bail!("Invalid configuration: 'Never' can't be used after 'Conditional' for path {:?}", path); - } - IgnoreOrConsider::Always if has_always => { - anyhow::bail!("Invalid configuration: 'Always' can't be used multiple times for path {:?}", path); - } - IgnoreOrConsider::Conditional if has_conditional => { - anyhow::bail!("Invalid configuration: 'Conditional' can't be used multiple times for path {:?}", path); - } - IgnoreOrConsider::Never if has_never => { - anyhow::bail!("Invalid configuration: 'Never' can't be used multiple times for path {:?}", path); - } - IgnoreOrConsider::Conditional => { - has_conditional = true; - } - IgnoreOrConsider::Always => { - has_always = true; - } - IgnoreOrConsider::Never => { - has_never = true; - } - } - validated_compilers.push(compiler.validate()?); - } - } - - Ok(validated_compilers) - } - } - - impl Validate for Compiler { - /// Validate the configuration of the compiler. - fn validate(self) -> Result { - match self.ignore { - IgnoreOrConsider::Always if self.arguments != Arguments::default() => { - anyhow::bail!( - "All arguments must be empty in always ignore mode. {:?}", - self.path - ); - } - IgnoreOrConsider::Conditional if self.arguments.match_.is_empty() => { - anyhow::bail!( - "The match arguments cannot be empty in conditional ignore mode. {:?}", - self.path - ); - } - IgnoreOrConsider::Never if !self.arguments.match_.is_empty() => { - anyhow::bail!( - "The arguments must be empty in never ignore mode. {:?}", - self.path - ); - } - _ if is_empty_path(&self.path) => { - anyhow::bail!("The compiler path cannot be empty."); - } - _ => Ok(self), - } - } - } - - impl Validate for SourceFilter { - /// Fail when the same directory is in multiple times in the list. - /// Otherwise, return the received source filter. - fn validate(self) -> Result { - let mut already_seen = HashSet::new(); - for directory in &self.paths { - if !already_seen.insert(&directory.path) { - anyhow::bail!("The directory {:?} is duplicated.", directory.path); - } - } - Ok(self) - } - } - - impl Validate for DuplicateFilter { - /// Deduplicate the fields of the fields vector. - fn validate(self) -> Result { - // error out when the fields vector is empty - if self.by_fields.is_empty() { - anyhow::bail!("The field list cannot be empty."); - } - // error out when the fields vector contains duplicates - let mut already_seen = HashSet::new(); - for field in &self.by_fields { - if !already_seen.insert(field) { - anyhow::bail!("The field {:?} is duplicated.", field); - } - } - Ok(self) - } - } - - fn is_empty_path(path: &Path) -> bool { - path.to_str().is_some_and(|p| p.is_empty()) - } - - #[cfg(test)] - mod test { - use super::*; - use crate::config::{DirectoryFilter, Ignore, OutputFields}; - - #[test] - fn test_duplicate_detection_validation_pass() { - let sut = DuplicateFilter { - by_fields: vec![OutputFields::File, OutputFields::Arguments], - }; - - let result = sut.validate(); - assert!(result.is_ok()); - } - - #[test] - fn test_duplicate_detection_validation_fails() { - let sut = DuplicateFilter { - by_fields: vec![OutputFields::File, OutputFields::File], - }; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_duplicate_detection_validation_fails_on_empty() { - let sut = DuplicateFilter { by_fields: vec![] }; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_validate_compiler_always_with_arguments() { - let sut = Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments { - add: vec!["-DDEBUG".to_string()], - ..Default::default() - }, - }; - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_validate_compiler_conditional_without_match() { - let compiler = Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments::default(), - }; - let result = compiler.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_validate_compiler_never_with_match() { - let compiler = Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - match_: vec!["-###".to_string()], - ..Default::default() - }, - }; - let result = compiler.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_validate_compiler_empty_path() { - let compiler = Compiler { - path: PathBuf::from(""), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }; - let result = compiler.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_compiler_validation_pass() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec!["-###".to_string()], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/c++"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - add: vec!["-DDEBUG".to_string()], - remove: vec!["-Wall".to_string()], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/gcc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec!["-###".to_string()], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/gcc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - ]; - - let result = sut.validate(); - assert!(result.is_ok()); - } - - #[test] - fn test_compiler_validation_fails_conditional_after_always() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec!["-###".to_string()], - ..Default::default() - }, - }, - ]; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_compiler_validation_fails_never_after_always() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - ]; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_compiler_validation_fails_always_after_never() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }, - ]; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_compiler_validation_fails_never_after_never() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - add: vec!["-Wall".to_string()], - ..Default::default() - }, - }, - ]; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_compiler_validation_fails_never_after_conditional() { - let sut: Vec = vec![ - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec!["-###".to_string()], - ..Default::default() - }, - }, - Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments::default(), - }, - ]; - - let result = sut.validate(); - assert!(result.is_err()); - } - - #[test] - fn test_validate_intercept_wrapper_valid() { - let sut = Intercept::Wrapper { - path: PathBuf::from("/usr/bin/wrapper"), - directory: PathBuf::from("/tmp"), - executables: vec![PathBuf::from("/usr/bin/cc")], - }; - assert!(sut.validate().is_ok()); - } - - #[test] - fn test_validate_intercept_wrapper_empty_path() { - let sut = Intercept::Wrapper { - path: PathBuf::from(""), - directory: PathBuf::from("/tmp"), - executables: vec![PathBuf::from("/usr/bin/cc")], - }; - assert!(sut.validate().is_err()); - } - - #[test] - fn test_validate_intercept_wrapper_empty_directory() { - let sut = Intercept::Wrapper { - path: PathBuf::from("/usr/bin/wrapper"), - directory: PathBuf::from(""), - executables: vec![PathBuf::from("/usr/bin/cc")], - }; - assert!(sut.validate().is_err()); - } - - #[test] - fn test_validate_intercept_wrapper_empty_executables() { - let sut = Intercept::Wrapper { - path: PathBuf::from("/usr/bin/wrapper"), - directory: PathBuf::from("/tmp"), - executables: vec![ - PathBuf::from("/usr/bin/cc"), - PathBuf::from("/usr/bin/c++"), - PathBuf::from(""), - ], - }; - assert!(sut.validate().is_err()); - } - - #[test] - fn test_validate_intercept_preload_valid() { - let sut = Intercept::Preload { - path: PathBuf::from("/usr/local/lib/libexec.so"), - }; - assert!(sut.validate().is_ok()); - } - - #[test] - fn test_validate_intercept_preload_empty_path() { - let sut = Intercept::Preload { - path: PathBuf::from(""), - }; - assert!(sut.validate().is_err()); - } - - #[test] - fn test_source_filter_validation_success() { - let sut = SourceFilter { - only_existing_files: true, - paths: vec![ - DirectoryFilter { - path: PathBuf::from("/opt/project/sources"), - ignore: Ignore::Never, - }, - DirectoryFilter { - path: PathBuf::from("/opt/project/tests"), - ignore: Ignore::Always, - }, - ], - }; - - let result = sut.validate(); - assert!(result.is_ok()); - } - - #[test] - fn test_source_filter_validation_duplicates() { - let sut = SourceFilter { - only_existing_files: true, - paths: vec![ - DirectoryFilter { - path: PathBuf::from("/opt/project/sources"), - ignore: Ignore::Never, - }, - DirectoryFilter { - path: PathBuf::from("/opt/project/test"), - ignore: Ignore::Always, - }, - DirectoryFilter { - path: PathBuf::from("/opt/project/sources"), - ignore: Ignore::Always, - }, - ], - }; - - let result = sut.validate(); - assert!(result.is_err()); - } - } -} diff --git a/rust/bear/src/fixtures.rs b/rust/bear/src/fixtures.rs deleted file mode 100644 index 81bee988..00000000 --- a/rust/bear/src/fixtures.rs +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#[cfg(test)] -pub mod fixtures { - #[macro_export] - macro_rules! vec_of_strings { - ($($x:expr),*) => (vec![$($x.to_string()),*]); - } - - #[macro_export] - macro_rules! map_of_strings { - ($($k:expr => $v:expr),* $(,)?) => {{ - core::convert::From::from([$(($k.to_string(), $v.to_string()),)*]) - }}; - } - - #[macro_export] - macro_rules! vec_of_pathbuf { - ($($x:expr),*) => (vec![$(PathBuf::from($x)),*]); - } -} diff --git a/rust/bear/src/intercept/mod.rs b/rust/bear/src/intercept/mod.rs deleted file mode 100644 index 9cf7fbf7..00000000 --- a/rust/bear/src/intercept/mod.rs +++ /dev/null @@ -1,336 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! The module contains the intercept reporting and collecting functionality. -//! -//! When a command execution is intercepted, the interceptor sends the event to the collector. -//! This happens in two different processes, requiring a communication channel between these -//! processes. -//! -//! The module provides abstractions for the reporter and the collector. And it also defines -//! the data structures that are used to represent the events. - -use crate::intercept::supervise::supervise; -use crate::{args, config}; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::process::{Command, ExitCode}; -use std::sync::mpsc::{channel, Receiver, Sender}; -use std::sync::Arc; -use std::{env, fmt, thread}; - -pub mod persistence; -pub mod supervise; -pub mod tcp; - -/// Declare the environment variables used by the intercept mode. -pub const KEY_DESTINATION: &str = "INTERCEPT_REPORTER_ADDRESS"; -pub const KEY_PRELOAD_PATH: &str = "LD_PRELOAD"; - -/// Represents the remote sink of supervised process events. -/// -/// This allows the reporters to send events to a remote collector. -pub trait Reporter { - fn report(&self, event: Event) -> Result<(), anyhow::Error>; -} - -/// Represents the local sink of supervised process events. -/// -/// The collector is responsible for collecting the events from the reporters. -/// -/// To share the collector between threads, we use the `Arc` type to wrap the -/// collector. This way we can clone the collector and send it to other threads. -pub trait Collector { - /// Returns the address of the collector. - /// - /// The address is in the format of `ip:port`. - fn address(&self) -> String; - - /// Collects the events from the reporters. - /// - /// The events are sent to the given destination channel. - /// - /// The function returns when the collector is stopped. The collector is stopped - /// when the `stop` method invoked (from another thread). - fn collect(&self, destination: Sender) -> Result<(), anyhow::Error>; - - /// Stops the collector. - fn stop(&self) -> Result<(), anyhow::Error>; -} - -/// Envelope is a wrapper around the event. -/// -/// It contains the reporter id, the timestamp of the event and the event itself. -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct Envelope { - pub rid: ReporterId, - pub timestamp: u64, - pub event: Event, -} - -impl fmt::Display for Envelope { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "Envelope rid={}, timestamp={}, event={}", - self.rid.0, self.timestamp, self.event - ) - } -} - -/// Represent a relevant life cycle event of a process. -/// -/// In the current implementation, we only have one event, the `Started` event. -/// This event is sent when a process is started. It contains the process id -/// and the execution information. -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct Event { - pub pid: ProcessId, - pub execution: Execution, -} - -impl fmt::Display for Event { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Event pid={}, execution={}", self.pid.0, self.execution) - } -} - -/// Execution is a representation of a process execution. -/// -/// It does not contain information about the outcome of the execution, -/// like the exit code or the duration of the execution. It only contains -/// the information that is necessary to reproduce the execution. -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct Execution { - pub executable: PathBuf, - pub arguments: Vec, - pub working_dir: PathBuf, - pub environment: HashMap, -} - -impl fmt::Display for Execution { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "Execution path={}, args=[{}]", - self.executable.display(), - self.arguments.join(",") - ) - } -} - -/// Reporter id is a unique identifier for a reporter. -/// -/// It is used to identify the process that sends the execution report. -/// Because the OS PID is not unique across a single build (PIDs are -/// recycled), we need to use a new unique identifier to identify the process. -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct ReporterId(pub u64); - -/// Process id is a OS identifier for a process. -#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] -pub struct ProcessId(pub u32); - -/// The service is responsible for collecting the events from the supervised processes. -/// -/// The service is implemented as TCP server that listens on a random port on the loopback -/// interface. The address of the service can be obtained by the `address` method. -/// -/// The service is started in a separate thread to dispatch the events to the consumer. -/// The consumer is a function that receives the events from the service and processes them. -/// It also runs in a separate thread. The reason for having two threads is to avoid blocking -/// the main thread of the application and decouple the collection from the processing. -pub(crate) struct CollectorService { - collector: Arc, - network_thread: Option>, - output_thread: Option>, -} - -impl CollectorService { - /// Creates a new intercept service. - /// - /// The `consumer` is a function that receives the events and processes them. - /// The function is executed in a separate thread. - pub fn new(consumer: F) -> anyhow::Result - where - F: FnOnce(Receiver) -> anyhow::Result<()>, - F: Send + 'static, - { - let collector = tcp::CollectorOnTcp::new()?; - let collector_arc = Arc::new(collector); - let (sender, receiver) = channel(); - - let collector_in_thread = collector_arc.clone(); - let collector_thread = thread::spawn(move || { - let result = collector_in_thread.collect(sender); - if let Err(e) = result { - log::error!("Failed to collect events: {}", e); - } - }); - let output_thread = thread::spawn(move || { - let result = consumer(receiver); - if let Err(e) = result { - log::error!("Failed to process events: {}", e); - } - }); - - log::debug!("Collector service started at {}", collector_arc.address()); - Ok(CollectorService { - collector: collector_arc, - network_thread: Some(collector_thread), - output_thread: Some(output_thread), - }) - } - - /// Returns the address of the service. - pub fn address(&self) -> String { - self.collector.address() - } -} - -impl Drop for CollectorService { - /// Shuts down the service. - fn drop(&mut self) { - // TODO: log the shutdown of the service and any errors - self.collector.stop().expect("Failed to stop the collector"); - if let Some(thread) = self.network_thread.take() { - thread.join().expect("Failed to join the collector thread"); - } - if let Some(thread) = self.output_thread.take() { - thread.join().expect("Failed to join the output thread"); - } - } -} - -/// The environment for the intercept mode. -/// -/// Running the build command requires a specific environment. The environment we -/// need for intercepting the child processes is different for each intercept mode. -/// -/// The `Wrapper` mode requires a temporary directory with the executables that will -/// be used to intercept the child processes. The executables are hard linked to the -/// temporary directory. -/// -/// The `Preload` mode requires the path to the preload library that will be used to -/// intercept the child processes. -pub(crate) enum InterceptEnvironment { - Wrapper { - bin_dir: tempfile::TempDir, - address: String, - collector: CollectorService, - }, - Preload { - path: PathBuf, - address: String, - collector: CollectorService, - }, -} - -impl InterceptEnvironment { - /// Creates a new intercept environment. - /// - /// The `config` is the intercept configuration that specifies the mode and the - /// required parameters for the mode. The `collector` is the service to collect - /// the execution events. - pub fn new(config: &config::Intercept, collector: CollectorService) -> anyhow::Result { - let address = collector.address(); - let result = match config { - config::Intercept::Wrapper { - path, - directory, - executables, - } => { - // Create a temporary directory and populate it with the executables. - let bin_dir = tempfile::TempDir::with_prefix_in(directory, "bear-")?; - for executable in executables { - std::fs::hard_link(executable, path)?; - } - InterceptEnvironment::Wrapper { - bin_dir, - address, - collector, - } - } - config::Intercept::Preload { path } => InterceptEnvironment::Preload { - path: path.clone(), - address, - collector, - }, - }; - Ok(result) - } - - /// Executes the build command in the intercept environment. - /// - /// The method is blocking and waits for the build command to finish. - /// The method returns the exit code of the build command. Result failure - /// indicates that the build command failed to start. - pub fn execute_build_command(&self, input: args::BuildCommand) -> anyhow::Result { - // TODO: record the execution of the build command - - let environment = self.environment(); - let process = input.arguments[0].clone(); - let arguments = input.arguments[1..].to_vec(); - - let mut child = Command::new(process); - - let exit_status = supervise(child.args(arguments).envs(environment))?; - log::info!("Execution finished with status: {:?}", exit_status); - - // The exit code is not always available. When the process is killed by a signal, - // the exit code is not available. In this case, we return the `FAILURE` exit code. - let exit_code = exit_status - .code() - .map(|code| ExitCode::from(code as u8)) - .unwrap_or(ExitCode::FAILURE); - - Ok(exit_code) - } - - /// Returns the environment variables for the intercept environment. - /// - /// The environment variables are different for each intercept mode. - /// It does not change the original environment variables, but creates - /// the environment variables that are required for the intercept mode. - fn environment(&self) -> Vec<(String, String)> { - match self { - InterceptEnvironment::Wrapper { - bin_dir, address, .. - } => { - let path_original = env::var("PATH").unwrap_or_else(|_| String::new()); - let path_updated = InterceptEnvironment::insert_to_path( - &path_original, - Self::path_to_string(bin_dir.path()), - ); - vec![ - ("PATH".to_string(), path_updated), - (KEY_DESTINATION.to_string(), address.clone()), - ] - } - InterceptEnvironment::Preload { path, address, .. } => { - let path_original = env::var(KEY_PRELOAD_PATH).unwrap_or_else(|_| String::new()); - let path_updated = InterceptEnvironment::insert_to_path( - &path_original, - Self::path_to_string(path), - ); - vec![ - (KEY_PRELOAD_PATH.to_string(), path_updated), - (KEY_DESTINATION.to_string(), address.clone()), - ] - } - } - } - - /// Manipulate a `PATH` like environment value by inserting the `first` path into - /// the original value. It removes the `first` path if it already exists in the - /// original value. And it inserts the `first` path at the beginning of the value. - fn insert_to_path(original: &str, first: String) -> String { - let mut paths: Vec<_> = original.split(':').filter(|it| it != &first).collect(); - paths.insert(0, first.as_str()); - paths.join(":") - } - - fn path_to_string(path: &Path) -> String { - path.to_str().unwrap_or("").to_string() - } -} diff --git a/rust/bear/src/intercept/persistence.rs b/rust/bear/src/intercept/persistence.rs deleted file mode 100644 index f8624844..00000000 --- a/rust/bear/src/intercept/persistence.rs +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use super::Envelope; -use serde_json::de::IoRead; -use serde_json::StreamDeserializer; -use std::io; - -/// Generate the build events from the file. -/// -/// Returns an iterator over the build events. -/// Any error will interrupt the reading process and the remaining events will be lost. -pub fn read(reader: impl io::Read) -> impl Iterator { - let stream = StreamDeserializer::new(IoRead::new(reader)); - stream.filter_map(|result| match result { - Ok(value) => Some(value), - Err(error) => { - log::error!("Failed to read event: {:?}", error); - None - } - }) -} - -/// Write the build events to the file. -/// -/// Can fail if the events cannot be serialized or written to the file. -/// Any error will interrupt the writing process and the file will be incomplete. -pub fn write( - mut writer: impl io::Write, - envelopes: impl IntoIterator, -) -> Result<(), anyhow::Error> { - for envelope in envelopes { - serde_json::to_writer(&mut writer, &envelope)?; - writer.write_all(b"\n")?; - } - Ok(()) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::intercept::{Event, Execution, ProcessId, ReporterId}; - use crate::vec_of_strings; - use serde_json::json; - use std::collections::HashMap; - use std::path::PathBuf; - - #[test] - fn read_write() { - let events = expected_values(); - - let mut buffer = Vec::new(); - write(&mut buffer, events.iter().cloned()).unwrap(); - let mut cursor = io::Cursor::new(buffer); - let read_events: Vec<_> = read(&mut cursor).collect(); - - assert_eq!(events, read_events); - } - - #[test] - fn read_write_empty() { - let events = Vec::::new(); - - let mut buffer = Vec::new(); - write(&mut buffer, events.iter().cloned()).unwrap(); - let mut cursor = io::Cursor::new(buffer); - let read_events: Vec<_> = read(&mut cursor).collect(); - - assert_eq!(events, read_events); - } - - #[test] - fn read_stops_on_errors() { - let line1 = json!({ - "rid": 42, - "timestamp": 0, - "event": { - "pid": 11782, - "execution": { - "executable": "/usr/bin/clang", - "arguments": ["clang", "-c", "main.c"], - "working_dir": "/home/user", - "environment": { - "PATH": "/usr/bin", - "HOME": "/home/user" - } - } - } - }); - let line2 = json!({"rid": 42 }); - let line3 = json!({ - "rid": 42, - "timestamp": 273, - "event": { - "pid": 11934, - "execution": { - "executable": "/usr/bin/clang", - "arguments": ["clang", "-c", "output.c"], - "working_dir": "/home/user", - "environment": {} - } - } - }); - let content = format!("{}\n{}\n{}\n", line1, line2, line3); - - let mut cursor = io::Cursor::new(content); - let read_events: Vec<_> = read(&mut cursor).collect(); - - // Only the fist event is read, all other lines are ignored. - assert_eq!(expected_values()[0..1], read_events); - } - - const REPORTER_ID: ReporterId = ReporterId(42); - - fn expected_values() -> Vec { - vec![ - Envelope { - rid: REPORTER_ID, - timestamp: 0, - event: Event { - pid: ProcessId(11782), - execution: Execution { - executable: PathBuf::from("/usr/bin/clang"), - arguments: vec_of_strings!["clang", "-c", "main.c"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::from([ - ("PATH".to_string(), "/usr/bin".to_string()), - ("HOME".to_string(), "/home/user".to_string()), - ]), - }, - }, - }, - Envelope { - rid: REPORTER_ID, - timestamp: 273, - event: Event { - pid: ProcessId(11934), - execution: Execution { - executable: PathBuf::from("/usr/bin/clang"), - arguments: vec_of_strings!["clang", "-c", "output.c"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::from([]), - }, - }, - }, - ] - } -} diff --git a/rust/bear/src/intercept/supervise.rs b/rust/bear/src/intercept/supervise.rs deleted file mode 100644 index a540daaa..00000000 --- a/rust/bear/src/intercept/supervise.rs +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use anyhow::Result; -use std::process::{Command, ExitStatus}; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::thread; -use std::time; - -/// This method supervises the execution of a command. -/// -/// It starts the command and waits for its completion. It also forwards -/// signals to the child process. The method returns the exit status of the -/// child process. -pub fn supervise(command: &mut Command) -> Result { - let signaled = Arc::new(AtomicUsize::new(0)); - for signal in signal_hook::consts::TERM_SIGNALS { - signal_hook::flag::register_usize(*signal, Arc::clone(&signaled), *signal as usize)?; - } - - let mut child = command.spawn()?; - loop { - // Forward signals to the child process, but don't exit the loop while it is running - if signaled.swap(0usize, Ordering::SeqCst) != 0 { - log::debug!("Received signal, forwarding to child process"); - child.kill()?; - } - - // Check if the child process has exited - match child.try_wait() { - Ok(Some(exit_status)) => { - log::debug!("Child process exited"); - return Ok(exit_status); - } - Ok(None) => { - thread::sleep(time::Duration::from_millis(100)); - } - Err(e) => { - log::error!("Error waiting for child process: {}", e); - return Err(e.into()); - } - } - } -} diff --git a/rust/bear/src/intercept/tcp.rs b/rust/bear/src/intercept/tcp.rs deleted file mode 100644 index 79d97b98..00000000 --- a/rust/bear/src/intercept/tcp.rs +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! The module contains the implementation of the TCP collector and reporter. - -use std::io::{Read, Write}; -use std::net::{SocketAddr, TcpListener, TcpStream}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::mpsc::Sender; -use std::sync::Arc; - -use super::{Collector, Envelope, Event, Reporter, ReporterId}; -use chrono::Utc; -use rand; - -/// Implements convenient methods for the `Envelope` type. -impl Envelope { - fn new(rid: &ReporterId, event: Event) -> Self { - let timestamp = Utc::now().timestamp_millis() as u64; - Envelope { - rid: rid.clone(), - timestamp, - event, - } - } - - /// Read an envelope from a reader using TLV format. - /// - /// The envelope is serialized using JSON and the length of the JSON - /// is written as a 4 byte big-endian integer before the JSON. - fn read_from(reader: &mut impl Read) -> Result { - let mut length_bytes = [0; 4]; - reader.read_exact(&mut length_bytes)?; - let length = u32::from_be_bytes(length_bytes) as usize; - - let mut buffer = vec![0; length]; - reader.read_exact(&mut buffer)?; - let envelope = serde_json::from_slice(buffer.as_ref())?; - - Ok(envelope) - } - - /// Write an envelope to a writer using TLV format. - /// - /// The envelope is serialized using JSON and the length of the JSON - /// is written as a 4 byte big-endian integer before the JSON. - fn write_into(&self, writer: &mut impl Write) -> Result { - let serialized_envelope = serde_json::to_string(&self)?; - let bytes = serialized_envelope.into_bytes(); - let length = bytes.len() as u32; - - writer.write_all(&length.to_be_bytes())?; - writer.write_all(&bytes)?; - - Ok(length) - } -} - -/// Implements convenient methods for the `ReporterId` type. -impl ReporterId { - pub fn generate() -> Self { - let id = rand::random::(); - ReporterId(id) - } -} - -/// Represents a TCP event collector. -pub struct CollectorOnTcp { - shutdown: Arc, - listener: TcpListener, - address: SocketAddr, -} - -impl CollectorOnTcp { - /// Creates a new TCP event collector. - /// - /// The collector listens on a random port on the loopback interface. - /// The address of the collector can be obtained by the `address` method. - pub fn new() -> Result { - let shutdown = Arc::new(AtomicBool::new(false)); - let listener = TcpListener::bind("127.0.0.1:0")?; - let address = listener.local_addr()?; - - let result = CollectorOnTcp { - shutdown, - listener, - address, - }; - - Ok(result) - } - - fn send( - &self, - mut socket: TcpStream, - destination: Sender, - ) -> Result<(), anyhow::Error> { - let envelope = Envelope::read_from(&mut socket)?; - destination.send(envelope)?; - - Ok(()) - } -} - -impl Collector for CollectorOnTcp { - fn address(&self) -> String { - self.address.to_string() - } - - /// Single-threaded implementation of the collector. - /// - /// The collector listens on the TCP port and accepts incoming connections. - /// When a connection is accepted, the collector reads the events from the - /// connection and sends them to the destination channel. - fn collect(&self, destination: Sender) -> Result<(), anyhow::Error> { - for stream in self.listener.incoming() { - // This has to be the first thing to do, in order to implement the stop method! - if self.shutdown.load(Ordering::Relaxed) { - break; - } - - match stream { - Ok(connection) => { - // ... (process the connection in a separate thread or task) - self.send(connection, destination.clone())?; - } - Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { - // No new connection available, continue checking for shutdown - continue; - } - Err(e) => { - println!("Error: {}", e); - break; - } - } - } - Ok(()) - } - - /// Stops the collector by flipping the shutdown flag and connecting to the collector. - /// - /// The collector is stopped when the `collect` method sees the shutdown flag. - /// To signal the collector to stop, we connect to the collector to unblock the - /// `accept` call to check the shutdown flag. - fn stop(&self) -> Result<(), anyhow::Error> { - self.shutdown.store(true, Ordering::Relaxed); - let _ = TcpStream::connect(self.address)?; - Ok(()) - } -} - -/// Represents a TCP event reporter. -pub struct ReporterOnTcp { - destination: String, - reporter_id: ReporterId, -} - -impl ReporterOnTcp { - /// Creates a new TCP reporter instance. - /// - /// It does not open the TCP connection yet. Stores the destination - /// address and creates a unique reporter id. - pub fn new(destination: String) -> Result { - let reporter_id = ReporterId::generate(); - let result = ReporterOnTcp { - destination, - reporter_id, - }; - Ok(result) - } -} - -impl Reporter for ReporterOnTcp { - /// Sends an event to the remote collector. - /// - /// The event is wrapped in an envelope and sent to the remote collector. - /// The TCP connection is opened and closed for each event. - fn report(&self, event: Event) -> Result<(), anyhow::Error> { - let envelope = Envelope::new(&self.reporter_id, event); - let mut socket = TcpStream::connect(self.destination.clone())?; - envelope.write_into(&mut socket)?; - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::Cursor; - use std::sync::mpsc::channel; - use std::sync::Arc; - use std::thread; - use std::time::Duration; - - // Test that the serialization and deserialization of the Envelope works. - // We write the Envelope to a buffer and read it back to check if the - // deserialized Envelope is the same as the original one. - #[test] - fn read_write_works() { - let mut writer = Cursor::new(vec![0; 1024]); - for envelope in fixtures::ENVELOPES.iter() { - let result = Envelope::write_into(envelope, &mut writer); - assert!(result.is_ok()); - } - - let mut reader = Cursor::new(writer.get_ref()); - for envelope in fixtures::ENVELOPES.iter() { - let result = Envelope::read_from(&mut reader); - assert!(result.is_ok()); - assert_eq!(result.unwrap(), envelope.clone()); - } - } - - // Test that the TCP reporter and the TCP collector work together. - // We create a TCP collector and a TCP reporter, then we send events - // to the reporter and check if the collector receives them. - // - // We use a bounded channel to send the events from the reporter to the - // collector. The collector reads the events from the channel and checks - // if they are the same as the original events. - #[test] - fn tcp_reporter_and_collectors_work() { - let collector = CollectorOnTcp::new().unwrap(); - let reporter = ReporterOnTcp::new(collector.address()).unwrap(); - - // Create wrapper to share the collector across threads. - let thread_collector = Arc::new(collector); - let main_collector = thread_collector.clone(); - - // Start the collector in a separate thread. - let (input, output) = channel(); - let receiver_thread = thread::spawn(move || { - thread_collector.collect(input).unwrap(); - }); - // Send events to the reporter. - for event in fixtures::EVENTS.iter() { - let result = reporter.report(event.clone()); - assert!(result.is_ok()); - } - - // Call the stop method to stop the collector. This will close the - // channel and the collector will stop reading from it. - thread::sleep(Duration::from_secs(1)); - main_collector.stop().unwrap(); - - // Empty the channel and assert that we received all the events. - let mut count = 0; - for envelope in output.iter() { - assert!(fixtures::EVENTS.contains(&envelope.event)); - count += 1; - } - assert_eq!(count, fixtures::EVENTS.len()); - // shutdown the receiver thread - receiver_thread.join().unwrap(); - } - - mod fixtures { - use super::*; - use crate::intercept::{Execution, ProcessId}; - use crate::{map_of_strings, vec_of_strings}; - use std::collections::HashMap; - use std::path::PathBuf; - - pub(super) static ENVELOPES: std::sync::LazyLock> = - std::sync::LazyLock::new(|| { - vec![ - Envelope { - rid: ReporterId::generate(), - timestamp: timestamp(), - event: Event { - pid: pid(), - execution: Execution { - executable: PathBuf::from("/usr/bin/ls"), - arguments: vec_of_strings!["ls", "-l"], - working_dir: PathBuf::from("/tmp"), - environment: HashMap::new(), - }, - }, - }, - Envelope { - rid: ReporterId::generate(), - timestamp: timestamp(), - event: Event { - pid: pid(), - execution: Execution { - executable: PathBuf::from("/usr/bin/cc"), - arguments: vec_of_strings![ - "cc", - "-c", - "./file_a.c", - "-o", - "./file_a.o" - ], - working_dir: PathBuf::from("/home/user"), - environment: map_of_strings! { - "PATH" => "/usr/bin:/bin", - "HOME" => "/home/user", - }, - }, - }, - }, - Envelope { - rid: ReporterId::generate(), - timestamp: timestamp(), - event: Event { - pid: pid(), - execution: Execution { - executable: PathBuf::from("/usr/bin/ld"), - arguments: vec_of_strings!["ld", "-o", "./file_a", "./file_a.o"], - working_dir: PathBuf::from("/opt/project"), - environment: map_of_strings! { - "PATH" => "/usr/bin:/bin", - "LD_LIBRARY_PATH" => "/usr/lib:/lib", - }, - }, - }, - }, - ] - }); - - pub(super) static EVENTS: std::sync::LazyLock> = - std::sync::LazyLock::new(|| ENVELOPES.iter().map(|e| e.event.clone()).collect()); - - fn timestamp() -> u64 { - rand::random::() - } - - fn pid() -> ProcessId { - ProcessId(rand::random::()) - } - } -} diff --git a/rust/bear/src/lib.rs b/rust/bear/src/lib.rs deleted file mode 100644 index 3242f781..00000000 --- a/rust/bear/src/lib.rs +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -pub mod args; -pub mod config; -mod fixtures; -pub mod intercept; -pub mod modes; -pub mod output; -pub mod semantic; diff --git a/rust/bear/src/modes/intercept.rs b/rust/bear/src/modes/intercept.rs deleted file mode 100644 index 70c58e65..00000000 --- a/rust/bear/src/modes/intercept.rs +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use crate::intercept::{CollectorService, Envelope, InterceptEnvironment}; -use crate::{args, config}; -use anyhow::Context; -use std::process::ExitCode; -use std::sync::mpsc::Receiver; - -/// The build interceptor is responsible for capturing the build commands and -/// dispatching them to the consumer. The consumer is a function that processes -/// the intercepted command executions. -pub(super) struct BuildInterceptor { - environment: InterceptEnvironment, -} - -impl BuildInterceptor { - /// Create a new process execution interceptor. - pub(super) fn new(config: config::Main, consumer: F) -> anyhow::Result - where - F: FnOnce(Receiver) -> anyhow::Result<()>, - F: Send + 'static, - { - let service = CollectorService::new(consumer) - .with_context(|| "Failed to create the intercept service")?; - - let environment = InterceptEnvironment::new(&config.intercept, service) - .with_context(|| "Failed to create the intercept environment")?; - - Ok(Self { environment }) - } - - /// Run the build command in the intercept environment. - pub(super) fn run_build_command(self, command: args::BuildCommand) -> anyhow::Result { - self.environment - .execute_build_command(command) - .with_context(|| "Failed to execute the build command") - } -} diff --git a/rust/bear/src/modes/mod.rs b/rust/bear/src/modes/mod.rs deleted file mode 100644 index 3a114fc0..00000000 --- a/rust/bear/src/modes/mod.rs +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -pub mod intercept; -pub mod semantic; - -use crate::intercept::persistence::{read, write}; -use crate::modes::intercept::BuildInterceptor; -use crate::modes::semantic::SemanticAnalysisPipeline; -use crate::{args, config}; -use anyhow::Context; -use std::fs::{File, OpenOptions}; -use std::io; -use std::io::BufReader; -use std::process::ExitCode; - -/// The mode trait is used to run the application in different modes. -pub trait Mode { - fn run(self) -> anyhow::Result; -} - -/// The intercept mode we are only capturing the build commands -/// and write it into the output file. -pub struct Intercept { - command: args::BuildCommand, - interceptor: BuildInterceptor, -} - -impl Intercept { - /// Create a new intercept mode instance. - pub fn from( - command: args::BuildCommand, - output: args::BuildEvents, - config: config::Main, - ) -> anyhow::Result { - let file_name = output.file_name.as_str(); - let output_file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(file_name) - .map(io::BufWriter::new) - .with_context(|| format!("Failed to open file: {:?}", file_name))?; - - let interceptor = - BuildInterceptor::new(config, move |envelopes| write(output_file, envelopes))?; - - Ok(Self { - command, - interceptor, - }) - } -} - -impl Mode for Intercept { - /// Run the intercept mode by setting up the collector service and - /// the intercept environment. The build command is executed in the - /// intercept environment. - /// - /// The exit code is based on the result of the build command. - fn run(self) -> anyhow::Result { - self.interceptor.run_build_command(self.command) - } -} - -/// The semantic mode we are deduct the semantic meaning of the -/// executed commands from the build process. -pub struct Semantic { - event_file: BufReader, - semantic: SemanticAnalysisPipeline, -} - -impl Semantic { - pub fn from( - input: args::BuildEvents, - output: args::BuildSemantic, - config: config::Main, - ) -> anyhow::Result { - let file_name = input.file_name.as_str(); - let event_file = OpenOptions::new() - .read(true) - .open(file_name) - .map(BufReader::new) - .with_context(|| format!("Failed to open file: {:?}", file_name))?; - - let semantic = SemanticAnalysisPipeline::from(output, &config)?; - - Ok(Self { - event_file, - semantic, - }) - } -} - -impl Mode for Semantic { - /// Run the semantic mode by reading the event file and analyzing the events. - /// - /// The exit code is based on the result of the output writer. - fn run(self) -> anyhow::Result { - self.semantic - .analyze_and_write(read(self.event_file)) - .map(|_| ExitCode::SUCCESS) - } -} - -/// The all model is combining the intercept and semantic modes. -pub struct Combined { - command: args::BuildCommand, - interceptor: BuildInterceptor, -} - -impl Combined { - /// Create a new all mode instance. - pub fn from( - command: args::BuildCommand, - output: args::BuildSemantic, - config: config::Main, - ) -> anyhow::Result { - let semantic = SemanticAnalysisPipeline::from(output, &config)?; - let interceptor = BuildInterceptor::new(config, move |envelopes| { - semantic.analyze_and_write(envelopes) - })?; - - Ok(Self { - command, - interceptor, - }) - } -} - -impl Mode for Combined { - /// Run the all mode by setting up the collector service and the intercept environment. - /// The build command is executed in the intercept environment. The collected events are - /// then processed by the semantic recognition and transformation. The result is written - /// to the output file. - /// - /// The exit code is based on the result of the build command. - fn run(self) -> anyhow::Result { - self.interceptor.run_build_command(self.command) - } -} diff --git a/rust/bear/src/modes/semantic.rs b/rust/bear/src/modes/semantic.rs deleted file mode 100644 index 514aa01c..00000000 --- a/rust/bear/src/modes/semantic.rs +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use crate::intercept::Envelope; -use crate::output::OutputWriter; -use crate::semantic::interpreters::create_interpreter; -use crate::semantic::transformation::Transformation; -use crate::{args, config, output, semantic}; -use anyhow::Context; -use std::fs::{File, OpenOptions}; -use std::io::{BufReader, BufWriter}; -use std::path::{Path, PathBuf}; - -/// The semantic analysis that is independent of the event source. -pub(super) struct SemanticAnalysisPipeline { - interpreter: Box, - transform: Box, - output_writer: OutputWriterImpl, -} - -impl SemanticAnalysisPipeline { - /// Create a new semantic mode instance. - pub(super) fn from(output: args::BuildSemantic, config: &config::Main) -> anyhow::Result { - let interpreter = create_interpreter(config); - let transform = Transformation::from(&config.output); - let output_writer = OutputWriterImpl::create(&output, &config.output)?; - - Ok(Self { - interpreter: Box::new(interpreter), - transform: Box::new(transform), - output_writer, - }) - } - - /// Consumer the envelopes for analysis and write the result to the output file. - /// This implements the pipeline of the semantic analysis. - pub(super) fn analyze_and_write( - self, - envelopes: impl IntoIterator, - ) -> anyhow::Result<()> { - // Set up the pipeline of compilation database entries. - let entries = envelopes - .into_iter() - .inspect(|envelope| log::debug!("envelope: {}", envelope)) - .map(|envelope| envelope.event.execution) - .flat_map(|execution| self.interpreter.recognize(&execution)) - .inspect(|semantic| log::debug!("semantic: {:?}", semantic)) - .flat_map(|semantic| self.transform.apply(semantic)); - // Consume the entries and write them to the output file. - // The exit code is based on the result of the output writer. - self.output_writer.run(entries) - } -} - -/// The output writer implementation. -/// -/// This is a workaround for the lack of trait object support for generic arguments. -/// https://doc.rust-lang.org/reference/items/traits.html#object-safety. -pub(crate) enum OutputWriterImpl { - Clang(ClangOutputWriter), - Semantic(SemanticOutputWriter), -} - -impl OutputWriter for OutputWriterImpl { - fn run( - &self, - compiler_calls: impl Iterator, - ) -> anyhow::Result<()> { - match self { - OutputWriterImpl::Clang(writer) => writer.run(compiler_calls), - OutputWriterImpl::Semantic(writer) => writer.run(compiler_calls), - } - } -} - -impl OutputWriterImpl { - /// Create a new instance of the output writer. - pub(crate) fn create( - args: &args::BuildSemantic, - config: &config::Output, - ) -> anyhow::Result { - // TODO: This method should fail early if the output file is not writable. - match config { - config::Output::Clang { - format, - sources, - duplicates, - .. - } => { - let result = ClangOutputWriter { - output: PathBuf::from(&args.file_name), - append: args.append, - source_filter: sources.clone(), - duplicate_filter: duplicates.clone(), - command_as_array: format.command_as_array, - formatter: From::from(format), - }; - Ok(OutputWriterImpl::Clang(result)) - } - config::Output::Semantic { .. } => { - let result = SemanticOutputWriter { - output: PathBuf::from(&args.file_name), - }; - Ok(OutputWriterImpl::Semantic(result)) - } - } - } -} - -pub(crate) struct SemanticOutputWriter { - output: PathBuf, -} - -impl OutputWriter for SemanticOutputWriter { - fn run(&self, entries: impl Iterator) -> anyhow::Result<()> { - let file_name = &self.output; - let file = File::create(file_name) - .map(BufWriter::new) - .with_context(|| format!("Failed to create file: {:?}", file_name.as_path()))?; - - semantic::serialize(file, entries)?; - - Ok(()) - } -} - -/// Responsible for writing the final compilation database file. -/// -/// Implements filtering, formatting and atomic file writing. -/// (Atomic file writing implemented by writing to a temporary file and renaming it.) -/// -/// Filtering is implemented by the `filter` module, and the formatting is implemented by the -/// `json_compilation_db` module. -pub(crate) struct ClangOutputWriter { - output: PathBuf, - append: bool, - source_filter: config::SourceFilter, - duplicate_filter: config::DuplicateFilter, - command_as_array: bool, - formatter: output::formatter::EntryFormatter, -} - -impl OutputWriter for ClangOutputWriter { - /// Implements the main logic of the output writer. - fn run( - &self, - compiler_calls: impl Iterator, - ) -> anyhow::Result<()> { - let entries = compiler_calls.flat_map(|compiler_call| self.formatter.apply(compiler_call)); - if self.append && self.output.exists() { - let entries_from_db = Self::read_from_compilation_db(self.output.as_path())?; - let final_entries = entries.chain(entries_from_db); - self.write_into_compilation_db(final_entries) - } else { - if self.append { - log::warn!("The output file does not exist, the append option is ignored."); - } - self.write_into_compilation_db(entries) - } - } -} - -impl ClangOutputWriter { - /// Write the entries to the compilation database. - /// - /// The entries are written to a temporary file and then renamed to the final output. - /// This guaranties that the output file is always in a consistent state. - fn write_into_compilation_db( - &self, - entries: impl Iterator, - ) -> anyhow::Result<()> { - // Filter out the entries as per the configuration. - let mut source_filter: output::filter::EntryPredicate = From::from(&self.source_filter); - let mut duplicate_filter: output::filter::EntryPredicate = - From::from(&self.duplicate_filter); - let filtered_entries = - entries.filter(move |entry| source_filter(entry) && duplicate_filter(entry)); - // Write the entries to a temporary file. - self.write_into_temporary_compilation_db(filtered_entries) - .and_then(|temp| { - // Rename the temporary file to the final output. - std::fs::rename(temp.as_path(), self.output.as_path()).with_context(|| { - format!( - "Failed to rename file from '{:?}' to '{:?}'.", - temp.as_path(), - self.output.as_path() - ) - }) - }) - } - - /// Write the entries to a temporary file and returns the temporary file name. - fn write_into_temporary_compilation_db( - &self, - entries: impl Iterator, - ) -> anyhow::Result { - // Generate a temporary file name. - let file_name = self.output.with_extension("tmp"); - // Open the file for writing. - let file = File::create(&file_name) - .map(BufWriter::new) - .with_context(|| format!("Failed to create file: {:?}", file_name.as_path()))?; - // Write the entries to the file. - output::clang::write(self.command_as_array, file, entries) - .with_context(|| format!("Failed to write entries: {:?}", file_name.as_path()))?; - // Return the temporary file name. - Ok(file_name) - } - - /// Read the compilation database from a file. - fn read_from_compilation_db( - source: &Path, - ) -> anyhow::Result> { - let source_copy = source.to_path_buf(); - - let file = OpenOptions::new() - .read(true) - .open(source) - .map(BufReader::new) - .with_context(|| format!("Failed to open file: {:?}", source))?; - - let entries = output::clang::read(file) - .map(move |candidate| { - // We are here to log the error. - candidate.map_err(|error| { - log::error!("Failed to read file: {:?}, reason: {}", source_copy, error); - error - }) - }) - .filter_map(Result::ok); - Ok(entries) - } -} diff --git a/rust/bear/src/output/clang/iterator.rs b/rust/bear/src/output/clang/iterator.rs deleted file mode 100644 index 7f9412a4..00000000 --- a/rust/bear/src/output/clang/iterator.rs +++ /dev/null @@ -1,88 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! Provides an iterator over a JSON array of objects. -//! -//! from https://github.com/serde-rs/json/issues/404#issuecomment-892957228 - -use std::io::{self, Read}; - -use serde::de::DeserializeOwned; -use serde_json::{Deserializer, Error, Result}; - -pub fn iter_json_array(mut reader: R) -> impl Iterator> -where - T: DeserializeOwned, - R: io::Read, -{ - let mut at_start = State::AtStart; - std::iter::from_fn(move || yield_next_obj(&mut reader, &mut at_start).transpose()) -} - -enum State { - AtStart, - AtMiddle, - Finished, - Failed, -} - -fn yield_next_obj(mut reader: R, state: &mut State) -> Result> -where - T: DeserializeOwned, - R: io::Read, -{ - match state { - State::AtStart => { - if read_skipping_ws(&mut reader)? == b'[' { - // read the next char to see if the array is empty - let peek = read_skipping_ws(&mut reader)?; - if peek == b']' { - *state = State::Finished; - Ok(None) - } else { - *state = State::AtMiddle; - deserialize_single(io::Cursor::new([peek]).chain(reader)).map(Some) - } - } else { - *state = State::Failed; - Err(serde::de::Error::custom("expected `[`")) - } - } - State::AtMiddle => match read_skipping_ws(&mut reader)? { - b',' => deserialize_single(reader).map(Some), - b']' => { - *state = State::Finished; - Ok(None) - } - _ => { - *state = State::Failed; - Err(serde::de::Error::custom("expected `,` or `]`")) - } - }, - State::Finished => Ok(None), - State::Failed => Ok(None), - } -} - -fn deserialize_single(reader: R) -> Result -where - T: DeserializeOwned, - R: io::Read, -{ - let next_obj = Deserializer::from_reader(reader).into_iter::().next(); - match next_obj { - Some(result) => result, - None => Err(serde::de::Error::custom("premature EOF")), - } -} - -fn read_skipping_ws(mut reader: impl io::Read) -> Result { - loop { - let mut byte = 0u8; - if let Err(io) = reader.read_exact(std::slice::from_mut(&mut byte)) { - return Err(Error::io(io)); - } - if !byte.is_ascii_whitespace() { - return Ok(byte); - } - } -} diff --git a/rust/bear/src/output/clang/mod.rs b/rust/bear/src/output/clang/mod.rs deleted file mode 100644 index a679ccb7..00000000 --- a/rust/bear/src/output/clang/mod.rs +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! This crate provides support for reading and writing JSON compilation database files. -//! -//! A compilation database is a set of records which describe the compilation of the -//! source files in a given project. It describes the compiler invocation command to -//! compile a source module to an object file. -//! -//! This database can have many forms. One well known and supported format is the JSON -//! compilation database, which is a simple JSON file having the list of compilation -//! as an array. The definition of the JSON compilation database files is done in the -//! LLVM project [documentation](https://clang.llvm.org/docs/JSONCompilationDatabase.html). - -use serde::ser::{SerializeSeq, Serializer}; -use serde_json::Error; - -mod iterator; -mod tests; -mod type_de; -mod type_ser; - -/// Represents an entry of the compilation database. -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Entry { - /// The main translation unit source processed by this compilation step. - /// This is used by tools as the key into the compilation database. - /// There can be multiple command objects for the same file, for example if the same - /// source file is compiled with different configurations. - pub file: std::path::PathBuf, - /// The compile command executed. This must be a valid command to rerun the exact - /// compilation step for the translation unit in the environment the build system uses. - /// Shell expansion is not supported. - pub arguments: Vec, - /// The working directory of the compilation. All paths specified in the command or - /// file fields must be either absolute or relative to this directory. - pub directory: std::path::PathBuf, - /// The name of the output created by this compilation step. This field is optional. - /// It can be used to distinguish different processing modes of the same input file. - pub output: Option, -} - -/// Deserialize entries from a JSON array into an iterator. -pub fn read(reader: impl std::io::Read) -> impl Iterator> { - iterator::iter_json_array(reader) -} - -/// The entries are written in the format specified by the configuration. -pub fn write( - command_as_array: bool, - writer: impl std::io::Write, - entries: impl Iterator, -) -> Result<(), Error> { - let method = if command_as_array { - write_with_arguments - } else { - write_with_command - }; - method(writer, entries) -} - -/// Serialize entries from an iterator into a JSON array. -/// -/// It uses the `arguments` field of the `Entry` struct to serialize the array of strings. -pub(super) fn write_with_arguments( - writer: impl std::io::Write, - entries: impl Iterator, -) -> Result<(), Error> { - let mut ser = serde_json::Serializer::pretty(writer); - let mut seq = ser.serialize_seq(None)?; - for entry in entries { - seq.serialize_element(&entry)?; - } - seq.end() -} - -/// Serialize entries from an iterator into a JSON array. -/// -/// It uses the `arguments` field of the `Entry` struct to serialize the array of strings. -pub(super) fn write_with_command( - writer: impl std::io::Write, - entries: impl Iterator, -) -> Result<(), Error> { - let mut ser = serde_json::Serializer::pretty(writer); - let mut seq = ser.serialize_seq(None)?; - for entry in entries { - let entry = type_ser::EntryWithCommand::from(entry); - seq.serialize_element(&entry)?; - } - seq.end() -} diff --git a/rust/bear/src/output/clang/tests.rs b/rust/bear/src/output/clang/tests.rs deleted file mode 100644 index 340c95bc..00000000 --- a/rust/bear/src/output/clang/tests.rs +++ /dev/null @@ -1,314 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#[cfg(test)] -mod failures { - use super::super::*; - use serde_json::error::Category; - use serde_json::json; - - macro_rules! assert_semantic_error { - ($x:expr) => { - match $x { - Some(Err(error)) => assert_eq!(error.classify(), Category::Data), - _ => assert!(false, "shout be semantic error"), - } - }; - } - - #[test] - fn load_non_json_content() { - let content = r#"this is not json"#; - let mut result = read(content.as_bytes()); - - assert_semantic_error!(result.next()); - assert!(result.next().is_none()); - } - - #[test] - fn load_not_expected_json_content() { - let content = json!({ "file": "string" }).to_string(); - let mut result = read(content.as_bytes()); - - assert_semantic_error!(result.next()); - assert!(result.next().is_none()); - } - - #[test] - fn load_on_bad_value() { - let content = json!([ - { - "directory": " ", - "file": "./file_a.c", - "command": "cc -Dvalue=\"this" - } - ]) - .to_string(); - let mut result = read(content.as_bytes()); - - assert_semantic_error!(result.next()); - assert!(result.next().is_none()); - } - - #[test] - fn load_on_multiple_commands() { - let content = json!([ - { - "directory": " ", - "file": "./file_a.c", - "command": "cc source.c", - "arguments": ["cc", "source.c"], - } - ]) - .to_string(); - let mut result = read(content.as_bytes()); - - assert_semantic_error!(result.next()); - assert!(result.next().is_none()); - } -} - -#[cfg(test)] -mod success { - use super::super::*; - use serde_json::json; - - mod empty { - use super::*; - - #[test] - fn load_empty_array() { - let content = json!([]).to_string(); - - let mut result = read(content.as_bytes()); - - assert!(result.next().is_none()); - } - } - - mod basic { - use super::*; - use crate::vec_of_strings; - use std::io::{Cursor, Seek, SeekFrom}; - - fn expected_values() -> Vec { - vec![ - Entry { - directory: std::path::PathBuf::from("/home/user"), - file: std::path::PathBuf::from("./file_a.c"), - arguments: vec_of_strings!("cc", "-c", "./file_a.c", "-o", "./file_a.o"), - output: None, - }, - Entry { - directory: std::path::PathBuf::from("/home/user"), - file: std::path::PathBuf::from("./file_b.c"), - arguments: vec_of_strings!("cc", "-c", "./file_b.c", "-o", "./file_b.o"), - output: Some(std::path::PathBuf::from("./file_b.o")), - }, - ] - } - - fn expected_with_array_syntax() -> serde_json::Value { - json!([ - { - "directory": "/home/user", - "file": "./file_a.c", - "arguments": ["cc", "-c", "./file_a.c", "-o", "./file_a.o"] - }, - { - "directory": "/home/user", - "file": "./file_b.c", - "output": "./file_b.o", - "arguments": ["cc", "-c", "./file_b.c", "-o", "./file_b.o"] - } - ]) - } - - fn expected_with_string_syntax() -> serde_json::Value { - json!([ - { - "directory": "/home/user", - "file": "./file_a.c", - "command": "cc -c ./file_a.c -o ./file_a.o" - }, - { - "directory": "/home/user", - "file": "./file_b.c", - "output": "./file_b.o", - "command": "cc -c ./file_b.c -o ./file_b.o" - } - ]) - } - - #[test] - fn load_content_with_string_command_syntax() { - let content = expected_with_string_syntax().to_string(); - - let result = read(content.as_bytes()); - let entries: Vec = result.map(|e| e.unwrap()).collect(); - - assert_eq!(expected_values(), entries); - } - - #[test] - fn load_content_with_array_command_syntax() { - let content = expected_with_array_syntax().to_string(); - - let result = read(content.as_bytes()); - let entries: Vec = result.map(|e| e.unwrap()).collect(); - - assert_eq!(expected_values(), entries); - } - - #[test] - fn save_with_array_command_syntax() -> Result<(), Error> { - let input = expected_values(); - - // Create fake "file" - let mut buffer = Cursor::new(Vec::new()); - let result = write_with_arguments(&mut buffer, input.into_iter()); - assert!(result.is_ok()); - - // Use the fake "file" as input - buffer.seek(SeekFrom::Start(0)).unwrap(); - let content: serde_json::Value = serde_json::from_reader(&mut buffer)?; - - assert_eq!(expected_with_array_syntax(), content); - - Ok(()) - } - - #[test] - fn save_with_string_command_syntax() -> Result<(), Error> { - let input = expected_values(); - - // Create fake "file" - let mut buffer = Cursor::new(Vec::new()); - let result = write_with_command(&mut buffer, input.into_iter()); - assert!(result.is_ok()); - - // Use the fake "file" as input - buffer.seek(SeekFrom::Start(0)).unwrap(); - let content: serde_json::Value = serde_json::from_reader(&mut buffer)?; - - assert_eq!(expected_with_string_syntax(), content); - - Ok(()) - } - } - - mod quoted { - use super::*; - use crate::vec_of_strings; - use serde_json::Value; - use std::io::{Cursor, Seek, SeekFrom}; - - fn expected_values() -> Vec { - vec![ - Entry { - directory: std::path::PathBuf::from("/home/user"), - file: std::path::PathBuf::from("./file_a.c"), - arguments: vec_of_strings!( - "cc", - "-c", - "-D", - r#"name=\"me\""#, - "./file_a.c", - "-o", - "./file_a.o" - ), - output: None, - }, - Entry { - directory: std::path::PathBuf::from("/home/user"), - file: std::path::PathBuf::from("./file_b.c"), - arguments: vec_of_strings!( - "cc", - "-c", - "-D", - r#"name="me""#, - "./file_b.c", - "-o", - "./file_b.o" - ), - output: None, - }, - ] - } - - fn expected_with_array_syntax() -> serde_json::Value { - json!([ - { - "directory": "/home/user", - "file": "./file_a.c", - "arguments": ["cc", "-c", "-D", r#"name=\"me\""#, "./file_a.c", "-o", "./file_a.o"] - }, - { - "directory": "/home/user", - "file": "./file_b.c", - "arguments": ["cc", "-c", "-D", r#"name="me""#, "./file_b.c", "-o", "./file_b.o"] - } - ]) - } - - fn expected_with_string_syntax() -> serde_json::Value { - json!([ - { - "directory": "/home/user", - "file": "./file_a.c", - "command": r#"cc -c -D 'name=\"me\"' ./file_a.c -o ./file_a.o"# - }, - { - "directory": "/home/user", - "file": "./file_b.c", - "command": r#"cc -c -D 'name="me"' ./file_b.c -o ./file_b.o"# - } - ]) - } - - #[test] - fn load_content_with_array_command_syntax() { - let content = expected_with_array_syntax().to_string(); - - let result = read(content.as_bytes()); - let entries: Vec = result.map(|e| e.unwrap()).collect(); - - assert_eq!(expected_values(), entries); - } - - #[test] - fn save_with_array_command_syntax() -> Result<(), Error> { - let input = expected_values(); - - // Create fake "file" - let mut buffer = Cursor::new(Vec::new()); - let result = write_with_arguments(&mut buffer, input.into_iter()); - assert!(result.is_ok()); - - // Use the fake "file" as input - buffer.seek(SeekFrom::Start(0)).unwrap(); - let content: Value = serde_json::from_reader(&mut buffer)?; - - assert_eq!(expected_with_array_syntax(), content); - - Ok(()) - } - - #[test] - fn save_with_string_command_syntax() -> Result<(), Error> { - let input = expected_values(); - - // Create fake "file" - let mut buffer = Cursor::new(Vec::new()); - let result = write_with_command(&mut buffer, input.into_iter()); - assert!(result.is_ok()); - - // Use the fake "file" as input - buffer.seek(SeekFrom::Start(0)).unwrap(); - let content: Value = serde_json::from_reader(&mut buffer)?; - - assert_eq!(expected_with_string_syntax(), content); - - Ok(()) - } - } -} diff --git a/rust/bear/src/output/clang/type_de.rs b/rust/bear/src/output/clang/type_de.rs deleted file mode 100644 index 5cc4e3e2..00000000 --- a/rust/bear/src/output/clang/type_de.rs +++ /dev/null @@ -1,146 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! Implements deserialization of the `Entry` struct. - -use std::fmt; -use std::path; - -use serde::de::{self, Deserialize, Deserializer, MapAccess, Visitor}; - -use super::Entry; - -impl<'de> Deserialize<'de> for Entry { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - enum Field { - Directory, - File, - Command, - Arguments, - Output, - } - const FIELDS: &[&str] = &["directory", "file", "command", "arguments", "output"]; - - impl<'de> Deserialize<'de> for Field { - fn deserialize(deserializer: D) -> Result - where - D: Deserializer<'de>, - { - struct FieldVisitor; - - impl Visitor<'_> for FieldVisitor { - type Value = Field; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter - .write_str("`directory`, `file`, `command`, `arguments`, or `output`") - } - - fn visit_str(self, value: &str) -> Result - where - E: de::Error, - { - match value { - "directory" => Ok(Field::Directory), - "file" => Ok(Field::File), - "command" => Ok(Field::Command), - "arguments" => Ok(Field::Arguments), - "output" => Ok(Field::Output), - _ => Err(de::Error::unknown_field(value, FIELDS)), - } - } - } - - deserializer.deserialize_identifier(FieldVisitor) - } - } - - struct EntryVisitor; - - impl<'de> Visitor<'de> for EntryVisitor { - type Value = Entry; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("struct Entry") - } - - fn visit_map(self, mut map: V) -> Result - where - V: MapAccess<'de>, - { - let mut directory: Option = None; - let mut file: Option = None; - let mut command: Option = None; - let mut arguments: Option> = None; - let mut output: Option = None; - - while let Some(key) = map.next_key()? { - match key { - Field::Directory => { - if directory.is_some() { - return Err(de::Error::duplicate_field("directory")); - } - directory = Some(map.next_value()?); - } - Field::File => { - if file.is_some() { - return Err(de::Error::duplicate_field("file")); - } - file = Some(map.next_value()?); - } - Field::Command => { - if command.is_some() { - return Err(de::Error::duplicate_field("command")); - } - command = Some(map.next_value()?); - } - Field::Arguments => { - if arguments.is_some() { - return Err(de::Error::duplicate_field("arguments")); - } - arguments = Some(map.next_value()?); - } - Field::Output => { - if output.is_some() { - return Err(de::Error::duplicate_field("output")); - } - output = Some(map.next_value()?); - } - } - } - let directory = directory.ok_or_else(|| de::Error::missing_field("directory"))?; - let file = file.ok_or_else(|| de::Error::missing_field("file"))?; - if arguments.is_some() && command.is_some() { - return Err(de::Error::custom( - "Either `command` or `arguments` field need to be specified, but not both.", - )); - } - let arguments = arguments.map_or_else( - || { - command - .ok_or_else(|| de::Error::missing_field("`command` or `arguments`")) - .and_then(|cmd| { - shell_words::split(cmd.as_str()).map_err(|_| { - de::Error::invalid_value( - de::Unexpected::Str(cmd.as_str()), - &"quotes needs to be matched", - ) - }) - }) - }, - Ok, - )?; - Ok(Entry { - directory, - file, - arguments, - output, - }) - } - } - - deserializer.deserialize_struct("Entry", FIELDS, EntryVisitor) - } -} diff --git a/rust/bear/src/output/clang/type_ser.rs b/rust/bear/src/output/clang/type_ser.rs deleted file mode 100644 index 5fe7d8fa..00000000 --- a/rust/bear/src/output/clang/type_ser.rs +++ /dev/null @@ -1,60 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! Implements serialization of the `Entry` struct. - -use serde::ser::{Serialize, SerializeStruct, Serializer}; - -use super::Entry; - -impl Serialize for Entry { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let size = if self.output.is_some() { 4 } else { 3 }; - let mut state = serializer.serialize_struct("Entry", size)?; - state.serialize_field("directory", &self.directory)?; - state.serialize_field("file", &self.file)?; - state.serialize_field("arguments", &self.arguments)?; - if self.output.is_some() { - state.serialize_field("output", &self.output)?; - } - state.end() - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct EntryWithCommand { - pub file: std::path::PathBuf, - pub command: String, - pub directory: std::path::PathBuf, - pub output: Option, -} - -impl From for EntryWithCommand { - fn from(entry: Entry) -> Self { - Self { - file: entry.file, - command: shell_words::join(&entry.arguments), - directory: entry.directory, - output: entry.output, - } - } -} - -impl Serialize for EntryWithCommand { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let size = if self.output.is_some() { 4 } else { 3 }; - let mut state = serializer.serialize_struct("Entry", size)?; - state.serialize_field("directory", &self.directory)?; - state.serialize_field("file", &self.file)?; - state.serialize_field("command", &self.command)?; - if self.output.is_some() { - state.serialize_field("output", &self.output)?; - } - state.end() - } -} diff --git a/rust/bear/src/output/filter.rs b/rust/bear/src/output/filter.rs deleted file mode 100644 index 4f8acc61..00000000 --- a/rust/bear/src/output/filter.rs +++ /dev/null @@ -1,461 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use std::hash::Hash; -use std::path::Path; - -use crate::config; -use crate::output::clang::Entry; -use builder::create_hash; -use builder::EntryPredicateBuilder as Builder; - -/// A predicate that can be used to filter compilation database entries. -/// -/// If the predicate returns `true`, the entry is included in the result set. -/// If the predicate returns `false`, the entry is excluded from the result set. -pub type EntryPredicate = Box bool>; - -impl From<&config::SourceFilter> for EntryPredicate { - /// Create a filter from the configuration. - fn from(config: &config::SourceFilter) -> Self { - let source_exist_check = Builder::filter_by_source_existence(config.only_existing_files); - - let mut source_path_checks = Builder::new(); - for config::DirectoryFilter { path, ignore } in &config.paths { - let filter = Builder::filter_by_source_path(path); - match ignore { - config::Ignore::Always => { - source_path_checks = source_path_checks & !filter; - } - config::Ignore::Never => { - source_path_checks = source_path_checks & filter; - } - } - } - - (source_exist_check & source_path_checks).build() - } -} - -impl From<&config::DuplicateFilter> for EntryPredicate { - /// Create a filter from the configuration. - fn from(config: &config::DuplicateFilter) -> Self { - let hash_function = create_hash(&config.by_fields); - Builder::filter_duplicate_entries(hash_function).build() - } -} - -mod builder { - use super::*; - use std::collections::HashSet; - use std::hash::{DefaultHasher, Hasher}; - - /// Represents a builder object that can be used to construct an entry predicate. - pub(super) struct EntryPredicateBuilder { - candidate: Option, - } - - impl EntryPredicateBuilder { - /// Creates an entry predicate from the builder. - pub(super) fn build(self) -> EntryPredicate { - match self.candidate { - Some(predicate) => predicate, - None => Box::new(|_: &Entry| true), - } - } - - /// Construct a predicate builder that is empty. - #[inline] - pub(crate) fn new() -> Self { - Self { candidate: None } - } - - /// Construct a predicate builder that implements a predicate. - #[inline] - fn from

(predicate: P) -> Self - where - P: FnMut(&Entry) -> bool + 'static, - { - Self { - candidate: Some(Box::new(predicate)), - } - } - - /// Create a predicate that filters out entries - /// that are not using any of the given source paths. - pub(super) fn filter_by_source_path(path: &Path) -> Self { - let owned_path = path.to_owned(); - Self::from(move |entry| entry.file.starts_with(owned_path.clone())) - } - - /// Create a predicate that filters out entries - /// that source file does not exist. - pub(super) fn filter_by_source_existence(only_existing: bool) -> Self { - if only_existing { - Self::from(|entry| entry.file.is_file()) - } else { - Self::new() - } - } - - /// Create a predicate that filters out entries - /// that are already in the compilation database based on their hash. - pub(super) fn filter_duplicate_entries( - hash_function: impl Fn(&Entry) -> u64 + 'static, - ) -> Self { - let mut have_seen = HashSet::new(); - - Self::from(move |entry| { - let hash = hash_function(entry); - if !have_seen.contains(&hash) { - have_seen.insert(hash); - true - } else { - false - } - }) - } - } - - /// Implement the AND operator for combining predicates. - impl std::ops::BitAnd for EntryPredicateBuilder { - type Output = EntryPredicateBuilder; - - fn bitand(self, rhs: Self) -> Self::Output { - match (self.candidate, rhs.candidate) { - (None, None) => EntryPredicateBuilder::new(), - (None, some) => EntryPredicateBuilder { candidate: some }, - (some, None) => EntryPredicateBuilder { candidate: some }, - (Some(mut lhs), Some(mut rhs)) => EntryPredicateBuilder::from(move |entry| { - let result = lhs(entry); - if result { - rhs(entry) - } else { - result - } - }), - } - } - } - - /// Implement the NOT operator for combining predicates. - impl std::ops::Not for EntryPredicateBuilder { - type Output = EntryPredicateBuilder; - - fn not(self) -> Self::Output { - match self.candidate { - Some(mut original) => Self::from(move |entry| { - let result = original(entry); - !result - }), - None => Self::new(), - } - } - } - - /// Create a hash function that is using the given fields to calculate the hash of an entry. - pub(super) fn create_hash(fields: &[config::OutputFields]) -> impl Fn(&Entry) -> u64 + 'static { - let owned_fields: Vec = fields.to_vec(); - move |entry: &Entry| { - let mut hasher = DefaultHasher::new(); - for field in &owned_fields { - match field { - config::OutputFields::Directory => entry.directory.hash(&mut hasher), - config::OutputFields::File => entry.file.hash(&mut hasher), - config::OutputFields::Arguments => entry.arguments.hash(&mut hasher), - config::OutputFields::Output => entry.output.hash(&mut hasher), - } - } - hasher.finish() - } - } - - #[cfg(test)] - mod sources_test { - use super::*; - use crate::vec_of_strings; - use std::path::PathBuf; - - #[test] - fn test_filter_by_source_paths() { - let input: Vec = vec![ - Entry { - file: PathBuf::from("/home/user/project/source/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: None, - }, - Entry { - file: PathBuf::from("/home/user/project/test/source.c"), - arguments: vec_of_strings!["cc", "-c", "test.c"], - directory: PathBuf::from("/home/user/project"), - output: None, - }, - ]; - - let expected: Vec = vec![input[0].clone()]; - - let config = config::SourceFilter { - only_existing_files: false, - paths: vec![ - config::DirectoryFilter { - path: PathBuf::from("/home/user/project/source"), - ignore: config::Ignore::Never, - }, - config::DirectoryFilter { - path: PathBuf::from("/home/user/project/test"), - ignore: config::Ignore::Always, - }, - ], - }; - let sut: EntryPredicate = From::from(&config); - let result: Vec = input.into_iter().filter(sut).collect(); - assert_eq!(result, expected); - } - } - - #[cfg(test)] - mod existence_test { - use super::*; - use crate::vec_of_strings; - use std::hash::{Hash, Hasher}; - use std::path::PathBuf; - - #[test] - fn test_duplicate_detection_works() { - let input: Vec = vec![ - Entry { - file: PathBuf::from("/home/user/project/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: Some(PathBuf::from("/home/user/project/source.o")), - }, - Entry { - file: PathBuf::from("/home/user/project/source.c"), - arguments: vec_of_strings!["cc", "-c", "-Wall", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: Some(PathBuf::from("/home/user/project/source.o")), - }, - Entry { - file: PathBuf::from("/home/user/project/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c", "-o", "test.o"], - directory: PathBuf::from("/home/user/project"), - output: Some(PathBuf::from("/home/user/project/test.o")), - }, - ]; - - let expected: Vec = vec![input[0].clone(), input[2].clone()]; - - let hash_function = |entry: &Entry| { - let mut hasher = DefaultHasher::new(); - entry.file.hash(&mut hasher); - entry.output.hash(&mut hasher); - hasher.finish() - }; - let sut: EntryPredicate = - EntryPredicateBuilder::filter_duplicate_entries(hash_function).build(); - let result: Vec = input.into_iter().filter(sut).collect(); - assert_eq!(result, expected); - } - } - - #[cfg(test)] - mod create_hash_tests { - use super::*; - use crate::vec_of_strings; - use std::path::PathBuf; - - #[test] - fn test_create_hash_with_directory_field() { - let entry = create_test_entry(); - - let fields = vec![config::OutputFields::Directory]; - let hash_function = create_hash(&fields); - let hash = hash_function(&entry); - - let mut hasher = DefaultHasher::new(); - entry.directory.hash(&mut hasher); - let expected_hash = hasher.finish(); - - assert_eq!(hash, expected_hash); - } - - #[test] - fn test_create_hash_with_file_field() { - let entry = create_test_entry(); - - let fields = vec![config::OutputFields::File]; - let hash_function = create_hash(&fields); - let hash = hash_function(&entry); - - let mut hasher = DefaultHasher::new(); - entry.file.hash(&mut hasher); - let expected_hash = hasher.finish(); - - assert_eq!(hash, expected_hash); - } - - #[test] - fn test_create_hash_with_arguments_field() { - let entry = create_test_entry(); - - let fields = vec![config::OutputFields::Arguments]; - let hash_function = create_hash(&fields); - let hash = hash_function(&entry); - - let mut hasher = DefaultHasher::new(); - entry.arguments.hash(&mut hasher); - let expected_hash = hasher.finish(); - - assert_eq!(hash, expected_hash); - } - - #[test] - fn test_create_hash_with_output_field() { - let entry = create_test_entry(); - - let fields = vec![config::OutputFields::Output]; - let hash_function = create_hash(&fields); - let hash = hash_function(&entry); - - let mut hasher = DefaultHasher::new(); - entry.output.hash(&mut hasher); - let expected_hash = hasher.finish(); - - assert_eq!(hash, expected_hash); - } - - #[test] - fn test_create_hash_with_multiple_fields() { - let entry = create_test_entry(); - - let fields = vec![ - config::OutputFields::Directory, - config::OutputFields::File, - config::OutputFields::Arguments, - config::OutputFields::Output, - ]; - let hash_function = create_hash(&fields); - let hash = hash_function(&entry); - - let mut hasher = DefaultHasher::new(); - entry.directory.hash(&mut hasher); - entry.file.hash(&mut hasher); - entry.arguments.hash(&mut hasher); - entry.output.hash(&mut hasher); - let expected_hash = hasher.finish(); - - assert_eq!(hash, expected_hash); - } - - fn create_test_entry() -> Entry { - Entry { - file: PathBuf::from("/home/user/project/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: Some(PathBuf::from("/home/user/project/source.o")), - } - } - } - - #[cfg(test)] - mod bitand_tests { - use super::*; - use crate::vec_of_strings; - use std::path::PathBuf; - - #[test] - fn test_bitand_both_predicates_true() { - let input = create_test_entries(); - - let predicate1 = EntryPredicateBuilder::from(|_: &Entry| true); - let predicate2 = EntryPredicateBuilder::from(|_: &Entry| true); - let combined_predicate = (predicate1 & predicate2).build(); - - let result: Vec = input.into_iter().filter(combined_predicate).collect(); - assert_eq!(result.len(), 1); - } - - #[test] - fn test_bitand_first_predicate_false() { - let input = create_test_entries(); - - let predicate1 = EntryPredicateBuilder::from(|_: &Entry| false); - let predicate2 = EntryPredicateBuilder::from(|_: &Entry| true); - let combined_predicate = (predicate1 & predicate2).build(); - - let result: Vec = input.into_iter().filter(combined_predicate).collect(); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_bitand_second_predicate_false() { - let input = create_test_entries(); - - let predicate1 = EntryPredicateBuilder::from(|_: &Entry| true); - let predicate2 = EntryPredicateBuilder::from(|_: &Entry| false); - let combined_predicate = (predicate1 & predicate2).build(); - - let result: Vec = input.into_iter().filter(combined_predicate).collect(); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_bitand_both_predicates_false() { - let input = create_test_entries(); - - let predicate1 = EntryPredicateBuilder::from(|_: &Entry| false); - let predicate2 = EntryPredicateBuilder::from(|_: &Entry| false); - let combined_predicate = (predicate1 & predicate2).build(); - - let result: Vec = input.into_iter().filter(combined_predicate).collect(); - assert_eq!(result.len(), 0); - } - - fn create_test_entries() -> Vec { - vec![Entry { - file: PathBuf::from("/home/user/project/source/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: None, - }] - } - } - - #[cfg(test)] - mod not_tests { - use super::*; - use crate::vec_of_strings; - use std::path::PathBuf; - - #[test] - fn test_not_predicate_true() { - let input = create_test_entries(); - - let predicate = EntryPredicateBuilder::from(|_: &Entry| true); - let not_predicate = (!predicate).build(); - - let result: Vec = input.into_iter().filter(not_predicate).collect(); - assert_eq!(result.len(), 0); - } - - #[test] - fn test_not_predicate_false() { - let input = create_test_entries(); - - let predicate = EntryPredicateBuilder::from(|_: &Entry| false); - let not_predicate = (!predicate).build(); - - let result: Vec = input.into_iter().filter(not_predicate).collect(); - assert_eq!(result.len(), 1); - } - - fn create_test_entries() -> Vec { - vec![Entry { - file: PathBuf::from("/home/user/project/source/source.c"), - arguments: vec_of_strings!["cc", "-c", "source.c"], - directory: PathBuf::from("/home/user/project"), - output: None, - }] - } - } -} diff --git a/rust/bear/src/output/formatter.rs b/rust/bear/src/output/formatter.rs deleted file mode 100644 index 0962db70..00000000 --- a/rust/bear/src/output/formatter.rs +++ /dev/null @@ -1,270 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use crate::output::clang::Entry; -use crate::{config, semantic}; -use anyhow::anyhow; -use path_absolutize::Absolutize; -use std::borrow::Cow; -use std::io; -use std::path::{Path, PathBuf}; - -pub struct EntryFormatter { - drop_output_field: bool, - path_format: config::PathFormat, -} - -impl From<&config::Format> for EntryFormatter { - /// Create a formatter from the configuration. - fn from(config: &config::Format) -> Self { - let drop_output_field = config.drop_output_field; - let path_format = config.paths_as.clone(); - - Self { - drop_output_field, - path_format, - } - } -} - -impl EntryFormatter { - /// Convert the compiler calls into entries. - /// - /// The conversion is done by converting the compiler passes into entries. - /// Errors are logged and ignored. The entries format is controlled by the configuration. - pub(crate) fn apply(&self, compiler_call: semantic::CompilerCall) -> Vec { - let semantic::CompilerCall { - compiler, - working_dir, - passes, - } = compiler_call; - passes - .into_iter() - .map(|pass| self.try_convert_from_pass(&working_dir, &compiler, pass)) - // We are here to log the error. - .map(|result| result.map_err(|error| log::info!("{}", error))) - .filter_map(Result::ok) - .collect() - } - - /// Creates a single entry from a compiler pass if possible. - /// - /// The preprocess pass is ignored, and the compile pass is converted into an entry. - /// - /// The file and directory paths are converted into fully qualified paths when required. - fn try_convert_from_pass( - &self, - working_dir: &Path, - compiler: &Path, - pass: semantic::CompilerPass, - ) -> anyhow::Result { - match pass { - semantic::CompilerPass::Preprocess => { - Err(anyhow!("preprocess pass should not show up in results")) - } - semantic::CompilerPass::Compile { - source, - output, - flags, - } => { - let output_clone = output.clone(); - let output_result = match output.filter(|_| !self.drop_output_field) { - None => None, - Some(candidate) => { - let x = self.format_path(candidate.as_path(), working_dir)?; - Some(PathBuf::from(x)) - } - }; - Ok(Entry { - file: PathBuf::from(self.format_path(source.as_path(), working_dir)?), - directory: working_dir.to_path_buf(), - output: output_result, - arguments: Self::format_arguments(compiler, &source, &flags, output_clone)?, - }) - } - } - } - - /// Reconstruct the arguments for the compiler call. - /// - /// It is not the same as the command line arguments, because the compiler call is - /// decomposed into a separate lists of arguments. To assemble from the parts will - /// not necessarily result in the same command line arguments. One example for that - /// is the multiple source files are treated as separate compiler calls. Another - /// thing that can change is the order of the arguments. - fn format_arguments( - compiler: &Path, - source: &Path, - flags: &[String], - output: Option, - ) -> anyhow::Result, anyhow::Error> { - let mut arguments: Vec = vec![]; - // Assemble the arguments as it would be for a single source file. - arguments.push(into_string(compiler)?); - for flag in flags { - arguments.push(flag.clone()); - } - if let Some(file) = output { - arguments.push(String::from("-o")); - arguments.push(into_string(file.as_path())?) - } - arguments.push(into_string(source)?); - Ok(arguments) - } - - fn format_path<'a>(&self, path: &'a Path, root: &Path) -> io::Result> { - // Will compute the absolute path if needed. - let absolute = || { - if path.is_absolute() { - path.absolutize() - } else { - path.absolutize_from(root) - } - }; - - match self.path_format { - config::PathFormat::Original => Ok(Cow::from(path)), - config::PathFormat::Absolute => absolute(), - config::PathFormat::Canonical => absolute()?.canonicalize().map(Cow::from), - } - } -} - -fn into_string(path: &Path) -> anyhow::Result { - path.to_path_buf() - .into_os_string() - .into_string() - .map_err(|_| anyhow!("Path can't be encoded to UTF")) -} - -#[cfg(test)] -mod test { - use super::*; - use crate::vec_of_strings; - - #[test] - fn test_non_compilations() { - let input = semantic::CompilerCall { - compiler: PathBuf::from("/usr/bin/cc"), - working_dir: PathBuf::from("/home/user"), - passes: vec![semantic::CompilerPass::Preprocess], - }; - - let format = config::Format { - command_as_array: true, - drop_output_field: false, - paths_as: config::PathFormat::Original, - }; - let sut: EntryFormatter = (&format).into(); - let result = sut.apply(input); - - let expected: Vec = vec![]; - assert_eq!(expected, result); - } - - #[test] - fn test_single_source_compilation() { - let input = semantic::CompilerCall { - compiler: PathBuf::from("/usr/bin/clang"), - working_dir: PathBuf::from("/home/user"), - passes: vec![semantic::CompilerPass::Compile { - source: PathBuf::from("source.c"), - output: Some(PathBuf::from("source.o")), - flags: vec_of_strings!["-Wall"], - }], - }; - - let format = config::Format { - command_as_array: true, - drop_output_field: false, - paths_as: config::PathFormat::Original, - }; - let sut: EntryFormatter = (&format).into(); - let result = sut.apply(input); - - let expected = vec![Entry { - directory: PathBuf::from("/home/user"), - file: PathBuf::from("source.c"), - arguments: vec_of_strings!["/usr/bin/clang", "-Wall", "-o", "source.o", "source.c"], - output: Some(PathBuf::from("source.o")), - }]; - assert_eq!(expected, result); - } - - #[test] - fn test_multiple_sources_compilation() { - let input = compiler_call_with_multiple_passes(); - - let format = config::Format { - command_as_array: true, - drop_output_field: true, - paths_as: config::PathFormat::Original, - }; - let sut: EntryFormatter = (&format).into(); - let result = sut.apply(input); - - let expected = vec![ - Entry { - directory: PathBuf::from("/home/user"), - file: PathBuf::from("/tmp/source1.c"), - arguments: vec_of_strings!["clang", "-o", "./source1.o", "/tmp/source1.c"], - output: None, - }, - Entry { - directory: PathBuf::from("/home/user"), - file: PathBuf::from("../source2.c"), - arguments: vec_of_strings!["clang", "-Wall", "../source2.c"], - output: None, - }, - ]; - assert_eq!(expected, result); - } - - #[test] - fn test_multiple_sources_compilation_with_abs_paths() { - let input = compiler_call_with_multiple_passes(); - - let format = config::Format { - command_as_array: true, - drop_output_field: true, - paths_as: config::PathFormat::Absolute, - }; - let sut: EntryFormatter = (&format).into(); - let result = sut.apply(input); - - let expected = vec![ - Entry { - directory: PathBuf::from("/home/user"), - file: PathBuf::from("/tmp/source1.c"), - arguments: vec_of_strings!["clang", "-o", "./source1.o", "/tmp/source1.c"], - output: None, - }, - Entry { - directory: PathBuf::from("/home/user"), - file: PathBuf::from("/home/source2.c"), - arguments: vec_of_strings!["clang", "-Wall", "../source2.c"], - output: None, - }, - ]; - assert_eq!(expected, result); - } - - fn compiler_call_with_multiple_passes() -> semantic::CompilerCall { - semantic::CompilerCall { - compiler: PathBuf::from("clang"), - working_dir: PathBuf::from("/home/user"), - passes: vec![ - semantic::CompilerPass::Preprocess, - semantic::CompilerPass::Compile { - source: PathBuf::from("/tmp/source1.c"), - output: Some(PathBuf::from("./source1.o")), - flags: vec_of_strings![], - }, - semantic::CompilerPass::Compile { - source: PathBuf::from("../source2.c"), - output: None, - flags: vec_of_strings!["-Wall"], - }, - ], - } - } -} diff --git a/rust/bear/src/output/mod.rs b/rust/bear/src/output/mod.rs deleted file mode 100644 index 0a64bbba..00000000 --- a/rust/bear/src/output/mod.rs +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use super::semantic; -use anyhow::Result; - -pub mod clang; -pub mod filter; -pub mod formatter; - -/// The output writer trait is responsible for writing output file. -pub(crate) trait OutputWriter { - /// Running the writer means to consume the compiler calls - /// and write the entries to the output file. - fn run(&self, _: impl Iterator) -> Result<()>; -} diff --git a/rust/bear/src/semantic/interpreters/combinators.rs b/rust/bear/src/semantic/interpreters/combinators.rs deleted file mode 100644 index 87209d99..00000000 --- a/rust/bear/src/semantic/interpreters/combinators.rs +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use super::super::{CompilerCall, Execution, Interpreter, Recognition}; - -/// Represents a set of interpreters, where any of them can recognize the semantic. -/// The evaluation is done in the order of the interpreters. The first one which -/// recognizes the semantic will be returned as result. -pub(super) struct Any { - interpreters: Vec>, -} - -impl Any { - pub(super) fn new(tools: Vec>) -> Self { - Self { - interpreters: tools, - } - } -} - -impl Interpreter for Any { - fn recognize(&self, x: &Execution) -> Recognition { - for tool in &self.interpreters { - match tool.recognize(x) { - Recognition::Unknown => continue, - result => return result, - } - } - Recognition::Unknown - } -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - use std::path::PathBuf; - - use super::super::super::CompilerCall; - use super::*; - - #[test] - fn test_any_when_no_match() { - let sut = Any { - interpreters: vec![ - Box::new(MockTool::NotRecognize), - Box::new(MockTool::NotRecognize), - Box::new(MockTool::NotRecognize), - ], - }; - - let input = any_execution(); - - match sut.recognize(&input) { - Recognition::Unknown => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_any_when_success() { - let sut = Any { - interpreters: vec![ - Box::new(MockTool::NotRecognize), - Box::new(MockTool::Recognize), - Box::new(MockTool::NotRecognize), - ], - }; - - let input = any_execution(); - - match sut.recognize(&input) { - Recognition::Success(_) => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_any_when_ignored() { - let sut = Any { - interpreters: vec![ - Box::new(MockTool::NotRecognize), - Box::new(MockTool::RecognizeIgnored), - Box::new(MockTool::Recognize), - ], - }; - - let input = any_execution(); - - match sut.recognize(&input) { - Recognition::Ignored => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_any_when_match_fails() { - let sut = Any { - interpreters: vec![ - Box::new(MockTool::NotRecognize), - Box::new(MockTool::RecognizeFailed), - Box::new(MockTool::Recognize), - Box::new(MockTool::NotRecognize), - ], - }; - - let input = any_execution(); - - match sut.recognize(&input) { - Recognition::Error(_) => assert!(true), - _ => assert!(false), - } - } - - enum MockTool { - Recognize, - RecognizeIgnored, - RecognizeFailed, - NotRecognize, - } - - impl Interpreter for MockTool { - fn recognize(&self, _: &Execution) -> Recognition { - match self { - MockTool::Recognize => Recognition::Success(any_compiler_call()), - MockTool::RecognizeIgnored => Recognition::Ignored, - MockTool::RecognizeFailed => Recognition::Error(String::from("problem")), - MockTool::NotRecognize => Recognition::Unknown, - } - } - } - - fn any_execution() -> Execution { - Execution { - executable: PathBuf::new(), - arguments: vec![], - working_dir: PathBuf::new(), - environment: HashMap::new(), - } - } - - fn any_compiler_call() -> CompilerCall { - CompilerCall { - compiler: PathBuf::new(), - working_dir: PathBuf::new(), - passes: vec![], - } - } -} diff --git a/rust/bear/src/semantic/interpreters/gcc.rs b/rust/bear/src/semantic/interpreters/gcc.rs deleted file mode 100644 index 9f97a093..00000000 --- a/rust/bear/src/semantic/interpreters/gcc.rs +++ /dev/null @@ -1,209 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use nom::branch::alt; -use nom::multi::many1; -use nom::sequence::preceded; - -use super::super::{CompilerCall, Execution, Interpreter, Recognition}; -use internal::Argument; - -pub(super) struct Gcc {} - -impl Gcc { - pub(super) fn new() -> Self { - Gcc {} - } -} - -impl Interpreter for Gcc { - fn recognize(&self, execution: &Execution) -> Recognition { - let mut parser = preceded( - internal::compiler, - many1(alt((internal::flag, internal::source))), - ); - - match parser(execution.arguments.as_slice()) { - Ok(result) => { - // TODO: append flags from environment - let flags = result.1; - let passes = Argument::passes(flags.as_slice()); - - Recognition::Success(CompilerCall { - compiler: execution.executable.clone(), - working_dir: execution.working_dir.clone(), - passes, - }) - } - Err(error) => { - log::debug!("Gcc failed to parse it: {error}."); - Recognition::Unknown - } - } - } -} - -mod internal { - use nom::{error::ErrorKind, IResult}; - use regex::Regex; - use std::path::PathBuf; - - use super::super::super::CompilerPass; - use super::super::matchers::source::looks_like_a_source_file; - - #[derive(Debug, PartialEq)] - enum Language { - C, - Cpp, - ObjectiveC, - ObjectiveCpp, - Ada, - Fortran, - Go, - D, - Assembler, - Other, - } - - #[derive(Debug, PartialEq)] - enum Pass { - Preprocessor, - Compiler, - Linker, - } - - #[derive(Debug, PartialEq)] - enum Meaning { - Compiler, - ControlKindOfOutput { stop_before: Option }, - ControlLanguage(Language), - ControlPass(Pass), - Diagnostic, - Debug, - Optimize, - Instrumentation, - DirectorySearch(Option), - Developer, - Input(Pass), - Output, - } - - /// Compiler flags are varies the number of arguments, but means one thing. - pub(super) struct Argument<'a> { - arguments: &'a [String], - meaning: Meaning, - } - - impl Argument<'_> { - pub(super) fn passes(flags: &[Argument]) -> Vec { - let mut pass: Pass = Pass::Linker; - let mut inputs: Vec = vec![]; - let mut output: Option = None; - let mut args: Vec = vec![]; - - for flag in flags { - match flag.meaning { - Meaning::ControlKindOfOutput { - stop_before: Some(Pass::Compiler), - } => { - pass = Pass::Preprocessor; - args.extend(flag.arguments.iter().map(String::to_owned)); - } - Meaning::ControlKindOfOutput { - stop_before: Some(Pass::Linker), - } => { - pass = Pass::Compiler; - args.extend(flag.arguments.iter().map(String::to_owned)); - } - Meaning::ControlKindOfOutput { .. } - | Meaning::ControlLanguage(_) - | Meaning::ControlPass(Pass::Preprocessor) - | Meaning::ControlPass(Pass::Compiler) - | Meaning::Diagnostic - | Meaning::Debug - | Meaning::Optimize - | Meaning::Instrumentation - | Meaning::DirectorySearch(None) => { - args.extend(flag.arguments.iter().map(String::to_owned)); - } - Meaning::Input(_) => { - assert_eq!(flag.arguments.len(), 1); - inputs.push(flag.arguments[0].clone()) - } - Meaning::Output => { - assert_eq!(flag.arguments.len(), 1); - output = Some(flag.arguments[0].clone()) - } - _ => {} - } - } - - match pass { - Pass::Preprocessor if inputs.is_empty() => { - vec![] - } - Pass::Preprocessor => { - vec![CompilerPass::Preprocess] - } - Pass::Compiler | Pass::Linker => inputs - .into_iter() - .map(|source| CompilerPass::Compile { - source: PathBuf::from(source), - output: output.as_ref().map(PathBuf::from), - flags: args.clone(), - }) - .collect(), - } - } - } - - pub(super) fn compiler(i: &[String]) -> IResult<&[String], Argument> { - let candidate = &i[0]; - if COMPILER_REGEX.is_match(candidate) { - const MEANING: Meaning = Meaning::Compiler; - Ok(( - &i[1..], - Argument { - arguments: &i[..0], - meaning: MEANING, - }, - )) - } else { - // Declare it as a non-recoverable error, so argument processing will stop after this. - Err(nom::Err::Failure(nom::error::Error::new(i, ErrorKind::Tag))) - } - } - - pub(super) fn source(i: &[String]) -> IResult<&[String], Argument> { - let candidate = &i[0]; - if looks_like_a_source_file(candidate.as_str()) { - const MEANING: Meaning = Meaning::Input(Pass::Preprocessor); - Ok(( - &i[1..], - Argument { - arguments: &i[..0], - meaning: MEANING, - }, - )) - } else { - Err(nom::Err::Error(nom::error::Error::new(i, ErrorKind::Tag))) - } - } - - pub(super) fn flag(_i: &[String]) -> IResult<&[String], Argument> { - todo!() - } - - static COMPILER_REGEX: std::sync::LazyLock = std::sync::LazyLock::new(|| { - // - cc - // - c++ - // - cxx - // - CC - // - mcc, gcc, m++, g++, gfortran, fortran - // - with prefixes like: arm-none-eabi- - // - with postfixes like: -7.0 or 6.4.0 - Regex::new( - r"(^(cc|c\+\+|cxx|CC|(([^-]*-)*([mg](cc|\+\+)|[g]?fortran)(-?\d+(\.\d+){0,2})?))$)", - ) - .unwrap() - }); -} diff --git a/rust/bear/src/semantic/interpreters/generic.rs b/rust/bear/src/semantic/interpreters/generic.rs deleted file mode 100644 index eec7a8c6..00000000 --- a/rust/bear/src/semantic/interpreters/generic.rs +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use std::collections::HashSet; -use std::path::PathBuf; -use std::vec; - -use super::super::{CompilerCall, CompilerPass, Execution, Interpreter, Recognition}; -use super::matchers::source::looks_like_a_source_file; - -/// A tool to recognize a compiler by executable name. -pub(super) struct Generic { - executables: HashSet, -} - -impl Generic { - pub(super) fn from(compilers: &[PathBuf]) -> Self { - let executables = compilers.iter().cloned().collect(); - Self { executables } - } -} - -impl Interpreter for Generic { - /// This tool is a naive implementation only considering: - /// - the executable name, - /// - one of the arguments is a source file, - /// - the rest of the arguments are flags. - fn recognize(&self, x: &Execution) -> Recognition { - if self.executables.contains(&x.executable) { - let mut flags = vec![]; - let mut sources = vec![]; - - // find sources and filter out requested flags. - for argument in x.arguments.iter().skip(1) { - if looks_like_a_source_file(argument.as_str()) { - sources.push(PathBuf::from(argument)); - } else { - flags.push(argument.clone()); - } - } - - if sources.is_empty() { - Recognition::Error(String::from("source file is not found")) - } else { - Recognition::Success(CompilerCall { - compiler: x.executable.clone(), - working_dir: x.working_dir.clone(), - passes: sources - .iter() - .map(|source| CompilerPass::Compile { - source: source.clone(), - output: None, - flags: flags.clone(), - }) - .collect(), - }) - } - } else { - Recognition::Unknown - } - } -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - - use crate::{vec_of_pathbuf, vec_of_strings}; - - use super::*; - - #[test] - fn test_matching() { - let input = Execution { - executable: PathBuf::from("/usr/bin/something"), - arguments: vec_of_strings![ - "something", - "-Dthis=that", - "-I.", - "source.c", - "-o", - "source.c.o" - ], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::new(), - }; - - let expected = CompilerCall { - compiler: PathBuf::from("/usr/bin/something"), - working_dir: PathBuf::from("/home/user"), - passes: vec![CompilerPass::Compile { - flags: vec_of_strings!["-Dthis=that", "-I.", "-o", "source.c.o"], - source: PathBuf::from("source.c"), - output: None, - }], - }; - - assert_eq!(Recognition::Success(expected), SUT.recognize(&input)); - } - - #[test] - fn test_matching_without_sources() { - let input = Execution { - executable: PathBuf::from("/usr/bin/something"), - arguments: vec_of_strings!["something", "--help"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::new(), - }; - - assert_eq!( - Recognition::Error(String::from("source file is not found")), - SUT.recognize(&input) - ); - } - - #[test] - fn test_not_matching() { - let input = Execution { - executable: PathBuf::from("/usr/bin/cc"), - arguments: vec_of_strings!["cc", "-Dthis=that", "-I.", "source.c", "-o", "source.c.o"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::new(), - }; - - assert_eq!(Recognition::Unknown, SUT.recognize(&input)); - } - - static SUT: std::sync::LazyLock = std::sync::LazyLock::new(|| Generic { - executables: vec_of_pathbuf!["/usr/bin/something"].into_iter().collect(), - }); -} diff --git a/rust/bear/src/semantic/interpreters/ignore.rs b/rust/bear/src/semantic/interpreters/ignore.rs deleted file mode 100644 index 5ac7a947..00000000 --- a/rust/bear/src/semantic/interpreters/ignore.rs +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use std::collections::HashSet; -use std::path::PathBuf; - -use super::super::{CompilerCall, Execution, Interpreter, Recognition}; - -/// A tool to ignore a command execution by executable name. -pub(super) struct IgnoreByPath { - executables: HashSet, -} - -impl IgnoreByPath { - pub(super) fn new() -> Self { - let executables = COREUTILS_FILES.iter().map(PathBuf::from).collect(); - Self { executables } - } - - pub(super) fn from(compilers: &[PathBuf]) -> Self { - let executables = compilers.iter().cloned().collect(); - Self { executables } - } -} - -impl Default for IgnoreByPath { - fn default() -> Self { - Self::new() - } -} - -/// A tool to ignore a command execution by arguments. -impl Interpreter for IgnoreByPath { - fn recognize(&self, execution: &Execution) -> Recognition { - if self.executables.contains(&execution.executable) { - Recognition::Ignored - } else { - Recognition::Unknown - } - } -} - -static COREUTILS_FILES: [&str; 106] = [ - "/usr/bin/[", - "/usr/bin/arch", - "/usr/bin/b2sum", - "/usr/bin/base32", - "/usr/bin/base64", - "/usr/bin/basename", - "/usr/bin/basenc", - "/usr/bin/cat", - "/usr/bin/chcon", - "/usr/bin/chgrp", - "/usr/bin/chmod", - "/usr/bin/chown", - "/usr/bin/cksum", - "/usr/bin/comm", - "/usr/bin/cp", - "/usr/bin/csplit", - "/usr/bin/cut", - "/usr/bin/date", - "/usr/bin/dd", - "/usr/bin/df", - "/usr/bin/dir", - "/usr/bin/dircolors", - "/usr/bin/dirname", - "/usr/bin/du", - "/usr/bin/echo", - "/usr/bin/env", - "/usr/bin/expand", - "/usr/bin/expr", - "/usr/bin/factor", - "/usr/bin/false", - "/usr/bin/fmt", - "/usr/bin/fold", - "/usr/bin/groups", - "/usr/bin/head", - "/usr/bin/hostid", - "/usr/bin/id", - "/usr/bin/install", - "/usr/bin/join", - "/usr/bin/link", - "/usr/bin/ln", - "/usr/bin/logname", - "/usr/bin/ls", - "/usr/bin/md5sum", - "/usr/bin/mkdir", - "/usr/bin/mkfifo", - "/usr/bin/mknod", - "/usr/bin/mktemp", - "/usr/bin/mv", - "/usr/bin/nice", - "/usr/bin/nl", - "/usr/bin/nohup", - "/usr/bin/nproc", - "/usr/bin/numfmt", - "/usr/bin/od", - "/usr/bin/paste", - "/usr/bin/pathchk", - "/usr/bin/pinky", - "/usr/bin/pr", - "/usr/bin/printenv", - "/usr/bin/printf", - "/usr/bin/ptx", - "/usr/bin/pwd", - "/usr/bin/readlink", - "/usr/bin/realpath", - "/usr/bin/rm", - "/usr/bin/rmdir", - "/usr/bin/runcon", - "/usr/bin/seq", - "/usr/bin/sha1sum", - "/usr/bin/sha224sum", - "/usr/bin/sha256sum", - "/usr/bin/sha384sum", - "/usr/bin/sha512sum", - "/usr/bin/shred", - "/usr/bin/shuf", - "/usr/bin/sleep", - "/usr/bin/sort", - "/usr/bin/split", - "/usr/bin/stat", - "/usr/bin/stdbuf", - "/usr/bin/stty", - "/usr/bin/sum", - "/usr/bin/sync", - "/usr/bin/tac", - "/usr/bin/tail", - "/usr/bin/tee", - "/usr/bin/test", - "/usr/bin/timeout", - "/usr/bin/touch", - "/usr/bin/tr", - "/usr/bin/true", - "/usr/bin/truncate", - "/usr/bin/tsort", - "/usr/bin/tty", - "/usr/bin/uname", - "/usr/bin/unexpand", - "/usr/bin/uniq", - "/usr/bin/unlink", - "/usr/bin/users", - "/usr/bin/vdir", - "/usr/bin/wc", - "/usr/bin/who", - "/usr/bin/whoami", - "/usr/bin/yes", - "/usr/bin/make", - "/usr/bin/gmake", -]; - -#[cfg(test)] -mod test { - use std::collections::HashMap; - use std::path::PathBuf; - - use crate::vec_of_strings; - - use super::*; - - #[test] - fn test_executions_are_ignored_by_executable_name() { - let input = Execution { - executable: PathBuf::from("/usr/bin/ls"), - arguments: vec_of_strings!["ls", "/home/user/build"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::new(), - }; - let sut = IgnoreByPath::new(); - - assert_eq!(Recognition::Ignored, sut.recognize(&input)) - } - - #[test] - fn test_not_known_executables_are_not_recognized() { - let input = Execution { - executable: PathBuf::from("/usr/bin/bear"), - arguments: vec_of_strings!["bear", "--", "make"], - working_dir: PathBuf::from("/home/user"), - environment: HashMap::new(), - }; - let sut = IgnoreByPath::new(); - - assert_eq!(Recognition::Unknown, sut.recognize(&input)) - } -} diff --git a/rust/bear/src/semantic/interpreters/matchers/mod.rs b/rust/bear/src/semantic/interpreters/matchers/mod.rs deleted file mode 100644 index 36303a47..00000000 --- a/rust/bear/src/semantic/interpreters/matchers/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -pub(super) mod source; diff --git a/rust/bear/src/semantic/interpreters/matchers/source.rs b/rust/bear/src/semantic/interpreters/matchers/source.rs deleted file mode 100644 index bde1cb36..00000000 --- a/rust/bear/src/semantic/interpreters/matchers/source.rs +++ /dev/null @@ -1,82 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use std::collections::HashSet; - -#[cfg(target_family = "unix")] -pub fn looks_like_a_source_file(argument: &str) -> bool { - // not a command line flag - if argument.starts_with('-') { - return false; - } - if let Some((_, extension)) = argument.rsplit_once('.') { - return EXTENSIONS.contains(extension); - } - false -} - -#[cfg(target_family = "windows")] -pub fn looks_like_a_source_file(argument: &str) -> bool { - // not a command line flag - if argument.starts_with('/') { - return false; - } - if let Some((_, extension)) = argument.rsplit_once('.') { - return EXTENSIONS.contains(extension); - } - false -} - -#[rustfmt::skip] -static EXTENSIONS: std::sync::LazyLock> = std::sync::LazyLock::new(|| { - HashSet::from([ - // header files - "h", "hh", "H", "hp", "hxx", "hpp", "HPP", "h++", "tcc", - // C - "c", "C", - // C++ - "cc", "CC", "c++", "C++", "cxx", "cpp", "cp", - // CUDA - "cu", - // ObjectiveC - "m", "mi", "mm", "M", "mii", - // Preprocessed - "i", "ii", - // Assembly - "s", "S", "sx", "asm", - // Fortran - "f", "for", "ftn", - "F", "FOR", "fpp", "FPP", "FTN", - "f90", "f95", "f03", "f08", - "F90", "F95", "F03", "F08", - // go - "go", - // brig - "brig", - // D - "d", "di", "dd", - // Ada - "ads", "abd", - ]) -}); - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_filenames() { - assert!(looks_like_a_source_file("source.c")); - assert!(looks_like_a_source_file("source.cpp")); - assert!(looks_like_a_source_file("source.cxx")); - assert!(looks_like_a_source_file("source.cc")); - - assert!(looks_like_a_source_file("source.h")); - assert!(looks_like_a_source_file("source.hpp")); - - assert!(!looks_like_a_source_file("gcc")); - assert!(!looks_like_a_source_file("clang")); - assert!(!looks_like_a_source_file("-o")); - assert!(!looks_like_a_source_file("-Wall")); - assert!(!looks_like_a_source_file("/o")); - } -} diff --git a/rust/bear/src/semantic/interpreters/mod.rs b/rust/bear/src/semantic/interpreters/mod.rs deleted file mode 100644 index 434e7100..00000000 --- a/rust/bear/src/semantic/interpreters/mod.rs +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -use super::interpreters::combinators::Any; -use super::interpreters::generic::Generic; -use super::interpreters::ignore::IgnoreByPath; -use super::Interpreter; -use crate::config; -use std::path::PathBuf; - -mod combinators; -mod gcc; -mod generic; -mod ignore; -mod matchers; - -/// Creates an interpreter to recognize the compiler calls. -/// -/// Using the configuration we can define which compilers to include and exclude. -/// Also read the environment variables to detect the compiler to include (and -/// make sure those are not excluded either). -// TODO: Use the CC or CXX environment variables to detect the compiler to include. -// Use the CC or CXX environment variables and make sure those are not excluded. -// Make sure the environment variables are passed to the method. -// TODO: Take environment variables as input. -pub fn create_interpreter<'a>(config: &config::Main) -> impl Interpreter + 'a { - let compilers_to_include = match &config.intercept { - config::Intercept::Wrapper { executables, .. } => executables.clone(), - _ => vec![], - }; - let compilers_to_exclude = match &config.output { - config::Output::Clang { compilers, .. } => compilers - .iter() - .filter(|compiler| compiler.ignore == config::IgnoreOrConsider::Always) - .map(|compiler| compiler.path.clone()) - .collect(), - _ => vec![], - }; - - let mut interpreters: Vec> = vec![ - // ignore executables which are not compilers, - Box::new(IgnoreByPath::default()), - // recognize default compiler - Box::new(Generic::from(&[PathBuf::from("/usr/bin/cc")])), - ]; - - if !compilers_to_include.is_empty() { - let tool = Generic::from(&compilers_to_include); - interpreters.push(Box::new(tool)); - } - - if !compilers_to_exclude.is_empty() { - let tool = IgnoreByPath::from(&compilers_to_exclude); - interpreters.insert(0, Box::new(tool)); - } - - Any::new(interpreters) -} - -#[cfg(test)] -mod test { - use std::collections::HashMap; - use std::path::PathBuf; - - use super::super::{CompilerCall, Execution, Recognition}; - use super::*; - use crate::config; - use crate::config::{DuplicateFilter, Format, SourceFilter}; - use crate::{vec_of_pathbuf, vec_of_strings}; - - fn any_execution() -> Execution { - Execution { - executable: PathBuf::from("/usr/bin/cc"), - arguments: vec_of_strings!["cc", "-c", "-Wall", "main.c"], - environment: HashMap::new(), - working_dir: PathBuf::from("/home/user"), - } - } - - #[test] - fn test_create_interpreter_with_default_config() { - let config = config::Main::default(); - - let interpreter = create_interpreter(&config); - let input = any_execution(); - - match interpreter.recognize(&input) { - Recognition::Success(CompilerCall { .. }) => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_create_interpreter_with_compilers_to_include() { - let config = config::Main { - intercept: config::Intercept::Wrapper { - executables: vec_of_pathbuf!["/usr/bin/cc"], - path: PathBuf::from("/usr/libexec/bear"), - directory: PathBuf::from("/tmp"), - }, - ..Default::default() - }; - - let interpreter = create_interpreter(&config); - let input = any_execution(); - - match interpreter.recognize(&input) { - Recognition::Success(CompilerCall { .. }) => assert!(true), - _ => assert!(false), - } - } - - #[test] - fn test_create_interpreter_with_compilers_to_exclude() { - let config = config::Main { - output: config::Output::Clang { - compilers: vec![config::Compiler { - path: PathBuf::from("/usr/bin/cc"), - ignore: config::IgnoreOrConsider::Always, - arguments: config::Arguments::default(), - }], - sources: SourceFilter::default(), - duplicates: DuplicateFilter::default(), - format: Format::default(), - }, - ..Default::default() - }; - - let interpreter = create_interpreter(&config); - let input = any_execution(); - - match interpreter.recognize(&input) { - Recognition::Ignored => assert!(true), - _ => assert!(false), - } - } -} diff --git a/rust/bear/src/semantic/mod.rs b/rust/bear/src/semantic/mod.rs deleted file mode 100644 index 685b9c50..00000000 --- a/rust/bear/src/semantic/mod.rs +++ /dev/null @@ -1,103 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! This module is defining the semantic of executed commands. -//! -//! The semantic identifies the intent of the execution. It not only -//! recognizes the compiler calls, but also identifies the compiler -//! passes that are executed. -//! -//! A compilation of a source file can be divided into multiple passes. -//! We are interested in the compiler passes, because those are the -//! ones that are relevant to build a JSON compilation database. - -pub mod interpreters; -pub mod transformation; - -use super::intercept::Execution; -use serde::ser::SerializeSeq; -use serde::{Serialize, Serializer}; -use std::path::PathBuf; - -/// Represents an executed command semantic. -#[derive(Debug, PartialEq, Serialize)] -pub struct CompilerCall { - pub compiler: PathBuf, - pub working_dir: PathBuf, - pub passes: Vec, -} - -/// Represents a compiler call pass. -#[derive(Debug, PartialEq, Serialize)] -pub enum CompilerPass { - Preprocess, - Compile { - source: PathBuf, - output: Option, - flags: Vec, - }, -} - -/// Responsible to recognize the semantic of an executed command. -/// -/// The implementation can be responsible for a single compiler, -/// a set of compilers, or a set of commands that are not compilers. -/// -/// The benefit to recognize a non-compiler command, is to not -/// spend more time to try to recognize with other interpreters. -/// Or classify the recognition as ignored to not be further processed -/// later on. -pub trait Interpreter: Send { - fn recognize(&self, _: &Execution) -> Recognition; -} - -/// Represents a semantic recognition result. -/// -/// The unknown recognition is used when the interpreter is not -/// able to recognize the command. This can signal the search process -/// to continue with the next interpreter. -#[derive(Debug, PartialEq)] -pub enum Recognition { - /// The command was recognized and the semantic was identified. - Success(T), - /// The command was recognized, but the semantic was ignored. - Ignored, - /// The command was recognized, but the semantic was broken. - Error(String), - /// The command was not recognized. - Unknown, -} - -impl IntoIterator for Recognition { - type Item = T; - type IntoIter = std::option::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - match self { - Recognition::Success(value) => Some(value).into_iter(), - _ => None.into_iter(), - } - } -} - -/// Responsible to transform the semantic of an executed command. -/// -/// It conditionally removes compiler calls based on compiler names or flags. -/// It can also alter the compiler flags of the compiler calls. The actions -/// are defined in the configuration this module is given. -pub trait Transform: Send { - fn apply(&self, _: CompilerCall) -> Option; -} - -/// Serialize compiler calls into a JSON array. -pub fn serialize( - writer: impl std::io::Write, - entries: impl Iterator + Sized, -) -> anyhow::Result<()> { - let mut ser = serde_json::Serializer::pretty(writer); - let mut seq = ser.serialize_seq(None)?; - for entry in entries { - seq.serialize_element(&entry)?; - } - seq.end()?; - Ok(()) -} diff --git a/rust/bear/src/semantic/transformation.rs b/rust/bear/src/semantic/transformation.rs deleted file mode 100644 index da846ebe..00000000 --- a/rust/bear/src/semantic/transformation.rs +++ /dev/null @@ -1,277 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -//! Responsible for transforming the compiler calls. -//! -//! It conditionally removes compiler calls based on compiler names or flags. -//! It can also alter the compiler flags of the compiler calls. The actions -//! are defined in the configuration this module is given. - -use crate::{config, semantic}; - -use std::collections::HashMap; -use std::path::PathBuf; - -/// Transformation contains rearranged information from the configuration. -/// -/// The configuration is a list of instruction on how to transform the compiler call. -/// The transformation group the instructions by the compiler path, so it can be -/// applied to the compiler call when it matches the path. -#[derive(Debug, PartialEq)] -pub struct Transformation { - compilers: HashMap>, -} - -impl From<&config::Output> for Transformation { - fn from(config: &config::Output) -> Self { - match config { - config::Output::Clang { compilers, .. } => compilers.as_slice().into(), - config::Output::Semantic { .. } => Transformation::new(), - } - } -} - -impl From<&[config::Compiler]> for Transformation { - fn from(config: &[config::Compiler]) -> Self { - let mut compilers = HashMap::new(); - for compiler in config { - compilers - .entry(compiler.path.clone()) - .or_insert_with(Vec::new) - .push(compiler.clone()); - } - Transformation { compilers } - } -} - -impl semantic::Transform for Transformation { - fn apply(&self, input: semantic::CompilerCall) -> Option { - if let Some(configs) = self.compilers.get(&input.compiler) { - Self::apply_when_not_empty(configs.as_slice(), input) - } else { - Some(input) - } - } -} - -impl Transformation { - fn new() -> Self { - Transformation { - compilers: HashMap::new(), - } - } - - /// Apply the transformation to the compiler call. - /// - /// Multiple configurations can be applied to the same compiler call. - /// And depending on the instruction from the configuration, the compiler call - /// can be ignored, modified, or left unchanged. The conditional ignore will - /// check if the compiler call matches the flags defined in the configuration. - fn apply_when_not_empty( - configs: &[config::Compiler], - input: semantic::CompilerCall, - ) -> Option { - let mut current_input = Some(input); - - for config in configs { - current_input = match config { - config::Compiler { - ignore: config::IgnoreOrConsider::Always, - .. - } => None, - config::Compiler { - ignore: config::IgnoreOrConsider::Conditional, - arguments, - .. - } => current_input.filter(|input| !Self::match_condition(arguments, &input.passes)), - config::Compiler { - ignore: config::IgnoreOrConsider::Never, - arguments, - .. - } => current_input.map(|input| semantic::CompilerCall { - compiler: input.compiler.clone(), - working_dir: input.working_dir.clone(), - passes: Transformation::apply_argument_changes( - arguments, - input.passes.as_slice(), - ), - }), - }; - - if current_input.is_none() { - break; - } - } - current_input - } - - /// Check if the compiler call matches the condition defined in the configuration. - /// - /// Any compiler pass that matches the flags defined in the configuration will cause - /// the whole compiler call to be ignored. - fn match_condition(arguments: &config::Arguments, passes: &[semantic::CompilerPass]) -> bool { - let match_flags = arguments.match_.as_slice(); - passes.iter().any(|pass| match pass { - semantic::CompilerPass::Compile { flags, .. } => { - flags.iter().any(|flag| match_flags.contains(flag)) - } - _ => false, - }) - } - - /// Apply the changes defined in the configuration to the compiler call. - /// - /// The changes can be to remove or add flags to the compiler call. - /// Only the flags will be changed, but applies to all compiler passes. - fn apply_argument_changes( - arguments: &config::Arguments, - passes: &[semantic::CompilerPass], - ) -> Vec { - let arguments_to_remove = arguments.remove.as_slice(); - let arguments_to_add = arguments.add.as_slice(); - - let mut new_passes = Vec::with_capacity(passes.len()); - for pass in passes { - match pass { - semantic::CompilerPass::Compile { - source, - output, - flags, - } => { - let mut new_flags = flags.clone(); - new_flags.retain(|flag| !arguments_to_remove.contains(flag)); - new_flags.extend(arguments_to_add.iter().cloned()); - new_passes.push(semantic::CompilerPass::Compile { - source: source.clone(), - output: output.clone(), - flags: new_flags, - }); - } - semantic::CompilerPass::Preprocess => { - new_passes.push(semantic::CompilerPass::Preprocess) - } - } - } - new_passes - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::{Arguments, Compiler, IgnoreOrConsider}; - use crate::semantic::{CompilerCall, CompilerPass, Transform}; - use std::path::PathBuf; - - #[test] - fn test_apply_no_filter() { - let input = CompilerCall { - compiler: std::path::PathBuf::from("gcc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-O2".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - - let sut = Transformation::from(&config::Output::Semantic {}); - let result = sut.apply(input); - - let expected = CompilerCall { - compiler: std::path::PathBuf::from("gcc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-O2".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - assert_eq!(result, Some(expected)); - } - - #[test] - fn test_apply_filter_match() { - let input = CompilerCall { - compiler: std::path::PathBuf::from("cc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-O2".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - - let sut: Transformation = vec![Compiler { - path: std::path::PathBuf::from("cc"), - ignore: IgnoreOrConsider::Always, - arguments: Arguments::default(), - }] - .as_slice() - .into(); - let result = sut.apply(input); - assert!(result.is_none()); - } - - #[test] - fn test_apply_conditional_match() { - let input = CompilerCall { - compiler: std::path::PathBuf::from("gcc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-O2".into(), "-Wall".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - - let sut: Transformation = vec![Compiler { - path: std::path::PathBuf::from("gcc"), - ignore: IgnoreOrConsider::Conditional, - arguments: Arguments { - match_: vec!["-O2".into()], - ..Arguments::default() - }, - }] - .as_slice() - .into(); - let result = sut.apply(input); - assert!(result.is_none()); - } - - #[test] - fn test_apply_ignore_never_modify_arguments() { - let input = CompilerCall { - compiler: std::path::PathBuf::from("gcc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-O2".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - - let sut: Transformation = vec![Compiler { - path: std::path::PathBuf::from("gcc"), - ignore: IgnoreOrConsider::Never, - arguments: Arguments { - add: vec!["-Wall".into()], - remove: vec!["-O2".into()], - ..Arguments::default() - }, - }] - .as_slice() - .into(); - let result = sut.apply(input); - - let expected = CompilerCall { - compiler: std::path::PathBuf::from("gcc"), - passes: vec![CompilerPass::Compile { - source: PathBuf::from("main.c"), - output: PathBuf::from("main.o").into(), - flags: vec!["-Wall".into()], - }], - working_dir: std::path::PathBuf::from("/project"), - }; - assert_eq!(result, Some(expected)); - } -}