remove rust implementation from the main branch

Laszlo Nagy
2025-05-10 12:12:06 +10:00
parent 0a49313ac9
commit d75455d6ce
35 changed files with 0 additions and 6150 deletions

View File

@@ -1,44 +0,0 @@
name: rust CI
on:
push:
pull_request:
env:
CARGO_TERM_COLOR: always
jobs:
lint:
name: Lint
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: rustup component add clippy && rustup update stable && rustup default stable
- run: cd rust && cargo clippy
compile:
name: Compile
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- run: rustup update stable && rustup default stable
- run: cd rust && cargo check --verbose
test:
name: Test
strategy:
matrix:
os:
- ubuntu-latest
- windows-latest
- macOS-latest
toolchain:
- stable
- beta
- nightly
runs-on: ${{ matrix.os }}
needs: [compile]
steps:
- uses: actions/checkout@v3
- run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }}
- run: cd rust && cargo build --verbose
- run: cd rust && cargo test --verbose

View File

@@ -1,34 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
[workspace]
members = [
"bear"
]
resolver = "2"
[workspace.dependencies]
thiserror = "2.0"
anyhow = "1.0"
serde = { version = "1.0", default-features = false, features = ["derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
serde_yml = "0.0"
clap = { version = "4.5", default-features = false, features = ["std", "cargo", "help", "usage", "suggestions"] }
chrono = { version = "0.4", default-features = false, features = ["std", "clock"] }
log = { version = "0.4", default-features = false, features = ["std"] }
env_logger = { version = "0.11", default-features = false, features = ["humantime"]}
rand = { version = "0.9", default-features = false, features = ["std", "thread_rng"] }
path-absolutize = "3.1"
directories = "6.0"
nom = { version = "7.1", default-features = false, features = ["std"] }
regex = "1.9"
shell-words = "1.1"
tempfile = "3.13"
signal-hook = { version = "0.3", default-features = false }
[workspace.package]
version = "4.0.0"
authors = ["László Nagy <rizsotto at gmail dot com>"]
repository = "https://github.com/rizsotto/Bear"
homepage = "https://github.com/rizsotto/Bear"
license = "GPL-3"
edition = "2021"

View File

@@ -1,37 +0,0 @@
# What's this?
This is a Rust rewrite of the current master branch of this project.
# Why?
The current master branch is written in C++ and is not well structured.
I want to rewrite it in Rust to make it more maintainable and easier to work with.
## What's wrong with the current codebase?
- The idea of disabling exception handling and using Rust-like result values is sound,
but the implementation could be improved.
- The use of CMake as a build tool has caused several issues,
including poor handling of third-party libraries and subprojects.
- Some dependencies are problematic:
- Not all of them are available on all platforms.
- Updating them can be challenging.
## What are the benefits of rewriting the project in Rust?
- Easy porting of the project to other platforms, including Windows
- Improved maintainability through the use of third-party libraries
and better development tooling
# How?
The `3.x` version will be the last version of the C++ codebase.
The `4.x` version will be the first version of the Rust codebase.
The `master` branch will be kept as the main release branch.
The Rust codebase will be developed on the `master` branch,
but it will be kept in a separate directory.
# When?
I will work on this project in my free time (as before).

View File

@@ -1,49 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
[package]
name = "bear"
description = "Bear is a tool that generates a compilation database for clang tooling."
keywords = ["clang", "clang-tooling", "compilation-database"]
version.workspace = true
authors.workspace = true
repository.workspace = true
homepage.workspace = true
license.workspace = true
edition.workspace = true
[lib]
name = "bear"
path = "src/lib.rs"
[[bin]]
name = "bear"
path = "src/bin/bear.rs"
[[bin]]
name = "wrapper"
path = "src/bin/wrapper.rs"
[dependencies]
thiserror.workspace = true
anyhow.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_yml.workspace = true
clap.workspace = true
directories.workspace = true
chrono.workspace = true
log.workspace = true
env_logger.workspace = true
path-absolutize.workspace = true
shell-words.workspace = true
nom.workspace = true
regex.workspace = true
rand.workspace = true
tempfile.workspace = true
signal-hook.workspace = true
[profile.release]
strip = true
lto = true
opt-level = 3
codegen-units = 1

View File

@@ -1,6 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
fn main() {
println!("cargo:rustc-env=WRAPPER_EXECUTABLE_PATH=/usr/libexec/bear/wrapper");
println!("cargo:rustc-env=PRELOAD_LIBRARY_PATH=/usr/libexec/bear/$LIB/libexec.so");
}
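These `rustc-env` lines make the two paths available at compile time. A minimal sketch of how they could be read elsewhere in the crate via the `env!` macro (the constant names are illustrative, not taken from this commit):

// Compile-time lookup of the variables exported by the build script above.
// (Hypothetical constant names, for illustration only.)
const WRAPPER_EXECUTABLE_PATH: &str = env!("WRAPPER_EXECUTABLE_PATH");
const PRELOAD_LIBRARY_PATH: &str = env!("PRELOAD_LIBRARY_PATH");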

View File

@@ -1,373 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! This module contains the command line interface of the application.
//!
//! The command line parsing is implemented using the `clap` library.
//! The module is defining types to represent a structured form of the
//! program invocation. The `Arguments` type is used to represent all
//! possible invocations of the program.
use anyhow::anyhow;
use clap::{arg, command, ArgAction, ArgMatches, Command};
/// Common constants used in the module.
const MODE_INTERCEPT_SUBCOMMAND: &str = "intercept";
const MODE_SEMANTIC_SUBCOMMAND: &str = "semantic";
const DEFAULT_OUTPUT_FILE: &str = "compile_commands.json";
const DEFAULT_EVENT_FILE: &str = "events.json";
/// Represents the command line arguments of the application.
#[derive(Debug, PartialEq)]
pub struct Arguments {
// The path of the configuration file.
pub config: Option<String>,
// The mode of the application.
pub mode: Mode,
}
/// Represents the mode of the application.
#[derive(Debug, PartialEq)]
pub enum Mode {
Intercept {
input: BuildCommand,
output: BuildEvents,
},
Semantic {
input: BuildEvents,
output: BuildSemantic,
},
Combined {
input: BuildCommand,
output: BuildSemantic,
},
}
/// Represents the execution of a command.
#[derive(Debug, PartialEq)]
pub struct BuildCommand {
pub arguments: Vec<String>,
}
#[derive(Debug, PartialEq)]
pub struct BuildSemantic {
pub file_name: String,
pub append: bool,
}
#[derive(Debug, PartialEq)]
pub struct BuildEvents {
pub file_name: String,
}
impl TryFrom<ArgMatches> for Arguments {
type Error = anyhow::Error;
fn try_from(matches: ArgMatches) -> Result<Self, Self::Error> {
let config = matches.get_one::<String>("config").map(String::to_string);
match matches.subcommand() {
Some((MODE_INTERCEPT_SUBCOMMAND, intercept_matches)) => {
let input = BuildCommand::try_from(intercept_matches)?;
let output = intercept_matches
.get_one::<String>("output")
.map(String::to_string)
.expect("output is defaulted");
let mode = Mode::Intercept {
input,
output: BuildEvents { file_name: output },
};
let arguments = Arguments { config, mode };
Ok(arguments)
}
Some((MODE_SEMANTIC_SUBCOMMAND, semantic_matches)) => {
let input = semantic_matches
.get_one::<String>("input")
.map(String::to_string)
.expect("input is defaulted");
let output = BuildSemantic::try_from(semantic_matches)?;
let mode = Mode::Semantic {
input: BuildEvents { file_name: input },
output,
};
let arguments = Arguments { config, mode };
Ok(arguments)
}
None => {
let input = BuildCommand::try_from(&matches)?;
let output = BuildSemantic::try_from(&matches)?;
let mode = Mode::Combined { input, output };
let arguments = Arguments { config, mode };
Ok(arguments)
}
_ => Err(anyhow!("unrecognized subcommand")),
}
}
}
impl TryFrom<&ArgMatches> for BuildCommand {
type Error = anyhow::Error;
fn try_from(matches: &ArgMatches) -> Result<Self, Self::Error> {
let arguments = matches
.get_many("COMMAND")
.expect("missing build command")
.cloned()
.collect();
Ok(BuildCommand { arguments })
}
}
impl TryFrom<&ArgMatches> for BuildSemantic {
type Error = anyhow::Error;
fn try_from(matches: &ArgMatches) -> Result<Self, Self::Error> {
let file_name = matches
.get_one::<String>("output")
.map(String::to_string)
.expect("output is defaulted");
let append = *matches.get_one::<bool>("append").unwrap_or(&false);
Ok(BuildSemantic { file_name, append })
}
}
/// Represents the command line interface of the application.
///
/// This describes how the user can interact with the application.
/// The different modes of the application are represented as subcommands.
/// The application can be run in intercept mode, semantic mode, or the
/// default mode where both intercept and semantic are executed.
pub fn cli() -> Command {
command!()
.subcommand_required(false)
.subcommand_negates_reqs(true)
.subcommand_precedence_over_arg(true)
.arg_required_else_help(true)
.args(&[
arg!(-v --verbose ... "Sets the level of verbosity").action(ArgAction::Count),
arg!(-c --config <FILE> "Path of the config file"),
])
.subcommand(
Command::new(MODE_INTERCEPT_SUBCOMMAND)
.about("intercepts command execution")
.args(&[
arg!(<COMMAND> "Build command")
.action(ArgAction::Append)
.value_terminator("--")
.num_args(1..)
.last(true)
.required(true),
arg!(-o --output <FILE> "Path of the event file")
.default_value(DEFAULT_EVENT_FILE)
.hide_default_value(false),
])
.arg_required_else_help(true),
)
.subcommand(
Command::new(MODE_SEMANTIC_SUBCOMMAND)
.about("detect semantics of command executions")
.args(&[
arg!(-i --input <FILE> "Path of the event file")
.default_value(DEFAULT_EVENT_FILE)
.hide_default_value(false),
arg!(-o --output <FILE> "Path of the result file")
.default_value(DEFAULT_OUTPUT_FILE)
.hide_default_value(false),
arg!(-a --append "Append result to an existing output file")
.action(ArgAction::SetTrue),
])
.arg_required_else_help(false),
)
.args(&[
arg!(<COMMAND> "Build command")
.action(ArgAction::Append)
.value_terminator("--")
.num_args(1..)
.last(true)
.required(true),
arg!(-o --output <FILE> "Path of the result file")
.default_value(DEFAULT_OUTPUT_FILE)
.hide_default_value(false),
arg!(-a --append "Append result to an existing output file").action(ArgAction::SetTrue),
])
}
#[cfg(test)]
mod test {
use super::*;
use crate::vec_of_strings;
#[test]
fn test_intercept_call() {
let execution = vec![
"bear",
"-c",
"~/bear.yaml",
"intercept",
"-o",
"custom.json",
"--",
"make",
"all",
];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: Some("~/bear.yaml".to_string()),
mode: Mode::Intercept {
input: BuildCommand {
arguments: vec_of_strings!["make", "all"]
},
output: BuildEvents {
file_name: "custom.json".to_string()
},
},
}
);
}
#[test]
fn test_intercept_defaults() {
let execution = vec!["bear", "intercept", "--", "make", "all"];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: None,
mode: Mode::Intercept {
input: BuildCommand {
arguments: vec_of_strings!["make", "all"]
},
output: BuildEvents {
file_name: "events.json".to_string()
},
},
}
);
}
#[test]
fn test_semantic_call() {
let execution = vec![
"bear",
"-c",
"~/bear.yaml",
"semantic",
"-i",
"custom.json",
"-o",
"result.json",
"-a",
];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: Some("~/bear.yaml".to_string()),
mode: Mode::Semantic {
input: BuildEvents {
file_name: "custom.json".to_string()
},
output: BuildSemantic {
file_name: "result.json".to_string(),
append: true
},
},
}
);
}
#[test]
fn test_semantic_defaults() {
let execution = vec!["bear", "semantic"];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: None,
mode: Mode::Semantic {
input: BuildEvents {
file_name: "events.json".to_string()
},
output: BuildSemantic {
file_name: "compile_commands.json".to_string(),
append: false
},
},
}
);
}
#[test]
fn test_all_call() {
let execution = vec![
"bear",
"-c",
"~/bear.yaml",
"-o",
"result.json",
"-a",
"--",
"make",
"all",
];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: Some("~/bear.yaml".to_string()),
mode: Mode::Combined {
input: BuildCommand {
arguments: vec_of_strings!["make", "all"]
},
output: BuildSemantic {
file_name: "result.json".to_string(),
append: true
},
},
}
);
}
#[test]
fn test_all_defaults() {
let execution = vec!["bear", "--", "make", "all"];
let matches = cli().get_matches_from(execution);
let arguments = Arguments::try_from(matches).unwrap();
assert_eq!(
arguments,
Arguments {
config: None,
mode: Mode::Combined {
input: BuildCommand {
arguments: vec_of_strings!["make", "all"]
},
output: BuildSemantic {
file_name: "compile_commands.json".to_string(),
append: false
},
},
}
);
}
}

View File

@@ -1,76 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use bear::modes::{Combined, Intercept, Mode, Semantic};
use bear::{args, config};
use std::env;
use std::process::ExitCode;
/// Driver function of the application.
fn main() -> anyhow::Result<ExitCode> {
// Initialize the logging system.
env_logger::init();
// Get the package name and version from Cargo
let pkg_name = env!("CARGO_PKG_NAME");
let pkg_version = env!("CARGO_PKG_VERSION");
log::debug!("{} v{}", pkg_name, pkg_version);
// Parse the command line arguments.
let matches = args::cli().get_matches();
let arguments = args::Arguments::try_from(matches)?;
// Print the arguments.
log::debug!("Arguments: {:?}", arguments);
// Load the configuration.
let configuration = config::Main::load(&arguments.config)?;
log::debug!("Configuration: {:?}", configuration);
// Run the application.
let application = Application::configure(arguments, configuration)?;
let result = application.run();
log::debug!("Exit code: {:?}", result);
Ok(result)
}
/// Represents the application state.
enum Application {
Intercept(Intercept),
Semantic(Semantic),
Combined(Combined),
}
impl Application {
/// Configure the application based on the command line arguments and the configuration.
///
/// Validates the configuration and the arguments while creating the application
/// state that will be used by the `run` method, catching problems early, before
/// the actual execution of the application.
fn configure(args: args::Arguments, config: config::Main) -> anyhow::Result<Self> {
match args.mode {
args::Mode::Intercept { input, output } => {
log::debug!("Mode: intercept");
Intercept::from(input, output, config).map(Application::Intercept)
}
args::Mode::Semantic { input, output } => {
log::debug!("Mode: semantic analysis");
Semantic::from(input, output, config).map(Application::Semantic)
}
args::Mode::Combined { input, output } => {
log::debug!("Mode: intercept and semantic analysis");
Combined::from(input, output, config).map(Application::Combined)
}
}
}
fn run(self) -> ExitCode {
let status = match self {
Application::Intercept(intercept) => intercept.run(),
Application::Semantic(semantic) => semantic.run(),
Application::Combined(all) => all.run(),
};
status.unwrap_or_else(|error| {
log::error!("Bear: {}", error);
ExitCode::FAILURE
})
}
}

View File

@@ -1,124 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! This module implements a wrapper around an arbitrary executable.
//!
//! The wrapper is used to intercept the execution of the executable and
//! report it to a remote server. The wrapper is named after the executable
//! via a soft link (or a hard copy on platforms where soft links are not
//! supported). The wrapper process is called instead of the original executable.
//! This is arranged by the process that supervises the build:
//! the build supervisor creates a directory with soft links and places
//! that directory at the beginning of the PATH variable, which guarantees
//! that the wrapper is called instead of the original executable.
//!
//! The wrapper reads the PATH variable and finds the next executable with
//! the same name as the wrapper. It reports the execution of the real
//! executable and then calls the real executable with the same arguments.
use anyhow::{Context, Result};
use bear::intercept::supervise::supervise;
use bear::intercept::tcp::ReporterOnTcp;
use bear::intercept::Reporter;
use bear::intercept::KEY_DESTINATION;
use bear::intercept::{Event, Execution, ProcessId};
use std::path::{Path, PathBuf};
/// Implementation of the wrapper process.
///
/// The process exit code is the same as the executed process exit code.
/// Besides the functionality described in the module documentation, the
/// wrapper process logs the execution and the relevant steps leading to
/// the execution.
fn main() -> Result<()> {
env_logger::init();
// Find out what is the executable name the execution was started with
let executable = file_name_from_arguments()?;
log::info!("Executable as called: {:?}", executable);
// Read the PATH variable and find the next executable with the same name
let real_executable = next_in_path(&executable)?;
log::info!("Executable to call: {:?}", real_executable);
// Reporting failures shall not fail the execution.
match into_execution(&real_executable).and_then(report) {
Ok(_) => log::info!("Execution reported"),
Err(e) => log::error!("Execution reporting failed: {}", e),
}
// Execute the real executable with the same arguments
let mut command = std::process::Command::new(real_executable);
let exit_status = supervise(command.args(std::env::args().skip(1)))?;
log::info!("Execution finished with status: {:?}", exit_status);
// Return the child process status code
std::process::exit(exit_status.code().unwrap_or(1));
}
/// Get the file name of the executable from the arguments.
///
/// Since the executable will be called via soft link, the first argument
/// will be the name of the soft link. This function returns the file name
/// of the soft link.
fn file_name_from_arguments() -> Result<PathBuf> {
std::env::args()
.next()
.ok_or_else(|| anyhow::anyhow!("Cannot get first argument"))
.and_then(|arg| match PathBuf::from(arg).file_name() {
Some(file_name) => Ok(PathBuf::from(file_name)),
None => Err(anyhow::anyhow!(
"Cannot get the file name from the argument"
)),
})
}
/// Find the next executable in the PATH variable.
///
/// The function reads the PATH variable and tries to find the next executable
/// with the same name as the given executable. It returns the path to the
/// executable.
fn next_in_path(target: &Path) -> Result<PathBuf> {
let path = std::env::var("PATH")?;
log::debug!("PATH: {}", path);
// The `current_exe` is a canonical path to the current executable.
let current_exe = std::env::current_exe()?;
path.split(':')
.map(|dir| Path::new(dir).join(target))
.filter(|path| path.is_file())
.find(|path| {
// We need to compare it with the real path of the candidate executable to avoid
// calling the same executable again.
let real_path = match path.canonicalize() {
Ok(path) => path,
Err(_) => return false,
};
real_path != current_exe
})
.ok_or_else(|| anyhow::anyhow!("Cannot find the real executable"))
}
fn report(execution: Execution) -> Result<()> {
let event = Event {
pid: ProcessId(std::process::id()),
execution,
};
// Get the reporter address from the environment
std::env::var(KEY_DESTINATION)
.with_context(|| format!("${} is missing from the environment", KEY_DESTINATION))
// Create a new reporter
.and_then(ReporterOnTcp::new)
.with_context(|| "Cannot create TCP execution reporter")
// Report the execution
.and_then(|reporter| reporter.report(event))
.with_context(|| "Sending execution failed")
}
fn into_execution(path_buf: &Path) -> Result<Execution> {
Ok(Execution {
executable: path_buf.to_path_buf(),
arguments: std::env::args().collect(),
working_dir: std::env::current_dir()?,
environment: std::env::vars().collect(),
})
}
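As a companion to the module documentation above, a hedged sketch (not part of this commit) of what the build supervisor arranges: a directory of soft links, named after the intercepted tools, all pointing at this wrapper binary, which is then prepended to PATH. Unix-only for brevity; the tool names are illustrative.

use std::io;
use std::os::unix::fs::symlink;
use std::path::Path;

// Populate `bin_dir` with soft links to the wrapper, one per tool name.
fn stage_wrappers(wrapper: &Path, bin_dir: &Path) -> io::Result<()> {
    for tool in ["cc", "c++", "gcc", "clang"] {
        symlink(wrapper, bin_dir.join(tool))?;
    }
    Ok(())
}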

File diff suppressed because it is too large

View File

@@ -1,21 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#[cfg(test)]
pub mod fixtures {
#[macro_export]
macro_rules! vec_of_strings {
($($x:expr),*) => (vec![$($x.to_string()),*]);
}
#[macro_export]
macro_rules! map_of_strings {
($($k:expr => $v:expr),* $(,)?) => {{
core::convert::From::from([$(($k.to_string(), $v.to_string()),)*])
}};
}
#[macro_export]
macro_rules! vec_of_pathbuf {
($($x:expr),*) => (vec![$(PathBuf::from($x)),*]);
}
}
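A hedged illustration of how these helper macros are used (mirroring the tests elsewhere in this commit; the snippet belongs inside a test body):

// `vec_of_strings!` builds a Vec<String> from string literals;
// `map_of_strings!` builds a map with owned keys and values.
let arguments: Vec<String> = vec_of_strings!["make", "all"];
let environment: std::collections::HashMap<String, String> =
    map_of_strings! { "PATH" => "/usr/bin", "HOME" => "/home/user" };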

View File

@@ -1,336 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! The module contains the intercept reporting and collecting functionality.
//!
//! When a command execution is intercepted, the interceptor sends the event to the collector.
//! This happens in two different processes, requiring a communication channel between these
//! processes.
//!
//! The module provides abstractions for the reporter and the collector,
//! and defines the data structures that are used to represent the events.
use crate::intercept::supervise::supervise;
use crate::{args, config};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::process::{Command, ExitCode};
use std::sync::mpsc::{channel, Receiver, Sender};
use std::sync::Arc;
use std::{env, fmt, thread};
pub mod persistence;
pub mod supervise;
pub mod tcp;
/// Declare the environment variables used by the intercept mode.
pub const KEY_DESTINATION: &str = "INTERCEPT_REPORTER_ADDRESS";
pub const KEY_PRELOAD_PATH: &str = "LD_PRELOAD";
/// Represents the remote sink of supervised process events.
///
/// This allows the reporters to send events to a remote collector.
pub trait Reporter {
fn report(&self, event: Event) -> Result<(), anyhow::Error>;
}
/// Represents the local sink of supervised process events.
///
/// The collector is responsible for collecting the events from the reporters.
///
/// To share the collector between threads, we use the `Arc` type to wrap the
/// collector. This way we can clone the collector and send it to other threads.
pub trait Collector {
/// Returns the address of the collector.
///
/// The address is in the format of `ip:port`.
fn address(&self) -> String;
/// Collects the events from the reporters.
///
/// The events are sent to the given destination channel.
///
/// The function returns when the collector is stopped. The collector is stopped
/// when the `stop` method is invoked (from another thread).
fn collect(&self, destination: Sender<Envelope>) -> Result<(), anyhow::Error>;
/// Stops the collector.
fn stop(&self) -> Result<(), anyhow::Error>;
}
/// Envelope is a wrapper around the event.
///
/// It contains the reporter id, the timestamp of the event and the event itself.
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub struct Envelope {
pub rid: ReporterId,
pub timestamp: u64,
pub event: Event,
}
impl fmt::Display for Envelope {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Envelope rid={}, timestamp={}, event={}",
self.rid.0, self.timestamp, self.event
)
}
}
/// Represents a relevant life cycle event of a process.
///
/// In the current implementation, we only have one event, the `Started` event.
/// This event is sent when a process is started. It contains the process id
/// and the execution information.
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub struct Event {
pub pid: ProcessId,
pub execution: Execution,
}
impl fmt::Display for Event {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Event pid={}, execution={}", self.pid.0, self.execution)
}
}
/// Execution is a representation of a process execution.
///
/// It does not contain information about the outcome of the execution,
/// like the exit code or the duration of the execution. It only contains
/// the information that is necessary to reproduce the execution.
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub struct Execution {
pub executable: PathBuf,
pub arguments: Vec<String>,
pub working_dir: PathBuf,
pub environment: HashMap<String, String>,
}
impl fmt::Display for Execution {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Execution path={}, args=[{}]",
self.executable.display(),
self.arguments.join(",")
)
}
}
/// Reporter id is a unique identifier for a reporter.
///
/// It is used to identify the process that sends the execution report.
/// Because an OS PID is not unique across a single build (PIDs are
/// recycled), we need a separate unique identifier for the reporting process.
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub struct ReporterId(pub u64);
/// Process id is an OS identifier for a process.
#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)]
pub struct ProcessId(pub u32);
/// The service is responsible for collecting the events from the supervised processes.
///
/// The service is implemented as TCP server that listens on a random port on the loopback
/// interface. The address of the service can be obtained by the `address` method.
///
/// The service is started in a separate thread to dispatch the events to the consumer.
/// The consumer is a function that receives the events from the service and processes them.
/// It also runs in a separate thread. The reason for having two threads is to avoid blocking
/// the main thread of the application and decouple the collection from the processing.
pub(crate) struct CollectorService {
collector: Arc<dyn Collector>,
network_thread: Option<thread::JoinHandle<()>>,
output_thread: Option<thread::JoinHandle<()>>,
}
impl CollectorService {
/// Creates a new intercept service.
///
/// The `consumer` is a function that receives the events and processes them.
/// The function is executed in a separate thread.
pub fn new<F>(consumer: F) -> anyhow::Result<Self>
where
F: FnOnce(Receiver<Envelope>) -> anyhow::Result<()>,
F: Send + 'static,
{
let collector = tcp::CollectorOnTcp::new()?;
let collector_arc = Arc::new(collector);
let (sender, receiver) = channel();
let collector_in_thread = collector_arc.clone();
let collector_thread = thread::spawn(move || {
let result = collector_in_thread.collect(sender);
if let Err(e) = result {
log::error!("Failed to collect events: {}", e);
}
});
let output_thread = thread::spawn(move || {
let result = consumer(receiver);
if let Err(e) = result {
log::error!("Failed to process events: {}", e);
}
});
log::debug!("Collector service started at {}", collector_arc.address());
Ok(CollectorService {
collector: collector_arc,
network_thread: Some(collector_thread),
output_thread: Some(output_thread),
})
}
/// Returns the address of the service.
pub fn address(&self) -> String {
self.collector.address()
}
}
impl Drop for CollectorService {
/// Shuts down the service.
fn drop(&mut self) {
// TODO: log the shutdown of the service and any errors
self.collector.stop().expect("Failed to stop the collector");
if let Some(thread) = self.network_thread.take() {
thread.join().expect("Failed to join the collector thread");
}
if let Some(thread) = self.output_thread.take() {
thread.join().expect("Failed to join the output thread");
}
}
}
/// The environment for the intercept mode.
///
/// Running the build command requires a specific environment. The environment we
/// need for intercepting the child processes is different for each intercept mode.
///
/// The `Wrapper` mode requires a temporary directory with the executables that will
/// be used to intercept the child processes. The executables are hard linked to the
/// temporary directory.
///
/// The `Preload` mode requires the path to the preload library that will be used to
/// intercept the child processes.
pub(crate) enum InterceptEnvironment {
Wrapper {
bin_dir: tempfile::TempDir,
address: String,
collector: CollectorService,
},
Preload {
path: PathBuf,
address: String,
collector: CollectorService,
},
}
impl InterceptEnvironment {
/// Creates a new intercept environment.
///
/// The `config` is the intercept configuration that specifies the mode and the
/// required parameters for the mode. The `collector` is the service to collect
/// the execution events.
pub fn new(config: &config::Intercept, collector: CollectorService) -> anyhow::Result<Self> {
let address = collector.address();
let result = match config {
config::Intercept::Wrapper {
path,
directory,
executables,
} => {
// Create a temporary directory and populate it with links to the wrapper,
// one per intercepted executable name. (Note the argument order:
// `hard_link(original, link)` creates `link` pointing at `original`.)
let bin_dir = tempfile::TempDir::with_prefix_in("bear-", directory)?;
for executable in executables {
let link = bin_dir.path().join(executable.file_name().unwrap_or_default());
std::fs::hard_link(path, link)?;
}
InterceptEnvironment::Wrapper {
bin_dir,
address,
collector,
}
}
config::Intercept::Preload { path } => InterceptEnvironment::Preload {
path: path.clone(),
address,
collector,
},
};
Ok(result)
}
/// Executes the build command in the intercept environment.
///
/// The method is blocking and waits for the build command to finish.
/// The method returns the exit code of the build command. Result failure
/// indicates that the build command failed to start.
pub fn execute_build_command(&self, input: args::BuildCommand) -> anyhow::Result<ExitCode> {
// TODO: record the execution of the build command
let environment = self.environment();
let process = input.arguments[0].clone();
let arguments = input.arguments[1..].to_vec();
let mut child = Command::new(process);
let exit_status = supervise(child.args(arguments).envs(environment))?;
log::info!("Execution finished with status: {:?}", exit_status);
// The exit code is not always available. When the process is killed by a signal,
// the exit code is not available. In this case, we return the `FAILURE` exit code.
let exit_code = exit_status
.code()
.map(|code| ExitCode::from(code as u8))
.unwrap_or(ExitCode::FAILURE);
Ok(exit_code)
}
/// Returns the environment variables for the intercept environment.
///
/// The environment variables are different for each intercept mode.
/// It does not change the original environment variables, but creates
/// the environment variables that are required for the intercept mode.
fn environment(&self) -> Vec<(String, String)> {
match self {
InterceptEnvironment::Wrapper {
bin_dir, address, ..
} => {
let path_original = env::var("PATH").unwrap_or_else(|_| String::new());
let path_updated = InterceptEnvironment::insert_to_path(
&path_original,
Self::path_to_string(bin_dir.path()),
);
vec![
("PATH".to_string(), path_updated),
(KEY_DESTINATION.to_string(), address.clone()),
]
}
InterceptEnvironment::Preload { path, address, .. } => {
let path_original = env::var(KEY_PRELOAD_PATH).unwrap_or_else(|_| String::new());
let path_updated = InterceptEnvironment::insert_to_path(
&path_original,
Self::path_to_string(path),
);
vec![
(KEY_PRELOAD_PATH.to_string(), path_updated),
(KEY_DESTINATION.to_string(), address.clone()),
]
}
}
}
/// Manipulate a `PATH`-like environment value by inserting the `first` path into
/// the original value. It removes `first` if it already exists in the original
/// value and inserts it at the beginning of the value.
fn insert_to_path(original: &str, first: String) -> String {
let mut paths: Vec<_> = original.split(':').filter(|it| it != &first).collect();
paths.insert(0, first.as_str());
paths.join(":")
}
fn path_to_string(path: &Path) -> String {
path.to_str().unwrap_or("").to_string()
}
}
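A hedged illustration (not part of this commit) of the `insert_to_path` behavior described above: the new entry is de-duplicated and moved to the front.

#[test]
fn insert_to_path_prepends_and_dedupes() {
    // "/opt/wrap" is removed from its old position and prepended.
    let updated = InterceptEnvironment::insert_to_path(
        "/usr/bin:/opt/wrap",
        "/opt/wrap".to_string(),
    );
    assert_eq!(updated, "/opt/wrap:/usr/bin");
}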

View File

@@ -1,147 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use super::Envelope;
use serde_json::de::IoRead;
use serde_json::StreamDeserializer;
use std::io;
/// Read the build events from the given reader.
///
/// Returns an iterator over the build events.
/// Any error will interrupt the reading process and the remaining events will be lost.
pub fn read(reader: impl io::Read) -> impl Iterator<Item = Envelope> {
let stream = StreamDeserializer::new(IoRead::new(reader));
stream.filter_map(|result| match result {
Ok(value) => Some(value),
Err(error) => {
log::error!("Failed to read event: {:?}", error);
None
}
})
}
/// Write the build events to the given writer.
///
/// Can fail if the events cannot be serialized or written to the file.
/// Any error will interrupt the writing process and the file will be incomplete.
pub fn write(
mut writer: impl io::Write,
envelopes: impl IntoIterator<Item = Envelope>,
) -> Result<(), anyhow::Error> {
for envelope in envelopes {
serde_json::to_writer(&mut writer, &envelope)?;
writer.write_all(b"\n")?;
}
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
use crate::intercept::{Event, Execution, ProcessId, ReporterId};
use crate::vec_of_strings;
use serde_json::json;
use std::collections::HashMap;
use std::path::PathBuf;
#[test]
fn read_write() {
let events = expected_values();
let mut buffer = Vec::new();
write(&mut buffer, events.iter().cloned()).unwrap();
let mut cursor = io::Cursor::new(buffer);
let read_events: Vec<_> = read(&mut cursor).collect();
assert_eq!(events, read_events);
}
#[test]
fn read_write_empty() {
let events = Vec::<Envelope>::new();
let mut buffer = Vec::new();
write(&mut buffer, events.iter().cloned()).unwrap();
let mut cursor = io::Cursor::new(buffer);
let read_events: Vec<_> = read(&mut cursor).collect();
assert_eq!(events, read_events);
}
#[test]
fn read_stops_on_errors() {
let line1 = json!({
"rid": 42,
"timestamp": 0,
"event": {
"pid": 11782,
"execution": {
"executable": "/usr/bin/clang",
"arguments": ["clang", "-c", "main.c"],
"working_dir": "/home/user",
"environment": {
"PATH": "/usr/bin",
"HOME": "/home/user"
}
}
}
});
let line2 = json!({"rid": 42 });
let line3 = json!({
"rid": 42,
"timestamp": 273,
"event": {
"pid": 11934,
"execution": {
"executable": "/usr/bin/clang",
"arguments": ["clang", "-c", "output.c"],
"working_dir": "/home/user",
"environment": {}
}
}
});
let content = format!("{}\n{}\n{}\n", line1, line2, line3);
let mut cursor = io::Cursor::new(content);
let read_events: Vec<_> = read(&mut cursor).collect();
// Only the first event is read; the remaining lines are lost.
assert_eq!(expected_values()[0..1], read_events);
}
const REPORTER_ID: ReporterId = ReporterId(42);
fn expected_values() -> Vec<Envelope> {
vec![
Envelope {
rid: REPORTER_ID,
timestamp: 0,
event: Event {
pid: ProcessId(11782),
execution: Execution {
executable: PathBuf::from("/usr/bin/clang"),
arguments: vec_of_strings!["clang", "-c", "main.c"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::from([
("PATH".to_string(), "/usr/bin".to_string()),
("HOME".to_string(), "/home/user".to_string()),
]),
},
},
},
Envelope {
rid: REPORTER_ID,
timestamp: 273,
event: Event {
pid: ProcessId(11934),
execution: Execution {
executable: PathBuf::from("/usr/bin/clang"),
arguments: vec_of_strings!["clang", "-c", "output.c"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::from([]),
},
},
},
]
}
}

View File

@@ -1,44 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use anyhow::Result;
use std::process::{Command, ExitStatus};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use std::time;
/// This method supervises the execution of a command.
///
/// It starts the command and waits for its completion. When a termination
/// signal is received, it terminates the child process. The method returns
/// the exit status of the child process.
pub fn supervise(command: &mut Command) -> Result<ExitStatus> {
let signaled = Arc::new(AtomicUsize::new(0));
for signal in signal_hook::consts::TERM_SIGNALS {
signal_hook::flag::register_usize(*signal, Arc::clone(&signaled), *signal as usize)?;
}
let mut child = command.spawn()?;
loop {
// On a termination signal, terminate the child process, but don't exit
// the loop until the child has actually finished.
if signaled.swap(0usize, Ordering::SeqCst) != 0 {
log::debug!("Received termination signal, terminating the child process");
child.kill()?;
}
// Check if the child process has exited
match child.try_wait() {
Ok(Some(exit_status)) => {
log::debug!("Child process exited");
return Ok(exit_status);
}
Ok(None) => {
thread::sleep(time::Duration::from_millis(100));
}
Err(e) => {
log::error!("Error waiting for child process: {}", e);
return Err(e.into());
}
}
}
}
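A hedged usage sketch (not part of this commit; the build command is illustrative):

use std::process::Command;

fn run_build() -> anyhow::Result<()> {
    // Supervise a build command and report how it finished.
    let status = supervise(Command::new("make").arg("all"))?;
    log::info!("Build finished with {:?}", status.code());
    Ok(())
}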

View File

@@ -1,332 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! The module contains the implementation of the TCP collector and reporter.
use std::io::{Read, Write};
use std::net::{SocketAddr, TcpListener, TcpStream};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::Sender;
use std::sync::Arc;
use super::{Collector, Envelope, Event, Reporter, ReporterId};
use chrono::Utc;
use rand;
/// Implements convenient methods for the `Envelope` type.
impl Envelope {
fn new(rid: &ReporterId, event: Event) -> Self {
let timestamp = Utc::now().timestamp_millis() as u64;
Envelope {
rid: rid.clone(),
timestamp,
event,
}
}
/// Read an envelope from a reader using TLV format.
///
/// The envelope is serialized using JSON and the length of the JSON
/// is written as a 4 byte big-endian integer before the JSON.
fn read_from(reader: &mut impl Read) -> Result<Self, anyhow::Error> {
let mut length_bytes = [0; 4];
reader.read_exact(&mut length_bytes)?;
let length = u32::from_be_bytes(length_bytes) as usize;
let mut buffer = vec![0; length];
reader.read_exact(&mut buffer)?;
let envelope = serde_json::from_slice(buffer.as_ref())?;
Ok(envelope)
}
/// Write an envelope to a writer using TLV format.
///
/// The envelope is serialized using JSON and the length of the JSON
/// is written as a 4 byte big-endian integer before the JSON.
fn write_into(&self, writer: &mut impl Write) -> Result<u32, anyhow::Error> {
let serialized_envelope = serde_json::to_string(&self)?;
let bytes = serialized_envelope.into_bytes();
let length = bytes.len() as u32;
writer.write_all(&length.to_be_bytes())?;
writer.write_all(&bytes)?;
Ok(length)
}
}
/// Implements convenient methods for the `ReporterId` type.
impl ReporterId {
pub fn generate() -> Self {
let id = rand::random::<u64>();
ReporterId(id)
}
}
/// Represents a TCP event collector.
pub struct CollectorOnTcp {
shutdown: Arc<AtomicBool>,
listener: TcpListener,
address: SocketAddr,
}
impl CollectorOnTcp {
/// Creates a new TCP event collector.
///
/// The collector listens on a random port on the loopback interface.
/// The address of the collector can be obtained by the `address` method.
pub fn new() -> Result<Self, anyhow::Error> {
let shutdown = Arc::new(AtomicBool::new(false));
let listener = TcpListener::bind("127.0.0.1:0")?;
let address = listener.local_addr()?;
let result = CollectorOnTcp {
shutdown,
listener,
address,
};
Ok(result)
}
fn send(
&self,
mut socket: TcpStream,
destination: Sender<Envelope>,
) -> Result<(), anyhow::Error> {
let envelope = Envelope::read_from(&mut socket)?;
destination.send(envelope)?;
Ok(())
}
}
impl Collector for CollectorOnTcp {
fn address(&self) -> String {
self.address.to_string()
}
/// Single-threaded implementation of the collector.
///
/// The collector listens on the TCP port and accepts incoming connections.
/// When a connection is accepted, the collector reads the events from the
/// connection and sends them to the destination channel.
fn collect(&self, destination: Sender<Envelope>) -> Result<(), anyhow::Error> {
for stream in self.listener.incoming() {
// This has to be the first thing to do, in order to implement the stop method!
if self.shutdown.load(Ordering::Relaxed) {
break;
}
match stream {
Ok(connection) => {
// Process the connection; this could be offloaded to a separate thread or task.
self.send(connection, destination.clone())?;
}
Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => {
// No new connection available, continue checking for shutdown
continue;
}
Err(e) => {
println!("Error: {}", e);
break;
}
}
}
Ok(())
}
/// Stops the collector by flipping the shutdown flag and connecting to the collector.
///
/// The collector is stopped when the `collect` method sees the shutdown flag.
/// To signal the collector to stop, we connect to the collector to unblock the
/// `accept` call to check the shutdown flag.
fn stop(&self) -> Result<(), anyhow::Error> {
self.shutdown.store(true, Ordering::Relaxed);
let _ = TcpStream::connect(self.address)?;
Ok(())
}
}
/// Represents a TCP event reporter.
pub struct ReporterOnTcp {
destination: String,
reporter_id: ReporterId,
}
impl ReporterOnTcp {
/// Creates a new TCP reporter instance.
///
/// It does not open the TCP connection yet; it only stores the destination
/// address and creates a unique reporter id.
pub fn new(destination: String) -> Result<Self, anyhow::Error> {
let reporter_id = ReporterId::generate();
let result = ReporterOnTcp {
destination,
reporter_id,
};
Ok(result)
}
}
impl Reporter for ReporterOnTcp {
/// Sends an event to the remote collector.
///
/// The event is wrapped in an envelope and sent to the remote collector.
/// The TCP connection is opened and closed for each event.
fn report(&self, event: Event) -> Result<(), anyhow::Error> {
let envelope = Envelope::new(&self.reporter_id, event);
let mut socket = TcpStream::connect(self.destination.clone())?;
envelope.write_into(&mut socket)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Cursor;
use std::sync::mpsc::channel;
use std::sync::Arc;
use std::thread;
use std::time::Duration;
// Test that the serialization and deserialization of the Envelope works.
// We write the Envelope to a buffer and read it back to check if the
// deserialized Envelope is the same as the original one.
#[test]
fn read_write_works() {
let mut writer = Cursor::new(vec![0; 1024]);
for envelope in fixtures::ENVELOPES.iter() {
let result = Envelope::write_into(envelope, &mut writer);
assert!(result.is_ok());
}
let mut reader = Cursor::new(writer.get_ref());
for envelope in fixtures::ENVELOPES.iter() {
let result = Envelope::read_from(&mut reader);
assert!(result.is_ok());
assert_eq!(result.unwrap(), envelope.clone());
}
}
// Test that the TCP reporter and the TCP collector work together.
// We create a TCP collector and a TCP reporter, then we send events
// to the reporter and check if the collector receives them.
//
// We use a channel to receive the events from the collector. The test
// then reads the events from the channel and checks that they match
// the original events.
#[test]
fn tcp_reporter_and_collectors_work() {
let collector = CollectorOnTcp::new().unwrap();
let reporter = ReporterOnTcp::new(collector.address()).unwrap();
// Create wrapper to share the collector across threads.
let thread_collector = Arc::new(collector);
let main_collector = thread_collector.clone();
// Start the collector in a separate thread.
let (input, output) = channel();
let receiver_thread = thread::spawn(move || {
thread_collector.collect(input).unwrap();
});
// Send events to the reporter.
for event in fixtures::EVENTS.iter() {
let result = reporter.report(event.clone());
assert!(result.is_ok());
}
// Call the stop method to stop the collector. This will close the
// channel and the collector will stop reading from it.
thread::sleep(Duration::from_secs(1));
main_collector.stop().unwrap();
// Empty the channel and assert that we received all the events.
let mut count = 0;
for envelope in output.iter() {
assert!(fixtures::EVENTS.contains(&envelope.event));
count += 1;
}
assert_eq!(count, fixtures::EVENTS.len());
// shutdown the receiver thread
receiver_thread.join().unwrap();
}
mod fixtures {
use super::*;
use crate::intercept::{Execution, ProcessId};
use crate::{map_of_strings, vec_of_strings};
use std::collections::HashMap;
use std::path::PathBuf;
pub(super) static ENVELOPES: std::sync::LazyLock<Vec<Envelope>> =
std::sync::LazyLock::new(|| {
vec![
Envelope {
rid: ReporterId::generate(),
timestamp: timestamp(),
event: Event {
pid: pid(),
execution: Execution {
executable: PathBuf::from("/usr/bin/ls"),
arguments: vec_of_strings!["ls", "-l"],
working_dir: PathBuf::from("/tmp"),
environment: HashMap::new(),
},
},
},
Envelope {
rid: ReporterId::generate(),
timestamp: timestamp(),
event: Event {
pid: pid(),
execution: Execution {
executable: PathBuf::from("/usr/bin/cc"),
arguments: vec_of_strings![
"cc",
"-c",
"./file_a.c",
"-o",
"./file_a.o"
],
working_dir: PathBuf::from("/home/user"),
environment: map_of_strings! {
"PATH" => "/usr/bin:/bin",
"HOME" => "/home/user",
},
},
},
},
Envelope {
rid: ReporterId::generate(),
timestamp: timestamp(),
event: Event {
pid: pid(),
execution: Execution {
executable: PathBuf::from("/usr/bin/ld"),
arguments: vec_of_strings!["ld", "-o", "./file_a", "./file_a.o"],
working_dir: PathBuf::from("/opt/project"),
environment: map_of_strings! {
"PATH" => "/usr/bin:/bin",
"LD_LIBRARY_PATH" => "/usr/lib:/lib",
},
},
},
},
]
});
pub(super) static EVENTS: std::sync::LazyLock<Vec<Event>> =
std::sync::LazyLock::new(|| ENVELOPES.iter().map(|e| e.event.clone()).collect());
fn timestamp() -> u64 {
rand::random::<u64>()
}
fn pid() -> ProcessId {
ProcessId(rand::random::<u32>())
}
}
}
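For reference, a hedged sketch (not part of this commit) of the TLV frame that `write_into` produces and `read_from` consumes: a 4-byte big-endian length prefix followed by the JSON payload.

// Frame a serialized payload the same way Envelope::write_into does.
fn frame(payload: &[u8]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(4 + payload.len());
    bytes.extend_from_slice(&(payload.len() as u32).to_be_bytes());
    bytes.extend_from_slice(payload);
    bytes
}

// A 7-byte JSON document yields the prefix [0, 0, 0, 7]:
// assert_eq!(&frame(br#"{"x":1}"#)[..4], &[0, 0, 0, 7]);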

View File

@@ -1,9 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
pub mod args;
pub mod config;
mod fixtures;
pub mod intercept;
pub mod modes;
pub mod output;
pub mod semantic;

View File

@@ -1,38 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::intercept::{CollectorService, Envelope, InterceptEnvironment};
use crate::{args, config};
use anyhow::Context;
use std::process::ExitCode;
use std::sync::mpsc::Receiver;
/// The build interceptor is responsible for capturing the build commands and
/// dispatching them to the consumer. The consumer is a function that processes
/// the intercepted command executions.
pub(super) struct BuildInterceptor {
environment: InterceptEnvironment,
}
impl BuildInterceptor {
/// Create a new process execution interceptor.
pub(super) fn new<F>(config: config::Main, consumer: F) -> anyhow::Result<Self>
where
F: FnOnce(Receiver<Envelope>) -> anyhow::Result<()>,
F: Send + 'static,
{
let service = CollectorService::new(consumer)
.with_context(|| "Failed to create the intercept service")?;
let environment = InterceptEnvironment::new(&config.intercept, service)
.with_context(|| "Failed to create the intercept environment")?;
Ok(Self { environment })
}
/// Run the build command in the intercept environment.
pub(super) fn run_build_command(self, command: args::BuildCommand) -> anyhow::Result<ExitCode> {
self.environment
.execute_build_command(command)
.with_context(|| "Failed to execute the build command")
}
}

View File

@@ -1,140 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
pub mod intercept;
pub mod semantic;
use crate::intercept::persistence::{read, write};
use crate::modes::intercept::BuildInterceptor;
use crate::modes::semantic::SemanticAnalysisPipeline;
use crate::{args, config};
use anyhow::Context;
use std::fs::{File, OpenOptions};
use std::io;
use std::io::BufReader;
use std::process::ExitCode;
/// The mode trait is used to run the application in different modes.
pub trait Mode {
fn run(self) -> anyhow::Result<ExitCode>;
}
/// In the intercept mode we are only capturing the build commands
/// and writing them into the output file.
pub struct Intercept {
command: args::BuildCommand,
interceptor: BuildInterceptor,
}
impl Intercept {
/// Create a new intercept mode instance.
pub fn from(
command: args::BuildCommand,
output: args::BuildEvents,
config: config::Main,
) -> anyhow::Result<Self> {
let file_name = output.file_name.as_str();
let output_file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.open(file_name)
.map(io::BufWriter::new)
.with_context(|| format!("Failed to open file: {:?}", file_name))?;
let interceptor =
BuildInterceptor::new(config, move |envelopes| write(output_file, envelopes))?;
Ok(Self {
command,
interceptor,
})
}
}
impl Mode for Intercept {
/// Run the intercept mode by setting up the collector service and
/// the intercept environment. The build command is executed in the
/// intercept environment.
///
/// The exit code is based on the result of the build command.
fn run(self) -> anyhow::Result<ExitCode> {
self.interceptor.run_build_command(self.command)
}
}
/// In the semantic mode we deduce the semantic meaning of the
/// commands executed during the build.
pub struct Semantic {
event_file: BufReader<File>,
semantic: SemanticAnalysisPipeline,
}
impl Semantic {
pub fn from(
input: args::BuildEvents,
output: args::BuildSemantic,
config: config::Main,
) -> anyhow::Result<Self> {
let file_name = input.file_name.as_str();
let event_file = OpenOptions::new()
.read(true)
.open(file_name)
.map(BufReader::new)
.with_context(|| format!("Failed to open file: {:?}", file_name))?;
let semantic = SemanticAnalysisPipeline::from(output, &config)?;
Ok(Self {
event_file,
semantic,
})
}
}
impl Mode for Semantic {
/// Run the semantic mode by reading the event file and analyzing the events.
///
/// The exit code is based on the result of the output writer.
fn run(self) -> anyhow::Result<ExitCode> {
self.semantic
.analyze_and_write(read(self.event_file))
.map(|_| ExitCode::SUCCESS)
}
}
/// The combined mode combines the intercept and semantic modes.
pub struct Combined {
command: args::BuildCommand,
interceptor: BuildInterceptor,
}
impl Combined {
/// Create a new combined mode instance.
pub fn from(
command: args::BuildCommand,
output: args::BuildSemantic,
config: config::Main,
) -> anyhow::Result<Self> {
let semantic = SemanticAnalysisPipeline::from(output, &config)?;
let interceptor = BuildInterceptor::new(config, move |envelopes| {
semantic.analyze_and_write(envelopes)
})?;
Ok(Self {
command,
interceptor,
})
}
}
impl Mode for Combined {
/// Run the combined mode by setting up the collector service and the intercept environment.
/// The build command is executed in the intercept environment. The collected events are
/// then processed by the semantic recognition and transformation. The result is written
/// to the output file.
///
/// The exit code is based on the result of the build command.
fn run(self) -> anyhow::Result<ExitCode> {
self.interceptor.run_build_command(self.command)
}
}

View File

@@ -1,232 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::intercept::Envelope;
use crate::output::OutputWriter;
use crate::semantic::interpreters::create_interpreter;
use crate::semantic::transformation::Transformation;
use crate::{args, config, output, semantic};
use anyhow::Context;
use std::fs::{File, OpenOptions};
use std::io::{BufReader, BufWriter};
use std::path::{Path, PathBuf};
/// The semantic analysis that is independent of the event source.
pub(super) struct SemanticAnalysisPipeline {
interpreter: Box<dyn semantic::Interpreter>,
transform: Box<dyn semantic::Transform>,
output_writer: OutputWriterImpl,
}
impl SemanticAnalysisPipeline {
/// Create a new semantic mode instance.
pub(super) fn from(output: args::BuildSemantic, config: &config::Main) -> anyhow::Result<Self> {
let interpreter = create_interpreter(config);
let transform = Transformation::from(&config.output);
let output_writer = OutputWriterImpl::create(&output, &config.output)?;
Ok(Self {
interpreter: Box::new(interpreter),
transform: Box::new(transform),
output_writer,
})
}
/// Consume the envelopes for analysis and write the result to the output file.
/// This implements the pipeline of the semantic analysis.
pub(super) fn analyze_and_write(
self,
envelopes: impl IntoIterator<Item = Envelope>,
) -> anyhow::Result<()> {
// Set up the pipeline of compilation database entries.
let entries = envelopes
.into_iter()
.inspect(|envelope| log::debug!("envelope: {}", envelope))
.map(|envelope| envelope.event.execution)
.flat_map(|execution| self.interpreter.recognize(&execution))
.inspect(|semantic| log::debug!("semantic: {:?}", semantic))
.flat_map(|semantic| self.transform.apply(semantic));
// Consume the entries and write them to the output file.
// The exit code is based on the result of the output writer.
self.output_writer.run(entries)
}
}
/// The output writer implementation.
///
/// This is a workaround for the lack of trait object support for generic arguments.
/// https://doc.rust-lang.org/reference/items/traits.html#object-safety.
pub(crate) enum OutputWriterImpl {
Clang(ClangOutputWriter),
Semantic(SemanticOutputWriter),
}
impl OutputWriter for OutputWriterImpl {
fn run(
&self,
compiler_calls: impl Iterator<Item = semantic::CompilerCall>,
) -> anyhow::Result<()> {
match self {
OutputWriterImpl::Clang(writer) => writer.run(compiler_calls),
OutputWriterImpl::Semantic(writer) => writer.run(compiler_calls),
}
}
}
impl OutputWriterImpl {
/// Create a new instance of the output writer.
pub(crate) fn create(
args: &args::BuildSemantic,
config: &config::Output,
) -> anyhow::Result<OutputWriterImpl> {
// TODO: This method should fail early if the output file is not writable.
match config {
config::Output::Clang {
format,
sources,
duplicates,
..
} => {
let result = ClangOutputWriter {
output: PathBuf::from(&args.file_name),
append: args.append,
source_filter: sources.clone(),
duplicate_filter: duplicates.clone(),
command_as_array: format.command_as_array,
formatter: From::from(format),
};
Ok(OutputWriterImpl::Clang(result))
}
config::Output::Semantic { .. } => {
let result = SemanticOutputWriter {
output: PathBuf::from(&args.file_name),
};
Ok(OutputWriterImpl::Semantic(result))
}
}
}
}
pub(crate) struct SemanticOutputWriter {
output: PathBuf,
}
impl OutputWriter for SemanticOutputWriter {
fn run(&self, entries: impl Iterator<Item = semantic::CompilerCall>) -> anyhow::Result<()> {
let file_name = &self.output;
let file = File::create(file_name)
.map(BufWriter::new)
.with_context(|| format!("Failed to create file: {:?}", file_name.as_path()))?;
semantic::serialize(file, entries)?;
Ok(())
}
}
/// Responsible for writing the final compilation database file.
///
/// Implements filtering, formatting and atomic file writing.
/// (Atomic file writing is implemented by writing to a temporary file and renaming it.)
///
/// Filtering is implemented by the `filter` module, and the formatting is implemented by the
/// `json_compilation_db` module.
pub(crate) struct ClangOutputWriter {
output: PathBuf,
append: bool,
source_filter: config::SourceFilter,
duplicate_filter: config::DuplicateFilter,
command_as_array: bool,
formatter: output::formatter::EntryFormatter,
}
impl OutputWriter for ClangOutputWriter {
/// Implements the main logic of the output writer.
fn run(
&self,
compiler_calls: impl Iterator<Item = semantic::CompilerCall>,
) -> anyhow::Result<()> {
let entries = compiler_calls.flat_map(|compiler_call| self.formatter.apply(compiler_call));
if self.append && self.output.exists() {
let entries_from_db = Self::read_from_compilation_db(self.output.as_path())?;
let final_entries = entries.chain(entries_from_db);
self.write_into_compilation_db(final_entries)
} else {
if self.append {
log::warn!("The output file does not exist, the append option is ignored.");
}
self.write_into_compilation_db(entries)
}
}
}
impl ClangOutputWriter {
/// Write the entries to the compilation database.
///
/// The entries are written to a temporary file and then renamed to the final output.
/// This guarantees that the output file is always in a consistent state.
fn write_into_compilation_db(
&self,
entries: impl Iterator<Item = output::clang::Entry>,
) -> anyhow::Result<()> {
// Filter out the entries as per the configuration.
let mut source_filter: output::filter::EntryPredicate = From::from(&self.source_filter);
let mut duplicate_filter: output::filter::EntryPredicate =
From::from(&self.duplicate_filter);
let filtered_entries =
entries.filter(move |entry| source_filter(entry) && duplicate_filter(entry));
// Write the entries to a temporary file.
self.write_into_temporary_compilation_db(filtered_entries)
.and_then(|temp| {
// Rename the temporary file to the final output.
std::fs::rename(temp.as_path(), self.output.as_path()).with_context(|| {
format!(
"Failed to rename file from '{:?}' to '{:?}'.",
temp.as_path(),
self.output.as_path()
)
})
})
}
/// Write the entries to a temporary file and return the temporary file name.
fn write_into_temporary_compilation_db(
&self,
entries: impl Iterator<Item = output::clang::Entry>,
) -> anyhow::Result<PathBuf> {
// Generate a temporary file name.
let file_name = self.output.with_extension("tmp");
// Open the file for writing.
let file = File::create(&file_name)
.map(BufWriter::new)
.with_context(|| format!("Failed to create file: {:?}", file_name.as_path()))?;
// Write the entries to the file.
output::clang::write(self.command_as_array, file, entries)
.with_context(|| format!("Failed to write entries: {:?}", file_name.as_path()))?;
// Return the temporary file name.
Ok(file_name)
}
/// Read the compilation database from a file.
fn read_from_compilation_db(
source: &Path,
) -> anyhow::Result<impl Iterator<Item = output::clang::Entry>> {
let source_copy = source.to_path_buf();
let file = OpenOptions::new()
.read(true)
.open(source)
.map(BufReader::new)
.with_context(|| format!("Failed to open file: {:?}", source))?;
let entries = output::clang::read(file)
.map(move |candidate| {
// The mapping is only here to log the read error before the entry is dropped.
candidate.map_err(|error| {
log::error!("Failed to read file: {:?}, reason: {}", source_copy, error);
error
})
})
.filter_map(Result::ok);
Ok(entries)
}
}
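// A minimal, self-contained sketch of the atomic-write pattern the writer above
// relies on: write to a sibling ".tmp" file, flush, then rename over the target.
// The rename is atomic only when both paths live on the same filesystem.
// `write_atomically` is illustrative and not part of this crate.
#[allow(dead_code)]
fn write_atomically(target: &Path, content: &[u8]) -> std::io::Result<()> {
use std::io::Write;
let temp = target.with_extension("tmp");
{
let mut writer = BufWriter::new(File::create(&temp)?);
writer.write_all(content)?;
writer.flush()?;
} // the writer is dropped (and the file closed) before the rename
std::fs::rename(&temp, target)
}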

View File

@@ -1,88 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! Provides an iterator over a JSON array of objects.
//!
//! from https://github.com/serde-rs/json/issues/404#issuecomment-892957228
use std::io::{self, Read};
use serde::de::DeserializeOwned;
use serde_json::{Deserializer, Error, Result};
pub fn iter_json_array<T, R>(mut reader: R) -> impl Iterator<Item = Result<T>>
where
T: DeserializeOwned,
R: io::Read,
{
let mut at_start = State::AtStart;
std::iter::from_fn(move || yield_next_obj(&mut reader, &mut at_start).transpose())
}
enum State {
AtStart,
AtMiddle,
Finished,
Failed,
}
fn yield_next_obj<T, R>(mut reader: R, state: &mut State) -> Result<Option<T>>
where
T: DeserializeOwned,
R: io::Read,
{
match state {
State::AtStart => {
if read_skipping_ws(&mut reader)? == b'[' {
// read the next char to see if the array is empty
let peek = read_skipping_ws(&mut reader)?;
if peek == b']' {
*state = State::Finished;
Ok(None)
} else {
*state = State::AtMiddle;
deserialize_single(io::Cursor::new([peek]).chain(reader)).map(Some)
}
} else {
*state = State::Failed;
Err(serde::de::Error::custom("expected `[`"))
}
}
State::AtMiddle => match read_skipping_ws(&mut reader)? {
b',' => deserialize_single(reader).map(Some),
b']' => {
*state = State::Finished;
Ok(None)
}
_ => {
*state = State::Failed;
Err(serde::de::Error::custom("expected `,` or `]`"))
}
},
State::Finished => Ok(None),
State::Failed => Ok(None),
}
}
fn deserialize_single<T, R>(reader: R) -> Result<T>
where
T: DeserializeOwned,
R: io::Read,
{
let next_obj = Deserializer::from_reader(reader).into_iter::<T>().next();
match next_obj {
Some(result) => result,
None => Err(serde::de::Error::custom("premature EOF")),
}
}
fn read_skipping_ws(mut reader: impl io::Read) -> Result<u8> {
loop {
let mut byte = 0u8;
if let Err(io) = reader.read_exact(std::slice::from_mut(&mut byte)) {
return Err(Error::io(io));
}
if !byte.is_ascii_whitespace() {
return Ok(byte);
}
}
}
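// A hedged usage sketch: stream items out of a JSON array without loading the
// whole document into memory. Works for any `T: DeserializeOwned`; the input
// literal and the function name `demo` are illustrative only.
#[allow(dead_code)]
fn demo() -> Result<()> {
let input = br#"[ {"a": 1}, {"a": 2} ]"#;
for item in iter_json_array::<serde_json::Value, _>(&input[..]) {
// each item is deserialized lazily, one array element at a time
println!("{}", item?);
}
Ok(())
}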

View File

@@ -1,90 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! This crate provides support for reading and writing JSON compilation database files.
//!
//! A compilation database is a set of records which describe the compilation of the
//! source files in a given project. It describes the compiler invocation command to
//! compile a source module to an object file.
//!
//! This database can have many forms. One well known and supported format is the JSON
//! compilation database, which is a simple JSON file containing the list of compilations
//! as an array. The JSON compilation database format is defined in the LLVM project
//! [documentation](https://clang.llvm.org/docs/JSONCompilationDatabase.html).
use serde::ser::{SerializeSeq, Serializer};
use serde_json::Error;
mod iterator;
mod tests;
mod type_de;
mod type_ser;
/// Represents an entry of the compilation database.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Entry {
/// The main translation unit source processed by this compilation step.
/// This is used by tools as the key into the compilation database.
/// There can be multiple command objects for the same file, for example if the same
/// source file is compiled with different configurations.
pub file: std::path::PathBuf,
/// The compile command executed. This must be a valid command to rerun the exact
/// compilation step for the translation unit in the environment the build system uses.
/// Shell expansion is not supported.
pub arguments: Vec<String>,
/// The working directory of the compilation. All paths specified in the command or
/// file fields must be either absolute or relative to this directory.
pub directory: std::path::PathBuf,
/// The name of the output created by this compilation step. This field is optional.
/// It can be used to distinguish different processing modes of the same input file.
pub output: Option<std::path::PathBuf>,
}
/// Deserialize entries from a JSON array into an iterator.
pub fn read(reader: impl std::io::Read) -> impl Iterator<Item = Result<Entry, Error>> {
iterator::iter_json_array(reader)
}
/// Serialize entries from an iterator into a JSON array. The `command_as_array`
/// flag selects between the `arguments` and the `command` field syntax.
pub fn write(
command_as_array: bool,
writer: impl std::io::Write,
entries: impl Iterator<Item = Entry>,
) -> Result<(), Error> {
let method = if command_as_array {
write_with_arguments
} else {
write_with_command
};
method(writer, entries)
}
/// Serialize entries from an iterator into a JSON array.
///
/// It uses the `arguments` field of the `Entry` struct to serialize the array of strings.
pub(super) fn write_with_arguments(
writer: impl std::io::Write,
entries: impl Iterator<Item = Entry>,
) -> Result<(), Error> {
let mut ser = serde_json::Serializer::pretty(writer);
let mut seq = ser.serialize_seq(None)?;
for entry in entries {
seq.serialize_element(&entry)?;
}
seq.end()
}
/// Serialize entries from an iterator into a JSON array.
///
/// It uses the `command` field of the entry, which is the shell-joined form of the arguments.
pub(super) fn write_with_command(
writer: impl std::io::Write,
entries: impl Iterator<Item = Entry>,
) -> Result<(), Error> {
let mut ser = serde_json::Serializer::pretty(writer);
let mut seq = ser.serialize_seq(None)?;
for entry in entries {
let entry = type_ser::EntryWithCommand::from(entry);
seq.serialize_element(&entry)?;
}
seq.end()
}
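// A hedged sketch of a read/write round trip with this module: read entries
// lazily from one stream and re-serialize them with the "arguments" array
// syntax. Malformed entries are silently dropped here; a real caller would
// log them. The function name is illustrative only.
#[allow(dead_code)]
fn round_trip(
input: impl std::io::Read,
output: impl std::io::Write,
) -> Result<(), Error> {
let entries = read(input).filter_map(Result::ok);
write(true, output, entries)
}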

View File

@@ -1,314 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#[cfg(test)]
mod failures {
use super::super::*;
use serde_json::error::Category;
use serde_json::json;
macro_rules! assert_semantic_error {
($x:expr) => {
match $x {
Some(Err(error)) => assert_eq!(error.classify(), Category::Data),
_ => panic!("should be a semantic error"),
}
};
}
#[test]
fn load_non_json_content() {
let content = r#"this is not json"#;
let mut result = read(content.as_bytes());
assert_semantic_error!(result.next());
assert!(result.next().is_none());
}
#[test]
fn load_not_expected_json_content() {
let content = json!({ "file": "string" }).to_string();
let mut result = read(content.as_bytes());
assert_semantic_error!(result.next());
assert!(result.next().is_none());
}
#[test]
fn load_on_bad_value() {
let content = json!([
{
"directory": " ",
"file": "./file_a.c",
"command": "cc -Dvalue=\"this"
}
])
.to_string();
let mut result = read(content.as_bytes());
assert_semantic_error!(result.next());
assert!(result.next().is_none());
}
#[test]
fn load_on_multiple_commands() {
let content = json!([
{
"directory": " ",
"file": "./file_a.c",
"command": "cc source.c",
"arguments": ["cc", "source.c"],
}
])
.to_string();
let mut result = read(content.as_bytes());
assert_semantic_error!(result.next());
assert!(result.next().is_none());
}
}
#[cfg(test)]
mod success {
use super::super::*;
use serde_json::json;
mod empty {
use super::*;
#[test]
fn load_empty_array() {
let content = json!([]).to_string();
let mut result = read(content.as_bytes());
assert!(result.next().is_none());
}
}
mod basic {
use super::*;
use crate::vec_of_strings;
use std::io::{Cursor, Seek, SeekFrom};
fn expected_values() -> Vec<Entry> {
vec![
Entry {
directory: std::path::PathBuf::from("/home/user"),
file: std::path::PathBuf::from("./file_a.c"),
arguments: vec_of_strings!("cc", "-c", "./file_a.c", "-o", "./file_a.o"),
output: None,
},
Entry {
directory: std::path::PathBuf::from("/home/user"),
file: std::path::PathBuf::from("./file_b.c"),
arguments: vec_of_strings!("cc", "-c", "./file_b.c", "-o", "./file_b.o"),
output: Some(std::path::PathBuf::from("./file_b.o")),
},
]
}
fn expected_with_array_syntax() -> serde_json::Value {
json!([
{
"directory": "/home/user",
"file": "./file_a.c",
"arguments": ["cc", "-c", "./file_a.c", "-o", "./file_a.o"]
},
{
"directory": "/home/user",
"file": "./file_b.c",
"output": "./file_b.o",
"arguments": ["cc", "-c", "./file_b.c", "-o", "./file_b.o"]
}
])
}
fn expected_with_string_syntax() -> serde_json::Value {
json!([
{
"directory": "/home/user",
"file": "./file_a.c",
"command": "cc -c ./file_a.c -o ./file_a.o"
},
{
"directory": "/home/user",
"file": "./file_b.c",
"output": "./file_b.o",
"command": "cc -c ./file_b.c -o ./file_b.o"
}
])
}
#[test]
fn load_content_with_string_command_syntax() {
let content = expected_with_string_syntax().to_string();
let result = read(content.as_bytes());
let entries: Vec<Entry> = result.map(|e| e.unwrap()).collect();
assert_eq!(expected_values(), entries);
}
#[test]
fn load_content_with_array_command_syntax() {
let content = expected_with_array_syntax().to_string();
let result = read(content.as_bytes());
let entries: Vec<Entry> = result.map(|e| e.unwrap()).collect();
assert_eq!(expected_values(), entries);
}
#[test]
fn save_with_array_command_syntax() -> Result<(), Error> {
let input = expected_values();
// Create fake "file"
let mut buffer = Cursor::new(Vec::new());
let result = write_with_arguments(&mut buffer, input.into_iter());
assert!(result.is_ok());
// Use the fake "file" as input
buffer.seek(SeekFrom::Start(0)).unwrap();
let content: serde_json::Value = serde_json::from_reader(&mut buffer)?;
assert_eq!(expected_with_array_syntax(), content);
Ok(())
}
#[test]
fn save_with_string_command_syntax() -> Result<(), Error> {
let input = expected_values();
// Create fake "file"
let mut buffer = Cursor::new(Vec::new());
let result = write_with_command(&mut buffer, input.into_iter());
assert!(result.is_ok());
// Use the fake "file" as input
buffer.seek(SeekFrom::Start(0)).unwrap();
let content: serde_json::Value = serde_json::from_reader(&mut buffer)?;
assert_eq!(expected_with_string_syntax(), content);
Ok(())
}
}
mod quoted {
use super::*;
use crate::vec_of_strings;
use serde_json::Value;
use std::io::{Cursor, Seek, SeekFrom};
fn expected_values() -> Vec<Entry> {
vec![
Entry {
directory: std::path::PathBuf::from("/home/user"),
file: std::path::PathBuf::from("./file_a.c"),
arguments: vec_of_strings!(
"cc",
"-c",
"-D",
r#"name=\"me\""#,
"./file_a.c",
"-o",
"./file_a.o"
),
output: None,
},
Entry {
directory: std::path::PathBuf::from("/home/user"),
file: std::path::PathBuf::from("./file_b.c"),
arguments: vec_of_strings!(
"cc",
"-c",
"-D",
r#"name="me""#,
"./file_b.c",
"-o",
"./file_b.o"
),
output: None,
},
]
}
fn expected_with_array_syntax() -> serde_json::Value {
json!([
{
"directory": "/home/user",
"file": "./file_a.c",
"arguments": ["cc", "-c", "-D", r#"name=\"me\""#, "./file_a.c", "-o", "./file_a.o"]
},
{
"directory": "/home/user",
"file": "./file_b.c",
"arguments": ["cc", "-c", "-D", r#"name="me""#, "./file_b.c", "-o", "./file_b.o"]
}
])
}
fn expected_with_string_syntax() -> serde_json::Value {
json!([
{
"directory": "/home/user",
"file": "./file_a.c",
"command": r#"cc -c -D 'name=\"me\"' ./file_a.c -o ./file_a.o"#
},
{
"directory": "/home/user",
"file": "./file_b.c",
"command": r#"cc -c -D 'name="me"' ./file_b.c -o ./file_b.o"#
}
])
}
#[test]
fn load_content_with_array_command_syntax() {
let content = expected_with_array_syntax().to_string();
let result = read(content.as_bytes());
let entries: Vec<Entry> = result.map(|e| e.unwrap()).collect();
assert_eq!(expected_values(), entries);
}
#[test]
fn save_with_array_command_syntax() -> Result<(), Error> {
let input = expected_values();
// Create fake "file"
let mut buffer = Cursor::new(Vec::new());
let result = write_with_arguments(&mut buffer, input.into_iter());
assert!(result.is_ok());
// Use the fake "file" as input
buffer.seek(SeekFrom::Start(0)).unwrap();
let content: Value = serde_json::from_reader(&mut buffer)?;
assert_eq!(expected_with_array_syntax(), content);
Ok(())
}
#[test]
fn save_with_string_command_syntax() -> Result<(), Error> {
let input = expected_values();
// Create fake "file"
let mut buffer = Cursor::new(Vec::new());
let result = write_with_command(&mut buffer, input.into_iter());
assert!(result.is_ok());
// Use the fake "file" as input
buffer.seek(SeekFrom::Start(0)).unwrap();
let content: Value = serde_json::from_reader(&mut buffer)?;
assert_eq!(expected_with_string_syntax(), content);
Ok(())
}
}
}

View File

@@ -1,146 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! Implements deserialization of the `Entry` struct.
use std::fmt;
use std::path;
use serde::de::{self, Deserialize, Deserializer, MapAccess, Visitor};
use super::Entry;
impl<'de> Deserialize<'de> for Entry {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
enum Field {
Directory,
File,
Command,
Arguments,
Output,
}
const FIELDS: &[&str] = &["directory", "file", "command", "arguments", "output"];
impl<'de> Deserialize<'de> for Field {
fn deserialize<D>(deserializer: D) -> Result<Field, D::Error>
where
D: Deserializer<'de>,
{
struct FieldVisitor;
impl Visitor<'_> for FieldVisitor {
type Value = Field;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter
.write_str("`directory`, `file`, `command`, `arguments`, or `output`")
}
fn visit_str<E>(self, value: &str) -> Result<Field, E>
where
E: de::Error,
{
match value {
"directory" => Ok(Field::Directory),
"file" => Ok(Field::File),
"command" => Ok(Field::Command),
"arguments" => Ok(Field::Arguments),
"output" => Ok(Field::Output),
_ => Err(de::Error::unknown_field(value, FIELDS)),
}
}
}
deserializer.deserialize_identifier(FieldVisitor)
}
}
struct EntryVisitor;
impl<'de> Visitor<'de> for EntryVisitor {
type Value = Entry;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("struct Entry")
}
fn visit_map<V>(self, mut map: V) -> Result<Entry, V::Error>
where
V: MapAccess<'de>,
{
let mut directory: Option<path::PathBuf> = None;
let mut file: Option<path::PathBuf> = None;
let mut command: Option<String> = None;
let mut arguments: Option<Vec<String>> = None;
let mut output: Option<path::PathBuf> = None;
while let Some(key) = map.next_key()? {
match key {
Field::Directory => {
if directory.is_some() {
return Err(de::Error::duplicate_field("directory"));
}
directory = Some(map.next_value()?);
}
Field::File => {
if file.is_some() {
return Err(de::Error::duplicate_field("file"));
}
file = Some(map.next_value()?);
}
Field::Command => {
if command.is_some() {
return Err(de::Error::duplicate_field("command"));
}
command = Some(map.next_value()?);
}
Field::Arguments => {
if arguments.is_some() {
return Err(de::Error::duplicate_field("arguments"));
}
arguments = Some(map.next_value()?);
}
Field::Output => {
if output.is_some() {
return Err(de::Error::duplicate_field("output"));
}
output = Some(map.next_value()?);
}
}
}
let directory = directory.ok_or_else(|| de::Error::missing_field("directory"))?;
let file = file.ok_or_else(|| de::Error::missing_field("file"))?;
if arguments.is_some() && command.is_some() {
return Err(de::Error::custom(
"Either the `command` or the `arguments` field needs to be specified, but not both.",
));
}
let arguments = match arguments {
Some(arguments) => arguments,
None => {
let command = command
.ok_or_else(|| de::Error::missing_field("`command` or `arguments`"))?;
shell_words::split(command.as_str()).map_err(|_| {
de::Error::invalid_value(
de::Unexpected::Str(command.as_str()),
&"quotes need to be matched",
)
})?
}
};
Ok(Entry {
directory,
file,
arguments,
output,
})
}
}
deserializer.deserialize_struct("Entry", FIELDS, EntryVisitor)
}
}
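// Illustrative check of the `command` fallback implemented above: when only the
// `command` field is present, it is split with `shell_words` into the
// `arguments` vector. A sketch, not part of the test suite.
#[allow(dead_code)]
fn demo() -> serde_json::Result<()> {
let json = r#"{ "directory": "/tmp", "file": "a.c", "command": "cc -c a.c" }"#;
let entry: Entry = serde_json::from_str(json)?;
assert_eq!(entry.arguments, vec!["cc", "-c", "a.c"]);
Ok(())
}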

View File

@@ -1,60 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! Implements serialization of the `Entry` struct.
use serde::ser::{Serialize, SerializeStruct, Serializer};
use super::Entry;
impl Serialize for Entry {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let size = if self.output.is_some() { 4 } else { 3 };
let mut state = serializer.serialize_struct("Entry", size)?;
state.serialize_field("directory", &self.directory)?;
state.serialize_field("file", &self.file)?;
state.serialize_field("arguments", &self.arguments)?;
if self.output.is_some() {
state.serialize_field("output", &self.output)?;
}
state.end()
}
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct EntryWithCommand {
pub file: std::path::PathBuf,
pub command: String,
pub directory: std::path::PathBuf,
pub output: Option<std::path::PathBuf>,
}
impl From<Entry> for EntryWithCommand {
fn from(entry: Entry) -> Self {
Self {
file: entry.file,
command: shell_words::join(&entry.arguments),
directory: entry.directory,
output: entry.output,
}
}
}
impl Serialize for EntryWithCommand {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let size = if self.output.is_some() { 4 } else { 3 };
let mut state = serializer.serialize_struct("Entry", size)?;
state.serialize_field("directory", &self.directory)?;
state.serialize_field("file", &self.file)?;
state.serialize_field("command", &self.command)?;
if self.output.is_some() {
state.serialize_field("output", &self.output)?;
}
state.end()
}
}
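// Sketch: the `command` string is produced by shell-joining the arguments, so
// quoting survives serialization (values below are illustrative).
#[allow(dead_code)]
fn demo() {
let entry = Entry {
file: std::path::PathBuf::from("a.c"),
arguments: vec!["cc".into(), "-D".into(), r#"name="me""#.into(), "a.c".into()],
directory: std::path::PathBuf::from("/tmp"),
output: None,
};
let with_command = EntryWithCommand::from(entry);
assert_eq!(with_command.command, r#"cc -D 'name="me"' a.c"#);
}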

View File

@@ -1,461 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use std::hash::Hash;
use std::path::Path;
use crate::config;
use crate::output::clang::Entry;
use builder::create_hash;
use builder::EntryPredicateBuilder as Builder;
/// A predicate that can be used to filter compilation database entries.
///
/// If the predicate returns `true`, the entry is included in the result set.
/// If the predicate returns `false`, the entry is excluded from the result set.
pub type EntryPredicate = Box<dyn FnMut(&Entry) -> bool>;
impl From<&config::SourceFilter> for EntryPredicate {
/// Create a filter from the configuration.
fn from(config: &config::SourceFilter) -> Self {
let source_exist_check = Builder::filter_by_source_existence(config.only_existing_files);
let mut source_path_checks = Builder::new();
for config::DirectoryFilter { path, ignore } in &config.paths {
let filter = Builder::filter_by_source_path(path);
match ignore {
config::Ignore::Always => {
source_path_checks = source_path_checks & !filter;
}
config::Ignore::Never => {
source_path_checks = source_path_checks & filter;
}
}
}
(source_exist_check & source_path_checks).build()
}
}
impl From<&config::DuplicateFilter> for EntryPredicate {
/// Create a filter from the configuration.
fn from(config: &config::DuplicateFilter) -> Self {
let hash_function = create_hash(&config.by_fields);
Builder::filter_duplicate_entries(hash_function).build()
}
}
mod builder {
use super::*;
use std::collections::HashSet;
use std::hash::{DefaultHasher, Hasher};
/// Represents a builder object that can be used to construct an entry predicate.
pub(super) struct EntryPredicateBuilder {
candidate: Option<EntryPredicate>,
}
impl EntryPredicateBuilder {
/// Creates an entry predicate from the builder.
pub(super) fn build(self) -> EntryPredicate {
match self.candidate {
Some(predicate) => predicate,
None => Box::new(|_: &Entry| true),
}
}
/// Construct a predicate builder that is empty.
#[inline]
pub(crate) fn new() -> Self {
Self { candidate: None }
}
/// Construct a predicate builder that implements a predicate.
#[inline]
fn from<P>(predicate: P) -> Self
where
P: FnMut(&Entry) -> bool + 'static,
{
Self {
candidate: Some(Box::new(predicate)),
}
}
/// Create a predicate that keeps only entries
/// whose source file is under the given path.
pub(super) fn filter_by_source_path(path: &Path) -> Self {
let owned_path = path.to_owned();
Self::from(move |entry| entry.file.starts_with(&owned_path))
}
/// Create a predicate that filters out entries
/// whose source file does not exist.
pub(super) fn filter_by_source_existence(only_existing: bool) -> Self {
if only_existing {
Self::from(|entry| entry.file.is_file())
} else {
Self::new()
}
}
/// Create a predicate that filters out duplicate entries,
/// where duplicates are detected by the given hash function.
pub(super) fn filter_duplicate_entries(
hash_function: impl Fn(&Entry) -> u64 + 'static,
) -> Self {
let mut have_seen = HashSet::new();
Self::from(move |entry| {
let hash = hash_function(entry);
// `HashSet::insert` returns true only when the value was not present.
have_seen.insert(hash)
})
}
}
/// Implement the AND operator for combining predicates.
impl std::ops::BitAnd for EntryPredicateBuilder {
type Output = EntryPredicateBuilder;
fn bitand(self, rhs: Self) -> Self::Output {
match (self.candidate, rhs.candidate) {
(None, None) => EntryPredicateBuilder::new(),
(None, some) => EntryPredicateBuilder { candidate: some },
(some, None) => EntryPredicateBuilder { candidate: some },
(Some(mut lhs), Some(mut rhs)) => {
EntryPredicateBuilder::from(move |entry| lhs(entry) && rhs(entry))
}
}
}
}
/// Implement the NOT operator for combining predicates.
impl std::ops::Not for EntryPredicateBuilder {
type Output = EntryPredicateBuilder;
fn not(self) -> Self::Output {
match self.candidate {
Some(mut original) => Self::from(move |entry| !original(entry)),
None => Self::new(),
}
}
}
/// Create a hash function that is using the given fields to calculate the hash of an entry.
pub(super) fn create_hash(fields: &[config::OutputFields]) -> impl Fn(&Entry) -> u64 + 'static {
let owned_fields: Vec<config::OutputFields> = fields.to_vec();
move |entry: &Entry| {
let mut hasher = DefaultHasher::new();
for field in &owned_fields {
match field {
config::OutputFields::Directory => entry.directory.hash(&mut hasher),
config::OutputFields::File => entry.file.hash(&mut hasher),
config::OutputFields::Arguments => entry.arguments.hash(&mut hasher),
config::OutputFields::Output => entry.output.hash(&mut hasher),
}
}
hasher.finish()
}
}
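/// A sketch of composing the predicates defined above, mirroring what the
/// `From<&config::SourceFilter>` implementation does: keep entries whose
/// source file exists and lives under "/src" (an illustrative path).
#[allow(dead_code)]
pub(super) fn example_predicate() -> EntryPredicate {
let existing = EntryPredicateBuilder::filter_by_source_existence(true);
let under_src = EntryPredicateBuilder::filter_by_source_path(Path::new("/src"));
(existing & under_src).build()
}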
#[cfg(test)]
mod sources_test {
use super::*;
use crate::vec_of_strings;
use std::path::PathBuf;
#[test]
fn test_filter_by_source_paths() {
let input: Vec<Entry> = vec![
Entry {
file: PathBuf::from("/home/user/project/source/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: None,
},
Entry {
file: PathBuf::from("/home/user/project/test/source.c"),
arguments: vec_of_strings!["cc", "-c", "test.c"],
directory: PathBuf::from("/home/user/project"),
output: None,
},
];
let expected: Vec<Entry> = vec![input[0].clone()];
let config = config::SourceFilter {
only_existing_files: false,
paths: vec![
config::DirectoryFilter {
path: PathBuf::from("/home/user/project/source"),
ignore: config::Ignore::Never,
},
config::DirectoryFilter {
path: PathBuf::from("/home/user/project/test"),
ignore: config::Ignore::Always,
},
],
};
let sut: EntryPredicate = From::from(&config);
let result: Vec<Entry> = input.into_iter().filter(sut).collect();
assert_eq!(result, expected);
}
}
#[cfg(test)]
mod existence_test {
use super::*;
use crate::vec_of_strings;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
#[test]
fn test_duplicate_detection_works() {
let input: Vec<Entry> = vec![
Entry {
file: PathBuf::from("/home/user/project/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: Some(PathBuf::from("/home/user/project/source.o")),
},
Entry {
file: PathBuf::from("/home/user/project/source.c"),
arguments: vec_of_strings!["cc", "-c", "-Wall", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: Some(PathBuf::from("/home/user/project/source.o")),
},
Entry {
file: PathBuf::from("/home/user/project/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c", "-o", "test.o"],
directory: PathBuf::from("/home/user/project"),
output: Some(PathBuf::from("/home/user/project/test.o")),
},
];
let expected: Vec<Entry> = vec![input[0].clone(), input[2].clone()];
let hash_function = |entry: &Entry| {
let mut hasher = DefaultHasher::new();
entry.file.hash(&mut hasher);
entry.output.hash(&mut hasher);
hasher.finish()
};
let sut: EntryPredicate =
EntryPredicateBuilder::filter_duplicate_entries(hash_function).build();
let result: Vec<Entry> = input.into_iter().filter(sut).collect();
assert_eq!(result, expected);
}
}
#[cfg(test)]
mod create_hash_tests {
use super::*;
use crate::vec_of_strings;
use std::path::PathBuf;
#[test]
fn test_create_hash_with_directory_field() {
let entry = create_test_entry();
let fields = vec![config::OutputFields::Directory];
let hash_function = create_hash(&fields);
let hash = hash_function(&entry);
let mut hasher = DefaultHasher::new();
entry.directory.hash(&mut hasher);
let expected_hash = hasher.finish();
assert_eq!(hash, expected_hash);
}
#[test]
fn test_create_hash_with_file_field() {
let entry = create_test_entry();
let fields = vec![config::OutputFields::File];
let hash_function = create_hash(&fields);
let hash = hash_function(&entry);
let mut hasher = DefaultHasher::new();
entry.file.hash(&mut hasher);
let expected_hash = hasher.finish();
assert_eq!(hash, expected_hash);
}
#[test]
fn test_create_hash_with_arguments_field() {
let entry = create_test_entry();
let fields = vec![config::OutputFields::Arguments];
let hash_function = create_hash(&fields);
let hash = hash_function(&entry);
let mut hasher = DefaultHasher::new();
entry.arguments.hash(&mut hasher);
let expected_hash = hasher.finish();
assert_eq!(hash, expected_hash);
}
#[test]
fn test_create_hash_with_output_field() {
let entry = create_test_entry();
let fields = vec![config::OutputFields::Output];
let hash_function = create_hash(&fields);
let hash = hash_function(&entry);
let mut hasher = DefaultHasher::new();
entry.output.hash(&mut hasher);
let expected_hash = hasher.finish();
assert_eq!(hash, expected_hash);
}
#[test]
fn test_create_hash_with_multiple_fields() {
let entry = create_test_entry();
let fields = vec![
config::OutputFields::Directory,
config::OutputFields::File,
config::OutputFields::Arguments,
config::OutputFields::Output,
];
let hash_function = create_hash(&fields);
let hash = hash_function(&entry);
let mut hasher = DefaultHasher::new();
entry.directory.hash(&mut hasher);
entry.file.hash(&mut hasher);
entry.arguments.hash(&mut hasher);
entry.output.hash(&mut hasher);
let expected_hash = hasher.finish();
assert_eq!(hash, expected_hash);
}
fn create_test_entry() -> Entry {
Entry {
file: PathBuf::from("/home/user/project/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: Some(PathBuf::from("/home/user/project/source.o")),
}
}
}
#[cfg(test)]
mod bitand_tests {
use super::*;
use crate::vec_of_strings;
use std::path::PathBuf;
#[test]
fn test_bitand_both_predicates_true() {
let input = create_test_entries();
let predicate1 = EntryPredicateBuilder::from(|_: &Entry| true);
let predicate2 = EntryPredicateBuilder::from(|_: &Entry| true);
let combined_predicate = (predicate1 & predicate2).build();
let result: Vec<Entry> = input.into_iter().filter(combined_predicate).collect();
assert_eq!(result.len(), 1);
}
#[test]
fn test_bitand_first_predicate_false() {
let input = create_test_entries();
let predicate1 = EntryPredicateBuilder::from(|_: &Entry| false);
let predicate2 = EntryPredicateBuilder::from(|_: &Entry| true);
let combined_predicate = (predicate1 & predicate2).build();
let result: Vec<Entry> = input.into_iter().filter(combined_predicate).collect();
assert_eq!(result.len(), 0);
}
#[test]
fn test_bitand_second_predicate_false() {
let input = create_test_entries();
let predicate1 = EntryPredicateBuilder::from(|_: &Entry| true);
let predicate2 = EntryPredicateBuilder::from(|_: &Entry| false);
let combined_predicate = (predicate1 & predicate2).build();
let result: Vec<Entry> = input.into_iter().filter(combined_predicate).collect();
assert_eq!(result.len(), 0);
}
#[test]
fn test_bitand_both_predicates_false() {
let input = create_test_entries();
let predicate1 = EntryPredicateBuilder::from(|_: &Entry| false);
let predicate2 = EntryPredicateBuilder::from(|_: &Entry| false);
let combined_predicate = (predicate1 & predicate2).build();
let result: Vec<Entry> = input.into_iter().filter(combined_predicate).collect();
assert_eq!(result.len(), 0);
}
fn create_test_entries() -> Vec<Entry> {
vec![Entry {
file: PathBuf::from("/home/user/project/source/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: None,
}]
}
}
#[cfg(test)]
mod not_tests {
use super::*;
use crate::vec_of_strings;
use std::path::PathBuf;
#[test]
fn test_not_predicate_true() {
let input = create_test_entries();
let predicate = EntryPredicateBuilder::from(|_: &Entry| true);
let not_predicate = (!predicate).build();
let result: Vec<Entry> = input.into_iter().filter(not_predicate).collect();
assert_eq!(result.len(), 0);
}
#[test]
fn test_not_predicate_false() {
let input = create_test_entries();
let predicate = EntryPredicateBuilder::from(|_: &Entry| false);
let not_predicate = (!predicate).build();
let result: Vec<Entry> = input.into_iter().filter(not_predicate).collect();
assert_eq!(result.len(), 1);
}
fn create_test_entries() -> Vec<Entry> {
vec![Entry {
file: PathBuf::from("/home/user/project/source/source.c"),
arguments: vec_of_strings!["cc", "-c", "source.c"],
directory: PathBuf::from("/home/user/project"),
output: None,
}]
}
}
}

View File

@@ -1,270 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::output::clang::Entry;
use crate::{config, semantic};
use anyhow::anyhow;
use path_absolutize::Absolutize;
use std::borrow::Cow;
use std::io;
use std::path::{Path, PathBuf};
pub struct EntryFormatter {
drop_output_field: bool,
path_format: config::PathFormat,
}
impl From<&config::Format> for EntryFormatter {
/// Create a formatter from the configuration.
fn from(config: &config::Format) -> Self {
let drop_output_field = config.drop_output_field;
let path_format = config.paths_as.clone();
Self {
drop_output_field,
path_format,
}
}
}
impl EntryFormatter {
/// Convert a compiler call into entries.
///
/// Each compiler pass is converted into an entry. Errors are logged and the
/// affected passes are skipped. The entry format is controlled by the configuration.
pub(crate) fn apply(&self, compiler_call: semantic::CompilerCall) -> Vec<Entry> {
let semantic::CompilerCall {
compiler,
working_dir,
passes,
} = compiler_call;
passes
.into_iter()
.map(|pass| self.try_convert_from_pass(&working_dir, &compiler, pass))
// This map is only here to log the error.
.map(|result| result.map_err(|error| log::info!("{}", error)))
.filter_map(Result::ok)
.collect()
}
/// Creates a single entry from a compiler pass if possible.
///
/// The preprocess pass is ignored, and the compile pass is converted into an entry.
///
/// The file and directory paths are converted into fully qualified paths when required.
fn try_convert_from_pass(
&self,
working_dir: &Path,
compiler: &Path,
pass: semantic::CompilerPass,
) -> anyhow::Result<Entry> {
match pass {
semantic::CompilerPass::Preprocess => {
Err(anyhow!("preprocess pass should not show up in results"))
}
semantic::CompilerPass::Compile {
source,
output,
flags,
} => {
let output_clone = output.clone();
let output_result = match output.filter(|_| !self.drop_output_field) {
None => None,
Some(candidate) => {
let formatted = self.format_path(candidate.as_path(), working_dir)?;
Some(PathBuf::from(formatted))
}
};
Ok(Entry {
file: PathBuf::from(self.format_path(source.as_path(), working_dir)?),
directory: working_dir.to_path_buf(),
output: output_result,
arguments: Self::format_arguments(compiler, &source, &flags, output_clone)?,
})
}
}
}
/// Reconstruct the arguments for the compiler call.
///
/// These are not the same as the original command line arguments, because the
/// compiler call is decomposed into separate lists of arguments. Reassembling
/// the parts will not necessarily reproduce the original command line. For
/// example, multiple source files are treated as separate compiler calls, and
/// the order of the arguments can change.
fn format_arguments(
compiler: &Path,
source: &Path,
flags: &[String],
output: Option<PathBuf>,
) -> anyhow::Result<Vec<String>> {
let mut arguments: Vec<String> = vec![];
// Assemble the arguments as they would be for a single source file.
arguments.push(into_string(compiler)?);
for flag in flags {
arguments.push(flag.clone());
}
if let Some(file) = output {
arguments.push(String::from("-o"));
arguments.push(into_string(file.as_path())?)
}
arguments.push(into_string(source)?);
Ok(arguments)
}
fn format_path<'a>(&self, path: &'a Path, root: &Path) -> io::Result<Cow<'a, Path>> {
// Will compute the absolute path if needed.
let absolute = || {
if path.is_absolute() {
path.absolutize()
} else {
path.absolutize_from(root)
}
};
match self.path_format {
config::PathFormat::Original => Ok(Cow::from(path)),
config::PathFormat::Absolute => absolute(),
config::PathFormat::Canonical => absolute()?.canonicalize().map(Cow::from),
}
}
}
fn into_string(path: &Path) -> anyhow::Result<String> {
path.to_path_buf()
.into_os_string()
.into_string()
.map_err(|_| anyhow!("Path can't be encoded as UTF-8"))
}
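// A sketch of the argument reconstruction described above: a single pass is
// reassembled as compiler, flags, "-o" output, then the source file. The
// values are illustrative; the ordering is this module's convention, not the
// original command line.
#[allow(dead_code)]
fn demo() -> anyhow::Result<()> {
let arguments = EntryFormatter::format_arguments(
Path::new("cc"),
Path::new("main.c"),
&[String::from("-Wall")],
Some(PathBuf::from("main.o")),
)?;
assert_eq!(arguments, vec!["cc", "-Wall", "-o", "main.o", "main.c"]);
Ok(())
}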
#[cfg(test)]
mod test {
use super::*;
use crate::vec_of_strings;
#[test]
fn test_non_compilations() {
let input = semantic::CompilerCall {
compiler: PathBuf::from("/usr/bin/cc"),
working_dir: PathBuf::from("/home/user"),
passes: vec![semantic::CompilerPass::Preprocess],
};
let format = config::Format {
command_as_array: true,
drop_output_field: false,
paths_as: config::PathFormat::Original,
};
let sut: EntryFormatter = (&format).into();
let result = sut.apply(input);
let expected: Vec<Entry> = vec![];
assert_eq!(expected, result);
}
#[test]
fn test_single_source_compilation() {
let input = semantic::CompilerCall {
compiler: PathBuf::from("/usr/bin/clang"),
working_dir: PathBuf::from("/home/user"),
passes: vec![semantic::CompilerPass::Compile {
source: PathBuf::from("source.c"),
output: Some(PathBuf::from("source.o")),
flags: vec_of_strings!["-Wall"],
}],
};
let format = config::Format {
command_as_array: true,
drop_output_field: false,
paths_as: config::PathFormat::Original,
};
let sut: EntryFormatter = (&format).into();
let result = sut.apply(input);
let expected = vec![Entry {
directory: PathBuf::from("/home/user"),
file: PathBuf::from("source.c"),
arguments: vec_of_strings!["/usr/bin/clang", "-Wall", "-o", "source.o", "source.c"],
output: Some(PathBuf::from("source.o")),
}];
assert_eq!(expected, result);
}
#[test]
fn test_multiple_sources_compilation() {
let input = compiler_call_with_multiple_passes();
let format = config::Format {
command_as_array: true,
drop_output_field: true,
paths_as: config::PathFormat::Original,
};
let sut: EntryFormatter = (&format).into();
let result = sut.apply(input);
let expected = vec![
Entry {
directory: PathBuf::from("/home/user"),
file: PathBuf::from("/tmp/source1.c"),
arguments: vec_of_strings!["clang", "-o", "./source1.o", "/tmp/source1.c"],
output: None,
},
Entry {
directory: PathBuf::from("/home/user"),
file: PathBuf::from("../source2.c"),
arguments: vec_of_strings!["clang", "-Wall", "../source2.c"],
output: None,
},
];
assert_eq!(expected, result);
}
#[test]
fn test_multiple_sources_compilation_with_abs_paths() {
let input = compiler_call_with_multiple_passes();
let format = config::Format {
command_as_array: true,
drop_output_field: true,
paths_as: config::PathFormat::Absolute,
};
let sut: EntryFormatter = (&format).into();
let result = sut.apply(input);
let expected = vec![
Entry {
directory: PathBuf::from("/home/user"),
file: PathBuf::from("/tmp/source1.c"),
arguments: vec_of_strings!["clang", "-o", "./source1.o", "/tmp/source1.c"],
output: None,
},
Entry {
directory: PathBuf::from("/home/user"),
file: PathBuf::from("/home/source2.c"),
arguments: vec_of_strings!["clang", "-Wall", "../source2.c"],
output: None,
},
];
assert_eq!(expected, result);
}
fn compiler_call_with_multiple_passes() -> semantic::CompilerCall {
semantic::CompilerCall {
compiler: PathBuf::from("clang"),
working_dir: PathBuf::from("/home/user"),
passes: vec![
semantic::CompilerPass::Preprocess,
semantic::CompilerPass::Compile {
source: PathBuf::from("/tmp/source1.c"),
output: Some(PathBuf::from("./source1.o")),
flags: vec_of_strings![],
},
semantic::CompilerPass::Compile {
source: PathBuf::from("../source2.c"),
output: None,
flags: vec_of_strings!["-Wall"],
},
],
}
}
}

View File

@@ -1,15 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use super::semantic;
use anyhow::Result;
pub mod clang;
pub mod filter;
pub mod formatter;
/// The output writer trait is responsible for writing output file.
pub(crate) trait OutputWriter {
/// Running the writer means to consume the compiler calls
/// and write the entries to the output file.
fn run(&self, _: impl Iterator<Item = semantic::CompilerCall>) -> Result<()>;
}

View File

@@ -1,147 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use super::super::{CompilerCall, Execution, Interpreter, Recognition};
/// Represents a set of interpreters, any of which can recognize the semantics.
/// The interpreters are evaluated in order; the result of the first one that
/// recognizes the execution is returned.
pub(super) struct Any {
interpreters: Vec<Box<dyn Interpreter>>,
}
impl Any {
pub(super) fn new(tools: Vec<Box<dyn Interpreter>>) -> Self {
Self {
interpreters: tools,
}
}
}
impl Interpreter for Any {
fn recognize(&self, x: &Execution) -> Recognition<CompilerCall> {
for tool in &self.interpreters {
match tool.recognize(x) {
Recognition::Unknown => continue,
result => return result,
}
}
Recognition::Unknown
}
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::path::PathBuf;
use super::super::super::CompilerCall;
use super::*;
#[test]
fn test_any_when_no_match() {
let sut = Any {
interpreters: vec![
Box::new(MockTool::NotRecognize),
Box::new(MockTool::NotRecognize),
Box::new(MockTool::NotRecognize),
],
};
let input = any_execution();
assert!(matches!(sut.recognize(&input), Recognition::Unknown));
}
#[test]
fn test_any_when_success() {
let sut = Any {
interpreters: vec![
Box::new(MockTool::NotRecognize),
Box::new(MockTool::Recognize),
Box::new(MockTool::NotRecognize),
],
};
let input = any_execution();
assert!(matches!(sut.recognize(&input), Recognition::Success(_)));
}
#[test]
fn test_any_when_ignored() {
let sut = Any {
interpreters: vec![
Box::new(MockTool::NotRecognize),
Box::new(MockTool::RecognizeIgnored),
Box::new(MockTool::Recognize),
],
};
let input = any_execution();
assert!(matches!(sut.recognize(&input), Recognition::Ignored));
}
#[test]
fn test_any_when_match_fails() {
let sut = Any {
interpreters: vec![
Box::new(MockTool::NotRecognize),
Box::new(MockTool::RecognizeFailed),
Box::new(MockTool::Recognize),
Box::new(MockTool::NotRecognize),
],
};
let input = any_execution();
assert!(matches!(sut.recognize(&input), Recognition::Error(_)));
}
enum MockTool {
Recognize,
RecognizeIgnored,
RecognizeFailed,
NotRecognize,
}
impl Interpreter for MockTool {
fn recognize(&self, _: &Execution) -> Recognition<CompilerCall> {
match self {
MockTool::Recognize => Recognition::Success(any_compiler_call()),
MockTool::RecognizeIgnored => Recognition::Ignored,
MockTool::RecognizeFailed => Recognition::Error(String::from("problem")),
MockTool::NotRecognize => Recognition::Unknown,
}
}
}
fn any_execution() -> Execution {
Execution {
executable: PathBuf::new(),
arguments: vec![],
working_dir: PathBuf::new(),
environment: HashMap::new(),
}
}
fn any_compiler_call() -> CompilerCall {
CompilerCall {
compiler: PathBuf::new(),
working_dir: PathBuf::new(),
passes: vec![],
}
}
}

View File

@@ -1,209 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use nom::branch::alt;
use nom::multi::many1;
use nom::sequence::preceded;
use super::super::{CompilerCall, Execution, Interpreter, Recognition};
use internal::Argument;
pub(super) struct Gcc {}
impl Gcc {
pub(super) fn new() -> Self {
Gcc {}
}
}
impl Interpreter for Gcc {
fn recognize(&self, execution: &Execution) -> Recognition<CompilerCall> {
let mut parser = preceded(
internal::compiler,
many1(alt((internal::flag, internal::source))),
);
match parser(execution.arguments.as_slice()) {
Ok(result) => {
// TODO: append flags from environment
let flags = result.1;
let passes = Argument::passes(flags.as_slice());
Recognition::Success(CompilerCall {
compiler: execution.executable.clone(),
working_dir: execution.working_dir.clone(),
passes,
})
}
Err(error) => {
log::debug!("Gcc failed to parse it: {error}.");
Recognition::Unknown
}
}
}
}
mod internal {
use nom::{error::ErrorKind, IResult};
use regex::Regex;
use std::path::PathBuf;
use super::super::super::CompilerPass;
use super::super::matchers::source::looks_like_a_source_file;
#[derive(Debug, PartialEq)]
enum Language {
C,
Cpp,
ObjectiveC,
ObjectiveCpp,
Ada,
Fortran,
Go,
D,
Assembler,
Other,
}
#[derive(Debug, PartialEq)]
enum Pass {
Preprocessor,
Compiler,
Linker,
}
#[derive(Debug, PartialEq)]
enum Meaning {
Compiler,
ControlKindOfOutput { stop_before: Option<Pass> },
ControlLanguage(Language),
ControlPass(Pass),
Diagnostic,
Debug,
Optimize,
Instrumentation,
DirectorySearch(Option<Pass>),
Developer,
Input(Pass),
Output,
}
/// A compiler flag can consume a varying number of arguments, but carries a single meaning.
pub(super) struct Argument<'a> {
arguments: &'a [String],
meaning: Meaning,
}
impl Argument<'_> {
pub(super) fn passes(flags: &[Argument]) -> Vec<CompilerPass> {
let mut pass: Pass = Pass::Linker;
let mut inputs: Vec<String> = vec![];
let mut output: Option<String> = None;
let mut args: Vec<String> = vec![];
for flag in flags {
match flag.meaning {
Meaning::ControlKindOfOutput {
stop_before: Some(Pass::Compiler),
} => {
pass = Pass::Preprocessor;
args.extend(flag.arguments.iter().map(String::to_owned));
}
Meaning::ControlKindOfOutput {
stop_before: Some(Pass::Linker),
} => {
pass = Pass::Compiler;
args.extend(flag.arguments.iter().map(String::to_owned));
}
Meaning::ControlKindOfOutput { .. }
| Meaning::ControlLanguage(_)
| Meaning::ControlPass(Pass::Preprocessor)
| Meaning::ControlPass(Pass::Compiler)
| Meaning::Diagnostic
| Meaning::Debug
| Meaning::Optimize
| Meaning::Instrumentation
| Meaning::DirectorySearch(None) => {
args.extend(flag.arguments.iter().map(String::to_owned));
}
Meaning::Input(_) => {
assert_eq!(flag.arguments.len(), 1);
inputs.push(flag.arguments[0].clone())
}
Meaning::Output => {
assert_eq!(flag.arguments.len(), 1);
output = Some(flag.arguments[0].clone())
}
_ => {}
}
}
match pass {
Pass::Preprocessor if inputs.is_empty() => {
vec![]
}
Pass::Preprocessor => {
vec![CompilerPass::Preprocess]
}
Pass::Compiler | Pass::Linker => inputs
.into_iter()
.map(|source| CompilerPass::Compile {
source: PathBuf::from(source),
output: output.as_ref().map(PathBuf::from),
flags: args.clone(),
})
.collect(),
}
}
}
pub(super) fn compiler(i: &[String]) -> IResult<&[String], Argument> {
if i.is_empty() {
return Err(nom::Err::Failure(nom::error::Error::new(i, ErrorKind::Tag)));
}
let candidate = &i[0];
if COMPILER_REGEX.is_match(candidate) {
const MEANING: Meaning = Meaning::Compiler;
Ok((
&i[1..],
Argument {
// keep the matched executable name in the argument slice
arguments: &i[..1],
meaning: MEANING,
},
))
} else {
// Declare it as a non-recoverable error, so argument processing will stop after this.
Err(nom::Err::Failure(nom::error::Error::new(i, ErrorKind::Tag)))
}
}
pub(super) fn source(i: &[String]) -> IResult<&[String], Argument> {
if i.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(i, ErrorKind::Tag)));
}
let candidate = &i[0];
if looks_like_a_source_file(candidate.as_str()) {
const MEANING: Meaning = Meaning::Input(Pass::Preprocessor);
Ok((
&i[1..],
Argument {
// keep the matched file name; `Argument::passes` expects exactly one element
arguments: &i[..1],
meaning: MEANING,
},
))
} else {
Err(nom::Err::Error(nom::error::Error::new(i, ErrorKind::Tag)))
}
}
pub(super) fn flag(_i: &[String]) -> IResult<&[String], Argument> {
todo!()
}
static COMPILER_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
// - cc
// - c++
// - cxx
// - CC
// - mcc, gcc, m++, g++, gfortran, fortran
// - with prefixes like: arm-none-eabi-
// - with postfixes like: -7.0 or 6.4.0
Regex::new(
r"(^(cc|c\+\+|cxx|CC|(([^-]*-)*([mg](cc|\+\+)|[g]?fortran)(-?\d+(\.\d+){0,2})?))$)",
)
.unwrap()
});
}

View File

@@ -1,130 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use std::collections::HashSet;
use std::path::PathBuf;
use std::vec;
use super::super::{CompilerCall, CompilerPass, Execution, Interpreter, Recognition};
use super::matchers::source::looks_like_a_source_file;
/// A tool to recognize a compiler by executable name.
pub(super) struct Generic {
executables: HashSet<PathBuf>,
}
impl Generic {
pub(super) fn from(compilers: &[PathBuf]) -> Self {
let executables = compilers.iter().cloned().collect();
Self { executables }
}
}
impl Interpreter for Generic {
/// This tool is a naive implementation that only considers:
/// - the executable name,
/// - which arguments look like source files,
/// - the remaining arguments, which are treated as flags.
fn recognize(&self, x: &Execution) -> Recognition<CompilerCall> {
if self.executables.contains(&x.executable) {
let mut flags = vec![];
let mut sources = vec![];
// find sources and filter out requested flags.
for argument in x.arguments.iter().skip(1) {
if looks_like_a_source_file(argument.as_str()) {
sources.push(PathBuf::from(argument));
} else {
flags.push(argument.clone());
}
}
if sources.is_empty() {
Recognition::Error(String::from("source file not found"))
} else {
Recognition::Success(CompilerCall {
compiler: x.executable.clone(),
working_dir: x.working_dir.clone(),
passes: sources
.iter()
.map(|source| CompilerPass::Compile {
source: source.clone(),
output: None,
flags: flags.clone(),
})
.collect(),
})
}
} else {
Recognition::Unknown
}
}
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use crate::{vec_of_pathbuf, vec_of_strings};
use super::*;
#[test]
fn test_matching() {
let input = Execution {
executable: PathBuf::from("/usr/bin/something"),
arguments: vec_of_strings![
"something",
"-Dthis=that",
"-I.",
"source.c",
"-o",
"source.c.o"
],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::new(),
};
let expected = CompilerCall {
compiler: PathBuf::from("/usr/bin/something"),
working_dir: PathBuf::from("/home/user"),
passes: vec![CompilerPass::Compile {
flags: vec_of_strings!["-Dthis=that", "-I.", "-o", "source.c.o"],
source: PathBuf::from("source.c"),
output: None,
}],
};
assert_eq!(Recognition::Success(expected), SUT.recognize(&input));
}
#[test]
fn test_matching_without_sources() {
let input = Execution {
executable: PathBuf::from("/usr/bin/something"),
arguments: vec_of_strings!["something", "--help"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::new(),
};
assert_eq!(
Recognition::Error(String::from("source file not found")),
SUT.recognize(&input)
);
}
#[test]
fn test_not_matching() {
let input = Execution {
executable: PathBuf::from("/usr/bin/cc"),
arguments: vec_of_strings!["cc", "-Dthis=that", "-I.", "source.c", "-o", "source.c.o"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::new(),
};
assert_eq!(Recognition::Unknown, SUT.recognize(&input));
}
static SUT: std::sync::LazyLock<Generic> = std::sync::LazyLock::new(|| Generic {
executables: vec_of_pathbuf!["/usr/bin/something"].into_iter().collect(),
});
}

View File

@@ -1,185 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use std::collections::HashSet;
use std::path::PathBuf;
use super::super::{CompilerCall, Execution, Interpreter, Recognition};
/// A tool to ignore a command execution by executable name.
pub(super) struct IgnoreByPath {
executables: HashSet<PathBuf>,
}
impl IgnoreByPath {
pub(super) fn new() -> Self {
let executables = COREUTILS_FILES.iter().map(PathBuf::from).collect();
Self { executables }
}
pub(super) fn from(compilers: &[PathBuf]) -> Self {
let executables = compilers.iter().cloned().collect();
Self { executables }
}
}
impl Default for IgnoreByPath {
fn default() -> Self {
Self::new()
}
}
/// A tool to ignore a command execution by the executable path.
impl Interpreter for IgnoreByPath {
fn recognize(&self, execution: &Execution) -> Recognition<CompilerCall> {
if self.executables.contains(&execution.executable) {
Recognition::Ignored
} else {
Recognition::Unknown
}
}
}
static COREUTILS_FILES: [&str; 106] = [
"/usr/bin/[",
"/usr/bin/arch",
"/usr/bin/b2sum",
"/usr/bin/base32",
"/usr/bin/base64",
"/usr/bin/basename",
"/usr/bin/basenc",
"/usr/bin/cat",
"/usr/bin/chcon",
"/usr/bin/chgrp",
"/usr/bin/chmod",
"/usr/bin/chown",
"/usr/bin/cksum",
"/usr/bin/comm",
"/usr/bin/cp",
"/usr/bin/csplit",
"/usr/bin/cut",
"/usr/bin/date",
"/usr/bin/dd",
"/usr/bin/df",
"/usr/bin/dir",
"/usr/bin/dircolors",
"/usr/bin/dirname",
"/usr/bin/du",
"/usr/bin/echo",
"/usr/bin/env",
"/usr/bin/expand",
"/usr/bin/expr",
"/usr/bin/factor",
"/usr/bin/false",
"/usr/bin/fmt",
"/usr/bin/fold",
"/usr/bin/groups",
"/usr/bin/head",
"/usr/bin/hostid",
"/usr/bin/id",
"/usr/bin/install",
"/usr/bin/join",
"/usr/bin/link",
"/usr/bin/ln",
"/usr/bin/logname",
"/usr/bin/ls",
"/usr/bin/md5sum",
"/usr/bin/mkdir",
"/usr/bin/mkfifo",
"/usr/bin/mknod",
"/usr/bin/mktemp",
"/usr/bin/mv",
"/usr/bin/nice",
"/usr/bin/nl",
"/usr/bin/nohup",
"/usr/bin/nproc",
"/usr/bin/numfmt",
"/usr/bin/od",
"/usr/bin/paste",
"/usr/bin/pathchk",
"/usr/bin/pinky",
"/usr/bin/pr",
"/usr/bin/printenv",
"/usr/bin/printf",
"/usr/bin/ptx",
"/usr/bin/pwd",
"/usr/bin/readlink",
"/usr/bin/realpath",
"/usr/bin/rm",
"/usr/bin/rmdir",
"/usr/bin/runcon",
"/usr/bin/seq",
"/usr/bin/sha1sum",
"/usr/bin/sha224sum",
"/usr/bin/sha256sum",
"/usr/bin/sha384sum",
"/usr/bin/sha512sum",
"/usr/bin/shred",
"/usr/bin/shuf",
"/usr/bin/sleep",
"/usr/bin/sort",
"/usr/bin/split",
"/usr/bin/stat",
"/usr/bin/stdbuf",
"/usr/bin/stty",
"/usr/bin/sum",
"/usr/bin/sync",
"/usr/bin/tac",
"/usr/bin/tail",
"/usr/bin/tee",
"/usr/bin/test",
"/usr/bin/timeout",
"/usr/bin/touch",
"/usr/bin/tr",
"/usr/bin/true",
"/usr/bin/truncate",
"/usr/bin/tsort",
"/usr/bin/tty",
"/usr/bin/uname",
"/usr/bin/unexpand",
"/usr/bin/uniq",
"/usr/bin/unlink",
"/usr/bin/users",
"/usr/bin/vdir",
"/usr/bin/wc",
"/usr/bin/who",
"/usr/bin/whoami",
"/usr/bin/yes",
"/usr/bin/make",
"/usr/bin/gmake",
];
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::path::PathBuf;
use crate::vec_of_strings;
use super::*;
#[test]
fn test_executions_are_ignored_by_executable_name() {
let input = Execution {
executable: PathBuf::from("/usr/bin/ls"),
arguments: vec_of_strings!["ls", "/home/user/build"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::new(),
};
let sut = IgnoreByPath::new();
assert_eq!(Recognition::Ignored, sut.recognize(&input))
}
#[test]
fn test_not_known_executables_are_not_recognized() {
let input = Execution {
executable: PathBuf::from("/usr/bin/bear"),
arguments: vec_of_strings!["bear", "--", "make"],
working_dir: PathBuf::from("/home/user"),
environment: HashMap::new(),
};
let sut = IgnoreByPath::new();
assert_eq!(Recognition::Unknown, sut.recognize(&input))
}
}

View File

@@ -1,3 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
pub(super) mod source;

View File

@@ -1,82 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use std::collections::HashSet;
#[cfg(target_family = "unix")]
pub fn looks_like_a_source_file(argument: &str) -> bool {
// not a command line flag
if argument.starts_with('-') {
return false;
}
if let Some((_, extension)) = argument.rsplit_once('.') {
return EXTENSIONS.contains(extension);
}
false
}
#[cfg(target_family = "windows")]
pub fn looks_like_a_source_file(argument: &str) -> bool {
// not a command line flag
if argument.starts_with('/') {
return false;
}
if let Some((_, extension)) = argument.rsplit_once('.') {
return EXTENSIONS.contains(extension);
}
false
}
#[rustfmt::skip]
static EXTENSIONS: std::sync::LazyLock<HashSet<&'static str>> = std::sync::LazyLock::new(|| {
HashSet::from([
// header files
"h", "hh", "H", "hp", "hxx", "hpp", "HPP", "h++", "tcc",
// C
"c", "C",
// C++
"cc", "CC", "c++", "C++", "cxx", "cpp", "cp",
// CUDA
"cu",
// ObjectiveC
"m", "mi", "mm", "M", "mii",
// Preprocessed
"i", "ii",
// Assembly
"s", "S", "sx", "asm",
// Fortran
"f", "for", "ftn",
"F", "FOR", "fpp", "FPP", "FTN",
"f90", "f95", "f03", "f08",
"F90", "F95", "F03", "F08",
// go
"go",
// brig
"brig",
// D
"d", "di", "dd",
// Ada
"ads", "abd",
])
});
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_filenames() {
assert!(looks_like_a_source_file("source.c"));
assert!(looks_like_a_source_file("source.cpp"));
assert!(looks_like_a_source_file("source.cxx"));
assert!(looks_like_a_source_file("source.cc"));
assert!(looks_like_a_source_file("source.h"));
assert!(looks_like_a_source_file("source.hpp"));
assert!(!looks_like_a_source_file("gcc"));
assert!(!looks_like_a_source_file("clang"));
assert!(!looks_like_a_source_file("-o"));
assert!(!looks_like_a_source_file("-Wall"));
assert!(!looks_like_a_source_file("/o"));
}
}

View File

@@ -1,136 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
use super::interpreters::combinators::Any;
use super::interpreters::generic::Generic;
use super::interpreters::ignore::IgnoreByPath;
use super::Interpreter;
use crate::config;
use std::path::PathBuf;
mod combinators;
mod gcc;
mod generic;
mod ignore;
mod matchers;
/// Creates an interpreter to recognize the compiler calls.
///
/// The configuration defines which compilers to include and exclude. Reading the
/// environment variables to detect further compilers (and keeping those from being
/// excluded) is still to be done; see the TODOs below.
// TODO: Use the CC or CXX environment variables to detect the compiler to include.
// Use the CC or CXX environment variables and make sure those are not excluded.
// Make sure the environment variables are passed to the method.
// TODO: Take environment variables as input.
pub fn create_interpreter(config: &config::Main) -> impl Interpreter {
let compilers_to_include = match &config.intercept {
config::Intercept::Wrapper { executables, .. } => executables.clone(),
_ => vec![],
};
let compilers_to_exclude = match &config.output {
config::Output::Clang { compilers, .. } => compilers
.iter()
.filter(|compiler| compiler.ignore == config::IgnoreOrConsider::Always)
.map(|compiler| compiler.path.clone())
.collect(),
_ => vec![],
};
let mut interpreters: Vec<Box<dyn Interpreter>> = vec![
// ignore executables which are not compilers,
Box::new(IgnoreByPath::default()),
// recognize default compiler
Box::new(Generic::from(&[PathBuf::from("/usr/bin/cc")])),
];
if !compilers_to_include.is_empty() {
let tool = Generic::from(&compilers_to_include);
interpreters.push(Box::new(tool));
}
if !compilers_to_exclude.is_empty() {
let tool = IgnoreByPath::from(&compilers_to_exclude);
interpreters.insert(0, Box::new(tool));
}
Any::new(interpreters)
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use std::path::PathBuf;
use super::super::{CompilerCall, Execution, Recognition};
use super::*;
use crate::config;
use crate::config::{DuplicateFilter, Format, SourceFilter};
use crate::{vec_of_pathbuf, vec_of_strings};
fn any_execution() -> Execution {
Execution {
executable: PathBuf::from("/usr/bin/cc"),
arguments: vec_of_strings!["cc", "-c", "-Wall", "main.c"],
environment: HashMap::new(),
working_dir: PathBuf::from("/home/user"),
}
}
#[test]
fn test_create_interpreter_with_default_config() {
let config = config::Main::default();
let interpreter = create_interpreter(&config);
let input = any_execution();
assert!(matches!(
interpreter.recognize(&input),
Recognition::Success(CompilerCall { .. })
));
}
#[test]
fn test_create_interpreter_with_compilers_to_include() {
let config = config::Main {
intercept: config::Intercept::Wrapper {
executables: vec_of_pathbuf!["/usr/bin/cc"],
path: PathBuf::from("/usr/libexec/bear"),
directory: PathBuf::from("/tmp"),
},
..Default::default()
};
let interpreter = create_interpreter(&config);
let input = any_execution();
assert!(matches!(
interpreter.recognize(&input),
Recognition::Success(CompilerCall { .. })
));
}
#[test]
fn test_create_interpreter_with_compilers_to_exclude() {
let config = config::Main {
output: config::Output::Clang {
compilers: vec![config::Compiler {
path: PathBuf::from("/usr/bin/cc"),
ignore: config::IgnoreOrConsider::Always,
arguments: config::Arguments::default(),
}],
sources: SourceFilter::default(),
duplicates: DuplicateFilter::default(),
format: Format::default(),
},
..Default::default()
};
let interpreter = create_interpreter(&config);
let input = any_execution();
assert!(matches!(interpreter.recognize(&input), Recognition::Ignored));
}
}

View File

@@ -1,103 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! This module defines the semantics of executed commands.
//!
//! The semantics capture the intent of an execution. Not only are the
//! compiler calls recognized, but also the compiler passes that they
//! execute.
//!
//! A compilation of a source file can be divided into multiple passes.
//! We are interested in the compiler passes, because those are the
//! ones that are relevant to build a JSON compilation database.
pub mod interpreters;
pub mod transformation;
use super::intercept::Execution;
use serde::ser::SerializeSeq;
use serde::{Serialize, Serializer};
use std::path::PathBuf;
/// Represents an executed command semantic.
#[derive(Debug, PartialEq, Serialize)]
pub struct CompilerCall {
pub compiler: PathBuf,
pub working_dir: PathBuf,
pub passes: Vec<CompilerPass>,
}
/// Represents a compiler call pass.
#[derive(Debug, PartialEq, Serialize)]
pub enum CompilerPass {
Preprocess,
Compile {
source: PathBuf,
output: Option<PathBuf>,
flags: Vec<String>,
},
}
/// Responsible for recognizing the semantics of an executed command.
///
/// An implementation can be responsible for a single compiler,
/// a set of compilers, or a set of commands that are not compilers.
///
/// The benefit of recognizing a non-compiler command is that the
/// recognition process does not need to spend more time trying the
/// remaining interpreters. Such a command can also be classified as
/// ignored, so it is not processed further later on.
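///
/// # Example
///
/// A minimal sketch of an interpreter that ignores every command;
/// the `IgnoreAll` type is hypothetical and not part of this module:
///
/// ```ignore
/// struct IgnoreAll;
///
/// impl Interpreter for IgnoreAll {
///     fn recognize(&self, _: &Execution) -> Recognition<CompilerCall> {
///         Recognition::Ignored
///     }
/// }
/// ```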
pub trait Interpreter: Send {
fn recognize(&self, _: &Execution) -> Recognition<CompilerCall>;
}
/// Represents a semantic recognition result.
///
/// The unknown recognition is used when the interpreter is not
/// able to recognize the command. This can signal the search process
/// to continue with the next interpreter.
#[derive(Debug, PartialEq)]
pub enum Recognition<T> {
/// The command was recognized and the semantic was identified.
Success(T),
/// The command was recognized, but the semantic was ignored.
Ignored,
/// The command was recognized, but the semantic was broken.
Error(String),
/// The command was not recognized.
Unknown,
}
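/// Allows a recognition to be flattened into its successful value,
/// so a stream of recognitions can be reduced to the recognized
/// compiler calls only.
///
/// A usage sketch; the `interpreter` and `executions` names are
/// assumptions, not part of this module:
///
/// ```ignore
/// let calls: Vec<CompilerCall> = executions
///     .flat_map(|execution| interpreter.recognize(&execution))
///     .collect();
/// ```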
impl<T> IntoIterator for Recognition<T> {
type Item = T;
type IntoIter = std::option::IntoIter<T>;
fn into_iter(self) -> Self::IntoIter {
match self {
Recognition::Success(value) => Some(value).into_iter(),
_ => None.into_iter(),
}
}
}
/// Responsible for transforming the semantics of an executed command.
///
/// It conditionally removes compiler calls based on compiler names or flags.
/// It can also alter the compiler flags of the compiler calls. The actions
/// are defined in the configuration this module is given.
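///
/// # Example
///
/// A minimal sketch of a transform that leaves every call unchanged;
/// the `Identity` type is hypothetical and not part of this module:
///
/// ```ignore
/// struct Identity;
///
/// impl Transform for Identity {
///     fn apply(&self, input: CompilerCall) -> Option<CompilerCall> {
///         Some(input)
///     }
/// }
/// ```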
pub trait Transform: Send {
fn apply(&self, _: CompilerCall) -> Option<CompilerCall>;
}
/// Serialize compiler calls into a JSON array.
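///
/// # Example
///
/// A minimal sketch that writes one entry to standard output; the
/// field values here are illustrative assumptions:
///
/// ```ignore
/// let entry = CompilerCall {
///     compiler: PathBuf::from("/usr/bin/cc"),
///     working_dir: PathBuf::from("/project"),
///     passes: vec![CompilerPass::Compile {
///         source: PathBuf::from("main.c"),
///         output: Some(PathBuf::from("main.o")),
///         flags: vec!["-O2".to_string()],
///     }],
/// };
/// serialize(std::io::stdout(), std::iter::once(entry))?;
/// ```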
pub fn serialize(
writer: impl std::io::Write,
entries: impl Iterator<Item = CompilerCall> + Sized,
) -> anyhow::Result<()> {
let mut ser = serde_json::Serializer::pretty(writer);
let mut seq = ser.serialize_seq(None)?;
for entry in entries {
seq.serialize_element(&entry)?;
}
seq.end()?;
Ok(())
}

View File

@@ -1,277 +0,0 @@
// SPDX-License-Identifier: GPL-3.0-or-later
//! Responsible for transforming the compiler calls.
//!
//! It conditionally removes compiler calls based on compiler names or flags.
//! It can also alter the compiler flags of the compiler calls. The actions
//! are defined in the configuration this module is given.
use crate::{config, semantic};
use std::collections::HashMap;
use std::path::PathBuf;
/// Transformation contains rearranged information from the configuration.
///
/// The configuration is a list of instructions on how to transform a compiler call.
/// The transformation groups the instructions by compiler path, so they can be
/// applied to a compiler call whose compiler matches that path.
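///
/// # Example
///
/// A sketch of building a transformation that always ignores a compiler;
/// the field values mirror the tests below and are illustrative only:
///
/// ```ignore
/// let transformation: Transformation = vec![config::Compiler {
///     path: PathBuf::from("/usr/bin/cc"),
///     ignore: config::IgnoreOrConsider::Always,
///     arguments: config::Arguments::default(),
/// }]
/// .as_slice()
/// .into();
/// ```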
#[derive(Debug, PartialEq)]
pub struct Transformation {
compilers: HashMap<PathBuf, Vec<config::Compiler>>,
}
impl From<&config::Output> for Transformation {
fn from(config: &config::Output) -> Self {
match config {
config::Output::Clang { compilers, .. } => compilers.as_slice().into(),
config::Output::Semantic { .. } => Transformation::new(),
}
}
}
impl From<&[config::Compiler]> for Transformation {
fn from(config: &[config::Compiler]) -> Self {
let mut compilers = HashMap::new();
for compiler in config {
compilers
.entry(compiler.path.clone())
.or_insert_with(Vec::new)
.push(compiler.clone());
}
Transformation { compilers }
}
}
impl semantic::Transform for Transformation {
fn apply(&self, input: semantic::CompilerCall) -> Option<semantic::CompilerCall> {
if let Some(configs) = self.compilers.get(&input.compiler) {
Self::apply_when_not_empty(configs.as_slice(), input)
} else {
Some(input)
}
}
}
impl Transformation {
fn new() -> Self {
Transformation {
compilers: HashMap::new(),
}
}
/// Apply the transformation to the compiler call.
///
/// Multiple configurations can be applied to the same compiler call.
/// Depending on the instruction from each configuration, the compiler call
/// can be ignored, modified, or left unchanged. The conditional ignore
/// checks whether the compiler call matches the flags defined in the
/// configuration.
fn apply_when_not_empty(
configs: &[config::Compiler],
input: semantic::CompilerCall,
) -> Option<semantic::CompilerCall> {
let mut current_input = Some(input);
for config in configs {
current_input = match config {
config::Compiler {
ignore: config::IgnoreOrConsider::Always,
..
} => None,
config::Compiler {
ignore: config::IgnoreOrConsider::Conditional,
arguments,
..
} => current_input.filter(|input| !Self::match_condition(arguments, &input.passes)),
config::Compiler {
ignore: config::IgnoreOrConsider::Never,
arguments,
..
} => current_input.map(|input| semantic::CompilerCall {
compiler: input.compiler.clone(),
working_dir: input.working_dir.clone(),
passes: Transformation::apply_argument_changes(
arguments,
input.passes.as_slice(),
),
}),
};
if current_input.is_none() {
break;
}
}
current_input
}
/// Check if the compiler call matches the condition defined in the configuration.
///
/// Any compiler pass that matches the flags defined in the configuration will cause
/// the whole compiler call to be ignored.
fn match_condition(arguments: &config::Arguments, passes: &[semantic::CompilerPass]) -> bool {
let match_flags = arguments.match_.as_slice();
passes.iter().any(|pass| match pass {
semantic::CompilerPass::Compile { flags, .. } => {
flags.iter().any(|flag| match_flags.contains(flag))
}
_ => false,
})
}
/// Apply the changes defined in the configuration to the compiler call.
///
/// The changes can remove flags from, or add flags to, the compiler call.
/// Only the flags are changed, but the change applies to all compiler passes.
fn apply_argument_changes(
arguments: &config::Arguments,
passes: &[semantic::CompilerPass],
) -> Vec<semantic::CompilerPass> {
let arguments_to_remove = arguments.remove.as_slice();
let arguments_to_add = arguments.add.as_slice();
let mut new_passes = Vec::with_capacity(passes.len());
for pass in passes {
match pass {
semantic::CompilerPass::Compile {
source,
output,
flags,
} => {
let mut new_flags = flags.clone();
new_flags.retain(|flag| !arguments_to_remove.contains(flag));
new_flags.extend(arguments_to_add.iter().cloned());
new_passes.push(semantic::CompilerPass::Compile {
source: source.clone(),
output: output.clone(),
flags: new_flags,
});
}
semantic::CompilerPass::Preprocess => {
new_passes.push(semantic::CompilerPass::Preprocess)
}
}
}
new_passes
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::config::{Arguments, Compiler, IgnoreOrConsider};
use crate::semantic::{CompilerCall, CompilerPass, Transform};
use std::path::PathBuf;
#[test]
fn test_apply_no_filter() {
let input = CompilerCall {
compiler: std::path::PathBuf::from("gcc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-O2".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
let sut = Transformation::from(&config::Output::Semantic {});
let result = sut.apply(input);
let expected = CompilerCall {
compiler: std::path::PathBuf::from("gcc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-O2".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
assert_eq!(result, Some(expected));
}
#[test]
fn test_apply_filter_match() {
let input = CompilerCall {
compiler: std::path::PathBuf::from("cc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-O2".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
let sut: Transformation = vec![Compiler {
path: std::path::PathBuf::from("cc"),
ignore: IgnoreOrConsider::Always,
arguments: Arguments::default(),
}]
.as_slice()
.into();
let result = sut.apply(input);
assert!(result.is_none());
}
#[test]
fn test_apply_conditional_match() {
let input = CompilerCall {
compiler: std::path::PathBuf::from("gcc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-O2".into(), "-Wall".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
let sut: Transformation = vec![Compiler {
path: std::path::PathBuf::from("gcc"),
ignore: IgnoreOrConsider::Conditional,
arguments: Arguments {
match_: vec!["-O2".into()],
..Arguments::default()
},
}]
.as_slice()
.into();
let result = sut.apply(input);
assert!(result.is_none());
}
#[test]
fn test_apply_ignore_never_modify_arguments() {
let input = CompilerCall {
compiler: std::path::PathBuf::from("gcc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-O2".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
let sut: Transformation = vec![Compiler {
path: std::path::PathBuf::from("gcc"),
ignore: IgnoreOrConsider::Never,
arguments: Arguments {
add: vec!["-Wall".into()],
remove: vec!["-O2".into()],
..Arguments::default()
},
}]
.as_slice()
.into();
let result = sut.apply(input);
let expected = CompilerCall {
compiler: std::path::PathBuf::from("gcc"),
passes: vec![CompilerPass::Compile {
source: PathBuf::from("main.c"),
output: PathBuf::from("main.o").into(),
flags: vec!["-Wall".into()],
}],
working_dir: std::path::PathBuf::from("/project"),
};
assert_eq!(result, Some(expected));
}
}