swift-mirror/benchmark/scripts/Benchmark_Driver
Tim Kientzle 7052de9399 Fix build-script -B
Without additional options, build-script -B was badly broken:
* It added a broken --independent-samples option to the driver command line
* Slow tests that ran only 1 sample by default would break the statistics

Fix the first issue by adding `--independent-samples` to the command
line only when a sample was actually provided by other options.

Fix the second issue by including `--min-samples=2` in the command.
2023-09-22 17:44:42 -07:00


#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ===--- Benchmark_Driver ------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
"""
Benchmark_Driver is a tool for running and analyzing the Swift Benchmark Suite.
Example:
$ Benchmark_Driver run
Use `Benchmark_Driver -h` for help on available commands and options.
The `BenchmarkDriver` class runs performance tests and implements the `run`
COMMAND; the `BenchmarkDoctor` class analyzes performance tests and
implements the `check` COMMAND.
"""
import argparse
import functools
import glob
import logging
import math
import os
import platform
import re
import subprocess
import sys
import time
from compare_perf_tests import LogParser
DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))
class BenchmarkDriver(object):
"""Executes tests from Swift Benchmark Suite.
    It is a higher-level wrapper around the Benchmark_X family of binaries
(X = [O, Onone, Osize]).
"""
def __init__(self, args, tests=None, _subprocess=None, parser=None):
"""Initialize with command line arguments.
Optional parameters are for injecting dependencies -- used for testing.
"""
self.args = args
self.run_env = os.environ.copy()
# Set a constant hash seed. Some tests are highly sensitive to
# fluctuations in the number of hash collisions.
self.run_env["SWIFT_DETERMINISTIC_HASHING"] = "1"
if hasattr(args, 'libdir') and args.libdir:
# The benchmark binaries should pick up the built swift libraries
# automatically, because their RPATH should point to ../lib/swift
# But it does not harm to additionally set the dynamic library path,
# e.g. as a workaround to rdar://78584073
if platform.system() == "Darwin":
self.run_env["DYLD_LIBRARY_PATH"] = args.libdir
elif platform.system() == "Linux":
self.run_env["LD_LIBRARY_PATH"] = args.libdir
self._subprocess = _subprocess or subprocess
self.all_tests = []
self.test_number = {}
self.tests = tests or self._get_tests()
self.parser = parser or LogParser()
self.results = {}
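    # Sketch of the dependency injection used by unit tests (`FakeSubprocess`
    # is a hypothetical stub exposing `check_output` and `STDOUT`):
    #
    #   driver = BenchmarkDriver(args, tests=["Ackermann"],
    #                            _subprocess=FakeSubprocess())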
def _invoke(self, cmd):
return self._subprocess.check_output(
cmd, stderr=self._subprocess.STDOUT, universal_newlines=True,
env=self.run_env
)
@property
def test_harness(self):
"""Full path to test harness binary."""
suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
suffix += "-"
if hasattr(self.args, "architecture") and self.args.architecture:
suffix += self.args.architecture
pattern = os.path.join(self.args.tests, "Benchmark_" + suffix + "*")
executables = []
if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
executables = [pattern]
else:
executables = glob.glob(pattern)
if len(executables) == 0:
raise ValueError(
"No benchmark executable for file name pattern " +
pattern + " found")
if len(executables) > 1:
raise ValueError(
"Multiple benchmark executables for file name pattern " +
pattern + " found\n" +
str(executables) +
"\nSpecify the architecture to select the right benchmark executable")
return executables[0]
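    # For example, `--tests /bin -o Onone --architecture arm64` globs for
    # "/bin/Benchmark_Onone-arm64*"; anything other than exactly one match
    # raises a ValueError.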
def _git(self, cmd):
"""Execute the Git command in the `swift-repo`."""
return self._invoke(
("git -C {0} ".format(self.args.swift_repo) + cmd).split()
).strip()
@property
def log_file(self):
"""Full path to log file.
If `swift-repo` is set, log file is tied to Git branch and revision.
"""
if not self.args.output_dir:
return None
log_dir = self.args.output_dir
harness_name = os.path.basename(self.test_harness)
suffix = "-" + time.strftime("%Y%m%d%H%M%S", time.localtime())
if self.args.swift_repo:
log_dir = os.path.join(
log_dir, self._git("rev-parse --abbrev-ref HEAD")
) # branch
suffix += "-" + self._git("rev-parse --short HEAD") # revision
return os.path.join(log_dir, harness_name + suffix + ".log")
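    # A resulting path might look like this (branch, timestamp and revision
    # are illustrative):
    #   <output_dir>/main/Benchmark_O-arm64-20230922174442-abc1234.log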
@property
def _cmd_list_benchmarks(self):
# TODO: Switch to JSON format: add "--json" here
return [self.test_harness, "--list"] + (
["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
)
def _get_tests(self):
"""Return a list of performance tests to run."""
lines = self._invoke(self._cmd_list_benchmarks).split("\n")
json_tests = []
for line in lines:
columns = re.split(r'[ ,]+', line.strip())
try:
number = int(columns[0])
name = columns[1]
json_descr = {"number": number, "name": name}
json_tests.append(json_descr)
except Exception:
continue
# TODO: Replace the above with the following to
# use the JSON output from the benchmark driver
# directly
# if line.strip() != "":
# json_tests.append(json.loads(line))
self.all_tests = [json["name"] for json in json_tests]
test_numbers = [json["number"] for json in json_tests]
self.test_number = dict([(json["name"], json["number"]) for json in json_tests])
if self.args.filters:
return self._tests_matching_patterns()
if self.args.benchmarks:
return self._tests_by_name_or_number(test_numbers)
return self.all_tests
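    # The `--list` output is expected to contain lines beginning with
    # "<number>,<name>"; lines whose first column is not an integer (such as
    # the header) are skipped by the `int()` conversion above.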
def _tests_matching_patterns(self):
matches = set()
for fil in self.args.filters:
pattern = re.compile(fil)
new_matches = filter(pattern.match, self.all_tests)
matches = matches.union(new_matches)
return sorted(list(matches))
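    # Note that `re.match` anchors at the start of the name: `-f Array`
    # matches a hypothetical "ArrayAppend" but not "CreateArray"; use
    # ".*Array.*" to match anywhere in the name.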
def _tests_by_name_or_number(self, test_numbers):
benchmarks = set(self.args.benchmarks)
numbers = list(map(str, test_numbers))
number_to_name = dict(zip(numbers, self.all_tests))
tests_by_number = [
number_to_name[i] for i in benchmarks.intersection(numbers)
]
return sorted(
list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
)
def run(
self,
test=None,
num_samples=None,
num_iters=None,
sample_time=None,
verbose=None,
measure_memory=False
):
"""Execute benchmark and gather results."""
num_samples = num_samples or 0
num_iters = num_iters or 0 # automatically determine N to run for 1s
sample_time = sample_time or 0 # default is 1s
cmd = self._cmd_run(
test, num_samples, num_iters, sample_time, verbose, measure_memory
)
output = self._invoke(cmd)
results = self.parser.results_from_string(output)
if test:
return list(results.items())[0][1]
else:
return results
def _cmd_run(
self,
test,
num_samples,
num_iters,
sample_time,
verbose,
measure_memory
):
cmd = [self.test_harness]
if test:
cmd.append(test)
else:
cmd.extend([str(self.test_number.get(name, name)) for name in self.tests])
if num_samples > 0:
cmd.append("--num-samples={0}".format(num_samples))
else:
cmd.append("--min-samples=2")
if num_iters > 0:
cmd.append("--num-iters={0}".format(num_iters))
if sample_time > 0:
cmd.append("--sample-time={0}".format(sample_time))
if verbose:
cmd.append("--verbose")
if measure_memory:
cmd.append("--memory")
# TODO: Uncomment this as soon as the new Benchmark Swift logic is available everywhere
# cmd.append("--json")
return cmd
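    # Example (a sketch; harness path elided): running a single benchmark via
    # `run_independent_samples` below yields a command like
    #   [<harness>, "Ackermann", "--min-samples=2", "--num-iters=1", "--memory"]
    # where `--min-samples=2` is the fallback that keeps slow tests, which
    # would otherwise run a single sample, from breaking the statistics.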
def run_independent_samples(self, test):
"""Run benchmark multiple times, gathering independent samples.
Returns the aggregated result of independent benchmark invocations.
"""
def merge_results(a, b):
a.merge(b)
return a
return functools.reduce(
merge_results,
[
self.run(test, measure_memory=True, num_iters=1)
for _ in range(self.args.independent_samples)
],
)
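    # With `--independent-samples 3`, the benchmark binary is invoked three
    # times and the parsed results (`PerformanceTestResult` instances from
    # compare_perf_tests) are folded into one via their `merge` method.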
def log_results(self, results, log_file=None):
"""Log output to `log_file`.
Creates `args.output_dir` if it doesn't exist yet.
"""
log_file = log_file or self.log_file
        log_dir = os.path.dirname(log_file)
        if not os.path.exists(log_dir):
            os.makedirs(log_dir)
print("Logging results to: %s" % log_file)
with open(log_file, "w") as f:
for r in results:
print(r, file=f)
RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"
def run_and_log(self, csv_console=True):
"""Run benchmarks and continuously log results to the console.
There are two console log formats: CSV and justified columns. Both are
compatible with `LogParser`. Depending on the `csv_console` parameter,
the CSV log format is either printed to console or returned as a string
from this method. When `csv_console` is False, the console output
format is justified columns.
"""
format = (
(lambda values: ",".join(values))
if csv_console
else (lambda values: self.RESULT.format(*values))
) # justified columns
def console_log(values):
print(format(values))
def summary(r):
return list(
map(
str,
[
r.test_num,
r.name,
r.num_samples,
r.min_value,
r.q1,
r.median,
r.q3,
r.max_value,
r.max_rss,
],
)
)
summary_header = [
"#",
"TEST",
"SAMPLES",
"MIN(μs)",
"Q1(μs)",
"MEDIAN(μs)",
"Q3(μs)",
"MAX(μs)",
"MAX_RSS(B)",
]
console_log(summary_header)
results = []
for test in self.tests:
result = self.run_independent_samples(test)
console_log(summary(result))
results.append(result)
print("\nTotal performance tests executed: {0}".format(len(self.tests)))
return results
@staticmethod
def run_benchmarks(args):
"""Run benchmarks and log results."""
driver = BenchmarkDriver(args)
results = driver.run_and_log(csv_console=(args.output_dir is None))
if args.output_dir:
driver.log_results([r.json for r in results])
return 0
class LoggingReportFormatter(logging.Formatter):
"""Format logs as plain text or with colors on the terminal.
    Plain text output shows level, category, and message: 'DEBUG category: Hi!'
Colored output uses color coding based on the level.
"""
import logging as log
colors = {
log.DEBUG: "9",
log.INFO: "2",
log.WARNING: "3",
log.ERROR: "1",
log.CRITICAL: "5",
}
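    # The values are ANSI foreground color digits spliced into "\033[1;3{0}m"
    # below, e.g. ERROR -> "1" -> red ("\033[1;31m"); "9" is the terminal's
    # default foreground color.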
def __init__(self, use_color=False):
"""Specify if report should use colors; defaults to False."""
super(LoggingReportFormatter, self).__init__("%(message)s")
self.use_color = use_color
def format(self, record):
"""Format the log record with level and category."""
msg = super(LoggingReportFormatter, self).format(record)
category = (record.name.split(".")[-1] + ": ") if "." in record.name else ""
return (
"\033[1;3{0}m{1}{2}\033[1;0m".format(
self.colors[record.levelno], category, msg
)
if self.use_color
else "{0} {1}{2}".format(record.levelname, category, msg)
)
class MarkdownReportHandler(logging.StreamHandler):
r"""Write custom formatted messages from BenchmarkDoctor to the stream.
It works around StreamHandler's hardcoded '\n' and handles the custom
level and category formatting for BenchmarkDoctor's check report.
"""
def __init__(self, stream):
"""Initialize the handler and write a Markdown table header."""
super(MarkdownReportHandler, self).__init__(stream)
self.setLevel(logging.INFO)
self.stream.write("\n✅ | Benchmark Check Report\n---|---")
self.stream.flush()
levels = {
logging.WARNING: "\n⚠️",
logging.ERROR: "\n⛔️",
logging.INFO: " <br><sub> ",
}
categories = {"naming": "🔤", "runtime": "", "memory": "Ⓜ️"}
quotes_re = re.compile("'")
def format(self, record):
msg = super(MarkdownReportHandler, self).format(record)
return (
self.levels.get(record.levelno, "")
+ (
""
if record.levelno == logging.INFO
else self.categories.get(record.name.split(".")[-1], "") + " | "
)
+ self.quotes_re.sub("`", msg)
)
def emit(self, record):
msg = self.format(record)
stream = self.stream
try:
if isinstance(msg, str) and (getattr(stream, "encoding", None) and
getattr(stream, "buffer", None)):
stream.buffer.write(msg.encode(stream.encoding))
else:
stream.write(msg)
except UnicodeError:
stream.write(msg.encode("UTF-8"))
self.flush()
def close(self):
self.stream.write("\n\n")
self.stream.flush()
super(MarkdownReportHandler, self).close()
class BenchmarkDoctor(object):
"""Checks that the benchmark conforms to the standard set of requirements.
Benchmarks that are part of Swift Benchmark Suite are required to follow
    a set of rules that ensure quality measurements. These include the naming
    convention and robustness when varying execution parameters such as
    `num-iters` and `num-samples` (no setup overhead, constant memory
    consumption).
"""
log = logging.getLogger("BenchmarkDoctor")
log_naming = log.getChild("naming")
log_runtime = log.getChild("runtime")
log_memory = log.getChild("memory")
log.setLevel(logging.DEBUG)
def __init__(self, args, driver=None):
"""Initialize with command line parameters.
Optional `driver` parameter for injecting dependency; used for testing.
"""
super(BenchmarkDoctor, self).__init__()
self.results = {}
if hasattr(args, "markdown") and args.markdown:
self.console_handler = MarkdownReportHandler(sys.stdout)
else:
self.console_handler = logging.StreamHandler(sys.stdout)
self.console_handler.setFormatter(
LoggingReportFormatter(use_color=sys.stdout.isatty())
)
self.console_handler.setLevel(
logging.DEBUG if args.verbose else logging.INFO
)
self.driver = driver or BenchmarkDriver(args)
self.log.addHandler(self.console_handler)
self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
self.requirements = [
self._name_matches_benchmark_naming_convention,
self._name_is_at_most_40_chars_long,
self._no_setup_overhead,
self._reasonable_setup_time,
self._optimized_runtime_in_range,
self._constant_memory_use,
]
def __del__(self):
"""Close log handlers on exit."""
for handler in list(self.log.handlers):
handler.close()
self.log.removeHandler(self.console_handler)
benchmark_naming_convention_re = re.compile(r"[A-Z][a-zA-Z0-9\-.!?]+")
camel_humps_re = re.compile(r"[a-z][A-Z]")
@staticmethod
def _name_matches_benchmark_naming_convention(measurements):
name = measurements["name"]
match = BenchmarkDoctor.benchmark_naming_convention_re.match(name)
matched = match.group(0) if match else ""
composite_words = len(BenchmarkDoctor.camel_humps_re.findall(name)) + 1
if name != matched:
BenchmarkDoctor.log_naming.error(
"'%s' name doesn't conform to benchmark naming convention.", name
)
BenchmarkDoctor.log_naming.info("See http://bit.ly/BenchmarkNaming")
if composite_words > 4:
BenchmarkDoctor.log_naming.warning(
"'%s' name is composed of %d words.", name, composite_words
)
BenchmarkDoctor.log_naming.info(
"Split '%s' name into dot-separated groups and variants. "
"See http://bit.ly/BenchmarkNaming",
name,
)
@staticmethod
def _name_is_at_most_40_chars_long(measurements):
name = measurements["name"]
if len(name) > 40:
BenchmarkDoctor.log_naming.error(
"'%s' name is %d characters long.", name, len(name)
)
BenchmarkDoctor.log_naming.info(
"Benchmark name should not be longer than 40 characters."
)
@staticmethod
def _select(measurements, num_iters=None, opt_level="O"):
prefix = measurements["name"] + " " + opt_level
prefix += "" if num_iters is None else (" i" + str(num_iters))
return [
series for name, series in measurements.items() if name.startswith(prefix)
]
@staticmethod
def _optimized_runtime_in_range(measurements):
name = measurements["name"]
setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
setup = 0 if ratio < 0.05 else setup
runtimes = []
for i in range(1, 3):
correction = setup / i
i_series = BenchmarkDoctor._select(measurements, num_iters=i)
for result in i_series:
runtimes.append(result.min_value - correction)
runtime = min(runtimes)
threshold = 1000
if threshold < runtime:
log = (
BenchmarkDoctor.log_runtime.warning
if runtime < 10000
else BenchmarkDoctor.log_runtime.error
)
caveat = "" if setup == 0 else " (excluding the setup overhead)"
log("'%s' execution took at least %d μs%s.", name, runtime, caveat)
def factor(base): # suitable divisor that's integer power of base
return int(
pow(base, math.ceil(math.log(runtime / float(threshold), base)))
)
BenchmarkDoctor.log_runtime.info(
"Decrease the workload of '%s' by a factor of %d (%d), to be "
"less than %d μs.",
name,
factor(2),
factor(10),
threshold,
)
threshold = 20
if runtime < threshold:
log = (
BenchmarkDoctor.log_runtime.error
if runtime == 0
else BenchmarkDoctor.log_runtime.warning
)
log("'%s' execution took %d μs.", name, runtime)
BenchmarkDoctor.log_runtime.info(
"Ensure the workload of '%s' has a properly measurable size"
" (runtime > %d μs) and is not eliminated by the compiler (use"
" `blackHole` function if necessary)."
if runtime == 0
else "Increase the workload of '%s' to be more than %d μs.",
name,
threshold,
)
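    # Worked example: runtime = 2500 μs against the 1000 μs threshold gives
    # factor(2) = 2**ceil(log2(2.5)) = 4 and factor(10) = 10, so the doctor
    # suggests shrinking the workload by a factor of 4 (or 10).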
@staticmethod
def _setup_overhead(measurements):
select = BenchmarkDoctor._select
ti1, ti2 = [
float(min(mins))
for mins in [
[result.min_value for result in i_series]
for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
]
]
setup = int(round(2.0 * (ti1 - ti2))) if ti2 > 20 else 0 # limit of accuracy
ratio = (setup / ti1) if ti1 > 0 else 0
return (setup, ratio)
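    # Rationale: with one-time setup cost s and per-iteration work w, the best
    # sample measures roughly w + s/i for num_iters=i, so ti1 - ti2 = s/2 and
    # s = 2 * (ti1 - ti2). E.g. ti1 = 1500 μs and ti2 = 1300 μs imply a
    # 400 μs setup with ratio ≈ 0.27, well above the 5% limit checked below.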
@staticmethod
def _no_setup_overhead(measurements):
setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
if ratio > 0.05:
BenchmarkDoctor.log_runtime.error(
"'%s' has setup overhead of %d μs (%.1f%%).",
measurements["name"],
setup,
round((100 * ratio), 1),
)
BenchmarkDoctor.log_runtime.info(
"Move initialization of benchmark data to the `setUpFunction` "
"registered in `BenchmarkInfo`."
)
@staticmethod
def _reasonable_setup_time(measurements):
setup = min(
[result.setup or 0 for result in BenchmarkDoctor._select(measurements)]
)
if 200000 < setup: # 200 ms
BenchmarkDoctor.log_runtime.error(
"'%s' setup took at least %d μs.", measurements["name"], setup
)
BenchmarkDoctor.log_runtime.info(
"The `setUpFunction` should take no more than 200 ms."
)
@staticmethod
def _constant_memory_use(measurements):
select = BenchmarkDoctor._select
name = measurements["name"]
memory_uses = [
[r.mem_pages for r in i_series if r.mem_pages is not None]
for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
]
memory_uses = [m for m in memory_uses if m]
if not memory_uses:
BenchmarkDoctor.log_memory.info(
"unable to compute memory footprint of '%s'",
name,
)
return
(min_i1, max_i1), (min_i2, max_i2) = [
(min(memory_use), max(memory_use))
for memory_use in memory_uses
]
range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2
normal_range = 15 # pages
more_info = False
if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range):
more_info = True
BenchmarkDoctor.log_memory.error(
"'%s' varies the memory footprint of the base "
"workload depending on the `num-iters`.",
name,
)
if max(range_i1, range_i2) > normal_range:
more_info = True
BenchmarkDoctor.log_memory.warning(
"'%s' has very wide range of memory used between "
"independent, repeated measurements.",
name,
)
if more_info:
BenchmarkDoctor.log_memory.info(
"'%s' mem_pages [i1, i2]: min=[%d, %d] 𝚫=%d R=[%d, %d]",
name,
*[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2]
)
@staticmethod
def _adjusted_1s_samples(runtime):
u"""Return sample count that can be taken in approximately 1 second.
Based on the runtime (μs) of one sample taken with num-iters=1.
"""
if runtime == 0:
return 2
s = 1000000 / float(runtime) # samples for 1s run
s = int(pow(2, round(math.log(s, 2)))) # rounding to power of 2
return s if s > 2 else 2 # always take at least 2 samples
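    # E.g. a 300 μs runtime yields 1e6 / 300 ≈ 3333 samples per second,
    # rounded to the nearest power of 2: 2**12 = 4096 samples.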
def measure(self, benchmark):
"""Measure benchmark with varying iterations and optimization levels.
Returns a dictionary with benchmark name and `PerformanceTestResult`s.
"""
self.log.debug("Calibrating num-samples for {0}:".format(benchmark))
r = self.driver.run(
benchmark, num_samples=3, num_iters=1, verbose=True
) # calibrate
num_samples = self._adjusted_1s_samples(r.min_value)
def capped(s):
return min(s, 200)
        run_args = [(capped(num_samples), 1), (capped(num_samples // 2), 2)]
opts = self.driver.args.optimization
opts = opts if isinstance(opts, list) else [opts]
self.log.debug(
"Runtime {0} μs yields {1} adjusted samples per second.".format(
r.min_value, num_samples
)
)
self.log.debug(
"Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)".format(
benchmark, run_args[0][0], run_args[1][0]
)
)
measurements = dict(
[
(
"{0} {1} i{2}{3}".format(benchmark, o, i, suffix),
self.driver.run(
benchmark,
num_samples=s,
num_iters=i,
verbose=True,
measure_memory=True,
),
)
for o in opts
for s, i in run_args
for suffix in list("abcde")
]
)
measurements["name"] = benchmark
return measurements
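    # The returned dictionary maps keys like "Ackermann O i1a" ... "Ackermann
    # O i2e" (5 series per num-iters value and optimization level; benchmark
    # name is hypothetical) to their measured results, plus a "name" entry.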
def analyze(self, benchmark_measurements):
"""Analyze whether benchmark fulfills all requirements."""
self.log.debug("Analyzing %s", benchmark_measurements["name"])
for rule in self.requirements:
rule(benchmark_measurements)
def check(self):
"""Measure and analyse all enabled tests."""
for test in self.driver.tests:
self.analyze(self.measure(test))
@staticmethod
def run_check(args):
"""Validate benchmarks conform to health rules, report violations."""
doctor = BenchmarkDoctor(args)
doctor.check()
# TODO non-zero error code when errors are logged
# See https://stackoverflow.com/a/31142078/41307
return 0
def format_name(log_path):
"""Return the filename and directory for a log file."""
return "/".join(log_path.split("/")[-2:])
def compare_logs(compare_script, new_log, old_log, log_dir, opt):
"""Return diff of log files at paths `new_log` and `old_log`."""
print("Comparing %s %s ..." % (format_name(old_log), format_name(new_log)))
subprocess.call(
[
compare_script,
"--old-file",
old_log,
"--new-file",
new_log,
"--format",
"markdown",
"--output",
os.path.join(log_dir, "latest_compare_{0}.md".format(opt)),
]
)
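# The comparison below expects logs laid out by `BenchmarkDriver.log_file`,
# i.e. <log_dir>/<branch>/Benchmark_<opt>-*.log, and `compare_logs` writes
# each report to <log_dir>/latest_compare_<opt>.md.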
def compare(args):
log_dir = args.log_dir
compare_script = args.compare_script
baseline_branch = args.baseline_branch
current_branch = BenchmarkDriver(args, tests=[""])._git(
"rev-parse --abbrev-ref HEAD"
)
current_branch_dir = os.path.join(log_dir, current_branch)
baseline_branch_dir = os.path.join(log_dir, baseline_branch)
if current_branch != baseline_branch and not os.path.isdir(baseline_branch_dir):
print(
(
"Unable to find benchmark logs for {baseline_branch} branch. "
+ "Set a baseline benchmark log by passing --benchmark to "
+ "build-script while on {baseline_branch} branch."
).format(baseline_branch=baseline_branch)
)
return 1
recent_logs = {}
for branch_dir in [current_branch_dir, baseline_branch_dir]:
for opt in ["O", "Onone"]:
recent_logs[os.path.basename(branch_dir) + "_" + opt] = sorted(
glob.glob(os.path.join(branch_dir, "Benchmark_" + opt + "-*.log")),
key=os.path.getctime,
reverse=True,
)
if current_branch == baseline_branch:
if (
len(recent_logs[baseline_branch + "_O"]) > 1
and len(recent_logs[baseline_branch + "_Onone"]) > 1
):
compare_logs(
compare_script,
recent_logs[baseline_branch + "_O"][0],
recent_logs[baseline_branch + "_O"][1],
log_dir,
"O",
)
compare_logs(
compare_script,
recent_logs[baseline_branch + "_Onone"][0],
recent_logs[baseline_branch + "_Onone"][1],
log_dir,
"Onone",
)
else:
print(
(
"{baseline_branch}/{baseline_branch} comparison "
+ "skipped: no previous {baseline_branch} logs"
).format(baseline_branch=baseline_branch)
)
else:
# TODO: Check for outdated baseline branch log
if (
len(recent_logs[current_branch + "_O"]) == 0
or len(recent_logs[current_branch + "_Onone"]) == 0
):
print("branch sanity failure: missing branch logs")
return 1
if (
len(recent_logs[current_branch + "_O"]) == 1
or len(recent_logs[current_branch + "_Onone"]) == 1
):
print("branch/branch comparison skipped: no previous branch logs")
else:
compare_logs(
compare_script,
recent_logs[current_branch + "_O"][0],
recent_logs[current_branch + "_O"][1],
log_dir,
"O",
)
compare_logs(
compare_script,
recent_logs[current_branch + "_Onone"][0],
recent_logs[current_branch + "_Onone"][1],
log_dir,
"Onone",
)
if (
len(recent_logs[baseline_branch + "_O"]) == 0
or len(recent_logs[baseline_branch + "_Onone"]) == 0
):
print(
(
"branch/{baseline_branch} failure: no {baseline_branch} " + "logs"
).format(baseline_branch=baseline_branch)
)
return 1
else:
compare_logs(
compare_script,
recent_logs[current_branch + "_O"][0],
recent_logs[baseline_branch + "_O"][0],
log_dir,
"O",
)
compare_logs(
compare_script,
recent_logs[current_branch + "_Onone"][0],
recent_logs[baseline_branch + "_Onone"][0],
log_dir,
"Onone",
)
# TODO: Fail on large regressions
return 0
def positive_int(value):
"""Verify the value is a positive integer."""
ivalue = int(value)
if not (ivalue > 0):
raise ValueError
return ivalue
def parse_args(args):
"""Parse command line arguments and set default values."""
parser = argparse.ArgumentParser(
epilog="Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*"
)
subparsers = parser.add_subparsers(
title="Swift benchmark driver commands",
help="See COMMAND -h for additional arguments",
metavar="COMMAND",
)
subparsers.required = True
shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
benchmarks_group.add_argument(
"benchmarks",
default=[],
help="benchmark to run (default: all)",
nargs="*",
metavar="BENCHMARK",
)
benchmarks_group.add_argument(
"-f",
"--filter",
dest="filters",
action="append",
help="run all tests whose name match regular expression PATTERN, "
+ "multiple filters are supported",
metavar="PATTERN",
)
shared_benchmarks_parser.add_argument(
"-t",
"--tests",
help="directory containing Benchmark_O{,none,size} " + "(default: DRIVER_DIR)",
default=DRIVER_DIR,
)
shared_benchmarks_parser.add_argument(
"-o",
"--optimization",
metavar="OPT",
choices=["O", "Onone", "Osize"],
help="optimization level to use: {O,Onone,Osize}, (default: O)",
default="O",
)
shared_benchmarks_parser.add_argument(
"--architecture",
metavar="architecture",
help="current architecture (e.g., x86_64, arm64, etc)",
default=None,
)
run_parser = subparsers.add_parser(
"run",
help="Run benchmarks and output results to stdout",
parents=[shared_benchmarks_parser],
)
run_parser.add_argument(
"-i",
"--independent-samples",
help="number of times to run each test (default: 1)",
type=positive_int,
default=1,
)
run_parser.add_argument(
"--output-dir", help="log results to directory (default: no logging)"
)
run_parser.add_argument(
"--swift-repo", help="absolute path to the Swift source repository"
)
run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks)
check_parser = subparsers.add_parser(
"check", help="", parents=[shared_benchmarks_parser]
)
check_group = check_parser.add_mutually_exclusive_group()
check_group.add_argument(
"-v",
"--verbose",
action="store_true",
help="show more details during benchmark analysis",
)
check_group.add_argument(
"-md", "--markdown", action="store_true", help="format report as Markdown table"
)
check_parser.set_defaults(func=BenchmarkDoctor.run_check)
compare_parser = subparsers.add_parser("compare", help="Compare benchmark results")
compare_parser.add_argument(
"--log-dir", required=True, help="directory containing benchmark logs"
)
compare_parser.add_argument(
"--swift-repo",
required=True,
help="absolute path to the Swift source repository",
)
compare_parser.add_argument(
"--compare-script", required=True, help="absolute path to compare script"
)
compare_parser.add_argument(
"--baseline-branch",
default="main",
help="attempt to compare results to baseline results for specified "
"branch (default: main)",
)
compare_parser.set_defaults(func=compare)
return parser.parse_args(args)
def main():
"""Parse command line arguments and execute the specified COMMAND."""
args = parse_args(sys.argv[1:])
return args.func(args)
if __name__ == "__main__":
exit(main())