mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
Without additional options, build-script -B was badly broken:

* It added a broken --independent-samples option to the driver command line
* Slow tests that ran only 1 sample by default would break the statistics

Fix the first issue by adding `--independent-samples` to the command line only when a sample was actually provided by other options. Fix the second issue by including `--min-samples=2` in the command.
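Illustrative note (added by the editor, not part of the original commit message): with the fix, when no sample count is given the driver builds a harness invocation roughly like `Benchmark_O <test> --min-samples=2 --num-iters=1 --memory` (see `_cmd_run` and `run_independent_samples` below), while `--num-samples=N` is emitted only when a count was explicitly requested.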
1022 lines
34 KiB
Python
Executable File
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# ===--- Benchmark_Driver ------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
"""
Benchmark_Driver is a tool for running and analysing the Swift Benchmark Suite.

Example:
    $ Benchmark_Driver run

Use `Benchmark_Driver -h` for help on available commands and options.

class `BenchmarkDriver` runs performance tests and implements the `run` COMMAND.
class `BenchmarkDoctor` analyzes performance tests, implements `check` COMMAND.

"""

import argparse
import functools
import glob
import logging
import math
import os
import platform
import re
import subprocess
import sys
import time

from compare_perf_tests import LogParser

DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))


class BenchmarkDriver(object):
    """Executes tests from Swift Benchmark Suite.

    It's a higher level wrapper for the Benchmark_X family of binaries
    (X = [O, Onone, Osize]).
    """

    def __init__(self, args, tests=None, _subprocess=None, parser=None):
        """Initialize with command line arguments.

        Optional parameters are for injecting dependencies -- used for testing.
        """
        self.args = args
        self.run_env = os.environ.copy()

        # Set a constant hash seed. Some tests are highly sensitive to
        # fluctuations in the number of hash collisions.
        self.run_env["SWIFT_DETERMINISTIC_HASHING"] = "1"

        if hasattr(args, 'libdir') and args.libdir:
            # The benchmark binaries should pick up the built swift libraries
            # automatically, because their RPATH should point to ../lib/swift
            # But it does not harm to additionally set the dynamic library path,
            # e.g. as a workaround to rdar://78584073
            if platform.system() == "Darwin":
                self.run_env["DYLD_LIBRARY_PATH"] = args.libdir
            elif platform.system() == "Linux":
                self.run_env["LD_LIBRARY_PATH"] = args.libdir

        self._subprocess = _subprocess or subprocess
        self.all_tests = []
        self.test_number = {}
        self.tests = tests or self._get_tests()
        self.parser = parser or LogParser()
        self.results = {}

    def _invoke(self, cmd):
        return self._subprocess.check_output(
            cmd, stderr=self._subprocess.STDOUT, universal_newlines=True,
            env=self.run_env
        )

    @property
    def test_harness(self):
        """Full path to test harness binary."""
        suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
        suffix += "-"
        if hasattr(self.args, "architecture") and self.args.architecture:
            suffix += self.args.architecture
        pattern = os.path.join(self.args.tests, "Benchmark_" + suffix + "*")
        executables = []
        if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
            executables = [pattern]
        else:
            executables = glob.glob(pattern)
        if len(executables) == 0:
            raise ValueError(
                "No benchmark executable for file name pattern " +
                pattern + " found")
        if len(executables) > 1:
            raise ValueError(
                "Multiple benchmark executables for file name pattern " +
                pattern + " found\n" +
                str(executables) +
                "\nSpecify the architecture to select the right benchmark executable")
        return executables[0]

    def _git(self, cmd):
        """Execute the Git command in the `swift-repo`."""
        return self._invoke(
            ("git -C {0} ".format(self.args.swift_repo) + cmd).split()
        ).strip()

    @property
    def log_file(self):
        """Full path to log file.

        If `swift-repo` is set, log file is tied to Git branch and revision.
        """
        if not self.args.output_dir:
            return None
        log_dir = self.args.output_dir
        harness_name = os.path.basename(self.test_harness)
        suffix = "-" + time.strftime("%Y%m%d%H%M%S", time.localtime())
        if self.args.swift_repo:
            log_dir = os.path.join(
                log_dir, self._git("rev-parse --abbrev-ref HEAD")
            )  # branch
            suffix += "-" + self._git("rev-parse --short HEAD")  # revision
        return os.path.join(log_dir, harness_name + suffix + ".log")

    @property
    def _cmd_list_benchmarks(self):
        # TODO: Switch to JSON format: add "--json" here
        return [self.test_harness, "--list"] + (
            ["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
        )

    def _get_tests(self):
        """Return a list of performance tests to run."""
        lines = self._invoke(self._cmd_list_benchmarks).split("\n")
        json_tests = []
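        # Editor's note: each line of the harness `--list` output is expected
        # to look like "<number>,<name>[,<tags>...]"; lines that don't parse
        # (e.g. a header line) are simply skipped below.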
        for line in lines:
            columns = re.split(r'[ ,]+', line.strip())
            try:
                number = int(columns[0])
                name = columns[1]
                json_descr = {"number": number, "name": name}
                json_tests.append(json_descr)
            except Exception:
                continue
            # TODO: Replace the above with the following to
            # use the JSON output from the benchmark driver
            # directly
            # if line.strip() != "":
            #     json_tests.append(json.loads(line))
        self.all_tests = [json["name"] for json in json_tests]
        test_numbers = [json["number"] for json in json_tests]
        self.test_number = dict([(json["name"], json["number"]) for json in json_tests])
        if self.args.filters:
            return self._tests_matching_patterns()
        if self.args.benchmarks:
            return self._tests_by_name_or_number(test_numbers)
        return self.all_tests

    def _tests_matching_patterns(self):
        matches = set()
        for fil in self.args.filters:
            pattern = re.compile(fil)
            new_matches = filter(pattern.match, self.all_tests)
            matches = matches.union(new_matches)
        return sorted(list(matches))

    def _tests_by_name_or_number(self, test_numbers):
        benchmarks = set(self.args.benchmarks)
        numbers = list(map(str, test_numbers))
        number_to_name = dict(zip(numbers, self.all_tests))
        tests_by_number = [
            number_to_name[i] for i in benchmarks.intersection(numbers)
        ]
        return sorted(
            list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
        )

    def run(
        self,
        test=None,
        num_samples=None,
        num_iters=None,
        sample_time=None,
        verbose=None,
        measure_memory=False
    ):
        """Execute benchmark and gather results."""
        num_samples = num_samples or 0
        num_iters = num_iters or 0  # automatically determine N to run for 1s
        sample_time = sample_time or 0  # default is 1s

        cmd = self._cmd_run(
            test, num_samples, num_iters, sample_time, verbose, measure_memory
        )
        output = self._invoke(cmd)
        results = self.parser.results_from_string(output)
        if test:
            return list(results.items())[0][1]
        else:
            return results

    def _cmd_run(
        self,
        test,
        num_samples,
        num_iters,
        sample_time,
        verbose,
        measure_memory
    ):
        cmd = [self.test_harness]
        if test:
            cmd.append(test)
        else:
            cmd.extend([str(self.test_number.get(name, name)) for name in self.tests])
        if num_samples > 0:
            cmd.append("--num-samples={0}".format(num_samples))
        else:
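            # Editor's note: always ask for at least two samples here; with a
            # single sample, slow tests would break the statistics (see the
            # commit message above).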
            cmd.append("--min-samples=2")
        if num_iters > 0:
            cmd.append("--num-iters={0}".format(num_iters))
        if sample_time > 0:
            cmd.append("--sample-time={0}".format(sample_time))
        if verbose:
            cmd.append("--verbose")
        if measure_memory:
            cmd.append("--memory")
        # TODO: Uncomment this as soon as the new Benchmark Swift logic is available everywhere
        # cmd.append("--json")
        return cmd

    def run_independent_samples(self, test):
        """Run benchmark multiple times, gathering independent samples.

        Returns the aggregated result of independent benchmark invocations.
        """

        def merge_results(a, b):
            a.merge(b)
            return a

        return functools.reduce(
            merge_results,
            [
                self.run(test, measure_memory=True, num_iters=1)
                for _ in range(self.args.independent_samples)
            ],
        )

    def log_results(self, results, log_file=None):
        """Log output to `log_file`.

        Creates `args.output_dir` if it doesn't exist yet.
        """
        log_file = log_file or self.log_file
        dir = os.path.dirname(log_file)
        if not os.path.exists(dir):
            os.makedirs(dir)
        print("Logging results to: %s" % log_file)
        with open(log_file, "w") as f:
            for r in results:
                print(r, file=f)

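    # Editor's note: the column widths in RESULT line up with the
    # summary_header row printed from run_and_log() below.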
    RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"

    def run_and_log(self, csv_console=True):
        """Run benchmarks and continuously log results to the console.

        There are two console log formats: CSV and justified columns. Both are
        compatible with `LogParser`. Depending on the `csv_console` parameter,
        the CSV log format is either printed to console or returned as a string
        from this method. When `csv_console` is False, the console output
        format is justified columns.
        """
        format = (
            (lambda values: ",".join(values))
            if csv_console
            else (lambda values: self.RESULT.format(*values))
        )  # justified columns

        def console_log(values):
            print(format(values))

        def summary(r):
            return list(
                map(
                    str,
                    [
                        r.test_num,
                        r.name,
                        r.num_samples,
                        r.min_value,
                        r.q1,
                        r.median,
                        r.q3,
                        r.max_value,
                        r.max_rss,
                    ],
                )
            )

        summary_header = [
            "#",
            "TEST",
            "SAMPLES",
            "MIN(μs)",
            "Q1(μs)",
            "MEDIAN(μs)",
            "Q3(μs)",
            "MAX(μs)",
            "MAX_RSS(B)",
        ]
        console_log(summary_header)
        results = []
        for test in self.tests:
            result = self.run_independent_samples(test)
            console_log(summary(result))
            results.append(result)

        print("\nTotal performance tests executed: {0}".format(len(self.tests)))
        return results

    @staticmethod
    def run_benchmarks(args):
        """Run benchmarks and log results."""
        driver = BenchmarkDriver(args)
        results = driver.run_and_log(csv_console=(args.output_dir is None))
        if args.output_dir:
            driver.log_results([r.json for r in results])
        return 0


class LoggingReportFormatter(logging.Formatter):
    """Format logs as plain text or with colors on the terminal.

    Plain text outputs level, category and message: 'DEBUG category: Hi!'
    Colored output uses color coding based on the level.
    """

    import logging as log

    colors = {
        log.DEBUG: "9",
        log.INFO: "2",
        log.WARNING: "3",
        log.ERROR: "1",
        log.CRITICAL: "5",
    }

    def __init__(self, use_color=False):
        """Specify if report should use colors; defaults to False."""
        super(LoggingReportFormatter, self).__init__("%(message)s")
        self.use_color = use_color

    def format(self, record):
        """Format the log record with level and category."""
        msg = super(LoggingReportFormatter, self).format(record)
        category = (record.name.split(".")[-1] + ": ") if "." in record.name else ""
        return (
            "\033[1;3{0}m{1}{2}\033[1;0m".format(
                self.colors[record.levelno], category, msg
            )
            if self.use_color
            else "{0} {1}{2}".format(record.levelname, category, msg)
        )


class MarkdownReportHandler(logging.StreamHandler):
    r"""Write custom formatted messages from BenchmarkDoctor to the stream.

    It works around StreamHandler's hardcoded '\n' and handles the custom
    level and category formatting for BenchmarkDoctor's check report.
    """

    def __init__(self, stream):
        """Initialize the handler and write a Markdown table header."""
        super(MarkdownReportHandler, self).__init__(stream)
        self.setLevel(logging.INFO)
        self.stream.write("\n✅ | Benchmark Check Report\n---|---")
        self.stream.flush()

    levels = {
        logging.WARNING: "\n⚠️",
        logging.ERROR: "\n⛔️",
        logging.INFO: " <br><sub> ",
    }
    categories = {"naming": "🔤", "runtime": "⏱", "memory": "Ⓜ️"}
    quotes_re = re.compile("'")

    def format(self, record):
        msg = super(MarkdownReportHandler, self).format(record)
        return (
            self.levels.get(record.levelno, "")
            + (
                ""
                if record.levelno == logging.INFO
                else self.categories.get(record.name.split(".")[-1], "") + " | "
            )
            + self.quotes_re.sub("`", msg)
        )

    def emit(self, record):
        msg = self.format(record)
        stream = self.stream
        try:
            if isinstance(msg, str) and (getattr(stream, "encoding", None) and
                                         getattr(stream, "buffer", None)):
                stream.buffer.write(msg.encode(stream.encoding))
            else:
                stream.write(msg)
        except UnicodeError:
            stream.write(msg.encode("UTF-8"))
        self.flush()

    def close(self):
        self.stream.write("\n\n")
        self.stream.flush()
        super(MarkdownReportHandler, self).close()


class BenchmarkDoctor(object):
    """Checks that the benchmark conforms to the standard set of requirements.

    Benchmarks that are part of Swift Benchmark Suite are required to follow
    a set of rules that ensure quality measurements. These include naming
    convention, robustness when varying execution parameters like
    `num-iters` and `num-samples` (no setup overhead, constant memory
    consumption).
    """

    log = logging.getLogger("BenchmarkDoctor")
    log_naming = log.getChild("naming")
    log_runtime = log.getChild("runtime")
    log_memory = log.getChild("memory")
    log.setLevel(logging.DEBUG)

    def __init__(self, args, driver=None):
        """Initialize with command line parameters.

        Optional `driver` parameter for injecting dependency; used for testing.
        """
        super(BenchmarkDoctor, self).__init__()
        self.results = {}

        if hasattr(args, "markdown") and args.markdown:
            self.console_handler = MarkdownReportHandler(sys.stdout)
        else:
            self.console_handler = logging.StreamHandler(sys.stdout)
            self.console_handler.setFormatter(
                LoggingReportFormatter(use_color=sys.stdout.isatty())
            )
            self.console_handler.setLevel(
                logging.DEBUG if args.verbose else logging.INFO
            )
        self.driver = driver or BenchmarkDriver(args)
        self.log.addHandler(self.console_handler)
        self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
        self.requirements = [
            self._name_matches_benchmark_naming_convention,
            self._name_is_at_most_40_chars_long,
            self._no_setup_overhead,
            self._reasonable_setup_time,
            self._optimized_runtime_in_range,
            self._constant_memory_use,
        ]

    def __del__(self):
        """Close log handlers on exit."""
        for handler in list(self.log.handlers):
            handler.close()
        self.log.removeHandler(self.console_handler)

    benchmark_naming_convention_re = re.compile(r"[A-Z][a-zA-Z0-9\-.!?]+")
    camel_humps_re = re.compile(r"[a-z][A-Z]")
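    # Editor's note: camel_humps_re counts lowercase-to-uppercase transitions;
    # _name_matches_benchmark_naming_convention uses it below to estimate how
    # many words make up a benchmark name.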

    @staticmethod
    def _name_matches_benchmark_naming_convention(measurements):
        name = measurements["name"]
        match = BenchmarkDoctor.benchmark_naming_convention_re.match(name)
        matched = match.group(0) if match else ""
        composite_words = len(BenchmarkDoctor.camel_humps_re.findall(name)) + 1

        if name != matched:
            BenchmarkDoctor.log_naming.error(
                "'%s' name doesn't conform to benchmark naming convention.", name
            )
            BenchmarkDoctor.log_naming.info("See http://bit.ly/BenchmarkNaming")

        if composite_words > 4:
            BenchmarkDoctor.log_naming.warning(
                "'%s' name is composed of %d words.", name, composite_words
            )
            BenchmarkDoctor.log_naming.info(
                "Split '%s' name into dot-separated groups and variants. "
                "See http://bit.ly/BenchmarkNaming",
                name,
            )

    @staticmethod
    def _name_is_at_most_40_chars_long(measurements):
        name = measurements["name"]

        if len(name) > 40:
            BenchmarkDoctor.log_naming.error(
                "'%s' name is %d characters long.", name, len(name)
            )
            BenchmarkDoctor.log_naming.info(
                "Benchmark name should not be longer than 40 characters."
            )

    @staticmethod
    def _select(measurements, num_iters=None, opt_level="O"):
        prefix = measurements["name"] + " " + opt_level
        prefix += "" if num_iters is None else (" i" + str(num_iters))
        return [
            series for name, series in measurements.items() if name.startswith(prefix)
        ]

    @staticmethod
    def _optimized_runtime_in_range(measurements):
        name = measurements["name"]
        setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
        setup = 0 if ratio < 0.05 else setup
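        # Editor's note: setup overhead below 5 % is treated as noise; the
        # amortized setup (setup / num-iters) is then subtracted from each
        # series before the runtime thresholds below are checked.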

        runtimes = []
        for i in range(1, 3):
            correction = setup / i
            i_series = BenchmarkDoctor._select(measurements, num_iters=i)
            for result in i_series:
                runtimes.append(result.min_value - correction)
        runtime = min(runtimes)

        threshold = 1000
        if threshold < runtime:
            log = (
                BenchmarkDoctor.log_runtime.warning
                if runtime < 10000
                else BenchmarkDoctor.log_runtime.error
            )
            caveat = "" if setup == 0 else " (excluding the setup overhead)"
            log("'%s' execution took at least %d μs%s.", name, runtime, caveat)

            def factor(base):  # suitable divisor that's integer power of base
                return int(
                    pow(base, math.ceil(math.log(runtime / float(threshold), base)))
                )

            BenchmarkDoctor.log_runtime.info(
                "Decrease the workload of '%s' by a factor of %d (%d), to be "
                "less than %d μs.",
                name,
                factor(2),
                factor(10),
                threshold,
            )

        threshold = 20
        if runtime < threshold:
            log = (
                BenchmarkDoctor.log_runtime.error
                if runtime == 0
                else BenchmarkDoctor.log_runtime.warning
            )
            log("'%s' execution took %d μs.", name, runtime)

            BenchmarkDoctor.log_runtime.info(
                "Ensure the workload of '%s' has a properly measurable size"
                " (runtime > %d μs) and is not eliminated by the compiler (use"
                " `blackHole` function if necessary)."
                if runtime == 0
                else "Increase the workload of '%s' to be more than %d μs.",
                name,
                threshold,
            )

    @staticmethod
    def _setup_overhead(measurements):
        select = BenchmarkDoctor._select
        ti1, ti2 = [
            float(min(mins))
            for mins in [
                [result.min_value for result in i_series]
                for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
            ]
        ]
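        # Editor's note (assumption about the harness): reported times are per
        # iteration, so a sample with num-iters=i costs roughly work + setup/i.
        # Comparing the i=1 and i=2 series then gives setup ≈ 2 * (ti1 - ti2);
        # differences below the ~20 μs accuracy limit are ignored.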
        setup = int(round(2.0 * (ti1 - ti2))) if ti2 > 20 else 0  # limit of accuracy
        ratio = (setup / ti1) if ti1 > 0 else 0
        return (setup, ratio)

    @staticmethod
    def _no_setup_overhead(measurements):
        setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
        if ratio > 0.05:
            BenchmarkDoctor.log_runtime.error(
                "'%s' has setup overhead of %d μs (%.1f%%).",
                measurements["name"],
                setup,
                round((100 * ratio), 1),
            )
            BenchmarkDoctor.log_runtime.info(
                "Move initialization of benchmark data to the `setUpFunction` "
                "registered in `BenchmarkInfo`."
            )

    @staticmethod
    def _reasonable_setup_time(measurements):
        setup = min(
            [result.setup or 0 for result in BenchmarkDoctor._select(measurements)]
        )
        if 200000 < setup:  # 200 ms
            BenchmarkDoctor.log_runtime.error(
                "'%s' setup took at least %d μs.", measurements["name"], setup
            )
            BenchmarkDoctor.log_runtime.info(
                "The `setUpFunction` should take no more than 200 ms."
            )

    @staticmethod
    def _constant_memory_use(measurements):
        select = BenchmarkDoctor._select
        name = measurements["name"]

        memory_uses = [
            [r.mem_pages for r in i_series if r.mem_pages is not None]
            for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
        ]
        memory_uses = [m for m in memory_uses if m]
        if not memory_uses:
            BenchmarkDoctor.log_memory.info(
                "unable to compute memory footprint of '%s'",
                name,
            )
            return

        (min_i1, max_i1), (min_i2, max_i2) = [
            (min(memory_use), max(memory_use))
            for memory_use in memory_uses
        ]
        range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2
        normal_range = 15  # pages
        more_info = False
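        # Editor's note: two separate checks follow. A shift of the minimum
        # footprint between the i1 and i2 series (beyond the observed ranges
        # and the 15-page tolerance) means the base workload depends on
        # num-iters; a wide range within either series flags noisy memory use.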

        if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range):
            more_info = True
            BenchmarkDoctor.log_memory.error(
                "'%s' varies the memory footprint of the base "
                "workload depending on the `num-iters`.",
                name,
            )

        if max(range_i1, range_i2) > normal_range:
            more_info = True
            BenchmarkDoctor.log_memory.warning(
                "'%s' has very wide range of memory used between "
                "independent, repeated measurements.",
                name,
            )

        if more_info:
            BenchmarkDoctor.log_memory.info(
                "'%s' mem_pages [i1, i2]: min=[%d, %d] 𝚫=%d R=[%d, %d]",
                name,
                *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2]
            )

    @staticmethod
    def _adjusted_1s_samples(runtime):
        u"""Return sample count that can be taken in approximately 1 second.

        Based on the runtime (μs) of one sample taken with num-iters=1.
        """
        if runtime == 0:
            return 2
        s = 1000000 / float(runtime)  # samples for 1s run
        s = int(pow(2, round(math.log(s, 2))))  # rounding to power of 2
        return s if s > 2 else 2  # always take at least 2 samples

    def measure(self, benchmark):
        """Measure benchmark with varying iterations and optimization levels.

        Returns a dictionary with benchmark name and `PerformanceTestResult`s.
        """
        self.log.debug("Calibrating num-samples for {0}:".format(benchmark))
        r = self.driver.run(
            benchmark, num_samples=3, num_iters=1, verbose=True
        )  # calibrate
        num_samples = self._adjusted_1s_samples(r.min_value)

        def capped(s):
            return min(s, 200)

        run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)]
        opts = self.driver.args.optimization
        opts = opts if isinstance(opts, list) else [opts]
        self.log.debug(
            "Runtime {0} μs yields {1} adjusted samples per second.".format(
                r.min_value, num_samples
            )
        )
        self.log.debug(
            "Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)".format(
                benchmark, run_args[0][0], run_args[1][0]
            )
        )

        measurements = dict(
            [
                (
                    "{0} {1} i{2}{3}".format(benchmark, o, i, suffix),
                    self.driver.run(
                        benchmark,
                        num_samples=s,
                        num_iters=i,
                        verbose=True,
                        measure_memory=True,
                    ),
                )
                for o in opts
                for s, i in run_args
                for suffix in list("abcde")
            ]
        )
        measurements["name"] = benchmark
        return measurements

    def analyze(self, benchmark_measurements):
        """Analyze whether benchmark fulfills all requirements."""
        self.log.debug("Analyzing %s", benchmark_measurements["name"])
        for rule in self.requirements:
            rule(benchmark_measurements)

    def check(self):
        """Measure and analyse all enabled tests."""
        for test in self.driver.tests:
            self.analyze(self.measure(test))

    @staticmethod
    def run_check(args):
        """Validate benchmarks conform to health rules, report violations."""
        doctor = BenchmarkDoctor(args)
        doctor.check()
        # TODO non-zero error code when errors are logged
        # See https://stackoverflow.com/a/31142078/41307
        return 0


def format_name(log_path):
    """Return the filename and directory for a log file."""
    return "/".join(log_path.split("/")[-2:])


def compare_logs(compare_script, new_log, old_log, log_dir, opt):
    """Return diff of log files at paths `new_log` and `old_log`."""
    print("Comparing %s %s ..." % (format_name(old_log), format_name(new_log)))
    subprocess.call(
        [
            compare_script,
            "--old-file",
            old_log,
            "--new-file",
            new_log,
            "--format",
            "markdown",
            "--output",
            os.path.join(log_dir, "latest_compare_{0}.md".format(opt)),
        ]
    )


def compare(args):
    log_dir = args.log_dir
    compare_script = args.compare_script
    baseline_branch = args.baseline_branch
    current_branch = BenchmarkDriver(args, tests=[""])._git(
        "rev-parse --abbrev-ref HEAD"
    )
    current_branch_dir = os.path.join(log_dir, current_branch)
    baseline_branch_dir = os.path.join(log_dir, baseline_branch)

    if current_branch != baseline_branch and not os.path.isdir(baseline_branch_dir):
        print(
            (
                "Unable to find benchmark logs for {baseline_branch} branch. "
                + "Set a baseline benchmark log by passing --benchmark to "
                + "build-script while on {baseline_branch} branch."
            ).format(baseline_branch=baseline_branch)
        )
        return 1

    recent_logs = {}
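    # Editor's note: recent_logs maps "<branch>_<opt>" to that branch's log
    # files for the given optimization level, sorted newest first.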
    for branch_dir in [current_branch_dir, baseline_branch_dir]:
        for opt in ["O", "Onone"]:
            recent_logs[os.path.basename(branch_dir) + "_" + opt] = sorted(
                glob.glob(os.path.join(branch_dir, "Benchmark_" + opt + "-*.log")),
                key=os.path.getctime,
                reverse=True,
            )

    if current_branch == baseline_branch:
        if (
            len(recent_logs[baseline_branch + "_O"]) > 1
            and len(recent_logs[baseline_branch + "_Onone"]) > 1
        ):
            compare_logs(
                compare_script,
                recent_logs[baseline_branch + "_O"][0],
                recent_logs[baseline_branch + "_O"][1],
                log_dir,
                "O",
            )
            compare_logs(
                compare_script,
                recent_logs[baseline_branch + "_Onone"][0],
                recent_logs[baseline_branch + "_Onone"][1],
                log_dir,
                "Onone",
            )
        else:
            print(
                (
                    "{baseline_branch}/{baseline_branch} comparison "
                    + "skipped: no previous {baseline_branch} logs"
                ).format(baseline_branch=baseline_branch)
            )
    else:
        # TODO: Check for outdated baseline branch log
        if (
            len(recent_logs[current_branch + "_O"]) == 0
            or len(recent_logs[current_branch + "_Onone"]) == 0
        ):
            print("branch sanity failure: missing branch logs")
            return 1

        if (
            len(recent_logs[current_branch + "_O"]) == 1
            or len(recent_logs[current_branch + "_Onone"]) == 1
        ):
            print("branch/branch comparison skipped: no previous branch logs")
        else:
            compare_logs(
                compare_script,
                recent_logs[current_branch + "_O"][0],
                recent_logs[current_branch + "_O"][1],
                log_dir,
                "O",
            )
            compare_logs(
                compare_script,
                recent_logs[current_branch + "_Onone"][0],
                recent_logs[current_branch + "_Onone"][1],
                log_dir,
                "Onone",
            )

        if (
            len(recent_logs[baseline_branch + "_O"]) == 0
            or len(recent_logs[baseline_branch + "_Onone"]) == 0
        ):
            print(
                (
                    "branch/{baseline_branch} failure: no {baseline_branch} " + "logs"
                ).format(baseline_branch=baseline_branch)
            )
            return 1
        else:
            compare_logs(
                compare_script,
                recent_logs[current_branch + "_O"][0],
                recent_logs[baseline_branch + "_O"][0],
                log_dir,
                "O",
            )
            compare_logs(
                compare_script,
                recent_logs[current_branch + "_Onone"][0],
                recent_logs[baseline_branch + "_Onone"][0],
                log_dir,
                "Onone",
            )

    # TODO: Fail on large regressions

    return 0


def positive_int(value):
    """Verify the value is a positive integer."""
    ivalue = int(value)
    if not (ivalue > 0):
        raise ValueError
    return ivalue


def parse_args(args):
    """Parse command line arguments and set default values."""
    parser = argparse.ArgumentParser(
        epilog="Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*"
    )
    subparsers = parser.add_subparsers(
        title="Swift benchmark driver commands",
        help="See COMMAND -h for additional arguments",
        metavar="COMMAND",
    )
    subparsers.required = True

    shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
    benchmarks_group.add_argument(
        "benchmarks",
        default=[],
        help="benchmark to run (default: all)",
        nargs="*",
        metavar="BENCHMARK",
    )
    benchmarks_group.add_argument(
        "-f",
        "--filter",
        dest="filters",
        action="append",
        help="run all tests whose name match regular expression PATTERN, "
        + "multiple filters are supported",
        metavar="PATTERN",
    )
    shared_benchmarks_parser.add_argument(
        "-t",
        "--tests",
        help="directory containing Benchmark_O{,none,size} " + "(default: DRIVER_DIR)",
        default=DRIVER_DIR,
    )
    shared_benchmarks_parser.add_argument(
        "-o",
        "--optimization",
        metavar="OPT",
        choices=["O", "Onone", "Osize"],
        help="optimization level to use: {O,Onone,Osize}, (default: O)",
        default="O",
    )
    shared_benchmarks_parser.add_argument(
        "--architecture",
        metavar="architecture",
        help="current architecture (e.g., x86_64, arm64, etc)",
        default=None,
    )

    run_parser = subparsers.add_parser(
        "run",
        help="Run benchmarks and output results to stdout",
        parents=[shared_benchmarks_parser],
    )
    run_parser.add_argument(
        "-i",
        "--independent-samples",
        help="number of times to run each test (default: 1)",
        type=positive_int,
        default=1,
    )
    run_parser.add_argument(
        "--output-dir", help="log results to directory (default: no logging)"
    )
    run_parser.add_argument(
        "--swift-repo", help="absolute path to the Swift source repository"
    )
    run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks)

    check_parser = subparsers.add_parser(
        "check", help="", parents=[shared_benchmarks_parser]
    )
    check_group = check_parser.add_mutually_exclusive_group()
    check_group.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="show more details during benchmark analysis",
    )
    check_group.add_argument(
        "-md", "--markdown", action="store_true", help="format report as Markdown table"
    )
    check_parser.set_defaults(func=BenchmarkDoctor.run_check)

    compare_parser = subparsers.add_parser("compare", help="Compare benchmark results")
    compare_parser.add_argument(
        "--log-dir", required=True, help="directory containing benchmark logs"
    )
    compare_parser.add_argument(
        "--swift-repo",
        required=True,
        help="absolute path to the Swift source repository",
    )
    compare_parser.add_argument(
        "--compare-script", required=True, help="absolute path to compare script"
    )
    compare_parser.add_argument(
        "--baseline-branch",
        default="main",
        help="attempt to compare results to baseline results for specified "
        "branch (default: main)",
    )
    compare_parser.set_defaults(func=compare)

    return parser.parse_args(args)


def main():
    """Parse command line arguments and execute the specified COMMAND."""
    args = parse_args(sys.argv[1:])
    return args.func(args)


if __name__ == "__main__":
    exit(main())