Mirror of https://github.com/apple/swift.git
Merge pull request #61559 from tbkka/tbkka-benchmarking
Overhaul Benchmarking pipeline to use complete sample data, not summaries
@@ -88,9 +88,10 @@ class BenchmarkDriver(object):
|
|||||||
def test_harness(self):
|
def test_harness(self):
|
||||||
"""Full path to test harness binary."""
|
"""Full path to test harness binary."""
|
||||||
suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
|
suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
|
||||||
|
suffix += "-"
|
||||||
if hasattr(self.args, "architecture") and self.args.architecture:
|
if hasattr(self.args, "architecture") and self.args.architecture:
|
||||||
suffix += "-" + self.args.architecture + "*"
|
suffix += self.args.architecture
|
||||||
pattern = os.path.join(self.args.tests, "Benchmark_" + suffix)
|
pattern = os.path.join(self.args.tests, "Benchmark_" + suffix + "*")
|
||||||
executables = []
|
executables = []
|
||||||
if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
|
if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
|
||||||
executables = [pattern]
|
executables = [pattern]
|
||||||
@@ -134,22 +135,32 @@ class BenchmarkDriver(object):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def _cmd_list_benchmarks(self):
|
def _cmd_list_benchmarks(self):
|
||||||
# Use tab delimiter for easier parsing to override the default comma.
|
# TODO: Switch to JSON format: add "--json" here
|
||||||
# (The third 'column' is always comma-separated list of tags in square
|
return [self.test_harness, "--list"] + (
|
||||||
# brackets -- currently unused here.)
|
|
||||||
return [self.test_harness, "--list", "--delim=\t"] + (
|
|
||||||
["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
|
["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_tests(self):
|
def _get_tests(self):
|
||||||
"""Return a list of performance tests to run."""
|
"""Return a list of performance tests to run."""
|
||||||
number_name_pairs = [
|
lines = self._invoke(self._cmd_list_benchmarks).split("\n")
|
||||||
line.split("\t")[:2]
|
json_tests = []
|
||||||
for line in self._invoke(self._cmd_list_benchmarks).split("\n")[1:-1]
|
for line in lines:
|
||||||
]
|
columns = re.split(r'[ ,]+', line.strip())
|
||||||
# unzip list of pairs into 2 lists
|
try:
|
||||||
test_numbers, self.all_tests = map(list, zip(*number_name_pairs))
|
number = int(columns[0])
|
||||||
self.test_number = dict(zip(self.all_tests, test_numbers))
|
name = columns[1]
|
||||||
|
json_descr = {"number": number, "name": name}
|
||||||
|
json_tests.append(json_descr)
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
# TODO: Replace the above with the following to
|
||||||
|
# use the JSON output from the benchmark driver
|
||||||
|
# directly
|
||||||
|
# if line.strip() != "":
|
||||||
|
# json_tests.append(json.loads(line))
|
||||||
|
self.all_tests = [json["name"] for json in json_tests]
|
||||||
|
test_numbers = [json["number"] for json in json_tests]
|
||||||
|
self.test_number = dict([(json["name"], json["number"]) for json in json_tests])
|
||||||
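For reference, this is how one line of the `--list` output is handled by the fallback parsing above (a minimal sketch; the benchmark number, name, and tags are invented):

```python
import re

# Hypothetical line from `Benchmark_O-* --list`
line = '42 ArrayAppend ["validation", "api", "Array"]'

columns = re.split(r'[ ,]+', line.strip())
number, name = int(columns[0]), columns[1]
assert (number, name) == (42, "ArrayAppend")
```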
if self.args.filters:
|
if self.args.filters:
|
||||||
return self._tests_matching_patterns()
|
return self._tests_matching_patterns()
|
||||||
if self.args.benchmarks:
|
if self.args.benchmarks:
|
||||||
@@ -157,25 +168,19 @@ class BenchmarkDriver(object):
|
|||||||
return self.all_tests
|
return self.all_tests
|
||||||
|
|
||||||
def _tests_matching_patterns(self):
|
def _tests_matching_patterns(self):
|
||||||
regexes = [re.compile(pattern) for pattern in self.args.filters]
|
matches = set()
|
||||||
return sorted(
|
for fil in self.args.filters:
|
||||||
list(
|
pattern = re.compile(fil)
|
||||||
set(
|
new_matches = filter(pattern.match, self.all_tests)
|
||||||
[
|
matches = matches.union(new_matches)
|
||||||
name
|
return sorted(list(matches))
|
||||||
for pattern in regexes
|
|
||||||
for name in self.all_tests
|
|
||||||
if pattern.match(name)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def _tests_by_name_or_number(self, test_numbers):
|
def _tests_by_name_or_number(self, test_numbers):
|
||||||
benchmarks = set(self.args.benchmarks)
|
benchmarks = set(self.args.benchmarks)
|
||||||
number_to_name = dict(zip(test_numbers, self.all_tests))
|
numbers = list(map(str, test_numbers))
|
||||||
|
number_to_name = dict(zip(numbers, self.all_tests))
|
||||||
tests_by_number = [
|
tests_by_number = [
|
||||||
number_to_name[i] for i in benchmarks.intersection(set(test_numbers))
|
number_to_name[i] for i in benchmarks.intersection(numbers)
|
||||||
]
|
]
|
||||||
return sorted(
|
return sorted(
|
||||||
list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
|
list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
|
||||||
@@ -188,8 +193,7 @@ class BenchmarkDriver(object):
|
|||||||
num_iters=None,
|
num_iters=None,
|
||||||
sample_time=None,
|
sample_time=None,
|
||||||
verbose=None,
|
verbose=None,
|
||||||
measure_memory=False,
|
measure_memory=False
|
||||||
quantile=None,
|
|
||||||
):
|
):
|
||||||
"""Execute benchmark and gather results."""
|
"""Execute benchmark and gather results."""
|
||||||
num_samples = num_samples or 0
|
num_samples = num_samples or 0
|
||||||
@@ -197,11 +201,14 @@ class BenchmarkDriver(object):
|
|||||||
sample_time = sample_time or 0 # default is 1s
|
sample_time = sample_time or 0 # default is 1s
|
||||||
|
|
||||||
cmd = self._cmd_run(
|
cmd = self._cmd_run(
|
||||||
test, num_samples, num_iters, sample_time, verbose, measure_memory, quantile
|
test, num_samples, num_iters, sample_time, verbose, measure_memory
|
||||||
)
|
)
|
||||||
output = self._invoke(cmd)
|
output = self._invoke(cmd)
|
||||||
results = self.parser.results_from_string(output)
|
results = self.parser.results_from_string(output)
|
||||||
return list(results.items())[0][1] if test else results
|
if test:
|
||||||
|
return list(results.items())[0][1]
|
||||||
|
else:
|
||||||
|
return results
|
||||||
|
|
||||||
def _cmd_run(
|
def _cmd_run(
|
||||||
self,
|
self,
|
||||||
@@ -210,14 +217,13 @@ class BenchmarkDriver(object):
|
|||||||
num_iters,
|
num_iters,
|
||||||
sample_time,
|
sample_time,
|
||||||
verbose,
|
verbose,
|
||||||
measure_memory,
|
measure_memory
|
||||||
quantile,
|
|
||||||
):
|
):
|
||||||
cmd = [self.test_harness]
|
cmd = [self.test_harness]
|
||||||
if test:
|
if test:
|
||||||
cmd.append(test)
|
cmd.append(test)
|
||||||
else:
|
else:
|
||||||
cmd.extend([self.test_number.get(name, name) for name in self.tests])
|
cmd.extend([str(self.test_number.get(name, name)) for name in self.tests])
|
||||||
if num_samples > 0:
|
if num_samples > 0:
|
||||||
cmd.append("--num-samples={0}".format(num_samples))
|
cmd.append("--num-samples={0}".format(num_samples))
|
||||||
if num_iters > 0:
|
if num_iters > 0:
|
||||||
@@ -228,9 +234,8 @@ class BenchmarkDriver(object):
|
|||||||
cmd.append("--verbose")
|
cmd.append("--verbose")
|
||||||
if measure_memory:
|
if measure_memory:
|
||||||
cmd.append("--memory")
|
cmd.append("--memory")
|
||||||
if quantile:
|
# TODO: Uncomment this as soon as the new Benchmark Swift logic is available everywhere
|
||||||
cmd.append("--quantile={0}".format(quantile))
|
# cmd.append("--json")
|
||||||
cmd.append("--delta")
|
|
||||||
return cmd
|
return cmd
|
||||||
|
|
||||||
def run_independent_samples(self, test):
|
def run_independent_samples(self, test):
|
||||||
@@ -246,12 +251,12 @@ class BenchmarkDriver(object):
|
|||||||
return functools.reduce(
|
return functools.reduce(
|
||||||
merge_results,
|
merge_results,
|
||||||
[
|
[
|
||||||
self.run(test, measure_memory=True, num_iters=1, quantile=20)
|
self.run(test, measure_memory=True, num_iters=1)
|
||||||
for _ in range(self.args.independent_samples)
|
for _ in range(self.args.independent_samples)
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
def log_results(self, output, log_file=None):
|
def log_results(self, results, log_file=None):
|
||||||
"""Log output to `log_file`.
|
"""Log output to `log_file`.
|
||||||
|
|
||||||
Creates `args.output_dir` if it doesn't exist yet.
|
Creates `args.output_dir` if it doesn't exist yet.
|
||||||
@@ -262,7 +267,8 @@ class BenchmarkDriver(object):
|
|||||||
os.makedirs(dir)
|
os.makedirs(dir)
|
||||||
print("Logging results to: %s" % log_file)
|
print("Logging results to: %s" % log_file)
|
||||||
with open(log_file, "w") as f:
|
with open(log_file, "w") as f:
|
||||||
f.write(output)
|
for r in results:
|
||||||
|
print(r, file=f)
|
||||||
|
|
||||||
RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"
|
RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"
|
||||||
|
|
||||||
@@ -284,7 +290,7 @@ class BenchmarkDriver(object):
|
|||||||
def console_log(values):
|
def console_log(values):
|
||||||
print(format(values))
|
print(format(values))
|
||||||
|
|
||||||
def result_values(r):
|
def summary(r):
|
||||||
return list(
|
return list(
|
||||||
map(
|
map(
|
||||||
str,
|
str,
|
||||||
@@ -292,17 +298,17 @@ class BenchmarkDriver(object):
|
|||||||
r.test_num,
|
r.test_num,
|
||||||
r.name,
|
r.name,
|
||||||
r.num_samples,
|
r.num_samples,
|
||||||
r.min,
|
r.min_value,
|
||||||
r.samples.q1,
|
r.q1,
|
||||||
r.median,
|
r.median,
|
||||||
r.samples.q3,
|
r.q3,
|
||||||
r.max,
|
r.max_value,
|
||||||
r.max_rss,
|
r.max_rss,
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
header = [
|
summary_header = [
|
||||||
"#",
|
"#",
|
||||||
"TEST",
|
"TEST",
|
||||||
"SAMPLES",
|
"SAMPLES",
|
||||||
@@ -313,25 +319,23 @@ class BenchmarkDriver(object):
|
|||||||
"MAX(μs)",
|
"MAX(μs)",
|
||||||
"MAX_RSS(B)",
|
"MAX_RSS(B)",
|
||||||
]
|
]
|
||||||
console_log(header)
|
console_log(summary_header)
|
||||||
results = [header]
|
results = []
|
||||||
for test in self.tests:
|
for test in self.tests:
|
||||||
result = result_values(self.run_independent_samples(test))
|
result = self.run_independent_samples(test)
|
||||||
console_log(result)
|
console_log(summary(result))
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
print("\nTotal performance tests executed: {0}".format(len(self.tests)))
|
print("\nTotal performance tests executed: {0}".format(len(self.tests)))
|
||||||
return (
|
return results
|
||||||
None if csv_console else ("\n".join([",".join(r) for r in results]) + "\n")
|
|
||||||
) # csv_log
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run_benchmarks(args):
|
def run_benchmarks(args):
|
||||||
"""Run benchmarks and log results."""
|
"""Run benchmarks and log results."""
|
||||||
driver = BenchmarkDriver(args)
|
driver = BenchmarkDriver(args)
|
||||||
csv_log = driver.run_and_log(csv_console=(args.output_dir is None))
|
results = driver.run_and_log(csv_console=(args.output_dir is None))
|
||||||
if csv_log:
|
if args.output_dir:
|
||||||
driver.log_results(csv_log)
|
driver.log_results([r.json() for r in results])
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
@@ -445,7 +449,6 @@ class BenchmarkDoctor(object):
|
|||||||
Optional `driver` parameter for injecting dependency; used for testing.
|
Optional `driver` parameter for injecting dependency; used for testing.
|
||||||
"""
|
"""
|
||||||
super(BenchmarkDoctor, self).__init__()
|
super(BenchmarkDoctor, self).__init__()
|
||||||
self.driver = driver or BenchmarkDriver(args)
|
|
||||||
self.results = {}
|
self.results = {}
|
||||||
|
|
||||||
if hasattr(args, "markdown") and args.markdown:
|
if hasattr(args, "markdown") and args.markdown:
|
||||||
@@ -458,6 +461,7 @@ class BenchmarkDoctor(object):
|
|||||||
self.console_handler.setLevel(
|
self.console_handler.setLevel(
|
||||||
logging.DEBUG if args.verbose else logging.INFO
|
logging.DEBUG if args.verbose else logging.INFO
|
||||||
)
|
)
|
||||||
|
self.driver = driver or BenchmarkDriver(args)
|
||||||
self.log.addHandler(self.console_handler)
|
self.log.addHandler(self.console_handler)
|
||||||
self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
|
self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
|
||||||
self.requirements = [
|
self.requirements = [
|
||||||
@@ -532,7 +536,7 @@ class BenchmarkDoctor(object):
|
|||||||
correction = setup / i
|
correction = setup / i
|
||||||
i_series = BenchmarkDoctor._select(measurements, num_iters=i)
|
i_series = BenchmarkDoctor._select(measurements, num_iters=i)
|
||||||
for result in i_series:
|
for result in i_series:
|
||||||
runtimes.append(result.samples.min - correction)
|
runtimes.append(result.min_value - correction)
|
||||||
runtime = min(runtimes)
|
runtime = min(runtimes)
|
||||||
|
|
||||||
threshold = 1000
|
threshold = 1000
|
||||||
@@ -584,7 +588,7 @@ class BenchmarkDoctor(object):
|
|||||||
ti1, ti2 = [
|
ti1, ti2 = [
|
||||||
float(min(mins))
|
float(min(mins))
|
||||||
for mins in [
|
for mins in [
|
||||||
[result.samples.min for result in i_series]
|
[result.min_value for result in i_series]
|
||||||
for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
|
for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
@@ -679,7 +683,7 @@ class BenchmarkDoctor(object):
|
|||||||
r = self.driver.run(
|
r = self.driver.run(
|
||||||
benchmark, num_samples=3, num_iters=1, verbose=True
|
benchmark, num_samples=3, num_iters=1, verbose=True
|
||||||
) # calibrate
|
) # calibrate
|
||||||
num_samples = self._adjusted_1s_samples(r.samples.min)
|
num_samples = self._adjusted_1s_samples(r.min_value)
|
||||||
|
|
||||||
def capped(s):
|
def capped(s):
|
||||||
return min(s, 200)
|
return min(s, 200)
|
||||||
@@ -689,7 +693,7 @@ class BenchmarkDoctor(object):
|
|||||||
opts = opts if isinstance(opts, list) else [opts]
|
opts = opts if isinstance(opts, list) else [opts]
|
||||||
self.log.debug(
|
self.log.debug(
|
||||||
"Runtime {0} μs yields {1} adjusted samples per second.".format(
|
"Runtime {0} μs yields {1} adjusted samples per second.".format(
|
||||||
r.samples.min, num_samples
|
r.min_value, num_samples
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.log.debug(
|
self.log.debug(
|
||||||
|
|||||||
@@ -17,9 +17,7 @@ This script compares performance test logs and issues a formatted report.
|
|||||||
|
|
||||||
Invoke `$ compare_perf_tests.py -h ` for complete list of options.
|
Invoke `$ compare_perf_tests.py -h ` for complete list of options.
|
||||||
|
|
||||||
class `Sample` is a single benchmark measurement.
|
class `PerformanceTestResult` collects information about a single test
|
||||||
class `PerformanceTestSamples` is a collection of `Sample`s and their statistics.
|
|
||||||
class `PerformanceTestResult` is a summary of performance test execution.
|
|
||||||
class `LogParser` converts log files into `PerformanceTestResult`s.
|
class `LogParser` converts log files into `PerformanceTestResult`s.
|
||||||
class `ResultComparison` compares new and old `PerformanceTestResult`s.
|
class `ResultComparison` compares new and old `PerformanceTestResult`s.
|
||||||
class `TestComparator` analyzes changes between the old and new test results.
|
class `TestComparator` analyzes changes between the old and new test results.
|
||||||
@@ -29,194 +27,10 @@ class `ReportFormatter` creates the test comparison report in specified format.
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import functools
|
import functools
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
import statistics
|
||||||
import sys
|
import sys
|
||||||
from bisect import bisect, bisect_left, bisect_right
|
|
||||||
from collections import namedtuple
|
|
||||||
from math import ceil, sqrt
|
|
||||||
|
|
||||||
|
|
||||||
class Sample(namedtuple("Sample", "i num_iters runtime")):
|
|
||||||
u"""Single benchmark measurement.
|
|
||||||
|
|
||||||
Initialized with:
|
|
||||||
`i`: ordinal number of the sample taken,
|
|
||||||
`num_iters`: number of iterations used to compute it,
|
|
||||||
`runtime`: in microseconds (μs).
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
"""Shorter Sample formatting for debugging purposes."""
|
|
||||||
return "s({0.i!r}, {0.num_iters!r}, {0.runtime!r})".format(self)
|
|
||||||
|
|
||||||
|
|
||||||
class Yield(namedtuple("Yield", "before_sample after")):
|
|
||||||
u"""Meta-measurement of when the Benchmark_X voluntarily yielded process.
|
|
||||||
|
|
||||||
`before_sample`: index of measurement taken just after returning from yield
|
|
||||||
`after`: time elapsed since the previous yield in microseconds (μs)
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
class PerformanceTestSamples(object):
|
|
||||||
"""Collection of runtime samples from the benchmark execution.
|
|
||||||
|
|
||||||
Computes the sample population statistics.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, name, samples=None):
|
|
||||||
"""Initialize with benchmark name and optional list of Samples."""
|
|
||||||
self.name = name # Name of the performance test
|
|
||||||
self.samples = []
|
|
||||||
self.outliers = []
|
|
||||||
self._runtimes = []
|
|
||||||
self.mean = 0.0
|
|
||||||
self.S_runtime = 0.0 # For computing running variance
|
|
||||||
for sample in samples or []:
|
|
||||||
self.add(sample)
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
"""Text summary of benchmark statistics."""
|
|
||||||
return (
|
|
||||||
"{0.name!s} n={0.count!r} "
|
|
||||||
"Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} "
|
|
||||||
"Max={0.max!r} "
|
|
||||||
"R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} "
|
|
||||||
"Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}".format(self)
|
|
||||||
if self.samples
|
|
||||||
else "{0.name!s} n=0".format(self)
|
|
||||||
)
|
|
||||||
|
|
||||||
def add(self, sample):
|
|
||||||
"""Add sample to collection and recompute statistics."""
|
|
||||||
assert isinstance(sample, Sample)
|
|
||||||
self._update_stats(sample)
|
|
||||||
i = bisect(self._runtimes, sample.runtime)
|
|
||||||
self._runtimes.insert(i, sample.runtime)
|
|
||||||
self.samples.insert(i, sample)
|
|
||||||
|
|
||||||
def _update_stats(self, sample):
|
|
||||||
old_stats = (self.count, self.mean, self.S_runtime)
|
|
||||||
_, self.mean, self.S_runtime = self.running_mean_variance(
|
|
||||||
old_stats, sample.runtime
|
|
||||||
)
|
|
||||||
|
|
||||||
def exclude_outliers(self, top_only=False):
|
|
||||||
"""Exclude outliers by applying Interquartile Range Rule.
|
|
||||||
|
|
||||||
Moves the samples outside of the inner fences
|
|
||||||
(Q1 - 1.5*IQR and Q3 + 1.5*IQR) into outliers list and recomputes
|
|
||||||
statistics for the remaining sample population. Optionally apply
|
|
||||||
only the top inner fence, preserving the small outliers.
|
|
||||||
|
|
||||||
Experimentally, this rule seems to perform well enough on the
|
|
||||||
benchmark runtimes in the microbenchmark range to filter out
|
|
||||||
the environment noise caused by preemptive multitasking.
|
|
||||||
"""
|
|
||||||
lo = (
|
|
||||||
0
|
|
||||||
if top_only
|
|
||||||
else bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))
|
|
||||||
)
|
|
||||||
hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr))
|
|
||||||
|
|
||||||
outliers = self.samples[:lo] + self.samples[hi:]
|
|
||||||
samples = self.samples[lo:hi]
|
|
||||||
|
|
||||||
self.__init__(self.name) # re-initialize
|
|
||||||
for sample in samples: # and
|
|
||||||
self.add(sample) # re-compute stats
|
|
||||||
self.outliers = outliers
|
|
||||||
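For context on the rule being deleted here, a worked example of the inner fences (runtimes invented): with Q1 = 11 μs and Q3 = 19 μs, IQR = 8, so the fences are 11 − 12 = −1 and 19 + 12 = 31, and a 54 μs sample is excluded as an outlier. A minimal sketch:

```python
def iqr_fences(q1, q3):
    """Inner fences of the Interquartile Range Rule: Q1 - 1.5*IQR and Q3 + 1.5*IQR."""
    iqr = q3 - q1
    return q1 - 1.5 * iqr, q3 + 1.5 * iqr

lo, hi = iqr_fences(11, 19)
samples = [10, 11, 12, 15, 19, 54]
kept = [s for s in samples if lo <= s <= hi]
assert (lo, hi) == (-1.0, 31.0)
assert kept == [10, 11, 12, 15, 19]  # 54 falls above the top fence
```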
|
|
||||||
@property
|
|
||||||
def count(self):
|
|
||||||
"""Number of samples used to compute the statistics."""
|
|
||||||
return len(self.samples)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def num_samples(self):
|
|
||||||
"""Number of all samples in the collection."""
|
|
||||||
return len(self.samples) + len(self.outliers)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def all_samples(self):
|
|
||||||
"""List of all samples in ascending order."""
|
|
||||||
return sorted(self.samples + self.outliers, key=lambda s: s.i or -1)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def min(self):
|
|
||||||
"""Minimum sampled value."""
|
|
||||||
return self.samples[0].runtime
|
|
||||||
|
|
||||||
@property
|
|
||||||
def max(self):
|
|
||||||
"""Maximum sampled value."""
|
|
||||||
return self.samples[-1].runtime
|
|
||||||
|
|
||||||
def quantile(self, q):
|
|
||||||
"""Return runtime for given quantile.
|
|
||||||
|
|
||||||
Equivalent to quantile estimate type R-1, SAS-3. See:
|
|
||||||
https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
|
|
||||||
"""
|
|
||||||
index = max(0, int(ceil(self.count * float(q))) - 1)
|
|
||||||
return self.samples[index].runtime
|
|
||||||
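A standalone sketch of the R-1 estimate used above — the index into the sorted samples is ceil(n·q) − 1 — with invented runtimes:

```python
from math import ceil

def quantile_r1(sorted_values, q):
    """Type R-1 (SAS-3) quantile estimate: index ceil(n*q) - 1 into sorted data."""
    index = max(0, int(ceil(len(sorted_values) * float(q))) - 1)
    return sorted_values[index]

runtimes = [10, 11, 11, 12, 15, 19, 54]  # hypothetical sorted runtimes (μs)
assert quantile_r1(runtimes, 0.25) == 11  # Q1
assert quantile_r1(runtimes, 0.50) == 12  # median
assert quantile_r1(runtimes, 0.75) == 19  # Q3
```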
|
|
||||||
@property
|
|
||||||
def median(self):
|
|
||||||
"""Median sampled value."""
|
|
||||||
return self.quantile(0.5)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def q1(self):
|
|
||||||
"""First Quartile (25th Percentile)."""
|
|
||||||
return self.quantile(0.25)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def q3(self):
|
|
||||||
"""Third Quartile (75th Percentile)."""
|
|
||||||
return self.quantile(0.75)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def iqr(self):
|
|
||||||
"""Interquartile Range."""
|
|
||||||
return self.q3 - self.q1
|
|
||||||
|
|
||||||
@property
|
|
||||||
def sd(self):
|
|
||||||
u"""Standard Deviation (μs)."""
|
|
||||||
return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1))
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def running_mean_variance(stats, x):
|
|
||||||
"""Compute running variance, B. P. Welford's method.
|
|
||||||
|
|
||||||
See Knuth TAOCP vol 2, 3rd edition, page 232, or
|
|
||||||
https://www.johndcook.com/blog/standard_deviation/
|
|
||||||
M is the mean; Standard Deviation is defined as sqrt(S/(k-1))
|
|
||||||
"""
|
|
||||||
|
|
||||||
(k, M_, S_) = stats
|
|
||||||
|
|
||||||
k = float(k + 1)
|
|
||||||
M = M_ + (x - M_) / k
|
|
||||||
S = S_ + (x - M_) * (x - M)
|
|
||||||
return (k, M, S)
|
|
||||||
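A quick self-check of the Welford update being removed here, assuming S accumulates the sum of squared deviations so that SD = sqrt(S/(k−1)); the input values are arbitrary:

```python
from math import sqrt
import statistics

def running_mean_variance(stats, x):
    # Welford's update: stats is (count, mean, S), where S accumulates
    # the sum of squared deviations from the running mean.
    (k, M_, S_) = stats
    k = float(k + 1)
    M = M_ + (x - M_) / k
    S = S_ + (x - M_) * (x - M)
    return (k, M, S)

values = [12.0, 15.0, 11.0, 19.0, 14.0]  # arbitrary runtimes (μs)
stats = (0, 0.0, 0.0)
for x in values:
    stats = running_mean_variance(stats, x)
k, mean, s = stats
assert abs(mean - statistics.mean(values)) < 1e-9
assert abs(sqrt(s / (k - 1)) - statistics.stdev(values)) < 1e-9
```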
|
|
||||||
@property
|
|
||||||
def cv(self):
|
|
||||||
"""Coefficient of Variation (%)."""
|
|
||||||
return (self.sd / self.mean) if self.mean else 0
|
|
||||||
|
|
||||||
@property
|
|
||||||
def range(self):
|
|
||||||
"""Range of samples values (Max - Min)."""
|
|
||||||
return self.max - self.min
|
|
||||||
|
|
||||||
@property
|
|
||||||
def spread(self):
|
|
||||||
"""Sample Spread; i.e. Range as (%) of Min."""
|
|
||||||
return self.range / float(self.min) if self.min else 0
|
|
||||||
|
|
||||||
|
|
||||||
class PerformanceTestResult(object):
|
class PerformanceTestResult(object):
|
||||||
@@ -225,126 +39,402 @@ class PerformanceTestResult(object):
|
|||||||
Reported by the test driver (Benchmark_O, Benchmark_Onone, Benchmark_Osize
|
Reported by the test driver (Benchmark_O, Benchmark_Onone, Benchmark_Osize
|
||||||
or Benchmark_Driver).
|
or Benchmark_Driver).
|
||||||
|
|
||||||
It supports 2 log formats emitted by the test driver. Legacy format with
|
It supports log formats emitted by the test driver.
|
||||||
statistics for normal distribution (MEAN, SD):
|
|
||||||
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
|
|
||||||
And new quantiles format with variable number of columns:
|
|
||||||
#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
|
||||||
#,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
|
||||||
The number of columns between MIN and MAX depends on the test driver's
|
|
||||||
`--quantile` parameter. In both cases, the last column, MAX_RSS, is optional.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=False):
|
# TODO: Delete after December 2023
|
||||||
"""Initialize from a row of multiple columns with benchmark summary.
|
@classmethod
|
||||||
|
def fromOldFormat(cls, header, line):
|
||||||
The row is an iterable, such as a row provided by the CSV parser.
|
"""Original format with statistics for normal distribution (MEAN, SD):
|
||||||
|
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),PAGES,ICS,YIELD
|
||||||
|
Note that MAX_RSS, PAGES, ICS, YIELD are all optional
|
||||||
"""
|
"""
|
||||||
self.test_num = csv_row[0] # Ordinal number of the test
|
csv_row = line.split(",") if "," in line else line.split()
|
||||||
self.name = csv_row[1] # Name of the performance test
|
labels = header.split(",") if "," in header else header.split()
|
||||||
self.num_samples = int(csv_row[2]) # Number of measurements taken
|
|
||||||
|
|
||||||
mem_index = (-1 if memory else 0) + (-3 if meta else 0)
|
# Synthesize a JSON form with the basic values:
|
||||||
if quantiles: # Variable number of columns representing quantiles
|
num_samples = int(csv_row[2])
|
||||||
runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
|
json_data = {
|
||||||
last_runtime_index = mem_index - 1
|
"number": int(csv_row[0]),
|
||||||
if delta:
|
"name": csv_row[1],
|
||||||
runtimes = [int(x) if x else 0 for x in runtimes]
|
"num_samples": num_samples,
|
||||||
runtimes = functools.reduce(
|
}
|
||||||
lambda l, x: l.append(l[-1] + x) or l if l else [x], # runnin
|
|
||||||
runtimes,
|
|
||||||
None,
|
|
||||||
) # total
|
|
||||||
num_values = len(runtimes)
|
|
||||||
if self.num_samples < num_values: # remove repeated samples
|
|
||||||
quantile = num_values - 1
|
|
||||||
qs = [float(i) / float(quantile) for i in range(0, num_values)]
|
|
||||||
indices = [
|
|
||||||
max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs
|
|
||||||
]
|
|
||||||
runtimes = [
|
|
||||||
runtimes[indices.index(i)] for i in range(0, self.num_samples)
|
|
||||||
]
|
|
||||||
|
|
||||||
self.samples = PerformanceTestSamples(
|
# Map remaining columns according to label
|
||||||
self.name, [Sample(None, None, int(runtime)) for runtime in runtimes]
|
field_map = [
|
||||||
)
|
("ICS", "ics"),
|
||||||
self.samples.exclude_outliers(top_only=True)
|
("MAX_RSS", "max_rss"), # Must precede "MAX"
|
||||||
sams = self.samples
|
("MAX", "max"),
|
||||||
self.min, self.max, self.median, self.mean, self.sd = (
|
("MEAN", "mean"),
|
||||||
sams.min,
|
("MEDIAN", "median"),
|
||||||
sams.max,
|
("MIN", "min"),
|
||||||
sams.median,
|
("PAGES", "pages"),
|
||||||
sams.mean,
|
("SD", "sd"),
|
||||||
sams.sd,
|
("YIELD", "yield")
|
||||||
)
|
]
|
||||||
else: # Legacy format with statistics for normal distribution.
|
for label, value in zip(labels, csv_row):
|
||||||
self.min = int(csv_row[3]) # Minimum runtime (μs)
|
for match, json_key in field_map:
|
||||||
self.max = int(csv_row[4]) # Maximum runtime (μs)
|
if match in label:
|
||||||
self.mean = float(csv_row[5]) # Mean (average) runtime (μs)
|
json_data[json_key] = float(value)
|
||||||
self.sd = float(csv_row[6]) # Standard Deviation (μs)
|
break
|
||||||
self.median = int(csv_row[7]) # Median runtime (μs)
|
|
||||||
last_runtime_index = 7
|
|
||||||
self.samples = None
|
|
||||||
|
|
||||||
self.max_rss = ( # Maximum Resident Set Size (B)
|
# Heroic: Reconstruct samples if we have enough info
|
||||||
int(csv_row[mem_index]) if (
|
# This is generally a bad idea, but sadly necessary for the
|
||||||
memory and len(csv_row) > (last_runtime_index + 1)
|
# old format that doesn't provide raw sample data.
|
||||||
) else None
|
if num_samples == 1 and "min" in json_data:
|
||||||
)
|
json_data["samples"] = [
|
||||||
|
json_data["min"]
|
||||||
|
]
|
||||||
|
elif num_samples == 2 and "min" in json_data and "max" in json_data:
|
||||||
|
json_data["samples"] = [
|
||||||
|
json_data["min"],
|
||||||
|
json_data["max"]
|
||||||
|
]
|
||||||
|
elif (num_samples == 3
|
||||||
|
and "min" in json_data
|
||||||
|
and "max" in json_data
|
||||||
|
and "median" in json_data):
|
||||||
|
json_data["samples"] = [
|
||||||
|
json_data["min"],
|
||||||
|
json_data["median"],
|
||||||
|
json_data["max"]
|
||||||
|
]
|
||||||
|
|
||||||
# Optional measurement metadata. The number of:
|
return PerformanceTestResult(json_data)
|
||||||
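A usage sketch of `fromOldFormat` as defined above, assuming the updated `compare_perf_tests.py` is importable; the header is the legacy layout and the row values are invented:

```python
from compare_perf_tests import PerformanceTestResult

header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)"
line = "34,BitCount,20,3,4,4,0,4,10192896"  # hypothetical legacy log row

r = PerformanceTestResult.fromOldFormat(header, line)
assert (r.name, r.num_samples) == ("BitCount", 20)
assert (r.min_value, r.max_value, r.median) == (3.0, 4.0, 4.0)
assert r.max_rss == 10192896.0
```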
# memory pages used, involuntary context switches and voluntary yields
|
|
||||||
self.mem_pages, self.involuntary_cs, self.yield_count = (
|
# TODO: Delete after December 2023
|
||||||
[int(x) for x in csv_row[-3:]] if meta else (None, None, None)
|
@classmethod
|
||||||
)
|
def fromQuantileFormat(cls, header, line):
|
||||||
self.yields = None
|
"""Quantiles format with variable number of columns depending on the
|
||||||
self.setup = None
|
number of quantiles:
|
||||||
|
#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
||||||
|
#,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
||||||
|
The number of columns between QMIN and MAX depends on the test driver's
|
||||||
|
`--quantile` parameter. In both cases, the last column, MAX_RSS, is optional.
|
||||||
|
|
||||||
|
Delta encoding: If a header name includes 𝚫, that column stores the
|
||||||
|
difference from the previous column. E.g., a header
|
||||||
|
"#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),𝚫MAX(μs)" indicates the final "MAX"
|
||||||
|
column must be computed by adding the value in that column to the value
|
||||||
|
of the previous "MEDIAN" column.
|
||||||
|
"""
|
||||||
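As a worked example of the delta encoding described above (all values invented): with header `#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN(μs),𝚫MAX(μs)` and row `1,Ackermann,99,100,5,12`, MEDIAN decodes to 100 + 5 = 105 and MAX to 105 + 12 = 117. A minimal sketch of just that decoding step:

```python
def decode_delta_columns(labels, csv_row):
    """Replace each 𝚫-marked column with its absolute value:
    the previous (already decoded) column plus the stored increment."""
    row = list(csv_row)
    for i in range(1, len(labels)):
        if "𝚫" in labels[i] or "Δ" in labels[i]:
            prev = int(row[i - 1])
            inc = int(row[i]) if row[i] != '' else 0
            row[i] = str(prev + inc)
    return row

labels = "#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN(μs),𝚫MAX(μs)".split(",")
row = "1,Ackermann,99,100,5,12".split(",")
assert decode_delta_columns(labels, row)[3:] == ["100", "105", "117"]
```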
|
csv_row = line.split(",") if "," in line else line.split()
|
||||||
|
labels = header.split(",")
|
||||||
|
|
||||||
|
for i in range(1, len(labels)):
|
||||||
|
if "𝚫" in labels[i] or "Δ" in labels[i]:
|
||||||
|
prev = int(csv_row[i - 1])
|
||||||
|
inc = int(csv_row[i]) if csv_row[i] != '' else 0
|
||||||
|
csv_row[i] = str(prev + inc)
|
||||||
|
|
||||||
|
# Synthesize a JSON form and then initialize from that
|
||||||
|
json_data = {
|
||||||
|
"number": int(csv_row[0]),
|
||||||
|
"name": csv_row[1],
|
||||||
|
"num_samples": int(csv_row[2]),
|
||||||
|
}
|
||||||
|
# Process optional trailing fields MAX_RSS, PAGES, ICS, YIELD
|
||||||
|
i = len(labels) - 1
|
||||||
|
while True:
|
||||||
|
if "MAX_RSS" in labels[i]:
|
||||||
|
json_data["max_rss"] = float(csv_row[i])
|
||||||
|
elif "PAGES" in labels[i]:
|
||||||
|
json_data["pages"] = float(csv_row[i])
|
||||||
|
elif "ICS" in labels[i]:
|
||||||
|
json_data["ics"] = float(csv_row[i])
|
||||||
|
elif "YIELD" in labels[i]:
|
||||||
|
json_data["yield"] = float(csv_row[i])
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
i -= 1
|
||||||
|
if i < 0:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Rest is the quantiles (includes min/max columns)
|
||||||
|
quantiles = [float(q) for q in csv_row[3:i + 1]]
|
||||||
|
|
||||||
|
# Heroic effort:
|
||||||
|
# If we have enough quantiles, we can reconstruct the samples
|
||||||
|
# This is generally a bad idea, but sadly necessary since
|
||||||
|
# the quantile format doesn't provide raw sample data.
|
||||||
|
if json_data["num_samples"] == len(quantiles):
|
||||||
|
json_data["samples"] = sorted(quantiles)
|
||||||
|
elif json_data["num_samples"] == 2:
|
||||||
|
json_data["samples"] = [quantiles[0], quantiles[-1]]
|
||||||
|
elif json_data["num_samples"] == 1:
|
||||||
|
json_data["samples"] = [quantiles[0]]
|
||||||
|
else:
|
||||||
|
json_data["quantiles"] = quantiles
|
||||||
|
if len(quantiles) > 0:
|
||||||
|
json_data["min"] = quantiles[0]
|
||||||
|
json_data["max"] = quantiles[-1]
|
||||||
|
json_data["median"] = quantiles[(len(quantiles) - 1) // 2]
|
||||||
|
|
||||||
|
return PerformanceTestResult(json_data)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def fromJSONFormat(cls, line):
|
||||||
|
"""JSON format stores a test result as a JSON object on a single line
|
||||||
|
|
||||||
|
Compared to the legacy tab-separated/comma-separated formats, this makes
|
||||||
|
it much easier to add new fields, handle optional fields, and allows us
|
||||||
|
to include the full set of samples so we can use better statistics
|
||||||
|
downstream.
|
||||||
|
|
||||||
|
The code here includes optional support for min, max,
|
||||||
|
median, mean, etc. supported by the older formats, though in practice,
|
||||||
|
you shouldn't rely on those: Just store the full samples and then
|
||||||
|
compute whatever statistics you need as required.
|
||||||
|
"""
|
||||||
|
json_data = json.loads(line)
|
||||||
|
return PerformanceTestResult(json_data)
|
||||||
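For illustration, one line of the JSON log format handled above might look like this (field values invented); the standard `json` module turns it into the dict that `PerformanceTestResult` consumes:

```python
import json

# Hypothetical single-line record emitted by the benchmark harness.
line = '{"number": 7, "name": "Ackermann", "samples": [101, 103, 99], "max_rss": 10510336}'

data = json.loads(line)
assert data["name"] == "Ackermann"
assert min(data["samples"]) == 99
```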
|
|
||||||
|
def __init__(self, json_data):
|
||||||
|
# Ugly hack to get the old tests to run
|
||||||
|
if isinstance(json_data, str):
|
||||||
|
json_data = json.loads(json_data)
|
||||||
|
|
||||||
|
# We always have these
|
||||||
|
assert (json_data.get("number") is not None)
|
||||||
|
assert (json_data.get("name") is not None)
|
||||||
|
self.test_num = json_data["number"]
|
||||||
|
self.name = json_data["name"]
|
||||||
|
|
||||||
|
# We always have either samples or num_samples
|
||||||
|
assert (json_data.get("num_samples") is not None
|
||||||
|
or json_data.get("samples") is not None)
|
||||||
|
self.num_samples = json_data.get("num_samples") or len(json_data["samples"])
|
||||||
|
self.samples = json_data.get("samples") or []
|
||||||
|
|
||||||
|
# Everything else is optional and can be read
|
||||||
|
# out of the JSON data if needed
|
||||||
|
# See max_rss() below for an example of this.
|
||||||
|
self.json_data = dict(json_data)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
"""Short summary for debugging purposes."""
|
return "PerformanceTestResult(" + json.dumps(self.json_data) + ")"
|
||||||
return (
|
|
||||||
"<PerformanceTestResult name:{0.name!r} "
|
|
||||||
"samples:{0.num_samples!r} min:{0.min!r} max:{0.max!r} "
|
|
||||||
"mean:{0.mean:.0f} sd:{0.sd:.0f} median:{0.median!r}>".format(self)
|
|
||||||
)
|
|
||||||
|
|
||||||
def merge(self, r):
|
def json(self):
|
||||||
|
"""Return a single-line JSON form of this result
|
||||||
|
|
||||||
|
This can be parsed back via fromJSONFormat above.
|
||||||
|
It can also represent all data stored by the older
|
||||||
|
formats, so there's no reason to not use it everywhere.
|
||||||
|
"""
|
||||||
|
data = dict(self.json_data)
|
||||||
|
|
||||||
|
# In case these got modified
|
||||||
|
data["number"] = self.test_num
|
||||||
|
data["name"] = self.name
|
||||||
|
|
||||||
|
# If we have full sample data, use that and
|
||||||
|
# drop any lingering pre-computed statistics
|
||||||
|
# (It's better for downstream consumers to just
|
||||||
|
# compute whatever statistics they need from scratch.)
|
||||||
|
|
||||||
|
# After December 2023, uncomment the next line:
|
||||||
|
# assert len(self.samples) == self.num_samples
|
||||||
|
if len(self.samples) == self.num_samples:
|
||||||
|
data["samples"] = self.samples
|
||||||
|
data.pop("num_samples", None)
|
||||||
|
# TODO: Delete min/max/mean/sd/q1/median/q3/quantiles
|
||||||
|
# after December 2023
|
||||||
|
data.pop("min", None)
|
||||||
|
data.pop("max", None)
|
||||||
|
data.pop("mean", None)
|
||||||
|
data.pop("sd", None)
|
||||||
|
data.pop("q1", None)
|
||||||
|
data.pop("median", None)
|
||||||
|
data.pop("q3", None)
|
||||||
|
data.pop("quantiles", None)
|
||||||
|
else:
|
||||||
|
# Preserve other pre-existing JSON statistics
|
||||||
|
data["num_samples"] = self.num_samples
|
||||||
|
|
||||||
|
return json.dumps(data)
|
||||||
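A round-trip sketch of `json()` and `fromJSONFormat`, assuming the updated `compare_perf_tests.py` is importable; the result values are invented:

```python
from compare_perf_tests import PerformanceTestResult

r = PerformanceTestResult({"number": 1, "name": "Ackermann", "samples": [101, 103, 99]})
line = r.json()                                  # single-line JSON string
r2 = PerformanceTestResult.fromJSONFormat(line)  # parse it back
assert (r2.name, r2.num_samples, r2.min_value) == ("Ackermann", 3, 99)
```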
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.json()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def setup(self):
|
||||||
|
"""TODO: Implement this
|
||||||
|
"""
|
||||||
|
return 0
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max_rss(self):
|
||||||
|
"""Return max_rss if available
|
||||||
|
"""
|
||||||
|
return self.json_data.get("max_rss")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mem_pages(self):
|
||||||
|
"""Return pages if available
|
||||||
|
"""
|
||||||
|
return self.json_data.get("pages")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def involuntary_cs(self):
|
||||||
|
"""Return involuntary context switches if available
|
||||||
|
"""
|
||||||
|
return self.json_data.get("ics")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def yield_count(self):
|
||||||
|
"""Return voluntary yield count if available
|
||||||
|
"""
|
||||||
|
return self.json_data.get("yield")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def min_value(self):
|
||||||
|
"""Return the minimum value from all samples
|
||||||
|
|
||||||
|
If we have full samples, compute it directly.
|
||||||
|
In the legacy case, we might not have full samples,
|
||||||
|
so in that case we'll return a value that was given
|
||||||
|
to us initially (if any).
|
||||||
|
|
||||||
|
Eventually (after December 2023), this can be simplified
|
||||||
|
to just `return min(self.samples)`, since by then
|
||||||
|
the legacy forms should no longer be in use.
|
||||||
|
"""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
return min(self.samples)
|
||||||
|
return self.json_data.get("min")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max_value(self):
|
||||||
|
"""Return the maximum sample value
|
||||||
|
|
||||||
|
See min_value comments for details on the legacy behavior."""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
return max(self.samples)
|
||||||
|
return self.json_data.get("max")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def median(self):
|
||||||
|
"""Return the median sample value
|
||||||
|
|
||||||
|
See min_value comments for details on the legacy behavior."""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
return statistics.median(self.samples)
|
||||||
|
return self.json_data.get("median")
|
||||||
|
|
||||||
|
# TODO: Eliminate q1 and q3. They're kept for now
|
||||||
|
# to preserve compatibility with older reports. But quantiles
|
||||||
|
# aren't really useful statistics, so just drop them.
|
||||||
|
@property
|
||||||
|
def q1(self):
|
||||||
|
"""Return the 25% quantile
|
||||||
|
|
||||||
|
See min_value comments for details on the legacy behavior."""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
q = statistics.quantiles(self.samples, n=4)
|
||||||
|
return q[0]
|
||||||
|
return self.json_data.get("q1")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def q3(self):
|
||||||
|
"""Return the 75% quantile
|
||||||
|
|
||||||
|
See min_value comments for details on the legacy behavior."""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
q = statistics.quantiles(self.samples, n=4)
|
||||||
|
return q[2]
|
||||||
|
return self.json_data.get("q3")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def mean(self):
|
||||||
|
"""Return the average
|
||||||
|
|
||||||
|
TODO: delete this; it's not useful"""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
return statistics.mean(self.samples)
|
||||||
|
return self.json_data.get("mean")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def sd(self):
|
||||||
|
"""Return the standard deviation
|
||||||
|
|
||||||
|
TODO: delete this; it's not useful"""
|
||||||
|
if self.num_samples == len(self.samples):
|
||||||
|
if len(self.samples) > 1:
|
||||||
|
return statistics.stdev(self.samples)
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
return self.json_data.get("sd")
|
||||||
|
|
||||||
|
def merge(self, other):
|
||||||
"""Merge two results.
|
"""Merge two results.
|
||||||
|
|
||||||
Recomputes min, max and mean statistics. If all `samples` are
|
This is trivial in the non-legacy case: We just
|
||||||
available, it recomputes all the statistics.
|
pool all the samples.
|
||||||
The use case here is comparing test results parsed from concatenated
|
|
||||||
log files from multiple runs of benchmark driver.
|
In the legacy case (or the mixed legacy/non-legacy cases),
|
||||||
|
we try to estimate the min/max/mean/sd/median/etc based
|
||||||
|
on whatever information is available. After Dec 2023,
|
||||||
|
we should be able to drop the legacy support.
|
||||||
"""
|
"""
|
||||||
# Statistics
|
# The following can be removed after Dec 2023
|
||||||
if self.samples and r.samples:
|
# (by which time the legacy support should no longer
|
||||||
for sample in r.samples.samples:
|
# be necessary)
|
||||||
self.samples.add(sample)
|
if self.num_samples != len(self.samples):
|
||||||
sams = self.samples
|
# If we don't have samples, we can't rely on being
|
||||||
self.num_samples = sams.num_samples
|
# able to compute real statistics from those samples,
|
||||||
self.min, self.max, self.median, self.mean, self.sd = (
|
# so we make a best-effort attempt to estimate a joined
|
||||||
sams.min,
|
# statistic from whatever data we actually have.
|
||||||
sams.max,
|
|
||||||
sams.median,
|
# If both exist, take the minimum, else take whichever is set
|
||||||
sams.mean,
|
other_min_value = other.min_value
|
||||||
sams.sd,
|
if other_min_value is not None:
|
||||||
)
|
self_min_value = self.min_value
|
||||||
else:
|
if self_min_value is not None:
|
||||||
self.min = min(self.min, r.min)
|
self.json_data["min"] = min(other_min_value, self_min_value)
|
||||||
self.max = max(self.max, r.max)
|
else:
|
||||||
self.mean = ( # pooled mean is the weighted sum of means
|
self.json_data["min"] = other_min_value
|
||||||
(self.mean * self.num_samples) + (r.mean * r.num_samples)
|
|
||||||
) / float(self.num_samples + r.num_samples)
|
# If both exist, take the maximum, else take whichever is set
|
||||||
self.num_samples += r.num_samples
|
other_max_value = other.max_value
|
||||||
self.median, self.sd = None, None
|
if other_max_value is not None:
|
||||||
|
self_max_value = self.max_value
|
||||||
|
if self_max_value is not None:
|
||||||
|
self.json_data["max"] = max(other_max_value, self_max_value)
|
||||||
|
else:
|
||||||
|
self.json_data["max"] = other_max_value
|
||||||
|
|
||||||
|
# If both exist, take the weighted average, else take whichever is set
|
||||||
|
other_mean = other.mean
|
||||||
|
if other_mean is not None:
|
||||||
|
self_mean = self.mean
|
||||||
|
if self_mean is not None:
|
||||||
|
self.json_data["mean"] = (
|
||||||
|
(other_mean * other.num_samples
|
||||||
|
+ self_mean * self.num_samples)
|
||||||
|
/ (self.num_samples + other.num_samples)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.json_data["mean"] = other_mean
|
||||||
|
self.json_data.pop("median", None) # Remove median
|
||||||
|
self.json_data.pop("sd", None) # Remove stdev
|
||||||
|
self.json_data.pop("q1", None) # Remove 25% quantile
|
||||||
|
self.json_data.pop("q3", None) # Remove 75% quantile
|
||||||
|
self.json_data.pop("quantiles", None) # Remove quantiles
|
||||||
|
|
||||||
|
# Accumulate samples (if present) and num_samples (always)
|
||||||
|
self.samples += other.samples
|
||||||
|
self.num_samples += other.num_samples
|
||||||
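To make the weighted-average fallback above concrete (numbers invented): merging a result with mean 120 μs over 1 sample into one with mean 100 μs over 3 samples gives (120·1 + 100·3) / 4 = 105 μs. A standalone sketch of the pooling rule:

```python
def pooled_mean(mean_a, n_a, mean_b, n_b):
    """Weighted average of two sample means, used when raw samples are unavailable."""
    return (mean_a * n_a + mean_b * n_b) / float(n_a + n_b)

assert pooled_mean(120.0, 1, 100.0, 3) == 105.0
```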
|
|
||||||
# Metadata
|
# Metadata
|
||||||
def minimum(a, b): # work around None being less than everything
|
# Use the smaller if both have a max_rss value
|
||||||
return min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None
|
self.json_data["max_rss"] = other.max_rss
|
||||||
|
other_max_rss = other.max_rss
|
||||||
self.max_rss = minimum(self.max_rss, r.max_rss)
|
if other_max_rss is not None:
|
||||||
self.setup = minimum(self.setup, r.setup)
|
self_max_rss = self.max_rss
|
||||||
|
if self_max_rss is not None:
|
||||||
|
self.json_data["max_rss"] = min(self_max_rss, other_max_rss)
|
||||||
|
else:
|
||||||
|
self.json_data["max_rss"] = other_max_rss
|
||||||
|
|
||||||
|
|
||||||
class ResultComparison(object):
|
class ResultComparison(object):
|
||||||
@@ -361,16 +451,37 @@ class ResultComparison(object):
|
|||||||
self.name = old.name # Test name, convenience accessor
|
self.name = old.name # Test name, convenience accessor
|
||||||
|
|
||||||
# Speedup ratio
|
# Speedup ratio
|
||||||
self.ratio = (old.min + 0.001) / (new.min + 0.001)
|
self.ratio = (old.min_value + 0.001) / (new.min_value + 0.001)
|
||||||
|
|
||||||
# Test runtime improvement in %
|
# Test runtime improvement in %
|
||||||
ratio = (new.min + 0.001) / (old.min + 0.001)
|
ratio = (new.min_value + 0.001) / (old.min_value + 0.001)
|
||||||
self.delta = (ratio - 1) * 100
|
self.delta = (ratio - 1) * 100
|
||||||
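A quick numeric check of the two formulas above (timings invented): with an old minimum of 200 μs and a new minimum of 100 μs, the speedup ratio is about 2.0x and the delta about −50%; the +0.001 keeps the division safe for sub-microsecond results.

```python
old_min, new_min = 200.0, 100.0  # hypothetical minimum runtimes (μs)
ratio = (old_min + 0.001) / (new_min + 0.001)              # speedup, ~2.0x
delta = ((new_min + 0.001) / (old_min + 0.001) - 1) * 100  # improvement, ~-50%
assert abs(ratio - 2.0) < 1e-4
assert abs(delta + 50.0) < 1e-3
```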
|
|
||||||
|
# If we have full samples for both old and new...
|
||||||
|
if (
|
||||||
|
len(old.samples) == old.num_samples
|
||||||
|
and len(new.samples) == new.num_samples
|
||||||
|
):
|
||||||
|
# TODO: Use a T-Test or U-Test to determine whether
|
||||||
|
# one set of samples should be considered reliably better than
|
||||||
|
# the other.
|
||||||
|
None
|
||||||
|
|
||||||
|
# If we do not have full samples, we'll use the
|
||||||
|
# legacy calculation for compatibility.
|
||||||
|
# TODO: After Dec 2023, we should always be using full samples
|
||||||
|
# everywhere and can delete the following entirely.
|
||||||
|
#
|
||||||
# Indication of dubious changes: when result's MIN falls inside the
|
# Indication of dubious changes: when result's MIN falls inside the
|
||||||
# (MIN, MAX) interval of result they are being compared with.
|
# (MIN, MAX) interval of result they are being compared with.
|
||||||
self.is_dubious = (old.min < new.min and new.min < old.max) or (
|
self.is_dubious = (
|
||||||
new.min < old.min and old.min < new.max
|
(
|
||||||
|
old.min_value < new.min_value
|
||||||
|
and new.min_value < old.max_value
|
||||||
|
) or (
|
||||||
|
new.min_value < old.min_value
|
||||||
|
and old.min_value < new.max_value
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -385,117 +496,49 @@ class LogParser(object):
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
"""Create instance of `LogParser`."""
|
"""Create instance of `LogParser`."""
|
||||||
self.results = []
|
self.results = []
|
||||||
self.quantiles, self.delta, self.memory = False, False, False
|
|
||||||
self.meta = False
|
|
||||||
self._reset()
|
|
||||||
|
|
||||||
def _reset(self):
|
|
||||||
"""Reset parser to the default state for reading a new result."""
|
|
||||||
self.samples, self.yields, self.num_iters = [], [], 1
|
|
||||||
self.setup, self.max_rss, self.mem_pages = None, None, None
|
|
||||||
self.voluntary_cs, self.involuntary_cs = None, None
|
|
||||||
|
|
||||||
# Parse lines like this
|
|
||||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)
|
|
||||||
results_re = re.compile(
|
|
||||||
r"( *\d+[, \t]+[\w.\-\?!]+[, \t]+"
|
|
||||||
+ r"[, \t]+".join([r"\d+"] * 2) # #,TEST
|
|
||||||
+ r"(?:[, \t]+\d*)*)" # at least 2...
|
|
||||||
) # ...or more numeric columns
|
|
||||||
|
|
||||||
def _append_result(self, result):
|
|
||||||
columns = result.split(",") if "," in result else result.split()
|
|
||||||
r = PerformanceTestResult(
|
|
||||||
columns,
|
|
||||||
quantiles=self.quantiles,
|
|
||||||
memory=self.memory,
|
|
||||||
delta=self.delta,
|
|
||||||
meta=self.meta,
|
|
||||||
)
|
|
||||||
r.setup = self.setup
|
|
||||||
r.max_rss = r.max_rss or self.max_rss
|
|
||||||
r.mem_pages = r.mem_pages or self.mem_pages
|
|
||||||
r.voluntary_cs = self.voluntary_cs
|
|
||||||
r.involuntary_cs = r.involuntary_cs or self.involuntary_cs
|
|
||||||
if self.samples:
|
|
||||||
r.samples = PerformanceTestSamples(r.name, self.samples)
|
|
||||||
r.samples.exclude_outliers()
|
|
||||||
self.results.append(r)
|
|
||||||
r.yields = self.yields or None
|
|
||||||
self._reset()
|
|
||||||
|
|
||||||
def _store_memory_stats(self, max_rss, mem_pages):
|
|
||||||
self.max_rss = int(max_rss)
|
|
||||||
self.mem_pages = int(mem_pages)
|
|
||||||
|
|
||||||
def _configure_format(self, header):
|
|
||||||
self.quantiles = "QMIN" in header
|
|
||||||
self.memory = "MAX_RSS" in header
|
|
||||||
self.meta = "PAGES" in header
|
|
||||||
self.delta = "𝚫" in header
|
|
||||||
|
|
||||||
# Regular expression and action to take when it matches the parsed line
|
|
||||||
state_actions = {
|
|
||||||
results_re: _append_result,
|
|
||||||
# Verbose mode adds new productions:
|
|
||||||
# Adaptively determined N; test loop multiple adjusting runtime to ~1s
|
|
||||||
re.compile(r"\s+Measuring with scale (\d+)."): (
|
|
||||||
lambda self, num_iters: setattr(self, "num_iters", num_iters)
|
|
||||||
),
|
|
||||||
re.compile(r"\s+Sample (\d+),(\d+)"): (
|
|
||||||
lambda self, i, runtime: self.samples.append(
|
|
||||||
Sample(int(i), int(self.num_iters), int(runtime))
|
|
||||||
)
|
|
||||||
),
|
|
||||||
re.compile(r"\s+SetUp (\d+)"): (
|
|
||||||
lambda self, setup: setattr(self, "setup", int(setup))
|
|
||||||
),
|
|
||||||
re.compile(r"\s+Yielding after ~(\d+) μs"): (
|
|
||||||
lambda self, since_last_yield: self.yields.append(
|
|
||||||
Yield(len(self.samples), int(since_last_yield))
|
|
||||||
)
|
|
||||||
),
|
|
||||||
re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t].*)"): _configure_format,
|
|
||||||
# Environmental statistics: memory usage and context switches
|
|
||||||
re.compile(
|
|
||||||
r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"
|
|
||||||
): _store_memory_stats,
|
|
||||||
re.compile(r"\s+VCS \d+ - \d+ = (\d+)"): (
|
|
||||||
lambda self, vcs: setattr(self, "voluntary_cs", int(vcs))
|
|
||||||
),
|
|
||||||
re.compile(r"\s+ICS \d+ - \d+ = (\d+)"): (
|
|
||||||
lambda self, ics: setattr(self, "involuntary_cs", int(ics))
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
def parse_results(self, lines):
|
def parse_results(self, lines):
|
||||||
"""Parse results from the lines of the log output from Benchmark*.
|
"""Parse results from the lines of the log output from Benchmark*.
|
||||||
|
|
||||||
Returns a list of `PerformanceTestResult`s.
|
Returns a list of `PerformanceTestResult`s.
|
||||||
"""
|
"""
|
||||||
|
match_json = re.compile(r"\s*({.*)")
|
||||||
|
match_header = re.compile(r"( *#[, \t]+TEST.*)")
|
||||||
|
match_legacy = re.compile(r" *(\d+[, \t].*)")
|
||||||
|
header = ""
|
||||||
for line in lines:
|
for line in lines:
|
||||||
for regexp, action in LogParser.state_actions.items():
|
# Current format has a JSON-encoded object on each line
|
||||||
match = regexp.match(line)
|
# That format is flexible so should be the only format
|
||||||
if match:
|
# used going forward
|
||||||
action(self, *match.groups())
|
if match_json.match(line):
|
||||||
break # stop after 1st match
|
r = PerformanceTestResult.fromJSONFormat(line)
|
||||||
else: # If none matches, skip the line.
|
self.results.append(r)
|
||||||
# print('skipping: ' + line.rstrip('\n'))
|
elif match_header.match(line):
|
||||||
|
# Legacy formats use a header line (which can be
|
||||||
|
# inspected to determine the presence and order of columns)
|
||||||
|
header = line
|
||||||
|
elif match_legacy.match(line):
|
||||||
|
# Legacy format: lines of space- or tab-separated values
|
||||||
|
if "QMIN" in header:
|
||||||
|
r = PerformanceTestResult.fromQuantileFormat(header, line)
|
||||||
|
else:
|
||||||
|
r = PerformanceTestResult.fromOldFormat(header, line)
|
||||||
|
self.results.append(r)
|
||||||
|
else:
|
||||||
|
# Ignore unrecognized lines
|
||||||
|
# print('Skipping: ' + line.rstrip('\n'), file=sys.stderr, flush=True)
|
||||||
continue
|
continue
|
||||||
return self.results
|
return self.results
|
||||||
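A usage sketch of the rewritten parser on a mixed log, assuming the updated `compare_perf_tests.py` is importable; every line below is invented, but it shows that JSON records and legacy header-plus-row output can coexist in one log:

```python
from compare_perf_tests import LogParser

log_lines = [
    '{"number": 1, "name": "Ackermann", "samples": [101, 99, 103]}',  # new JSON record
    "#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)",                     # legacy header
    "2,Array2D,3,1100,1150,1300",                                     # legacy quantile row
    "some unrelated log noise",                                       # ignored
]

results = LogParser().parse_results(log_lines)
assert [r.name for r in results] == ["Ackermann", "Array2D"]
assert results[0].min_value == 99
assert results[1].samples == [1100.0, 1150.0, 1300.0]
```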
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _results_from_lines(lines):
|
def _results_from_lines(lines):
|
||||||
tests = LogParser().parse_results(lines)
|
names = dict()
|
||||||
|
for r in LogParser().parse_results(lines):
|
||||||
def add_or_merge(names, r):
|
|
||||||
if r.name not in names:
|
if r.name not in names:
|
||||||
names[r.name] = r
|
names[r.name] = r
|
||||||
else:
|
else:
|
||||||
names[r.name].merge(r)
|
names[r.name].merge(r)
|
||||||
return names
|
return names
|
||||||
|
|
||||||
return functools.reduce(add_or_merge, tests, dict())
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def results_from_string(log_contents):
|
def results_from_string(log_contents):
|
||||||
@@ -615,18 +658,18 @@ class ReportFormatter(object):
|
|||||||
return (
|
return (
|
||||||
(
|
(
|
||||||
result.name,
|
result.name,
|
||||||
str(result.min),
|
str(result.min_value) if result.min_value is not None else "-",
|
||||||
str(result.max),
|
str(result.max_value) if result.max_value is not None else "-",
|
||||||
str(int(result.mean)),
|
str(result.mean) if result.mean is not None else "-",
|
||||||
str(result.max_rss) if result.max_rss else "—",
|
str(result.max_rss) if result.max_rss is not None else "—",
|
||||||
)
|
)
|
||||||
if isinstance(result, PerformanceTestResult)
|
if isinstance(result, PerformanceTestResult)
|
||||||
else
|
else
|
||||||
# isinstance(result, ResultComparison)
|
# isinstance(result, ResultComparison)
|
||||||
(
|
(
|
||||||
result.name,
|
result.name,
|
||||||
str(result.old.min),
|
str(result.old.min_value) if result.old.min_value is not None else "-",
|
||||||
str(result.new.min),
|
str(result.new.min_value) if result.new.min_value is not None else "-",
|
||||||
"{0:+.1f}%".format(result.delta),
|
"{0:+.1f}%".format(result.delta),
|
||||||
"{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""),
|
"{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
from imp import load_source
|
from imp import load_source
|
||||||
|
|
||||||
from compare_perf_tests import LogParser, TestComparator, create_report
|
from compare_perf_tests import PerformanceTestResult, TestComparator, create_report
|
||||||
|
|
||||||
# import Benchmark_Driver # doesn't work because it misses '.py' extension
|
# import Benchmark_Driver # doesn't work because it misses '.py' extension
|
||||||
Benchmark_Driver = load_source(
|
Benchmark_Driver = load_source(
|
||||||
@@ -204,12 +204,12 @@ def test_opt_levels(args):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def measure(driver, tests, i):
|
def measure(driver, tests, i, min_num_samples):
|
||||||
"""Log and measure samples of the tests with the given driver.
|
"""Log and measure samples of the tests with the given driver.
|
||||||
|
|
||||||
Collect increasing number of samples, depending on the iteration.
|
Collect increasing number of samples, depending on the iteration.
|
||||||
"""
|
"""
|
||||||
num_samples = min(i + 3, 10)
|
num_samples = min(i + min_num_samples, 4 * min_num_samples)
|
||||||
msg = " Iteration {0} for {1}: num samples = {2}, ".format(
|
msg = " Iteration {0} for {1}: num samples = {2}, ".format(
|
||||||
i, driver.args.tests, num_samples
|
i, driver.args.tests, num_samples
|
||||||
)
|
)
|
||||||
@@ -246,7 +246,7 @@ def test_performance(
|
|||||||
optimization=opt_level))
|
optimization=opt_level))
|
||||||
for dir in [old_dir, new_dir]
|
for dir in [old_dir, new_dir]
|
||||||
]
|
]
|
||||||
results = [measure(driver, driver.tests, i) for driver in [old, new]]
|
results = [measure(driver, driver.tests, i, num_samples) for driver in [old, new]]
|
||||||
tests = TestComparator(results[0], results[1], threshold)
|
tests = TestComparator(results[0], results[1], threshold)
|
||||||
changed = tests.decreased + tests.increased
|
changed = tests.decreased + tests.increased
|
||||||
|
|
||||||
@@ -254,11 +254,11 @@ def test_performance(
|
|||||||
i += 1
|
i += 1
|
||||||
if VERBOSE:
|
if VERBOSE:
|
||||||
log(" test again: " + str([test.name for test in changed]))
|
log(" test again: " + str([test.name for test in changed]))
|
||||||
results = [
|
old_measurement = measure(old, [test.name for test in changed], i, num_samples)
|
||||||
merge(the_results, measure(driver, [test.name for test in changed], i))
|
old_results = merge(results[0], old_measurement)
|
||||||
for the_results, driver in zip(results, [old, new])
|
new_measurement = measure(new, [test.name for test in changed], i, num_samples)
|
||||||
]
|
new_results = merge(results[1], new_measurement)
|
||||||
tests = TestComparator(results[0], results[1], threshold)
|
tests = TestComparator(old_results, new_results, threshold)
|
||||||
changed = tests.decreased + tests.increased
|
changed = tests.decreased + tests.increased
|
||||||
|
|
||||||
if len(old.tests) == len(changed):
|
if len(old.tests) == len(changed):
|
||||||
@@ -269,7 +269,7 @@ def test_performance(
|
|||||||
log("")
|
log("")
|
||||||
report_title = "Performance ({}): -{}".format(arch, opt_level)
|
report_title = "Performance ({}): -{}".format(arch, opt_level)
|
||||||
return report_results(
|
return report_results(
|
||||||
report_title, None, None, threshold * 1.4, output_file, *results
|
report_title, threshold * 1.4, output_file, old_results, new_results
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -283,8 +283,8 @@ def report_code_size(opt_level, old_dir, new_dir, architecture, platform, output
|
|||||||
)
|
)
|
||||||
|
|
||||||
idx = 1
|
idx = 1
|
||||||
old_lines = ""
|
old_results = {}
|
||||||
new_lines = ""
|
new_results = {}
|
||||||
for oldfile in files:
|
for oldfile in files:
|
||||||
new_dir = os.path.join(new_dir, '')
|
new_dir = os.path.join(new_dir, '')
|
||||||
newfile = oldfile.replace(old_dir, new_dir, 1)
|
newfile = oldfile.replace(old_dir, new_dir, 1)
|
||||||
@@ -292,17 +292,13 @@ def report_code_size(opt_level, old_dir, new_dir, architecture, platform, output
|
|||||||
oldsize = get_codesize(oldfile)
|
oldsize = get_codesize(oldfile)
|
||||||
newsize = get_codesize(newfile)
|
newsize = get_codesize(newfile)
|
||||||
bname = os.path.basename(oldfile)
|
bname = os.path.basename(oldfile)
|
||||||
|
old_json = {"number": idx, "name": bname, "samples": [oldsize]}
|
||||||
def result_line(value):
|
new_json = {"number": idx, "name": bname, "samples": [newsize]}
|
||||||
v = "," + str(value)
|
old_results[bname] = PerformanceTestResult(old_json)
|
||||||
return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n"
|
new_results[bname] = PerformanceTestResult(new_json)
|
||||||
|
|
||||||
old_lines += result_line(oldsize)
|
|
||||||
new_lines += result_line(newsize)
|
|
||||||
idx += 1
|
idx += 1
|
||||||
|
|
||||||
return report_results(
|
return report_results(
|
||||||
"Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file
|
"Code size: -" + opt_level, 0.01, output_file, old_results, new_results
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
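# Sketch of the idea behind the report_code_size() change above: each binary's
# byte count is wrapped as a one-sample PerformanceTestResult (dict keys as in
# the diff; the name and size below are made up), so code-size reports reuse
# the same comparison machinery as timing results. With a single sample,
# min/median/max all collapse to that byte count.
size_json = {"number": 1, "name": "Benchmark_O-x86_64", "samples": [1234567]}
# old_results[size_json["name"]] = PerformanceTestResult(size_json)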
@@ -318,16 +314,11 @@ def get_codesize(filename):
|
|||||||
|
|
||||||
def report_results(
|
def report_results(
|
||||||
title,
|
title,
|
||||||
old_lines,
|
|
||||||
new_lines,
|
|
||||||
threshold,
|
threshold,
|
||||||
output_file,
|
output_file,
|
||||||
old_results=None,
|
old_results,
|
||||||
new_results=None,
|
new_results,
|
||||||
):
|
):
|
||||||
old_results = old_results or LogParser.results_from_string(old_lines)
|
|
||||||
new_results = new_results or LogParser.results_from_string(new_lines)
|
|
||||||
|
|
||||||
print("------- " + title + " -------")
|
print("------- " + title + " -------")
|
||||||
print(create_report(old_results, new_results, threshold, "git"))
|
print(create_report(old_results, new_results, threshold, "git"))
|
||||||
|
|
||||||
|
|||||||
@@ -208,7 +208,7 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
|||||||
self.args,
|
self.args,
|
||||||
tests=["ignored"],
|
tests=["ignored"],
|
||||||
_subprocess=self.subprocess_mock).test_harness,
|
_subprocess=self.subprocess_mock).test_harness,
|
||||||
"/benchmarks/Benchmark_O",
|
"/benchmarks/Benchmark_O-*",
|
||||||
)
|
)
|
||||||
self.args.tests = "/path"
|
self.args.tests = "/path"
|
||||||
self.args.optimization = "Suffix"
|
self.args.optimization = "Suffix"
|
||||||
@@ -217,28 +217,27 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
|||||||
self.args,
|
self.args,
|
||||||
tests=["ignored"],
|
tests=["ignored"],
|
||||||
_subprocess=self.subprocess_mock).test_harness,
|
_subprocess=self.subprocess_mock).test_harness,
|
||||||
"/path/Benchmark_Suffix",
|
"/path/Benchmark_Suffix-*",
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_gets_list_of_precommit_benchmarks(self):
|
def test_gets_list_of_precommit_benchmarks(self):
|
||||||
self.subprocess_mock.expect(
|
self.subprocess_mock.expect(
|
||||||
"/benchmarks/Benchmark_O --list --delim=\t".split(" "),
|
"/benchmarks/Benchmark_O-* --list".split(" "),
|
||||||
"#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n",
|
"""1 Benchmark1 ["t1" "t2"]\n"""
|
||||||
|
+ """2 Benchmark2 ["t3"]\n""",
|
||||||
)
|
)
|
||||||
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
||||||
self.subprocess_mock.assert_called_all_expected()
|
self.subprocess_mock.assert_called_all_expected()
|
||||||
self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"])
|
self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"])
|
||||||
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"])
|
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"])
|
||||||
self.assertEqual(driver.test_number["Benchmark1"], "1")
|
self.assertEqual(driver.test_number["Benchmark1"], 1)
|
||||||
self.assertEqual(driver.test_number["Benchmark2"], "2")
|
self.assertEqual(driver.test_number["Benchmark2"], 2)
|
||||||
|
|
||||||
list_all_tests = (
|
list_all_tests = (
|
||||||
"/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "),
|
"/benchmarks/Benchmark_O-* --list --skip-tags=".split(" "),
|
||||||
"""# Test [Tags]
|
"""1 Benchmark1 ["t1","t2"]\n"""
|
||||||
1 Benchmark1 [t1, t2]
|
+ """2 Benchmark2 ["t3"]\n"""
|
||||||
2 Benchmark2 [t3]
|
+ """3 Benchmark3 ["t3","t4"]\n""",
|
||||||
3 Benchmark3 [t3, t4]
|
|
||||||
""",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_gets_list_of_all_benchmarks_when_benchmarks_args_exist(self):
|
def test_gets_list_of_all_benchmarks_when_benchmarks_args_exist(self):
|
||||||
@@ -251,7 +250,7 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
|||||||
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"])
|
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"])
|
||||||
|
|
||||||
def test_filters_benchmarks_by_pattern(self):
|
def test_filters_benchmarks_by_pattern(self):
|
||||||
self.args.filters = "-f .+3".split()
|
self.args.filters = [".+3"]
|
||||||
self.subprocess_mock.expect(*self.list_all_tests)
|
self.subprocess_mock.expect(*self.list_all_tests)
|
||||||
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
||||||
self.subprocess_mock.assert_called_all_expected()
|
self.subprocess_mock.assert_called_all_expected()
|
||||||
@@ -310,7 +309,7 @@ class LogParserStub(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def results_from_string(log_contents):
|
def results_from_string(log_contents):
|
||||||
LogParserStub.results_from_string_called = True
|
LogParserStub.results_from_string_called = True
|
||||||
r = PerformanceTestResult("3,b1,1,123,123,123,0,123".split(","))
|
r = PerformanceTestResult("""{"number":3,"name":"b1","samples":[123]}""")
|
||||||
return {"b1": r}
|
return {"b1": r}
|
||||||
|
|
||||||
|
|
||||||
@@ -320,8 +319,8 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
self.parser_stub = LogParserStub()
|
self.parser_stub = LogParserStub()
|
||||||
self.subprocess_mock = SubprocessMock()
|
self.subprocess_mock = SubprocessMock()
|
||||||
self.subprocess_mock.expect(
|
self.subprocess_mock.expect(
|
||||||
"/benchmarks/Benchmark_O --list --delim=\t".split(" "),
|
"/benchmarks/Benchmark_O-* --list".split(" "),
|
||||||
"#\tTest\t[Tags]\n1\tb1\t[tag]\n",
|
"""1 b1 ["tag"]""",
|
||||||
)
|
)
|
||||||
self.driver = BenchmarkDriver(
|
self.driver = BenchmarkDriver(
|
||||||
self.args, _subprocess=self.subprocess_mock, parser=self.parser_stub
|
self.args, _subprocess=self.subprocess_mock, parser=self.parser_stub
|
||||||
@@ -329,28 +328,30 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
|
|
||||||
def test_run_benchmark_with_multiple_samples(self):
|
def test_run_benchmark_with_multiple_samples(self):
|
||||||
self.driver.run("b1")
|
self.driver.run("b1")
|
||||||
self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "b1"))
|
self.subprocess_mock.assert_called_with(
|
||||||
|
("/benchmarks/Benchmark_O-*", "b1")
|
||||||
|
)
|
||||||
self.driver.run("b2", num_samples=5)
|
self.driver.run("b2", num_samples=5)
|
||||||
self.subprocess_mock.assert_called_with(
|
self.subprocess_mock.assert_called_with(
|
||||||
("/benchmarks/Benchmark_O", "b2", "--num-samples=5")
|
("/benchmarks/Benchmark_O-*", "b2", "--num-samples=5")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run_benchmark_with_specified_number_of_iterations(self):
|
def test_run_benchmark_with_specified_number_of_iterations(self):
|
||||||
self.driver.run("b", num_iters=1)
|
self.driver.run("b", num_iters=1)
|
||||||
self.subprocess_mock.assert_called_with(
|
self.subprocess_mock.assert_called_with(
|
||||||
("/benchmarks/Benchmark_O", "b", "--num-iters=1")
|
("/benchmarks/Benchmark_O-*", "b", "--num-iters=1")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run_benchmark_for_specified_time(self):
|
def test_run_benchmark_for_specified_time(self):
|
||||||
self.driver.run("b", sample_time=0.5)
|
self.driver.run("b", sample_time=0.5)
|
||||||
self.subprocess_mock.assert_called_with(
|
self.subprocess_mock.assert_called_with(
|
||||||
("/benchmarks/Benchmark_O", "b", "--sample-time=0.5")
|
("/benchmarks/Benchmark_O-*", "b", "--sample-time=0.5")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run_benchmark_in_verbose_mode(self):
|
def test_run_benchmark_in_verbose_mode(self):
|
||||||
self.driver.run("b", verbose=True)
|
self.driver.run("b", verbose=True)
|
||||||
self.subprocess_mock.assert_called_with(
|
self.subprocess_mock.assert_called_with(
|
||||||
("/benchmarks/Benchmark_O", "b", "--verbose")
|
("/benchmarks/Benchmark_O-*", "b", "--verbose")
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run_batch(self):
|
def test_run_batch(self):
|
||||||
@@ -361,7 +362,9 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
"""
|
"""
|
||||||
self.driver.tests = ["b1", "bx"]
|
self.driver.tests = ["b1", "bx"]
|
||||||
self.driver.run()
|
self.driver.run()
|
||||||
self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "1", "bx"))
|
self.subprocess_mock.assert_called_with(
|
||||||
|
("/benchmarks/Benchmark_O-*", "1", "bx")
|
||||||
|
)
|
||||||
|
|
||||||
def test_parse_results_from_running_benchmarks(self):
|
def test_parse_results_from_running_benchmarks(self):
|
||||||
"""Parse measurements results using LogParser.
|
"""Parse measurements results using LogParser.
|
||||||
@@ -379,14 +382,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
def test_measure_memory(self):
|
def test_measure_memory(self):
|
||||||
self.driver.run("b", measure_memory=True)
|
self.driver.run("b", measure_memory=True)
|
||||||
self.subprocess_mock.assert_called_with(
|
self.subprocess_mock.assert_called_with(
|
||||||
("/benchmarks/Benchmark_O", "b", "--memory")
|
("/benchmarks/Benchmark_O-*", "b", "--memory")
|
||||||
)
|
|
||||||
|
|
||||||
def test_report_quantiles(self):
|
|
||||||
"""Use delta compression for quantile reports."""
|
|
||||||
self.driver.run("b", quantile=4)
|
|
||||||
self.subprocess_mock.assert_called_with(
|
|
||||||
("/benchmarks/Benchmark_O", "b", "--quantile=4", "--delta")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_run_benchmark_independent_samples(self):
|
def test_run_benchmark_independent_samples(self):
|
||||||
@@ -396,12 +392,10 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
self.subprocess_mock.calls.count(
|
self.subprocess_mock.calls.count(
|
||||||
(
|
(
|
||||||
"/benchmarks/Benchmark_O",
|
"/benchmarks/Benchmark_O-*",
|
||||||
"b1",
|
"b1",
|
||||||
"--num-iters=1",
|
"--num-iters=1",
|
||||||
"--memory",
|
"--memory",
|
||||||
"--quantile=20",
|
|
||||||
"--delta",
|
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
3,
|
3,
|
||||||
@@ -412,38 +406,36 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
def mock_run(test):
|
def mock_run(test):
|
||||||
self.assertEqual(test, "b1")
|
self.assertEqual(test, "b1")
|
||||||
return PerformanceTestResult(
|
return PerformanceTestResult(
|
||||||
"3,b1,5,101,1,1,1,1,888".split(","),
|
"""{"number":3,"""
|
||||||
quantiles=True,
|
+ """"name":"b1","""
|
||||||
delta=True,
|
+ """"samples":[101,102,103,104,105],"""
|
||||||
memory=True,
|
+ """"max_rss":888}"""
|
||||||
)
|
)
|
||||||
|
|
||||||
driver = BenchmarkDriver(tests=["b1"], args=Stub(output_dir=None))
|
driver = BenchmarkDriver(tests=["b1"], args=Stub(output_dir=None))
|
||||||
driver.run_independent_samples = mock_run # patching
|
driver.run_independent_samples = mock_run # patching
|
||||||
|
|
||||||
with captured_output() as (out, _):
|
with captured_output() as (out, _):
|
||||||
log = driver.run_and_log()
|
driver.run_and_log()
|
||||||
|
|
||||||
header = (
|
header = (
|
||||||
"#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs)," + "MAX_RSS(B)\n"
|
"#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs)," + "MAX_RSS(B)\n"
|
||||||
)
|
)
|
||||||
csv_log = "3,b1,5,101,102,103,104,105,888\n"
|
csv_log = "3,b1,5,101,101.5,103,104.5,105,888\n"
|
||||||
self.assertEqual(log, None)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
out.getvalue(),
|
out.getvalue(),
|
||||||
header + csv_log + "\n" + "Total performance tests executed: 1\n",
|
header + csv_log + "\n" + "Total performance tests executed: 1\n",
|
||||||
)
|
)
|
||||||
|
|
||||||
with captured_output() as (out, _):
|
with captured_output() as (out, _):
|
||||||
log = driver.run_and_log(csv_console=False)
|
driver.run_and_log(csv_console=False)
|
||||||
|
|
||||||
self.assertEqual(log, header + csv_log)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
out.getvalue(),
|
out.getvalue(),
|
||||||
" # TEST SAMPLES MIN(μs)"
|
" # TEST SAMPLES MIN(μs)"
|
||||||
+ " Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n"
|
+ " Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n"
|
||||||
+ " 3 b1 5 101"
|
+ " 3 b1 5 101"
|
||||||
+ " 102 103 104 105 888\n"
|
+ " 101.5 103 104.5 105 888\n"
|
||||||
+ "\n"
|
+ "\n"
|
||||||
+ "Total performance tests executed: 1\n",
|
+ "Total performance tests executed: 1\n",
|
||||||
)
|
)
|
||||||
@@ -459,7 +451,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
openmode = "r" # 'U' mode is deprecated in Python 3
|
openmode = "r" # 'U' mode is deprecated in Python 3
|
||||||
with open(log_file, openmode) as f:
|
with open(log_file, openmode) as f:
|
||||||
text = f.read()
|
text = f.read()
|
||||||
self.assertEqual(text, "formatted output")
|
self.assertEqual(text, "formatted output\n")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import tempfile # setUp
|
import tempfile # setUp
|
||||||
@@ -469,7 +461,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
|||||||
driver = BenchmarkDriver(Stub(), tests=[""])
|
driver = BenchmarkDriver(Stub(), tests=[""])
|
||||||
|
|
||||||
self.assertFalse(os.path.exists(log_dir))
|
self.assertFalse(os.path.exists(log_dir))
|
||||||
content = "formatted output"
|
content = ["formatted output"]
|
||||||
log_file = os.path.join(log_dir, "1.log")
|
log_file = os.path.join(log_dir, "1.log")
|
||||||
with captured_output() as (out, _):
|
with captured_output() as (out, _):
|
||||||
driver.log_results(content, log_file=log_file)
|
driver.log_results(content, log_file=log_file)
|
||||||
@@ -512,7 +504,7 @@ class BenchmarkDriverMock(Mock):
|
|||||||
def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memory):
|
def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memory):
|
||||||
args = (test, num_samples, num_iters, verbose, measure_memory)
|
args = (test, num_samples, num_iters, verbose, measure_memory)
|
||||||
self.calls.append(args)
|
self.calls.append(args)
|
||||||
return self.respond.get(args, _PTR(min=700))
|
return self.respond.get(args, _PTR(min_value=700))
|
||||||
|
|
||||||
|
|
||||||
class TestLoggingReportFormatter(unittest.TestCase):
|
class TestLoggingReportFormatter(unittest.TestCase):
|
||||||
@@ -615,9 +607,9 @@ class TestMarkdownReportHandler(unittest.TestCase):
|
|||||||
self.assert_contains(["| `QuotedName`"])
|
self.assert_contains(["| `QuotedName`"])
|
||||||
|
|
||||||
|
|
||||||
def _PTR(min=700, mem_pages=1000, setup=None):
|
def _PTR(min_value=700, mem_pages=1000, setup=None):
|
||||||
"""Create PerformanceTestResult Stub."""
|
"""Create PerformanceTestResult Stub."""
|
||||||
return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup)
|
return Stub(min_value=min_value, mem_pages=mem_pages, setup=setup)
|
||||||
|
|
||||||
|
|
||||||
def _run(test, num_samples=None, num_iters=None, verbose=None, measure_memory=False):
|
def _run(test, num_samples=None, num_iters=None, verbose=None, measure_memory=False):
|
||||||
@@ -688,7 +680,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
# calibration run, returns a stand-in for PerformanceTestResult
|
# calibration run, returns a stand-in for PerformanceTestResult
|
||||||
(
|
(
|
||||||
_run("B1", num_samples=3, num_iters=1, verbose=True),
|
_run("B1", num_samples=3, num_iters=1, verbose=True),
|
||||||
_PTR(min=300),
|
_PTR(min_value=300),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
+
|
+
|
||||||
@@ -704,7 +696,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
measure_memory=True,
|
measure_memory=True,
|
||||||
),
|
),
|
||||||
_PTR(min=300),
|
_PTR(min_value=300),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
* 5
|
* 5
|
||||||
@@ -721,7 +713,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
measure_memory=True,
|
measure_memory=True,
|
||||||
),
|
),
|
||||||
_PTR(min=300),
|
_PTR(min_value=300),
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
* 5
|
* 5
|
||||||
@@ -849,8 +841,8 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
def measurements(name, runtime):
|
def measurements(name, runtime):
|
||||||
return {
|
return {
|
||||||
"name": name,
|
"name": name,
|
||||||
name + " O i1a": _PTR(min=runtime + 2),
|
name + " O i1a": _PTR(min_value=runtime + 2),
|
||||||
name + " O i2a": _PTR(min=runtime),
|
name + " O i2a": _PTR(min_value=runtime),
|
||||||
}
|
}
|
||||||
|
|
||||||
with captured_output() as (out, _):
|
with captured_output() as (out, _):
|
||||||
@@ -863,8 +855,8 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
doctor.analyze(
|
doctor.analyze(
|
||||||
{
|
{
|
||||||
"name": "OverheadTurtle",
|
"name": "OverheadTurtle",
|
||||||
"OverheadTurtle O i1a": _PTR(min=800000),
|
"OverheadTurtle O i1a": _PTR(min_value=800000),
|
||||||
"OverheadTurtle O i2a": _PTR(min=700000),
|
"OverheadTurtle O i2a": _PTR(min_value=700000),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
output = out.getvalue()
|
output = out.getvalue()
|
||||||
@@ -920,30 +912,34 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
|||||||
{
|
{
|
||||||
"name": "NoOverhead", # not 'significant' enough
|
"name": "NoOverhead", # not 'significant' enough
|
||||||
# Based on DropFirstArray a10/e10: overhead 3.7% (6 μs)
|
# Based on DropFirstArray a10/e10: overhead 3.7% (6 μs)
|
||||||
"NoOverhead O i1a": _PTR(min=162),
|
"NoOverhead O i1a": _PTR(min_value=162),
|
||||||
"NoOverhead O i2a": _PTR(min=159),
|
"NoOverhead O i2a": _PTR(min_value=159),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
doctor.analyze(
|
doctor.analyze(
|
||||||
{
|
{
|
||||||
"name": "SO", # Setup Overhead
|
"name": "SO", # Setup Overhead
|
||||||
# Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs)
|
# Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs)
|
||||||
"SO O i1a": _PTR(min=69),
|
"SO O i1a": _PTR(min_value=69),
|
||||||
"SO O i1b": _PTR(min=70),
|
"SO O i1b": _PTR(min_value=70),
|
||||||
"SO O i2a": _PTR(min=67),
|
"SO O i2a": _PTR(min_value=67),
|
||||||
"SO O i2b": _PTR(min=68),
|
"SO O i2b": _PTR(min_value=68),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
doctor.analyze(
|
doctor.analyze(
|
||||||
{"name": "Zero", "Zero O i1a": _PTR(min=0), "Zero O i2a": _PTR(min=0)}
|
{
|
||||||
|
"name": "Zero",
|
||||||
|
"Zero O i1a": _PTR(min_value=0),
|
||||||
|
"Zero O i2a": _PTR(min_value=0)
|
||||||
|
}
|
||||||
)
|
)
|
||||||
doctor.analyze(
|
doctor.analyze(
|
||||||
{
|
{
|
||||||
"name": "LOA", # Limit of Accuracy
|
"name": "LOA", # Limit of Accuracy
|
||||||
# Impossible to detect overhead:
|
# Impossible to detect overhead:
|
||||||
# Even 1μs change in 20μs runtime is 5%.
|
# Even 1μs change in 20μs runtime is 5%.
|
||||||
"LOA O i1a": _PTR(min=21),
|
"LOA O i1a": _PTR(min_value=21),
|
||||||
"LOA O i2a": _PTR(min=20),
|
"LOA O i2a": _PTR(min_value=20),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
output = out.getvalue()
|
output = out.getvalue()
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#
|
#
|
||||||
# ===---------------------------------------------------------------------===//
|
# ===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import sys
|
import sys
|
||||||
@@ -21,10 +22,8 @@ import unittest
|
|||||||
|
|
||||||
from compare_perf_tests import LogParser
|
from compare_perf_tests import LogParser
|
||||||
from compare_perf_tests import PerformanceTestResult
|
from compare_perf_tests import PerformanceTestResult
|
||||||
from compare_perf_tests import PerformanceTestSamples
|
|
||||||
from compare_perf_tests import ReportFormatter
|
from compare_perf_tests import ReportFormatter
|
||||||
from compare_perf_tests import ResultComparison
|
from compare_perf_tests import ResultComparison
|
||||||
from compare_perf_tests import Sample
|
|
||||||
from compare_perf_tests import TestComparator
|
from compare_perf_tests import TestComparator
|
||||||
from compare_perf_tests import main
|
from compare_perf_tests import main
|
||||||
from compare_perf_tests import parse_args
|
from compare_perf_tests import parse_args
|
||||||
@@ -32,227 +31,70 @@ from compare_perf_tests import parse_args
|
|||||||
from test_utils import captured_output
|
from test_utils import captured_output
|
||||||
|
|
||||||
|
|
||||||
class TestSample(unittest.TestCase):
|
|
||||||
def test_has_named_fields(self):
|
|
||||||
s = Sample(1, 2, 3)
|
|
||||||
self.assertEqual(s.i, 1)
|
|
||||||
self.assertEqual(s.num_iters, 2)
|
|
||||||
self.assertEqual(s.runtime, 3)
|
|
||||||
|
|
||||||
def test_is_iterable(self):
|
|
||||||
s = Sample(1, 2, 3)
|
|
||||||
self.assertEqual(s[0], 1)
|
|
||||||
self.assertEqual(s[1], 2)
|
|
||||||
self.assertEqual(s[2], 3)
|
|
||||||
|
|
||||||
|
|
||||||
class TestPerformanceTestSamples(unittest.TestCase):
|
|
||||||
def setUp(self):
|
|
||||||
self.samples = PerformanceTestSamples("B1")
|
|
||||||
self.samples.add(Sample(7, 42, 1000))
|
|
||||||
|
|
||||||
def test_has_name(self):
|
|
||||||
self.assertEqual(self.samples.name, "B1")
|
|
||||||
|
|
||||||
def test_stores_samples(self):
|
|
||||||
self.assertEqual(self.samples.count, 1)
|
|
||||||
s = self.samples.samples[0]
|
|
||||||
self.assertTrue(isinstance(s, Sample))
|
|
||||||
self.assertEqual(s.i, 7)
|
|
||||||
self.assertEqual(s.num_iters, 42)
|
|
||||||
self.assertEqual(s.runtime, 1000)
|
|
||||||
|
|
||||||
def test_quantile(self):
|
|
||||||
self.assertEqual(self.samples.quantile(1), 1000)
|
|
||||||
self.assertEqual(self.samples.quantile(0), 1000)
|
|
||||||
self.samples.add(Sample(2, 1, 1100))
|
|
||||||
self.assertEqual(self.samples.quantile(0), 1000)
|
|
||||||
self.assertEqual(self.samples.quantile(1), 1100)
|
|
||||||
self.samples.add(Sample(3, 1, 1050))
|
|
||||||
self.assertEqual(self.samples.quantile(0), 1000)
|
|
||||||
self.assertEqual(self.samples.quantile(0.5), 1050)
|
|
||||||
self.assertEqual(self.samples.quantile(1), 1100)
|
|
||||||
|
|
||||||
def assertEqualFiveNumberSummary(self, ss, expected_fns):
|
|
||||||
e_min, e_q1, e_median, e_q3, e_max = expected_fns
|
|
||||||
self.assertEqual(ss.min, e_min)
|
|
||||||
self.assertEqual(ss.q1, e_q1)
|
|
||||||
self.assertEqual(ss.median, e_median)
|
|
||||||
self.assertEqual(ss.q3, e_q3)
|
|
||||||
self.assertEqual(ss.max, e_max)
|
|
||||||
|
|
||||||
def test_computes_five_number_summary(self):
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1000, 1000))
|
|
||||||
self.samples.add(Sample(2, 1, 1100))
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1100, 1100))
|
|
||||||
self.samples.add(Sample(3, 1, 1050))
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1050, 1100, 1100))
|
|
||||||
self.samples.add(Sample(4, 1, 1025))
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1025, 1050, 1100))
|
|
||||||
self.samples.add(Sample(5, 1, 1075))
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
|
|
||||||
|
|
||||||
def test_computes_inter_quartile_range(self):
|
|
||||||
self.assertEqual(self.samples.iqr, 0)
|
|
||||||
self.samples.add(Sample(2, 1, 1025))
|
|
||||||
self.samples.add(Sample(3, 1, 1050))
|
|
||||||
self.samples.add(Sample(4, 1, 1075))
|
|
||||||
self.samples.add(Sample(5, 1, 1100))
|
|
||||||
self.assertEqual(self.samples.iqr, 50)
|
|
||||||
|
|
||||||
def assertEqualStats(self, stats, expected_stats):
|
|
||||||
for actual, expected in zip(stats, expected_stats):
|
|
||||||
self.assertAlmostEqual(actual, expected, places=2)
|
|
||||||
|
|
||||||
def test_computes_mean_sd_cv(self):
|
|
||||||
ss = self.samples
|
|
||||||
self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
|
|
||||||
self.samples.add(Sample(2, 1, 1100))
|
|
||||||
self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
|
|
||||||
|
|
||||||
def test_computes_range_spread(self):
|
|
||||||
ss = self.samples
|
|
||||||
self.assertEqualStats((ss.range, ss.spread), (0, 0))
|
|
||||||
self.samples.add(Sample(2, 1, 1100))
|
|
||||||
self.assertEqualStats((ss.range, ss.spread), (100, 10.0 / 100))
|
|
||||||
|
|
||||||
def test_init_with_samples(self):
|
|
||||||
self.samples = PerformanceTestSamples(
|
|
||||||
"B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)]
|
|
||||||
)
|
|
||||||
self.assertEqual(self.samples.count, 2)
|
|
||||||
self.assertEqualStats(
|
|
||||||
(
|
|
||||||
self.samples.mean,
|
|
||||||
self.samples.sd,
|
|
||||||
self.samples.range,
|
|
||||||
self.samples.spread,
|
|
||||||
),
|
|
||||||
(1050.0, 70.71, 100, 9.52 / 100),
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_can_handle_zero_runtime(self):
|
|
||||||
# guard against dividing by 0
|
|
||||||
self.samples = PerformanceTestSamples("Zero")
|
|
||||||
self.samples.add(Sample(0, 1, 0))
|
|
||||||
self.assertEqualStats(
|
|
||||||
(
|
|
||||||
self.samples.mean,
|
|
||||||
self.samples.sd,
|
|
||||||
self.samples.cv,
|
|
||||||
self.samples.range,
|
|
||||||
self.samples.spread,
|
|
||||||
),
|
|
||||||
(0, 0, 0.0, 0, 0.0),
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_excludes_outliers(self):
|
|
||||||
ss = [
|
|
||||||
Sample(*map(int, s.split()))
|
|
||||||
for s in "0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, "
|
|
||||||
"5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, "
|
|
||||||
"10 1 1050, 11 1 949, 12 1 1151".split(",")
|
|
||||||
]
|
|
||||||
self.samples = PerformanceTestSamples("Outliers", ss)
|
|
||||||
self.assertEqual(self.samples.count, 13)
|
|
||||||
self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 52.36))
|
|
||||||
|
|
||||||
self.samples.exclude_outliers()
|
|
||||||
|
|
||||||
self.assertEqual(self.samples.count, 11)
|
|
||||||
self.assertEqual(self.samples.outliers, ss[11:])
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
|
|
||||||
self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 35.36))
|
|
||||||
|
|
||||||
def test_excludes_outliers_zero_IQR(self):
|
|
||||||
self.samples = PerformanceTestSamples("Tight")
|
|
||||||
self.samples.add(Sample(0, 2, 23))
|
|
||||||
self.samples.add(Sample(1, 2, 18))
|
|
||||||
self.samples.add(Sample(2, 2, 18))
|
|
||||||
self.samples.add(Sample(3, 2, 18))
|
|
||||||
self.assertEqual(self.samples.iqr, 0)
|
|
||||||
|
|
||||||
self.samples.exclude_outliers()
|
|
||||||
|
|
||||||
self.assertEqual(self.samples.count, 3)
|
|
||||||
self.assertEqualStats((self.samples.min, self.samples.max), (18, 18))
|
|
||||||
|
|
||||||
def test_excludes_outliers_top_only(self):
|
|
||||||
ss = [
|
|
||||||
Sample(*map(int, s.split()))
|
|
||||||
for s in "0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3".split(",")
|
|
||||||
]
|
|
||||||
self.samples = PerformanceTestSamples("Top", ss)
|
|
||||||
self.assertEqualFiveNumberSummary(self.samples, (1, 2, 2, 2, 3))
|
|
||||||
self.assertEqual(self.samples.iqr, 0)
|
|
||||||
|
|
||||||
self.samples.exclude_outliers(top_only=True)
|
|
||||||
|
|
||||||
self.assertEqual(self.samples.count, 4)
|
|
||||||
self.assertEqualStats((self.samples.min, self.samples.max), (1, 2))
|
|
||||||
|
|
||||||
|
|
||||||
class TestPerformanceTestResult(unittest.TestCase):
|
class TestPerformanceTestResult(unittest.TestCase):
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
|
header = "#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN"
|
||||||
log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884"
|
log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884"
|
||||||
r = PerformanceTestResult(log_line.split(","))
|
r = PerformanceTestResult.fromOldFormat(header, log_line)
|
||||||
self.assertEqual(r.test_num, "1")
|
self.assertEqual(r.test_num, 1)
|
||||||
self.assertEqual(r.name, "AngryPhonebook")
|
self.assertEqual(r.name, "AngryPhonebook")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||||
(20, 10664, 12933, 11035, 576, 10884),
|
(20, 10664, 12933, 11035, 576, 10884),
|
||||||
)
|
)
|
||||||
self.assertEqual(r.samples, None)
|
self.assertEqual(r.samples, [])
|
||||||
|
|
||||||
|
header = "#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN,MAX_RSS"
|
||||||
log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
|
log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
|
||||||
r = PerformanceTestResult(log_line.split(","), memory=True)
|
r = PerformanceTestResult.fromOldFormat(header, log_line)
|
||||||
self.assertEqual(r.max_rss, 10510336)
|
self.assertEqual(r.max_rss, 10510336)
|
||||||
|
|
||||||
def test_init_quantiles(self):
|
def test_init_quantiles(self):
|
||||||
# #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
|
header = "#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)"
|
||||||
log = "1,Ackermann,3,54383,54512,54601"
|
log = "1,Ackermann,3,54383,54512,54601"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True)
|
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||||
self.assertEqual(r.test_num, "1")
|
self.assertEqual(r.test_num, 1)
|
||||||
self.assertEqual(r.name, "Ackermann")
|
self.assertEqual(r.name, "Ackermann")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601)
|
(r.num_samples, r.min_value, r.median, r.max_value),
|
||||||
|
(3, 54383, 54512, 54601)
|
||||||
)
|
)
|
||||||
self.assertAlmostEqual(r.mean, 54498.67, places=2)
|
self.assertAlmostEqual(r.mean, 54498.67, places=2)
|
||||||
self.assertAlmostEqual(r.sd, 109.61, places=2)
|
self.assertAlmostEqual(r.sd, 109.61, places=2)
|
||||||
self.assertEqual(r.samples.count, 3)
|
self.assertEqual(r.samples, [54383, 54512, 54601])
|
||||||
self.assertEqual(r.samples.num_samples, 3)
|
|
||||||
self.assertEqual(
|
|
||||||
[s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
|
|
||||||
)
|
|
||||||
|
|
||||||
# #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
|
header = "#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)"
|
||||||
log = "1,Ackermann,3,54529,54760,55807,266240"
|
log = "1,Ackermann,3,54529,54760,55807,266240"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=True)
|
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||||
self.assertEqual((r.samples.count, r.max_rss), (3, 266240))
|
self.assertEqual((len(r.samples), r.max_rss), (3, 266240))
|
||||||
# #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs)
|
|
||||||
|
header = "#,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs)"
|
||||||
log = "1,Ackermann,5,54570,54593,54644,57212,58304"
|
log = "1,Ackermann,5,54570,54593,54644,57212,58304"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=False)
|
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.median, r.max), (5, 54570, 54644, 58304)
|
(r.num_samples, r.min_value, r.median, r.max_value),
|
||||||
|
(5, 54570, 54644, 58304)
|
||||||
)
|
)
|
||||||
self.assertEqual((r.samples.q1, r.samples.q3), (54593, 57212))
|
self.assertEqual((r.q1, r.q3), (54581.5, 57758))
|
||||||
self.assertEqual(r.samples.count, 5)
|
self.assertEqual(len(r.samples), 5)
|
||||||
# #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
|
||||||
|
header = "#,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)"
|
||||||
log = "1,Ackermann,5,54686,54731,54774,55030,63466,270336"
|
log = "1,Ackermann,5,54686,54731,54774,55030,63466,270336"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=True)
|
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||||
self.assertEqual(r.samples.num_samples, 5)
|
self.assertEqual(r.num_samples, 5)
|
||||||
self.assertEqual(r.samples.count, 4) # outlier was excluded
|
self.assertEqual(len(r.samples), 5)
|
||||||
self.assertEqual(r.max_rss, 270336)
|
self.assertEqual(r.max_rss, 270336)
|
||||||
|
|
||||||
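# Where the new (r.q1, r.q3) == (54581.5, 57758) expectation above comes from,
# sketched with plain Python (median-of-halves quartiles over the raw samples;
# the exact method is whatever PerformanceTestResult now implements):
samples = [54570, 54593, 54644, 57212, 58304]
lower, upper = samples[:2], samples[-2:]   # halves on either side of the median
q1 = sum(lower) / 2                        # (54570 + 54593) / 2 == 54581.5
q3 = sum(upper) / 2                        # (57212 + 58304) / 2 == 57758.0
assert (q1, q3) == (54581.5, 57758.0)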
def test_init_delta_quantiles(self):
|
def test_init_delta_quantiles(self):
|
||||||
# #,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX
|
|
||||||
# 2-quantile from 2 samples in repeated min, when delta encoded,
|
# 2-quantile from 2 samples in repeated min, when delta encoded,
|
||||||
# the difference is 0, which is omitted -- only separator remains
|
# the difference is 0, which is omitted -- only separator remains
|
||||||
|
header = "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX"
|
||||||
log = "202,DropWhileArray,2,265,,22"
|
log = "202,DropWhileArray,2,265,,22"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True, delta=True)
|
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||||
self.assertEqual((r.num_samples, r.min, r.median, r.max), (2, 265, 265, 287))
|
self.assertEqual((r.num_samples, r.min_value, r.median, r.max_value),
|
||||||
self.assertEqual(r.samples.count, 2)
|
(2, 265, 276, 287))
|
||||||
self.assertEqual(r.samples.num_samples, 2)
|
self.assertEqual(len(r.samples), 2)
|
||||||
|
self.assertEqual(r.num_samples, 2)
|
||||||
|
|
||||||
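# How the delta-encoded row in the test above decodes, shown as a sketch
# (the real parsing lives in PerformanceTestResult.fromQuantileFormat):
fields = "202,DropWhileArray,2,265,,22".split(",")
deltas = [int(f) if f else 0 for f in fields[3:]]   # empty field means "+0"
quantile_points, running = [], 0
for d in deltas:                                    # cumulative sums
    running += d
    quantile_points.append(running)
assert quantile_points == [265, 265, 287]
# Two samples reconstruct as [265, 287]; the median recomputed from those raw
# samples is (265 + 287) / 2 == 276, which is what the new assertion expects.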
def test_init_oversampled_quantiles(self):
|
def test_init_oversampled_quantiles(self):
|
||||||
"""When num_samples is < quantile + 1, some of the measurements are
|
"""When num_samples is < quantile + 1, some of the measurements are
|
||||||
@@ -265,6 +107,16 @@ class TestPerformanceTestResult(unittest.TestCase):
|
|||||||
tbl <- function(s) t(sapply(1:s, function(x) {
|
tbl <- function(s) t(sapply(1:s, function(x) {
|
||||||
qs <- subsample(x, s); c(qs[1], diff(qs)) }))
|
qs <- subsample(x, s); c(qs[1], diff(qs)) }))
|
||||||
sapply(c(3, 5, 11, 21), tbl)
|
sapply(c(3, 5, 11, 21), tbl)
|
||||||
|
|
||||||
|
TODO: Delete this test when we delete quantile support from the
|
||||||
|
benchmark harness. Reconstructing samples from quantiles as this code is
|
||||||
|
trying to do is not really statistically sound, which is why we're going
|
||||||
|
to delete most of this in favor of an architecture where the
|
||||||
|
lowest-level benchmarking logic reports samples, we store and pass
|
||||||
|
raw sample data around as much as possible, and summary statistics are
|
||||||
|
only computed as necessary for actual reporting (and then discarded,
|
||||||
|
since we can recompute anything we need if we always have the raw
|
||||||
|
samples available).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def validatePTR(deq): # construct from delta encoded quantiles string
|
def validatePTR(deq): # construct from delta encoded quantiles string
|
||||||
@@ -273,10 +125,8 @@ class TestPerformanceTestResult(unittest.TestCase):
|
|||||||
r = PerformanceTestResult(
|
r = PerformanceTestResult(
|
||||||
["0", "B", str(num_samples)] + deq, quantiles=True, delta=True
|
["0", "B", str(num_samples)] + deq, quantiles=True, delta=True
|
||||||
)
|
)
|
||||||
self.assertEqual(r.samples.num_samples, num_samples)
|
self.assertEqual(len(r.samples), num_samples)
|
||||||
self.assertEqual(
|
self.assertEqual(r.samples, range(1, num_samples + 1))
|
||||||
[s.runtime for s in r.samples.all_samples], range(1, num_samples + 1)
|
|
||||||
)
|
|
||||||
|
|
||||||
delta_encoded_quantiles = """
|
delta_encoded_quantiles = """
|
||||||
1,,
|
1,,
|
||||||
@@ -318,119 +168,152 @@ class TestPerformanceTestResult(unittest.TestCase):
|
|||||||
map(validatePTR, delta_encoded_quantiles.split("\n")[1:])
|
map(validatePTR, delta_encoded_quantiles.split("\n")[1:])
|
||||||
|
|
||||||
def test_init_meta(self):
|
def test_init_meta(self):
|
||||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),…
|
header = (
|
||||||
# …PAGES,ICS,YIELD
|
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),"
|
||||||
|
+ "MEDIAN(μs),PAGES,ICS,YIELD"
|
||||||
|
)
|
||||||
log = "1,Ackermann,200,715,1281,726,47,715,7,29,15"
|
log = "1,Ackermann,200,715,1281,726,47,715,7,29,15"
|
||||||
r = PerformanceTestResult(log.split(","), meta=True)
|
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||||
self.assertEqual((r.test_num, r.name), ("1", "Ackermann"))
|
self.assertEqual((r.test_num, r.name), (1, "Ackermann"))
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||||
(200, 715, 1281, 726, 47, 715),
|
(200, 715, 1281, 726, 47, 715),
|
||||||
)
|
)
|
||||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (7, 29, 15))
|
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (7, 29, 15))
|
||||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),…
|
header = (
|
||||||
# …PAGES,ICS,YIELD
|
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),"
|
||||||
|
+ "MAX_RSS(B),PAGES,ICS,YIELD"
|
||||||
|
)
|
||||||
log = "1,Ackermann,200,715,1951,734,97,715,36864,9,50,15"
|
log = "1,Ackermann,200,715,1951,734,97,715,36864,9,50,15"
|
||||||
r = PerformanceTestResult(log.split(","), memory=True, meta=True)
|
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||||
(200, 715, 1951, 734, 97, 715),
|
(200, 715, 1951, 734, 97, 715),
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
(r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||||
(9, 50, 15, 36864),
|
(9, 50, 15, 36864),
|
||||||
)
|
)
|
||||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD
|
header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD"
|
||||||
log = "1,Ackermann,200,715,3548,8,31,15"
|
log = "1,Ackermann,200,715,3548,8,31,15"
|
||||||
r = PerformanceTestResult(log.split(","), quantiles=True, meta=True)
|
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||||
self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 3548))
|
self.assertEqual((r.num_samples, r.min_value, r.max_value), (200, 715, 3548))
|
||||||
self.assertEqual(
|
self.assertEqual(r.samples, [])
|
||||||
(r.samples.count, r.samples.min, r.samples.max), (2, 715, 3548)
|
|
||||||
)
|
|
||||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 31, 15))
|
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 31, 15))
|
||||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD
|
|
||||||
|
header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD"
|
||||||
log = "1,Ackermann,200,715,1259,32768,8,28,15"
|
log = "1,Ackermann,200,715,1259,32768,8,28,15"
|
||||||
r = PerformanceTestResult(
|
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||||
log.split(","), quantiles=True, memory=True, meta=True
|
self.assertEqual((r.num_samples, r.min_value, r.max_value), (200, 715, 1259))
|
||||||
)
|
self.assertEqual(r.samples, [])
|
||||||
self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259))
|
|
||||||
self.assertEqual(
|
|
||||||
(r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259)
|
|
||||||
)
|
|
||||||
self.assertEqual(r.max_rss, 32768)
|
self.assertEqual(r.max_rss, 32768)
|
||||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15))
|
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15))
|
||||||
|
|
||||||
def test_repr(self):
|
|
||||||
log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884"
|
|
||||||
r = PerformanceTestResult(log_line.split(","))
|
|
||||||
self.assertEqual(
|
|
||||||
str(r),
|
|
||||||
"<PerformanceTestResult name:'AngryPhonebook' samples:20 "
|
|
||||||
"min:10664 max:12933 mean:11035 sd:576 median:10884>",
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_merge(self):
|
def test_merge(self):
|
||||||
tests = """
|
tests = [
|
||||||
1,AngryPhonebook,1,12045,12045,12045,0,12045
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336
|
"samples":[12045]}""",
|
||||||
1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split(
|
"samples":[12325],"max_rss":10510336}""",
|
||||||
"\n"
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
)[
|
"samples":[11616],"max_rss":10502144}""",
|
||||||
1:
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[12270],"max_rss":10498048}"""
|
||||||
]
|
]
|
||||||
|
|
||||||
def makeResult(csv_row):
|
results = [PerformanceTestResult(json) for json in tests]
|
||||||
return PerformanceTestResult(csv_row, memory=True)
|
|
||||||
|
|
||||||
results = list(map(makeResult, [line.split(",") for line in tests]))
|
|
||||||
results[2].setup = 9
|
|
||||||
results[3].setup = 7
|
|
||||||
|
|
||||||
def as_tuple(r):
|
def as_tuple(r):
|
||||||
return (
|
return (
|
||||||
r.num_samples,
|
r.num_samples,
|
||||||
r.min,
|
r.min_value,
|
||||||
r.max,
|
r.max_value,
|
||||||
round(r.mean, 2),
|
round(r.mean, 2),
|
||||||
r.sd,
|
round(r.sd, 2),
|
||||||
r.median,
|
r.median,
|
||||||
r.max_rss,
|
r.max_rss,
|
||||||
r.setup,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
r = results[0]
|
r = results[0]
|
||||||
self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None, None))
|
self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None))
|
||||||
r.merge(results[1])
|
r.merge(results[1])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
as_tuple(r), # drops SD and median, +max_rss
|
as_tuple(r),
|
||||||
(2, 12045, 12325, 12185, None, None, 10510336, None),
|
(2, 12045, 12325, 12185, 197.99, 12185, 10510336),
|
||||||
)
|
)
|
||||||
r.merge(results[2])
|
r.merge(results[2])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
as_tuple(r), # picks smaller of the MAX_RSS, +setup
|
as_tuple(r),
|
||||||
(3, 11616, 12325, 11995.33, None, None, 10502144, 9),
|
(3, 11616, 12325, 11995.33, 357.1, 12045, 10502144),
|
||||||
)
|
)
|
||||||
r.merge(results[3])
|
r.merge(results[3])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
as_tuple(r), # picks smaller of the setup values
|
as_tuple(r),
|
||||||
(4, 11616, 12325, 12064, None, None, 10498048, 7),
|
(4, 11616, 12325, 12064, 322.29, 12157.5, 10498048),
|
||||||
|
)
|
||||||
|
|
||||||
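# Where the merged statistics asserted above come from, assuming merge() simply
# concatenates the raw sample lists and recomputes the summaries (a sketch with
# the standard library, not the actual implementation):
import statistics
samples = [12045, 12325]                       # after merging results[1]
assert (round(statistics.mean(samples), 2),
        round(statistics.stdev(samples), 2),
        statistics.median(samples)) == (12185, 197.99, 12185.0)
samples.append(11616)                          # after merging results[2]
assert (round(statistics.mean(samples), 2),
        round(statistics.stdev(samples), 2),
        statistics.median(samples)) == (11995.33, 357.1, 12045)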
|
def test_legacy_merge(self):
|
||||||
|
header = """#,TEST,NUM_SAMPLES,MIN,MAX,MEAN,SD,MEDIAN, MAX_RSS"""
|
||||||
|
tests = [
|
||||||
|
"""1,AngryPhonebook,8,12045,12045,12045,0,12045""",
|
||||||
|
"""1,AngryPhonebook,8,12325,12325,12325,0,12325,10510336""",
|
||||||
|
"""1,AngryPhonebook,8,11616,11616,11616,0,11616,10502144""",
|
||||||
|
"""1,AngryPhonebook,8,12270,12270,12270,0,12270,10498048"""
|
||||||
|
]
|
||||||
|
|
||||||
|
results = [PerformanceTestResult.fromOldFormat(header, row) for row in tests]
|
||||||
|
|
||||||
|
def as_tuple(r):
|
||||||
|
return (
|
||||||
|
r.num_samples,
|
||||||
|
r.min_value,
|
||||||
|
r.max_value,
|
||||||
|
round(r.mean, 2),
|
||||||
|
round(r.sd, 2) if r.sd is not None else None,
|
||||||
|
r.median,
|
||||||
|
r.max_rss,
|
||||||
|
)
|
||||||
|
|
||||||
|
r = results[0]
|
||||||
|
self.assertEqual(as_tuple(r), (8, 12045, 12045, 12045, 0, 12045, None))
|
||||||
|
r.merge(results[1])
|
||||||
|
self.assertEqual(
|
||||||
|
as_tuple(r), # Note: SD, Median are lost
|
||||||
|
(16, 12045, 12325, 12185, None, None, 10510336),
|
||||||
|
)
|
||||||
|
r.merge(results[2])
|
||||||
|
self.assertEqual(
|
||||||
|
as_tuple(r),
|
||||||
|
(24, 11616, 12325, 11995.33, None, None, 10502144),
|
||||||
|
)
|
||||||
|
r.merge(results[3])
|
||||||
|
self.assertEqual(
|
||||||
|
as_tuple(r),
|
||||||
|
(32, 11616, 12325, 12064, None, None, 10498048),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
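# Why the legacy (summary-only) merge above can still report a mean while SD
# and median become None: means combine as a sample-count-weighted average,
# but SD and median cannot be recovered without the raw samples. The arithmetic
# behind the first merged assertion:
n1, mean1 = 8, 12045
n2, mean2 = 8, 12325
assert (n1 * mean1 + n2 * mean2) / (n1 + n2) == 12185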
class TestResultComparison(unittest.TestCase):
|
class TestResultComparison(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
self.r0 = PerformanceTestResult(
|
self.r0 = PerformanceTestResult(
|
||||||
"101,GlobalClass,20,0,0,0,0,0,10185728".split(",")
|
"""{"number":101,"name":"GlobalClass",
|
||||||
|
"samples":[0,0,0,0,0],"max_rss":10185728}"""
|
||||||
)
|
)
|
||||||
self.r01 = PerformanceTestResult(
|
self.r01 = PerformanceTestResult(
|
||||||
"101,GlobalClass,20,20,20,20,0,0,10185728".split(",")
|
"""{"number":101,"name":"GlobalClass",
|
||||||
|
"samples":[20,20,20],"max_rss":10185728}"""
|
||||||
)
|
)
|
||||||
self.r1 = PerformanceTestResult(
|
self.r1 = PerformanceTestResult(
|
||||||
"1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",")
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[12325],"max_rss":10510336}"""
|
||||||
)
|
)
|
||||||
self.r2 = PerformanceTestResult(
|
self.r2 = PerformanceTestResult(
|
||||||
"1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",")
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[11616],"max_rss":10502144}"""
|
||||||
|
)
|
||||||
|
self.r3 = PerformanceTestResult(
|
||||||
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[11616,12326],"max_rss":10502144}"""
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_init(self):
|
def test_init(self):
|
||||||
@@ -455,11 +338,10 @@ class TestResultComparison(unittest.TestCase):
|
|||||||
|
|
||||||
def test_values_is_dubious(self):
|
def test_values_is_dubious(self):
|
||||||
self.assertFalse(ResultComparison(self.r1, self.r2).is_dubious)
|
self.assertFalse(ResultComparison(self.r1, self.r2).is_dubious)
|
||||||
self.r2.max = self.r1.min + 1
|
|
||||||
# new.min < old.min < new.max
|
# new.min < old.min < new.max
|
||||||
self.assertTrue(ResultComparison(self.r1, self.r2).is_dubious)
|
self.assertTrue(ResultComparison(self.r1, self.r3).is_dubious)
|
||||||
# other way around: old.min < new.min < old.max
|
# other way around: old.min < new.min < old.max
|
||||||
self.assertTrue(ResultComparison(self.r2, self.r1).is_dubious)
|
self.assertTrue(ResultComparison(self.r3, self.r1).is_dubious)
|
||||||
|
|
||||||
|
|
||||||
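# The "dubious" flag exercised above just means the two results' min..max
# ranges overlap: r1's only sample is 12325, while r3 spans 11616..12326,
# so old.min falls inside the new range (and vice versa when swapped):
assert 11616 < 12325 < 12326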
class FileSystemIntegration(unittest.TestCase):
|
class FileSystemIntegration(unittest.TestCase):
|
||||||
@@ -474,45 +356,48 @@ class FileSystemIntegration(unittest.TestCase):
|
|||||||
def write_temp_file(self, file_name, data):
|
def write_temp_file(self, file_name, data):
|
||||||
temp_file_name = os.path.join(self.test_dir, file_name)
|
temp_file_name = os.path.join(self.test_dir, file_name)
|
||||||
with open(temp_file_name, "w") as f:
|
with open(temp_file_name, "w") as f:
|
||||||
f.write(data)
|
for line in data:
|
||||||
|
f.write(line)
|
||||||
|
f.write('\n')
|
||||||
return temp_file_name
|
return temp_file_name
|
||||||
|
|
||||||
|
|
||||||
class OldAndNewLog(unittest.TestCase):
|
class OldAndNewLog(unittest.TestCase):
|
||||||
old_log_content = """1,AngryPhonebook,20,10458,12714,11000,0,11000,10204365
|
|
||||||
2,AnyHashableWithAClass,20,247027,319065,259056,0,259056,10250445
|
|
||||||
3,Array2D,20,335831,400221,346622,0,346622,28297216
|
|
||||||
4,ArrayAppend,20,23641,29000,24990,0,24990,11149926
|
|
||||||
34,BitCount,20,3,4,4,0,4,10192896
|
|
||||||
35,ByteSwap,20,4,6,4,0,4,10185933"""
|
|
||||||
|
|
||||||
new_log_content = """265,TwoSum,20,5006,5679,5111,0,5111
|
old_log_content = [
|
||||||
35,ByteSwap,20,0,0,0,0,0
|
"""{"number":1,"name":"AngryPhonebook","""
|
||||||
34,BitCount,20,9,9,9,0,9
|
+ """"samples":[10458,12714,11000],"max_rss":10204365}""",
|
||||||
4,ArrayAppend,20,20000,29000,24990,0,24990
|
"""{"number":2,"name":"AnyHashableWithAClass","""
|
||||||
3,Array2D,20,335831,400221,346622,0,346622
|
+ """"samples":[247027,319065,259056,259056],"max_rss":10250445}""",
|
||||||
1,AngryPhonebook,20,10458,12714,11000,0,11000"""
|
"""{"number":3,"name":"Array2D","""
|
||||||
|
+ """"samples":[335831,400221,346622,346622],"max_rss":28297216}""",
|
||||||
|
"""{"number":4,"name":"ArrayAppend","""
|
||||||
|
+ """"samples":[23641,29000,24990,24990],"max_rss":11149926}""",
|
||||||
|
"""{"number":34,"name":"BitCount","samples":[3,4,4,4],"max_rss":10192896}""",
|
||||||
|
"""{"number":35,"name":"ByteSwap","samples":[4,6,4,4],"max_rss":10185933}"""
|
||||||
|
]
|
||||||
|
|
||||||
def makeResult(csv_row):
|
new_log_content = [
|
||||||
return PerformanceTestResult(csv_row, memory=True)
|
"""{"number":265,"name":"TwoSum","samples":[5006,5679,5111,5111]}""",
|
||||||
|
"""{"number":35,"name":"ByteSwap","samples":[0,0,0,0,0]}""",
|
||||||
|
"""{"number":34,"name":"BitCount","samples":[9,9,9,9]}""",
|
||||||
|
"""{"number":4,"name":"ArrayAppend","samples":[20000,29000,24990,24990]}""",
|
||||||
|
"""{"number":3,"name":"Array2D","samples":[335831,400221,346622,346622]}""",
|
||||||
|
"""{"number":1,"name":"AngryPhonebook","samples":[10458,12714,11000,11000]}"""
|
||||||
|
]
|
||||||
|
|
||||||
|
def makeResult(json_text):
|
||||||
|
return PerformanceTestResult(json.loads(json_text))
|
||||||
|
|
||||||
old_results = dict(
|
old_results = dict(
|
||||||
[
|
[
|
||||||
(r.name, r)
|
(r.name, r) for r in map(makeResult, old_log_content)
|
||||||
for r in map(
|
|
||||||
makeResult,
|
|
||||||
[line.split(",") for line in old_log_content.splitlines()],
|
|
||||||
)
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
new_results = dict(
|
new_results = dict(
|
||||||
[
|
[
|
||||||
(r.name, r)
|
(r.name, r) for r in map(makeResult, new_log_content)
|
||||||
for r in map(
|
|
||||||
makeResult,
|
|
||||||
[line.split(",") for line in new_log_content.splitlines()],
|
|
||||||
)
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
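# A quick look at what one of the new JSON fixture rows above parses into
# (using only the standard json module, as makeResult does):
import json
row = """{"number":35,"name":"ByteSwap","samples":[4,6,4,4],"max_rss":10185933}"""
d = json.loads(row)
assert (d["number"], d["name"], d["samples"], d["max_rss"]) == \
    (35, "ByteSwap", [4, 6, 4, 4], 10185933)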
@@ -567,16 +452,12 @@ Total performance tests executed: 1
|
|||||||
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
||||||
1,Ackermann,3,54383,54512,54601"""
|
1,Ackermann,3,54383,54512,54601"""
|
||||||
)["Ackermann"]
|
)["Ackermann"]
|
||||||
self.assertEqual(
|
self.assertEqual(r.samples, [54383, 54512, 54601])
|
||||||
[s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
|
|
||||||
)
|
|
||||||
r = LogParser.results_from_string(
|
r = LogParser.results_from_string(
|
||||||
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
|
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
|
||||||
1,Ackermann,3,54529,54760,55807,266240"""
|
1,Ackermann,3,54529,54760,55807,266240"""
|
||||||
)["Ackermann"]
|
)["Ackermann"]
|
||||||
self.assertEqual(
|
self.assertEqual(r.samples, [54529, 54760, 55807])
|
||||||
[s.runtime for s in r.samples.all_samples], [54529, 54760, 55807]
|
|
||||||
)
|
|
||||||
self.assertEqual(r.max_rss, 266240)
|
self.assertEqual(r.max_rss, 266240)
|
||||||
|
|
||||||
def test_parse_delta_quantiles(self):
|
def test_parse_delta_quantiles(self):
|
||||||
@@ -584,15 +465,15 @@ Total performance tests executed: 1
|
|||||||
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
|
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.median, r.max, r.samples.count),
|
(r.num_samples, r.min_value, r.median, r.max_value, len(r.samples)),
|
||||||
(1, 101, 101, 101, 1),
|
(1, 101, 101, 101, 1),
|
||||||
)
|
)
|
||||||
r = LogParser.results_from_string(
|
r = LogParser.results_from_string(
|
||||||
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
|
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.median, r.max, r.samples.count),
|
(r.num_samples, r.min_value, r.median, r.max_value, len(r.samples)),
|
||||||
(2, 101, 101, 102, 2),
|
(2, 101, 101.5, 102, 2),
|
||||||
)
|
)
|
||||||
r = LogParser.results_from_string( # 20-quantiles aka. ventiles
|
r = LogParser.results_from_string( # 20-quantiles aka. ventiles
|
||||||
"#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
|
"#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
|
||||||
@@ -600,9 +481,8 @@ Total performance tests executed: 1
|
|||||||
+ "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
|
+ "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
|
||||||
)["DropWhileArray"]
|
)["DropWhileArray"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.num_samples, r.min, r.max, r.samples.count),
|
(r.num_samples, r.min_value, r.max_value, len(r.samples)),
|
||||||
# last 3 ventiles were outliers and were excluded from the sample
|
(200, 214, 697, 0),
|
||||||
(200, 214, 215, 18),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
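# Where (200, 214, 697, 0) in the assertion above comes from: the ventiles are
# delta-encoded, so QMIN plus the cumulative non-empty deltas (1, 2, 16, 464)
# yields the maximum:
assert 214 + 1 + 2 + 16 + 464 == 697
# len(r.samples) == 0 presumably because 200 raw samples cannot be recovered
# from 20 quantile summaries, so no reconstructed samples are kept.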
def test_parse_meta(self):
|
def test_parse_meta(self):
|
||||||
@@ -612,7 +492,7 @@ Total performance tests executed: 1
|
|||||||
+ "0,B,1,2,2,2,0,2,7,29,15"
|
+ "0,B,1,2,2,2,0,2,7,29,15"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15)
|
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15)
|
||||||
)
|
)
|
||||||
r = LogParser.results_from_string(
|
r = LogParser.results_from_string(
|
||||||
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),"
|
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),"
|
||||||
@@ -620,163 +500,35 @@ Total performance tests executed: 1
|
|||||||
+ "0,B,1,3,3,3,0,3,36864,9,50,15"
|
+ "0,B,1,3,3,3,0,3,36864,9,50,15"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||||
(3, 9, 50, 15, 36864),
|
(3, 9, 50, 15, 36864),
|
||||||
)
|
)
|
||||||
r = LogParser.results_from_string(
|
r = LogParser.results_from_string(
|
||||||
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
|
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
|
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
|
||||||
)
|
)
|
||||||
r = LogParser.results_from_string(
|
r = LogParser.results_from_string(
|
||||||
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
|
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
|
||||||
+ "0,B,1,5,5,32768,8,28,15"
|
+ "0,B,1,5,5,32768,8,28,15"
|
||||||
)["B"]
|
)["B"]
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||||
(5, 8, 28, 15, 32768),
|
(5, 8, 28, 15, 32768),
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_parse_results_verbose(self):
|
|
||||||
"""Parse multiple performance test results with 2 sample formats:
|
|
||||||
single line for N = 1; two lines for N > 1.
|
|
||||||
"""
|
|
||||||
verbose_log = """--- DATA ---
|
|
||||||
#,TEST,SAMPLES,MIN(us),MAX(us),MEAN(us),SD(us),MEDIAN(us)
|
|
||||||
Running AngryPhonebook for 3 samples.
|
|
||||||
Measuring with scale 78.
|
|
||||||
Sample 0,11812
|
|
||||||
Measuring with scale 90.
|
|
||||||
Sample 1,13898
|
|
||||||
Sample 2,11467
|
|
||||||
1,AngryPhonebook,3,11467,13898,12392,1315,11812
|
|
||||||
Running Array2D for 3 samples.
|
|
||||||
SetUp 14444
|
|
||||||
Sample 0,369900
|
|
||||||
Yielding after ~369918 μs
|
|
||||||
Sample 1,381039
|
|
||||||
Yielding after ~381039 μs
|
|
||||||
Sample 2,371043
|
|
||||||
3,Array2D,3,369900,381039,373994,6127,371043
|
|
||||||
|
|
||||||
Totals,2"""
|
|
||||||
parser = LogParser()
|
|
||||||
results = parser.parse_results(verbose_log.split("\n"))
|
|
||||||
|
|
||||||
r = results[0]
|
|
||||||
self.assertEqual(
|
|
||||||
(r.name, r.min, r.max, int(r.mean), int(r.sd), r.median),
|
|
||||||
("AngryPhonebook", 11467, 13898, 12392, 1315, 11812),
|
|
||||||
)
|
|
||||||
self.assertEqual(r.num_samples, r.samples.num_samples)
|
|
||||||
self.assertEqual(
|
|
||||||
results[0].samples.all_samples,
|
|
||||||
[(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)],
|
|
||||||
)
|
|
||||||
self.assertEqual(r.yields, None)
|
|
||||||
|
|
||||||
r = results[1]
|
|
||||||
self.assertEqual(
|
|
||||||
(r.name, r.min, r.max, int(r.mean), int(r.sd), r.median),
|
|
||||||
("Array2D", 369900, 381039, 373994, 6127, 371043),
|
|
||||||
)
|
|
||||||
self.assertEqual(r.setup, 14444)
|
|
||||||
self.assertEqual(r.num_samples, r.samples.num_samples)
|
|
||||||
self.assertEqual(
|
|
||||||
results[1].samples.all_samples,
|
|
||||||
[(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)],
|
|
||||||
)
|
|
||||||
yielded = r.yields[0]
|
|
||||||
self.assertEqual(yielded.before_sample, 1)
|
|
||||||
self.assertEqual(yielded.after, 369918)
|
|
||||||
self.assertEqual(r.yields, [(1, 369918), (2, 381039)])
|
|
||||||
|
|
||||||
def test_parse_environment_verbose(self):
|
|
||||||
"""Parse stats about environment in verbose mode."""
|
|
||||||
verbose_log = """ MAX_RSS 8937472 - 8904704 = 32768 (8 pages)
|
|
||||||
ICS 1338 - 229 = 1109
|
|
||||||
VCS 2 - 1 = 1
|
|
||||||
2,AngryPhonebook,3,11269,11884,11657,338,11820
|
|
||||||
"""
|
|
||||||
parser = LogParser()
|
|
||||||
results = parser.parse_results(verbose_log.split("\n"))
|
|
||||||
|
|
||||||
r = results[0]
|
|
||||||
self.assertEqual(r.max_rss, 32768)
|
|
||||||
self.assertEqual(r.mem_pages, 8)
|
|
||||||
self.assertEqual(r.voluntary_cs, 1)
|
|
||||||
self.assertEqual(r.involuntary_cs, 1109)
|
|
||||||
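
The MAX_RSS line in this log reports a before/after delta plus a page count; a sketch of the arithmetic it encodes, assuming 4096-byte pages (consistent with 32768 B == 8 pages above):

    after_rss, before_rss = 8937472, 8904704
    delta = after_rss - before_rss      # 32768 bytes
    pages = delta // 4096               # 8 pages
    assert (delta, pages) == (32768, 8)
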
|
|
||||||
def test_results_from_merge(self):
|
def test_results_from_merge(self):
|
||||||
"""Parsing concatenated log merges same PerformanceTestResults"""
|
"""Parsing concatenated log merges same PerformanceTestResults"""
|
||||||
concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990
|
concatenated_logs = """#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN
|
||||||
|
4,ArrayAppend,20,23641,29000,24990,0,24990
|
||||||
4,ArrayAppend,1,20000,20000,20000,0,20000"""
|
4,ArrayAppend,1,20000,20000,20000,0,20000"""
|
||||||
results = LogParser.results_from_string(concatenated_logs)
|
results = LogParser.results_from_string(concatenated_logs)
|
||||||
self.assertEqual(list(results.keys()), ["ArrayAppend"])
|
self.assertEqual(list(results.keys()), ["ArrayAppend"])
|
||||||
result = results["ArrayAppend"]
|
result = results["ArrayAppend"]
|
||||||
self.assertTrue(isinstance(result, PerformanceTestResult))
|
self.assertTrue(isinstance(result, PerformanceTestResult))
|
||||||
self.assertEqual(result.min, 20000)
|
self.assertEqual(result.min_value, 20000)
|
||||||
self.assertEqual(result.max, 29000)
|
self.assertEqual(result.max_value, 29000)
|
||||||
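
A sketch of the merge semantics these two assertions encode (illustration only, not the PerformanceTestResult code): the merged result spans both runs of ArrayAppend.

    run1 = {"min": 23641, "max": 29000}          # 20-sample run
    run2 = {"min": 20000, "max": 20000}          # 1-sample run
    merged_min = min(run1["min"], run2["min"])   # 20000
    merged_max = max(run1["max"], run2["max"])   # 29000
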
|
|
||||||
def test_results_from_merge_verbose(self):
|
|
||||||
"""Parsing verbose log merges all PerformanceTestSamples.
|
|
||||||
...this should technically be on TestPerformanceTestResult, but it's
|
|
||||||
easier to write here. ¯\\_(ツ)_/¯"""
|
|
||||||
concatenated_logs = """
|
|
||||||
Sample 0,355883
|
|
||||||
Sample 1,358817
|
|
||||||
Sample 2,353552
|
|
||||||
Sample 3,350815
|
|
||||||
3,Array2D,4,350815,358817,354766,3403,355883
|
|
||||||
Sample 0,363094
|
|
||||||
Sample 1,369169
|
|
||||||
Sample 2,376131
|
|
||||||
Sample 3,364245
|
|
||||||
3,Array2D,4,363094,376131,368159,5931,369169"""
|
|
||||||
results = LogParser.results_from_string(concatenated_logs)
|
|
||||||
self.assertEqual(list(results.keys()), ["Array2D"])
|
|
||||||
result = results["Array2D"]
|
|
||||||
self.assertTrue(isinstance(result, PerformanceTestResult))
|
|
||||||
self.assertEqual(result.min, 350815)
|
|
||||||
self.assertEqual(result.max, 376131)
|
|
||||||
self.assertEqual(result.median, 358817)
|
|
||||||
self.assertAlmostEqual(result.sd, 8443.37, places=2)
|
|
||||||
self.assertAlmostEqual(result.mean, 361463.25, places=2)
|
|
||||||
self.assertEqual(result.num_samples, 8)
|
|
||||||
samples = result.samples
|
|
||||||
self.assertTrue(isinstance(samples, PerformanceTestSamples))
|
|
||||||
self.assertEqual(samples.count, 8)
|
|
||||||
|
|
||||||
def test_excludes_outliers_from_samples(self):
|
|
||||||
verbose_log = """Running DropFirstAnySeqCntRangeLazy for 10 samples.
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 0,455
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 1,203
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 2,205
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 3,207
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 4,208
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 5,206
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 6,205
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 7,206
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 8,208
|
|
||||||
Measuring with scale 2.
|
|
||||||
Sample 9,184
|
|
||||||
65,DropFirstAnySeqCntRangeLazy,10,184,455,228,79,206
|
|
||||||
"""
|
|
||||||
parser = LogParser()
|
|
||||||
result = parser.parse_results(verbose_log.split("\n"))[0]
|
|
||||||
self.assertEqual(result.num_samples, 10)
|
|
||||||
self.assertEqual(result.samples.count, 8)
|
|
||||||
self.assertEqual(len(result.samples.outliers), 2)
|
|
||||||
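
The counts this test expects are consistent with interquartile-range fences over the R-1 quartiles; the sketch below is only an illustration under that assumption (the actual exclusion logic lives in PerformanceTestSamples).

    import math

    samples = sorted([455, 203, 205, 207, 208, 206, 205, 206, 208, 184])
    def q(p):  # R-1 quantile estimate over the sorted data
        return samples[max(0, math.ceil(len(samples) * p) - 1)]
    q1, q3 = q(0.25), q(0.75)   # 205, 208
    iqr = q3 - q1               # 3
    outliers = [s for s in samples
                if s < q1 - 1.5 * iqr or s > q3 + 1.5 * iqr]
    assert outliers == [184, 455] and len(samples) - len(outliers) == 8
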
|
|
||||||
|
|
||||||
class TestTestComparator(OldAndNewLog):
|
class TestTestComparator(OldAndNewLog):
|
||||||
@@ -786,7 +538,7 @@ class TestTestComparator(OldAndNewLog):
|
|||||||
|
|
||||||
tc = TestComparator(self.old_results, self.new_results, 0.05)
|
tc = TestComparator(self.old_results, self.new_results, 0.05)
|
||||||
self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"])
|
self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"])
|
||||||
self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"])
|
# self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"])
|
||||||
self.assertEqual(names(tc.decreased), ["BitCount"])
|
self.assertEqual(names(tc.decreased), ["BitCount"])
|
||||||
self.assertEqual(names(tc.added), ["TwoSum"])
|
self.assertEqual(names(tc.added), ["TwoSum"])
|
||||||
self.assertEqual(names(tc.removed), ["AnyHashableWithAClass"])
|
self.assertEqual(names(tc.removed), ["AnyHashableWithAClass"])
|
||||||
@@ -830,26 +582,29 @@ class TestReportFormatter(OldAndNewLog):
|
|||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
ReportFormatter.values(
|
ReportFormatter.values(
|
||||||
PerformanceTestResult(
|
PerformanceTestResult(
|
||||||
"1,AngryPhonebook,20,10664,12933,11035,576,10884".split(",")
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[10664,12933,11035,10884]}"""
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
("AngryPhonebook", "10664", "12933", "11035", "—"),
|
("AngryPhonebook", "10664", "12933", "11379", "—"),
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
ReportFormatter.values(
|
ReportFormatter.values(
|
||||||
PerformanceTestResult(
|
PerformanceTestResult(
|
||||||
"1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(","),
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
memory=True
|
"samples":[12045],"max_rss":10510336}"""
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
("AngryPhonebook", "12045", "12045", "12045", "10510336"),
|
("AngryPhonebook", "12045", "12045", "12045", "10510336"),
|
||||||
)
|
)
|
||||||
|
|
||||||
r1 = PerformanceTestResult(
|
r1 = PerformanceTestResult(
|
||||||
"1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",")
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[12325],"max_rss":10510336}"""
|
||||||
)
|
)
|
||||||
r2 = PerformanceTestResult(
|
r2 = PerformanceTestResult(
|
||||||
"1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",")
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[11616],"max_rss":10510336}"""
|
||||||
)
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
ReportFormatter.values(ResultComparison(r1, r2)),
|
ReportFormatter.values(ResultComparison(r1, r2)),
|
||||||
@@ -859,7 +614,15 @@ class TestReportFormatter(OldAndNewLog):
|
|||||||
ReportFormatter.values(ResultComparison(r2, r1)),
|
ReportFormatter.values(ResultComparison(r2, r1)),
|
||||||
("AngryPhonebook", "11616", "12325", "+6.1%", "0.94x"),
|
("AngryPhonebook", "11616", "12325", "+6.1%", "0.94x"),
|
||||||
)
|
)
|
||||||
r2.max = r1.min + 1
|
|
||||||
|
r1 = PerformanceTestResult(
|
||||||
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[12325],"max_rss":10510336}"""
|
||||||
|
)
|
||||||
|
r2 = PerformanceTestResult(
|
||||||
|
"""{"number":1,"name":"AngryPhonebook",
|
||||||
|
"samples":[11616,12326],"max_rss":10510336}"""
|
||||||
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
ReportFormatter.values(ResultComparison(r1, r2))[4],
|
ReportFormatter.values(ResultComparison(r1, r2))[4],
|
||||||
"1.06x (?)", # is_dubious
|
"1.06x (?)", # is_dubious
|
||||||
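
The arithmetic behind the "+6.1%" / "0.94x" expectations, using the two single-sample results above (illustration only); the "(?)" suffix marks a comparison as dubious when the old and new sample ranges overlap, as the [12325] vs [11616, 12326] case is constructed to do.

    old, new = 11616, 12325
    delta_pct = (new - old) / old * 100   # ≈ +6.1
    ratio = old / new                     # ≈ 0.94, printed as "0.94x"
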
@@ -871,13 +634,13 @@ class TestReportFormatter(OldAndNewLog):
|
|||||||
"""
|
"""
|
||||||
self.assert_markdown_contains(
|
self.assert_markdown_contains(
|
||||||
[
|
[
|
||||||
"AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445",
|
"AnyHashableWithAClass | 247027 | 319065 | 271051 | 10250445",
|
||||||
"Array2D | 335831 | 335831 | +0.0% | 1.00x",
|
"Array2D | 335831 | 335831 | +0.0% | 1.00x",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
self.assert_git_contains(
|
self.assert_git_contains(
|
||||||
[
|
[
|
||||||
"AnyHashableWithAClass 247027 319065 259056 10250445",
|
"AnyHashableWithAClass 247027 319065 271051 10250445",
|
||||||
"Array2D 335831 335831 +0.0% 1.00x",
|
"Array2D 335831 335831 +0.0% 1.00x",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ import LibProc
|
|||||||
import TestsUtils
|
import TestsUtils
|
||||||
|
|
||||||
struct MeasurementMetadata {
|
struct MeasurementMetadata {
|
||||||
|
// Note: maxRSS and pages subtract the RSS measured
|
||||||
|
// after the benchmark driver setup has finished.
|
||||||
let maxRSS: Int /// Maximum Resident Set Size (B)
|
let maxRSS: Int /// Maximum Resident Set Size (B)
|
||||||
let pages: Int /// Maximum Resident Set Size (pages)
|
let pages: Int /// Maximum Resident Set Size (pages)
|
||||||
let ics: Int /// Involuntary Context Switches
|
let ics: Int /// Involuntary Context Switches
|
||||||
@@ -30,33 +32,15 @@ struct MeasurementMetadata {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct BenchResults {
|
struct BenchResults {
|
||||||
typealias T = Int
|
let samples: [Double]
|
||||||
private let samples: [T]
|
|
||||||
let meta: MeasurementMetadata?
|
let meta: MeasurementMetadata?
|
||||||
let stats: Stats
|
let iters: Int
|
||||||
|
|
||||||
init(_ samples: [T], _ metadata: MeasurementMetadata?) {
|
init(_ samples: [Double], _ metadata: MeasurementMetadata?, _ iters: Int) {
|
||||||
self.samples = samples.sorted()
|
self.samples = samples
|
||||||
self.meta = metadata
|
self.meta = metadata
|
||||||
self.stats = self.samples.reduce(into: Stats(), Stats.collect)
|
self.iters = iters
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return measured value for given `quantile`.
|
|
||||||
///
|
|
||||||
/// Equivalent to quantile estimate type R-1, SAS-3. See:
|
|
||||||
/// https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
|
|
||||||
subscript(_ quantile: Double) -> T {
|
|
||||||
let index = Swift.max(0,
|
|
||||||
Int((Double(samples.count) * quantile).rounded(.up)) - 1)
|
|
||||||
return samples[index]
|
|
||||||
}
|
|
||||||
|
|
||||||
var sampleCount: T { return samples.count }
|
|
||||||
var min: T { return samples.first! }
|
|
||||||
var max: T { return samples.last! }
|
|
||||||
var mean: T { return Int(stats.mean.rounded()) }
|
|
||||||
var sd: T { return Int(stats.standardDeviation.rounded()) }
|
|
||||||
var median: T { return self[0.5] }
|
|
||||||
}
|
}
|
||||||
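
A Python rendering of the R-1 quantile estimate the removed subscript implemented (index = ceil(n * q) - 1 over the sorted samples), for illustration only:

    import math

    def quantile_r1(sorted_samples, q):
        index = max(0, math.ceil(len(sorted_samples) * q) - 1)
        return sorted_samples[index]

    # quantile_r1(samples, 0.5) is the median the old harness reported.
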
|
|
||||||
public var registeredBenchmarks: [BenchmarkInfo] = []
|
public var registeredBenchmarks: [BenchmarkInfo] = []
|
||||||
@@ -76,9 +60,6 @@ enum TestAction {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct TestConfig {
|
struct TestConfig {
|
||||||
/// The delimiter to use when printing output.
|
|
||||||
let delim: String
|
|
||||||
|
|
||||||
/// Duration of the test measurement in seconds.
|
/// Duration of the test measurement in seconds.
|
||||||
///
|
///
|
||||||
/// Used to compute the number of iterations, if no fixed amount is specified.
|
/// Used to compute the number of iterations, if no fixed amount is specified.
|
||||||
@@ -98,12 +79,6 @@ struct TestConfig {
|
|||||||
/// The minimum number of samples we should take of each test.
|
/// The minimum number of samples we should take of each test.
|
||||||
let minSamples: Int?
|
let minSamples: Int?
|
||||||
|
|
||||||
/// Quantiles to report in results.
|
|
||||||
let quantile: Int?
|
|
||||||
|
|
||||||
/// Report quantiles with delta encoding.
|
|
||||||
let delta: Bool
|
|
||||||
|
|
||||||
/// Is verbose output enabled?
|
/// Is verbose output enabled?
|
||||||
let verbose: Bool
|
let verbose: Bool
|
||||||
|
|
||||||
@@ -116,31 +91,35 @@ struct TestConfig {
|
|||||||
// Allow running with nondeterministic hashing?
|
// Allow running with nondeterministic hashing?
|
||||||
var allowNondeterministicHashing: Bool
|
var allowNondeterministicHashing: Bool
|
||||||
|
|
||||||
|
// Use machine-readable output format (JSON)?
|
||||||
|
var jsonOutput: Bool
|
||||||
|
|
||||||
/// After we run the tests, should the harness sleep to allow for utilities
|
/// After we run the tests, should the harness sleep to allow for utilities
|
||||||
/// like leaks that require a PID to run on the test harness.
|
/// like leaks that require a PID to run on the test harness.
|
||||||
let afterRunSleep: UInt32?
|
let afterRunSleep: UInt32?
|
||||||
|
|
||||||
/// The list of tests to run.
|
/// The list of tests to run.
|
||||||
let tests: [(index: String, info: BenchmarkInfo)]
|
let tests: [(index: Int, info: BenchmarkInfo)]
|
||||||
|
|
||||||
|
/// Number of characters in the longest test name (for formatting)
|
||||||
|
let testNameLength: Int
|
||||||
|
|
||||||
let action: TestAction
|
let action: TestAction
|
||||||
|
|
||||||
init(_ registeredBenchmarks: [BenchmarkInfo]) {
|
init(_ registeredBenchmarks: [BenchmarkInfo]) {
|
||||||
|
|
||||||
struct PartialTestConfig {
|
struct PartialTestConfig {
|
||||||
var delim: String?
|
|
||||||
var tags, skipTags: Set<BenchmarkCategory>?
|
var tags, skipTags: Set<BenchmarkCategory>?
|
||||||
var numSamples: UInt?
|
var numSamples: UInt?
|
||||||
var minSamples: UInt?
|
var minSamples: UInt?
|
||||||
var numIters: UInt?
|
var numIters: UInt?
|
||||||
var quantile: UInt?
|
|
||||||
var delta: Bool?
|
|
||||||
var afterRunSleep: UInt32?
|
var afterRunSleep: UInt32?
|
||||||
var sampleTime: Double?
|
var sampleTime: Double?
|
||||||
var verbose: Bool?
|
var verbose: Bool?
|
||||||
var logMemory: Bool?
|
var logMemory: Bool?
|
||||||
var logMeta: Bool?
|
var logMeta: Bool?
|
||||||
var allowNondeterministicHashing: Bool?
|
var allowNondeterministicHashing: Bool?
|
||||||
|
var jsonOutput: Bool?
|
||||||
var action: TestAction?
|
var action: TestAction?
|
||||||
var tests: [String]?
|
var tests: [String]?
|
||||||
}
|
}
|
||||||
@@ -172,13 +151,6 @@ struct TestConfig {
|
|||||||
help: "number of iterations averaged in the sample;\n" +
|
help: "number of iterations averaged in the sample;\n" +
|
||||||
"default: auto-scaled to measure for `sample-time`",
|
"default: auto-scaled to measure for `sample-time`",
|
||||||
parser: { UInt($0) })
|
parser: { UInt($0) })
|
||||||
p.addArgument("--quantile", \.quantile,
|
|
||||||
help: "report quantiles instead of normal dist. stats;\n" +
|
|
||||||
"use 4 to get a five-number summary with quartiles,\n" +
|
|
||||||
"10 (deciles), 20 (ventiles), 100 (percentiles), etc.",
|
|
||||||
parser: { UInt($0) })
|
|
||||||
p.addArgument("--delta", \.delta, defaultValue: true,
|
|
||||||
help: "report quantiles with delta encoding")
|
|
||||||
p.addArgument("--sample-time", \.sampleTime,
|
p.addArgument("--sample-time", \.sampleTime,
|
||||||
help: "duration of test measurement in seconds\ndefault: 1",
|
help: "duration of test measurement in seconds\ndefault: 1",
|
||||||
parser: finiteDouble)
|
parser: finiteDouble)
|
||||||
@@ -188,9 +160,6 @@ struct TestConfig {
|
|||||||
help: "log the change in maximum resident set size (MAX_RSS)")
|
help: "log the change in maximum resident set size (MAX_RSS)")
|
||||||
p.addArgument("--meta", \.logMeta, defaultValue: true,
|
p.addArgument("--meta", \.logMeta, defaultValue: true,
|
||||||
help: "log the metadata (memory usage, context switches)")
|
help: "log the metadata (memory usage, context switches)")
|
||||||
p.addArgument("--delim", \.delim,
|
|
||||||
help:"value delimiter used for log output; default: ,",
|
|
||||||
parser: { $0 })
|
|
||||||
p.addArgument("--tags", \PartialTestConfig.tags,
|
p.addArgument("--tags", \PartialTestConfig.tags,
|
||||||
help: "run tests matching all the specified categories",
|
help: "run tests matching all the specified categories",
|
||||||
parser: tags)
|
parser: tags)
|
||||||
@@ -208,30 +177,37 @@ struct TestConfig {
|
|||||||
\.allowNondeterministicHashing, defaultValue: true,
|
\.allowNondeterministicHashing, defaultValue: true,
|
||||||
help: "Don't trap when running without the \n" +
|
help: "Don't trap when running without the \n" +
|
||||||
"SWIFT_DETERMINISTIC_HASHING=1 environment variable")
|
"SWIFT_DETERMINISTIC_HASHING=1 environment variable")
|
||||||
|
p.addArgument("--json",
|
||||||
|
\.jsonOutput, defaultValue: true,
|
||||||
|
help: "Use JSON output (suitable for consumption by scripts)")
|
||||||
p.addArgument(nil, \.tests) // positional arguments
|
p.addArgument(nil, \.tests) // positional arguments
|
||||||
|
|
||||||
let c = p.parse()
|
let c = p.parse()
|
||||||
|
|
||||||
// Configure from the command line arguments, filling in the defaults.
|
// Configure from the command line arguments, filling in the defaults.
|
||||||
delim = c.delim ?? ","
|
|
||||||
sampleTime = c.sampleTime ?? 1.0
|
sampleTime = c.sampleTime ?? 1.0
|
||||||
numIters = c.numIters.map { Int($0) }
|
numIters = c.numIters.map { Int($0) }
|
||||||
numSamples = c.numSamples.map { Int($0) }
|
numSamples = c.numSamples.map { Int($0) }
|
||||||
minSamples = c.minSamples.map { Int($0) }
|
minSamples = c.minSamples.map { Int($0) }
|
||||||
quantile = c.quantile.map { Int($0) }
|
|
||||||
delta = c.delta ?? false
|
|
||||||
verbose = c.verbose ?? false
|
verbose = c.verbose ?? false
|
||||||
logMemory = c.logMemory ?? false
|
logMemory = c.logMemory ?? false
|
||||||
logMeta = c.logMeta ?? false
|
logMeta = c.logMeta ?? false
|
||||||
afterRunSleep = c.afterRunSleep
|
afterRunSleep = c.afterRunSleep
|
||||||
action = c.action ?? .run
|
action = c.action ?? .run
|
||||||
allowNondeterministicHashing = c.allowNondeterministicHashing ?? false
|
allowNondeterministicHashing = c.allowNondeterministicHashing ?? false
|
||||||
|
jsonOutput = c.jsonOutput ?? false
|
||||||
tests = TestConfig.filterTests(registeredBenchmarks,
|
tests = TestConfig.filterTests(registeredBenchmarks,
|
||||||
tests: c.tests ?? [],
|
tests: c.tests ?? [],
|
||||||
tags: c.tags ?? [],
|
tags: c.tags ?? [],
|
||||||
skipTags: c.skipTags ?? [.unstable, .skip])
|
skipTags: c.skipTags ?? [.unstable, .skip])
|
||||||
|
|
||||||
if logMemory && tests.count > 1 {
|
if tests.count > 0 {
|
||||||
|
testNameLength = tests.map{$0.info.name.count}.sorted().reversed().first!
|
||||||
|
} else {
|
||||||
|
testNameLength = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if logMemory && tests.count > 1 && !jsonOutput {
|
||||||
print(
|
print(
|
||||||
"""
|
"""
|
||||||
warning: The memory usage of a test, reported as the change in MAX_RSS,
|
warning: The memory usage of a test, reported as the change in MAX_RSS,
|
||||||
@@ -241,10 +217,9 @@ struct TestConfig {
|
|||||||
""")
|
""")
|
||||||
}
|
}
|
||||||
|
|
||||||
// We always prepare the configuration string and call the print to have
|
if verbose {
|
||||||
// the same memory usage baseline between verbose and normal mode.
|
let testList = tests.map({ $0.1.name }).joined(separator: ", ")
|
||||||
let testList = tests.map({ $0.1.name }).joined(separator: ", ")
|
print("""
|
||||||
let configuration = """
|
|
||||||
--- CONFIG ---
|
--- CONFIG ---
|
||||||
NumSamples: \(numSamples ?? 0)
|
NumSamples: \(numSamples ?? 0)
|
||||||
MinSamples: \(minSamples ?? 0)
|
MinSamples: \(minSamples ?? 0)
|
||||||
@@ -253,14 +228,12 @@ struct TestConfig {
|
|||||||
LogMeta: \(logMeta)
|
LogMeta: \(logMeta)
|
||||||
SampleTime: \(sampleTime)
|
SampleTime: \(sampleTime)
|
||||||
NumIters: \(numIters ?? 0)
|
NumIters: \(numIters ?? 0)
|
||||||
Quantile: \(quantile ?? 0)
|
|
||||||
Delimiter: \(String(reflecting: delim))
|
|
||||||
Tests Filter: \(c.tests ?? [])
|
Tests Filter: \(c.tests ?? [])
|
||||||
Tests to run: \(testList)
|
Tests to run: \(testList)
|
||||||
|
|
||||||
--- DATA ---\n
|
--- DATA ---
|
||||||
"""
|
""")
|
||||||
print(verbose ? configuration : "", terminator:"")
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the list of tests to run.
|
/// Returns the list of tests to run.
|
||||||
@@ -278,8 +251,9 @@ struct TestConfig {
|
|||||||
tests: [String],
|
tests: [String],
|
||||||
tags: Set<BenchmarkCategory>,
|
tags: Set<BenchmarkCategory>,
|
||||||
skipTags: Set<BenchmarkCategory>
|
skipTags: Set<BenchmarkCategory>
|
||||||
) -> [(index: String, info: BenchmarkInfo)] {
|
) -> [(index: Int, info: BenchmarkInfo)] {
|
||||||
var t = tests
|
var t = tests
|
||||||
|
/// TODO: Make the following less weird by using a simple `filter` operation
|
||||||
let filtersIndex = t.partition { $0.hasPrefix("+") || $0.hasPrefix("-") }
|
let filtersIndex = t.partition { $0.hasPrefix("+") || $0.hasPrefix("-") }
|
||||||
let excludesIndex = t[filtersIndex...].partition { $0.hasPrefix("-") }
|
let excludesIndex = t[filtersIndex...].partition { $0.hasPrefix("-") }
|
||||||
let specifiedTests = Set(t[..<filtersIndex])
|
let specifiedTests = Set(t[..<filtersIndex])
|
||||||
@@ -288,7 +262,7 @@ struct TestConfig {
|
|||||||
let allTests = registeredBenchmarks.sorted()
|
let allTests = registeredBenchmarks.sorted()
|
||||||
let indices = Dictionary(uniqueKeysWithValues:
|
let indices = Dictionary(uniqueKeysWithValues:
|
||||||
zip(allTests.map { $0.name },
|
zip(allTests.map { $0.name },
|
||||||
(1...).lazy.map { String($0) } ))
|
(1...).lazy))
|
||||||
|
|
||||||
func byTags(b: BenchmarkInfo) -> Bool {
|
func byTags(b: BenchmarkInfo) -> Bool {
|
||||||
return b.tags.isSuperset(of: tags) &&
|
return b.tags.isSuperset(of: tags) &&
|
||||||
@@ -297,7 +271,7 @@ struct TestConfig {
|
|||||||
func byNamesOrIndices(b: BenchmarkInfo) -> Bool {
|
func byNamesOrIndices(b: BenchmarkInfo) -> Bool {
|
||||||
return specifiedTests.contains(b.name) ||
|
return specifiedTests.contains(b.name) ||
|
||||||
// !! "`allTests` have been assigned an index"
|
// !! "`allTests` have been assigned an index"
|
||||||
specifiedTests.contains(indices[b.name]!) ||
|
specifiedTests.contains(indices[b.name]!.description) ||
|
||||||
(includes.contains { b.name.contains($0) } &&
|
(includes.contains { b.name.contains($0) } &&
|
||||||
excludes.allSatisfy { !b.name.contains($0) } )
|
excludes.allSatisfy { !b.name.contains($0) } )
|
||||||
}
|
}
|
||||||
@@ -320,30 +294,6 @@ extension String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Stats {
|
|
||||||
var n: Int = 0
|
|
||||||
var s: Double = 0.0
|
|
||||||
var mean: Double = 0.0
|
|
||||||
var variance: Double { return n < 2 ? 0.0 : s / Double(n - 1) }
|
|
||||||
var standardDeviation: Double { return variance.squareRoot() }
|
|
||||||
|
|
||||||
static func collect(_ s: inout Stats, _ x: Int){
|
|
||||||
Stats.runningMeanVariance(&s, Double(x))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Compute running mean and variance using B. P. Welford's method.
|
|
||||||
///
|
|
||||||
/// See Knuth TAOCP vol 2, 3rd edition, page 232, or
|
|
||||||
/// https://www.johndcook.com/blog/standard_deviation/
|
|
||||||
static func runningMeanVariance(_ stats: inout Stats, _ x: Double){
|
|
||||||
let n = stats.n + 1
|
|
||||||
let (k, m_, s_) = (Double(n), stats.mean, stats.s)
|
|
||||||
let m = m_ + (x - m_) / k
|
|
||||||
let s = s_ + (x - m_) * (x - m)
|
|
||||||
(stats.n, stats.mean, stats.s) = (n, m, s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
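
A Python rendering of the Welford update the removed Stats struct used for its running mean and variance (see Knuth TAOCP vol. 2, p. 232), for illustration only:

    def welford_update(n, mean, s, x):
        n += 1
        delta = x - mean
        mean += delta / n
        s += delta * (x - mean)
        return n, mean, s   # variance = s / (n - 1) once n >= 2
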
|
|
||||||
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
||||||
|
|
||||||
@_silgen_name("_swift_leaks_startTrackingObjects")
|
@_silgen_name("_swift_leaks_startTrackingObjects")
|
||||||
@@ -529,7 +479,7 @@ final class TestRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Measure the `fn` and return the average sample time per iteration (μs).
|
/// Measure the `fn` and return the average sample time per iteration (μs).
|
||||||
func measure(_ name: String, fn: (Int) -> Void, numIters: Int) -> Int {
|
func measure(_ name: String, fn: (Int) -> Void, numIters: Int) -> Double {
|
||||||
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
||||||
name.withCString { p in startTrackingObjects(p) }
|
name.withCString { p in startTrackingObjects(p) }
|
||||||
#endif
|
#endif
|
||||||
@@ -542,7 +492,7 @@ final class TestRunner {
|
|||||||
name.withCString { p in stopTrackingObjects(p) }
|
name.withCString { p in stopTrackingObjects(p) }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return lastSampleTime.microseconds / numIters
|
return Double(lastSampleTime.microseconds) / Double(numIters)
|
||||||
}
|
}
|
||||||
|
|
||||||
func logVerbose(_ msg: @autoclosure () -> String) {
|
func logVerbose(_ msg: @autoclosure () -> String) {
|
||||||
@@ -560,9 +510,9 @@ final class TestRunner {
|
|||||||
}
|
}
|
||||||
logVerbose("Running \(test.name)")
|
logVerbose("Running \(test.name)")
|
||||||
|
|
||||||
var samples: [Int] = []
|
var samples: [Double] = []
|
||||||
|
|
||||||
func addSample(_ time: Int) {
|
func addSample(_ time: Double) {
|
||||||
logVerbose(" Sample \(samples.count),\(time)")
|
logVerbose(" Sample \(samples.count),\(time)")
|
||||||
samples.append(time)
|
samples.append(time)
|
||||||
}
|
}
|
||||||
@@ -576,11 +526,11 @@ final class TestRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Determine number of iterations for testFn to run for desired time.
|
// Determine number of iterations for testFn to run for desired time.
|
||||||
func iterationsPerSampleTime() -> (numIters: Int, oneIter: Int) {
|
func iterationsPerSampleTime() -> (numIters: Int, oneIter: Double) {
|
||||||
let oneIter = measure(test.name, fn: testFn, numIters: 1)
|
let oneIter = measure(test.name, fn: testFn, numIters: 1)
|
||||||
if oneIter > 0 {
|
if oneIter > 0 {
|
||||||
let timePerSample = Int(c.sampleTime * 1_000_000.0) // microseconds (μs)
|
let timePerSample = c.sampleTime * 1_000_000.0 // microseconds (μs)
|
||||||
return (max(timePerSample / oneIter, 1), oneIter)
|
return (max(Int(timePerSample / oneIter), 1), oneIter)
|
||||||
} else {
|
} else {
|
||||||
return (1, oneIter)
|
return (1, oneIter)
|
||||||
}
|
}
|
||||||
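
A worked example of the auto-scaling above, with the default sample-time of 1 second and a hypothetical iteration cost of 250 μs:

    time_per_sample_us = 1.0 * 1_000_000.0   # sampleTime in microseconds
    one_iter_us = 250.0                      # assumed single-iteration time
    num_iters = max(int(time_per_sample_us / one_iter_us), 1)   # 4000
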
@@ -615,77 +565,137 @@ final class TestRunner {
|
|||||||
test.tearDownFunction?()
|
test.tearDownFunction?()
|
||||||
if let lf = test.legacyFactor {
|
if let lf = test.legacyFactor {
|
||||||
logVerbose(" Applying legacy factor: \(lf)")
|
logVerbose(" Applying legacy factor: \(lf)")
|
||||||
samples = samples.map { $0 * lf }
|
samples = samples.map { $0 * Double(lf) }
|
||||||
}
|
}
|
||||||
|
|
||||||
return BenchResults(samples, collectMetadata())
|
return BenchResults(samples, collectMetadata(), numIters)
|
||||||
}
|
}
|
||||||
|
|
||||||
var header: String {
|
func printJSON(index: Int, info: BenchmarkInfo, results: BenchResults?) {
|
||||||
let withUnit = {$0 + "(μs)"}
|
// Write the results for a single test as a one-line JSON object
|
||||||
let withDelta = {"𝚫" + $0}
|
// This allows a script to easily consume the results by JSON-decoding
|
||||||
func quantiles(q: Int) -> [String] {
|
// each line separately.
|
||||||
// See https://en.wikipedia.org/wiki/Quantile#Specialized_quantiles
|
|
||||||
let prefix = [
|
// To avoid relying on Foundation, construct the JSON naively. This is
|
||||||
2: "MEDIAN", 3: "T", 4: "Q", 5: "QU", 6: "S", 7: "O", 10: "D",
|
// actually pretty robust, since almost everything is a number; the only
|
||||||
12: "Dd", 16: "H", 20: "V", 33: "TT", 100: "P", 1000: "Pr"
|
// brittle assumption is that test.name must not have \ or " in it.
|
||||||
][q, default: "\(q)-q"]
|
var out = [
|
||||||
let base20 = "0123456789ABCDEFGHIJ".map { String($0) }
|
"\"number\":\(index)",
|
||||||
let index: (Int) -> String =
|
"\"name\":\"\(info.name)\""
|
||||||
{ q == 2 ? "" : q <= 20 ? base20[$0] : String($0) }
|
]
|
||||||
let tail = (1..<q).map { prefix + index($0) } + ["MAX"]
|
|
||||||
// QMIN identifies the quantile format, distinct from formats using "MIN"
|
if let results = results {
|
||||||
return [withUnit("QMIN")] + tail.map(c.delta ? withDelta : withUnit)
|
let samples = results.samples.sorted().map({$0.description}).joined(separator: ",")
|
||||||
|
out.append("\"samples\":[\(samples)]")
|
||||||
|
out.append("\"iters\":\(results.iters)")
|
||||||
|
if let meta = results.meta {
|
||||||
|
if c.logMemory {
|
||||||
|
out += [
|
||||||
|
"\"max_rss\":\(meta.maxRSS)",
|
||||||
|
"\"pages\":\(meta.pages)",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
if c.logMeta {
|
||||||
|
out += [
|
||||||
|
"\"ics\":\(meta.ics)",
|
||||||
|
"\"yields\":\(meta.yields)",
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return (
|
print("{ " + out.joined(separator: ", ") + " }")
|
||||||
["#", "TEST", "SAMPLES"] +
|
fflush(stdout)
|
||||||
(c.quantile.map(quantiles)
|
|
||||||
?? ["MIN", "MAX", "MEAN", "SD", "MEDIAN"].map(withUnit)) +
|
|
||||||
(c.logMemory ? ["MAX_RSS(B)"] : []) +
|
|
||||||
(c.logMeta ? ["PAGES", "ICS", "YIELD"] : [])
|
|
||||||
).joined(separator: c.delim)
|
|
||||||
}
|
}
|
||||||
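
A hedged sketch of a consumer for the one-object-per-line output that printJSON emits; the file name is a placeholder, and only the "name" and "samples" keys shown above are relied on.

    import json

    with open("benchmark_output.log") as f:   # placeholder path
        for line in f:
            line = line.strip()
            if not line.startswith("{"):
                continue                      # skip non-JSON lines
            record = json.loads(line)
            print(record["name"], min(record["samples"]))
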
|
|
||||||
/// Execute benchmarks and continuously report the measurement results.
|
|
||||||
|
enum Justification {
|
||||||
|
case left, right
|
||||||
|
}
|
||||||
|
func printSpaces(_ width: Int) {
|
||||||
|
for _ in 0..<width {
|
||||||
|
print(" ", terminator: "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func printToWidth(_ s: String, width: Int, justify: Justification = .left) {
|
||||||
|
var pad = width - 1 - s.count
|
||||||
|
if pad <= 0 {
|
||||||
|
pad = 1
|
||||||
|
}
|
||||||
|
if justify == .right {
|
||||||
|
printSpaces(pad)
|
||||||
|
}
|
||||||
|
print(s, terminator: "")
|
||||||
|
if justify == .left {
|
||||||
|
printSpaces(pad)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func printDoubleToWidth(_ d: Double, fractionDigits: Int = 3, width: Int) {
|
||||||
|
let digits = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
|
||||||
|
// Handle up to 8 fraction digits
|
||||||
|
let scales = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000]
|
||||||
|
let scale = scales[fractionDigits]
|
||||||
|
let i = Int(d * Double(scale) + 0.5)
|
||||||
|
let intPart = i / scale
|
||||||
|
let fraction = i % scale
|
||||||
|
var s = intPart.description + "."
|
||||||
|
var f = fraction
|
||||||
|
for _ in 0..<fractionDigits {
|
||||||
|
f *= 10
|
||||||
|
s += digits[(f / scale) % 10]
|
||||||
|
}
|
||||||
|
printToWidth(s, width: width, justify: .right)
|
||||||
|
}
|
||||||
|
|
||||||
|
func printText(index: Int, info: BenchmarkInfo, results: BenchResults?) {
|
||||||
|
printToWidth(index.description, width: 4, justify: .right)
|
||||||
|
printSpaces(1)
|
||||||
|
printToWidth(info.name, width: c.testNameLength)
|
||||||
|
|
||||||
|
if let results = results {
|
||||||
|
printToWidth(String(describing:results.samples.count), width: 10, justify: .right)
|
||||||
|
if results.samples.count > 0 {
|
||||||
|
let sorted = results.samples.sorted()
|
||||||
|
let min = sorted.first!
|
||||||
|
let max = sorted.last!
|
||||||
|
let median = sorted[sorted.count / 2]
|
||||||
|
printDoubleToWidth(min, width: 10)
|
||||||
|
printDoubleToWidth(median, width: 10)
|
||||||
|
printDoubleToWidth(max, width: 10)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
print()
|
||||||
|
fflush(stdout)
|
||||||
|
}
|
||||||
|
|
||||||
|
func printTextHeading() {
|
||||||
|
printToWidth("#", width: 4, justify: .right)
|
||||||
|
printSpaces(1)
|
||||||
|
printToWidth("TEST", width: c.testNameLength, justify: .left)
|
||||||
|
printToWidth("SAMPLES", width: 10, justify: .right)
|
||||||
|
printToWidth("MIN", width: 10, justify: .right)
|
||||||
|
printToWidth("MEDIAN", width: 10, justify: .right)
|
||||||
|
printToWidth("MAX", width: 10, justify: .right)
|
||||||
|
print()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run each benchmark and emit the results in JSON
|
||||||
func runBenchmarks() {
|
func runBenchmarks() {
|
||||||
var testCount = 0
|
var testCount = 0
|
||||||
|
if !c.jsonOutput {
|
||||||
func report(_ index: String, _ t: BenchmarkInfo, results: BenchResults?) {
|
printTextHeading()
|
||||||
func values(r: BenchResults) -> [String] {
|
}
|
||||||
func quantiles(q: Int) -> [Int] {
|
for (index, info) in c.tests {
|
||||||
let qs = (0...q).map { i in r[Double(i) / Double(q)] }
|
if c.jsonOutput {
|
||||||
return c.delta ?
|
printJSON(index: index, info: info, results: run(info))
|
||||||
qs.reduce(into: (encoded: [], last: 0)) {
|
} else {
|
||||||
$0.encoded.append($1 - $0.last); $0.last = $1
|
printText(index: index, info: info, results: run(info))
|
||||||
}.encoded : qs
|
|
||||||
}
|
|
||||||
let values: [Int] = [r.sampleCount] +
|
|
||||||
(c.quantile.map(quantiles)
|
|
||||||
?? [r.min, r.max, r.mean, r.sd, r.median]) +
|
|
||||||
(c.logMemory ? [r.meta?.maxRSS].compactMap { $0 } : []) +
|
|
||||||
(c.logMeta ? r.meta.map {
|
|
||||||
[$0.pages, $0.ics, $0.yields] } ?? [] : [])
|
|
||||||
return values.map { String($0) }
|
|
||||||
}
|
|
||||||
let benchmarkStats = (
|
|
||||||
[index, t.name] + (results.map(values) ?? ["Unsupported"])
|
|
||||||
).joined(separator: c.delim)
|
|
||||||
|
|
||||||
print(benchmarkStats)
|
|
||||||
fflush(stdout)
|
|
||||||
|
|
||||||
if (results != nil) {
|
|
||||||
testCount += 1
|
|
||||||
}
|
}
|
||||||
|
testCount += 1
|
||||||
}
|
}
|
||||||
|
|
||||||
print(header)
|
if !c.jsonOutput {
|
||||||
|
print("\nTotal performance tests executed: \(testCount)")
|
||||||
for (index, test) in c.tests {
|
|
||||||
report(index, test, results:run(test))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
print("\nTotal performance tests executed: \(testCount)")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -704,11 +714,18 @@ public func main() {
|
|||||||
let config = TestConfig(registeredBenchmarks)
|
let config = TestConfig(registeredBenchmarks)
|
||||||
switch (config.action) {
|
switch (config.action) {
|
||||||
case .listTests:
|
case .listTests:
|
||||||
print("#\(config.delim)Test\(config.delim)[Tags]")
|
if config.jsonOutput {
|
||||||
for (index, t) in config.tests {
|
for (index, t) in config.tests {
|
||||||
let testDescription = [index, t.name, t.tags.sorted().description]
|
let tags = t.tags.sorted().map({"\"\($0.description)\""}).joined(separator: ",")
|
||||||
.joined(separator: config.delim)
|
print("{\"number\":\(index), \"name\":\"\(t.name)\", \"tags\":[\(tags)]}")
|
||||||
print(testDescription)
|
}
|
||||||
|
} else {
|
||||||
|
print("# Test [Tags]")
|
||||||
|
for (index, t) in config.tests {
|
||||||
|
let testDescription = [index.description, t.name, t.tags.sorted().description]
|
||||||
|
.joined(separator: " ")
|
||||||
|
print(testDescription)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
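
A sketch of decoding the per-test JSON lines produced by the list action above; the sample line is made up for illustration.

    import json

    list_output = '{"number":1, "name":"SomeBenchmark", "tags":["validation"]}'
    tests = [json.loads(line) for line in list_output.splitlines()
             if line.strip().startswith("{")]
    names = [t["name"] for t in tests]   # ["SomeBenchmark"]
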
case .run:
|
case .run:
|
||||||
if !config.allowNondeterministicHashing && !Hasher.isDeterministic {
|
if !config.allowNondeterministicHashing && !Hasher.isDeterministic {
|
||||||
|
|||||||