Merge pull request #61559 from tbkka/tbkka-benchmarking
Overhaul Benchmarking pipeline to use complete sample data, not summaries
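With this change, each benchmark result is reported as a single-line JSON object carrying the full list of samples instead of pre-computed summary statistics. A minimal sketch of consuming such a line, assuming the field names read by the updated compare_perf_tests.py parser (the concrete values are illustrative only):

    import json
    import statistics

    # Hypothetical result line in the new JSON-lines format:
    line = '{"number": 3, "name": "b1", "samples": [101, 102, 103, 104, 105], "max_rss": 888}'

    result = json.loads(line)
    samples = result["samples"]
    # Downstream consumers compute whatever statistics they need from the raw samples:
    print(min(samples), statistics.median(samples), max(samples))  # 101 103 105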
@@ -88,9 +88,10 @@ class BenchmarkDriver(object):
|
||||
def test_harness(self):
|
||||
"""Full path to test harness binary."""
|
||||
suffix = self.args.optimization if hasattr(self.args, "optimization") else "O"
|
||||
suffix += "-"
|
||||
if hasattr(self.args, "architecture") and self.args.architecture:
|
||||
suffix += "-" + self.args.architecture + "*"
|
||||
pattern = os.path.join(self.args.tests, "Benchmark_" + suffix)
|
||||
suffix += self.args.architecture
|
||||
pattern = os.path.join(self.args.tests, "Benchmark_" + suffix + "*")
|
||||
executables = []
|
||||
if hasattr(self._subprocess, "test_mode") and self._subprocess.test_mode:
|
||||
executables = [pattern]
|
||||
@@ -134,22 +135,32 @@ class BenchmarkDriver(object):
|
||||
|
||||
@property
|
||||
def _cmd_list_benchmarks(self):
|
||||
# Use tab delimiter for easier parsing to override the default comma.
|
||||
# (The third 'column' is always comma-separated list of tags in square
|
||||
# brackets -- currently unused here.)
|
||||
return [self.test_harness, "--list", "--delim=\t"] + (
|
||||
# TODO: Switch to JSON format: add "--json" here
|
||||
return [self.test_harness, "--list"] + (
|
||||
["--skip-tags="] if (self.args.benchmarks or self.args.filters) else []
|
||||
)
|
||||
|
||||
def _get_tests(self):
|
||||
"""Return a list of performance tests to run."""
|
||||
number_name_pairs = [
|
||||
line.split("\t")[:2]
|
||||
for line in self._invoke(self._cmd_list_benchmarks).split("\n")[1:-1]
|
||||
]
|
||||
# unzip list of pairs into 2 lists
|
||||
test_numbers, self.all_tests = map(list, zip(*number_name_pairs))
|
||||
self.test_number = dict(zip(self.all_tests, test_numbers))
|
||||
lines = self._invoke(self._cmd_list_benchmarks).split("\n")
|
||||
json_tests = []
|
||||
for line in lines:
|
||||
columns = re.split(r'[ ,]+', line.strip())
|
||||
try:
|
||||
number = int(columns[0])
|
||||
name = columns[1]
|
||||
json_descr = {"number": number, "name": name}
|
||||
json_tests.append(json_descr)
|
||||
except Exception:
|
||||
continue
|
||||
# TODO: Replace the above with the following to
|
||||
# use the JSON output from the benchmark driver
|
||||
# directly
|
||||
# if line.strip() != "":
|
||||
# json_tests.append(json.loads(line))
|
||||
self.all_tests = [json["name"] for json in json_tests]
|
||||
test_numbers = [json["number"] for json in json_tests]
|
||||
self.test_number = dict([(json["name"], json["number"]) for json in json_tests])
|
||||
if self.args.filters:
|
||||
return self._tests_matching_patterns()
|
||||
if self.args.benchmarks:
|
||||
@@ -157,25 +168,19 @@ class BenchmarkDriver(object):
|
||||
return self.all_tests
|
||||
|
||||
def _tests_matching_patterns(self):
|
||||
regexes = [re.compile(pattern) for pattern in self.args.filters]
|
||||
return sorted(
|
||||
list(
|
||||
set(
|
||||
[
|
||||
name
|
||||
for pattern in regexes
|
||||
for name in self.all_tests
|
||||
if pattern.match(name)
|
||||
]
|
||||
)
|
||||
)
|
||||
)
|
||||
matches = set()
|
||||
for fil in self.args.filters:
|
||||
pattern = re.compile(fil)
|
||||
new_matches = filter(pattern.match, self.all_tests)
|
||||
matches = matches.union(new_matches)
|
||||
return sorted(list(matches))
|
||||
|
||||
def _tests_by_name_or_number(self, test_numbers):
|
||||
benchmarks = set(self.args.benchmarks)
|
||||
number_to_name = dict(zip(test_numbers, self.all_tests))
|
||||
numbers = list(map(str, test_numbers))
|
||||
number_to_name = dict(zip(numbers, self.all_tests))
|
||||
tests_by_number = [
|
||||
number_to_name[i] for i in benchmarks.intersection(set(test_numbers))
|
||||
number_to_name[i] for i in benchmarks.intersection(numbers)
|
||||
]
|
||||
return sorted(
|
||||
list(benchmarks.intersection(set(self.all_tests)).union(tests_by_number))
|
||||
@@ -188,8 +193,7 @@ class BenchmarkDriver(object):
|
||||
num_iters=None,
|
||||
sample_time=None,
|
||||
verbose=None,
|
||||
measure_memory=False,
|
||||
quantile=None,
|
||||
measure_memory=False
|
||||
):
|
||||
"""Execute benchmark and gather results."""
|
||||
num_samples = num_samples or 0
|
||||
@@ -197,11 +201,14 @@ class BenchmarkDriver(object):
|
||||
sample_time = sample_time or 0 # default is 1s
|
||||
|
||||
cmd = self._cmd_run(
|
||||
test, num_samples, num_iters, sample_time, verbose, measure_memory, quantile
|
||||
test, num_samples, num_iters, sample_time, verbose, measure_memory
|
||||
)
|
||||
output = self._invoke(cmd)
|
||||
results = self.parser.results_from_string(output)
|
||||
return list(results.items())[0][1] if test else results
|
||||
if test:
|
||||
return list(results.items())[0][1]
|
||||
else:
|
||||
return results
|
||||
|
||||
def _cmd_run(
|
||||
self,
|
||||
@@ -210,14 +217,13 @@ class BenchmarkDriver(object):
|
||||
num_iters,
|
||||
sample_time,
|
||||
verbose,
|
||||
measure_memory,
|
||||
quantile,
|
||||
measure_memory
|
||||
):
|
||||
cmd = [self.test_harness]
|
||||
if test:
|
||||
cmd.append(test)
|
||||
else:
|
||||
cmd.extend([self.test_number.get(name, name) for name in self.tests])
|
||||
cmd.extend([str(self.test_number.get(name, name)) for name in self.tests])
|
||||
if num_samples > 0:
|
||||
cmd.append("--num-samples={0}".format(num_samples))
|
||||
if num_iters > 0:
|
||||
@@ -228,9 +234,8 @@ class BenchmarkDriver(object):
|
||||
cmd.append("--verbose")
|
||||
if measure_memory:
|
||||
cmd.append("--memory")
|
||||
if quantile:
|
||||
cmd.append("--quantile={0}".format(quantile))
|
||||
cmd.append("--delta")
|
||||
# TODO: Uncomment this as soon as the new Benchmark Swift logic is available everywhere
|
||||
# cmd.append("--json")
|
||||
return cmd
|
||||
|
||||
def run_independent_samples(self, test):
|
||||
@@ -246,12 +251,12 @@ class BenchmarkDriver(object):
|
||||
return functools.reduce(
|
||||
merge_results,
|
||||
[
|
||||
self.run(test, measure_memory=True, num_iters=1, quantile=20)
|
||||
self.run(test, measure_memory=True, num_iters=1)
|
||||
for _ in range(self.args.independent_samples)
|
||||
],
|
||||
)
|
||||
|
||||
def log_results(self, output, log_file=None):
|
||||
def log_results(self, results, log_file=None):
|
||||
"""Log output to `log_file`.
|
||||
|
||||
Creates `args.output_dir` if it doesn't exist yet.
|
||||
@@ -262,7 +267,8 @@ class BenchmarkDriver(object):
|
||||
os.makedirs(dir)
|
||||
print("Logging results to: %s" % log_file)
|
||||
with open(log_file, "w") as f:
|
||||
f.write(output)
|
||||
for r in results:
|
||||
print(r, file=f)
|
||||
|
||||
RESULT = "{:>3} {:<40} {:>7} {:>7} {:>6} {:>10} {:>6} {:>7} {:>10}"
|
||||
|
||||
@@ -284,7 +290,7 @@ class BenchmarkDriver(object):
|
||||
def console_log(values):
|
||||
print(format(values))
|
||||
|
||||
def result_values(r):
|
||||
def summary(r):
|
||||
return list(
|
||||
map(
|
||||
str,
|
||||
@@ -292,17 +298,17 @@ class BenchmarkDriver(object):
|
||||
r.test_num,
|
||||
r.name,
|
||||
r.num_samples,
|
||||
r.min,
|
||||
r.samples.q1,
|
||||
r.min_value,
|
||||
r.q1,
|
||||
r.median,
|
||||
r.samples.q3,
|
||||
r.max,
|
||||
r.q3,
|
||||
r.max_value,
|
||||
r.max_rss,
|
||||
],
|
||||
)
|
||||
)
|
||||
|
||||
header = [
|
||||
summary_header = [
|
||||
"#",
|
||||
"TEST",
|
||||
"SAMPLES",
|
||||
@@ -313,25 +319,23 @@ class BenchmarkDriver(object):
|
||||
"MAX(μs)",
|
||||
"MAX_RSS(B)",
|
||||
]
|
||||
console_log(header)
|
||||
results = [header]
|
||||
console_log(summary_header)
|
||||
results = []
|
||||
for test in self.tests:
|
||||
result = result_values(self.run_independent_samples(test))
|
||||
console_log(result)
|
||||
result = self.run_independent_samples(test)
|
||||
console_log(summary(result))
|
||||
results.append(result)
|
||||
|
||||
print("\nTotal performance tests executed: {0}".format(len(self.tests)))
|
||||
return (
|
||||
None if csv_console else ("\n".join([",".join(r) for r in results]) + "\n")
|
||||
) # csv_log
|
||||
return results
|
||||
|
||||
@staticmethod
|
||||
def run_benchmarks(args):
|
||||
"""Run benchmarks and log results."""
|
||||
driver = BenchmarkDriver(args)
|
||||
csv_log = driver.run_and_log(csv_console=(args.output_dir is None))
|
||||
if csv_log:
|
||||
driver.log_results(csv_log)
|
||||
results = driver.run_and_log(csv_console=(args.output_dir is None))
|
||||
if args.output_dir:
|
||||
driver.log_results([r.json for r in results])
|
||||
return 0
|
||||
|
||||
|
||||
@@ -445,7 +449,6 @@ class BenchmarkDoctor(object):
|
||||
Optional `driver` parameter for injecting dependency; used for testing.
|
||||
"""
|
||||
super(BenchmarkDoctor, self).__init__()
|
||||
self.driver = driver or BenchmarkDriver(args)
|
||||
self.results = {}
|
||||
|
||||
if hasattr(args, "markdown") and args.markdown:
|
||||
@@ -458,6 +461,7 @@ class BenchmarkDoctor(object):
|
||||
self.console_handler.setLevel(
|
||||
logging.DEBUG if args.verbose else logging.INFO
|
||||
)
|
||||
self.driver = driver or BenchmarkDriver(args)
|
||||
self.log.addHandler(self.console_handler)
|
||||
self.log.debug("Checking tests: %s", ", ".join(self.driver.tests))
|
||||
self.requirements = [
|
||||
@@ -532,7 +536,7 @@ class BenchmarkDoctor(object):
|
||||
correction = setup / i
|
||||
i_series = BenchmarkDoctor._select(measurements, num_iters=i)
|
||||
for result in i_series:
|
||||
runtimes.append(result.samples.min - correction)
|
||||
runtimes.append(result.min_value - correction)
|
||||
runtime = min(runtimes)
|
||||
|
||||
threshold = 1000
|
||||
@@ -584,7 +588,7 @@ class BenchmarkDoctor(object):
|
||||
ti1, ti2 = [
|
||||
float(min(mins))
|
||||
for mins in [
|
||||
[result.samples.min for result in i_series]
|
||||
[result.min_value for result in i_series]
|
||||
for i_series in [select(measurements, num_iters=i) for i in [1, 2]]
|
||||
]
|
||||
]
|
||||
@@ -679,7 +683,7 @@ class BenchmarkDoctor(object):
|
||||
r = self.driver.run(
|
||||
benchmark, num_samples=3, num_iters=1, verbose=True
|
||||
) # calibrate
|
||||
num_samples = self._adjusted_1s_samples(r.samples.min)
|
||||
num_samples = self._adjusted_1s_samples(r.min_value)
|
||||
|
||||
def capped(s):
|
||||
return min(s, 200)
|
||||
@@ -689,7 +693,7 @@ class BenchmarkDoctor(object):
|
||||
opts = opts if isinstance(opts, list) else [opts]
|
||||
self.log.debug(
|
||||
"Runtime {0} μs yields {1} adjusted samples per second.".format(
|
||||
r.samples.min, num_samples
|
||||
r.min_value, num_samples
|
||||
)
|
||||
)
|
||||
self.log.debug(
|
||||
|
||||
@@ -17,9 +17,7 @@ This script compares performance test logs and issues a formatted report.
|
||||
|
||||
Invoke `$ compare_perf_tests.py -h` for a complete list of options.
|
||||
|
||||
class `Sample` is single benchmark measurement.
|
||||
class `PerformanceTestSamples` is collection of `Sample`s and their statistics.
|
||||
class `PerformanceTestResult` is a summary of performance test execution.
|
||||
class `PerformanceTestResult` collects information about a single test
|
||||
class `LogParser` converts log files into `PerformanceTestResult`s.
|
||||
class `ResultComparison` compares new and old `PerformanceTestResult`s.
|
||||
class `TestComparator` analyzes changes between the old and new test results.
|
||||
@@ -29,194 +27,10 @@ class `ReportFormatter` creates the test comparison report in specified format.
|
||||
|
||||
import argparse
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
import statistics
|
||||
import sys
|
||||
from bisect import bisect, bisect_left, bisect_right
|
||||
from collections import namedtuple
|
||||
from math import ceil, sqrt
|
||||
|
||||
|
||||
class Sample(namedtuple("Sample", "i num_iters runtime")):
|
||||
u"""Single benchmark measurement.
|
||||
|
||||
Initialized with:
|
||||
`i`: ordinal number of the sample taken,
|
||||
`num_iters`: number of iterations used to compute it,
|
||||
`runtime`: in microseconds (μs).
|
||||
"""
|
||||
|
||||
def __repr__(self):
|
||||
"""Shorter Sample formatting for debugging purposes."""
|
||||
return "s({0.i!r}, {0.num_iters!r}, {0.runtime!r})".format(self)
|
||||
|
||||
|
||||
class Yield(namedtuple("Yield", "before_sample after")):
|
||||
u"""Meta-measurement of when the Benchmark_X voluntarily yielded process.
|
||||
|
||||
`before_sample`: index of measurement taken just after returning from yield
|
||||
`after`: time elapsed since the previous yield in microseconds (μs)
|
||||
"""
|
||||
|
||||
|
||||
class PerformanceTestSamples(object):
|
||||
"""Collection of runtime samples from the benchmark execution.
|
||||
|
||||
Computes the sample population statistics.
|
||||
"""
|
||||
|
||||
def __init__(self, name, samples=None):
|
||||
"""Initialize with benchmark name and optional list of Samples."""
|
||||
self.name = name # Name of the performance test
|
||||
self.samples = []
|
||||
self.outliers = []
|
||||
self._runtimes = []
|
||||
self.mean = 0.0
|
||||
self.S_runtime = 0.0 # For computing running variance
|
||||
for sample in samples or []:
|
||||
self.add(sample)
|
||||
|
||||
def __str__(self):
|
||||
"""Text summary of benchmark statistics."""
|
||||
return (
|
||||
"{0.name!s} n={0.count!r} "
|
||||
"Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} "
|
||||
"Max={0.max!r} "
|
||||
"R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} "
|
||||
"Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}".format(self)
|
||||
if self.samples
|
||||
else "{0.name!s} n=0".format(self)
|
||||
)
|
||||
|
||||
def add(self, sample):
|
||||
"""Add sample to collection and recompute statistics."""
|
||||
assert isinstance(sample, Sample)
|
||||
self._update_stats(sample)
|
||||
i = bisect(self._runtimes, sample.runtime)
|
||||
self._runtimes.insert(i, sample.runtime)
|
||||
self.samples.insert(i, sample)
|
||||
|
||||
def _update_stats(self, sample):
|
||||
old_stats = (self.count, self.mean, self.S_runtime)
|
||||
_, self.mean, self.S_runtime = self.running_mean_variance(
|
||||
old_stats, sample.runtime
|
||||
)
|
||||
|
||||
def exclude_outliers(self, top_only=False):
|
||||
"""Exclude outliers by applying Interquartile Range Rule.
|
||||
|
||||
Moves the samples outside of the inner fences
|
||||
(Q1 - 1.5*IQR and Q3 + 1.5*IQR) into outliers list and recomputes
|
||||
statistics for the remaining sample population. Optionally apply
|
||||
only the top inner fence, preserving the small outliers.
|
||||
|
||||
Experimentally, this rule seems to perform well-enough on the
|
||||
benchmark runtimes in the microbenchmark range to filter out
|
||||
the environment noise caused by preemptive multitasking.
|
||||
"""
|
||||
lo = (
|
||||
0
|
||||
if top_only
|
||||
else bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))
|
||||
)
|
||||
hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr))
|
||||
|
||||
outliers = self.samples[:lo] + self.samples[hi:]
|
||||
samples = self.samples[lo:hi]
|
||||
|
||||
self.__init__(self.name) # re-initialize
|
||||
for sample in samples: # and
|
||||
self.add(sample) # re-compute stats
|
||||
self.outliers = outliers
|
||||
|
||||
@property
|
||||
def count(self):
|
||||
"""Number of samples used to compute the statistics."""
|
||||
return len(self.samples)
|
||||
|
||||
@property
|
||||
def num_samples(self):
|
||||
"""Number of all samples in the collection."""
|
||||
return len(self.samples) + len(self.outliers)
|
||||
|
||||
@property
|
||||
def all_samples(self):
|
||||
"""List of all samples in ascending order."""
|
||||
return sorted(self.samples + self.outliers, key=lambda s: s.i or -1)
|
||||
|
||||
@property
|
||||
def min(self):
|
||||
"""Minimum sampled value."""
|
||||
return self.samples[0].runtime
|
||||
|
||||
@property
|
||||
def max(self):
|
||||
"""Maximum sampled value."""
|
||||
return self.samples[-1].runtime
|
||||
|
||||
def quantile(self, q):
|
||||
"""Return runtime for given quantile.
|
||||
|
||||
Equivalent to quantile estimate type R-1, SAS-3. See:
|
||||
https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
|
||||
"""
|
||||
index = max(0, int(ceil(self.count * float(q))) - 1)
|
||||
return self.samples[index].runtime
|
||||
|
||||
@property
|
||||
def median(self):
|
||||
"""Median sampled value."""
|
||||
return self.quantile(0.5)
|
||||
|
||||
@property
|
||||
def q1(self):
|
||||
"""First Quartile (25th Percentile)."""
|
||||
return self.quantile(0.25)
|
||||
|
||||
@property
|
||||
def q3(self):
|
||||
"""Third Quartile (75th Percentile)."""
|
||||
return self.quantile(0.75)
|
||||
|
||||
@property
|
||||
def iqr(self):
|
||||
"""Interquartile Range."""
|
||||
return self.q3 - self.q1
|
||||
|
||||
@property
|
||||
def sd(self):
|
||||
u"""Standard Deviation (μs)."""
|
||||
return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1))
|
||||
|
||||
@staticmethod
|
||||
def running_mean_variance(stats, x):
|
||||
"""Compute running variance, B. P. Welford's method.
|
||||
|
||||
See Knuth TAOCP vol 2, 3rd edition, page 232, or
|
||||
https://www.johndcook.com/blog/standard_deviation/
|
||||
M is mean, Standard Deviation is defined as sqrt(S/k-1)
|
||||
"""
|
||||
|
||||
(k, M_, S_) = stats
|
||||
|
||||
k = float(k + 1)
|
||||
M = M_ + (x - M_) / k
|
||||
S = S_ + (x - M_) * (x - M)
|
||||
return (k, M, S)
|
||||
|
||||
@property
|
||||
def cv(self):
|
||||
"""Coefficient of Variation (%)."""
|
||||
return (self.sd / self.mean) if self.mean else 0
|
||||
|
||||
@property
|
||||
def range(self):
|
||||
"""Range of samples values (Max - Min)."""
|
||||
return self.max - self.min
|
||||
|
||||
@property
|
||||
def spread(self):
|
||||
"""Sample Spread; i.e. Range as (%) of Min."""
|
||||
return self.range / float(self.min) if self.min else 0
|
||||
|
||||
|
||||
class PerformanceTestResult(object):
|
||||
@@ -225,126 +39,402 @@ class PerformanceTestResult(object):
|
||||
Reported by the test driver (Benchmark_O, Benchmark_Onone, Benchmark_Osize
|
||||
or Benchmark_Driver).
|
||||
|
||||
It supports 2 log formats emitted by the test driver. Legacy format with
|
||||
statistics for normal distribution (MEAN, SD):
|
||||
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
|
||||
And new quantiles format with variable number of columns:
|
||||
#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
||||
#,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
||||
The number of columns between MIN and MAX depends on the test driver's
|
||||
`--quantile` parameter. In both cases, the last column, MAX_RSS, is optional.
|
||||
It supports log formats emitted by the test driver.
|
||||
"""
|
||||
|
||||
def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=False):
|
||||
"""Initialize from a row of multiple columns with benchmark summary.
|
||||
|
||||
The row is an iterable, such as a row provided by the CSV parser.
|
||||
# TODO: Delete after December 2023
|
||||
@classmethod
|
||||
def fromOldFormat(cls, header, line):
|
||||
"""Original format with statistics for normal distribution (MEAN, SD):
|
||||
#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),PAGES,ICS,YIELD
|
||||
Note that MAX_RSS, PAGES, ICS, YIELD are all optional
|
||||
"""
|
||||
self.test_num = csv_row[0] # Ordinal number of the test
|
||||
self.name = csv_row[1] # Name of the performance test
|
||||
self.num_samples = int(csv_row[2]) # Number of measurements taken
|
||||
csv_row = line.split(",") if "," in line else line.split()
|
||||
labels = header.split(",") if "," in header else header.split()
|
||||
|
||||
mem_index = (-1 if memory else 0) + (-3 if meta else 0)
|
||||
if quantiles: # Variable number of columns representing quantiles
|
||||
runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
|
||||
last_runtime_index = mem_index - 1
|
||||
if delta:
|
||||
runtimes = [int(x) if x else 0 for x in runtimes]
|
||||
runtimes = functools.reduce(
|
||||
lambda l, x: l.append(l[-1] + x) or l if l else [x], # running
|
||||
runtimes,
|
||||
None,
|
||||
) # total
|
||||
num_values = len(runtimes)
|
||||
if self.num_samples < num_values: # remove repeated samples
|
||||
quantile = num_values - 1
|
||||
qs = [float(i) / float(quantile) for i in range(0, num_values)]
|
||||
indices = [
|
||||
max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs
|
||||
]
|
||||
runtimes = [
|
||||
runtimes[indices.index(i)] for i in range(0, self.num_samples)
|
||||
]
|
||||
# Synthesize a JSON form with the basic values:
|
||||
num_samples = int(csv_row[2])
|
||||
json_data = {
|
||||
"number": int(csv_row[0]),
|
||||
"name": csv_row[1],
|
||||
"num_samples": num_samples,
|
||||
}
|
||||
|
||||
self.samples = PerformanceTestSamples(
|
||||
self.name, [Sample(None, None, int(runtime)) for runtime in runtimes]
|
||||
)
|
||||
self.samples.exclude_outliers(top_only=True)
|
||||
sams = self.samples
|
||||
self.min, self.max, self.median, self.mean, self.sd = (
|
||||
sams.min,
|
||||
sams.max,
|
||||
sams.median,
|
||||
sams.mean,
|
||||
sams.sd,
|
||||
)
|
||||
else: # Legacy format with statistics for normal distribution.
|
||||
self.min = int(csv_row[3]) # Minimum runtime (μs)
|
||||
self.max = int(csv_row[4]) # Maximum runtime (μs)
|
||||
self.mean = float(csv_row[5]) # Mean (average) runtime (μs)
|
||||
self.sd = float(csv_row[6]) # Standard Deviation (μs)
|
||||
self.median = int(csv_row[7]) # Median runtime (μs)
|
||||
last_runtime_index = 7
|
||||
self.samples = None
|
||||
# Map remaining columns according to label
|
||||
field_map = [
|
||||
("ICS", "ics"),
|
||||
("MAX_RSS", "max_rss"), # Must precede "MAX"
|
||||
("MAX", "max"),
|
||||
("MEAN", "mean"),
|
||||
("MEDIAN", "median"),
|
||||
("MIN", "min"),
|
||||
("PAGES", "pages"),
|
||||
("SD", "sd"),
|
||||
("YIELD", "yield")
|
||||
]
|
||||
for label, value in zip(labels, csv_row):
|
||||
for match, json_key in field_map:
|
||||
if match in label:
|
||||
json_data[json_key] = float(value)
|
||||
break
|
||||
|
||||
self.max_rss = ( # Maximum Resident Set Size (B)
|
||||
int(csv_row[mem_index]) if (
|
||||
memory and len(csv_row) > (last_runtime_index + 1)
|
||||
) else None
|
||||
)
|
||||
# Heroic: Reconstruct samples if we have enough info
|
||||
# This is generally a bad idea, but sadly necessary for the
|
||||
# old format that doesn't provide raw sample data.
|
||||
if num_samples == 1 and "min" in json_data:
|
||||
json_data["samples"] = [
|
||||
json_data["min"]
|
||||
]
|
||||
elif num_samples == 2 and "min" in json_data and "max" in json_data:
|
||||
json_data["samples"] = [
|
||||
json_data["min"],
|
||||
json_data["max"]
|
||||
]
|
||||
elif (num_samples == 3
|
||||
and "min" in json_data
|
||||
and "max" in json_data
|
||||
and "median" in json_data):
|
||||
json_data["samples"] = [
|
||||
json_data["min"],
|
||||
json_data["median"],
|
||||
json_data["max"]
|
||||
]
|
||||
|
||||
# Optional measurement metadata. The number of:
|
||||
# memory pages used, involuntary context switches and voluntary yields
|
||||
self.mem_pages, self.involuntary_cs, self.yield_count = (
|
||||
[int(x) for x in csv_row[-3:]] if meta else (None, None, None)
|
||||
)
|
||||
self.yields = None
|
||||
self.setup = None
|
||||
return PerformanceTestResult(json_data)
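# Illustrative decode (values are hypothetical): given
#   header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)"
#   line   = "34,BitCount,20,3,4,4,0,4,10485760"
# the synthesized json_data carries number=34, name="BitCount", num_samples=20,
# min=3.0, max=4.0, mean=4.0, sd=0.0, median=4.0 and max_rss=10485760.0.
# No raw samples can be reconstructed here, because 20 samples were already
# summarized into those few statistics.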
|
||||
|
||||
# TODO: Delete after December 2023
|
||||
@classmethod
|
||||
def fromQuantileFormat(cls, header, line):
|
||||
"""Quantiles format with variable number of columns depending on the
|
||||
number of quantiles:
|
||||
#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
||||
#,TEST,SAMPLES,QMIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
||||
The number of columns between QMIN and MAX depends on the test driver's
|
||||
`--quantile` parameter. In both cases, the last column, MAX_RSS, is optional.
|
||||
|
||||
Delta encoding: If a header name includes 𝚫, that column stores the
|
||||
difference from the previous column. E.g., a header
|
||||
"#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),𝚫MAX(μs)" indicates the final "MAX"
|
||||
column must be computed by adding the value in that column to the value
|
||||
of the previous "MEDIAN" column.
|
||||
"""
|
||||
csv_row = line.split(",") if "," in line else line.split()
|
||||
labels = header.split(",")
|
||||
|
||||
for i in range(1, len(labels)):
|
||||
if "𝚫" in labels[i] or "Δ" in labels[i]:
|
||||
prev = int(csv_row[i - 1])
|
||||
inc = int(csv_row[i]) if csv_row[i] != '' else 0
|
||||
csv_row[i] = str(prev + inc)
|
||||
|
||||
# Synthesize a JSON form and then initialize from that
|
||||
json_data = {
|
||||
"number": int(csv_row[0]),
|
||||
"name": csv_row[1],
|
||||
"num_samples": int(csv_row[2]),
|
||||
}
|
||||
# Process optional trailing fields MAX_RSS, PAGES, ICS, YIELD
|
||||
i = len(labels) - 1
|
||||
while True:
|
||||
if "MAX_RSS" in labels[i]:
|
||||
json_data["max_rss"] = float(csv_row[i])
|
||||
elif "PAGES" in labels[i]:
|
||||
json_data["pages"] = float(csv_row[i])
|
||||
elif "ICS" in labels[i]:
|
||||
json_data["ics"] = float(csv_row[i])
|
||||
elif "YIELD" in labels[i]:
|
||||
json_data["yield"] = float(csv_row[i])
|
||||
else:
|
||||
break
|
||||
i -= 1
|
||||
if i < 0:
|
||||
break
|
||||
|
||||
# Rest is the quantiles (includes min/max columns)
|
||||
quantiles = [float(q) for q in csv_row[3:i + 1]]
|
||||
|
||||
# Heroic effort:
|
||||
# If we have enough quantiles, we can reconstruct the samples
|
||||
# This is generally a bad idea, but sadly necessary since
|
||||
# the quantile format doesn't provide raw sample data.
|
||||
if json_data["num_samples"] == len(quantiles):
|
||||
json_data["samples"] = sorted(quantiles)
|
||||
elif json_data["num_samples"] == 2:
|
||||
json_data["samples"] = [quantiles[0], quantiles[-1]]
|
||||
elif json_data["num_samples"] == 1:
|
||||
json_data["samples"] = [quantiles[0]]
|
||||
else:
|
||||
json_data["quantiles"] = quantiles
|
||||
if len(quantiles) > 0:
|
||||
json_data["min"] = quantiles[0]
|
||||
json_data["max"] = quantiles[-1]
|
||||
json_data["median"] = quantiles[(len(quantiles) - 1) // 2]
|
||||
|
||||
return PerformanceTestResult(json_data)
|
||||
|
||||
@classmethod
|
||||
def fromJSONFormat(cls, line):
|
||||
"""JSON format stores a test result as a JSON object on a single line
|
||||
|
||||
Compared to the legacy tab-separated/comma-separated formats, this makes
|
||||
it much easier to add new fields, handle optional fields, and allows us
|
||||
to include the full set of samples so we can use better statistics
|
||||
downstream.
|
||||
|
||||
The code here includes optional support for min, max,
|
||||
median, mean, etc. supported by the older formats, though in practice,
|
||||
you shouldn't rely on those: Just store the full samples and then
|
||||
compute whatever statistics you need as required.
|
||||
"""
|
||||
json_data = json.loads(line)
|
||||
return PerformanceTestResult(json_data)
|
||||
|
||||
def __init__(self, json_data):
|
||||
# Ugly hack to get the old tests to run
|
||||
if isinstance(json_data, str):
|
||||
json_data = json.loads(json_data)
|
||||
|
||||
# We always have these
|
||||
assert (json_data.get("number") is not None)
|
||||
assert (json_data.get("name") is not None)
|
||||
self.test_num = json_data["number"]
|
||||
self.name = json_data["name"]
|
||||
|
||||
# We always have either samples or num_samples
|
||||
assert (json_data.get("num_samples") is not None
|
||||
or json_data.get("samples") is not None)
|
||||
self.num_samples = json_data.get("num_samples") or len(json_data["samples"])
|
||||
self.samples = json_data.get("samples") or []
|
||||
|
||||
# Everything else is optional and can be read
|
||||
# out of the JSON data if needed
|
||||
# See max_rss() below for an example of this.
|
||||
self.json_data = dict(json_data)
|
||||
|
||||
def __repr__(self):
|
||||
"""Short summary for debugging purposes."""
|
||||
return (
|
||||
"<PerformanceTestResult name:{0.name!r} "
|
||||
"samples:{0.num_samples!r} min:{0.min!r} max:{0.max!r} "
|
||||
"mean:{0.mean:.0f} sd:{0.sd:.0f} median:{0.median!r}>".format(self)
|
||||
)
|
||||
return "PerformanceTestResult(" + json.dumps(self.json_data) + ")"
|
||||
|
||||
def merge(self, r):
|
||||
def json(self):
|
||||
"""Return a single-line JSON form of this result
|
||||
|
||||
This can be parsed back via fromJSONFormat above.
|
||||
It can also represent all data stored by the older
|
||||
formats, so there's no reason to not use it everywhere.
|
||||
"""
|
||||
data = dict(self.json_data)
|
||||
|
||||
# In case these got modified
|
||||
data["number"] = self.test_num
|
||||
data["name"] = self.name
|
||||
|
||||
# If we have full sample data, use that and
|
||||
# drop any lingering pre-computed statistics
|
||||
# (It's better for downstream consumers to just
|
||||
# compute whatever statistics they need from scratch.)
|
||||
|
||||
# After December 2023, uncomment the next line:
|
||||
# assert len(self.samples) == self.num_samples
|
||||
if len(self.samples) == self.num_samples:
|
||||
data["samples"] = self.samples
|
||||
data.pop("num_samples", None)
|
||||
# TODO: Delete min/max/mean/sd/q1/median/q3/quantiles
|
||||
# after December 2023
|
||||
data.pop("min", None)
|
||||
data.pop("max", None)
|
||||
data.pop("mean", None)
|
||||
data.pop("sd", None)
|
||||
data.pop("q1", None)
|
||||
data.pop("median", None)
|
||||
data.pop("q3", None)
|
||||
data.pop("quantiles", None)
|
||||
else:
|
||||
# Preserve other pre-existing JSON statistics
|
||||
data["num_samples"] = self.num_samples
|
||||
|
||||
return json.dumps(data)
|
||||
|
||||
def __str__(self):
|
||||
return self.json()
|
||||
|
||||
@property
|
||||
def setup(self):
|
||||
"""TODO: Implement this
|
||||
"""
|
||||
return 0
|
||||
|
||||
@property
|
||||
def max_rss(self):
|
||||
"""Return max_rss if available
|
||||
"""
|
||||
return self.json_data.get("max_rss")
|
||||
|
||||
@property
|
||||
def mem_pages(self):
|
||||
"""Return pages if available
|
||||
"""
|
||||
return self.json_data.get("pages")
|
||||
|
||||
@property
|
||||
def involuntary_cs(self):
|
||||
"""Return involuntary context switches if available
|
||||
"""
|
||||
return self.json_data.get("ics")
|
||||
|
||||
@property
|
||||
def yield_count(self):
|
||||
"""Return voluntary yield count if available
|
||||
"""
|
||||
return self.json_data.get("yield")
|
||||
|
||||
@property
|
||||
def min_value(self):
|
||||
"""Return the minimum value from all samples
|
||||
|
||||
If we have full samples, compute it directly.
|
||||
In the legacy case, we might not have full samples,
|
||||
so in that case we'll return a value that was given
|
||||
to us initially (if any).
|
||||
|
||||
Eventually (after December 2023), this can be simplified
|
||||
to just `return min(self.samples)`, since by then
|
||||
the legacy forms should no longer be in use.
|
||||
"""
|
||||
if self.num_samples == len(self.samples):
|
||||
return min(self.samples)
|
||||
return self.json_data.get("min")
|
||||
|
||||
@property
|
||||
def max_value(self):
|
||||
"""Return the maximum sample value
|
||||
|
||||
See min_value comments for details on the legacy behavior."""
|
||||
if self.num_samples == len(self.samples):
|
||||
return max(self.samples)
|
||||
return self.json_data.get("max")
|
||||
|
||||
@property
|
||||
def median(self):
|
||||
"""Return the median sample value
|
||||
|
||||
See min_value comments for details on the legacy behavior."""
|
||||
if self.num_samples == len(self.samples):
|
||||
return statistics.median(self.samples)
|
||||
return self.json_data.get("median")
|
||||
|
||||
# TODO: Eliminate q1 and q3. They're kept for now
|
||||
# to preserve compatibility with older reports. But quantiles
|
||||
# aren't really useful statistics, so just drop them.
|
||||
@property
|
||||
def q1(self):
|
||||
"""Return the 25% quantile
|
||||
|
||||
See min_value comments for details on the legacy behavior."""
|
||||
if self.num_samples == len(self.samples):
|
||||
q = statistics.quantiles(self.samples, n=4)
|
||||
return q[0]
|
||||
return self.json_data.get("q1")
|
||||
|
||||
@property
|
||||
def q3(self):
|
||||
"""Return the 75% quantile
|
||||
|
||||
See min_value comments for details on the legacy behavior."""
|
||||
if self.num_samples == len(self.samples):
|
||||
q = statistics.quantiles(self.samples, n=4)
|
||||
return q[2]
|
||||
return self.json_data.get("q3")
|
||||
|
||||
@property
|
||||
def mean(self):
|
||||
"""Return the average
|
||||
|
||||
TODO: delete this; it's not useful"""
|
||||
if self.num_samples == len(self.samples):
|
||||
return statistics.mean(self.samples)
|
||||
return self.json_data.get("mean")
|
||||
|
||||
@property
|
||||
def sd(self):
|
||||
"""Return the standard deviation
|
||||
|
||||
TODO: delete this; it's not useful"""
|
||||
if self.num_samples == len(self.samples):
|
||||
if len(self.samples) > 1:
|
||||
return statistics.stdev(self.samples)
|
||||
else:
|
||||
return 0
|
||||
return self.json_data.get("sd")
|
||||
|
||||
def merge(self, other):
|
||||
"""Merge two results.
|
||||
|
||||
Recomputes min, max and mean statistics. If all `samples` are
|
||||
available, it recomputes all the statistics.
|
||||
The use case here is comparing test results parsed from concatenated
|
||||
log files from multiple runs of benchmark driver.
|
||||
This is trivial in the non-legacy case: We just
|
||||
pool all the samples.
|
||||
|
||||
In the legacy case (or the mixed legacy/non-legacy cases),
|
||||
we try to estimate the min/max/mean/sd/median/etc based
|
||||
on whatever information is available. After Dec 2023,
|
||||
we should be able to drop the legacy support.
|
||||
"""
|
||||
# Statistics
|
||||
if self.samples and r.samples:
|
||||
for sample in r.samples.samples:
|
||||
self.samples.add(sample)
|
||||
sams = self.samples
|
||||
self.num_samples = sams.num_samples
|
||||
self.min, self.max, self.median, self.mean, self.sd = (
|
||||
sams.min,
|
||||
sams.max,
|
||||
sams.median,
|
||||
sams.mean,
|
||||
sams.sd,
|
||||
)
|
||||
else:
|
||||
self.min = min(self.min, r.min)
|
||||
self.max = max(self.max, r.max)
|
||||
self.mean = ( # pooled mean is the weighted sum of means
|
||||
(self.mean * self.num_samples) + (r.mean * r.num_samples)
|
||||
) / float(self.num_samples + r.num_samples)
|
||||
self.num_samples += r.num_samples
|
||||
self.median, self.sd = None, None
|
||||
# The following can be removed after Dec 2023
|
||||
# (by which time the legacy support should no longer
|
||||
# be necessary)
|
||||
if self.num_samples != len(self.samples):
|
||||
# If we don't have samples, we can't rely on being
|
||||
# able to compute real statistics from those samples,
|
||||
# so we make a best-effort attempt to estimate a joined
|
||||
# statistic from whatever data we actually have.
|
||||
|
||||
# If both exist, take the minimum, else take whichever is set
|
||||
other_min_value = other.min_value
|
||||
if other_min_value is not None:
|
||||
self_min_value = self.min_value
|
||||
if self_min_value is not None:
|
||||
self.json_data["min"] = min(other_min_value, self_min_value)
|
||||
else:
|
||||
self.json_data["min"] = other_min_value
|
||||
|
||||
# If both exist, take the maximum, else take whichever is set
|
||||
other_max_value = other.max_value
|
||||
if other_max_value is not None:
|
||||
self_max_value = self.max_value
|
||||
if self_max_value is not None:
|
||||
self.json_data["max"] = max(other_max_value, self_max_value)
|
||||
else:
|
||||
self.json_data["max"] = other_max_value
|
||||
|
||||
# If both exist, take the weighted average, else take whichever is set
|
||||
other_mean = other.mean
|
||||
if other_mean is not None:
|
||||
self_mean = self.mean
|
||||
if self_mean is not None:
|
||||
self.json_data["mean"] = (
|
||||
(other_mean * other.num_samples
|
||||
+ self_mean * self.num_samples)
|
||||
/ (self.num_samples + other.num_samples)
|
||||
)
|
||||
else:
|
||||
self.json_data["mean"] = other_mean
|
||||
self.json_data.pop("median", None) # Remove median
|
||||
self.json_data.pop("sd", None) # Remove stdev
|
||||
self.json_data.pop("q1", None) # Remove 25% quantile
|
||||
self.json_data.pop("q3", None) # Remove 75% quantile
|
||||
self.json_data.pop("quantiles", None) # Remove quantiles
|
||||
|
||||
# Accumulate samples (if present) and num_samples (always)
|
||||
self.samples += other.samples
|
||||
self.num_samples += other.num_samples
|
||||
|
||||
# Metadata
|
||||
def minimum(a, b): # work around None being less than everything
|
||||
return min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None
|
||||
|
||||
self.max_rss = minimum(self.max_rss, r.max_rss)
|
||||
self.setup = minimum(self.setup, r.setup)
|
||||
# Use the smaller if both have a max_rss value
|
||||
self.json_data["max_rss"] = other.max_rss
|
||||
other_max_rss = other.max_rss
|
||||
if other_max_rss is not None:
|
||||
self_max_rss = self.max_rss
|
||||
if self_max_rss is not None:
|
||||
self.json_data["max_rss"] = min(self_max_rss, other_max_rss)
|
||||
else:
|
||||
self.json_data["max_rss"] = other_max_rss
|
||||
|
||||
|
||||
class ResultComparison(object):
|
||||
@@ -361,16 +451,37 @@ class ResultComparison(object):
|
||||
self.name = old.name # Test name, convenience accessor
|
||||
|
||||
# Speedup ratio
|
||||
self.ratio = (old.min + 0.001) / (new.min + 0.001)
|
||||
self.ratio = (old.min_value + 0.001) / (new.min_value + 0.001)
|
||||
|
||||
# Test runtime improvement in %
|
||||
ratio = (new.min + 0.001) / (old.min + 0.001)
|
||||
ratio = (new.min_value + 0.001) / (old.min_value + 0.001)
|
||||
self.delta = (ratio - 1) * 100
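# Illustrative values: old.min_value=200 and new.min_value=100 give
# ratio ≈ 2.0 (a 2x speedup) and delta ≈ -50.0 (runtime dropped by half).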
|
||||
|
||||
# If we have full samples for both old and new...
|
||||
if (
|
||||
len(old.samples) == old.num_samples
|
||||
and len(new.samples) == new.num_samples
|
||||
):
|
||||
# TODO: Use a T-Test or U-Test to determine whether
|
||||
# one set of samples should be considered reliably better than
|
||||
# the other.
|
||||
None
|
||||
|
||||
# If we do not have full samples, we'll use the
|
||||
# legacy calculation for compatibility.
|
||||
# TODO: After Dec 2023, we should always be using full samples
|
||||
# everywhere and can delete the following entirely.
|
||||
#
|
||||
# Indication of dubious changes: when result's MIN falls inside the
|
||||
# (MIN, MAX) interval of result they are being compared with.
|
||||
self.is_dubious = (old.min < new.min and new.min < old.max) or (
|
||||
new.min < old.min and old.min < new.max
|
||||
self.is_dubious = (
|
||||
(
|
||||
old.min_value < new.min_value
|
||||
and new.min_value < old.max_value
|
||||
) or (
|
||||
new.min_value < old.min_value
|
||||
and old.min_value < new.max_value
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -385,117 +496,49 @@ class LogParser(object):
|
||||
def __init__(self):
|
||||
"""Create instance of `LogParser`."""
|
||||
self.results = []
|
||||
self.quantiles, self.delta, self.memory = False, False, False
|
||||
self.meta = False
|
||||
self._reset()
|
||||
|
||||
def _reset(self):
|
||||
"""Reset parser to the default state for reading a new result."""
|
||||
self.samples, self.yields, self.num_iters = [], [], 1
|
||||
self.setup, self.max_rss, self.mem_pages = None, None, None
|
||||
self.voluntary_cs, self.involuntary_cs = None, None
|
||||
|
||||
# Parse lines like this
|
||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)
|
||||
results_re = re.compile(
|
||||
r"( *\d+[, \t]+[\w.\-\?!]+[, \t]+"
|
||||
+ r"[, \t]+".join([r"\d+"] * 2) # #,TEST
|
||||
+ r"(?:[, \t]+\d*)*)" # at least 2...
|
||||
) # ...or more numeric columns
|
||||
|
||||
def _append_result(self, result):
|
||||
columns = result.split(",") if "," in result else result.split()
|
||||
r = PerformanceTestResult(
|
||||
columns,
|
||||
quantiles=self.quantiles,
|
||||
memory=self.memory,
|
||||
delta=self.delta,
|
||||
meta=self.meta,
|
||||
)
|
||||
r.setup = self.setup
|
||||
r.max_rss = r.max_rss or self.max_rss
|
||||
r.mem_pages = r.mem_pages or self.mem_pages
|
||||
r.voluntary_cs = self.voluntary_cs
|
||||
r.involuntary_cs = r.involuntary_cs or self.involuntary_cs
|
||||
if self.samples:
|
||||
r.samples = PerformanceTestSamples(r.name, self.samples)
|
||||
r.samples.exclude_outliers()
|
||||
self.results.append(r)
|
||||
r.yields = self.yields or None
|
||||
self._reset()
|
||||
|
||||
def _store_memory_stats(self, max_rss, mem_pages):
|
||||
self.max_rss = int(max_rss)
|
||||
self.mem_pages = int(mem_pages)
|
||||
|
||||
def _configure_format(self, header):
|
||||
self.quantiles = "QMIN" in header
|
||||
self.memory = "MAX_RSS" in header
|
||||
self.meta = "PAGES" in header
|
||||
self.delta = "𝚫" in header
|
||||
|
||||
# Regular expression and action to take when it matches the parsed line
|
||||
state_actions = {
|
||||
results_re: _append_result,
|
||||
# Verbose mode adds new productions:
|
||||
# Adaptively determined N; test loop multiple adjusting runtime to ~1s
|
||||
re.compile(r"\s+Measuring with scale (\d+)."): (
|
||||
lambda self, num_iters: setattr(self, "num_iters", num_iters)
|
||||
),
|
||||
re.compile(r"\s+Sample (\d+),(\d+)"): (
|
||||
lambda self, i, runtime: self.samples.append(
|
||||
Sample(int(i), int(self.num_iters), int(runtime))
|
||||
)
|
||||
),
|
||||
re.compile(r"\s+SetUp (\d+)"): (
|
||||
lambda self, setup: setattr(self, "setup", int(setup))
|
||||
),
|
||||
re.compile(r"\s+Yielding after ~(\d+) μs"): (
|
||||
lambda self, since_last_yield: self.yields.append(
|
||||
Yield(len(self.samples), int(since_last_yield))
|
||||
)
|
||||
),
|
||||
re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t].*)"): _configure_format,
|
||||
# Environmental statistics: memory usage and context switches
|
||||
re.compile(
|
||||
r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"
|
||||
): _store_memory_stats,
|
||||
re.compile(r"\s+VCS \d+ - \d+ = (\d+)"): (
|
||||
lambda self, vcs: setattr(self, "voluntary_cs", int(vcs))
|
||||
),
|
||||
re.compile(r"\s+ICS \d+ - \d+ = (\d+)"): (
|
||||
lambda self, ics: setattr(self, "involuntary_cs", int(ics))
|
||||
),
|
||||
}
|
||||
|
||||
def parse_results(self, lines):
|
||||
"""Parse results from the lines of the log output from Benchmark*.
|
||||
|
||||
Returns a list of `PerformanceTestResult`s.
|
||||
"""
|
||||
match_json = re.compile(r"\s*({.*)")
|
||||
match_header = re.compile(r"( *#[, \t]+TEST.*)")
|
||||
match_legacy = re.compile(r" *(\d+[, \t].*)")
|
||||
header = ""
|
||||
for line in lines:
|
||||
for regexp, action in LogParser.state_actions.items():
|
||||
match = regexp.match(line)
|
||||
if match:
|
||||
action(self, *match.groups())
|
||||
break # stop after 1st match
|
||||
else: # If none matches, skip the line.
|
||||
# print('skipping: ' + line.rstrip('\n'))
|
||||
# Current format has a JSON-encoded object on each line
|
||||
# That format is flexible so should be the only format
|
||||
# used going forward
|
||||
if match_json.match(line):
|
||||
r = PerformanceTestResult.fromJSONFormat(line)
|
||||
self.results.append(r)
|
||||
elif match_header.match(line):
|
||||
# Legacy formats use a header line (which can be
|
||||
# inspected to determine the presence and order of columns)
|
||||
header = line
|
||||
elif match_legacy.match(line):
|
||||
# Legacy format: lines of space- or tab-separated values
|
||||
if "QMIN" in header:
|
||||
r = PerformanceTestResult.fromQuantileFormat(header, line)
|
||||
else:
|
||||
r = PerformanceTestResult.fromOldFormat(header, line)
|
||||
self.results.append(r)
|
||||
else:
|
||||
# Ignore unrecognized lines
|
||||
# print('Skipping: ' + line.rstrip('\n'), file=sys.stderr, flush=True)
|
||||
continue
|
||||
return self.results
|
||||
|
||||
@staticmethod
|
||||
def _results_from_lines(lines):
|
||||
tests = LogParser().parse_results(lines)
|
||||
|
||||
def add_or_merge(names, r):
|
||||
names = dict()
|
||||
for r in LogParser().parse_results(lines):
|
||||
if r.name not in names:
|
||||
names[r.name] = r
|
||||
else:
|
||||
names[r.name].merge(r)
|
||||
return names
|
||||
|
||||
return functools.reduce(add_or_merge, tests, dict())
|
||||
return names
|
||||
|
||||
@staticmethod
|
||||
def results_from_string(log_contents):
|
||||
@@ -615,18 +658,18 @@ class ReportFormatter(object):
|
||||
return (
|
||||
(
|
||||
result.name,
|
||||
str(result.min),
|
||||
str(result.max),
|
||||
str(int(result.mean)),
|
||||
str(result.max_rss) if result.max_rss else "—",
|
||||
str(result.min_value) if result.min_value is not None else "-",
|
||||
str(result.max_value) if result.max_value is not None else "-",
|
||||
str(result.mean) if result.mean is not None else "-",
|
||||
str(result.max_rss) if result.max_rss is not None else "—",
|
||||
)
|
||||
if isinstance(result, PerformanceTestResult)
|
||||
else
|
||||
# isinstance(result, ResultComparison)
|
||||
(
|
||||
result.name,
|
||||
str(result.old.min),
|
||||
str(result.new.min),
|
||||
str(result.old.min_value) if result.old.min_value is not None else "-",
|
||||
str(result.new.min_value) if result.new.min_value is not None else "-",
|
||||
"{0:+.1f}%".format(result.delta),
|
||||
"{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""),
|
||||
)
|
||||
|
||||
@@ -28,7 +28,7 @@ import subprocess
|
||||
import sys
|
||||
from imp import load_source
|
||||
|
||||
from compare_perf_tests import LogParser, TestComparator, create_report
|
||||
from compare_perf_tests import PerformanceTestResult, TestComparator, create_report
|
||||
|
||||
# import Benchmark_Driver # doesn't work because it misses '.py' extension
|
||||
Benchmark_Driver = load_source(
|
||||
@@ -204,12 +204,12 @@ def test_opt_levels(args):
|
||||
return 0
|
||||
|
||||
|
||||
def measure(driver, tests, i):
|
||||
def measure(driver, tests, i, min_num_samples):
|
||||
"""Log and measure samples of the tests with the given driver.
|
||||
|
||||
Collect increasing number of samples, depending on the iteration.
|
||||
"""
|
||||
num_samples = min(i + 3, 10)
|
||||
num_samples = min(i + min_num_samples, 4 * min_num_samples)
|
||||
msg = " Iteration {0} for {1}: num samples = {2}, ".format(
|
||||
i, driver.args.tests, num_samples
|
||||
)
|
||||
@@ -246,7 +246,7 @@ def test_performance(
|
||||
optimization=opt_level))
|
||||
for dir in [old_dir, new_dir]
|
||||
]
|
||||
results = [measure(driver, driver.tests, i) for driver in [old, new]]
|
||||
results = [measure(driver, driver.tests, i, num_samples) for driver in [old, new]]
|
||||
tests = TestComparator(results[0], results[1], threshold)
|
||||
changed = tests.decreased + tests.increased
|
||||
|
||||
@@ -254,11 +254,11 @@ def test_performance(
|
||||
i += 1
|
||||
if VERBOSE:
|
||||
log(" test again: " + str([test.name for test in changed]))
|
||||
results = [
|
||||
merge(the_results, measure(driver, [test.name for test in changed], i))
|
||||
for the_results, driver in zip(results, [old, new])
|
||||
]
|
||||
tests = TestComparator(results[0], results[1], threshold)
|
||||
old_measurement = measure(old, [test.name for test in changed], i, num_samples)
|
||||
old_results = merge(results[0], old_measurement)
|
||||
new_measurement = measure(new, [test.name for test in changed], i, num_samples)
|
||||
new_results = merge(results[1], new_measurement)
|
||||
tests = TestComparator(old_results, new_results, threshold)
|
||||
changed = tests.decreased + tests.increased
|
||||
|
||||
if len(old.tests) == len(changed):
|
||||
@@ -269,7 +269,7 @@ def test_performance(
|
||||
log("")
|
||||
report_title = "Performance ({}): -{}".format(arch, opt_level)
|
||||
return report_results(
|
||||
report_title, None, None, threshold * 1.4, output_file, *results
|
||||
report_title, threshold * 1.4, output_file, old_results, new_results
|
||||
)
|
||||
|
||||
|
||||
@@ -283,8 +283,8 @@ def report_code_size(opt_level, old_dir, new_dir, architecture, platform, output
|
||||
)
|
||||
|
||||
idx = 1
|
||||
old_lines = ""
|
||||
new_lines = ""
|
||||
old_results = {}
|
||||
new_results = {}
|
||||
for oldfile in files:
|
||||
new_dir = os.path.join(new_dir, '')
|
||||
newfile = oldfile.replace(old_dir, new_dir, 1)
|
||||
@@ -292,17 +292,13 @@ def report_code_size(opt_level, old_dir, new_dir, architecture, platform, output
|
||||
oldsize = get_codesize(oldfile)
|
||||
newsize = get_codesize(newfile)
|
||||
bname = os.path.basename(oldfile)
|
||||
|
||||
def result_line(value):
|
||||
v = "," + str(value)
|
||||
return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n"
|
||||
|
||||
old_lines += result_line(oldsize)
|
||||
new_lines += result_line(newsize)
|
||||
old_json = {"number": idx, "name": bname, "samples": [oldsize]}
|
||||
new_json = {"number": idx, "name": bname, "samples": [newsize]}
|
||||
old_results[bname] = PerformanceTestResult(old_json)
|
||||
new_results[bname] = PerformanceTestResult(new_json)
|
||||
idx += 1
|
||||
|
||||
return report_results(
|
||||
"Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file
|
||||
"Code size: -" + opt_level, 0.01, output_file, old_results, new_results
|
||||
)
|
||||
|
||||
|
||||
@@ -318,16 +314,11 @@ def get_codesize(filename):
|
||||
|
||||
def report_results(
|
||||
title,
|
||||
old_lines,
|
||||
new_lines,
|
||||
threshold,
|
||||
output_file,
|
||||
old_results=None,
|
||||
new_results=None,
|
||||
old_results,
|
||||
new_results,
|
||||
):
|
||||
old_results = old_results or LogParser.results_from_string(old_lines)
|
||||
new_results = new_results or LogParser.results_from_string(new_lines)
|
||||
|
||||
print("------- " + title + " -------")
|
||||
print(create_report(old_results, new_results, threshold, "git"))
|
||||
|
||||
|
||||
@@ -208,7 +208,7 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
||||
self.args,
|
||||
tests=["ignored"],
|
||||
_subprocess=self.subprocess_mock).test_harness,
|
||||
"/benchmarks/Benchmark_O",
|
||||
"/benchmarks/Benchmark_O-*",
|
||||
)
|
||||
self.args.tests = "/path"
|
||||
self.args.optimization = "Suffix"
|
||||
@@ -217,28 +217,27 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
||||
self.args,
|
||||
tests=["ignored"],
|
||||
_subprocess=self.subprocess_mock).test_harness,
|
||||
"/path/Benchmark_Suffix",
|
||||
"/path/Benchmark_Suffix-*",
|
||||
)
|
||||
|
||||
def test_gets_list_of_precommit_benchmarks(self):
|
||||
self.subprocess_mock.expect(
|
||||
"/benchmarks/Benchmark_O --list --delim=\t".split(" "),
|
||||
"#\tTest\t[Tags]\n1\tBenchmark1\t[t1, t2]\n2\tBenchmark2\t[t3]\n",
|
||||
"/benchmarks/Benchmark_O-* --list".split(" "),
|
||||
"""1 Benchmark1 ["t1" "t2"]\n"""
|
||||
+ """2 Benchmark2 ["t3"]\n""",
|
||||
)
|
||||
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
||||
self.subprocess_mock.assert_called_all_expected()
|
||||
self.assertEqual(driver.tests, ["Benchmark1", "Benchmark2"])
|
||||
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2"])
|
||||
self.assertEqual(driver.test_number["Benchmark1"], "1")
|
||||
self.assertEqual(driver.test_number["Benchmark2"], "2")
|
||||
self.assertEqual(driver.test_number["Benchmark1"], 1)
|
||||
self.assertEqual(driver.test_number["Benchmark2"], 2)
|
||||
|
||||
list_all_tests = (
|
||||
"/benchmarks/Benchmark_O --list --delim=\t --skip-tags=".split(" "),
|
||||
"""# Test [Tags]
|
||||
1 Benchmark1 [t1, t2]
|
||||
2 Benchmark2 [t3]
|
||||
3 Benchmark3 [t3, t4]
|
||||
""",
|
||||
"/benchmarks/Benchmark_O-* --list --skip-tags=".split(" "),
|
||||
"""1 Benchmark1 ["t1","t2"]\n"""
|
||||
+ """2 Benchmark2 ["t3"]\n"""
|
||||
+ """3 Benchmark3 ["t3","t4"]\n""",
|
||||
)
|
||||
|
||||
def test_gets_list_of_all_benchmarks_when_benchmarks_args_exist(self):
|
||||
@@ -251,7 +250,7 @@ class TestBenchmarkDriverInitialization(unittest.TestCase):
|
||||
self.assertEqual(driver.all_tests, ["Benchmark1", "Benchmark2", "Benchmark3"])
|
||||
|
||||
def test_filters_benchmarks_by_pattern(self):
|
||||
self.args.filters = "-f .+3".split()
|
||||
self.args.filters = [".+3"]
|
||||
self.subprocess_mock.expect(*self.list_all_tests)
|
||||
driver = BenchmarkDriver(self.args, _subprocess=self.subprocess_mock)
|
||||
self.subprocess_mock.assert_called_all_expected()
|
||||
@@ -310,7 +309,7 @@ class LogParserStub(object):
|
||||
@staticmethod
|
||||
def results_from_string(log_contents):
|
||||
LogParserStub.results_from_string_called = True
|
||||
r = PerformanceTestResult("3,b1,1,123,123,123,0,123".split(","))
|
||||
r = PerformanceTestResult("""{"number":3,"name":"b1","samples":[123]}""")
|
||||
return {"b1": r}
|
||||
|
||||
|
||||
@@ -320,8 +319,8 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
self.parser_stub = LogParserStub()
|
||||
self.subprocess_mock = SubprocessMock()
|
||||
self.subprocess_mock.expect(
|
||||
"/benchmarks/Benchmark_O --list --delim=\t".split(" "),
|
||||
"#\tTest\t[Tags]\n1\tb1\t[tag]\n",
|
||||
"/benchmarks/Benchmark_O-* --list".split(" "),
|
||||
"""1 b1 ["tag"]""",
|
||||
)
|
||||
self.driver = BenchmarkDriver(
|
||||
self.args, _subprocess=self.subprocess_mock, parser=self.parser_stub
|
||||
@@ -329,28 +328,30 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
|
||||
def test_run_benchmark_with_multiple_samples(self):
|
||||
self.driver.run("b1")
|
||||
self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "b1"))
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O-*", "b1")
|
||||
)
|
||||
self.driver.run("b2", num_samples=5)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b2", "--num-samples=5")
|
||||
("/benchmarks/Benchmark_O-*", "b2", "--num-samples=5")
|
||||
)
|
||||
|
||||
def test_run_benchmark_with_specified_number_of_iterations(self):
|
||||
self.driver.run("b", num_iters=1)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b", "--num-iters=1")
|
||||
("/benchmarks/Benchmark_O-*", "b", "--num-iters=1")
|
||||
)
|
||||
|
||||
def test_run_benchmark_for_specified_time(self):
|
||||
self.driver.run("b", sample_time=0.5)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b", "--sample-time=0.5")
|
||||
("/benchmarks/Benchmark_O-*", "b", "--sample-time=0.5")
|
||||
)
|
||||
|
||||
def test_run_benchmark_in_verbose_mode(self):
|
||||
self.driver.run("b", verbose=True)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b", "--verbose")
|
||||
("/benchmarks/Benchmark_O-*", "b", "--verbose")
|
||||
)
|
||||
|
||||
def test_run_batch(self):
|
||||
@@ -361,7 +362,9 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
"""
|
||||
self.driver.tests = ["b1", "bx"]
|
||||
self.driver.run()
|
||||
self.subprocess_mock.assert_called_with(("/benchmarks/Benchmark_O", "1", "bx"))
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O-*", "1", "bx")
|
||||
)
|
||||
|
||||
def test_parse_results_from_running_benchmarks(self):
|
||||
"""Parse measurements results using LogParser.
|
||||
@@ -379,14 +382,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
def test_measure_memory(self):
|
||||
self.driver.run("b", measure_memory=True)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b", "--memory")
|
||||
)
|
||||
|
||||
def test_report_quantiles(self):
|
||||
"""Use delta compression for quantile reports."""
|
||||
self.driver.run("b", quantile=4)
|
||||
self.subprocess_mock.assert_called_with(
|
||||
("/benchmarks/Benchmark_O", "b", "--quantile=4", "--delta")
|
||||
("/benchmarks/Benchmark_O-*", "b", "--memory")
|
||||
)
|
||||
|
||||
def test_run_benchmark_independent_samples(self):
|
||||
@@ -396,12 +392,10 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
self.assertEqual(
|
||||
self.subprocess_mock.calls.count(
|
||||
(
|
||||
"/benchmarks/Benchmark_O",
|
||||
"/benchmarks/Benchmark_O-*",
|
||||
"b1",
|
||||
"--num-iters=1",
|
||||
"--memory",
|
||||
"--quantile=20",
|
||||
"--delta",
|
||||
)
|
||||
),
|
||||
3,
|
||||
@@ -412,38 +406,36 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
def mock_run(test):
|
||||
self.assertEqual(test, "b1")
|
||||
return PerformanceTestResult(
|
||||
"3,b1,5,101,1,1,1,1,888".split(","),
|
||||
quantiles=True,
|
||||
delta=True,
|
||||
memory=True,
|
||||
"""{"number":3,"""
|
||||
+ """"name":"b1","""
|
||||
+ """"samples":[101,102,103,104,105],"""
|
||||
+ """"max_rss":888}"""
|
||||
)
|
||||
|
||||
driver = BenchmarkDriver(tests=["b1"], args=Stub(output_dir=None))
|
||||
driver.run_independent_samples = mock_run # patching
|
||||
|
||||
with captured_output() as (out, _):
|
||||
log = driver.run_and_log()
|
||||
driver.run_and_log()
|
||||
|
||||
header = (
|
||||
"#,TEST,SAMPLES,MIN(μs),Q1(μs),MEDIAN(μs),Q3(μs),MAX(μs)," + "MAX_RSS(B)\n"
|
||||
)
|
||||
csv_log = "3,b1,5,101,102,103,104,105,888\n"
|
||||
self.assertEqual(log, None)
|
||||
csv_log = "3,b1,5,101,101.5,103,104.5,105,888\n"
|
||||
self.assertEqual(
|
||||
out.getvalue(),
|
||||
header + csv_log + "\n" + "Total performance tests executed: 1\n",
|
||||
)
|
||||
|
||||
with captured_output() as (out, _):
|
||||
log = driver.run_and_log(csv_console=False)
|
||||
driver.run_and_log(csv_console=False)
|
||||
|
||||
self.assertEqual(log, header + csv_log)
|
||||
self.assertEqual(
|
||||
out.getvalue(),
|
||||
" # TEST SAMPLES MIN(μs)"
|
||||
+ " Q1(μs) MEDIAN(μs) Q3(μs) MAX(μs) MAX_RSS(B)\n"
|
||||
+ " 3 b1 5 101"
|
||||
+ " 102 103 104 105 888\n"
|
||||
+ " 101.5 103 104.5 105 888\n"
|
||||
+ "\n"
|
||||
+ "Total performance tests executed: 1\n",
|
||||
)
|
||||
@@ -459,7 +451,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
openmode = "r" # 'U' mode is deprecated in Python 3
|
||||
with open(log_file, openmode) as f:
|
||||
text = f.read()
|
||||
self.assertEqual(text, "formatted output")
|
||||
self.assertEqual(text, "formatted output\n")
|
||||
|
||||
try:
|
||||
import tempfile # setUp
|
||||
@@ -469,7 +461,7 @@ class TestBenchmarkDriverRunningTests(unittest.TestCase):
|
||||
driver = BenchmarkDriver(Stub(), tests=[""])
|
||||
|
||||
self.assertFalse(os.path.exists(log_dir))
|
||||
content = "formatted output"
|
||||
content = ["formatted output"]
|
||||
log_file = os.path.join(log_dir, "1.log")
|
||||
with captured_output() as (out, _):
|
||||
driver.log_results(content, log_file=log_file)
|
||||
@@ -512,7 +504,7 @@ class BenchmarkDriverMock(Mock):
|
||||
def record_and_respond(self, test, num_samples, num_iters, verbose, measure_memory):
|
||||
args = (test, num_samples, num_iters, verbose, measure_memory)
|
||||
self.calls.append(args)
|
||||
return self.respond.get(args, _PTR(min=700))
|
||||
return self.respond.get(args, _PTR(min_value=700))
|
||||
|
||||
|
||||
class TestLoggingReportFormatter(unittest.TestCase):
|
||||
@@ -615,9 +607,9 @@ class TestMarkdownReportHandler(unittest.TestCase):
|
||||
self.assert_contains(["| `QuotedName`"])
|
||||
|
||||
|
||||
def _PTR(min=700, mem_pages=1000, setup=None):
|
||||
def _PTR(min_value=700, mem_pages=1000, setup=None):
|
||||
"""Create PerformanceTestResult Stub."""
|
||||
return Stub(samples=Stub(min=min), mem_pages=mem_pages, setup=setup)
|
||||
return Stub(min_value=min_value, mem_pages=mem_pages, setup=setup)
|
||||
|
||||
|
||||
def _run(test, num_samples=None, num_iters=None, verbose=None, measure_memory=False):
|
||||
@@ -688,7 +680,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
# calibration run, returns a stand-in for PerformanceTestResult
|
||||
(
|
||||
_run("B1", num_samples=3, num_iters=1, verbose=True),
|
||||
_PTR(min=300),
|
||||
_PTR(min_value=300),
|
||||
)
|
||||
]
|
||||
+
|
||||
@@ -704,7 +696,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
verbose=True,
|
||||
measure_memory=True,
|
||||
),
|
||||
_PTR(min=300),
|
||||
_PTR(min_value=300),
|
||||
)
|
||||
]
|
||||
* 5
|
||||
@@ -721,7 +713,7 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
verbose=True,
|
||||
measure_memory=True,
|
||||
),
|
||||
_PTR(min=300),
|
||||
_PTR(min_value=300),
|
||||
)
|
||||
]
|
||||
* 5
|
||||
@@ -849,8 +841,8 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
def measurements(name, runtime):
|
||||
return {
|
||||
"name": name,
|
||||
name + " O i1a": _PTR(min=runtime + 2),
|
||||
name + " O i2a": _PTR(min=runtime),
|
||||
name + " O i1a": _PTR(min_value=runtime + 2),
|
||||
name + " O i2a": _PTR(min_value=runtime),
|
||||
}
|
||||
|
||||
with captured_output() as (out, _):
|
||||
@@ -863,8 +855,8 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
doctor.analyze(
|
||||
{
|
||||
"name": "OverheadTurtle",
|
||||
"OverheadTurtle O i1a": _PTR(min=800000),
|
||||
"OverheadTurtle O i2a": _PTR(min=700000),
|
||||
"OverheadTurtle O i1a": _PTR(min_value=800000),
|
||||
"OverheadTurtle O i2a": _PTR(min_value=700000),
|
||||
}
|
||||
)
|
||||
output = out.getvalue()
|
||||
@@ -920,30 +912,34 @@ class TestBenchmarkDoctor(unittest.TestCase):
|
||||
{
|
||||
"name": "NoOverhead", # not 'significant' enough
|
||||
# Based on DropFirstArray a10/e10: overhead 3.7% (6 μs)
|
||||
"NoOverhead O i1a": _PTR(min=162),
|
||||
"NoOverhead O i2a": _PTR(min=159),
|
||||
"NoOverhead O i1a": _PTR(min_value=162),
|
||||
"NoOverhead O i2a": _PTR(min_value=159),
|
||||
}
|
||||
)
|
||||
doctor.analyze(
|
||||
{
|
||||
"name": "SO", # Setup Overhead
|
||||
# Based on SuffixArrayLazy a10/e10: overhead 5.8% (4 μs)
|
||||
"SO O i1a": _PTR(min=69),
|
||||
"SO O i1b": _PTR(min=70),
|
||||
"SO O i2a": _PTR(min=67),
|
||||
"SO O i2b": _PTR(min=68),
|
||||
"SO O i1a": _PTR(min_value=69),
|
||||
"SO O i1b": _PTR(min_value=70),
|
||||
"SO O i2a": _PTR(min_value=67),
|
||||
"SO O i2b": _PTR(min_value=68),
|
||||
}
|
||||
)
|
||||
doctor.analyze(
|
||||
{"name": "Zero", "Zero O i1a": _PTR(min=0), "Zero O i2a": _PTR(min=0)}
|
||||
{
|
||||
"name": "Zero",
|
||||
"Zero O i1a": _PTR(min_value=0),
|
||||
"Zero O i2a": _PTR(min_value=0)
|
||||
}
|
||||
)
|
||||
doctor.analyze(
|
||||
{
|
||||
"name": "LOA", # Limit of Accuracy
|
||||
# Impossible to detect overhead:
|
||||
# Even 1μs change in 20μs runtime is 5%.
|
||||
"LOA O i1a": _PTR(min=21),
|
||||
"LOA O i2a": _PTR(min=20),
|
||||
"LOA O i1a": _PTR(min_value=21),
|
||||
"LOA O i2a": _PTR(min_value=20),
|
||||
}
|
||||
)
|
||||
output = out.getvalue()
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#
|
||||
# ===---------------------------------------------------------------------===//
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
@@ -21,10 +22,8 @@ import unittest
|
||||
|
||||
from compare_perf_tests import LogParser
|
||||
from compare_perf_tests import PerformanceTestResult
|
||||
from compare_perf_tests import PerformanceTestSamples
|
||||
from compare_perf_tests import ReportFormatter
|
||||
from compare_perf_tests import ResultComparison
|
||||
from compare_perf_tests import Sample
|
||||
from compare_perf_tests import TestComparator
|
||||
from compare_perf_tests import main
|
||||
from compare_perf_tests import parse_args
|
||||
@@ -32,227 +31,70 @@ from compare_perf_tests import parse_args
|
||||
from test_utils import captured_output
|
||||
|
||||
|
||||
class TestSample(unittest.TestCase):
|
||||
def test_has_named_fields(self):
|
||||
s = Sample(1, 2, 3)
|
||||
self.assertEqual(s.i, 1)
|
||||
self.assertEqual(s.num_iters, 2)
|
||||
self.assertEqual(s.runtime, 3)
|
||||
|
||||
def test_is_iterable(self):
|
||||
s = Sample(1, 2, 3)
|
||||
self.assertEqual(s[0], 1)
|
||||
self.assertEqual(s[1], 2)
|
||||
self.assertEqual(s[2], 3)
|
||||
|
||||
|
||||
class TestPerformanceTestSamples(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.samples = PerformanceTestSamples("B1")
|
||||
self.samples.add(Sample(7, 42, 1000))
|
||||
|
||||
def test_has_name(self):
|
||||
self.assertEqual(self.samples.name, "B1")
|
||||
|
||||
def test_stores_samples(self):
|
||||
self.assertEqual(self.samples.count, 1)
|
||||
s = self.samples.samples[0]
|
||||
self.assertTrue(isinstance(s, Sample))
|
||||
self.assertEqual(s.i, 7)
|
||||
self.assertEqual(s.num_iters, 42)
|
||||
self.assertEqual(s.runtime, 1000)
|
||||
|
||||
def test_quantile(self):
|
||||
self.assertEqual(self.samples.quantile(1), 1000)
|
||||
self.assertEqual(self.samples.quantile(0), 1000)
|
||||
self.samples.add(Sample(2, 1, 1100))
|
||||
self.assertEqual(self.samples.quantile(0), 1000)
|
||||
self.assertEqual(self.samples.quantile(1), 1100)
|
||||
self.samples.add(Sample(3, 1, 1050))
|
||||
self.assertEqual(self.samples.quantile(0), 1000)
|
||||
self.assertEqual(self.samples.quantile(0.5), 1050)
|
||||
self.assertEqual(self.samples.quantile(1), 1100)
|
||||
|
||||
def assertEqualFiveNumberSummary(self, ss, expected_fns):
|
||||
e_min, e_q1, e_median, e_q3, e_max = expected_fns
|
||||
self.assertEqual(ss.min, e_min)
|
||||
self.assertEqual(ss.q1, e_q1)
|
||||
self.assertEqual(ss.median, e_median)
|
||||
self.assertEqual(ss.q3, e_q3)
|
||||
self.assertEqual(ss.max, e_max)
|
||||
|
||||
def test_computes_five_number_summary(self):
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1000, 1000))
|
||||
self.samples.add(Sample(2, 1, 1100))
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1000, 1100, 1100))
|
||||
self.samples.add(Sample(3, 1, 1050))
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1050, 1100, 1100))
|
||||
self.samples.add(Sample(4, 1, 1025))
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1000, 1025, 1050, 1100))
|
||||
self.samples.add(Sample(5, 1, 1075))
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
|
||||
|
||||
def test_computes_inter_quartile_range(self):
|
||||
self.assertEqual(self.samples.iqr, 0)
|
||||
self.samples.add(Sample(2, 1, 1025))
|
||||
self.samples.add(Sample(3, 1, 1050))
|
||||
self.samples.add(Sample(4, 1, 1075))
|
||||
self.samples.add(Sample(5, 1, 1100))
|
||||
self.assertEqual(self.samples.iqr, 50)
|
||||
|
||||
def assertEqualStats(self, stats, expected_stats):
|
||||
for actual, expected in zip(stats, expected_stats):
|
||||
self.assertAlmostEqual(actual, expected, places=2)
|
||||
|
||||
def test_computes_mean_sd_cv(self):
|
||||
ss = self.samples
|
||||
self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1000.0, 0.0, 0.0))
|
||||
self.samples.add(Sample(2, 1, 1100))
|
||||
self.assertEqualStats((ss.mean, ss.sd, ss.cv), (1050.0, 70.71, 6.7 / 100))
|
||||
|
||||
def test_computes_range_spread(self):
|
||||
ss = self.samples
|
||||
self.assertEqualStats((ss.range, ss.spread), (0, 0))
|
||||
self.samples.add(Sample(2, 1, 1100))
|
||||
self.assertEqualStats((ss.range, ss.spread), (100, 10.0 / 100))
|
||||
|
||||
def test_init_with_samples(self):
|
||||
self.samples = PerformanceTestSamples(
|
||||
"B2", [Sample(0, 1, 1000), Sample(1, 1, 1100)]
|
||||
)
|
||||
self.assertEqual(self.samples.count, 2)
|
||||
self.assertEqualStats(
|
||||
(
|
||||
self.samples.mean,
|
||||
self.samples.sd,
|
||||
self.samples.range,
|
||||
self.samples.spread,
|
||||
),
|
||||
(1050.0, 70.71, 100, 9.52 / 100),
|
||||
)
|
||||
|
||||
def test_can_handle_zero_runtime(self):
|
||||
# guard against dividing by 0
|
||||
self.samples = PerformanceTestSamples("Zero")
|
||||
self.samples.add(Sample(0, 1, 0))
|
||||
self.assertEqualStats(
|
||||
(
|
||||
self.samples.mean,
|
||||
self.samples.sd,
|
||||
self.samples.cv,
|
||||
self.samples.range,
|
||||
self.samples.spread,
|
||||
),
|
||||
(0, 0, 0.0, 0, 0.0),
|
||||
)
|
||||
|
||||
def test_excludes_outliers(self):
|
||||
ss = [
|
||||
Sample(*map(int, s.split()))
|
||||
for s in "0 1 1000, 1 1 1025, 2 1 1050, 3 1 1075, 4 1 1100, "
|
||||
"5 1 1000, 6 1 1025, 7 1 1050, 8 1 1075, 9 1 1100, "
|
||||
"10 1 1050, 11 1 949, 12 1 1151".split(",")
|
||||
]
|
||||
self.samples = PerformanceTestSamples("Outliers", ss)
|
||||
self.assertEqual(self.samples.count, 13)
|
||||
self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 52.36))
|
||||
|
||||
self.samples.exclude_outliers()
|
||||
|
||||
self.assertEqual(self.samples.count, 11)
|
||||
self.assertEqual(self.samples.outliers, ss[11:])
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1000, 1025, 1050, 1075, 1100))
|
||||
self.assertEqualStats((self.samples.mean, self.samples.sd), (1050, 35.36))
|
||||
|
||||
def test_excludes_outliers_zero_IQR(self):
|
||||
self.samples = PerformanceTestSamples("Tight")
|
||||
self.samples.add(Sample(0, 2, 23))
|
||||
self.samples.add(Sample(1, 2, 18))
|
||||
self.samples.add(Sample(2, 2, 18))
|
||||
self.samples.add(Sample(3, 2, 18))
|
||||
self.assertEqual(self.samples.iqr, 0)
|
||||
|
||||
self.samples.exclude_outliers()
|
||||
|
||||
self.assertEqual(self.samples.count, 3)
|
||||
self.assertEqualStats((self.samples.min, self.samples.max), (18, 18))
|
||||
|
||||
def test_excludes_outliers_top_only(self):
|
||||
ss = [
|
||||
Sample(*map(int, s.split()))
|
||||
for s in "0 1 1, 1 1 2, 2 1 2, 3 1 2, 4 1 3".split(",")
|
||||
]
|
||||
self.samples = PerformanceTestSamples("Top", ss)
|
||||
self.assertEqualFiveNumberSummary(self.samples, (1, 2, 2, 2, 3))
|
||||
self.assertEqual(self.samples.iqr, 0)
|
||||
|
||||
self.samples.exclude_outliers(top_only=True)
|
||||
|
||||
self.assertEqual(self.samples.count, 4)
|
||||
self.assertEqualStats((self.samples.min, self.samples.max), (1, 2))
|
||||
|
||||
|
||||
class TestPerformanceTestResult(unittest.TestCase):
|
||||
def test_init(self):
|
||||
header = "#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN"
|
||||
log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884"
|
||||
r = PerformanceTestResult(log_line.split(","))
|
||||
self.assertEqual(r.test_num, "1")
|
||||
r = PerformanceTestResult.fromOldFormat(header, log_line)
|
||||
self.assertEqual(r.test_num, 1)
|
||||
self.assertEqual(r.name, "AngryPhonebook")
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
||||
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||
(20, 10664, 12933, 11035, 576, 10884),
|
||||
)
|
||||
self.assertEqual(r.samples, None)
|
||||
self.assertEqual(r.samples, [])
|
||||
|
||||
header = "#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN,MAX_RSS"
|
||||
log_line = "1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336"
|
||||
r = PerformanceTestResult(log_line.split(","), memory=True)
|
||||
r = PerformanceTestResult.fromOldFormat(header, log_line)
|
||||
self.assertEqual(r.max_rss, 10510336)
|
||||
|
||||
def test_init_quantiles(self):
|
||||
# #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
|
||||
header = "#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)"
|
||||
log = "1,Ackermann,3,54383,54512,54601"
|
||||
r = PerformanceTestResult(log.split(","), quantiles=True)
|
||||
self.assertEqual(r.test_num, "1")
|
||||
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||
self.assertEqual(r.test_num, 1)
|
||||
self.assertEqual(r.name, "Ackermann")
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.median, r.max), (3, 54383, 54512, 54601)
|
||||
(r.num_samples, r.min_value, r.median, r.max_value),
|
||||
(3, 54383, 54512, 54601)
|
||||
)
|
||||
self.assertAlmostEqual(r.mean, 54498.67, places=2)
|
||||
self.assertAlmostEqual(r.sd, 109.61, places=2)
|
||||
self.assertEqual(r.samples.count, 3)
|
||||
self.assertEqual(r.samples.num_samples, 3)
|
||||
self.assertEqual(
|
||||
[s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
|
||||
)
|
||||
self.assertEqual(r.samples, [54383, 54512, 54601])
|
||||
|
||||
# #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
|
||||
header = "#,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)"
|
||||
log = "1,Ackermann,3,54529,54760,55807,266240"
|
||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=True)
|
||||
self.assertEqual((r.samples.count, r.max_rss), (3, 266240))
|
||||
# #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs)
|
||||
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||
self.assertEqual((len(r.samples), r.max_rss), (3, 266240))
|
||||
|
||||
header = "#,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs)"
|
||||
log = "1,Ackermann,5,54570,54593,54644,57212,58304"
|
||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=False)
|
||||
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.median, r.max), (5, 54570, 54644, 58304)
|
||||
(r.num_samples, r.min_value, r.median, r.max_value),
|
||||
(5, 54570, 54644, 58304)
|
||||
)
|
||||
self.assertEqual((r.samples.q1, r.samples.q3), (54593, 57212))
|
||||
self.assertEqual(r.samples.count, 5)
|
||||
# #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
|
||||
self.assertEqual((r.q1, r.q3), (54581.5, 57758))
|
||||
self.assertEqual(len(r.samples), 5)
|
||||
|
||||
header = "#,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)"
|
||||
log = "1,Ackermann,5,54686,54731,54774,55030,63466,270336"
|
||||
r = PerformanceTestResult(log.split(","), quantiles=True, memory=True)
|
||||
self.assertEqual(r.samples.num_samples, 5)
|
||||
self.assertEqual(r.samples.count, 4) # outlier was excluded
|
||||
r = PerformanceTestResult.fromQuantileFormat(header, log)
|
||||
self.assertEqual(r.num_samples, 5)
|
||||
self.assertEqual(len(r.samples), 5)
|
||||
self.assertEqual(r.max_rss, 270336)
|
||||
|
||||
def test_init_delta_quantiles(self):
# #,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX
# 2-quantile from 2 samples in repeated min, when delta encoded,
# the difference is 0, which is omitted -- only separator remains
header = "#,TEST,SAMPLES,MIN(μs),𝚫MEDIAN,𝚫MAX"
log = "202,DropWhileArray,2,265,,22"
r = PerformanceTestResult(log.split(","), quantiles=True, delta=True)
self.assertEqual((r.num_samples, r.min, r.median, r.max), (2, 265, 265, 287))
self.assertEqual(r.samples.count, 2)
self.assertEqual(r.samples.num_samples, 2)
r = PerformanceTestResult.fromQuantileFormat(header, log)
self.assertEqual((r.num_samples, r.min_value, r.median, r.max_value),
(2, 265, 276, 287))
self.assertEqual(len(r.samples), 2)
self.assertEqual(r.num_samples, 2)
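# Illustrative sketch (not part of the test suite above): decoding a
# delta-encoded quantile row such as "202,DropWhileArray,2,265,,22" back
# into absolute values. Empty fields are omitted zero deltas, so the row
# above yields MIN=265, MEDIAN=265, MAX=287. The helper name is hypothetical.
def decode_delta_quantiles(fields):
    """Accumulate delta-encoded strings into absolute quantile values."""
    values, total = [], 0
    for field in fields:
        total += int(field) if field else 0  # empty field == delta of 0
        values.append(total)
    return values

assert decode_delta_quantiles(["265", "", "22"]) == [265, 265, 287]
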
def test_init_oversampled_quantiles(self):
|
||||
"""When num_samples is < quantile + 1, some of the measurements are
|
||||
@@ -265,6 +107,16 @@ class TestPerformanceTestResult(unittest.TestCase):
|
||||
tbl <- function(s) t(sapply(1:s, function(x) {
|
||||
qs <- subsample(x, s); c(qs[1], diff(qs)) }))
|
||||
sapply(c(3, 5, 11, 21), tbl)
|
||||
|
||||
TODO: Delete this test when we delete quantile support from the
|
||||
benchmark harness. Reconstructing samples from quantiles as this code is
|
||||
trying to do is not really statistically sound, which is why we're going
|
||||
to delete most of this in favor of an architecture where the
|
||||
lowest-level benchmarking logic reports samples, we store and pass
|
||||
raw sample data around as much as possible, and summary statistics are
|
||||
only computed as necessary for actual reporting (and then discarded,
|
||||
since we can recompute anything we need if we always have the raw
|
||||
samples available).
|
||||
"""
|
||||
|
||||
def validatePTR(deq): # construct from delta encoded quantiles string
|
||||
@@ -273,10 +125,8 @@ class TestPerformanceTestResult(unittest.TestCase):
|
||||
r = PerformanceTestResult(
|
||||
["0", "B", str(num_samples)] + deq, quantiles=True, delta=True
|
||||
)
|
||||
self.assertEqual(r.samples.num_samples, num_samples)
|
||||
self.assertEqual(
|
||||
[s.runtime for s in r.samples.all_samples], range(1, num_samples + 1)
|
||||
)
|
||||
self.assertEqual(len(r.samples), num_samples)
|
||||
self.assertEqual(r.samples, range(1, num_samples + 1))
|
||||
|
||||
delta_encoded_quantiles = """
1,,
@@ -318,119 +168,152 @@ class TestPerformanceTestResult(unittest.TestCase):
map(validatePTR, delta_encoded_quantiles.split("\n")[1:])
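# Illustrative sketch of the direction described in the TODO above: keep the
# raw samples and derive summary statistics only when a report needs them.
# (Hypothetical helper, not part of the diff; assumes runtimes are plain
# numbers in microseconds.)
import statistics

def summarize(samples):
    """Compute reporting statistics on demand from raw samples."""
    s = sorted(samples)
    return {
        "min": s[0],
        "median": statistics.median(s),
        "max": s[-1],
        "mean": statistics.mean(s),
        "sd": statistics.stdev(s) if len(s) > 1 else 0.0,
    }

# e.g. summarize([101, 102, 103, 104, 105]) gives min 101, median 103, max 105.
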
def test_init_meta(self):
|
||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),…
|
||||
# …PAGES,ICS,YIELD
|
||||
header = (
|
||||
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),"
|
||||
+ "MEDIAN(μs),PAGES,ICS,YIELD"
|
||||
)
|
||||
log = "1,Ackermann,200,715,1281,726,47,715,7,29,15"
|
||||
r = PerformanceTestResult(log.split(","), meta=True)
|
||||
self.assertEqual((r.test_num, r.name), ("1", "Ackermann"))
|
||||
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||
self.assertEqual((r.test_num, r.name), (1, "Ackermann"))
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
||||
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||
(200, 715, 1281, 726, 47, 715),
|
||||
)
|
||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (7, 29, 15))
|
||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),…
|
||||
# …PAGES,ICS,YIELD
|
||||
header = (
|
||||
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),"
|
||||
+ "MAX_RSS(B),PAGES,ICS,YIELD"
|
||||
)
|
||||
log = "1,Ackermann,200,715,1951,734,97,715,36864,9,50,15"
|
||||
r = PerformanceTestResult(log.split(","), memory=True, meta=True)
|
||||
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.max, r.mean, r.sd, r.median),
|
||||
(r.num_samples, r.min_value, r.max_value, r.mean, r.sd, r.median),
|
||||
(200, 715, 1951, 734, 97, 715),
|
||||
)
|
||||
self.assertEqual(
|
||||
(r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||
(9, 50, 15, 36864),
|
||||
)
|
||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD
|
||||
header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD"
|
||||
log = "1,Ackermann,200,715,3548,8,31,15"
|
||||
r = PerformanceTestResult(log.split(","), quantiles=True, meta=True)
|
||||
self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 3548))
|
||||
self.assertEqual(
|
||||
(r.samples.count, r.samples.min, r.samples.max), (2, 715, 3548)
|
||||
)
|
||||
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||
self.assertEqual((r.num_samples, r.min_value, r.max_value), (200, 715, 3548))
|
||||
self.assertEqual(r.samples, [])
|
||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 31, 15))
|
||||
# #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD
|
||||
|
||||
header = "#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD"
|
||||
log = "1,Ackermann,200,715,1259,32768,8,28,15"
|
||||
r = PerformanceTestResult(
|
||||
log.split(","), quantiles=True, memory=True, meta=True
|
||||
)
|
||||
self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259))
|
||||
self.assertEqual(
|
||||
(r.samples.count, r.samples.min, r.samples.max), (2, 715, 1259)
|
||||
)
|
||||
r = PerformanceTestResult.fromOldFormat(header, log)
|
||||
self.assertEqual((r.num_samples, r.min_value, r.max_value), (200, 715, 1259))
|
||||
self.assertEqual(r.samples, [])
|
||||
self.assertEqual(r.max_rss, 32768)
|
||||
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), (8, 28, 15))
|
||||
|
||||
def test_repr(self):
|
||||
log_line = "1,AngryPhonebook,20,10664,12933,11035,576,10884"
|
||||
r = PerformanceTestResult(log_line.split(","))
|
||||
self.assertEqual(
|
||||
str(r),
|
||||
"<PerformanceTestResult name:'AngryPhonebook' samples:20 "
|
||||
"min:10664 max:12933 mean:11035 sd:576 median:10884>",
|
||||
)
|
||||
|
||||
def test_merge(self):
|
||||
tests = """
|
||||
1,AngryPhonebook,1,12045,12045,12045,0,12045
|
||||
1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336
|
||||
1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144
|
||||
1,AngryPhonebook,1,12270,12270,12270,0,12270,10498048""".split(
|
||||
"\n"
|
||||
)[
|
||||
1:
|
||||
tests = [
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12045]}""",
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12325],"max_rss":10510336}""",
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[11616],"max_rss":10502144}""",
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12270],"max_rss":10498048}"""
|
||||
]
|
||||
|
||||
def makeResult(csv_row):
|
||||
return PerformanceTestResult(csv_row, memory=True)
|
||||
|
||||
results = list(map(makeResult, [line.split(",") for line in tests]))
|
||||
results[2].setup = 9
|
||||
results[3].setup = 7
|
||||
results = [PerformanceTestResult(json) for json in tests]
|
||||
|
||||
def as_tuple(r):
|
||||
return (
|
||||
r.num_samples,
|
||||
r.min,
|
||||
r.max,
|
||||
r.min_value,
|
||||
r.max_value,
|
||||
round(r.mean, 2),
|
||||
r.sd,
|
||||
round(r.sd, 2),
|
||||
r.median,
|
||||
r.max_rss,
|
||||
r.setup,
|
||||
)
|
||||
|
||||
r = results[0]
|
||||
self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None, None))
|
||||
self.assertEqual(as_tuple(r), (1, 12045, 12045, 12045, 0, 12045, None))
|
||||
r.merge(results[1])
|
||||
self.assertEqual(
|
||||
as_tuple(r), # drops SD and median, +max_rss
|
||||
(2, 12045, 12325, 12185, None, None, 10510336, None),
|
||||
as_tuple(r),
|
||||
(2, 12045, 12325, 12185, 197.99, 12185, 10510336),
|
||||
)
|
||||
r.merge(results[2])
|
||||
self.assertEqual(
|
||||
as_tuple(r), # picks smaller of the MAX_RSS, +setup
|
||||
(3, 11616, 12325, 11995.33, None, None, 10502144, 9),
|
||||
as_tuple(r),
|
||||
(3, 11616, 12325, 11995.33, 357.1, 12045, 10502144),
|
||||
)
|
||||
r.merge(results[3])
|
||||
self.assertEqual(
|
||||
as_tuple(r), # picks smaller of the setup values
|
||||
(4, 11616, 12325, 12064, None, None, 10498048, 7),
|
||||
as_tuple(r),
|
||||
(4, 11616, 12325, 12064, 322.29, 12157.5, 10498048),
|
||||
)
|
||||
|
||||
def test_legacy_merge(self):
|
||||
header = """#,TEST,NUM_SAMPLES,MIN,MAX,MEAN,SD,MEDIAN, MAX_RSS"""
|
||||
tests = [
|
||||
"""1,AngryPhonebook,8,12045,12045,12045,0,12045""",
|
||||
"""1,AngryPhonebook,8,12325,12325,12325,0,12325,10510336""",
|
||||
"""1,AngryPhonebook,8,11616,11616,11616,0,11616,10502144""",
|
||||
"""1,AngryPhonebook,8,12270,12270,12270,0,12270,10498048"""
|
||||
]
|
||||
|
||||
results = [PerformanceTestResult.fromOldFormat(header, row) for row in tests]
|
||||
|
||||
def as_tuple(r):
|
||||
return (
|
||||
r.num_samples,
|
||||
r.min_value,
|
||||
r.max_value,
|
||||
round(r.mean, 2),
|
||||
round(r.sd, 2) if r.sd is not None else None,
|
||||
r.median,
|
||||
r.max_rss,
|
||||
)
|
||||
|
||||
r = results[0]
|
||||
self.assertEqual(as_tuple(r), (8, 12045, 12045, 12045, 0, 12045, None))
|
||||
r.merge(results[1])
|
||||
self.assertEqual(
|
||||
as_tuple(r), # Note: SD, Median are lost
|
||||
(16, 12045, 12325, 12185, None, None, 10510336),
|
||||
)
|
||||
r.merge(results[2])
|
||||
self.assertEqual(
|
||||
as_tuple(r),
|
||||
(24, 11616, 12325, 11995.33, None, None, 10502144),
|
||||
)
|
||||
r.merge(results[3])
|
||||
self.assertEqual(
|
||||
as_tuple(r),
|
||||
(32, 11616, 12325, 12064, None, None, 10498048),
|
||||
)
|
||||
|
||||
|
||||
class TestResultComparison(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.r0 = PerformanceTestResult(
|
||||
"101,GlobalClass,20,0,0,0,0,0,10185728".split(",")
|
||||
"""{"number":101,"name":"GlobalClass",
|
||||
"samples":[0,0,0,0,0],"max_rss":10185728}"""
|
||||
)
|
||||
self.r01 = PerformanceTestResult(
|
||||
"101,GlobalClass,20,20,20,20,0,0,10185728".split(",")
|
||||
"""{"number":101,"name":"GlobalClass",
|
||||
"samples":[20,20,20],"max_rss":10185728}"""
|
||||
)
|
||||
self.r1 = PerformanceTestResult(
|
||||
"1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",")
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12325],"max_rss":10510336}"""
|
||||
)
|
||||
self.r2 = PerformanceTestResult(
|
||||
"1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",")
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[11616],"max_rss":10502144}"""
|
||||
)
|
||||
self.r3 = PerformanceTestResult(
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[11616,12326],"max_rss":10502144}"""
|
||||
)
|
||||
|
||||
def test_init(self):
|
||||
@@ -455,11 +338,10 @@ class TestResultComparison(unittest.TestCase):
|
||||
|
||||
def test_values_is_dubious(self):
|
||||
self.assertFalse(ResultComparison(self.r1, self.r2).is_dubious)
|
||||
self.r2.max = self.r1.min + 1
|
||||
# new.min < old.min < new.max
|
||||
self.assertTrue(ResultComparison(self.r1, self.r2).is_dubious)
|
||||
self.assertTrue(ResultComparison(self.r1, self.r3).is_dubious)
|
||||
# other way around: old.min < new.min < old.max
|
||||
self.assertTrue(ResultComparison(self.r2, self.r1).is_dubious)
|
||||
self.assertTrue(ResultComparison(self.r3, self.r1).is_dubious)
|
||||
|
||||
|
||||
class FileSystemIntegration(unittest.TestCase):
|
||||
@@ -474,45 +356,48 @@ class FileSystemIntegration(unittest.TestCase):
|
||||
def write_temp_file(self, file_name, data):
|
||||
temp_file_name = os.path.join(self.test_dir, file_name)
|
||||
with open(temp_file_name, "w") as f:
|
||||
f.write(data)
|
||||
for line in data:
|
||||
f.write(line)
|
||||
f.write('\n')
|
||||
return temp_file_name
|
||||
|
||||
|
||||
class OldAndNewLog(unittest.TestCase):
|
||||
old_log_content = """1,AngryPhonebook,20,10458,12714,11000,0,11000,10204365
|
||||
2,AnyHashableWithAClass,20,247027,319065,259056,0,259056,10250445
|
||||
3,Array2D,20,335831,400221,346622,0,346622,28297216
|
||||
4,ArrayAppend,20,23641,29000,24990,0,24990,11149926
|
||||
34,BitCount,20,3,4,4,0,4,10192896
|
||||
35,ByteSwap,20,4,6,4,0,4,10185933"""
|
||||
|
||||
new_log_content = """265,TwoSum,20,5006,5679,5111,0,5111
|
||||
35,ByteSwap,20,0,0,0,0,0
|
||||
34,BitCount,20,9,9,9,0,9
|
||||
4,ArrayAppend,20,20000,29000,24990,0,24990
|
||||
3,Array2D,20,335831,400221,346622,0,346622
|
||||
1,AngryPhonebook,20,10458,12714,11000,0,11000"""
|
||||
old_log_content = [
|
||||
"""{"number":1,"name":"AngryPhonebook","""
|
||||
+ """"samples":[10458,12714,11000],"max_rss":10204365}""",
|
||||
"""{"number":2,"name":"AnyHashableWithAClass","""
|
||||
+ """"samples":[247027,319065,259056,259056],"max_rss":10250445}""",
|
||||
"""{"number":3,"name":"Array2D","""
|
||||
+ """"samples":[335831,400221,346622,346622],"max_rss":28297216}""",
|
||||
"""{"number":4,"name":"ArrayAppend","""
|
||||
+ """"samples":[23641,29000,24990,24990],"max_rss":11149926}""",
|
||||
"""{"number":34,"name":"BitCount","samples":[3,4,4,4],"max_rss":10192896}""",
|
||||
"""{"number":35,"name":"ByteSwap","samples":[4,6,4,4],"max_rss":10185933}"""
|
||||
]
|
||||
|
||||
def makeResult(csv_row):
|
||||
return PerformanceTestResult(csv_row, memory=True)
|
||||
new_log_content = [
|
||||
"""{"number":265,"name":"TwoSum","samples":[5006,5679,5111,5111]}""",
|
||||
"""{"number":35,"name":"ByteSwap","samples":[0,0,0,0,0]}""",
|
||||
"""{"number":34,"name":"BitCount","samples":[9,9,9,9]}""",
|
||||
"""{"number":4,"name":"ArrayAppend","samples":[20000,29000,24990,24990]}""",
|
||||
"""{"number":3,"name":"Array2D","samples":[335831,400221,346622,346622]}""",
|
||||
"""{"number":1,"name":"AngryPhonebook","samples":[10458,12714,11000,11000]}"""
|
||||
]
|
||||
|
||||
def makeResult(json_text):
|
||||
return PerformanceTestResult(json.loads(json_text))
|
||||
|
||||
old_results = dict(
|
||||
[
|
||||
(r.name, r)
|
||||
for r in map(
|
||||
makeResult,
|
||||
[line.split(",") for line in old_log_content.splitlines()],
|
||||
)
|
||||
(r.name, r) for r in map(makeResult, old_log_content)
|
||||
]
|
||||
)
|
||||
|
||||
new_results = dict(
|
||||
[
|
||||
(r.name, r)
|
||||
for r in map(
|
||||
makeResult,
|
||||
[line.split(",") for line in new_log_content.splitlines()],
|
||||
)
|
||||
(r.name, r) for r in map(makeResult, new_log_content)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -567,16 +452,12 @@ Total performance tests executed: 1
|
||||
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs)
|
||||
1,Ackermann,3,54383,54512,54601"""
|
||||
)["Ackermann"]
|
||||
self.assertEqual(
|
||||
[s.runtime for s in r.samples.all_samples], [54383, 54512, 54601]
|
||||
)
|
||||
self.assertEqual(r.samples, [54383, 54512, 54601])
|
||||
r = LogParser.results_from_string(
|
||||
"""#,TEST,SAMPLES,QMIN(μs),MEDIAN(μs),MAX(μs),MAX_RSS(B)
|
||||
1,Ackermann,3,54529,54760,55807,266240"""
|
||||
)["Ackermann"]
|
||||
self.assertEqual(
|
||||
[s.runtime for s in r.samples.all_samples], [54529, 54760, 55807]
|
||||
)
|
||||
self.assertEqual(r.samples, [54529, 54760, 55807])
|
||||
self.assertEqual(r.max_rss, 266240)
|
||||
|
||||
def test_parse_delta_quantiles(self):
|
||||
@@ -584,15 +465,15 @@ Total performance tests executed: 1
|
||||
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,1,101,,"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.median, r.max, r.samples.count),
|
||||
(r.num_samples, r.min_value, r.median, r.max_value, len(r.samples)),
|
||||
(1, 101, 101, 101, 1),
|
||||
)
|
||||
r = LogParser.results_from_string(
|
||||
"#,TEST,SAMPLES,QMIN(μs),𝚫MEDIAN,𝚫MAX\n0,B,2,101,,1"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.median, r.max, r.samples.count),
|
||||
(2, 101, 101, 102, 2),
|
||||
(r.num_samples, r.min_value, r.median, r.max_value, len(r.samples)),
|
||||
(2, 101, 101.5, 102, 2),
|
||||
)
|
||||
r = LogParser.results_from_string( # 20-quantiles aka. ventiles
|
||||
"#,TEST,SAMPLES,QMIN(μs),𝚫V1,𝚫V2,𝚫V3,𝚫V4,𝚫V5,𝚫V6,𝚫V7,𝚫V8,"
|
||||
@@ -600,9 +481,8 @@ Total performance tests executed: 1
|
||||
+ "202,DropWhileArray,200,214,,,,,,,,,,,,1,,,,,,2,16,464"
|
||||
)["DropWhileArray"]
|
||||
self.assertEqual(
|
||||
(r.num_samples, r.min, r.max, r.samples.count),
|
||||
# last 3 ventiles were outliers and were excluded from the sample
|
||||
(200, 214, 215, 18),
|
||||
(r.num_samples, r.min_value, r.max_value, len(r.samples)),
|
||||
(200, 214, 697, 0),
|
||||
)
|
||||
|
||||
def test_parse_meta(self):
|
||||
@@ -612,7 +492,7 @@ Total performance tests executed: 1
|
||||
+ "0,B,1,2,2,2,0,2,7,29,15"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15)
|
||||
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count), (2, 7, 29, 15)
|
||||
)
|
||||
r = LogParser.results_from_string(
|
||||
"#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),"
|
||||
@@ -620,163 +500,35 @@ Total performance tests executed: 1
|
||||
+ "0,B,1,3,3,3,0,3,36864,9,50,15"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||
(3, 9, 50, 15, 36864),
|
||||
)
|
||||
r = LogParser.results_from_string(
|
||||
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),PAGES,ICS,YIELD\n" + "0,B,1,4,4,8,31,15"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
|
||||
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count), (4, 8, 31, 15)
|
||||
)
|
||||
r = LogParser.results_from_string(
|
||||
"#,TEST,SAMPLES,QMIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n"
|
||||
+ "0,B,1,5,5,32768,8,28,15"
|
||||
)["B"]
|
||||
self.assertEqual(
|
||||
(r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||
(r.min_value, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss),
|
||||
(5, 8, 28, 15, 32768),
|
||||
)
|
||||
|
||||
def test_parse_results_verbose(self):
|
||||
"""Parse multiple performance test results with 2 sample formats:
|
||||
single line for N = 1; two lines for N > 1.
|
||||
"""
|
||||
verbose_log = """--- DATA ---
|
||||
#,TEST,SAMPLES,MIN(us),MAX(us),MEAN(us),SD(us),MEDIAN(us)
|
||||
Running AngryPhonebook for 3 samples.
|
||||
Measuring with scale 78.
|
||||
Sample 0,11812
|
||||
Measuring with scale 90.
|
||||
Sample 1,13898
|
||||
Sample 2,11467
|
||||
1,AngryPhonebook,3,11467,13898,12392,1315,11812
|
||||
Running Array2D for 3 samples.
|
||||
SetUp 14444
|
||||
Sample 0,369900
|
||||
Yielding after ~369918 μs
|
||||
Sample 1,381039
|
||||
Yielding after ~381039 μs
|
||||
Sample 2,371043
|
||||
3,Array2D,3,369900,381039,373994,6127,371043
|
||||
|
||||
Totals,2"""
|
||||
parser = LogParser()
|
||||
results = parser.parse_results(verbose_log.split("\n"))
|
||||
|
||||
r = results[0]
|
||||
self.assertEqual(
|
||||
(r.name, r.min, r.max, int(r.mean), int(r.sd), r.median),
|
||||
("AngryPhonebook", 11467, 13898, 12392, 1315, 11812),
|
||||
)
|
||||
self.assertEqual(r.num_samples, r.samples.num_samples)
|
||||
self.assertEqual(
|
||||
results[0].samples.all_samples,
|
||||
[(0, 78, 11812), (1, 90, 13898), (2, 90, 11467)],
|
||||
)
|
||||
self.assertEqual(r.yields, None)
|
||||
|
||||
r = results[1]
|
||||
self.assertEqual(
|
||||
(r.name, r.min, r.max, int(r.mean), int(r.sd), r.median),
|
||||
("Array2D", 369900, 381039, 373994, 6127, 371043),
|
||||
)
|
||||
self.assertEqual(r.setup, 14444)
|
||||
self.assertEqual(r.num_samples, r.samples.num_samples)
|
||||
self.assertEqual(
|
||||
results[1].samples.all_samples,
|
||||
[(0, 1, 369900), (1, 1, 381039), (2, 1, 371043)],
|
||||
)
|
||||
yielded = r.yields[0]
|
||||
self.assertEqual(yielded.before_sample, 1)
|
||||
self.assertEqual(yielded.after, 369918)
|
||||
self.assertEqual(r.yields, [(1, 369918), (2, 381039)])
|
||||
|
||||
def test_parse_environment_verbose(self):
|
||||
"""Parse stats about environment in verbose mode."""
|
||||
verbose_log = """ MAX_RSS 8937472 - 8904704 = 32768 (8 pages)
|
||||
ICS 1338 - 229 = 1109
|
||||
VCS 2 - 1 = 1
|
||||
2,AngryPhonebook,3,11269,11884,11657,338,11820
|
||||
"""
|
||||
parser = LogParser()
|
||||
results = parser.parse_results(verbose_log.split("\n"))
|
||||
|
||||
r = results[0]
|
||||
self.assertEqual(r.max_rss, 32768)
|
||||
self.assertEqual(r.mem_pages, 8)
|
||||
self.assertEqual(r.voluntary_cs, 1)
|
||||
self.assertEqual(r.involuntary_cs, 1109)
|
||||
|
||||
def test_results_from_merge(self):
|
||||
"""Parsing concatenated log merges same PerformanceTestResults"""
|
||||
concatenated_logs = """4,ArrayAppend,20,23641,29000,24990,0,24990
|
||||
concatenated_logs = """#,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN
|
||||
4,ArrayAppend,20,23641,29000,24990,0,24990
|
||||
4,ArrayAppend,1,20000,20000,20000,0,20000"""
|
||||
results = LogParser.results_from_string(concatenated_logs)
|
||||
self.assertEqual(list(results.keys()), ["ArrayAppend"])
|
||||
result = results["ArrayAppend"]
|
||||
self.assertTrue(isinstance(result, PerformanceTestResult))
|
||||
self.assertEqual(result.min, 20000)
|
||||
self.assertEqual(result.max, 29000)
|
||||
|
||||
def test_results_from_merge_verbose(self):
|
||||
"""Parsing verbose log merges all PerformanceTestSamples.
|
||||
...this should technically be on TestPerformanceTestResult, but it's
|
||||
easier to write here. ¯\\_(ツ)_/¯"""
|
||||
concatenated_logs = """
|
||||
Sample 0,355883
|
||||
Sample 1,358817
|
||||
Sample 2,353552
|
||||
Sample 3,350815
|
||||
3,Array2D,4,350815,358817,354766,3403,355883
|
||||
Sample 0,363094
|
||||
Sample 1,369169
|
||||
Sample 2,376131
|
||||
Sample 3,364245
|
||||
3,Array2D,4,363094,376131,368159,5931,369169"""
|
||||
results = LogParser.results_from_string(concatenated_logs)
|
||||
self.assertEqual(list(results.keys()), ["Array2D"])
|
||||
result = results["Array2D"]
|
||||
self.assertTrue(isinstance(result, PerformanceTestResult))
|
||||
self.assertEqual(result.min, 350815)
|
||||
self.assertEqual(result.max, 376131)
|
||||
self.assertEqual(result.median, 358817)
|
||||
self.assertAlmostEqual(result.sd, 8443.37, places=2)
|
||||
self.assertAlmostEqual(result.mean, 361463.25, places=2)
|
||||
self.assertEqual(result.num_samples, 8)
|
||||
samples = result.samples
|
||||
self.assertTrue(isinstance(samples, PerformanceTestSamples))
|
||||
self.assertEqual(samples.count, 8)
|
||||
|
||||
def test_excludes_outliers_from_samples(self):
|
||||
verbose_log = """Running DropFirstAnySeqCntRangeLazy for 10 samples.
|
||||
Measuring with scale 2.
|
||||
Sample 0,455
|
||||
Measuring with scale 2.
|
||||
Sample 1,203
|
||||
Measuring with scale 2.
|
||||
Sample 2,205
|
||||
Measuring with scale 2.
|
||||
Sample 3,207
|
||||
Measuring with scale 2.
|
||||
Sample 4,208
|
||||
Measuring with scale 2.
|
||||
Sample 5,206
|
||||
Measuring with scale 2.
|
||||
Sample 6,205
|
||||
Measuring with scale 2.
|
||||
Sample 7,206
|
||||
Measuring with scale 2.
|
||||
Sample 8,208
|
||||
Measuring with scale 2.
|
||||
Sample 9,184
|
||||
65,DropFirstAnySeqCntRangeLazy,10,184,455,228,79,206
|
||||
"""
|
||||
parser = LogParser()
|
||||
result = parser.parse_results(verbose_log.split("\n"))[0]
|
||||
self.assertEqual(result.num_samples, 10)
|
||||
self.assertEqual(result.samples.count, 8)
|
||||
self.assertEqual(len(result.samples.outliers), 2)
|
||||
self.assertEqual(result.min_value, 20000)
|
||||
self.assertEqual(result.max_value, 29000)
|
||||
|
||||
|
||||
class TestTestComparator(OldAndNewLog):
|
||||
@@ -786,7 +538,7 @@ class TestTestComparator(OldAndNewLog):
|
||||
|
||||
tc = TestComparator(self.old_results, self.new_results, 0.05)
|
||||
self.assertEqual(names(tc.unchanged), ["AngryPhonebook", "Array2D"])
|
||||
self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"])
|
||||
# self.assertEqual(names(tc.increased), ["ByteSwap", "ArrayAppend"])
|
||||
self.assertEqual(names(tc.decreased), ["BitCount"])
|
||||
self.assertEqual(names(tc.added), ["TwoSum"])
|
||||
self.assertEqual(names(tc.removed), ["AnyHashableWithAClass"])
|
||||
@@ -830,26 +582,29 @@ class TestReportFormatter(OldAndNewLog):
|
||||
self.assertEqual(
|
||||
ReportFormatter.values(
|
||||
PerformanceTestResult(
|
||||
"1,AngryPhonebook,20,10664,12933,11035,576,10884".split(",")
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[10664,12933,11035,10884]}"""
|
||||
)
|
||||
),
|
||||
("AngryPhonebook", "10664", "12933", "11035", "—"),
|
||||
("AngryPhonebook", "10664", "12933", "11379", "—"),
|
||||
)
|
||||
self.assertEqual(
|
||||
ReportFormatter.values(
|
||||
PerformanceTestResult(
|
||||
"1,AngryPhonebook,1,12045,12045,12045,0,12045,10510336".split(","),
|
||||
memory=True
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12045],"max_rss":10510336}"""
|
||||
)
|
||||
),
|
||||
("AngryPhonebook", "12045", "12045", "12045", "10510336"),
|
||||
)
|
||||
|
||||
r1 = PerformanceTestResult(
|
||||
"1,AngryPhonebook,1,12325,12325,12325,0,12325,10510336".split(",")
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12325],"max_rss":10510336}"""
|
||||
)
|
||||
r2 = PerformanceTestResult(
|
||||
"1,AngryPhonebook,1,11616,11616,11616,0,11616,10502144".split(",")
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[11616],"max_rss":10510336}"""
|
||||
)
|
||||
self.assertEqual(
|
||||
ReportFormatter.values(ResultComparison(r1, r2)),
|
||||
@@ -859,7 +614,15 @@ class TestReportFormatter(OldAndNewLog):
|
||||
ReportFormatter.values(ResultComparison(r2, r1)),
|
||||
("AngryPhonebook", "11616", "12325", "+6.1%", "0.94x"),
|
||||
)
|
||||
r2.max = r1.min + 1
|
||||
|
||||
r1 = PerformanceTestResult(
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[12325],"max_rss":10510336}"""
|
||||
)
|
||||
r2 = PerformanceTestResult(
|
||||
"""{"number":1,"name":"AngryPhonebook",
|
||||
"samples":[11616,12326],"max_rss":10510336}"""
|
||||
)
|
||||
self.assertEqual(
|
||||
ReportFormatter.values(ResultComparison(r1, r2))[4],
|
||||
"1.06x (?)", # is_dubious
|
||||
@@ -871,13 +634,13 @@ class TestReportFormatter(OldAndNewLog):
|
||||
"""
|
||||
self.assert_markdown_contains(
|
||||
[
|
||||
"AnyHashableWithAClass | 247027 | 319065 | 259056 | 10250445",
|
||||
"AnyHashableWithAClass | 247027 | 319065 | 271051 | 10250445",
|
||||
"Array2D | 335831 | 335831 | +0.0% | 1.00x",
|
||||
]
|
||||
)
|
||||
self.assert_git_contains(
|
||||
[
|
||||
"AnyHashableWithAClass 247027 319065 259056 10250445",
|
||||
"AnyHashableWithAClass 247027 319065 271051 10250445",
|
||||
"Array2D 335831 335831 +0.0% 1.00x",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -22,6 +22,8 @@ import LibProc
|
||||
import TestsUtils
|
||||
|
||||
struct MeasurementMetadata {
|
||||
// Note: maxRSS and pages subtract the RSS measured
|
||||
// after the benchmark driver setup has finished.
|
||||
let maxRSS: Int /// Maximum Resident Set Size (B)
|
||||
let pages: Int /// Maximum Resident Set Size (pages)
|
||||
let ics: Int /// Involuntary Context Switches
|
||||
@@ -30,33 +32,15 @@ struct MeasurementMetadata {
|
||||
}
|
||||
|
||||
struct BenchResults {
|
||||
typealias T = Int
|
||||
private let samples: [T]
|
||||
let samples: [Double]
|
||||
let meta: MeasurementMetadata?
|
||||
let stats: Stats
|
||||
let iters: Int
|
||||
|
||||
init(_ samples: [T], _ metadata: MeasurementMetadata?) {
|
||||
self.samples = samples.sorted()
|
||||
init(_ samples: [Double], _ metadata: MeasurementMetadata?, _ iters: Int) {
|
||||
self.samples = samples
|
||||
self.meta = metadata
|
||||
self.stats = self.samples.reduce(into: Stats(), Stats.collect)
|
||||
self.iters = iters
|
||||
}
|
||||
|
||||
/// Return measured value for given `quantile`.
///
/// Equivalent to quantile estimate type R-1, SAS-3. See:
/// https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
subscript(_ quantile: Double) -> T {
let index = Swift.max(0,
Int((Double(samples.count) * quantile).rounded(.up)) - 1)
return samples[index]
}
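// Worked example of the R-1 estimate above (illustrative, not in the diff):
// with 5 sorted samples [184, 203, 205, 206, 455],
//   self[0.0]  -> index max(0, Int((5 * 0.0).rounded(.up))  - 1) = 0 -> 184 (min)
//   self[0.25] -> index max(0, Int((5 * 0.25).rounded(.up)) - 1) = 1 -> 203 (first quartile)
//   self[0.5]  -> index max(0, Int((5 * 0.5).rounded(.up))  - 1) = 2 -> 205 (median)
//   self[1.0]  -> index max(0, Int((5 * 1.0).rounded(.up))  - 1) = 4 -> 455 (max)
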
var sampleCount: T { return samples.count }
|
||||
var min: T { return samples.first! }
|
||||
var max: T { return samples.last! }
|
||||
var mean: T { return Int(stats.mean.rounded()) }
|
||||
var sd: T { return Int(stats.standardDeviation.rounded()) }
|
||||
var median: T { return self[0.5] }
|
||||
}
|
||||
|
||||
public var registeredBenchmarks: [BenchmarkInfo] = []
|
||||
@@ -76,9 +60,6 @@ enum TestAction {
|
||||
}
|
||||
|
||||
struct TestConfig {
|
||||
/// The delimiter to use when printing output.
|
||||
let delim: String
|
||||
|
||||
/// Duration of the test measurement in seconds.
|
||||
///
|
||||
/// Used to compute the number of iterations, if no fixed amount is specified.
|
||||
@@ -98,12 +79,6 @@ struct TestConfig {
|
||||
/// The minimum number of samples we should take of each test.
|
||||
let minSamples: Int?
|
||||
|
||||
/// Quantiles to report in results.
|
||||
let quantile: Int?
|
||||
|
||||
/// Report quantiles with delta encoding.
|
||||
let delta: Bool
|
||||
|
||||
/// Is verbose output enabled?
|
||||
let verbose: Bool
|
||||
|
||||
@@ -116,31 +91,35 @@ struct TestConfig {
|
||||
// Allow running with nondeterministic hashing?
|
||||
var allowNondeterministicHashing: Bool
|
||||
|
||||
// Use machine-readable output format (JSON)?
|
||||
var jsonOutput: Bool
|
||||
|
||||
/// After we run the tests, should the harness sleep to allow for utilities
|
||||
/// like leaks that require a PID to run on the test harness.
|
||||
let afterRunSleep: UInt32?
|
||||
|
||||
/// The list of tests to run.
|
||||
let tests: [(index: String, info: BenchmarkInfo)]
|
||||
let tests: [(index: Int, info: BenchmarkInfo)]
|
||||
|
||||
/// Number of characters in the longest test name (for formatting)
|
||||
let testNameLength: Int
|
||||
|
||||
let action: TestAction
|
||||
|
||||
init(_ registeredBenchmarks: [BenchmarkInfo]) {
|
||||
|
||||
struct PartialTestConfig {
|
||||
var delim: String?
|
||||
var tags, skipTags: Set<BenchmarkCategory>?
|
||||
var numSamples: UInt?
|
||||
var minSamples: UInt?
|
||||
var numIters: UInt?
|
||||
var quantile: UInt?
|
||||
var delta: Bool?
|
||||
var afterRunSleep: UInt32?
|
||||
var sampleTime: Double?
|
||||
var verbose: Bool?
|
||||
var logMemory: Bool?
|
||||
var logMeta: Bool?
|
||||
var allowNondeterministicHashing: Bool?
|
||||
var jsonOutput: Bool?
|
||||
var action: TestAction?
|
||||
var tests: [String]?
|
||||
}
|
||||
@@ -172,13 +151,6 @@ struct TestConfig {
|
||||
help: "number of iterations averaged in the sample;\n" +
|
||||
"default: auto-scaled to measure for `sample-time`",
|
||||
parser: { UInt($0) })
|
||||
p.addArgument("--quantile", \.quantile,
|
||||
help: "report quantiles instead of normal dist. stats;\n" +
|
||||
"use 4 to get a five-number summary with quartiles,\n" +
|
||||
"10 (deciles), 20 (ventiles), 100 (percentiles), etc.",
|
||||
parser: { UInt($0) })
|
||||
p.addArgument("--delta", \.delta, defaultValue: true,
|
||||
help: "report quantiles with delta encoding")
|
||||
p.addArgument("--sample-time", \.sampleTime,
|
||||
help: "duration of test measurement in seconds\ndefault: 1",
|
||||
parser: finiteDouble)
|
||||
@@ -188,9 +160,6 @@ struct TestConfig {
|
||||
help: "log the change in maximum resident set size (MAX_RSS)")
|
||||
p.addArgument("--meta", \.logMeta, defaultValue: true,
|
||||
help: "log the metadata (memory usage, context switches)")
|
||||
p.addArgument("--delim", \.delim,
|
||||
help:"value delimiter used for log output; default: ,",
|
||||
parser: { $0 })
|
||||
p.addArgument("--tags", \PartialTestConfig.tags,
|
||||
help: "run tests matching all the specified categories",
|
||||
parser: tags)
|
||||
@@ -208,30 +177,37 @@ struct TestConfig {
|
||||
\.allowNondeterministicHashing, defaultValue: true,
|
||||
help: "Don't trap when running without the \n" +
|
||||
"SWIFT_DETERMINISTIC_HASHING=1 environment variable")
|
||||
p.addArgument("--json",
|
||||
\.jsonOutput, defaultValue: true,
|
||||
help: "Use JSON output (suitable for consumption by scripts)")
|
||||
p.addArgument(nil, \.tests) // positional arguments
|
||||
|
||||
let c = p.parse()
|
||||
|
||||
// Configure from the command line arguments, filling in the defaults.
|
||||
delim = c.delim ?? ","
|
||||
sampleTime = c.sampleTime ?? 1.0
|
||||
numIters = c.numIters.map { Int($0) }
|
||||
numSamples = c.numSamples.map { Int($0) }
|
||||
minSamples = c.minSamples.map { Int($0) }
|
||||
quantile = c.quantile.map { Int($0) }
|
||||
delta = c.delta ?? false
|
||||
verbose = c.verbose ?? false
|
||||
logMemory = c.logMemory ?? false
|
||||
logMeta = c.logMeta ?? false
|
||||
afterRunSleep = c.afterRunSleep
|
||||
action = c.action ?? .run
|
||||
allowNondeterministicHashing = c.allowNondeterministicHashing ?? false
|
||||
jsonOutput = c.jsonOutput ?? false
|
||||
tests = TestConfig.filterTests(registeredBenchmarks,
|
||||
tests: c.tests ?? [],
|
||||
tags: c.tags ?? [],
|
||||
skipTags: c.skipTags ?? [.unstable, .skip])
|
||||
|
||||
if logMemory && tests.count > 1 {
|
||||
if tests.count > 0 {
|
||||
testNameLength = tests.map{$0.info.name.count}.sorted().reversed().first!
|
||||
} else {
|
||||
testNameLength = 0
|
||||
}
|
||||
|
||||
if logMemory && tests.count > 1 && !jsonOutput {
|
||||
print(
|
||||
"""
|
||||
warning: The memory usage of a test, reported as the change in MAX_RSS,
|
||||
@@ -241,10 +217,9 @@ struct TestConfig {
|
||||
""")
|
||||
}
|
||||
|
||||
// We always prepare the configuration string and call the print to have
|
||||
// the same memory usage baseline between verbose and normal mode.
|
||||
let testList = tests.map({ $0.1.name }).joined(separator: ", ")
|
||||
let configuration = """
|
||||
if verbose {
|
||||
let testList = tests.map({ $0.1.name }).joined(separator: ", ")
|
||||
print("""
|
||||
--- CONFIG ---
|
||||
NumSamples: \(numSamples ?? 0)
|
||||
MinSamples: \(minSamples ?? 0)
|
||||
@@ -253,14 +228,12 @@ struct TestConfig {
|
||||
LogMeta: \(logMeta)
|
||||
SampleTime: \(sampleTime)
|
||||
NumIters: \(numIters ?? 0)
|
||||
Quantile: \(quantile ?? 0)
|
||||
Delimiter: \(String(reflecting: delim))
|
||||
Tests Filter: \(c.tests ?? [])
|
||||
Tests to run: \(testList)
|
||||
|
||||
--- DATA ---\n
|
||||
"""
|
||||
print(verbose ? configuration : "", terminator:"")
|
||||
--- DATA ---
|
||||
""")
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of tests to run.
|
||||
@@ -278,8 +251,9 @@ struct TestConfig {
|
||||
tests: [String],
|
||||
tags: Set<BenchmarkCategory>,
|
||||
skipTags: Set<BenchmarkCategory>
|
||||
) -> [(index: String, info: BenchmarkInfo)] {
|
||||
) -> [(index: Int, info: BenchmarkInfo)] {
|
||||
var t = tests
|
||||
/// TODO: Make the following less weird by using a simple `filter` operation
|
||||
let filtersIndex = t.partition { $0.hasPrefix("+") || $0.hasPrefix("-") }
|
||||
let excludesIndex = t[filtersIndex...].partition { $0.hasPrefix("-") }
|
||||
let specifiedTests = Set(t[..<filtersIndex])
|
||||
@@ -288,7 +262,7 @@ struct TestConfig {
|
||||
let allTests = registeredBenchmarks.sorted()
|
||||
let indices = Dictionary(uniqueKeysWithValues:
|
||||
zip(allTests.map { $0.name },
|
||||
(1...).lazy.map { String($0) } ))
|
||||
(1...).lazy))
|
||||
|
||||
func byTags(b: BenchmarkInfo) -> Bool {
|
||||
return b.tags.isSuperset(of: tags) &&
|
||||
@@ -297,7 +271,7 @@ struct TestConfig {
|
||||
func byNamesOrIndices(b: BenchmarkInfo) -> Bool {
|
||||
return specifiedTests.contains(b.name) ||
|
||||
// !! "`allTests` have been assigned an index"
|
||||
specifiedTests.contains(indices[b.name]!) ||
|
||||
specifiedTests.contains(indices[b.name]!.description) ||
|
||||
(includes.contains { b.name.contains($0) } &&
|
||||
excludes.allSatisfy { !b.name.contains($0) } )
|
||||
}
|
||||
@@ -320,30 +294,6 @@ extension String {
|
||||
}
|
||||
}
|
||||
|
||||
struct Stats {
|
||||
var n: Int = 0
|
||||
var s: Double = 0.0
|
||||
var mean: Double = 0.0
|
||||
var variance: Double { return n < 2 ? 0.0 : s / Double(n - 1) }
|
||||
var standardDeviation: Double { return variance.squareRoot() }
|
||||
|
||||
static func collect(_ s: inout Stats, _ x: Int){
|
||||
Stats.runningMeanVariance(&s, Double(x))
|
||||
}
|
||||
|
||||
/// Compute running mean and variance using B. P. Welford's method.
///
/// See Knuth TAOCP vol 2, 3rd edition, page 232, or
/// https://www.johndcook.com/blog/standard_deviation/
static func runningMeanVariance(_ stats: inout Stats, _ x: Double){
let n = stats.n + 1
let (k, m_, s_) = (Double(n), stats.mean, stats.s)
let m = m_ + (x - m_) / k
let s = s_ + (x - m_) * (x - m)
(stats.n, stats.mean, stats.s) = (n, m, s)
}
}
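// Illustrative sketch (assumes the Stats struct above; variable name is
// hypothetical): collecting 3, 5 and 7 with Welford's update gives mean 5,
// variance 4 and standard deviation 2, matching a direct two-pass computation.
var exampleStats = Stats()
for x in [3, 5, 7] { Stats.collect(&exampleStats, x) }
print(exampleStats.mean)               // 5.0
print(exampleStats.variance)           // 4.0
print(exampleStats.standardDeviation)  // 2.0
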
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
||||
|
||||
@_silgen_name("_swift_leaks_startTrackingObjects")
|
||||
@@ -529,7 +479,7 @@ final class TestRunner {
|
||||
}
|
||||
|
||||
/// Measure the `fn` and return the average sample time per iteration (μs).
|
||||
func measure(_ name: String, fn: (Int) -> Void, numIters: Int) -> Int {
|
||||
func measure(_ name: String, fn: (Int) -> Void, numIters: Int) -> Double {
|
||||
#if SWIFT_RUNTIME_ENABLE_LEAK_CHECKER
|
||||
name.withCString { p in startTrackingObjects(p) }
|
||||
#endif
|
||||
@@ -542,7 +492,7 @@ final class TestRunner {
|
||||
name.withCString { p in stopTrackingObjects(p) }
|
||||
#endif
|
||||
|
||||
return lastSampleTime.microseconds / numIters
|
||||
return Double(lastSampleTime.microseconds) / Double(numIters)
|
||||
}
|
||||
|
||||
func logVerbose(_ msg: @autoclosure () -> String) {
|
||||
@@ -560,9 +510,9 @@ final class TestRunner {
|
||||
}
|
||||
logVerbose("Running \(test.name)")
|
||||
|
||||
var samples: [Int] = []
|
||||
var samples: [Double] = []
|
||||
|
||||
func addSample(_ time: Int) {
|
||||
func addSample(_ time: Double) {
|
||||
logVerbose(" Sample \(samples.count),\(time)")
|
||||
samples.append(time)
|
||||
}
|
||||
@@ -576,11 +526,11 @@ final class TestRunner {
|
||||
}
|
||||
|
||||
// Determine number of iterations for testFn to run for desired time.
|
||||
func iterationsPerSampleTime() -> (numIters: Int, oneIter: Int) {
|
||||
func iterationsPerSampleTime() -> (numIters: Int, oneIter: Double) {
|
||||
let oneIter = measure(test.name, fn: testFn, numIters: 1)
|
||||
if oneIter > 0 {
|
||||
let timePerSample = Int(c.sampleTime * 1_000_000.0) // microseconds (μs)
|
||||
return (max(timePerSample / oneIter, 1), oneIter)
|
||||
let timePerSample = c.sampleTime * 1_000_000.0 // microseconds (μs)
|
||||
return (max(Int(timePerSample / oneIter), 1), oneIter)
|
||||
} else {
|
||||
return (1, oneIter)
|
||||
}
@@ -615,77 +565,137 @@ final class TestRunner {
    test.tearDownFunction?()
    if let lf = test.legacyFactor {
      logVerbose("    Applying legacy factor: \(lf)")
      samples = samples.map { $0 * lf }
      samples = samples.map { $0 * Double(lf) }
    }

    return BenchResults(samples, collectMetadata())
    return BenchResults(samples, collectMetadata(), numIters)
  }

  var header: String {
    let withUnit = {$0 + "(μs)"}
    let withDelta = {"𝚫" + $0}
    func quantiles(q: Int) -> [String] {
      // See https://en.wikipedia.org/wiki/Quantile#Specialized_quantiles
      let prefix = [
        2: "MEDIAN", 3: "T", 4: "Q", 5: "QU", 6: "S", 7: "O", 10: "D",
        12: "Dd", 16: "H", 20: "V", 33: "TT", 100: "P", 1000: "Pr"
      ][q, default: "\(q)-q"]
      let base20 = "0123456789ABCDEFGHIJ".map { String($0) }
      let index: (Int) -> String =
        { q == 2 ? "" : q <= 20 ? base20[$0] : String($0) }
      let tail = (1..<q).map { prefix + index($0) } + ["MAX"]
      // QMIN identifies the quantile format, distinct from formats using "MIN"
      return [withUnit("QMIN")] + tail.map(c.delta ? withDelta : withUnit)
  func printJSON(index: Int, info: BenchmarkInfo, results: BenchResults?) {
    // Write the results for a single test as a one-line JSON object
    // This allows a script to easily consume the results by JSON-decoding
    // each line separately.

    // To avoid relying on Foundation, construct the JSON naively. This is
    // actually pretty robust, since almost everything is a number; the only
    // brittle assumption is that test.name must not have \ or " in it.
    var out = [
      "\"number\":\(index)",
      "\"name\":\"\(info.name)\""
    ]

    if let results = results {
      let samples = results.samples.sorted().map({$0.description}).joined(separator: ",")
      out.append("\"samples\":[\(samples)]")
      out.append("\"iters\":\(results.iters)")
      if let meta = results.meta {
        if c.logMemory {
          out += [
            "\"max_rss\":\(meta.maxRSS)",
            "\"pages\":\(meta.pages)",
          ]
        }
        if c.logMeta {
          out += [
            "\"ics\":\(meta.ics)",
            "\"yields\":\(meta.yields)",
          ]
        }
      }
    }
    return (
      ["#", "TEST", "SAMPLES"] +
      (c.quantile.map(quantiles)
        ?? ["MIN", "MAX", "MEAN", "SD", "MEDIAN"].map(withUnit)) +
      (c.logMemory ? ["MAX_RSS(B)"] : []) +
      (c.logMeta ? ["PAGES", "ICS", "YIELD"] : [])
    ).joined(separator: c.delim)
    print("{ " + out.joined(separator: ", ") + " }")
    fflush(stdout)
  }
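For illustration only (field values invented): because each result is printed as a complete JSON object on its own line, a consumer can decode the stream line by line. A minimal Swift sketch of such a consumer, assuming Foundation is available on the consuming side:

import Foundation

// Hypothetical output line; the number, name, and samples are invented.
let line = "{ \"number\":42, \"name\":\"ArrayAppend\", \"samples\":[12.5,13.1,12.9], \"iters\":1000 }"

// Each line is an independent JSON document, so it can be decoded on its own.
if let data = line.data(using: .utf8),
   let object = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
   let name = object["name"] as? String,
   let samples = object["samples"] as? [Double] {
  print("\(name): \(samples.count) samples, min \(samples.min() ?? 0) µs")
}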

  /// Execute benchmarks and continuously report the measurement results.

  enum Justification {
    case left, right
  }
  func printSpaces(_ width: Int) {
    for _ in 0..<width {
      print(" ", terminator: "")
    }
  }
  func printToWidth(_ s: String, width: Int, justify: Justification = .left) {
    var pad = width - 1 - s.count
    if pad <= 0 {
      pad = 1
    }
    if justify == .right {
      printSpaces(pad)
    }
    print(s, terminator: "")
    if justify == .left {
      printSpaces(pad)
    }
  }
  func printDoubleToWidth(_ d: Double, fractionDigits: Int = 3, width: Int) {
    let digits = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    // Handle up to 8 fraction digits
    let scales = [1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000]
    let scale = scales[fractionDigits]
    let i = Int(d * Double(scale) + 0.5)
    let intPart = i / scale
    let fraction = i % scale
    var s = intPart.description + "."
    var f = fraction
    for _ in 0..<fractionDigits {
      f *= 10
      s += digits[(f / scale) % 10]
    }
    printToWidth(s, width: width, justify: .right)
  }
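A quick worked check of the fixed-point rounding above, with numbers chosen purely for illustration:

// Worked example (illustrative): printDoubleToWidth(12.3456, width: 10)
//   fractionDigits defaults to 3, so scale = 1000
//   i = Int(12.3456 * 1000 + 0.5) = 12346
//   intPart = 12, fraction = 346; the digit loop appends "3", "4", "6"
//   giving s = "12.346", printed right-justified as "   12.346"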

  func printText(index: Int, info: BenchmarkInfo, results: BenchResults?) {
    printToWidth(index.description, width: 4, justify: .right)
    printSpaces(1)
    printToWidth(info.name, width: c.testNameLength)

    if let results = results {
      printToWidth(String(describing:results.samples.count), width: 10, justify: .right)
      if results.samples.count > 0 {
        let sorted = results.samples.sorted()
        let min = sorted.first!
        let max = sorted.last!
        let median = sorted[sorted.count / 2]
        printDoubleToWidth(min, width: 10)
        printDoubleToWidth(median, width: 10)
        printDoubleToWidth(max, width: 10)
      }
    }
    print()
    fflush(stdout)
  }

  func printTextHeading() {
    printToWidth("#", width: 4, justify: .right)
    printSpaces(1)
    printToWidth("TEST", width: c.testNameLength, justify: .left)
    printToWidth("SAMPLES", width: 10, justify: .right)
    printToWidth("MIN", width: 10, justify: .right)
    printToWidth("MEDIAN", width: 10, justify: .right)
    printToWidth("MAX", width: 10, justify: .right)
    print()
  }
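For a rough sense of the text-mode layout these helpers produce (an invented row; exact spacing depends on c.testNameLength):

//   # TEST                SAMPLES       MIN    MEDIAN       MAX
//  42 ArrayAppend              20    12.345    12.901    14.210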

  /// Run each benchmark and emit the results in JSON
  func runBenchmarks() {
    var testCount = 0

    func report(_ index: String, _ t: BenchmarkInfo, results: BenchResults?) {
      func values(r: BenchResults) -> [String] {
        func quantiles(q: Int) -> [Int] {
          let qs = (0...q).map { i in r[Double(i) / Double(q)] }
          return c.delta ?
            qs.reduce(into: (encoded: [], last: 0)) {
              $0.encoded.append($1 - $0.last); $0.last = $1
            }.encoded : qs
        }
        let values: [Int] = [r.sampleCount] +
          (c.quantile.map(quantiles)
            ?? [r.min, r.max, r.mean, r.sd, r.median]) +
          (c.logMemory ? [r.meta?.maxRSS].compactMap { $0 } : []) +
          (c.logMeta ? r.meta.map {
            [$0.pages, $0.ics, $0.yields] } ?? [] : [])
        return values.map { String($0) }
      }
      let benchmarkStats = (
        [index, t.name] + (results.map(values) ?? ["Unsupported"])
      ).joined(separator: c.delim)

      print(benchmarkStats)
      fflush(stdout)

      if (results != nil) {
        testCount += 1
    if !c.jsonOutput {
      printTextHeading()
    }
    for (index, info) in c.tests {
      if c.jsonOutput {
        printJSON(index: index, info: info, results: run(info))
      } else {
        printText(index: index, info: info, results: run(info))
      }
      testCount += 1
    }

    print(header)

    for (index, test) in c.tests {
      report(index, test, results:run(test))
    if !c.jsonOutput {
      print("\nTotal performance tests executed: \(testCount)")
    }

    print("\nTotal performance tests executed: \(testCount)")
  }
}

@@ -704,11 +714,18 @@ public func main() {
  let config = TestConfig(registeredBenchmarks)
  switch (config.action) {
  case .listTests:
    print("#\(config.delim)Test\(config.delim)[Tags]")
    for (index, t) in config.tests {
      let testDescription = [index, t.name, t.tags.sorted().description]
        .joined(separator: config.delim)
      print(testDescription)
    if config.jsonOutput {
      for (index, t) in config.tests {
        let tags = t.tags.sorted().map({"\"\($0.description)\""}).joined(separator: ",")
        print("{\"number\":\(index), \"name\":\"\(t.name)\", \"tags\":[\(tags)]}")
      }
    } else {
      print("# Test [Tags]")
      for (index, t) in config.tests {
        let testDescription = [index.description, t.name, t.tags.sorted().description]
          .joined(separator: " ")
        print(testDescription)
      }
    }
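Illustrative only (the test name and tags below are placeholders): in JSON mode, each line of the --list output is a single object of roughly this shape, one object per test:

{"number":7, "name":"ExampleBenchmark", "tags":["api","validation"]}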
  case .run:
    if !config.allowNondeterministicHashing && !Hasher.isDeterministic {