#!/usr/bin/python
# -*- coding: utf-8 -*-

# ===--- compare_perf_tests.py -------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
"""
This script compares performance test logs and issues a formatted report.

Invoke `$ compare_perf_tests.py -h` for a complete list of options.

class `Sample` is a single benchmark measurement.
class `PerformanceTestSamples` is a collection of `Sample`s and their statistics.
class `PerformanceTestResult` is a summary of a performance test execution.
class `LogParser` converts log files into `PerformanceTestResult`s.
class `ResultComparison` compares new and old `PerformanceTestResult`s.
class `TestComparator` analyzes changes between the old and new test results.
class `ReportFormatter` creates the test comparison report in the specified format.

"""

from __future__ import print_function

import argparse
import functools
import re
import sys
from bisect import bisect, bisect_left, bisect_right
from collections import namedtuple
from math import ceil, sqrt


class Sample(namedtuple("Sample", "i num_iters runtime")):
    u"""Single benchmark measurement.

    Initialized with:
    `i`: ordinal number of the sample taken,
    `num_iters`: number of iterations used to compute it,
    `runtime`: in microseconds (μs).
    """

    def __repr__(self):
        """Shorter Sample formatting for debugging purposes."""
        return "s({0.i!r}, {0.num_iters!r}, {0.runtime!r})".format(self)
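
# For example, a sample with ordinal i=5, measured using 20 iterations, with a
# runtime of 123 μs is written Sample(5, 20, 123) and debug-prints as
# s(5, 20, 123) (illustrative values).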


class Yield(namedtuple("Yield", "before_sample after")):
    u"""Meta-measurement of when the Benchmark_X voluntarily yielded the process.

    `before_sample`: index of measurement taken just after returning from yield
    `after`: time elapsed since the previous yield in microseconds (μs)
    """


class PerformanceTestSamples(object):
    """Collection of runtime samples from the benchmark execution.

    Computes the sample population statistics.
    """

    def __init__(self, name, samples=None):
        """Initialize with benchmark name and optional list of Samples."""
        self.name = name  # Name of the performance test
        self.samples = []
        self.outliers = []
        self._runtimes = []
        self.mean = 0.0
        self.S_runtime = 0.0  # For computing running variance
        for sample in samples or []:
            self.add(sample)
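
    # A minimal usage sketch (hypothetical name and runtimes): samples are kept
    # sorted by runtime, so the order statistics are available right away.
    #
    #   s = PerformanceTestSamples("B1", [Sample(0, 1, 1200), Sample(1, 1, 1025)])
    #   s.add(Sample(2, 1, 1050))
    #   (s.min, s.median, s.max)  # -> (1025, 1050, 1200)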

    def __str__(self):
        """Text summary of benchmark statistics."""
        return (
            "{0.name!s} n={0.count!r} "
            "Min={0.min!r} Q1={0.q1!r} M={0.median!r} Q3={0.q3!r} "
            "Max={0.max!r} "
            "R={0.range!r} {0.spread:.2%} IQR={0.iqr!r} "
            "Mean={0.mean:.0f} SD={0.sd:.0f} CV={0.cv:.2%}".format(self)
            if self.samples
            else "{0.name!s} n=0".format(self)
        )

    def add(self, sample):
        """Add sample to collection and recompute statistics."""
        assert isinstance(sample, Sample)
        self._update_stats(sample)
        i = bisect(self._runtimes, sample.runtime)
        self._runtimes.insert(i, sample.runtime)
        self.samples.insert(i, sample)

    def _update_stats(self, sample):
        old_stats = (self.count, self.mean, self.S_runtime)
        _, self.mean, self.S_runtime = self.running_mean_variance(
            old_stats, sample.runtime
        )

    def exclude_outliers(self, top_only=False):
        """Exclude outliers by applying the Interquartile Range Rule.

        Moves the samples outside of the inner fences
        (Q1 - 1.5*IQR and Q3 + 1.5*IQR) into the outliers list and recomputes
        the statistics for the remaining sample population. Optionally applies
        only the top inner fence, preserving the small outliers.

        Experimentally, this rule seems to perform well enough on the
        benchmark runtimes in the microbenchmark range to filter out
        the environment noise caused by preemptive multitasking.
        """
        lo = (
            0
            if top_only
            else bisect_left(self._runtimes, int(self.q1 - 1.5 * self.iqr))
        )
        hi = bisect_right(self._runtimes, int(self.q3 + 1.5 * self.iqr))

        outliers = self.samples[:lo] + self.samples[hi:]
        samples = self.samples[lo:hi]

        self.__init__(self.name)  # re-initialize
        for sample in samples:  # and
            self.add(sample)  # re-compute stats
        self.outliers = outliers
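
    # Worked example of the fences (illustrative numbers): with Q1 = 100 μs,
    # Q3 = 140 μs and IQR = 40 μs, the inner fences are 100 - 1.5 * 40 = 40 μs
    # and 140 + 1.5 * 40 = 200 μs; any sample outside [40, 200] is moved to
    # `outliers` (only the > 200 μs ones when `top_only=True`).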

    @property
    def count(self):
        """Number of samples used to compute the statistics."""
        return len(self.samples)

    @property
    def num_samples(self):
        """Number of all samples in the collection."""
        return len(self.samples) + len(self.outliers)

    @property
    def all_samples(self):
        """List of all samples in ascending order."""
        return sorted(self.samples + self.outliers, key=lambda s: s.i or -1)

    @property
    def min(self):
        """Minimum sampled value."""
        return self.samples[0].runtime

    @property
    def max(self):
        """Maximum sampled value."""
        return self.samples[-1].runtime

    def quantile(self, q):
        """Return runtime for given quantile.

        Equivalent to quantile estimate type R-1, SAS-3. See:
        https://en.wikipedia.org/wiki/Quantile#Estimating_quantiles_from_a_sample
        """
        index = max(0, int(ceil(self.count * float(q))) - 1)
        return self.samples[index].runtime

    @property
    def median(self):
        """Median sampled value."""
        return self.quantile(0.5)

    @property
    def q1(self):
        """First Quartile (25th Percentile)."""
        return self.quantile(0.25)

    @property
    def q3(self):
        """Third Quartile (75th Percentile)."""
        return self.quantile(0.75)

    @property
    def iqr(self):
        """Interquartile Range."""
        return self.q3 - self.q1
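
    # To illustrate the R-1 estimate used by `quantile` above: with 10 sorted
    # samples, q1 takes index ceil(10 * 0.25) - 1 = 2 (the 3rd smallest value),
    # the median takes index ceil(10 * 0.5) - 1 = 4, and q3 takes
    # index ceil(10 * 0.75) - 1 = 7 (all indices 0-based).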

    @property
    def sd(self):
        u"""Standard Deviation (μs)."""
        return 0 if self.count < 2 else sqrt(self.S_runtime / (self.count - 1))

    @staticmethod
    def running_mean_variance(stats, x):
        """Compute running variance using B. P. Welford's method.

        See Knuth TAOCP vol 2, 3rd edition, page 232, or
        https://www.johndcook.com/blog/standard_deviation/
        M is the mean; Standard Deviation is defined as sqrt(S / (k - 1)).
        """

        (k, M_, S_) = stats

        k = float(k + 1)
        M = M_ + (x - M_) / k
        S = S_ + (x - M_) * (x - M)
        return (k, M, S)
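
    # One step of the recurrence, worked through with made-up numbers: after
    # k=2 samples with M=100 and S=50, adding x=106 gives k=3,
    # M = 100 + (106 - 100) / 3 = 102 and S = 50 + (106 - 100) * (106 - 102) = 74,
    # so the running sample SD is sqrt(74 / 2) ≈ 6.08 μs.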

    @property
    def cv(self):
        """Coefficient of Variation (%)."""
        return (self.sd / self.mean) if self.mean else 0

    @property
    def range(self):
        """Range of sampled values (Max - Min)."""
        return self.max - self.min

    @property
    def spread(self):
        """Sample Spread; i.e. Range as (%) of Min."""
        return self.range / float(self.min) if self.min else 0


class PerformanceTestResult(object):
    u"""Result from executing an individual Swift Benchmark Suite benchmark.

    Reported by the test driver (Benchmark_O, Benchmark_Onone, Benchmark_Osize
    or Benchmark_Driver).

    It supports 2 log formats emitted by the test driver. Legacy format with
    statistics for normal distribution (MEAN, SD):
        #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
    And new quantiles format with variable number of columns:
        #,TEST,SAMPLES,MIN(μs),MEDIAN(μs),MAX(μs)
        #,TEST,SAMPLES,MIN(μs),Q1(μs),Q2(μs),Q3(μs),MAX(μs),MAX_RSS(B)
    The number of columns between MIN and MAX depends on the test driver's
    `--quantile` parameter. In both cases, the last column, MAX_RSS, is optional.
    """
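
    # Hypothetical rows matching the two header formats above (made-up values):
    #   legacy:     34,BitCount,20,119,168,127.0,12.0,122,10510336
    #   quantiles:  34,BitCount,20,119,121,122,124,168,10510336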

    def __init__(self, csv_row, quantiles=False, memory=False, delta=False, meta=False):
        """Initialize from a row of multiple columns with benchmark summary.

        The row is an iterable, such as a row provided by the CSV parser.
        """
        self.test_num = csv_row[0]  # Ordinal number of the test
        self.name = csv_row[1]  # Name of the performance test
        self.num_samples = int(csv_row[2])  # Number of measurements taken

        if quantiles:  # Variable number of columns representing quantiles
            mem_index = (-1 if memory else 0) + (-3 if meta else 0)
            runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:]
            if delta:
                runtimes = [int(x) if x else 0 for x in runtimes]
                runtimes = functools.reduce(
                    lambda l, x: l.append(l[-1] + x) or l if l else [x],  # running
                    runtimes,
                    None,
                )  # total
            num_values = len(runtimes)
            if self.num_samples < num_values:  # remove repeated samples
                quantile = num_values - 1
                qs = [float(i) / float(quantile) for i in range(0, num_values)]
                indices = [
                    max(0, int(ceil(self.num_samples * float(q))) - 1) for q in qs
                ]
                runtimes = [
                    runtimes[indices.index(i)] for i in range(0, self.num_samples)
                ]

            self.samples = PerformanceTestSamples(
                self.name, [Sample(None, None, int(runtime)) for runtime in runtimes]
            )
            self.samples.exclude_outliers(top_only=True)
            sams = self.samples
            self.min, self.max, self.median, self.mean, self.sd = (
                sams.min,
                sams.max,
                sams.median,
                sams.mean,
                sams.sd,
            )
            self.max_rss = (  # Maximum Resident Set Size (B)
                int(csv_row[mem_index]) if memory else None
            )
        else:  # Legacy format with statistics for normal distribution.
            self.min = int(csv_row[3])  # Minimum runtime (μs)
            self.max = int(csv_row[4])  # Maximum runtime (μs)
            self.mean = float(csv_row[5])  # Mean (average) runtime (μs)
            self.sd = float(csv_row[6])  # Standard Deviation (μs)
            self.median = int(csv_row[7])  # Median runtime (μs)
            self.max_rss = (  # Maximum Resident Set Size (B)
                int(csv_row[8]) if len(csv_row) > 8 else None
            )
            self.samples = None

        # Optional measurement metadata. The number of:
        # memory pages used, involuntary context switches and voluntary yields
        self.mem_pages, self.involuntary_cs, self.yield_count = (
            [int(x) for x in csv_row[-3:]] if meta else (None, None, None)
        )
        self.yields = None
        self.setup = None

    def __repr__(self):
        """Short summary for debugging purposes."""
        return (
            "<PerformanceTestResult name:{0.name!r} "
            "samples:{0.num_samples!r} min:{0.min!r} max:{0.max!r} "
            "mean:{0.mean:.0f} sd:{0.sd:.0f} median:{0.median!r}>".format(self)
        )

    def merge(self, r):
        """Merge two results.

        Recomputes min, max and mean statistics. If all `samples` are
        available, it recomputes all the statistics.
        The use case here is comparing test results parsed from concatenated
        log files from multiple runs of the benchmark driver.
        """
        # Statistics
        if self.samples and r.samples:
            for sample in r.samples.samples:
                self.samples.add(sample)
            sams = self.samples
            self.num_samples = sams.num_samples
            self.min, self.max, self.median, self.mean, self.sd = (
                sams.min,
                sams.max,
                sams.median,
                sams.mean,
                sams.sd,
            )
        else:
            self.min = min(self.min, r.min)
            self.max = max(self.max, r.max)
            self.mean = (  # pooled mean is the weighted sum of means
                (self.mean * self.num_samples) + (r.mean * r.num_samples)
            ) / float(self.num_samples + r.num_samples)
            self.num_samples += r.num_samples
            self.median, self.sd = None, None

        # Metadata
        def minimum(a, b):  # work around None being less than everything
            return min(filter(lambda x: x is not None, [a, b])) if any([a, b]) else None

        self.max_rss = minimum(self.max_rss, r.max_rss)
        self.setup = minimum(self.setup, r.setup)
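
    # The pooled mean, with illustrative numbers: merging a result with
    # mean 100 μs over 10 samples and one with mean 110 μs over 30 samples
    # yields (100 * 10 + 110 * 30) / 40 = 107.5 μs over 40 samples.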


class ResultComparison(object):
    """ResultComparison compares MINs from new and old PerformanceTestResult.

    It computes the speedup ratio and improvement delta (%).
    """

    def __init__(self, old, new):
        """Initialize with old and new `PerformanceTestResult`s to compare."""
        self.old = old
        self.new = new
        assert old.name == new.name
        self.name = old.name  # Test name, convenience accessor

        # Speedup ratio
        self.ratio = (old.min + 0.001) / (new.min + 0.001)

        # Test runtime improvement in %
        ratio = (new.min + 0.001) / (old.min + 0.001)
        self.delta = (ratio - 1) * 100

        # Indication of dubious changes: when a result's MIN falls inside the
        # (MIN, MAX) interval of the result it is being compared with.
        self.is_dubious = (old.min < new.min and new.min < old.max) or (
            new.min < old.min and old.min < new.max
        )
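
    # Worked example (made-up runtimes): old.min = 105 μs and new.min = 100 μs
    # give a speedup ratio of roughly 105 / 100 = 1.05x and a delta of
    # (100 / 105 - 1) * 100 ≈ -4.8%, i.e. the new runtime is about 4.8% shorter.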


class LogParser(object):
    """Converts log outputs into `PerformanceTestResult`s.

    Supports various formats produced by the `Benchmark_Driver` and
    `Benchmark_O`('Onone', 'Osize'). It can also merge together the
    results from concatenated log files.
    """

    def __init__(self):
        """Create instance of `LogParser`."""
        self.results = []
        self.quantiles, self.delta, self.memory = False, False, False
        self.meta = False
        self._reset()

    def _reset(self):
        """Reset parser to the default state for reading a new result."""
        self.samples, self.yields, self.num_iters = [], [], 1
        self.setup, self.max_rss, self.mem_pages = None, None, None
        self.voluntary_cs, self.involuntary_cs = None, None

    # Parse lines like this
    # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)
    results_re = re.compile(
        r"( *\d+[, \t]+[\w.\-\?!]+[, \t]+"  # #,TEST
        + r"[, \t]+".join([r"\d+"] * 2)  # at least 2...
        + r"(?:[, \t]+\d*)*)"  # ...or more numeric columns
    )

    def _append_result(self, result):
        columns = result.split(",") if "," in result else result.split()
        r = PerformanceTestResult(
            columns,
            quantiles=self.quantiles,
            memory=self.memory,
            delta=self.delta,
            meta=self.meta,
        )
        r.setup = self.setup
        r.max_rss = r.max_rss or self.max_rss
        r.mem_pages = r.mem_pages or self.mem_pages
        r.voluntary_cs = self.voluntary_cs
        r.involuntary_cs = r.involuntary_cs or self.involuntary_cs
        if self.samples:
            r.samples = PerformanceTestSamples(r.name, self.samples)
            r.samples.exclude_outliers()
        self.results.append(r)
        r.yields = self.yields or None
        self._reset()

    def _store_memory_stats(self, max_rss, mem_pages):
        self.max_rss = int(max_rss)
        self.mem_pages = int(mem_pages)

    def _configure_format(self, header):
        self.quantiles = "MEAN" not in header
        self.memory = "MAX_RSS" in header
        self.meta = "PAGES" in header
        self.delta = "𝚫" in header

    # Regular expression and action to take when it matches the parsed line
    state_actions = {
        results_re: _append_result,
        # Verbose mode adds new productions:
        # Adaptively determined N; test loop multiple adjusting runtime to ~1s
        re.compile(r"\s+Measuring with scale (\d+)."): (
            lambda self, num_iters: setattr(self, "num_iters", num_iters)
        ),
        re.compile(r"\s+Sample (\d+),(\d+)"): (
            lambda self, i, runtime: self.samples.append(
                Sample(int(i), int(self.num_iters), int(runtime))
            )
        ),
        re.compile(r"\s+SetUp (\d+)"): (
            lambda self, setup: setattr(self, "setup", int(setup))
        ),
        re.compile(r"\s+Yielding after ~(\d+) μs"): (
            lambda self, since_last_yield: self.yields.append(
                Yield(len(self.samples), int(since_last_yield))
            )
        ),
        re.compile(r"( *#[, \t]+TEST[, \t]+SAMPLES[, \t]+MIN.*)"): _configure_format,
        # Environmental statistics: memory usage and context switches
        re.compile(
            r"\s+MAX_RSS \d+ - \d+ = (\d+) \((\d+) pages\)"
        ): _store_memory_stats,
        re.compile(r"\s+VCS \d+ - \d+ = (\d+)"): (
            lambda self, vcs: setattr(self, "voluntary_cs", int(vcs))
        ),
        re.compile(r"\s+ICS \d+ - \d+ = (\d+)"): (
            lambda self, ics: setattr(self, "involuntary_cs", int(ics))
        ),
    }
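
    # A rough sketch of verbose driver output that these patterns recognize
    # (numbers are illustrative):
    #     Measuring with scale 4.
    #     Sample 0,2654
    #     Sample 1,2631
    #   3,Array2D,2,2631,2654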

    def parse_results(self, lines):
        """Parse results from the lines of the log output from Benchmark*.

        Returns a list of `PerformanceTestResult`s.
        """
        for line in lines:
            for regexp, action in LogParser.state_actions.items():
                match = regexp.match(line)
                if match:
                    action(self, *match.groups())
                    break  # stop after 1st match
            else:  # If none matches, skip the line.
                # print('skipping: ' + line.rstrip('\n'))
                continue
        return self.results

    @staticmethod
    def _results_from_lines(lines):
        tests = LogParser().parse_results(lines)

        def add_or_merge(names, r):
            if r.name not in names:
                names[r.name] = r
            else:
                names[r.name].merge(r)
            return names

        return functools.reduce(add_or_merge, tests, dict())

    @staticmethod
    def results_from_string(log_contents):
        """Parse `PerformanceTestResult`s from the supplied string.

        Returns dictionary of test names and `PerformanceTestResult`s.
        """
        return LogParser._results_from_lines(log_contents.splitlines())

    @staticmethod
    def results_from_file(log_file):
        """Parse `PerformanceTestResult`s from the log file.

        Returns dictionary of test names and `PerformanceTestResult`s.
        """
        with open(log_file) as f:
            return LogParser._results_from_lines(f.readlines())


class TestComparator(object):
    """Analyzes changes between the old and new test results.

    It determines which tests were `added`, `removed` and which can be
    compared. It then splits the `ResultComparison`s into 3 groups according to
    the `delta_threshold` by the change in performance: `increased`,
    `decreased` and `unchanged`. The whole computation is performed during
    initialization and the results are provided as properties on this object.

    The lists of `added`, `removed` and `unchanged` tests are sorted
    alphabetically. The `increased` and `decreased` lists are sorted in
    descending order by the amount of change.
    """

    def __init__(self, old_results, new_results, delta_threshold):
        """Initialize with dictionaries of old and new benchmark results.

        Dictionary keys are benchmark names, values are
        `PerformanceTestResult`s.
        """
        old_tests = set(old_results.keys())
        new_tests = set(new_results.keys())
        comparable_tests = new_tests.intersection(old_tests)
        added_tests = new_tests.difference(old_tests)
        removed_tests = old_tests.difference(new_tests)

        self.added = sorted([new_results[t] for t in added_tests], key=lambda r: r.name)
        self.removed = sorted(
            [old_results[t] for t in removed_tests], key=lambda r: r.name
        )

        def compare(name):
            return ResultComparison(old_results[name], new_results[name])

        comparisons = list(map(compare, comparable_tests))

        def partition(items, p):
            return functools.reduce(
                lambda x, y: x[not p(y)].append(y) or x, items, ([], [])
            )

        decreased, not_decreased = partition(
            comparisons, lambda c: c.ratio < (1 - delta_threshold)
        )
        increased, unchanged = partition(
            not_decreased, lambda c: c.ratio > (1 + delta_threshold)
        )

        # sorted partitions
        names = [c.name for c in comparisons]
        comparisons = dict(zip(names, comparisons))
        self.decreased = [
            comparisons[c.name] for c in sorted(decreased, key=lambda c: -c.delta)
        ]
        self.increased = [
            comparisons[c.name] for c in sorted(increased, key=lambda c: c.delta)
        ]
        self.unchanged = [
            comparisons[c.name] for c in sorted(unchanged, key=lambda c: c.name)
        ]
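
    # Example of the grouping, assuming the default `delta_threshold` of 0.05:
    # a test whose speedup ratio (old MIN / new MIN) falls below 0.95 ends up
    # in `decreased` (reported as a regression), one whose ratio exceeds 1.05
    # ends up in `increased` (an improvement), everything else in `unchanged`.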


class ReportFormatter(object):
    """Creates the report from performance test comparison in specified format.

    `ReportFormatter` formats the `PerformanceTestResult`s and
    `ResultComparison`s provided by `TestComparator` into a report table.
    Supported formats are: `markdown` (used for displaying benchmark results on
    GitHub), `git` and `html`.
    """

    def __init__(self, comparator, changes_only, single_table=False):
        """Initialize with a `TestComparator` and report formatting options."""
        self.comparator = comparator
        self.changes_only = changes_only
        self.single_table = single_table

    PERFORMANCE_TEST_RESULT_HEADER = ("TEST", "MIN", "MAX", "MEAN", "MAX_RSS")
    RESULT_COMPARISON_HEADER = ("TEST", "OLD", "NEW", "DELTA", "RATIO")

    @staticmethod
    def header_for(result):
        """Column labels for header row in results table."""
        return (
            ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER
            if isinstance(result, PerformanceTestResult)
            else
            # isinstance(result, ResultComparison)
            ReportFormatter.RESULT_COMPARISON_HEADER
        )

    @staticmethod
    def values(result):
        """Format values from PerformanceTestResult or ResultComparison.

        Returns tuple of strings to display in the results table.
        """
        return (
            (
                result.name,
                str(result.min),
                str(result.max),
                str(int(result.mean)),
                str(result.max_rss) if result.max_rss else "—",
            )
            if isinstance(result, PerformanceTestResult)
            else
            # isinstance(result, ResultComparison)
            (
                result.name,
                str(result.old.min),
                str(result.new.min),
                "{0:+.1f}%".format(result.delta),
                "{0:.2f}x{1}".format(result.ratio, " (?)" if result.is_dubious else ""),
            )
        )
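
    # In the markdown format each comparison becomes one table row built from
    # the `values` tuple above, roughly (numbers are made up, column padding
    # omitted):
    #   TEST | OLD | NEW | DELTA | RATIO
    #   AngryPhonebook | 10458 | 10758 | +2.9% | 0.97x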

    def markdown(self):
        """Report results of benchmark comparisons in Markdown format."""
        return self._formatted_text(
            label_formatter=lambda s: ("**" + s + "**"),
            COLUMN_SEPARATOR=" | ",
            DELIMITER_ROW=([":---"] + ["---:"] * 4),
            SEPARATOR=" | | | | \n",
            SECTION="""
<details {3}>
<summary>{0} ({1})</summary>
{2}
</details>
""",
        )

    def git(self):
        """Report results of benchmark comparisons in 'git' format."""
        return self._formatted_text(
            label_formatter=lambda s: s.upper(),
            COLUMN_SEPARATOR=" ",
            DELIMITER_ROW=None,
            SEPARATOR="\n",
            SECTION="""
{0} ({1}): \n{2}""",
        )

    def _column_widths(self):
        changed = self.comparator.decreased + self.comparator.increased
        results = changed if self.changes_only else changed + self.comparator.unchanged
        results += self.comparator.added + self.comparator.removed

        widths = [
            map(len, columns)
            for columns in [
                ReportFormatter.PERFORMANCE_TEST_RESULT_HEADER,
                ReportFormatter.RESULT_COMPARISON_HEADER,
            ]
            + [ReportFormatter.values(r) for r in results]
        ]

        def max_widths(maximum, widths):
            return map(max, zip(maximum, widths))

        return list(functools.reduce(max_widths, widths, [0] * 5))

    def _formatted_text(
        self, label_formatter, COLUMN_SEPARATOR, DELIMITER_ROW, SEPARATOR, SECTION
    ):
        widths = self._column_widths()
        self.header_printed = False

        def justify_columns(contents):
            return [c.ljust(w) for w, c in zip(widths, contents)]

        def row(contents):
            return (
                ""
                if not contents
                else COLUMN_SEPARATOR.join(justify_columns(contents)) + "\n"
            )

        def header(title, column_labels):
            labels = (
                column_labels
                if not self.single_table
                else map(label_formatter, (title,) + column_labels[1:])
            )
            h = (
                ("" if not self.header_printed else SEPARATOR)
                + row(labels)
                + (row(DELIMITER_ROW) if not self.header_printed else "")
            )
            if self.single_table and not self.header_printed:
                self.header_printed = True
            return h

        def format_columns(r, is_strong):
            return r if not is_strong else r[:-1] + ("**" + r[-1] + "**",)

        def table(title, results, is_strong=False, is_open=False):
            if not results:
                return ""
            rows = [
                row(format_columns(ReportFormatter.values(r), is_strong))
                for r in results
            ]
            table = header(
                title if self.single_table else "",
                ReportFormatter.header_for(results[0]),
            ) + "".join(rows)
            return (
                table
                if self.single_table
                else SECTION.format(
                    title, len(results), table, "open" if is_open else ""
                )
            )

        return "\n" + "".join(
            [
                table("Regression", self.comparator.decreased, True, True),
                table("Improvement", self.comparator.increased, True),
                (
                    ""
                    if self.changes_only
                    else table("No Changes", self.comparator.unchanged)
                ),
                table("Added", self.comparator.added, is_open=True),
                table("Removed", self.comparator.removed, is_open=True),
            ]
        )

    HTML = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style>
body {{ font-family: -apple-system, sans-serif; font-size: 14px; }}
table {{ border-spacing: 2px; border-color: gray; border-spacing: 0;
         border-collapse: collapse; }}
table tr {{ background-color: #fff; border-top: 1px solid #c6cbd1; }}
table th, table td {{ padding: 6px 13px; border: 1px solid #dfe2e5; }}
th {{ text-align: center; padding-top: 130px; }}
td {{ text-align: right; }}
table td:first-child {{ text-align: left; }}
tr:nth-child(even) {{ background-color: #000000; }}
tr:nth-child(2n) {{ background-color: #f6f8fa; }}
</style>
</head>
<body>
<table>
{0}
</table>
</body>
</html>"""

    HTML_HEADER_ROW = """
<tr>
<th align='left'>{0} ({1})</th>
<th align='left'>{2}</th>
<th align='left'>{3}</th>
<th align='left'>{4}</th>
<th align='left'>{5}</th>
</tr>
"""

    HTML_ROW = """
<tr>
<td align='left'>{0}</td>
<td align='left'>{1}</td>
<td align='left'>{2}</td>
<td align='left'>{3}</td>
<td align='left'><font color='{4}'>{5}</font></td>
</tr>
"""

    def html(self):
        """Report results of benchmark comparisons in HTML format."""

        def row(name, old, new, delta, speedup, speedup_color):
            return self.HTML_ROW.format(name, old, new, delta, speedup_color, speedup)

        def header(contents):
            return self.HTML_HEADER_ROW.format(*contents)

        def table(title, results, speedup_color):
            rows = [
                row(*(ReportFormatter.values(r) + (speedup_color,))) for r in results
            ]
            return (
                ""
                if not rows
                else header(
                    (title, len(results)) + ReportFormatter.header_for(results[0])[1:]
                )
                + "".join(rows)
            )

        return self.HTML.format(
            "".join(
                [
                    table("Regression", self.comparator.decreased, "red"),
                    table("Improvement", self.comparator.increased, "green"),
                    (
                        ""
                        if self.changes_only
                        else table("No Changes", self.comparator.unchanged, "black")
                    ),
                    table("Added", self.comparator.added, ""),
                    table("Removed", self.comparator.removed, ""),
                ]
            )
        )


def parse_args(args):
    """Parse command line arguments and set default values."""
    parser = argparse.ArgumentParser(description="Compare Performance tests.")
    parser.add_argument(
        "--old-file", help="Baseline performance test suite (csv file)", required=True
    )
    parser.add_argument(
        "--new-file", help="New performance test suite (csv file)", required=True
    )
    parser.add_argument(
        "--format",
        choices=["markdown", "git", "html"],
        help="Output format. Default is markdown.",
        default="markdown",
    )
    parser.add_argument("--output", help="Output file name")
    parser.add_argument(
        "--changes-only", help="Output only affected tests", action="store_true"
    )
    parser.add_argument(
        "--single-table",
        help="Combine data in a single table in git and markdown formats",
        action="store_true",
    )
    parser.add_argument(
        "--delta-threshold",
        help="Delta threshold. Default 0.05.",
        type=float,
        default=0.05,
    )
    return parser.parse_args(args)
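
# An example invocation using the flags defined above (file names are
# hypothetical):
#
#   $ compare_perf_tests.py --old-file baseline.log --new-file branch.log \
#         --format markdown --changes-only --output report.md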


def create_report(
    old_results,
    new_results,
    delta_threshold,
    format,
    changes_only=True,
    single_table=True,
):
    comparator = TestComparator(old_results, new_results, delta_threshold)
    formatter = ReportFormatter(comparator, changes_only, single_table)
    formats = {
        "markdown": formatter.markdown,
        "git": formatter.git,
        "html": formatter.html,
    }

    report = formats[format]()
    return report


def main():
    """Compare benchmarks for changes in a formatted report."""
    args = parse_args(sys.argv[1:])
    report = create_report(
        LogParser.results_from_file(args.old_file),
        LogParser.results_from_file(args.new_file),
        args.delta_threshold,
        args.format,
        args.changes_only,
        args.single_table,
    )
    print(report)

    if args.output:
        with open(args.output, "w") as f:
            f.write(report)


if __name__ == "__main__":
    sys.exit(main())