[benchmark] Measure and analyze benchmark runtimes
`BenchmarkDoctor` measures benchmark execution (using `BenchmarkDriver`) and verifies that benchmark runtimes stay under 2500 μs.
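As a quick orientation before the diff, here is a minimal sketch (not code from this commit) of the runtime verdicts that `_optimized_runtime_in_range` below hands out; the thresholds are the ones in the diff:

# Standalone sketch; thresholds match _optimized_runtime_in_range below.
def verdict(runtime_us):
    if runtime_us <= 2500:
        return 'ok'
    return 'error' if runtime_us >= 500000 else 'warning'

assert verdict(200) == 'ok'        # 'Cheetah' in the new tests
assert verdict(2501) == 'warning'  # 'Hare'
assert verdict(500000) == 'error'  # 'Tortoise'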
@@ -16,6 +16,7 @@
 import argparse
 import glob
 import logging
+import math
 import os
 import re
 import subprocess
@@ -185,6 +186,11 @@ class BenchmarkDoctor(object):
             LoggingReportFormatter(use_color=sys.stdout.isatty()))
         self.log.addHandler(self.console_handler)
         self.log.debug('Checking tests: %s', ', '.join(self.driver.tests))
+        self.requirements = [
+            self._name_matches_capital_words_convention,
+            self._name_is_at_most_40_chars_long,
+            self._optimized_runtime_in_range,
+        ]

     def __del__(self):
         """Unregister handler on exit."""
@@ -193,10 +199,11 @@ class BenchmarkDoctor(object):
     capital_words_re = re.compile('[A-Z][a-zA-Z0-9]+')

     @staticmethod
-    def _name_matches_capital_words_convention(test):
-        name = test
+    def _name_matches_capital_words_convention(measurements):
+        name = measurements['name']
         match = BenchmarkDoctor.capital_words_re.match(name)
         matched = match.group(0) if match else ''

         if name != matched:
             BenchmarkDoctor.log_naming.error(
+                "'%s' name doesn't conform to UpperCamelCase convention.",
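For illustration (this snippet is not part of the diff): `re.match` anchors at the start of the name, so a benchmark name conforms exactly when the greedy match covers the whole string.

import re

capital_words_re = re.compile('[A-Z][a-zA-Z0-9]+')

def conforms(name):
    # Name conforms when the match spans the entire string.
    match = capital_words_re.match(name)
    return (match.group(0) if match else '') == name

assert conforms('CapitalWordsConvention')
assert conforms('ABBRName')             # leading acronym still matches
assert not conforms('benchmarkName')    # must start with a capital
assert not conforms('Benchmark_Name')   # '_' stops the match early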
@@ -205,24 +212,101 @@ class BenchmarkDoctor(object):
                 'See http://bit.ly/UpperCamelCase')

     @staticmethod
-    def _name_is_at_most_40_chars_long(test):
-        if len(test) > 40:
-            BenchmarkDoctor.log_naming.warning(
-                "'%s' name is longer than 40 characters.", test)
+    def _name_is_at_most_40_chars_long(measurements):
+        name = measurements['name']
+
+        if len(name) > 40:
+            BenchmarkDoctor.log_naming.error(
+                "'%s' name is %d characters long.", name, len(name))
+            BenchmarkDoctor.log_naming.info(
+                'Benchmark name should not be longer than 40 characters.')
+
+    @staticmethod
+    def _select(measurements, num_iters=None, opt_level='O'):
+        prefix = measurements['name'] + ' ' + opt_level
+        prefix += '' if num_iters is None else (' i' + str(num_iters))
+        return [series for name, series in measurements.items()
+                if name.startswith(prefix)]
+
+    @staticmethod
+    def _optimized_runtime_in_range(measurements):
+        name = measurements['name']
+        runtime = min(BenchmarkDoctor._select(measurements),
+                      key=lambda r: r.min).min
+        if 2500 < runtime < 500000:
+            BenchmarkDoctor.log_runtime.warning(
+                "'%s' execution takes at least %d μs.", name, runtime)
+        elif 500000 <= runtime:
+            BenchmarkDoctor.log_runtime.error(
+                "'%s' execution takes at least %d μs.", name, runtime)
+        if 2500 < runtime:
+            factor = int(pow(2, math.ceil(math.log(runtime / 2500.0, 2))))
+            BenchmarkDoctor.log_runtime.info(
+                "Decrease the workload of '%s' by a factor of %d, "
+                "to be less than 2500 μs.", name, factor)
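A standalone rerun of the factor arithmetic above, for the two runtimes exercised in the new tests (illustrative snippet, not part of the diff):

import math

def workload_factor(runtime_us, target_us=2500.0):
    # Smallest power of two that scales runtime below the target.
    return int(pow(2, math.ceil(math.log(runtime_us / target_us, 2))))

assert workload_factor(2501) == 2      # 'Hare' in the tests below
assert workload_factor(500000) == 256  # 'Tortoise': 500000/2500 = 200 -> 2^8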
+    @staticmethod
+    def _adjusted_1s_samples(runtime):
+        u"""Return sample count that can be taken in approximately 1 second.
+
+        Based on the runtime (μs) of one sample taken with num-iters=1.
+        """
+        if runtime == 0:
+            return 2
+        s = 1000000 / float(runtime)  # samples for 1s run
+        s = int(pow(2, round(math.log(s, 2))))  # rounding to power of 2
+        return s if s > 2 else 2  # always take at least 2 samples
+
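Worked through for the 300 μs runtime used in the tests below (a standalone sketch of the same arithmetic):

import math

runtime = 300                           # μs per sample at num-iters=1
s = 1000000 / float(runtime)            # 3333.3 samples fit in one second
s = int(pow(2, round(math.log(s, 2))))  # log2(3333.3) = 11.7 rounds to 12
assert s == 4096                        # matches the debug log in the test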
+    def measure(self, benchmark):
+        """Measure benchmark with varying iterations and optimization levels.
+
+        Returns a dictionary with benchmark name and `PerformanceTestResult`s.
+        """
+        self.log.debug('Calibrating num-samples for {0}:'.format(benchmark))
+        r = self.driver.run(benchmark, num_samples=3, num_iters=1)  # calibrate
+        num_samples = self._adjusted_1s_samples(r.min)
+
+        def capped(s):
+            return min(s, 2048)
+        run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)]
+        opts = self.driver.args.optimization
+        opts = opts if isinstance(opts, list) else [opts]
+        self.log.debug(
+            'Runtime {0} μs yields {1} adjusted samples per second.'.format(
+                r.min, num_samples))
+        self.log.debug(
+            'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format(
+                benchmark, run_args[0][0], run_args[1][0]))
+
+        measurements = dict(
+            [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix),
+              self.driver.run(benchmark, num_samples=s, num_iters=i,
+                              measure_memory=True))
+             for o in opts
+             for s, i in run_args
+             for suffix in list('abcde')
+             ]
+        )
+        measurements['name'] = benchmark
+        return measurements
+
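The resulting dictionary keys each series as 'name opt-level iN' plus an a-e suffix, which is the prefix `_select` filters on. A small sketch with stand-in values (not part of the diff):

measurements = {'name': 'B1',
                'B1 O i1a': 'series-a', 'B1 O i1b': 'series-b',
                'B1 O i2a': 'series-c'}

prefix = measurements['name'] + ' O' + ' i2'  # opt_level='O', num_iters=2
selected = [series for name, series in measurements.items()
            if name.startswith(prefix)]
assert selected == ['series-c']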
+    def analyze(self, benchmark_measurements):
+        """Analyze whether benchmark fulfills all requirements."""
+        self.log.debug('Analyzing %s', benchmark_measurements['name'])
+        for rule in self.requirements:
+            rule(benchmark_measurements)
+
     def check(self):
         """Measure and analyze all enabled tests."""
-        checks = [
-            self._name_matches_capital_words_convention,
-            self._name_is_at_most_40_chars_long,
-        ]
         for test in self.driver.tests:
-            for check in checks:
-                check(test)
+            self.analyze(self.measure(test))

     @staticmethod
     def run_check(args):
         doctor = BenchmarkDoctor(args)
         doctor.check()
+        # TODO non-zero error code when errors are logged
+        # See https://stackoverflow.com/a/31142078/41307
         return 0

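Regarding the TODO above: one possible approach, sketched here as an assumption following the linked Stack Overflow answer, is a handler that counts ERROR records so `run_check` could return non-zero when any were emitted. `ErrorCountingHandler` is a hypothetical helper, not part of this commit:

import logging

class ErrorCountingHandler(logging.Handler):
    """Counts records at ERROR level and above (hypothetical helper)."""
    def __init__(self):
        super(ErrorCountingHandler, self).__init__(level=logging.ERROR)
        self.error_count = 0

    def emit(self, record):
        self.error_count += 1

# Sketch of how run_check could use it:
#     counter = ErrorCountingHandler()
#     doctor.log.addHandler(counter)
#     doctor.check()
#     return 1 if counter.error_count else 0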
@@ -290,7 +290,7 @@ class BenchmarkDriverMock(Mock):
             verbose, measure_memory):
         args = (test, num_samples, num_iters, verbose, measure_memory)
         self.calls.append(args)
-        return self.respond.get(args, '')
+        return self.respond.get(args, Stub(min=700))


 class TestLoggingReportFormatter(unittest.TestCase):
@@ -326,6 +326,13 @@ class TestLoggingReportFormatter(unittest.TestCase):
         self.assertEquals(f.format(lr), 'INFO Hi!')


+def _run(test, num_samples=None, num_iters=None, verbose=None,
+         measure_memory=False):
+    """Helper that constructs a tuple of arguments for the run method."""
+    return (
+        test, num_samples, num_iters, verbose, measure_memory)
+
+
 class TestBenchmarkDoctor(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -367,6 +374,41 @@ class TestBenchmarkDoctor(unittest.TestCase):
         self.assertTrue(isinstance(console_handler.formatter,
                                    LoggingReportFormatter))

+    def test_measure_10_independent_1s_benchmark_series(self):
+        """Measurement strategy takes 5 i1 and 5 i2 series.
+
+        Num-samples for the Benchmark Driver are calibrated to powers of two
+        that take measurements for approximately 1 s, based on a short
+        initial runtime sampling. Capped at 2k samples.
+        """
+        driver = BenchmarkDriverMock(tests=['B1'], responses=([
+            # calibration run, returns a stand-in for PerformanceTestResult
+            (_run('B1', num_samples=3, num_iters=1), Stub(min=300))] +
+            # 5x i1 series; with a 300 μs runtime it's possible to take 4096
+            # samples/s, but it should be capped at 2k
+            ([(_run('B1', num_samples=2048, num_iters=1,
+                    measure_memory=True), Stub(min=300))] * 5) +
+            # 5x i2 series
+            ([(_run('B1', num_samples=2048, num_iters=2,
+                    measure_memory=True), Stub(min=300))] * 5)
+        ))
+        doctor = BenchmarkDoctor(self.args, driver)
+        with captured_output() as (out, _):
+            measurements = doctor.measure('B1')
+
+        driver.assert_called_all_expected()
+        self.assert_contains(
+            ['name',
+             'B1 O i1a', 'B1 O i1b', 'B1 O i1c', 'B1 O i1d', 'B1 O i1e',
+             'B1 O i2a', 'B1 O i2b', 'B1 O i2c', 'B1 O i2d', 'B1 O i2e'],
+            measurements.keys())
+        self.assertEquals(measurements['name'], 'B1')
+        self.assert_contains(
+            ['Calibrating num-samples for B1:',
+             'Runtime 300 μs yields 4096 adjusted samples per second.',
+             'Measuring B1, 5 x i1 (2048 samples), 5 x i2 (2048 samples)'],
+            self.logs['debug'])
+
     def test_benchmark_name_matches_capital_words_conventions(self):
         driver = BenchmarkDriverMock(tests=[
             'BenchmarkName', 'CapitalWordsConvention', 'ABBRName',
@@ -400,7 +442,45 @@ class TestBenchmarkDoctor(unittest.TestCase):
         self.assertNotIn('BenchmarkName', output)
         self.assert_contains(
             ["'ThisTestNameIsTooLongAndCausesOverflowsInReports' name is "
-             "longer than 40 characters."], output)
+             "48 characters long."], self.logs['error'])
+        self.assert_contains(
+            ["Benchmark name should not be longer than 40 characters."],
+            self.logs['info'])
+
+    def test_benchmark_runtime_range(self):
+        """Optimized benchmark should run in less than 2500 μs.
+
+        With runtimes below 2500 μs, the chance of being interrupted in the
+        middle of a measurement by the expiry of the 10 ms quantum used by
+        the macOS scheduler stays below 1:4.
+
+        Warn about longer runtimes. Runtimes over half a second are an error.
+        """
+        def measurements(name, runtime):
+            return {'name': name,
+                    name + ' O i1a': Stub(min=runtime + 2),
+                    name + ' O i2a': Stub(min=runtime)}
+
+        with captured_output() as (out, _):
+            doctor = BenchmarkDoctor(self.args, BenchmarkDriverMock([]))
+            doctor.analyze(measurements('Cheetah', 200))
+            doctor.analyze(measurements('Hare', 2501))
+            doctor.analyze(measurements('Tortoise', 500000))
+        output = out.getvalue()
+
+        self.assertIn('runtime: ', output)
+        self.assertNotIn('Cheetah', output)
+        self.assert_contains(["'Hare' execution takes at least 2501 μs."],
+                             self.logs['warning'])
+        self.assert_contains(
+            ["Decrease the workload of 'Hare' by a factor of 2, "
+             "to be less than 2500 μs."], self.logs['info'])
+        self.assert_contains(
+            ["'Tortoise' execution takes at least 500000 μs."],
+            self.logs['error'])
+        self.assert_contains(
+            ["Decrease the workload of 'Tortoise' by a factor of 256, "
+             "to be less than 2500 μs."], self.logs['info'])


 if __name__ == '__main__':