swift-mirror/benchmark/scripts/Benchmark_Driver
Pavol Vaskovic a84db83062 [benchmark] BenchmarkDriver can run tests
The `run` method on `BenchmarkDriver` invokes the test harness with the specified number of iterations and samples. It supports measuring memory use, and in verbose mode it also collects individual samples and monitors system load by counting the number of voluntary and involuntary context switches.

Output is parsed using `LogParser` from `compare_perf_tests.py`. This makes that file a required dependency of the driver, so it is also copied to the bin directory during the build.
2018-08-17 08:39:50 +02:00
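
For illustration, a minimal sketch of exercising the new `run` method from Python. The harness directory and the benchmark name below are hypothetical placeholders; the sketch assumes a built `Benchmark_O` binary in that directory and uses the attributes of `PerformanceTestResult` from `compare_perf_tests.py`.

    # Illustrative only: parse hypothetical arguments, build the driver, and
    # take three samples of a single benchmark while measuring memory use.
    args = parse_args(['run', '-t', '/path/to/benchmark/bin', 'AngryPhonebook'])
    driver = BenchmarkDriver(args)
    result = driver.run('AngryPhonebook', num_samples=3, num_iters=1,
                        verbose=True, measure_memory=True)
    # `result` is a PerformanceTestResult parsed by LogParser (times in μs).
    print('%s: min %s, max %s' % (result.name, result.min, result.max))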


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ===--- Benchmark_Driver ------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
import argparse
import glob
import os
import re
import subprocess
import sys
import time

from compare_perf_tests import LogParser

DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))


class BenchmarkDriver(object):
"""Executes tests from Swift Benchmark Suite."""

    def __init__(self, args, tests=None, _subprocess=None, parser=None):
"""Initialized with command line arguments.
Optional parameters for injecting dependencies; used for testing.
"""
self.args = args
self._subprocess = _subprocess or subprocess
self.all_tests = []
self.tests = tests or self._get_tests()
self.parser = parser or LogParser()
self.results = {}

    def _invoke(self, cmd):
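        """Invoke the given command and return its combined stdout/stderr."""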
return self._subprocess.check_output(
cmd, stderr=self._subprocess.STDOUT)

    @property
def test_harness(self):
"""Full path to test harness binary."""
suffix = (self.args.optimization if hasattr(self.args, 'optimization')
else 'O')
return os.path.join(self.args.tests, "Benchmark_" + suffix)

    @property
def _cmd_list_benchmarks(self):
# Use tab delimiter for easier parsing to override the default comma.
# (The third 'column' is always comma-separated list of tags in square
# brackets -- currently unused here.)
return [self.test_harness, '--list', '--delim=\t'] + (
['--skip-tags='] if (self.args.benchmarks or
self.args.filters) else [])

    def _get_tests(self):
"""Return a list of performance tests to run."""
index_name_pairs = [
line.split('\t')[:2] for line in
self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1]
]
# unzip list of pairs into 2 lists
indices, self.all_tests = map(list, zip(*index_name_pairs))
if self.args.filters:
return self._tests_matching_patterns()
if self.args.benchmarks:
return self._tests_by_name_or_index(indices)
return self.all_tests

    def _tests_matching_patterns(self):
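        """Return tests with names matching any of the regex filters."""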
regexes = [re.compile(pattern) for pattern in self.args.filters]
return sorted(list(set([name for pattern in regexes
for name in self.all_tests
if pattern.match(name)])))

    def _tests_by_name_or_index(self, indices):
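        """Return tests requested by name or by their ordinal number."""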
benchmarks = set(self.args.benchmarks)
index_to_name = dict(zip(indices, self.all_tests))
indexed_names = [index_to_name[i]
for i in benchmarks.intersection(set(indices))]
return sorted(list(
benchmarks.intersection(set(self.all_tests)).union(indexed_names)))

    def run(self, test, num_samples=None, num_iters=None,
verbose=None, measure_memory=False):
"""Execute benchmark and gather results."""
num_samples = num_samples or 1
num_iters = num_iters or 0 # automatically determine N to run for 1s
cmd = self._cmd_run(
test, num_samples, num_iters, verbose, measure_memory)
output = self._invoke(cmd)
result = self.parser.results_from_string(output).items()[0][1]
return result

    def _cmd_run(self, test, num_samples, num_iters, verbose, measure_memory):
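        """Compose the command line to run the test with given options."""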
cmd = [self.test_harness, test]
if num_samples > 1:
cmd.append('--num-samples={0}'.format(num_samples))
if num_iters > 0:
cmd.append('--num-iters={0}'.format(num_iters))
if verbose:
cmd.append('--verbose')
if measure_memory:
cmd.append('--memory')
return cmd


def instrument_test(driver_path, test, num_samples):
    """Run a test and instrument its peak memory use."""
test_outputs = []
for _ in range(num_samples):
test_output_raw = subprocess.check_output(
['time', '-lp', driver_path, test],
stderr=subprocess.STDOUT
)
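        # `time -l` reports peak memory as "maximum resident set size";
        # the value is read from a fixed offset near the end of the output.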
        peak_memory = re.match(r'\s*(\d+)\s*maximum resident set size',
                               test_output_raw.split('\n')[-15]).group(1)
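        # Keep the harness's comma-separated result row (the second
        # whitespace-separated token) and append the peak memory figure.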
test_outputs.append(test_output_raw.split()[1].split(',') +
[peak_memory])
# Average sample results
num_samples_index = 2
min_index = 3
max_index = 4
avg_start_index = 5
# TODO: Correctly take stdev
avg_test_output = test_outputs[0]
avg_test_output[avg_start_index:] = map(int,
avg_test_output[avg_start_index:])
for test_output in test_outputs[1:]:
for i in range(avg_start_index, len(test_output)):
avg_test_output[i] += int(test_output[i])
for i in range(avg_start_index, len(avg_test_output)):
avg_test_output[i] = int(round(avg_test_output[i] /
float(len(test_outputs))))
avg_test_output[num_samples_index] = num_samples
avg_test_output[min_index] = min(
test_outputs, key=lambda x: int(x[min_index]))[min_index]
avg_test_output[max_index] = max(
test_outputs, key=lambda x: int(x[max_index]))[max_index]
avg_test_output = map(str, avg_test_output)
return avg_test_output


def get_current_git_branch(git_repo_path):
    """Return the currently checked out branch of the repo `git_repo_path`."""
return subprocess.check_output(
['git', '-C', git_repo_path, 'rev-parse',
'--abbrev-ref', 'HEAD'], stderr=subprocess.STDOUT).strip()


def get_git_head_ID(git_repo_path):
    """Return the short identifier for the HEAD commit of the repo
    `git_repo_path`."""
return subprocess.check_output(
['git', '-C', git_repo_path, 'rev-parse',
'--short', 'HEAD'], stderr=subprocess.STDOUT).strip()


def log_results(log_directory, driver, formatted_output, swift_repo=None):
    """Log `formatted_output` to a branch-specific directory in
    `log_directory`.
    """
try:
branch = get_current_git_branch(swift_repo)
except (OSError, subprocess.CalledProcessError):
branch = None
try:
head_ID = '-' + get_git_head_ID(swift_repo)
except (OSError, subprocess.CalledProcessError):
head_ID = ''
timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
if branch:
output_directory = os.path.join(log_directory, branch)
else:
output_directory = log_directory
driver_name = os.path.basename(driver)
try:
os.makedirs(output_directory)
except OSError:
pass
log_file = os.path.join(output_directory,
driver_name + '-' + timestamp + head_ID + '.log')
print('Logging results to: %s' % log_file)
with open(log_file, 'w') as f:
f.write(formatted_output)


def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False,
log_directory=None, swift_repo=None):
"""Run perf tests individually and return results in a format that's
compatible with `parse_results`. If `benchmarks` is not empty,
only run tests included in it.
"""
# Set a constant hash seed. Some tests are currently sensitive to
# fluctuations in the number of hash collisions.
#
# FIXME: This should only be set in the environment of the child process
# that runs the tests.
os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"
(total_tests, total_min, total_max, total_mean) = (0, 0, 0, 0)
output = []
headings = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'MAX(μs)', 'MEAN(μs)',
'SD(μs)', 'MEDIAN(μs)', 'MAX_RSS(B)']
line_format = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}'
if verbose and log_directory:
print(line_format.format(*headings))
for test in benchmarks:
test_output = instrument_test(driver, test, num_samples)
if test_output[0] == 'Totals':
continue
if verbose:
if log_directory:
print(line_format.format(*test_output))
else:
print(','.join(test_output))
output.append(test_output)
(samples, _min, _max, mean) = map(int, test_output[2:6])
total_tests += 1
total_min += _min
total_max += _max
total_mean += mean
if not output:
return
formatted_output = '\n'.join([','.join(l) for l in output])
totals = map(str, ['Totals', total_tests, total_min, total_max,
total_mean, '0', '0', '0'])
totals_output = '\n\n' + ','.join(totals)
if verbose:
if log_directory:
print(line_format.format(*([''] + totals)))
else:
print(totals_output[1:])
formatted_output += totals_output
if log_directory:
log_results(log_directory, driver, formatted_output, swift_repo)
return formatted_output


def run(args):
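    """Run the benchmarks and log results if an output directory is set."""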
driver = BenchmarkDriver(args)
run_benchmarks(
driver.test_harness, benchmarks=driver.tests,
num_samples=args.iterations, verbose=True,
log_directory=args.output_dir,
swift_repo=args.swift_repo)
return 0


def format_name(log_path):
    """Return the directory and filename portion of a log path."""
return '/'.join(log_path.split('/')[-2:])


def compare_logs(compare_script, new_log, old_log, log_dir, opt):
    """Compare the logs at paths `old_log` and `new_log` and write a
    markdown report into `log_dir`."""
print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
subprocess.call([compare_script, '--old-file', old_log,
'--new-file', new_log, '--format', 'markdown',
'--output', os.path.join(log_dir, 'latest_compare_{0}.md'
.format(opt))])


def compare(args):
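    """Compare benchmark results of the current and baseline branches."""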
log_dir = args.log_dir
swift_repo = args.swift_repo
compare_script = args.compare_script
baseline_branch = args.baseline_branch
current_branch = get_current_git_branch(swift_repo)
current_branch_dir = os.path.join(log_dir, current_branch)
baseline_branch_dir = os.path.join(log_dir, baseline_branch)
if current_branch != baseline_branch and \
not os.path.isdir(baseline_branch_dir):
print(('Unable to find benchmark logs for {baseline_branch} branch. ' +
'Set a baseline benchmark log by passing --benchmark to ' +
'build-script while on {baseline_branch} branch.')
.format(baseline_branch=baseline_branch))
return 1
recent_logs = {}
for branch_dir in [current_branch_dir, baseline_branch_dir]:
for opt in ['O', 'Onone']:
recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted(
glob.glob(os.path.join(
branch_dir, 'Benchmark_' + opt + '-*.log')),
key=os.path.getctime, reverse=True)
if current_branch == baseline_branch:
if len(recent_logs[baseline_branch + '_O']) > 1 and \
len(recent_logs[baseline_branch + '_Onone']) > 1:
compare_logs(compare_script,
recent_logs[baseline_branch + '_O'][0],
recent_logs[baseline_branch + '_O'][1],
log_dir, 'O')
compare_logs(compare_script,
recent_logs[baseline_branch + '_Onone'][0],
recent_logs[baseline_branch + '_Onone'][1],
log_dir, 'Onone')
else:
print(('{baseline_branch}/{baseline_branch} comparison ' +
'skipped: no previous {baseline_branch} logs')
.format(baseline_branch=baseline_branch))
else:
# TODO: Check for outdated baseline branch log
if len(recent_logs[current_branch + '_O']) == 0 or \
len(recent_logs[current_branch + '_Onone']) == 0:
print('branch sanity failure: missing branch logs')
return 1
if len(recent_logs[current_branch + '_O']) == 1 or \
len(recent_logs[current_branch + '_Onone']) == 1:
print('branch/branch comparison skipped: no previous branch logs')
else:
compare_logs(compare_script,
recent_logs[current_branch + '_O'][0],
recent_logs[current_branch + '_O'][1],
log_dir, 'O')
compare_logs(compare_script,
recent_logs[current_branch + '_Onone'][0],
recent_logs[current_branch + '_Onone'][1],
log_dir, 'Onone')
if len(recent_logs[baseline_branch + '_O']) == 0 or \
len(recent_logs[baseline_branch + '_Onone']) == 0:
print(('branch/{baseline_branch} failure: no {baseline_branch} ' +
'logs')
.format(baseline_branch=baseline_branch))
return 1
else:
compare_logs(compare_script,
recent_logs[current_branch + '_O'][0],
recent_logs[baseline_branch + '_O'][0],
log_dir, 'O')
compare_logs(compare_script,
recent_logs[current_branch + '_Onone'][0],
recent_logs[baseline_branch + '_Onone'][0],
log_dir, 'Onone')
# TODO: Fail on large regressions
return 0


def positive_int(value):
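    """Verify the value is a positive integer and return it."""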
ivalue = int(value)
if not (ivalue > 0):
raise ValueError
return ivalue


def parse_args(args):
"""Parse command line arguments and set default values."""
parser = argparse.ArgumentParser(
epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*'
)
subparsers = parser.add_subparsers(
title='Swift benchmark driver commands',
help='See COMMAND -h for additional arguments', metavar='COMMAND')
shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
benchmarks_group.add_argument(
'benchmarks',
default=[],
help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK")
benchmarks_group.add_argument(
'-f', '--filter', dest='filters', action='append',
help='run all tests whose name match regular expression PATTERN, ' +
'multiple filters are supported', metavar="PATTERN")
shared_benchmarks_parser.add_argument(
'-t', '--tests',
help='directory containing Benchmark_O{,none,size} ' +
'(default: DRIVER_DIR)',
default=DRIVER_DIR)
shared_benchmarks_parser.add_argument(
'-o', '--optimization',
metavar='OPT',
choices=['O', 'Onone', 'Osize'],
help='optimization level to use: {O,Onone,Osize}, (default: O)',
default='O')
run_parser = subparsers.add_parser(
'run',
help='Run benchmarks and output results to stdout',
parents=[shared_benchmarks_parser])
run_parser.add_argument(
'-i', '--iterations',
help='number of times to run each test (default: 1)',
type=positive_int, default=1)
run_parser.add_argument(
'--output-dir',
help='log results to directory (default: no logging)')
run_parser.add_argument(
'--swift-repo',
help='absolute path to the Swift source repository')
run_parser.set_defaults(func=run)
compare_parser = subparsers.add_parser(
'compare',
help='Compare benchmark results')
compare_parser.add_argument(
'--log-dir', required=True,
help='directory containing benchmark logs')
compare_parser.add_argument(
'--swift-repo', required=True,
help='absolute path to the Swift source repository')
compare_parser.add_argument(
'--compare-script', required=True,
help='absolute path to compare script')
compare_parser.add_argument(
'--baseline-branch', default='master',
help='attempt to compare results to baseline results for specified '
'branch (default: master)')
compare_parser.set_defaults(func=compare)
return parser.parse_args(args)


def main():
args = parse_args(sys.argv[1:])
return args.func(args)


if __name__ == '__main__':
exit(main())