mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Reintroduced a feature lost during the `BenchmarkInfo` modernization: all registered benchmarks are ordered alphabetically and assigned an index. This number can be used as a shortcut to invoke the test instead of its full name. (Adding and removing tests from the suite will naturally reassign the indices, but they are stable for a given build.)

The `--list` parameter now prints the test *number*, *name* and *tags* separated by a delimiter. The `--list` output format is modified from:

````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
...
````

to this:

````
\#,Test,[Tags]
2,AngryPhonebook,[String, api, validation]
…
````

(There isn't a backslash before the #; git was eating the whole line without it.)

Note: Test number 1 is Ackermann, which is marked as "skip", so it's not listed with the default `skip-tags` value.

This fixes the issue where running tests via `Benchmark_Driver` always reported each test as number 1: each test is run independently, therefore every invocation was "first". Restoring test numbers returns this to the original behavior, where the number reported in the first column when executing the tests is the test's ordinal number in the Swift Benchmark Suite.
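A quick illustration (hypothetical session, assuming the index assignment shown above, where number 2 is AngryPhonebook): a test can now be invoked by its number as well as by its name.

````
$ Benchmark_Driver run 2                 # runs AngryPhonebook by index
$ Benchmark_Driver run AngryPhonebook    # equivalent invocation by name
````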
490 lines
18 KiB
Python
Executable File
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# ===--- Benchmark_Driver ------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//

from __future__ import print_function

import argparse
import datetime
import glob
import json
import os
import re
import subprocess
import sys
import time
import urllib
import urllib2

DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))


def parse_results(res, optset):
    # Parse lines like this:
    # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),PEAK_MEMORY(B)
    score_re = re.compile(r"(\d+),[ \t]*(\w+)," +
                          ",".join([r"[ \t]*([\d.]+)"] * 7))
    # The Totals line is parsed the same way; the empty first group keeps
    # the group numbering aligned with score_re.
    total_re = re.compile(r"()(Totals)," +
                          ",".join([r"[ \t]*([\d.]+)"] * 7))
    key_group = 2
    val_group = 4
    mem_group = 9

    tests = []
    for line in res.splitlines():
        m = score_re.match(line)
        if not m:
            m = total_re.match(line)
            if not m:
                continue
        testresult = int(m.group(val_group))
        testname = m.group(key_group)
        test = {}
        test['Data'] = [testresult]
        test['Info'] = {}
        test['Name'] = "nts.swift/" + optset + "." + testname + ".exec"
        tests.append(test)
        if testname != 'Totals':
            mem_testresult = int(m.group(mem_group))
            mem_test = {}
            mem_test['Data'] = [mem_testresult]
            mem_test['Info'] = {}
            mem_test['Name'] = "nts.swift/mem_maxrss." + \
                optset + "." + testname + ".mem"
            tests.append(mem_test)
    return tests


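# Example for parse_results (illustrative names and values): given the
# driver output line
#     42,SomeBenchmark,10,100,105,102,1,101,20000
# parse_results(line, 'O') yields two entries:
#     'nts.swift/O.SomeBenchmark.exec' with Data [100] (the MIN column)
#     'nts.swift/mem_maxrss.O.SomeBenchmark.mem' with Data [20000]

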
def submit_to_lnt(data, url):
    print("\nSubmitting results to LNT server...")
    json_report = {'input_data': json.dumps(data), 'commit': '1'}
    data = urllib.urlencode(json_report)
    response_str = urllib2.urlopen(urllib2.Request(url, data))
    response = json.loads(response_str.read())
    if 'success' in response:
        print("Server response:\tSuccess")
    else:
        print("Server response:\tError")
        print("Error:\t", response['error'])
        sys.exit(1)


def instrument_test(driver_path, test, num_samples):
    """Run a test and instrument its peak memory use"""
    test_outputs = []
    for _ in range(num_samples):
        test_output_raw = subprocess.check_output(
            ['time', '-lp', driver_path, test],
            stderr=subprocess.STDOUT
        )
        # BSD `time -l` appends resource usage statistics to the output;
        # this assumes the maximum resident set size is reported on the
        # 15th line from the end.
        peak_memory = re.match(r'\s*(\d+)\s*maximum resident set size',
                               test_output_raw.split('\n')[-15]).group(1)
        # The second whitespace-separated token is the test's CSV result
        # line (the first token is the column header).
        test_outputs.append(test_output_raw.split()[1].split(',') +
                            [peak_memory])

    # Average sample results
    num_samples_index = 2
    min_index = 3
    max_index = 4
    avg_start_index = 5

    # TODO: Correctly take stdev
    avg_test_output = test_outputs[0]
    avg_test_output[avg_start_index:] = map(int,
                                            avg_test_output[avg_start_index:])
    for test_output in test_outputs[1:]:
        for i in range(avg_start_index, len(test_output)):
            avg_test_output[i] += int(test_output[i])
    for i in range(avg_start_index, len(avg_test_output)):
        avg_test_output[i] = int(round(avg_test_output[i] /
                                       float(len(test_outputs))))
    avg_test_output[num_samples_index] = num_samples
    avg_test_output[min_index] = min(
        test_outputs, key=lambda x: int(x[min_index]))[min_index]
    avg_test_output[max_index] = max(
        test_outputs, key=lambda x: int(x[max_index]))[max_index]
    avg_test_output = map(str, avg_test_output)

    return avg_test_output


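# Example for instrument_test (illustrative values): with num_samples=2
# and per-sample result lines
#     1,Ackermann,1,169,169,169,0,169  (+ peak_memory 10000)
#     1,Ackermann,1,173,173,173,0,173  (+ peak_memory 12000)
# the averaged output keeps the first two columns, sets SAMPLES to 2,
# takes MIN=169 and MAX=173 across samples, and averages the remaining
# columns, e.g. MEAN becomes (169 + 173) / 2 = 171 and peak memory 11000.

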
def get_tests(driver_path, args):
    """Return a list of available performance tests"""
    driver = [driver_path, '--list']
    # Use tab delimiter for easier parsing to override the default comma.
    # (The third 'column' is always a comma-separated list of tags in square
    # brackets -- currently unused here.)
    driver.append('--delim=\t')
    if args.benchmarks or args.filters:
        driver.append('--skip-tags=')  # list all tests, don't skip any tags
    index_name_pairs = [
        line.split('\t')[:2] for line in
        subprocess.check_output(driver).split('\n')[1:-1]
    ]
    indices, names = zip(*index_name_pairs)  # unzip list of pairs into 2 lists
    if args.filters:
        regexes = [re.compile(pattern) for pattern in args.filters]
        return sorted(list(set([name for pattern in regexes
                                for name in names if pattern.match(name)])))
    if not args.benchmarks:
        return names
    benchmarks = set(args.benchmarks)
    index_to_name = dict(index_name_pairs)
    indexed_names = [index_to_name[i]
                     for i in benchmarks.intersection(set(indices))]
    return sorted(list(
        benchmarks.intersection(set(names)).union(indexed_names)))


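# Example for get_tests (illustrative, assuming the numbering from the
# commit message above): with args.benchmarks == ['2', 'Ackermann'],
# the index '2' resolves through index_to_name to 'AngryPhonebook' and
# is unioned with the literal name, so the function returns
# ['Ackermann', 'AngryPhonebook'].

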
def get_current_git_branch(git_repo_path):
    """Return the selected branch for the repo `git_repo_path`"""
    return subprocess.check_output(
        ['git', '-C', git_repo_path, 'rev-parse',
         '--abbrev-ref', 'HEAD'], stderr=subprocess.STDOUT).strip()


def get_git_head_ID(git_repo_path):
    """Return the short identifier for the HEAD commit of the repo
    `git_repo_path`"""
    return subprocess.check_output(
        ['git', '-C', git_repo_path, 'rev-parse',
         '--short', 'HEAD'], stderr=subprocess.STDOUT).strip()


def log_results(log_directory, driver, formatted_output, swift_repo=None):
    """Log `formatted_output` to a branch-specific directory in
    `log_directory`
    """
    try:
        branch = get_current_git_branch(swift_repo)
    except (OSError, subprocess.CalledProcessError):
        branch = None
    try:
        head_ID = '-' + get_git_head_ID(swift_repo)
    except (OSError, subprocess.CalledProcessError):
        head_ID = ''
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    if branch:
        output_directory = os.path.join(log_directory, branch)
    else:
        output_directory = log_directory
    driver_name = os.path.basename(driver)
    try:
        os.makedirs(output_directory)
    except OSError:
        pass  # directory may already exist
    log_file = os.path.join(output_directory,
                            driver_name + '-' + timestamp + head_ID + '.log')
    print('Logging results to: %s' % log_file)
    with open(log_file, 'w') as f:
        f.write(formatted_output)


def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False,
                   log_directory=None, swift_repo=None):
    """Run perf tests individually and return results in a format that's
    compatible with `parse_results`. If `benchmarks` is not empty,
    only run tests included in it.
    """
    # Set a constant hash seed. Some tests are currently sensitive to
    # fluctuations in the number of hash collisions.
    #
    # FIXME: This should only be set in the environment of the child process
    # that runs the tests.
    os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"

    (total_tests, total_min, total_max, total_mean) = (0, 0, 0, 0)
    output = []
    headings = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'MAX(μs)', 'MEAN(μs)',
                'SD(μs)', 'MEDIAN(μs)', 'MAX_RSS(B)']
    line_format = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}'
    if verbose and log_directory:
        print(line_format.format(*headings))
    for test in benchmarks:
        test_output = instrument_test(driver, test, num_samples)
        if test_output[0] == 'Totals':
            continue
        if verbose:
            if log_directory:
                print(line_format.format(*test_output))
            else:
                print(','.join(test_output))
        output.append(test_output)
        (samples, _min, _max, mean) = map(int, test_output[2:6])
        total_tests += 1
        total_min += _min
        total_max += _max
        total_mean += mean
    if not output:
        return
    formatted_output = '\n'.join([','.join(l) for l in output])
    totals = map(str, ['Totals', total_tests, total_min, total_max,
                       total_mean, '0', '0', '0'])
    totals_output = '\n\n' + ','.join(totals)
    if verbose:
        if log_directory:
            print(line_format.format(*([''] + totals)))
        else:
            print(totals_output[1:])
    formatted_output += totals_output
    if log_directory:
        log_results(log_directory, driver, formatted_output, swift_repo)
    return formatted_output


def submit(args):
    print("SVN revision:\t", args.revision)
    print("Machine name:\t", args.machine)
    print("Iterations:\t", args.iterations)
    print("Optimizations:\t", ','.join(args.optimization))
    print("LNT host:\t", args.lnt_host)
    starttime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    print("Start time:\t", starttime)
    data = {}
    data['Tests'] = []
    data['Machine'] = {'Info': {'name': args.machine}, 'Name': args.machine}
    print("\nRunning benchmarks...")
    for optset in args.optimization:
        print("Opt level:\t", optset)
        file = os.path.join(args.tests, "Benchmark_" + optset)
        try:
            res = run_benchmarks(
                file, benchmarks=get_tests(file, args),
                num_samples=args.iterations)
            data['Tests'].extend(parse_results(res, optset))
        except subprocess.CalledProcessError as e:
            print("Execution failed. Test results are empty.")
            print("Process output:\n", e.output)

    endtime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    data['Run'] = {'End Time': endtime,
                   'Info': {'inferred_run_order': str(args.revision),
                            'run_order': str(args.revision),
                            'tag': 'nts',
                            'test_suite_revision': 'None'},
                   'Start Time': starttime}
    print("End time:\t", endtime)

    submit_to_lnt(data, args.lnt_host)
    return 0


def run(args):
    optset = args.optimization
    file = os.path.join(args.tests, "Benchmark_" + optset)
    run_benchmarks(
        file, benchmarks=get_tests(file, args),
        num_samples=args.iterations, verbose=True,
        log_directory=args.output_dir,
        swift_repo=args.swift_repo)
    return 0


def format_name(log_path):
    """Return the filename and directory for a log file"""
    return '/'.join(log_path.split('/')[-2:])


def compare_logs(compare_script, new_log, old_log, log_dir, opt):
    """Write a markdown diff of the log files at paths `new_log` and
    `old_log` to `log_dir`"""
    print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
    subprocess.call([compare_script, '--old-file', old_log,
                     '--new-file', new_log, '--format', 'markdown',
                     '--output', os.path.join(log_dir, 'latest_compare_{0}.md'
                                              .format(opt))])


def compare(args):
    log_dir = args.log_dir
    swift_repo = args.swift_repo
    compare_script = args.compare_script
    baseline_branch = args.baseline_branch
    current_branch = get_current_git_branch(swift_repo)
    current_branch_dir = os.path.join(log_dir, current_branch)
    baseline_branch_dir = os.path.join(log_dir, baseline_branch)

    if current_branch != baseline_branch and \
       not os.path.isdir(baseline_branch_dir):
        print(('Unable to find benchmark logs for {baseline_branch} branch. ' +
               'Set a baseline benchmark log by passing --benchmark to ' +
               'build-script while on {baseline_branch} branch.')
              .format(baseline_branch=baseline_branch))
        return 1

    recent_logs = {}
    for branch_dir in [current_branch_dir, baseline_branch_dir]:
        for opt in ['O', 'Onone']:
            recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted(
                glob.glob(os.path.join(
                    branch_dir, 'Benchmark_' + opt + '-*.log')),
                key=os.path.getctime, reverse=True)

    if current_branch == baseline_branch:
        if len(recent_logs[baseline_branch + '_O']) > 1 and \
           len(recent_logs[baseline_branch + '_Onone']) > 1:
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][1],
                         log_dir, 'Onone')
        else:
            print(('{baseline_branch}/{baseline_branch} comparison ' +
                   'skipped: no previous {baseline_branch} logs')
                  .format(baseline_branch=baseline_branch))
    else:
        # TODO: Check for outdated baseline branch log
        if len(recent_logs[current_branch + '_O']) == 0 or \
           len(recent_logs[current_branch + '_Onone']) == 0:
            print('branch sanity failure: missing branch logs')
            return 1

        if len(recent_logs[current_branch + '_O']) == 1 or \
           len(recent_logs[current_branch + '_Onone']) == 1:
            print('branch/branch comparison skipped: no previous branch logs')
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[current_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[current_branch + '_Onone'][1],
                         log_dir, 'Onone')

        if len(recent_logs[baseline_branch + '_O']) == 0 or \
           len(recent_logs[baseline_branch + '_Onone']) == 0:
            print(('branch/{baseline_branch} failure: no {baseline_branch} ' +
                   'logs')
                  .format(baseline_branch=baseline_branch))
            return 1
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][0],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][0],
                         log_dir, 'Onone')

    # TODO: Fail on large regressions

    return 0


def positive_int(value):
    ivalue = int(value)
    if ivalue <= 0:
        raise ValueError
    return ivalue


def main():
    parser = argparse.ArgumentParser(
        epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*'
    )
    subparsers = parser.add_subparsers(
        title='Swift benchmark driver commands',
        help='See COMMAND -h for additional arguments', metavar='<command>')

    parent_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = parent_parser.add_mutually_exclusive_group()
    benchmarks_group.add_argument(
        'benchmarks',
        default=[],
        help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK")
    benchmarks_group.add_argument(
        '-f', '--filter', dest='filters', action='append',
        help='run all tests whose name matches regular expression PATTERN, ' +
             'multiple filters are supported', metavar="PATTERN")
    parent_parser.add_argument(
        '-t', '--tests',
        help='directory containing Benchmark_O{,none,size} ' +
             '(default: DRIVER_DIR)',
        default=DRIVER_DIR)

    submit_parser = subparsers.add_parser(
        'submit',
        help='Run benchmarks and submit results to LNT',
        parents=[parent_parser])
    submit_parser.add_argument(
        '-o', '--optimization', nargs='+',
        help='optimization levels to use (default: O Onone Osize)',
        default=['O', 'Onone', 'Osize'])
    submit_parser.add_argument(
        '-i', '--iterations',
        help='number of times to run each test (default: 10)',
        type=positive_int, default=10)
    submit_parser.add_argument(
        '-m', '--machine', required=True,
        help='LNT machine name')
    submit_parser.add_argument(
        '-r', '--revision', required=True,
        help='SVN revision of compiler to identify the LNT run', type=int)
    submit_parser.add_argument(
        '-l', '--lnt_host', required=True,
        help='LNT host to submit results to')
    submit_parser.set_defaults(func=submit)

    run_parser = subparsers.add_parser(
        'run',
        help='Run benchmarks and output results to stdout',
        parents=[parent_parser])
    run_parser.add_argument(
        '-o', '--optimization',
        metavar='OPT',
        choices=['O', 'Onone', 'Osize'],
        help='optimization level to use: {O,Onone,Osize}, (default: O)',
        default='O')
    run_parser.add_argument(
        '-i', '--iterations',
        help='number of times to run each test (default: 1)',
        type=positive_int, default=1)
    run_parser.add_argument(
        '--output-dir',
        help='log results to directory (default: no logging)')
    run_parser.add_argument(
        '--swift-repo',
        help='absolute path to Swift source repo for branch comparison')
    run_parser.set_defaults(func=run)

    compare_parser = subparsers.add_parser(
        'compare',
        help='Compare benchmark results')
    compare_parser.add_argument(
        '--log-dir', required=True,
        help='directory containing benchmark logs')
    compare_parser.add_argument(
        '--swift-repo', required=True,
        help='absolute path to Swift source repo')
    compare_parser.add_argument(
        '--compare-script', required=True,
        help='absolute path to compare script')
    compare_parser.add_argument(
        '--baseline-branch', default='master',
        help='attempt to compare results to baseline results for specified '
             'branch (default: master)')
    compare_parser.set_defaults(func=compare)

    args = parser.parse_args()
    if args.func != compare and isinstance(args.optimization, list):
        args.optimization = sorted(list(set(args.optimization)))
    return args.func(args)


if __name__ == '__main__':
    sys.exit(main())