#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ===--- run_smoke_bench -------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
#
# Performs a very fast check of which benchmarks regressed and improved.
#
# Initially runs the benchmarks with a low sample count and re-runs only those
# benchmarks which differ.
# Also reports code size differences.
#
# ===---------------------------------------------------------------------===//
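#
# Example invocation (illustrative only; the build directory paths and the
# report file name are placeholders, the flags are defined in main() below):
#
#   ./run_smoke_bench -O -threshold 5 -o report.md path/to/old-build path/to/new-build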

from __future__ import print_function

import argparse
import glob
import os
import subprocess
import sys
from imp import load_source

from compare_perf_tests import LogParser, TestComparator, create_report

# import Benchmark_Driver  # doesn't work because it lacks the '.py' extension
Benchmark_Driver = load_source(
    "Benchmark_Driver",
    os.path.join(os.path.dirname(os.path.abspath(__file__)), "Benchmark_Driver"),
)
# from Benchmark_Driver import BenchmarkDriver, BenchmarkDoctor, ...
BenchmarkDriver = Benchmark_Driver.BenchmarkDriver
BenchmarkDoctor = Benchmark_Driver.BenchmarkDoctor
MarkdownReportHandler = Benchmark_Driver.MarkdownReportHandler

VERBOSE = False


class DriverArgs(object):
    """Arguments for BenchmarkDriver."""

    def __init__(self, tests, optimization="O"):
        """Initialize with path to the build-dir and optimization level."""
        self.benchmarks = None
        self.filters = None
        self.tests = os.path.join(tests, "bin")
        self.optimization = optimization


def log(msg):
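    """Print the message and flush stdout so progress appears immediately."""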
    print(msg)
    sys.stdout.flush()


def main():
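    """Parse the command line arguments and run the requested comparisons."""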
    global VERBOSE
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-verbose", action="store_true", help="print verbose messages"
    )
    argparser.add_argument(
        "-O",
        action="append_const",
        const="O",
        dest="opt_levels",
        help="test -O benchmarks",
    )
    argparser.add_argument(
        "-Osize",
        action="append_const",
        const="Osize",
        dest="opt_levels",
        help="test -Osize benchmarks",
    )
    argparser.add_argument(
        "-Onone",
        action="append_const",
        const="Onone",
        dest="opt_levels",
        help="test -Onone benchmarks (except code size)",
    )
    argparser.add_argument(
        "-skip-code-size",
        action="store_true",
        help="Don't report code size differences",
    )
    argparser.add_argument(
        "-skip-performance",
        action="store_true",
        help="Don't report performance differences",
    )
    argparser.add_argument(
        "-skip-check-added",
        action="store_true",
        help="Don't validate newly added benchmarks",
    )
    argparser.add_argument(
        "-o",
        type=str,
        help="In addition to stdout, write the results into a markdown file",
    )
    argparser.add_argument(
        "-threshold",
        type=float,
        help="The performance threshold in %% which triggers a re-run",
        default=5,
    )
    argparser.add_argument(
        "-num-samples",
        type=int,
        help="The (minimum) number of samples to run",
        default=3,
    )
    argparser.add_argument(
        "-num-reruns",
        type=int,
        help="The number of re-runs until it's assumed to be a real change",
        default=8,
    )
    argparser.add_argument(
        "-platform", type=str, help="The benchmark build platform", default="macosx"
    )
    argparser.add_argument(
        "oldbuilddir", nargs=1, type=str, help="old benchmark build directory"
    )
    argparser.add_argument(
        "newbuilddir", nargs=1, type=str, help="new benchmark build directory"
    )
    args = argparser.parse_args()
    VERBOSE = args.verbose

    return test_opt_levels(args)


def test_opt_levels(args):
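    """Run the performance and code size comparisons for each optimization level.

    Writes the results to stdout and, if requested with -o, to a markdown file.
    Always returns 0.
    """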
    output_file = None
    if args.o:
        output_file = open(args.o, "w")

    changes = False
    for opt_level in args.opt_levels or ["O", "Osize", "Onone"]:
        log("####### Testing optimization level -" + opt_level + " #######")
        if not args.skip_performance:
            if test_performance(
                opt_level,
                args.oldbuilddir[0],
                args.newbuilddir[0],
                float(args.threshold) / 100,
                args.num_samples,
                args.num_reruns,
                output_file,
            ):
                changes = True

        # There is no point in reporting code size for Onone.
        if not args.skip_code_size and opt_level != "Onone":
            if report_code_size(
                opt_level,
                args.oldbuilddir[0],
                args.newbuilddir[0],
                args.platform,
                output_file,
            ):
                changes = True

    if not args.skip_code_size:
        if report_code_size(
            "swiftlibs",
            args.oldbuilddir[0],
            args.newbuilddir[0],
            args.platform,
            output_file,
        ):
            changes = True

    if not args.skip_check_added:
        check_added(args, output_file)

    if output_file:
        if changes:
            output_file.write(get_info_text())
        else:
            output_file.write("### No performance and code size changes")
        output_file.close()
    return 0


def measure(driver, tests, i):
    """Log and measure samples of the tests with the given driver.

    Collect an increasing number of samples, depending on the iteration.
    """
    num_samples = min(i + 3, 10)
    msg = " Iteration {0} for {1}: num samples = {2}, ".format(
        i, driver.args.tests, num_samples
    )
    msg += (
        "running all tests"
        if driver.all_tests == tests
        else "re-testing {0} tests".format(len(tests))
    )
    log(msg)
    driver.tests = tests
    return driver.run(num_samples=num_samples, sample_time=0.0025)


def merge(results, other_results):
    """Merge the other PerformanceTestResults into the first dictionary."""
    for test, result in other_results.items():
        results[test].merge(result)
    return results


def test_performance(
    opt_level, old_dir, new_dir, threshold, num_samples, num_reruns, output_file
):
    """Detect performance changes in benchmarks.

    Start fast with few samples per benchmark and gradually spend more time
    gathering more precise measurements of the change candidates.
    """

    i, unchanged_length_count = 0, 0
    old, new = [
        BenchmarkDriver(DriverArgs(dir, optimization=opt_level))
        for dir in [old_dir, new_dir]
    ]
    results = [measure(driver, driver.tests, i) for driver in [old, new]]
    tests = TestComparator(results[0], results[1], threshold)
    changed = tests.decreased + tests.increased

    while len(changed) > 0 and unchanged_length_count < num_reruns:
        i += 1
        if VERBOSE:
            log(" test again: " + str([test.name for test in changed]))
        results = [
            merge(the_results, measure(driver, [test.name for test in changed], i))
            for the_results, driver in zip(results, [old, new])
        ]
        tests = TestComparator(results[0], results[1], threshold)
        changed = tests.decreased + tests.increased
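        # If re-running did not reduce the number of changed benchmarks, count
        # it towards the give-up limit (num_reruns); otherwise reset the count.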
        if len(old.tests) == len(changed):
            unchanged_length_count += 1
        else:
            unchanged_length_count = 0

    log("")
    return report_results(
        "Performance: -" + opt_level, None, None, threshold * 1.4, output_file, *results
    )


def report_code_size(opt_level, old_dir, new_dir, platform, output_file):
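    """Compare code sizes between the old and the new build directory.

    For "swiftlibs" this compares the Swift dylibs, otherwise the benchmark
    object files built at the given optimization level.
    """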
    if opt_level == "swiftlibs":
        files = glob.glob(os.path.join(old_dir, "lib", "swift", platform, "*.dylib"))
    else:
        files = glob.glob(
            os.path.join(old_dir, opt_level + "-*" + platform + "*", "*.o")
        )

    idx = 1
    old_lines = ""
    new_lines = ""
    for oldfile in files:
        newfile = oldfile.replace(old_dir, new_dir, 1)
        if os.path.isfile(newfile):
            oldsize = get_codesize(oldfile)
            newsize = get_codesize(newfile)
            bname = os.path.basename(oldfile)

            def result_line(value):
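                # Build a line in the CSV log format that LogParser consumes:
                # test number, name, one sample, then the size repeated for the
                # min/max/mean columns, a standard deviation of 0, and the size
                # again as the median.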
                v = "," + str(value)
                return str(idx) + "," + bname + ",1" + (v * 3) + ",0" + v + "\n"

            old_lines += result_line(oldsize)
            new_lines += result_line(newsize)
            idx += 1

    return report_results(
        "Code size: -" + opt_level, old_lines, new_lines, 0.01, output_file
    )


def get_codesize(filename):
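    """Return the size of the __TEXT segment of a binary, using the 'size' tool."""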
    output = subprocess.check_output(["size", filename]).splitlines()
    header_line = output[0]
    data_line = output[1]
    if header_line.find("__TEXT") != 0:
        sys.exit("unexpected output from size command:\n" + "\n".join(output))
    return int(data_line.split("\t")[0])


def report_results(
    title,
    old_lines,
    new_lines,
    threshold,
    output_file,
    old_results=None,
    new_results=None,
):
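    """Print a comparison report to stdout and append it to the markdown file.

    Returns True if a non-empty markdown report was written, False otherwise.
    """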
    old_results = old_results or LogParser.results_from_string(old_lines)
    new_results = new_results or LogParser.results_from_string(new_lines)

    print("------- " + title + " -------")
    print(create_report(old_results, new_results, threshold, "git"))

    if output_file:
        report = create_report(old_results, new_results, threshold, "markdown")
        if report != "":
            output_file.write("### " + title + "\n")
            output_file.write(report)
            output_file.write("\n")
            return True
    return False


def get_info_text():
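    """Return the 'How to read the data' markdown text and a hardware overview."""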
    text = """
<details>
<summary><strong>How to read the data</strong></summary>
The tables contain differences in performance which are larger than 8% and
differences in code size which are larger than 1%.

If you see any unexpected regressions, you should consider fixing the
regressions before you merge the PR.

**Noise**: Sometimes the performance results (not code size!) contain false
alarms. Unexpected regressions which are marked with '(?)' are probably noise.
If you see regressions which you cannot explain you can try to run the
benchmarks again. If regressions still show up, please consult with the
performance team (@eeckstein).
</details>

<details>
<summary><strong>Hardware Overview</strong></summary>

"""
    po = subprocess.check_output(["system_profiler", "SPHardwareDataType"])
    for line in po.splitlines():
        selection = [
            "Model Name",
            "Model Identifier",
            "Processor Name",
            "Processor Speed",
            "Number of Processors",
            "Total Number of Cores",
            "L2 Cache",
            "L3 Cache",
            "Memory:",
        ]

        if any(s in line for s in selection):
            text += line + "\n"

    text += """
</details>"""
    return text


def check_added(args, output_file=None):
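    """Run the BenchmarkDoctor checks on benchmarks added in the new build."""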
    old = BenchmarkDriver(DriverArgs(args.oldbuilddir[0]))
    new = BenchmarkDriver(DriverArgs(args.newbuilddir[0]))
    added = set(new.tests).difference(set(old.tests))
    new.tests = list(added)
    doctor = BenchmarkDoctor(args, driver=new)
    if added and output_file:
        doctor.log.addHandler(MarkdownReportHandler(output_file))
    doctor.check()


if __name__ == "__main__":
    sys.exit(main())