mirror of https://github.com/apple/swift.git
[Compare Perf] Add support for markdown and html
@@ -13,221 +13,360 @@
#
# ===---------------------------------------------------------------------===//
#
# e.g.
# repeat.sh 3 tot/bin/Benchmark_Driver run -o -O > tot.O.times
# repeat.sh 3 mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.times
# compare_perf_tests.py tot.O.times mypatch.O.times | sort -t, -n -k 6 | \
#     column -s, -t

from __future__ import print_function

import argparse
import csv
import re
import sys

# Column indices in the Benchmark_Driver CSV output.
TESTNAME = 1
SAMPLES = 2
MIN = 3
MAX = 4
MEAN = 5
SD = 6
MEDIAN = 7

VERBOSE = 0
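
For orientation, a small illustration of how the column constants index one Benchmark_Driver CSV row; the sample values below are invented:

# Hypothetical CSV row: #,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN
row = ["34", "Ackermann", "20", "23641", "29984", "24990", "1678", "24584"]
assert row[TESTNAME] == "Ackermann"
assert int(row[MIN]) == 23641 and int(row[MAX]) == 29984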

HTML = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
{0}
</body>
</html>"""

# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
SCORERE = re.compile(
    r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
TOTALRE = re.compile(
    r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
NUMGROUP = 1
KEYGROUP = 2
BESTGROUP = 4
WORSTGROUP = 5
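
As a sketch of how the old log-format regex and its group constants line up (the sample line is invented):

m = SCORERE.match("34,Ackermann,20,23641,29984,24990")
print(m.group(NUMGROUP), m.group(KEYGROUP))     # -> 34 Ackermann
print(m.group(BESTGROUP), m.group(WORSTGROUP))  # -> 23641 29984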

HTML_TABLE = """
<table>
<tr>
<th align='left'>{0}</th>
<th align='left'>{1}</th>
<th align='left'>{2}</th>
<th align='left'>{3}</th>
<th align='left'>{4}</th>
</tr>
{5}
</table>
"""

IsTime = 1
ShowSpeedup = 1
PrintAllScores = 0

HTML_ROW = """
<tr>
<td align='left'>{0}</td>
<td align='left'>{1}</td>
<td align='left'>{2}</td>
<td align='left'>{3}</td>
<td align='left'><font color='{4}'>{5}</font></td>
</tr>
"""

MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
HEADER_SPLIT = "---"
MARKDOWN_DETAIL = """
<details {3}>
  <summary>{0} ({1})</summary>
  {2}
</details>
"""

PAIN_FORMAT = """
Regression: {0}
Improvement: {1}
No Changes: {2}
"""


def parse_int(word):
    try:
        return int(word)
    except ValueError:
        raise Exception("Expected integer value, not " + word)


def get_scores(fname):
    scores = {}
    worstscores = {}
    nums = {}
    runs = 0
    f = open(fname)
    try:
        for line in f:
            if VERBOSE:
                print("Parsing", line, end="")
            m = SCORERE.match(line)
            is_total = False
            if not m:
                is_total = True
                m = TOTALRE.match(line)
            if not m:
                continue
            if VERBOSE:
                print(" match", m.group(KEYGROUP), m.group(BESTGROUP))
            if m.group(KEYGROUP) not in scores:
                scores[m.group(KEYGROUP)] = []
                worstscores[m.group(KEYGROUP)] = []
            scores[m.group(KEYGROUP)].append(parse_int(m.group(BESTGROUP)))
            worstscores[m.group(KEYGROUP)].append(
                parse_int(m.group(WORSTGROUP)))
            if is_total:
                nums[m.group(KEYGROUP)] = ""
            else:
                nums[m.group(KEYGROUP)] = m.group(NUMGROUP)
            if len(scores[m.group(KEYGROUP)]) > runs:
                runs = len(scores[m.group(KEYGROUP)])
    finally:
        f.close()
    return scores, worstscores, runs, nums


def is_max_score(newscore, maxscore, invert):
    return not maxscore or \
        (newscore > maxscore if not invert else newscore < maxscore)


def main():
    global RATIO_MIN
    global RATIO_MAX

    old_results = {}
    new_results = {}
    old_max_results = {}
    new_max_results = {}
    ratio_list = {}
    delta_list = {}
    unknown_list = {}
    complete_perf_list = []
    increased_perf_list = []
    decreased_perf_list = []
    normal_perf_list = []

    parser = argparse.ArgumentParser(description="Compare Performance tests.")
    parser.add_argument('--old-file',
                        help='Baseline performance test suite (csv file)',
                        required=True)
    parser.add_argument('--new-file',
                        help='New performance test suite (csv file)',
                        required=True)
    parser.add_argument('--format',
                        help='Supported formats: git, html and markdown',
                        default="markdown")
    parser.add_argument('--output', help='Output file name')
    parser.add_argument('--changes-only',
                        help='Output only affected tests', action='store_true')
    parser.add_argument('--new-branch',
                        help='Name of the new branch', default="NEW_MIN")
    parser.add_argument('--old-branch',
                        help='Name of the old branch', default="OLD_MIN")
    parser.add_argument('--delta-threshold',
                        help='Delta threshold', default="0.05")

    args = parser.parse_args()

    old_file = args.old_file
    new_file = args.new_file

    new_branch = args.new_branch
    old_branch = args.old_branch

    old_data = csv.reader(open(old_file))
    new_data = csv.reader(open(new_file))

    RATIO_MIN = 1 - float(args.delta_threshold)
    RATIO_MAX = 1 + float(args.delta_threshold)

    for row in old_data:
        if len(row) > 7 and row[MIN].isdigit():
            if row[TESTNAME] in old_results:
                if old_results[row[TESTNAME]] > int(row[MIN]):
                    old_results[row[TESTNAME]] = int(row[MIN])
                if old_max_results[row[TESTNAME]] < int(row[MAX]):
                    old_max_results[row[TESTNAME]] = int(row[MAX])
            else:
                old_results[row[TESTNAME]] = int(row[MIN])
                old_max_results[row[TESTNAME]] = int(row[MAX])

    for row in new_data:
        if len(row) > 7 and row[MIN].isdigit():
            if row[TESTNAME] in new_results:
                if new_results[row[TESTNAME]] > int(row[MIN]):
                    new_results[row[TESTNAME]] = int(row[MIN])
                if new_max_results[row[TESTNAME]] < int(row[MAX]):
                    new_max_results[row[TESTNAME]] = int(row[MAX])
            else:
                new_results[row[TESTNAME]] = int(row[MIN])
                new_max_results[row[TESTNAME]] = int(row[MAX])

    # Running product of per-test ratios; starts at 1 for a product.
    ratio_total = 1
    for key in new_results.keys():
        ratio = (old_results[key] + 0.001) / (new_results[key] + 0.001)
        ratio_list[key] = round(ratio, 2)
        ratio_total *= ratio
        delta = (((float(new_results[key] + 0.001) /
                   (old_results[key] + 0.001)) - 1) * 100)
        delta_list[key] = round(delta, 2)
        # Mark results whose min/max intervals overlap as inconclusive.
        if ((old_results[key] < new_results[key] and
             new_results[key] < old_max_results[key]) or
                (new_results[key] < old_results[key] and
                 old_results[key] < new_max_results[key])):
            unknown_list[key] = "(?)"
        else:
            unknown_list[key] = ""

    (complete_perf_list,
     increased_perf_list,
     decreased_perf_list,
     normal_perf_list) = sort_ratio_list(ratio_list, args.changes_only)

    # Create the markdown formatted table.
    test_name_width = max_width(ratio_list, title='TEST', key_len=True)
    new_time_width = max_width(new_results, title=new_branch)
    old_time_width = max_width(old_results, title=old_branch)
    delta_width = max_width(delta_list, title='DELTA (%)')

    markdown_table_header = "\n" + MARKDOWN_ROW.format(
        "TEST".ljust(test_name_width),
        old_branch.ljust(old_time_width),
        new_branch.ljust(new_time_width),
        "DELTA (%)".ljust(delta_width),
        "SPEEDUP".ljust(2))
    markdown_table_header += MARKDOWN_ROW.format(
        HEADER_SPLIT.ljust(test_name_width),
        HEADER_SPLIT.ljust(old_time_width),
        HEADER_SPLIT.ljust(new_time_width),
        HEADER_SPLIT.ljust(delta_width),
        HEADER_SPLIT.ljust(2))

    markdown_regression = ""
    for i, key in enumerate(decreased_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_regression = markdown_table_header
        markdown_regression += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))

    markdown_improvement = ""
    for i, key in enumerate(increased_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_improvement = markdown_table_header
        markdown_improvement += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))

    markdown_normal = ""
    for i, key in enumerate(normal_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_normal = markdown_table_header
        markdown_normal += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "{0}{1}".format(str(ratio).ljust(2), unknown_list[key]))

    markdown_data = MARKDOWN_DETAIL.format("Regression",
                                           len(decreased_perf_list),
                                           markdown_regression, "open")
    markdown_data += MARKDOWN_DETAIL.format("Improvement",
                                            len(increased_perf_list),
                                            markdown_improvement, "")
    markdown_data += MARKDOWN_DETAIL.format("No Changes",
                                            len(normal_perf_list),
                                            markdown_normal, "")

    if args.format:
        if args.format.lower() != "markdown":
            pain_data = PAIN_FORMAT.format(markdown_regression,
                                           markdown_improvement,
                                           markdown_normal)
            print(pain_data.replace("|", " ").replace("-", " "))
        else:
            print(markdown_data)

    if args.format:
        if args.format.lower() == "html":
            # Create the HTML formatted table.
            html_data = convert_to_html(ratio_list, old_results, new_results,
                                        delta_list, unknown_list, old_branch,
                                        new_branch, args.changes_only)

            if args.output:
                write_to_file(args.output, html_data)
            else:
                print("Error: missing --output flag.")
                exit(1)
        elif args.format.lower() == "markdown" and args.output:
            write_to_file(args.output, markdown_data)
        elif args.format.lower() not in ("git", "markdown"):
            # "git" and "markdown" were already printed above; anything
            # else is unsupported.
            print("{0} is an unknown format.".format(args.format))
            exit(1)


def compare_scores(key, score1, worstsample1, score2, worstsample2, runs, num):
    print(num.rjust(3), end=" ")
    print(key.ljust(25), end="")
    bestscore1 = None
    bestscore2 = None
    worstscore1 = None
    worstscore2 = None
    minbest = IsTime
    minworst = not minbest
    r = 0
    for score in score1:
        if is_max_score(newscore=score, maxscore=bestscore1, invert=minbest):
            bestscore1 = score
        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
            worstscore1 = score
        if PrintAllScores:
            print(("%d" % score).rjust(16), end="")
    for score in worstsample1:
        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
            worstscore1 = score
    for score in score2:
        if is_max_score(newscore=score, maxscore=bestscore2, invert=minbest):
            bestscore2 = score
        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
            worstscore2 = score
        if PrintAllScores:
            print(("%d" % score).rjust(16), end="")
        r += 1
    for score in worstsample2:
        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
            worstscore2 = score
    while r < runs:
        if PrintAllScores:
            print("0".rjust(9), end="")
        r += 1
    if not PrintAllScores:
        print(("%d" % bestscore1).rjust(16), end="")
        print(("%d" % bestscore2).rjust(16), end="")
    print(("%+d" % (bestscore2 - bestscore1)).rjust(9), end="")
    if bestscore1 != 0 and bestscore2 != 0:
        print(("%+.1f%%" %
               (((float(bestscore2) / bestscore1) - 1) * 100)).rjust(9),
              end="")
        if ShowSpeedup:
            Num, Den = float(bestscore2), float(bestscore1)
            if IsTime:
                Num, Den = Den, Num
            print(("%.2fx" % (Num / Den)).rjust(9), end="")
    else:
        print("*".rjust(9), end="")
        if ShowSpeedup:
            print("*".rjust(9), end="")
    # Check if the worst->best intervals for the two configurations overlap.
    if minbest:
        if (bestscore1 < bestscore2 and bestscore2 < worstscore1) \
                or (bestscore2 < bestscore1 and bestscore1 < worstscore2):
            print("(?)", end="")
    else:
        if (worstscore1 < worstscore2 and worstscore2 < bestscore1) \
                or (worstscore2 < worstscore1 and worstscore1 < bestscore2):
            print("(?)", end="")
    print()


def print_best_scores(key, scores):
    print(key, end="")
    bestscore = None
    minbest = IsTime
    for score in scores:
        if is_max_score(newscore=score, maxscore=bestscore, invert=minbest):
            bestscore = score
    print(", %d" % bestscore)


def usage():
    print("repeat.sh <n> Benchmark_O[none|unchecked] > file.times")
    print("compare_perf_tests.py <file.times> [<file2.times>]")


if __name__ == '__main__':
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    file1 = sys.argv[1]
    if len(sys.argv) < 3:
        scores, worstscores, runs, nums = get_scores(file1)
        keys = list(set(scores.keys()))
        keys.sort()
        for key in keys:
            print_best_scores(key, scores[key])
        sys.exit(0)

    file2 = sys.argv[2]
    if len(sys.argv) > 3:
        SCORERE = re.compile(sys.argv[3])

    scores1, worstscores1, runs1, nums = get_scores(file1)
    scores2, worstscores2, runs2, nums = get_scores(file2)

    runs = runs1
    if runs2 > runs:
        runs = runs2

    if VERBOSE:
        print(scores1)
        print(scores2)

    keys = list(set(scores1.keys() + scores2.keys()))
    keys.sort()
    if VERBOSE:
        print("comparing ", file1, "vs", file2, "=", end="")
        if IsTime:
            print(file1, "/", file2)
        else:
            print(file2, "/", file1)

    print("#".rjust(3), end=" ")
    print("TEST".ljust(25), end="")
    if PrintAllScores:
        for i in range(0, runs):
            print(("OLD_RUN%d" % i).rjust(9), end="")
        for i in range(0, runs):
            print(("NEW_RUN%d" % i).rjust(9), end="")
    else:
        print("BEST_OLD_MIN(μs)".rjust(17), end=" ")
        print("BEST_NEW_MIN(μs)".rjust(17), end=" ")
        print('DELTA'.rjust(9), '%DELTA'.rjust(9), 'SPEEDUP'.rjust(9))

    for key in keys:
        if key not in scores1:
            print(key, "not in", file1)
            continue
        if key not in scores2:
            print(key, "not in", file2)
            continue
        compare_scores(key, scores1[key], worstscores1[key], scores2[key],
                       worstscores2[key], runs, nums[key])
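
For comparison with the positional interface above, one plausible invocation of the new flag-based interface defined in main(); the file names are invented:

compare_perf_tests.py --old-file tot.O.csv --new-file mypatch.O.csv \
    --format markdown --output report.md --changes-only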


def convert_to_html(ratio_list, old_results, new_results, delta_list,
                    unknown_list, old_branch, new_branch, changes_only):
    (complete_perf_list,
     increased_perf_list,
     decreased_perf_list,
     normal_perf_list) = sort_ratio_list(ratio_list, changes_only)

    html_rows = ""
    for key in complete_perf_list:
        if ratio_list[key] < RATIO_MIN:
            color = "red"
        elif ratio_list[key] > RATIO_MAX:
            color = "green"
        else:
            color = "black"
        if len(decreased_perf_list) > 0 and key == decreased_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>Regression:</strong>",
                "", "", "", "black", "")
        if len(increased_perf_list) > 0 and key == increased_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>Improvement:</strong>",
                "", "", "", "black", "")
        if len(normal_perf_list) > 0 and key == normal_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>No Changes:</strong>",
                "", "", "", "black", "")

        html_rows += HTML_ROW.format(key, old_results[key],
                                     new_results[key],
                                     "{0:+.1f}%".format(delta_list[key]),
                                     color,
                                     "{0:.2f}x {1}".format(ratio_list[key],
                                                           unknown_list[key]))

    html_table = HTML_TABLE.format("TEST", old_branch, new_branch,
                                   "DELTA (%)", "SPEEDUP", html_rows)
    html_data = HTML.format(html_table)
    return html_data


def write_to_file(file_name, data):
    """
    Write data to the given file.
    """
    file = open(file_name, "w")
    file.write(data)
    file.close()
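
A more idiomatic equivalent of write_to_file using a context manager; a sketch, not what the patch does:

def write_to_file(file_name, data):
    # The context manager closes the file even if write() raises.
    with open(file_name, "w") as out:
        out.write(data)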


def sort_ratio_list(ratio_list, changes_only=False):
    """
    Return three sorted lists: improvement, regression and normal.
    """
    decreased_perf_list = []
    increased_perf_list = []
    sorted_normal_perf_list = []
    normal_perf_list = {}

    for key, v in sorted(ratio_list.items(), key=lambda x: x[1]):
        if ratio_list[key] < RATIO_MIN:
            decreased_perf_list.append(key)
        elif ratio_list[key] > RATIO_MAX:
            increased_perf_list.append(key)
        else:
            normal_perf_list[key] = v

    for key, v in sorted(normal_perf_list.items(), key=lambda x: x[1],
                         reverse=True):
        sorted_normal_perf_list.append(key)

    if changes_only:
        complete_perf_list = decreased_perf_list + increased_perf_list
    else:
        complete_perf_list = (decreased_perf_list + increased_perf_list +
                              sorted_normal_perf_list)

    return (complete_perf_list, increased_perf_list,
            decreased_perf_list, sorted_normal_perf_list)
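
A quick illustration of the classification, assuming a 5% delta threshold and invented ratios:

RATIO_MIN, RATIO_MAX = 0.95, 1.05
print(sort_ratio_list({"A": 0.80, "B": 1.30, "C": 1.01}))
# -> (['A', 'B', 'C'], ['B'], ['A'], ['C'])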


def nthroot(y, n):
    # Integer nth root of y via Newton iteration, then a downward correction.
    x, xp = 1, -1
    while abs(x - xp) > 1:
        xp, x = x, x - x/n + y/(n * x**(n-1))
    while x**n > y:
        x -= 1
    return x
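
A sanity check for nthroot (Python 2 integer division assumed, matching the rest of the script):

print(nthroot(27, 3))   # -> 3
print(nthroot(16, 2))   # -> 4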


def max_width(items, title, key_len=False):
    """
    Return the maximum string length among the title and the dictionary's
    keys (key_len=True) or values (key_len=False).
    """
    width = len(str(title))
    for key in items.keys():
        if key_len:
            if width < len(str(key)):
                width = len(str(key))
        else:
            if width < len(str(items[key])):
                width = len(str(items[key]))
    return width
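
A small example of max_width over keys versus values (sample data invented):

widths = {"Ackermann": 123456}
print(max_width(widths, title="TEST", key_len=True))   # -> 9 (key length)
print(max_width(widths, title="OLD_MIN"))              # -> 7 (title wins)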


if __name__ == "__main__":
    sys.exit(main())