[Compare Perf] Add support for markdown and html

Mishal Shah
2016-03-14 14:11:01 -07:00
parent defe364a59
commit cb23837bb9
2 changed files with 349 additions and 201 deletions
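
With this change the comparison script takes explicit flags instead of two positional .times files, and it can emit either markdown or HTML. A minimal invocation sketch, assuming two hypothetical CSV result files named old_O.csv and new_O.csv (the flag names come from the argparse definitions in the diff below):

# Sketch only: the file paths are hypothetical; the flags match this commit's argparse setup.
import subprocess

# Markdown report: printed to stdout, and also written to a file when --output is given.
subprocess.call(['python', 'compare_perf_tests.py',
                 '--old-file', 'old_O.csv',
                 '--new-file', 'new_O.csv',
                 '--format', 'markdown',
                 '--output', 'latest_compare_O.md'])

# HTML report: --output is required here, otherwise the script prints an error and exits.
subprocess.call(['python', 'compare_perf_tests.py',
                 '--old-file', 'old_O.csv',
                 '--new-file', 'new_O.csv',
                 '--format', 'html',
                 '--output', 'latest_compare_O.html'])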


@@ -255,10 +255,13 @@ def format_name(log_path):
     return '/'.join(log_path.split('/')[-2:])


-def compare_logs(compare_script, new_log, old_log):
+def compare_logs(compare_script, new_log, old_log, log_dir, opt):
     """Return diff of log files at paths `new_log` and `old_log`"""
     print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
-    subprocess.call([compare_script, old_log, new_log])
+    subprocess.call([compare_script, '--old-file', old_log,
+                     '--new-file', new_log, '--format', 'markdown',
+                     '--output', os.path.join(log_dir, 'latest_compare_{0}.md'
+                                              .format(opt))])


 def compare(args):
@@ -288,10 +291,12 @@ def compare(args):
                 len(recent_logs['master_Onone']) > 1:
             compare_logs(compare_script,
                          recent_logs['master_O'][0],
-                         recent_logs['master_O'][1])
+                         recent_logs['master_O'][1],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs['master_Onone'][0],
-                         recent_logs['master_Onone'][1])
+                         recent_logs['master_Onone'][1],
+                         log_dir, 'Onone')
         else:
             print('master/master comparison skipped: no previous master logs')
     else:
@@ -307,10 +312,12 @@ def compare(args):
         else:
             compare_logs(compare_script,
                          recent_logs[current_branch + '_O'][0],
-                         recent_logs[current_branch + '_O'][1])
+                         recent_logs[current_branch + '_O'][1],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs[current_branch + '_Onone'][0],
-                         recent_logs[current_branch + '_Onone'][1])
+                         recent_logs[current_branch + '_Onone'][1],
+                         log_dir, 'Onone')

         if len(recent_logs['master_O']) == 0 or \
            len(recent_logs['master_Onone']) == 0:
@@ -319,10 +326,12 @@ def compare(args):
         else:
             compare_logs(compare_script,
                          recent_logs[current_branch + '_O'][0],
-                         recent_logs['master_O'][0])
+                         recent_logs['master_O'][0],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs[current_branch + '_Onone'][0],
-                         recent_logs['master_Onone'][0])
+                         recent_logs['master_Onone'][0],
+                         log_dir, 'Onone')

     # TODO: Fail on large regressions
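
As a usage note, the driver now writes one markdown report per optimization mode into its log directory instead of printing the comparison. A small sketch of the output paths compare_logs() builds, assuming a hypothetical log directory:

# Sketch: mirrors the os.path.join(...) call added above; '/tmp/benchmark_logs' is hypothetical.
import os

log_dir = '/tmp/benchmark_logs'
for opt in ('O', 'Onone'):
    print(os.path.join(log_dir, 'latest_compare_{0}.md'.format(opt)))
# Prints:
#   /tmp/benchmark_logs/latest_compare_O.md
#   /tmp/benchmark_logs/latest_compare_Onone.md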


@@ -13,221 +13,360 @@
 #
 # ===---------------------------------------------------------------------===//
-# e.g.
-# repeat.sh 3 tot/bin/Benchmark_Driver run -o -O > tot.O.times
-# repeat.sh 3 mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.times
-# compare_perf_tests.py tot.O.times mypatch.O.times | sort -t, -n -k 6 | \
-#     column -s, -t
-from __future__ import print_function
-import re
+import argparse
+import csv
 import sys
-
-VERBOSE = 0
-
-# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
-SCORERE = re.compile(
-    r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
-TOTALRE = re.compile(
-    r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
-NUMGROUP = 1
-KEYGROUP = 2
-BESTGROUP = 4
-WORSTGROUP = 5
-
-IsTime = 1
-ShowSpeedup = 1
-PrintAllScores = 0
-
-
-def parse_int(word):
-    try:
-        return int(word)
-    except ValueError:
-        raise Exception("Expected integer value, not " + word)
-
-
-def get_scores(fname):
-    scores = {}
-    worstscores = {}
-    nums = {}
-    runs = 0
-    f = open(fname)
-    try:
-        for line in f:
-            if VERBOSE:
-                print("Parsing", line, end="")
-            m = SCORERE.match(line)
-            is_total = False
-            if not m:
-                is_total = True
-                m = TOTALRE.match(line)
-            if not m:
-                continue
-            if VERBOSE:
-                print(" match", m.group(KEYGROUP), m.group(BESTGROUP))
-            if not m.group(KEYGROUP) in scores:
-                scores[m.group(KEYGROUP)] = []
-                worstscores[m.group(KEYGROUP)] = []
-            scores[m.group(KEYGROUP)].append(parse_int(m.group(BESTGROUP)))
-            worstscores[m.group(KEYGROUP)].append(
-                parse_int(m.group(WORSTGROUP)))
-            if is_total:
-                nums[m.group(KEYGROUP)] = ""
-            else:
-                nums[m.group(KEYGROUP)] = m.group(NUMGROUP)
-            if len(scores[m.group(KEYGROUP)]) > runs:
-                runs = len(scores[m.group(KEYGROUP)])
-    finally:
-        f.close()
-    return scores, worstscores, runs, nums
-
-
-def is_max_score(newscore, maxscore, invert):
-    return not maxscore or \
-        (newscore > maxscore if not invert else newscore < maxscore)
-
-
-def compare_scores(key, score1, worstsample1, score2, worstsample2, runs, num):
-    print(num.rjust(3), end=" ")
-    print(key.ljust(25), end="")
-    bestscore1 = None
-    bestscore2 = None
-    worstscore1 = None
-    worstscore2 = None
-    minbest = IsTime
-    minworst = not minbest
-    r = 0
-    for score in score1:
-        if is_max_score(newscore=score, maxscore=bestscore1, invert=minbest):
-            bestscore1 = score
-        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
-            worstscore1 = score
-        if PrintAllScores:
-            print (("%d" % score).rjust(16), end="")
-    for score in worstsample1:
-        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
-            worstscore1 = score
-    for score in score2:
-        if is_max_score(newscore=score, maxscore=bestscore2, invert=minbest):
-            bestscore2 = score
-        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
-            worstscore2 = score
-        if PrintAllScores:
-            print (("%d" % score).rjust(16), end="")
-        r += 1
-    for score in worstsample2:
-        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
-            worstscore2 = score
-    while r < runs:
-        if PrintAllScores:
-            print ("0".rjust(9), end="")
-        r += 1
-    if not PrintAllScores:
-        print (("%d" % bestscore1).rjust(16), end="")
-        print (("%d" % bestscore2).rjust(16), end="")
-    print (("%+d" % (bestscore2 - bestscore1)).rjust(9), end="")
-    if bestscore1 != 0 and bestscore2 != 0:
-        print(("%+.1f%%" %
-               (((float(bestscore2) / bestscore1) - 1) * 100)).rjust(9),
-              end="")
-        if ShowSpeedup:
-            Num, Den = float(bestscore2), float(bestscore1)
-            if IsTime:
-                Num, Den = Den, Num
-            print (("%.2fx" % (Num / Den)).rjust(9), end="")
-    else:
-        print("*".rjust(9), end="")
-        if ShowSpeedup:
-            print("*".rjust(9), end="")
-    # check if the worst->best interval for each configuration overlap.
-    if minbest:
-        if (bestscore1 < bestscore2 and bestscore2 < worstscore1) \
-           or (bestscore2 < bestscore1 and bestscore1 < worstscore2):
-            print("(?)", end="")
-    else:
-        if (worstscore1 < worstscore2 and worstscore2 < bestscore1) \
-           or (worstscore2 < worstscore1 and worstscore1 < bestscore2):
-            print("(?)", end="")
-    print()
-
-
-def print_best_scores(key, scores):
-    print(key, end="")
-    bestscore = None
-    minbest = IsTime
-    for score in scores:
-        if is_max_score(newscore=score, maxscore=bestscore, invert=minbest):
-            bestscore = score
-    print(", %d" % bestscore)
-
-
-def usage():
-    print("repeat.sh <n> Benchmark_O[none|unchecked] > file.times")
-    print("compare_perf_tests.py <file.times> [<file2.times>]")
-
-
-if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        usage()
-        sys.exit(1)
-    file1 = sys.argv[1]
-    if len(sys.argv) < 3:
-        scores, worstscores, runs, nums = get_scores(file1)
-        keys = list(set(scores.keys()))
-        keys.sort()
-        for key in keys:
-            print_best_scores(key, scores[key])
-        sys.exit(0)
-    file2 = sys.argv[2]
-    if len(sys.argv) > 3:
-        SCORERE = re.compile(sys.argv[3])
-    scores1, worstscores1, runs1, nums = get_scores(file1)
-    scores2, worstscores2, runs2, nums = get_scores(file2)
-    runs = runs1
-    if runs2 > runs:
-        runs = runs2
-    if VERBOSE:
-        print(scores1)
-        print(scores2)
-    keys = list(set(scores1.keys() + scores2.keys()))
-    keys.sort()
-    if VERBOSE:
-        print("comparing ", file1, "vs", file2, "=", end="")
-        if IsTime:
-            print(file1, "/", file2)
-        else:
-            print(file2, "/", file1)
-    print("#".rjust(3), end=" ")
-    print("TEST".ljust(25), end="")
-    if PrintAllScores:
-        for i in range(0, runs):
-            print(("OLD_RUN%d" % i).rjust(9), end="")
-        for i in range(0, runs):
-            print(("NEW_RUN%d" % i).rjust(9), end="")
-    else:
-        print("BEST_OLD_MIN(μs)".rjust(17), end=" ")
-        print("BEST_NEW_MIN(μs)".rjust(17), end=" ")
-    print('DELTA'.rjust(9), '%DELTA'.rjust(9), 'SPEEDUP'.rjust(9))
-    for key in keys:
-        if key not in scores1:
-            print(key, "not in", file1)
-            continue
-        if key not in scores2:
-            print(key, "not in", file2)
-            continue
-        compare_scores(key, scores1[key], worstscores1[key], scores2[key],
-                       worstscores2[key], runs, nums[key])
+
+TESTNAME = 1
+SAMPLES = 2
+MIN = 3
+MAX = 4
+MEAN = 5
+SD = 6
+MEDIAN = 7
+
+HTML = """
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+</head>
+<body>
+{0}
+</body>
+</html>"""
+
+HTML_TABLE = """
+<table>
+<tr>
+<th align='left'>{0}</th>
+<th align='left'>{1}</th>
+<th align='left'>{2}</th>
+<th align='left'>{3}</th>
+<th align='left'>{4}</th>
+</tr>
+{5}
+</table>
+"""
+
+HTML_ROW = """
+<tr>
+<td align='left'>{0}</td>
+<td align='left'>{1}</td>
+<td align='left'>{2}</td>
+<td align='left'>{3}</td>
+<td align='left'><font color='{4}'>{5}</font></td>
+</tr>
+"""
+
+MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
+HEADER_SPLIT = "---"
+MARKDOWN_DETAIL = """
+<details {3}>
+<summary>{0} ({1})</summary>
+{2}
+</details>
+"""
+PAIN_FORMAT = """
+Regression: {0}
+Improvement: {1}
+No Changes: {2}
+"""
+
+
+def main():
+    global RATIO_MIN
+    global RATIO_MAX
+
+    old_results = {}
+    new_results = {}
+    old_max_results = {}
+    new_max_results = {}
+    ratio_list = {}
+    delta_list = {}
+    unknown_list = {}
+    complete_perf_list = []
+    increased_perf_list = []
+    decreased_perf_list = []
+    normal_perf_list = []
+
+    parser = argparse.ArgumentParser(description="Compare Performance tests.")
+    parser.add_argument('--old-file',
+                        help='Baseline performance test suite (csv file)',
+                        required=True)
+    parser.add_argument('--new-file',
+                        help='New performance test suite (csv file)',
+                        required=True)
+    parser.add_argument('--format',
+                        help='Supported format git, html and markdown',
+                        default="markdown")
+    parser.add_argument('--output', help='Output file name')
+    parser.add_argument('--changes-only',
+                        help='Output only affected tests', action='store_true')
+    parser.add_argument('--new-branch',
+                        help='Name of the new branch', default="NEW_MIN")
+    parser.add_argument('--old-branch',
+                        help='Name of the old branch', default="OLD_MIN")
+    parser.add_argument('--delta-threshold',
+                        help='delta threshold', default="0.05")
+
+    args = parser.parse_args()
+
+    old_file = args.old_file
+    new_file = args.new_file
+
+    new_branch = args.new_branch
+    old_branch = args.old_branch
+
+    old_data = csv.reader(open(old_file))
+    new_data = csv.reader(open(new_file))
+
+    RATIO_MIN = 1 - float(args.delta_threshold)
+    RATIO_MAX = 1 + float(args.delta_threshold)
+
+    for row in old_data:
+        if (len(row) > 7 and row[MIN].isdigit()):
+            if row[TESTNAME] in old_results:
+                if old_results[row[TESTNAME]] > row[MIN]:
+                    old_results[row[TESTNAME]] = int(row[MIN])
+                if old_max_results[row[TESTNAME]] < row[MAX]:
+                    old_max_results[row[TESTNAME]] = int(row[MAX])
+            else:
+                old_results[row[TESTNAME]] = int(row[MIN])
+                old_max_results[row[TESTNAME]] = int(row[MAX])
+
+    for row in new_data:
+        if (len(row) > 7 and row[MIN].isdigit()):
+            if row[TESTNAME] in new_results:
+                if new_results[row[TESTNAME]] > row[MIN]:
+                    new_results[row[TESTNAME]] = int(row[MIN])
+                if new_max_results[row[TESTNAME]] < row[MAX]:
+                    new_max_results[row[TESTNAME]] = int(row[MAX])
+            else:
+                new_results[row[TESTNAME]] = int(row[MIN])
+                new_max_results[row[TESTNAME]] = int(row[MAX])
+
+    ratio_total = 0
+    for key in new_results.keys():
+        ratio = (old_results[key]+0.001)/(new_results[key]+0.001)
+        ratio_list[key] = round(ratio, 2)
+        ratio_total *= ratio
+        delta = (((float(new_results[key]+0.001) /
+                  (old_results[key]+0.001)) - 1) * 100)
+        delta_list[key] = round(delta, 2)
+        if ((old_results[key] < new_results[key] and
+             new_results[key] < old_max_results[key]) or
+            (new_results[key] < old_results[key] and
+             old_results[key] < new_max_results[key])):
+            unknown_list[key] = "(?)"
+        else:
+            unknown_list[key] = ""
+
+    (complete_perf_list,
+     increased_perf_list,
+     decreased_perf_list,
+     normal_perf_list) = sort_ratio_list(ratio_list, args.changes_only)
+
+    """
+    Create markdown formated table
+    """
+    test_name_width = max_width(ratio_list, title='TEST', key_len=True)
+    new_time_width = max_width(new_results, title=new_branch)
+    old_time_width = max_width(old_results, title=old_branch)
+    delta_width = max_width(delta_list, title='DELTA (%)')
+
+    markdown_table_header = "\n" + MARKDOWN_ROW.format(
+        "TEST".ljust(test_name_width),
+        old_branch.ljust(old_time_width),
+        new_branch.ljust(new_time_width),
+        "DELTA (%)".ljust(delta_width),
+        "SPEEDUP".ljust(2))
+    markdown_table_header += MARKDOWN_ROW.format(
+        HEADER_SPLIT.ljust(test_name_width),
+        HEADER_SPLIT.ljust(old_time_width),
+        HEADER_SPLIT.ljust(new_time_width),
+        HEADER_SPLIT.ljust(delta_width),
+        HEADER_SPLIT.ljust(2))
+
+    markdown_regression = ""
+    for i, key in enumerate(decreased_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_regression = markdown_table_header
+        markdown_regression += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_improvement = ""
+    for i, key in enumerate(increased_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_improvement = markdown_table_header
+        markdown_improvement += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_normal = ""
+    for i, key in enumerate(normal_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_normal = markdown_table_header
+        markdown_normal += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "{0}{1}".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_data = MARKDOWN_DETAIL.format("Regression",
+                                           len(decreased_perf_list),
+                                           markdown_regression, "open")
+    markdown_data += MARKDOWN_DETAIL.format("Improvement",
+                                            len(increased_perf_list),
+                                            markdown_improvement, "")
+    markdown_data += MARKDOWN_DETAIL.format("No Changes",
+                                            len(normal_perf_list),
+                                            markdown_normal, "")
+
+    if args.format:
+        if args.format.lower() != "markdown":
+            pain_data = PAIN_FORMAT.format(markdown_regression,
+                                           markdown_improvement,
+                                           markdown_normal)
+            print(pain_data.replace("|", " ").replace("-", " "))
+        else:
+            print(markdown_data)
+
+    if args.format:
+        if args.format.lower() == "html":
+            """
+            Create HTML formated table
+            """
+            html_data = convert_to_html(ratio_list, old_results, new_results,
+                                        delta_list, unknown_list, old_branch,
+                                        new_branch, args.changes_only)
+
+            if args.output:
+                write_to_file(args.output, html_data)
+            else:
+                print("Error: missing --output flag.")
+                exit(1)
+        elif args.format.lower() == "markdown" and args.output:
+            write_to_file(args.output, markdown_data)
+        elif args.format.lower() != "markdown":
+            print("{0} is unknown format.".format(args.format))
+            exit(1)
+
+
+def convert_to_html(ratio_list, old_results, new_results, delta_list,
+                    unknown_list, old_branch, new_branch, changes_only):
+    (complete_perf_list,
+     increased_perf_list,
+     decreased_perf_list,
+     normal_perf_list) = sort_ratio_list(ratio_list, changes_only)
+
+    html_rows = ""
+    for key in complete_perf_list:
+        if ratio_list[key] < RATIO_MIN:
+            color = "red"
+        elif ratio_list[key] > RATIO_MAX:
+            color = "green"
+        else:
+            color = "black"
+        if len(decreased_perf_list) > 0 and key == decreased_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>Regression:</strong>",
+                "", "", "", "black", "", "")
+        if len(increased_perf_list) > 0 and key == increased_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>Improvement:</strong>",
+                "", "", "", "black", "", "")
+        if len(normal_perf_list) > 0 and key == normal_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>No Changes:</strong>",
+                "", "", "", "black", "", "")
+        html_rows += HTML_ROW.format(key, old_results[key],
+                                     new_results[key],
+                                     "{0:+.1f}%".format(delta_list[key]),
+                                     color,
+                                     "{0:.2f}x {1}".format(ratio_list[key],
+                                                           unknown_list[key]))
+
+    html_table = HTML_TABLE.format("TEST", old_branch, new_branch,
+                                   "DELTA (%)", "SPEEDUP", html_rows)
+    html_data = HTML.format(html_table)
+    return html_data
+
+
+def write_to_file(file_name, data):
+    """
+    Write data to given file
+    """
+    file = open(file_name, "w")
+    file.write(data)
+    file.close
+
+
+def sort_ratio_list(ratio_list, changes_only=False):
+    """
+    Return 3 sorted list imporvment, regression and normal.
+    """
+    decreased_perf_list = []
+    increased_perf_list = []
+    sorted_normal_perf_list = []
+    normal_perf_list = {}
+    for key, v in sorted(ratio_list.items(), key=lambda x: x[1]):
+        if ratio_list[key] < RATIO_MIN:
+            decreased_perf_list.append(key)
+        elif ratio_list[key] > RATIO_MAX:
+            increased_perf_list.append(key)
+        else:
+            normal_perf_list[key] = v
+    for key, v in sorted(normal_perf_list.items(), key=lambda x: x[1],
+                         reverse=True):
+        sorted_normal_perf_list.append(key)
+    if changes_only:
+        complete_perf_list = decreased_perf_list + increased_perf_list
+    else:
+        complete_perf_list = (decreased_perf_list + increased_perf_list +
+                              sorted_normal_perf_list)
+
+    return (complete_perf_list, increased_perf_list,
+            decreased_perf_list, sorted_normal_perf_list)
+
+
+def nthroot(y, n):
+    x, xp = 1, -1
+    while abs(x - xp) > 1:
+        xp, x = x, x - x/n + y/(n * x**(n-1))
+    while x**n > y:
+        x -= 1
+    return x
+
+
+def max_width(items, title, key_len=False):
+    """
+    Returns the max length of string in the list
+    """
+    width = len(str(title))
+    for key in items.keys():
+        if key_len:
+            if width < len(str(key)):
+                width = len(str(key))
+        else:
+            if width < len(str(items[key])):
+                width = len(str(items[key]))
+    return width
+
+
+if __name__ == "__main__":
+    sys.exit(main())
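
For reference, a self-contained sketch of how one test is classified and rendered with the templates above; the test name and timings are hypothetical, and the default --delta-threshold of 0.05 is assumed:

# Hypothetical MIN times (microseconds) for one benchmark in the old and new runs.
old_min, new_min = 812, 931
delta_threshold = 0.05                       # the script's default
RATIO_MIN, RATIO_MAX = 1 - delta_threshold, 1 + delta_threshold

# Same formulas as in main(): ratio < RATIO_MIN marks a regression,
# ratio > RATIO_MAX an improvement, anything in between "no change".
ratio = round((old_min + 0.001) / (new_min + 0.001), 2)                     # 0.87
delta = round(((float(new_min + 0.001) / (old_min + 0.001)) - 1) * 100, 2)  # 14.66

# Render one markdown row using this commit's templates.
MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
HEADER_SPLIT = "---"
header = MARKDOWN_ROW.format("TEST", "OLD_MIN", "NEW_MIN", "DELTA (%)", "SPEEDUP")
header += MARKDOWN_ROW.format(*([HEADER_SPLIT] * 5))
row = MARKDOWN_ROW.format("Ackermann", old_min, new_min,
                          "{0:+.1f}%".format(delta),
                          "**{0:.2f}x**".format(ratio))
print(header + row)   # 0.87 < RATIO_MIN, so this row would land in the Regression section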