[Compare Perf] Add support for markdown and html

Mishal Shah
2016-03-14 14:11:01 -07:00
parent defe364a59
commit cb23837bb9
2 changed files with 349 additions and 201 deletions

compare_perf_tests.py

@@ -13,221 +13,360 @@
#
# ===---------------------------------------------------------------------===//
# e.g.
#   tot/bin/Benchmark_Driver run -o -O > tot.O.csv
#   mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.csv
#   compare_perf_tests.py --old-file tot.O.csv --new-file mypatch.O.csv \
#       --format markdown
from __future__ import print_function

import argparse
import csv
import sys

# CSV column layout:
#   #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
TESTNAME = 1
SAMPLES = 2
MIN = 3
MAX = 4
MEAN = 5
SD = 6
MEDIAN = 7
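
# A sample row in this layout (values illustrative):
#   34,Ackermann,20,715,736,723,7,722
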
HTML = """
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
</head>
<body>
{0}
</body>
</html>"""

HTML_TABLE = """
<table>
    <tr>
        <th align='left'>{0}</th>
        <th align='left'>{1}</th>
        <th align='left'>{2}</th>
        <th align='left'>{3}</th>
        <th align='left'>{4}</th>
    </tr>
    {5}
</table>
"""

HTML_ROW = """
    <tr>
        <td align='left'>{0}</td>
        <td align='left'>{1}</td>
        <td align='left'>{2}</td>
        <td align='left'>{3}</td>
        <td align='left'><font color='{4}'>{5}</font></td>
    </tr>
"""

MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
HEADER_SPLIT = "---"
MARKDOWN_DETAIL = """
<details {3}>
  <summary>{0} ({1})</summary>
  {2}
</details>
"""
PLAIN_FORMAT = """
Regression: {0}
Improvement: {1}
No Changes: {2}
"""


def main():
    global RATIO_MIN
    global RATIO_MAX

    old_results = {}
    new_results = {}
    old_max_results = {}
    new_max_results = {}
    ratio_list = {}
    delta_list = {}
    unknown_list = {}
    complete_perf_list = []
    increased_perf_list = []
    decreased_perf_list = []
    normal_perf_list = []
    parser = argparse.ArgumentParser(description="Compare Performance tests.")
    parser.add_argument('--old-file',
                        help='Baseline performance test suite (csv file)',
                        required=True)
    parser.add_argument('--new-file',
                        help='New performance test suite (csv file)',
                        required=True)
    parser.add_argument('--format',
                        help='Supported formats: git, html, and markdown',
                        default="markdown")
    parser.add_argument('--output', help='Output file name')
    parser.add_argument('--changes-only',
                        help='Output only affected tests', action='store_true')
    parser.add_argument('--new-branch',
                        help='Name of the new branch', default="NEW_MIN")
    parser.add_argument('--old-branch',
                        help='Name of the old branch', default="OLD_MIN")
    parser.add_argument('--delta-threshold',
                        help='Delta threshold (default: 0.05)',
                        default="0.05")

    args = parser.parse_args()

    old_file = args.old_file
    new_file = args.new_file
    new_branch = args.new_branch
    old_branch = args.old_branch

    old_data = csv.reader(open(old_file))
    new_data = csv.reader(open(new_file))

    RATIO_MIN = 1 - float(args.delta_threshold)
    RATIO_MAX = 1 + float(args.delta_threshold)

    # Keep the best (lowest) MIN and the worst (highest) MAX seen for
    # each test in the baseline run.
    for row in old_data:
        if len(row) > 7 and row[MIN].isdigit():
            if row[TESTNAME] in old_results:
                if old_results[row[TESTNAME]] > int(row[MIN]):
                    old_results[row[TESTNAME]] = int(row[MIN])
                if old_max_results[row[TESTNAME]] < int(row[MAX]):
                    old_max_results[row[TESTNAME]] = int(row[MAX])
            else:
                old_results[row[TESTNAME]] = int(row[MIN])
                old_max_results[row[TESTNAME]] = int(row[MAX])

    # Same for the new run.
    for row in new_data:
        if len(row) > 7 and row[MIN].isdigit():
            if row[TESTNAME] in new_results:
                if new_results[row[TESTNAME]] > int(row[MIN]):
                    new_results[row[TESTNAME]] = int(row[MIN])
                if new_max_results[row[TESTNAME]] < int(row[MAX]):
                    new_max_results[row[TESTNAME]] = int(row[MAX])
            else:
                new_results[row[TESTNAME]] = int(row[MIN])
                new_max_results[row[TESTNAME]] = int(row[MAX])

    ratio_total = 1  # product of ratios; must seed at 1, not 0
    for key in new_results.keys():
        if key not in old_results:
            continue  # test present only in the new run
        ratio = (old_results[key] + 0.001) / (new_results[key] + 0.001)
        ratio_list[key] = round(ratio, 2)
        ratio_total *= ratio
        delta = (((float(new_results[key] + 0.001) /
                   (old_results[key] + 0.001)) - 1) * 100)
        delta_list[key] = round(delta, 2)
        # Mark the comparison as inconclusive when the two runs'
        # [MIN, MAX] ranges overlap.
        if ((old_results[key] < new_results[key] and
                new_results[key] < old_max_results[key]) or
                (new_results[key] < old_results[key] and
                 old_results[key] < new_max_results[key])):
            unknown_list[key] = "(?)"
        else:
            unknown_list[key] = ""

    (complete_perf_list,
     increased_perf_list,
     decreased_perf_list,
     normal_perf_list) = sort_ratio_list(ratio_list, args.changes_only)
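
    # Worked example (illustrative numbers): old MIN = 150, new MIN = 100.
    #   ratio = 150.001 / 100.001 ~= 1.50 (the new run is faster)
    #   delta = (100.001 / 150.001 - 1) * 100 ~= -33.3%
    # With the default 0.05 threshold, ratios above 1.05 count as
    # improvements and ratios below 0.95 as regressions.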
"""
Create markdown formated table
"""
test_name_width = max_width(ratio_list, title='TEST', key_len=True)
new_time_width = max_width(new_results, title=new_branch)
old_time_width = max_width(old_results, title=old_branch)
delta_width = max_width(delta_list, title='DELTA (%)')

    markdown_table_header = "\n" + MARKDOWN_ROW.format(
        "TEST".ljust(test_name_width),
        old_branch.ljust(old_time_width),
        new_branch.ljust(new_time_width),
        "DELTA (%)".ljust(delta_width),
        "SPEEDUP".ljust(2))
    markdown_table_header += MARKDOWN_ROW.format(
        HEADER_SPLIT.ljust(test_name_width),
        HEADER_SPLIT.ljust(old_time_width),
        HEADER_SPLIT.ljust(new_time_width),
        HEADER_SPLIT.ljust(delta_width),
        HEADER_SPLIT.ljust(2))

    markdown_regression = ""
    for i, key in enumerate(decreased_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_regression = markdown_table_header
        markdown_regression += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))
print (("%+d" % (bestscore2 - bestscore1)).rjust(9), end="")

    markdown_improvement = ""
    for i, key in enumerate(increased_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_improvement = markdown_table_header
        markdown_improvement += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))

    markdown_normal = ""
    for i, key in enumerate(normal_perf_list):
        ratio = "{0:.2f}x".format(ratio_list[key])
        if i == 0:
            markdown_normal = markdown_table_header
        markdown_normal += MARKDOWN_ROW.format(
            key.ljust(test_name_width),
            str(old_results[key]).ljust(old_time_width),
            str(new_results[key]).ljust(new_time_width),
            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
            "{0}{1}".format(str(ratio).ljust(2), unknown_list[key]))

    markdown_data = MARKDOWN_DETAIL.format("Regression",
                                           len(decreased_perf_list),
                                           markdown_regression, "open")
    markdown_data += MARKDOWN_DETAIL.format("Improvement",
                                            len(increased_perf_list),
                                            markdown_improvement, "")
    markdown_data += MARKDOWN_DETAIL.format("No Changes",
                                            len(normal_perf_list),
                                            markdown_normal, "")

    if args.format:
        if args.format.lower() != "markdown":
            # Any non-markdown format also gets a plain-text summary on
            # stdout, with the table punctuation stripped.
            plain_data = PLAIN_FORMAT.format(markdown_regression,
                                             markdown_improvement,
                                             markdown_normal)
            print(plain_data.replace("|", " ").replace("-", " "))
        else:
            print(markdown_data)
print("#".rjust(3), end=" ")
print("TEST".ljust(25), end="")
if PrintAllScores:
for i in range(0, runs):
print(("OLD_RUN%d" % i).rjust(9), end="")
for i in range(0, runs):
print(("NEW_RUN%d" % i).rjust(9), end="")

    if args.format:
        if args.format.lower() == "html":
            # Build the HTML-formatted table.
            html_data = convert_to_html(ratio_list, old_results, new_results,
                                        delta_list, unknown_list, old_branch,
                                        new_branch, args.changes_only)

            if args.output:
                write_to_file(args.output, html_data)
            else:
                print("Error: missing --output flag.")
                exit(1)
        elif args.format.lower() == "markdown" and args.output:
            write_to_file(args.output, markdown_data)
        elif args.format.lower() != "markdown":
            print("Error: {0} is an unknown format.".format(args.format))
            exit(1)


def convert_to_html(ratio_list, old_results, new_results, delta_list,
                    unknown_list, old_branch, new_branch, changes_only):
    (complete_perf_list,
     increased_perf_list,
     decreased_perf_list,
     normal_perf_list) = sort_ratio_list(ratio_list, changes_only)

    html_rows = ""
    for key in complete_perf_list:
        if ratio_list[key] < RATIO_MIN:
            color = "red"
        elif ratio_list[key] > RATIO_MAX:
            color = "green"
        else:
            color = "black"
        # Emit a bold section-header row before the first test of each group.
        if len(decreased_perf_list) > 0 and key == decreased_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>Regression:</strong>", "", "", "", "black", "")
        if len(increased_perf_list) > 0 and key == increased_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>Improvement:</strong>", "", "", "", "black", "")
        if len(normal_perf_list) > 0 and key == normal_perf_list[0]:
            html_rows += HTML_ROW.format(
                "<strong>No Changes:</strong>", "", "", "", "black", "")
        html_rows += HTML_ROW.format(key, old_results[key],
                                     new_results[key],
                                     "{0:+.1f}%".format(delta_list[key]),
                                     color,
                                     "{0:.2f}x {1}".format(ratio_list[key],
                                                           unknown_list[key]))

    html_table = HTML_TABLE.format("TEST", old_branch, new_branch,
                                   "DELTA (%)", "SPEEDUP", html_rows)
    html_data = HTML.format(html_table)
    return html_data
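
# A minimal sketch of a call (illustrative values; assumes main() has
# already set the RATIO_MIN/RATIO_MAX globals):
#   convert_to_html({'Ackermann': 0.86}, {'Ackermann': 715},
#                   {'Ackermann': 832}, {'Ackermann': 16.36},
#                   {'Ackermann': ''}, 'OLD_MIN', 'NEW_MIN', False)
# returns a standalone HTML page with one red "0.86x" row under a
# "Regression:" header row.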


def write_to_file(file_name, data):
    """
    Write data to the given file.
    """
    f = open(file_name, "w")
    f.write(data)
    f.close()


def sort_ratio_list(ratio_list, changes_only=False):
    """
    Partition tests into regression, improvement, and no-change lists,
    and return them along with a combined list.
    """
    decreased_perf_list = []
    increased_perf_list = []
    sorted_normal_perf_list = []
    normal_perf_list = {}

    # Walk the ratios in ascending order; ratio = old/new, so values
    # below RATIO_MIN are regressions and values above RATIO_MAX are
    # improvements.
    for key, v in sorted(ratio_list.items(), key=lambda x: x[1]):
        if ratio_list[key] < RATIO_MIN:
            decreased_perf_list.append(key)
        elif ratio_list[key] > RATIO_MAX:
            increased_perf_list.append(key)
        else:
            normal_perf_list[key] = v

    # List the unchanged tests in descending ratio order.
    for key, v in sorted(normal_perf_list.items(), key=lambda x: x[1],
                         reverse=True):
        sorted_normal_perf_list.append(key)

    if changes_only:
        complete_perf_list = decreased_perf_list + increased_perf_list
    else:
        complete_perf_list = (decreased_perf_list + increased_perf_list +
                              sorted_normal_perf_list)

    return (complete_perf_list, increased_perf_list,
            decreased_perf_list, sorted_normal_perf_list)
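
# e.g. with the default threshold (RATIO_MIN=0.95, RATIO_MAX=1.05) and
# illustrative ratios:
#   sort_ratio_list({'A': 1.5, 'B': 0.6, 'C': 1.0})
#   -> (['B', 'A', 'C'], ['A'], ['B'], ['C'])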


def nthroot(y, n):
    # Integer Newton iteration for the n-th root; floors the result.
    x, xp = 1, -1
    while abs(x - xp) > 1:
        xp, x = x, x - x/n + y/(n * x**(n-1))
    while x**n > y:
        x -= 1
    return x
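
# e.g. nthroot(27, 3) -> 3. This helper is currently unused; it depends
# on Python 2 integer division to stay in integer arithmetic.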


def max_width(items, title, key_len=False):
    """
    Return the character width of the widest key (or value, when key_len
    is False) in items, but never less than the width of title.
    """
    width = len(str(title))
    for key in items.keys():
        if key_len:
            if width < len(str(key)):
                width = len(str(key))
        else:
            if width < len(str(items[key])):
                width = len(str(items[key]))
    return width
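
# e.g. (illustrative): max_width({'Ackermann': 715}, 'TEST', key_len=True)
# -> 9, while max_width({'Ackermann': 715}, 'OLD_MIN') -> 7.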
if __name__ == "__main__":
sys.exit(main())