[Compare Perf] Add support for markdown and html

Mishal Shah
2016-03-14 14:11:01 -07:00
parent defe364a59
commit cb23837bb9
2 changed files with 349 additions and 201 deletions
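
With this change the comparison script takes explicit flags instead of two positional .times files, and it can emit either markdown or HTML. A minimal invocation sketch, assuming two hypothetical CSV result files named old_O.csv and new_O.csv (the flag names come from the argparse definitions in the diff below):

# Sketch only: the file paths are hypothetical; the flags match this commit's argparse setup.
import subprocess

# Markdown report: printed to stdout, and also written to a file when --output is given.
subprocess.call(['python', 'compare_perf_tests.py',
                 '--old-file', 'old_O.csv',
                 '--new-file', 'new_O.csv',
                 '--format', 'markdown',
                 '--output', 'latest_compare_O.md'])

# HTML report: --output is required here, otherwise the script prints an error and exits.
subprocess.call(['python', 'compare_perf_tests.py',
                 '--old-file', 'old_O.csv',
                 '--new-file', 'new_O.csv',
                 '--format', 'html',
                 '--output', 'latest_compare_O.html'])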


@@ -255,10 +255,13 @@ def format_name(log_path):
     return '/'.join(log_path.split('/')[-2:])


-def compare_logs(compare_script, new_log, old_log):
+def compare_logs(compare_script, new_log, old_log, log_dir, opt):
     """Return diff of log files at paths `new_log` and `old_log`"""
     print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
-    subprocess.call([compare_script, old_log, new_log])
+    subprocess.call([compare_script, '--old-file', old_log,
+                     '--new-file', new_log, '--format', 'markdown',
+                     '--output', os.path.join(log_dir, 'latest_compare_{0}.md'
+                                              .format(opt))])


 def compare(args):
@@ -288,10 +291,12 @@ def compare(args):
                 len(recent_logs['master_Onone']) > 1:
             compare_logs(compare_script,
                          recent_logs['master_O'][0],
-                         recent_logs['master_O'][1])
+                         recent_logs['master_O'][1],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs['master_Onone'][0],
-                         recent_logs['master_Onone'][1])
+                         recent_logs['master_Onone'][1],
+                         log_dir, 'Onone')
         else:
             print('master/master comparison skipped: no previous master logs')
     else:
@@ -307,10 +312,12 @@ def compare(args):
         else:
             compare_logs(compare_script,
                          recent_logs[current_branch + '_O'][0],
-                         recent_logs[current_branch + '_O'][1])
+                         recent_logs[current_branch + '_O'][1],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs[current_branch + '_Onone'][0],
-                         recent_logs[current_branch + '_Onone'][1])
+                         recent_logs[current_branch + '_Onone'][1],
+                         log_dir, 'Onone')

         if len(recent_logs['master_O']) == 0 or \
            len(recent_logs['master_Onone']) == 0:
@@ -319,10 +326,12 @@ def compare(args):
         else:
             compare_logs(compare_script,
                          recent_logs[current_branch + '_O'][0],
-                         recent_logs['master_O'][0])
+                         recent_logs['master_O'][0],
+                         log_dir, 'O')
             compare_logs(compare_script,
                          recent_logs[current_branch + '_Onone'][0],
-                         recent_logs['master_Onone'][0])
+                         recent_logs['master_Onone'][0],
+                         log_dir, 'Onone')

     # TODO: Fail on large regressions
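
As a usage note, the driver now writes one markdown report per optimization mode into its log directory instead of printing the comparison. A small sketch of the output paths compare_logs() builds, assuming a hypothetical log directory:

# Sketch: mirrors the os.path.join(...) call added above; '/tmp/benchmark_logs' is hypothetical.
import os

log_dir = '/tmp/benchmark_logs'
for opt in ('O', 'Onone'):
    print(os.path.join(log_dir, 'latest_compare_{0}.md'.format(opt)))
# Prints:
#   /tmp/benchmark_logs/latest_compare_O.md
#   /tmp/benchmark_logs/latest_compare_Onone.md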


@@ -13,221 +13,360 @@
 #
 # ===---------------------------------------------------------------------===//
-# e.g.
-# repeat.sh 3 tot/bin/Benchmark_Driver run -o -O > tot.O.times
-# repeat.sh 3 mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.times
-# compare_perf_tests.py tot.O.times mypatch.O.times | sort -t, -n -k 6 | \
-#     column -s, -t
-from __future__ import print_function
-import re
+import argparse
+import csv
 import sys
-
-VERBOSE = 0
-
-# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
-SCORERE = re.compile(
-    r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
-TOTALRE = re.compile(
-    r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
-NUMGROUP = 1
-KEYGROUP = 2
-BESTGROUP = 4
-WORSTGROUP = 5
-
-IsTime = 1
-ShowSpeedup = 1
-PrintAllScores = 0
-
-
-def parse_int(word):
-    try:
-        return int(word)
-    except ValueError:
-        raise Exception("Expected integer value, not " + word)
-
-
-def get_scores(fname):
-    scores = {}
-    worstscores = {}
-    nums = {}
-    runs = 0
-    f = open(fname)
-    try:
-        for line in f:
-            if VERBOSE:
-                print("Parsing", line, end="")
-            m = SCORERE.match(line)
-            is_total = False
-            if not m:
-                is_total = True
-                m = TOTALRE.match(line)
-            if not m:
-                continue
-            if VERBOSE:
-                print(" match", m.group(KEYGROUP), m.group(BESTGROUP))
-            if not m.group(KEYGROUP) in scores:
-                scores[m.group(KEYGROUP)] = []
-                worstscores[m.group(KEYGROUP)] = []
-            scores[m.group(KEYGROUP)].append(parse_int(m.group(BESTGROUP)))
-            worstscores[m.group(KEYGROUP)].append(
-                parse_int(m.group(WORSTGROUP)))
-            if is_total:
-                nums[m.group(KEYGROUP)] = ""
-            else:
-                nums[m.group(KEYGROUP)] = m.group(NUMGROUP)
-            if len(scores[m.group(KEYGROUP)]) > runs:
-                runs = len(scores[m.group(KEYGROUP)])
-    finally:
-        f.close()
-    return scores, worstscores, runs, nums
-
-
-def is_max_score(newscore, maxscore, invert):
-    return not maxscore or \
-        (newscore > maxscore if not invert else newscore < maxscore)
-
-
-def compare_scores(key, score1, worstsample1, score2, worstsample2, runs, num):
-    print(num.rjust(3), end=" ")
-    print(key.ljust(25), end="")
-    bestscore1 = None
-    bestscore2 = None
-    worstscore1 = None
-    worstscore2 = None
-    minbest = IsTime
-    minworst = not minbest
-    r = 0
-    for score in score1:
-        if is_max_score(newscore=score, maxscore=bestscore1, invert=minbest):
-            bestscore1 = score
-        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
-            worstscore1 = score
-        if PrintAllScores:
-            print (("%d" % score).rjust(16), end="")
-    for score in worstsample1:
-        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
-            worstscore1 = score
-    for score in score2:
-        if is_max_score(newscore=score, maxscore=bestscore2, invert=minbest):
-            bestscore2 = score
-        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
-            worstscore2 = score
-        if PrintAllScores:
-            print (("%d" % score).rjust(16), end="")
-        r += 1
-    for score in worstsample2:
-        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
-            worstscore2 = score
-    while r < runs:
-        if PrintAllScores:
-            print ("0".rjust(9), end="")
-        r += 1
-    if not PrintAllScores:
-        print (("%d" % bestscore1).rjust(16), end="")
-        print (("%d" % bestscore2).rjust(16), end="")
-    print (("%+d" % (bestscore2 - bestscore1)).rjust(9), end="")
-    if bestscore1 != 0 and bestscore2 != 0:
-        print(("%+.1f%%" %
-               (((float(bestscore2) / bestscore1) - 1) * 100)).rjust(9),
-              end="")
-        if ShowSpeedup:
-            Num, Den = float(bestscore2), float(bestscore1)
-            if IsTime:
-                Num, Den = Den, Num
-            print (("%.2fx" % (Num / Den)).rjust(9), end="")
-    else:
-        print("*".rjust(9), end="")
-        if ShowSpeedup:
-            print("*".rjust(9), end="")
-    # check if the worst->best interval for each configuration overlap.
-    if minbest:
-        if (bestscore1 < bestscore2 and bestscore2 < worstscore1) \
-           or (bestscore2 < bestscore1 and bestscore1 < worstscore2):
-            print("(?)", end="")
-    else:
-        if (worstscore1 < worstscore2 and worstscore2 < bestscore1) \
-           or (worstscore2 < worstscore1 and worstscore1 < bestscore2):
-            print("(?)", end="")
-    print()
-
-
-def print_best_scores(key, scores):
-    print(key, end="")
-    bestscore = None
-    minbest = IsTime
-    for score in scores:
-        if is_max_score(newscore=score, maxscore=bestscore, invert=minbest):
-            bestscore = score
-    print(", %d" % bestscore)
-
-
-def usage():
-    print("repeat.sh <n> Benchmark_O[none|unchecked] > file.times")
-    print("compare_perf_tests.py <file.times> [<file2.times>]")
-
-
-if __name__ == '__main__':
-    if len(sys.argv) < 2:
-        usage()
-        sys.exit(1)
-    file1 = sys.argv[1]
-    if len(sys.argv) < 3:
-        scores, worstscores, runs, nums = get_scores(file1)
-        keys = list(set(scores.keys()))
-        keys.sort()
-        for key in keys:
-            print_best_scores(key, scores[key])
-        sys.exit(0)
-    file2 = sys.argv[2]
-    if len(sys.argv) > 3:
-        SCORERE = re.compile(sys.argv[3])
-    scores1, worstscores1, runs1, nums = get_scores(file1)
-    scores2, worstscores2, runs2, nums = get_scores(file2)
-    runs = runs1
-    if runs2 > runs:
-        runs = runs2
-    if VERBOSE:
-        print(scores1)
-        print(scores2)
-    keys = list(set(scores1.keys() + scores2.keys()))
-    keys.sort()
-    if VERBOSE:
-        print("comparing ", file1, "vs", file2, "=", end="")
-        if IsTime:
-            print(file1, "/", file2)
-        else:
-            print(file2, "/", file1)
-    print("#".rjust(3), end=" ")
-    print("TEST".ljust(25), end="")
-    if PrintAllScores:
-        for i in range(0, runs):
-            print(("OLD_RUN%d" % i).rjust(9), end="")
-        for i in range(0, runs):
-            print(("NEW_RUN%d" % i).rjust(9), end="")
-    else:
-        print("BEST_OLD_MIN(μs)".rjust(17), end=" ")
-        print("BEST_NEW_MIN(μs)".rjust(17), end=" ")
-    print('DELTA'.rjust(9), '%DELTA'.rjust(9), 'SPEEDUP'.rjust(9))
-    for key in keys:
-        if key not in scores1:
-            print(key, "not in", file1)
-            continue
-        if key not in scores2:
-            print(key, "not in", file2)
-            continue
-        compare_scores(key, scores1[key], worstscores1[key], scores2[key],
-                       worstscores2[key], runs, nums[key])
+
+TESTNAME = 1
+SAMPLES = 2
+MIN = 3
+MAX = 4
+MEAN = 5
+SD = 6
+MEDIAN = 7
+
+HTML = """
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+</head>
+<body>
+{0}
+</body>
+</html>"""
+
+HTML_TABLE = """
+<table>
+<tr>
+<th align='left'>{0}</th>
+<th align='left'>{1}</th>
+<th align='left'>{2}</th>
+<th align='left'>{3}</th>
+<th align='left'>{4}</th>
+</tr>
+{5}
+</table>
+"""
+
+HTML_ROW = """
+<tr>
+<td align='left'>{0}</td>
+<td align='left'>{1}</td>
+<td align='left'>{2}</td>
+<td align='left'>{3}</td>
+<td align='left'><font color='{4}'>{5}</font></td>
+</tr>
+"""
+
+MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
+HEADER_SPLIT = "---"
+MARKDOWN_DETAIL = """
+<details {3}>
+<summary>{0} ({1})</summary>
+{2}
+</details>
+"""
+PAIN_FORMAT = """
+Regression: {0}
+Improvement: {1}
+No Changes: {2}
+"""
+
+
+def main():
+    global RATIO_MIN
+    global RATIO_MAX
+
+    old_results = {}
+    new_results = {}
+    old_max_results = {}
+    new_max_results = {}
+    ratio_list = {}
+    delta_list = {}
+    unknown_list = {}
+    complete_perf_list = []
+    increased_perf_list = []
+    decreased_perf_list = []
+    normal_perf_list = []
+
+    parser = argparse.ArgumentParser(description="Compare Performance tests.")
+    parser.add_argument('--old-file',
+                        help='Baseline performance test suite (csv file)',
+                        required=True)
+    parser.add_argument('--new-file',
+                        help='New performance test suite (csv file)',
+                        required=True)
+    parser.add_argument('--format',
+                        help='Supported format git, html and markdown',
+                        default="markdown")
+    parser.add_argument('--output', help='Output file name')
+    parser.add_argument('--changes-only',
+                        help='Output only affected tests', action='store_true')
+    parser.add_argument('--new-branch',
+                        help='Name of the new branch', default="NEW_MIN")
+    parser.add_argument('--old-branch',
+                        help='Name of the old branch', default="OLD_MIN")
+    parser.add_argument('--delta-threshold',
+                        help='delta threshold', default="0.05")
+
+    args = parser.parse_args()
+
+    old_file = args.old_file
+    new_file = args.new_file
+
+    new_branch = args.new_branch
+    old_branch = args.old_branch
+
+    old_data = csv.reader(open(old_file))
+    new_data = csv.reader(open(new_file))
+
+    RATIO_MIN = 1 - float(args.delta_threshold)
+    RATIO_MAX = 1 + float(args.delta_threshold)
+
+    for row in old_data:
+        if (len(row) > 7 and row[MIN].isdigit()):
+            if row[TESTNAME] in old_results:
+                if old_results[row[TESTNAME]] > row[MIN]:
+                    old_results[row[TESTNAME]] = int(row[MIN])
+                if old_max_results[row[TESTNAME]] < row[MAX]:
+                    old_max_results[row[TESTNAME]] = int(row[MAX])
+            else:
+                old_results[row[TESTNAME]] = int(row[MIN])
+                old_max_results[row[TESTNAME]] = int(row[MAX])
+
+    for row in new_data:
+        if (len(row) > 7 and row[MIN].isdigit()):
+            if row[TESTNAME] in new_results:
+                if new_results[row[TESTNAME]] > row[MIN]:
+                    new_results[row[TESTNAME]] = int(row[MIN])
+                if new_max_results[row[TESTNAME]] < row[MAX]:
+                    new_max_results[row[TESTNAME]] = int(row[MAX])
+            else:
+                new_results[row[TESTNAME]] = int(row[MIN])
+                new_max_results[row[TESTNAME]] = int(row[MAX])
+
+    ratio_total = 0
+    for key in new_results.keys():
+        ratio = (old_results[key]+0.001)/(new_results[key]+0.001)
+        ratio_list[key] = round(ratio, 2)
+        ratio_total *= ratio
+        delta = (((float(new_results[key]+0.001) /
+                  (old_results[key]+0.001)) - 1) * 100)
+        delta_list[key] = round(delta, 2)
+        if ((old_results[key] < new_results[key] and
+             new_results[key] < old_max_results[key]) or
+            (new_results[key] < old_results[key] and
+             old_results[key] < new_max_results[key])):
+            unknown_list[key] = "(?)"
+        else:
+            unknown_list[key] = ""
+
+    (complete_perf_list,
+     increased_perf_list,
+     decreased_perf_list,
+     normal_perf_list) = sort_ratio_list(ratio_list, args.changes_only)
+
+    """
+    Create markdown formated table
+    """
+    test_name_width = max_width(ratio_list, title='TEST', key_len=True)
+    new_time_width = max_width(new_results, title=new_branch)
+    old_time_width = max_width(old_results, title=old_branch)
+    delta_width = max_width(delta_list, title='DELTA (%)')
+
+    markdown_table_header = "\n" + MARKDOWN_ROW.format(
+        "TEST".ljust(test_name_width),
+        old_branch.ljust(old_time_width),
+        new_branch.ljust(new_time_width),
+        "DELTA (%)".ljust(delta_width),
+        "SPEEDUP".ljust(2))
+    markdown_table_header += MARKDOWN_ROW.format(
+        HEADER_SPLIT.ljust(test_name_width),
+        HEADER_SPLIT.ljust(old_time_width),
+        HEADER_SPLIT.ljust(new_time_width),
+        HEADER_SPLIT.ljust(delta_width),
+        HEADER_SPLIT.ljust(2))
+
+    markdown_regression = ""
+    for i, key in enumerate(decreased_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_regression = markdown_table_header
+        markdown_regression += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_improvement = ""
+    for i, key in enumerate(increased_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_improvement = markdown_table_header
+        markdown_improvement += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "**{0}{1}**".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_normal = ""
+    for i, key in enumerate(normal_perf_list):
+        ratio = "{0:.2f}x".format(ratio_list[key])
+        if i == 0:
+            markdown_normal = markdown_table_header
+        markdown_normal += MARKDOWN_ROW.format(
+            key.ljust(test_name_width),
+            str(old_results[key]).ljust(old_time_width),
+            str(new_results[key]).ljust(new_time_width),
+            ("{0:+.1f}%".format(delta_list[key])).ljust(delta_width),
+            "{0}{1}".format(str(ratio).ljust(2), unknown_list[key]))
+
+    markdown_data = MARKDOWN_DETAIL.format("Regression",
+                                           len(decreased_perf_list),
+                                           markdown_regression, "open")
+    markdown_data += MARKDOWN_DETAIL.format("Improvement",
+                                            len(increased_perf_list),
+                                            markdown_improvement, "")
+    markdown_data += MARKDOWN_DETAIL.format("No Changes",
+                                            len(normal_perf_list),
+                                            markdown_normal, "")
+
+    if args.format:
+        if args.format.lower() != "markdown":
+            pain_data = PAIN_FORMAT.format(markdown_regression,
+                                           markdown_improvement,
+                                           markdown_normal)
+            print(pain_data.replace("|", " ").replace("-", " "))
+        else:
+            print(markdown_data)
+
+    if args.format:
+        if args.format.lower() == "html":
+            """
+            Create HTML formated table
+            """
+            html_data = convert_to_html(ratio_list, old_results, new_results,
+                                        delta_list, unknown_list, old_branch,
+                                        new_branch, args.changes_only)
+
+            if args.output:
+                write_to_file(args.output, html_data)
+            else:
+                print("Error: missing --output flag.")
+                exit(1)
+        elif args.format.lower() == "markdown" and args.output:
+            write_to_file(args.output, markdown_data)
+        elif args.format.lower() != "markdown":
+            print("{0} is unknown format.".format(args.format))
+            exit(1)
+
+
+def convert_to_html(ratio_list, old_results, new_results, delta_list,
+                    unknown_list, old_branch, new_branch, changes_only):
+    (complete_perf_list,
+     increased_perf_list,
+     decreased_perf_list,
+     normal_perf_list) = sort_ratio_list(ratio_list, changes_only)
+
+    html_rows = ""
+    for key in complete_perf_list:
+        if ratio_list[key] < RATIO_MIN:
+            color = "red"
+        elif ratio_list[key] > RATIO_MAX:
+            color = "green"
+        else:
+            color = "black"
+        if len(decreased_perf_list) > 0 and key == decreased_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>Regression:</strong>",
+                "", "", "", "black", "", "")
+        if len(increased_perf_list) > 0 and key == increased_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>Improvement:</strong>",
+                "", "", "", "black", "", "")
+        if len(normal_perf_list) > 0 and key == normal_perf_list[0]:
+            html_rows += HTML_ROW.format(
+                "<strong>No Changes:</strong>",
+                "", "", "", "black", "", "")
+        html_rows += HTML_ROW.format(key, old_results[key],
+                                     new_results[key],
+                                     "{0:+.1f}%".format(delta_list[key]),
+                                     color,
+                                     "{0:.2f}x {1}".format(ratio_list[key],
+                                                           unknown_list[key]))
+
+    html_table = HTML_TABLE.format("TEST", old_branch, new_branch,
+                                   "DELTA (%)", "SPEEDUP", html_rows)
+    html_data = HTML.format(html_table)
+    return html_data
+
+
+def write_to_file(file_name, data):
+    """
+    Write data to given file
+    """
+    file = open(file_name, "w")
+    file.write(data)
+    file.close
+
+
+def sort_ratio_list(ratio_list, changes_only=False):
+    """
+    Return 3 sorted list imporvment, regression and normal.
+    """
+    decreased_perf_list = []
+    increased_perf_list = []
+    sorted_normal_perf_list = []
+    normal_perf_list = {}
+    for key, v in sorted(ratio_list.items(), key=lambda x: x[1]):
+        if ratio_list[key] < RATIO_MIN:
+            decreased_perf_list.append(key)
+        elif ratio_list[key] > RATIO_MAX:
+            increased_perf_list.append(key)
+        else:
+            normal_perf_list[key] = v
+    for key, v in sorted(normal_perf_list.items(), key=lambda x: x[1],
+                         reverse=True):
+        sorted_normal_perf_list.append(key)
+    if changes_only:
+        complete_perf_list = decreased_perf_list + increased_perf_list
+    else:
+        complete_perf_list = (decreased_perf_list + increased_perf_list +
+                              sorted_normal_perf_list)
+
+    return (complete_perf_list, increased_perf_list,
+            decreased_perf_list, sorted_normal_perf_list)
+
+
+def nthroot(y, n):
+    x, xp = 1, -1
+    while abs(x - xp) > 1:
+        xp, x = x, x - x/n + y/(n * x**(n-1))
+    while x**n > y:
+        x -= 1
+    return x
+
+
+def max_width(items, title, key_len=False):
+    """
+    Returns the max length of string in the list
+    """
+    width = len(str(title))
+    for key in items.keys():
+        if key_len:
+            if width < len(str(key)):
+                width = len(str(key))
+        else:
+            if width < len(str(items[key])):
+                width = len(str(items[key]))
+    return width
+
+
+if __name__ == "__main__":
+    sys.exit(main())
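
For reference, a self-contained sketch of how one test is classified and rendered with the templates above; the test name and timings are hypothetical, and the default --delta-threshold of 0.05 is assumed:

# Hypothetical MIN times (microseconds) for one benchmark in the old and new runs.
old_min, new_min = 812, 931
delta_threshold = 0.05                       # the script's default
RATIO_MIN, RATIO_MAX = 1 - delta_threshold, 1 + delta_threshold

# Same formulas as in main(): ratio < RATIO_MIN marks a regression,
# ratio > RATIO_MAX an improvement, anything in between "no change".
ratio = round((old_min + 0.001) / (new_min + 0.001), 2)                     # 0.87
delta = round(((float(new_min + 0.001) / (old_min + 0.001)) - 1) * 100, 2)  # 14.66

# Render one markdown row using this commit's templates.
MARKDOWN_ROW = "{0} | {1} | {2} | {3} | {4} \n"
HEADER_SPLIT = "---"
header = MARKDOWN_ROW.format("TEST", "OLD_MIN", "NEW_MIN", "DELTA (%)", "SPEEDUP")
header += MARKDOWN_ROW.format(*([HEADER_SPLIT] * 5))
row = MARKDOWN_ROW.format("Ackermann", old_min, new_min,
                          "{0:+.1f}%".format(delta),
                          "**{0:.2f}x**".format(ratio))
print(header + row)   # 0.87 < RATIO_MIN, so this row would land in the Regression section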