#!/usr/bin/python
# -*- coding: utf-8 -*-

# ===--- compare_perf_tests.py --------------------------------------------===//
#
#  This source file is part of the Swift.org open source project
#
#  Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
#  Licensed under Apache License v2.0 with Runtime Library Exception
#
#  See http://swift.org/LICENSE.txt for license information
#  See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===----------------------------------------------------------------------===//

# e.g.
# repeat.sh 3 tot/bin/Benchmark_Driver run -o -O > tot.O.times
# repeat.sh 3 mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.times
# compare_perf_tests.py tot.O.times mypatch.O.times | sort -t, -n -k 6 | column -s, -t

# NOTE: every print() call in this file takes exactly one string argument so
# that the script produces the same output under Python 2 (print statement)
# and Python 3 (print function).

import re
import sys

# Set to a true value to trace parsing and comparison on stdout.
VERBOSE = 0

# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
SCORERE = re.compile(
    r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
TOTALRE = re.compile(
    r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")

# Regex group indices shared by SCORERE and TOTALRE.
NUMGROUP = 1
KEYGROUP = 2
BESTGROUP = 4
WORSTGROUP = 5

# When true, smaller scores are better (the best score is the minimum).
IsTime = 1
# When true, a speedup-ratio column is emitted for each compared test.
ShowSpeedup = 1
# When true, every individual sample is listed instead of only the best one.
PrintAllScores = 0


def parse_int(word):
    """Return *word* converted to an int.

    Raises ValueError (an Exception subclass, so existing handlers keep
    working) with a readable message when *word* is not an integer literal.
    """
    try:
        return int(word)
    except ValueError:
        raise ValueError("Expected integer value, not " + word)


def get_scores(fname):
    """Parse the benchmark results stored in the file *fname*.

    Each line is matched against SCORERE and, failing that, TOTALRE; lines
    that match neither are skipped.

    Returns a tuple ``(scores, worstscores, runs, nums)``:

    * ``scores``      -- test name -> list of BESTGROUP values, one per match
    * ``worstscores`` -- test name -> list of WORSTGROUP values, one per match
    * ``runs``        -- the largest number of samples recorded for any test
    * ``nums``        -- test name -> NUMGROUP text ("" for the Totals row)

    Raises ValueError if a matched score is not an integer.
    """
    scores = {}
    worstscores = {}
    nums = {}
    runs = 0
    with open(fname) as f:
        for line in f:
            if VERBOSE:
                # `line` still carries its own newline.
                sys.stdout.write("Parsing " + line)
            m = SCORERE.match(line)
            is_total = False
            if not m:
                is_total = True
                m = TOTALRE.match(line)
                if not m:
                    continue

            key = m.group(KEYGROUP)
            if VERBOSE:
                print(" match %s %s" % (key, m.group(BESTGROUP)))

            samples = scores.setdefault(key, [])
            worstsamples = worstscores.setdefault(key, [])
            samples.append(parse_int(m.group(BESTGROUP)))
            worstsamples.append(parse_int(m.group(WORSTGROUP)))
            nums[key] = "" if is_total else m.group(NUMGROUP)
            runs = max(runs, len(samples))
    return scores, worstscores, runs, nums


def is_max_score(newscore, maxscore, invert):
    """Tell whether *newscore* should replace the running extreme *maxscore*.

    *maxscore* is None until a first score has been seen.  With *invert*
    false the running maximum is tracked; with *invert* true, the minimum.
    """
    # Compare against None explicitly: a score of 0 is a real value and must
    # not be mistaken for "nothing recorded yet".
    if maxscore is None:
        return True
    if invert:
        return newscore < maxscore
    return newscore > maxscore


def _best_and_worst(samples, worstsamples):
    """Return (best, worst) over *samples*, widening worst by *worstsamples*.

    Which direction counts as "best" is selected by the IsTime setting.
    """
    minbest = IsTime
    minworst = not minbest
    best = None
    worst = None
    for score in samples:
        if is_max_score(newscore=score, maxscore=best, invert=minbest):
            best = score
        if is_max_score(newscore=score, maxscore=worst, invert=minworst):
            worst = score
    for score in worstsamples:
        if is_max_score(newscore=score, maxscore=worst, invert=minworst):
            worst = score
    return best, worst


def compare_scores(key, score1, worstsample1, score2, worstsample2, runs, num):
    """Print one space-separated report row comparing two sets of samples.

    key           -- test name (left-justified to 25 columns)
    score1/score2 -- best-score samples of the old / new configuration
    worstsample1/worstsample2 -- worst-score samples of old / new
    runs          -- number of run columns to pad to when PrintAllScores is set
    num           -- test number as text (right-justified to 3 columns)

    The row holds the best old and new scores (or every sample when
    PrintAllScores is set), the absolute delta, the percentage delta, the
    speedup when ShowSpeedup is set, and a trailing "(?)" marker when the
    best->worst intervals of the two configurations overlap.  "*" replaces
    the ratio columns when either best score is 0.
    """
    minbest = IsTime
    bestscore1, worstscore1 = _best_and_worst(score1, worstsample1)
    bestscore2, worstscore2 = _best_and_worst(score2, worstsample2)

    fields = [num.rjust(3), key.ljust(25)]

    if PrintAllScores:
        fields.extend(("%d" % score).rjust(16) for score in score1)
        fields.extend(("%d" % score).rjust(16) for score in score2)
        # NOTE(review): as in the original, only the new-run columns are
        # padded (and with width 9, not 16) -- confirm whether the old-run
        # columns should be padded as well.
        fields.extend("0".rjust(9) for _ in range(len(score2), runs))
    else:
        fields.append(("%d" % bestscore1).rjust(16))
        fields.append(("%d" % bestscore2).rjust(16))

    fields.append(("%+d" % (bestscore2 - bestscore1)).rjust(9))

    if bestscore1 != 0 and bestscore2 != 0:
        percent = ((float(bestscore2) / bestscore1) - 1) * 100
        fields.append(("%+.1f%%" % percent).rjust(9))
        if ShowSpeedup:
            numerator, denominator = float(bestscore2), float(bestscore1)
            if IsTime:
                # For timings a smaller new value is a speedup: old / new.
                numerator, denominator = denominator, numerator
            fields.append(("%.2fx" % (numerator / denominator)).rjust(9))
    else:
        # A ratio against zero is undefined.
        fields.append("*".rjust(9))
        if ShowSpeedup:
            fields.append("*".rjust(9))

    # check if the worst->best interval for each configuration overlap.
    if minbest:
        overlap = (bestscore1 < bestscore2 < worstscore1) \
            or (bestscore2 < bestscore1 < worstscore2)
    else:
        overlap = (worstscore1 < worstscore2 < bestscore1) \
            or (worstscore2 < worstscore1 < bestscore2)
    if overlap:
        fields.append("(?)")

    print(" ".join(fields))


def print_best_scores(key, scores):
    """Print ``<key> , <best score>`` for the samples in *scores*."""
    bestscore, _ = _best_and_worst(scores, [])
    print("%s , %d" % (key, bestscore))


def usage():
    """Print a short description of how to produce and compare score files."""
    print("repeat.sh <n> Benchmark_O[none|unchecked] > file.times")
    print("compare_perf_tests.py <file.times> "
          "[<file2.times> [<score-regex>]]")


def main(argv=None):
    """Run the command-line tool and return the process exit status.

    argv[1] -- results file; alone, the best score of each test is listed
    argv[2] -- optional second results file to compare against the first
    argv[3] -- optional regex replacing SCORERE for both files
    """
    global SCORERE

    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        usage()
        return 1

    file1 = argv[1]
    if len(argv) < 3:
        scores, _, _, _ = get_scores(file1)
        for key in sorted(scores):
            print_best_scores(key, scores[key])
        return 0

    file2 = argv[2]
    if len(argv) > 3:
        SCORERE = re.compile(argv[3])

    scores1, worstscores1, runs1, _ = get_scores(file1)
    # Test numbers are taken from the second file; rows are only compared
    # when the key is present in both files, so the lookup below is safe.
    scores2, worstscores2, runs2, nums = get_scores(file2)
    runs = max(runs1, runs2)

    if VERBOSE:
        print(str(scores1))
        print(str(scores2))

    # dict.keys() cannot be concatenated with "+" on Python 3; use a union.
    keys = sorted(set(scores1) | set(scores2))

    if VERBOSE:
        if IsTime:
            ratio = [file1, "/", file2]
        else:
            ratio = [file2, "/", file1]
        print(" ".join(["comparing ", file1, "vs", file2, "="] + ratio))

    header = ["#".rjust(3), "TEST".ljust(25)]
    if PrintAllScores:
        header.extend(("OLD_RUN%d" % i).rjust(9) for i in range(runs))
        header.extend(("NEW_RUN%d" % i).rjust(9) for i in range(runs))
    else:
        # Width 16 matches the score columns printed by compare_scores.  (The
        # former width of 17 compensated for the two-byte UTF-8 encoding of
        # the micro sign in byte strings and misaligns Unicode strings.)
        header.append("BEST_OLD_MIN(μs)".rjust(16))
        header.append("BEST_NEW_MIN(μs)".rjust(16))
    header.extend(['DELTA'.rjust(9), '%DELTA'.rjust(9), 'SPEEDUP'.rjust(9)])
    print(" ".join(header))

    for key in keys:
        if key not in scores1:
            print("%s not in %s" % (key, file1))
            continue
        if key not in scores2:
            print("%s not in %s" % (key, file2))
            continue
        compare_scores(key, scores1[key], worstscores1[key], scores2[key],
                       worstscores2[key], runs, nums[key])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))