swift-mirror/benchmark/scripts/compare_perf_tests.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

# ===--- compare_perf_tests.py --------------------------------------------===//
#
#  This source file is part of the Swift.org open source project
#
#  Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors
#  Licensed under Apache License v2.0 with Runtime Library Exception
#
#  See http://swift.org/LICENSE.txt for license information
#  See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===----------------------------------------------------------------------===//

# e.g.
# repeat.sh 3 tot/bin/Benchmark_Driver run -o -O > tot.O.times
# repeat.sh 3 mypatch/bin/Benchmark_Driver run -o -O > mypatch.O.times
# compare_perf_tests.py tot.O.times mypatch.O.times | sort -t, -n -k 6 | column -s, -t

import re
import sys

VERBOSE = 0

# #,TEST,SAMPLES,MIN(ms),MAX(ms),MEAN(ms),SD(ms),MEDIAN(ms)
SCORERE = re.compile(r"(\d+),[ \t]*(\w+),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
TOTALRE = re.compile(r"()(Totals),[ \t]*([\d.]+),[ \t]*([\d.]+),[ \t]*([\d.]+)")
NUMGROUP = 1
KEYGROUP = 2
BESTGROUP = 4
WORSTGROUP = 5

IsTime = 1
ShowSpeedup = 1
PrintAllScores = 0

def parse_int(word):
    try:
        return int(word)
    except ValueError:
        raise Exception("Expected integer value, not " + word)

def get_scores(fname):
    scores = {}
    worstscores = {}
    nums = {}
    runs = 0
    f = open(fname)
    try:
        for line in f:
            if VERBOSE:
                print("Parsing", line,)
            m = SCORERE.match(line)
            is_total = False
            if not m:
                is_total = True
                m = TOTALRE.match(line)
            if not m:
                continue

            if VERBOSE:
                print("  match", m.group(KEYGROUP), m.group(BESTGROUP))

            if not m.group(KEYGROUP) in scores:
                scores[m.group(KEYGROUP)] = []
                worstscores[m.group(KEYGROUP)] = []
            scores[m.group(KEYGROUP)].append(parse_int(m.group(BESTGROUP)))
            worstscores[m.group(KEYGROUP)].append(parse_int(m.group(WORSTGROUP)))
            if is_total:
                nums[m.group(KEYGROUP)] = ""
            else:
                nums[m.group(KEYGROUP)] = m.group(NUMGROUP)
            if len(scores[m.group(KEYGROUP)]) > runs:
                runs = len(scores[m.group(KEYGROUP)])
    finally:
        f.close()
    return scores, worstscores, runs, nums

def is_max_score(newscore, maxscore, invert):
    return not maxscore or (newscore > maxscore if not invert else newscore < maxscore)

def compare_scores(key, score1, worstsample1, score2, worstsample2, runs, num):
    print(num.rjust(3),)
    print(key.ljust(25),)
    bestscore1 = None
    bestscore2 = None
    worstscore1 = None
    worstscore2 = None
    minbest = IsTime
    minworst = not minbest
    r = 0
    for score in score1:
        if is_max_score(newscore=score, maxscore=bestscore1, invert=minbest):
            bestscore1 = score
        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
            worstscore1 = score
        if PrintAllScores:
            print ("%d" % score).rjust(16),
    for score in worstsample1:
        if is_max_score(newscore=score, maxscore=worstscore1, invert=minworst):
            worstscore1 = score
    for score in score2:
        if is_max_score(newscore=score, maxscore=bestscore2, invert=minbest):
            bestscore2 = score
        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
            worstscore2 = score
        if PrintAllScores:
            print ("%d" % score).rjust(16),
        r += 1
    for score in worstsample2:
        if is_max_score(newscore=score, maxscore=worstscore2, invert=minworst):
            worstscore2 = score
    while r < runs:
        if PrintAllScores:
            print ("0").rjust(9),
        r += 1

    if not PrintAllScores:
        print ("%d" % bestscore1).rjust(16),
        print ("%d" % bestscore2).rjust(16),

    print ("%+d" % (bestscore2 - bestscore1)).rjust(9),

    if bestscore1 != 0 and bestscore2 != 0:
        print ("%+.1f%%" % (((float(bestscore2) / bestscore1) - 1) * 100)).rjust(9),
        if ShowSpeedup:
            Num, Den = float(bestscore2), float(bestscore1)
            if IsTime:
                Num, Den = Den, Num
            print ("%.2fx" % (Num / Den)).rjust(9),
    else:
        print("*".rjust(9),)
        if ShowSpeedup:
            print("*".rjust(9),)
    # check if the worst->best interval for each configuration overlap.
    if minbest:
        if (bestscore1 < bestscore2 and bestscore2 < worstscore1) \
           or (bestscore2 < bestscore1 and bestscore1 < worstscore2):
            print("(?)",)
    else:
        if (worstscore1 < worstscore2 and worstscore2 < bestscore1) \
           or (worstscore2 < worstscore1 and worstscore1 < bestscore2):
            print("(?)",)
    print()

def print_best_scores(key, scores):
    print(key,)
    bestscore = None
    minbest = IsTime
    for score in scores:
        if is_max_score(newscore=score, maxscore=bestscore, invert=minbest):
            bestscore = score
    print(", %d" % bestscore)

def usage():
    print("repeat.sh <n> Benchmark_O[none|unchecked] > file.times")
    print("compare_perf_tests.py <file.times> [<file2.times>]")

if __name__ == '__main__':
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    file1 = sys.argv[1]
    if len(sys.argv) < 3:
        scores, worstscores, runs, nums = get_scores(file1)
        keys = list(set(scores.keys()))
        keys.sort()
        for key in keys:
            print_best_scores(key, scores[key])
        sys.exit(0)

    file2 = sys.argv[2]
    if len(sys.argv) > 3:
        SCORERE = re.compile(sys.argv[3])

    scores1, worstscores1, runs1, nums = get_scores(file1)
    scores2, worstscores2, runs2, nums = get_scores(file2)

    runs = runs1
    if runs2 > runs:
        runs = runs2

    if VERBOSE:
        print(scores1)
        print(scores2)

    keys = list(set(scores1.keys() + scores2.keys()))
    keys.sort()
    if VERBOSE:
        print("comparing ", file1, "vs", file2, "=",)
        if IsTime:
            print(file1, "/", file2)
        else:
            print(file2, "/", file1)

    print("#".rjust(3),)
    print("TEST".ljust(25),)
    if PrintAllScores:
        for i in range(0, runs):
            print(("OLD_RUN%d" % i).rjust(9),)
        for i in range(0, runs):
            print(("NEW_RUN%d" % i).rjust(9),)
    else:
        print("BEST_OLD_MIN(μs)".rjust(17),)
        print("BEST_NEW_MIN(μs)".rjust(17),)
    print('DELTA'.rjust(9), '%DELTA'.rjust(9), 'SPEEDUP'.rjust(9))

    for key in keys:
        if key not in scores1:
            print(key, "not in", file1)
            continue
        if key not in scores2:
            print(key, "not in", file2)
            continue
        compare_scores(key, scores1[key], worstscores1[key], scores2[key], worstscores2[key], runs, nums[key])