mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
Reintroduced a feature lost during the `BenchmarkInfo` modernization: all registered benchmarks are ordered alphabetically and assigned an index. This number can be used as a shortcut to invoke the test instead of its full name. (Adding and removing tests from the suite will naturally reassign the indices, but they are stable for a given build.)

The `--list` parameter now prints the test *number*, *name* and *tags* separated by a delimiter. The `--list` output format is modified from:

````
Enabled Tests,Tags
AngryPhonebook,[String, api, validation]
...
````

to this:

````
\#,Test,[Tags]
2,AngryPhonebook,[String, api, validation]
…
````

(There isn't a backslash before the #; git was eating the whole line without it.)

Note: Test number 1 is Ackermann, which is marked as "skip", so it's not listed with the default `skip-tags` value.

This fixes the issue where running tests via `Benchmark_Driver` always reported each test as number 1: each test is run independently, therefore every invocation was "first". Restoring test numbers returns this to the original behavior, where the number reported in the first column when executing the tests is the test's ordinal number in the Swift Benchmark Suite.
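A quick illustration (hypothetical session, assuming the index assignment shown above, where number 2 is AngryPhonebook): a test can now be invoked by its number as well as by its name.

````
$ Benchmark_Driver run 2                 # runs AngryPhonebook by index
$ Benchmark_Driver run AngryPhonebook    # equivalent invocation by name
````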
490 lines
18 KiB
Python
Executable File
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# ===--- Benchmark_Driver ------------------------------------------------===//
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//

from __future__ import print_function

import argparse
import datetime
import glob
import json
import os
import re
import subprocess
import sys
import time
import urllib
import urllib2

DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))


def parse_results(res, optset):
    # Parse lines like this:
    # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),PEAK_MEMORY(B)
    score_re = re.compile(r"(\d+),[ \t]*(\w+)," +
                          ",".join([r"[ \t]*([\d.]+)"] * 7))
    # The Totals line is parsed the same way; the empty first group keeps
    # the group numbering aligned with score_re.
    total_re = re.compile(r"()(Totals)," +
                          ",".join([r"[ \t]*([\d.]+)"] * 7))
    key_group = 2
    val_group = 4
    mem_group = 9

    tests = []
    for line in res.splitlines():
        m = score_re.match(line)
        if not m:
            m = total_re.match(line)
            if not m:
                continue
        testresult = int(m.group(val_group))
        testname = m.group(key_group)
        test = {}
        test['Data'] = [testresult]
        test['Info'] = {}
        test['Name'] = "nts.swift/" + optset + "." + testname + ".exec"
        tests.append(test)
        if testname != 'Totals':
            mem_testresult = int(m.group(mem_group))
            mem_test = {}
            mem_test['Data'] = [mem_testresult]
            mem_test['Info'] = {}
            mem_test['Name'] = "nts.swift/mem_maxrss." + \
                optset + "." + testname + ".mem"
            tests.append(mem_test)
    return tests


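# Example for parse_results (illustrative names and values): given the
# driver output line
#     42,SomeBenchmark,10,100,105,102,1,101,20000
# parse_results(line, 'O') yields two entries:
#     'nts.swift/O.SomeBenchmark.exec' with Data [100] (the MIN column)
#     'nts.swift/mem_maxrss.O.SomeBenchmark.mem' with Data [20000]

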
def submit_to_lnt(data, url):
    print("\nSubmitting results to LNT server...")
    json_report = {'input_data': json.dumps(data), 'commit': '1'}
    data = urllib.urlencode(json_report)
    response_str = urllib2.urlopen(urllib2.Request(url, data))
    response = json.loads(response_str.read())
    if 'success' in response:
        print("Server response:\tSuccess")
    else:
        print("Server response:\tError")
        print("Error:\t", response['error'])
        sys.exit(1)


def instrument_test(driver_path, test, num_samples):
    """Run a test and instrument its peak memory use"""
    test_outputs = []
    for _ in range(num_samples):
        test_output_raw = subprocess.check_output(
            ['time', '-lp', driver_path, test],
            stderr=subprocess.STDOUT
        )
        # BSD `time -l` appends resource usage statistics to the output;
        # this assumes the maximum resident set size is reported on the
        # 15th line from the end.
        peak_memory = re.match(r'\s*(\d+)\s*maximum resident set size',
                               test_output_raw.split('\n')[-15]).group(1)
        # The second whitespace-separated token is the test's CSV result
        # line (the first token is the column header).
        test_outputs.append(test_output_raw.split()[1].split(',') +
                            [peak_memory])

    # Average sample results
    num_samples_index = 2
    min_index = 3
    max_index = 4
    avg_start_index = 5

    # TODO: Correctly take stdev
    avg_test_output = test_outputs[0]
    avg_test_output[avg_start_index:] = map(int,
                                            avg_test_output[avg_start_index:])
    for test_output in test_outputs[1:]:
        for i in range(avg_start_index, len(test_output)):
            avg_test_output[i] += int(test_output[i])
    for i in range(avg_start_index, len(avg_test_output)):
        avg_test_output[i] = int(round(avg_test_output[i] /
                                       float(len(test_outputs))))
    avg_test_output[num_samples_index] = num_samples
    avg_test_output[min_index] = min(
        test_outputs, key=lambda x: int(x[min_index]))[min_index]
    avg_test_output[max_index] = max(
        test_outputs, key=lambda x: int(x[max_index]))[max_index]
    avg_test_output = map(str, avg_test_output)

    return avg_test_output


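# Example for instrument_test (illustrative values): with num_samples=2
# and per-sample result lines
#     1,Ackermann,1,169,169,169,0,169  (+ peak_memory 10000)
#     1,Ackermann,1,173,173,173,0,173  (+ peak_memory 12000)
# the averaged output keeps the first two columns, sets SAMPLES to 2,
# takes MIN=169 and MAX=173 across samples, and averages the remaining
# columns, e.g. MEAN becomes (169 + 173) / 2 = 171 and peak memory 11000.

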
def get_tests(driver_path, args):
    """Return a list of available performance tests"""
    driver = [driver_path, '--list']
    # Use tab delimiter for easier parsing to override the default comma.
    # (The third 'column' is always a comma-separated list of tags in square
    # brackets -- currently unused here.)
    driver.append('--delim=\t')
    if args.benchmarks or args.filters:
        driver.append('--skip-tags=')  # list all tests, don't skip any tags
    index_name_pairs = [
        line.split('\t')[:2] for line in
        subprocess.check_output(driver).split('\n')[1:-1]
    ]
    indices, names = zip(*index_name_pairs)  # unzip list of pairs into 2 lists
    if args.filters:
        regexes = [re.compile(pattern) for pattern in args.filters]
        return sorted(list(set([name for pattern in regexes
                                for name in names if pattern.match(name)])))
    if not args.benchmarks:
        return names
    benchmarks = set(args.benchmarks)
    index_to_name = dict(index_name_pairs)
    indexed_names = [index_to_name[i]
                     for i in benchmarks.intersection(set(indices))]
    return sorted(list(
        benchmarks.intersection(set(names)).union(indexed_names)))


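# Example for get_tests (illustrative, assuming the numbering from the
# commit message above): with args.benchmarks == ['2', 'Ackermann'],
# the index '2' resolves through index_to_name to 'AngryPhonebook' and
# is unioned with the literal name, so the function returns
# ['Ackermann', 'AngryPhonebook'].

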
def get_current_git_branch(git_repo_path):
    """Return the selected branch for the repo `git_repo_path`"""
    return subprocess.check_output(
        ['git', '-C', git_repo_path, 'rev-parse',
         '--abbrev-ref', 'HEAD'], stderr=subprocess.STDOUT).strip()


def get_git_head_ID(git_repo_path):
    """Return the short identifier for the HEAD commit of the repo
    `git_repo_path`"""
    return subprocess.check_output(
        ['git', '-C', git_repo_path, 'rev-parse',
         '--short', 'HEAD'], stderr=subprocess.STDOUT).strip()


def log_results(log_directory, driver, formatted_output, swift_repo=None):
    """Log `formatted_output` to a branch-specific directory in
    `log_directory`
    """
    try:
        branch = get_current_git_branch(swift_repo)
    except (OSError, subprocess.CalledProcessError):
        branch = None
    try:
        head_ID = '-' + get_git_head_ID(swift_repo)
    except (OSError, subprocess.CalledProcessError):
        head_ID = ''
    timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    if branch:
        output_directory = os.path.join(log_directory, branch)
    else:
        output_directory = log_directory
    driver_name = os.path.basename(driver)
    try:
        os.makedirs(output_directory)
    except OSError:
        pass  # directory may already exist
    log_file = os.path.join(output_directory,
                            driver_name + '-' + timestamp + head_ID + '.log')
    print('Logging results to: %s' % log_file)
    with open(log_file, 'w') as f:
        f.write(formatted_output)


def run_benchmarks(driver, benchmarks=[], num_samples=10, verbose=False,
                   log_directory=None, swift_repo=None):
    """Run perf tests individually and return results in a format that's
    compatible with `parse_results`. If `benchmarks` is not empty,
    only run tests included in it.
    """
    # Set a constant hash seed. Some tests are currently sensitive to
    # fluctuations in the number of hash collisions.
    #
    # FIXME: This should only be set in the environment of the child process
    # that runs the tests.
    os.environ["SWIFT_DETERMINISTIC_HASHING"] = "1"

    (total_tests, total_min, total_max, total_mean) = (0, 0, 0, 0)
    output = []
    headings = ['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'MAX(μs)', 'MEAN(μs)',
                'SD(μs)', 'MEDIAN(μs)', 'MAX_RSS(B)']
    line_format = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}'
    if verbose and log_directory:
        print(line_format.format(*headings))
    for test in benchmarks:
        test_output = instrument_test(driver, test, num_samples)
        if test_output[0] == 'Totals':
            continue
        if verbose:
            if log_directory:
                print(line_format.format(*test_output))
            else:
                print(','.join(test_output))
        output.append(test_output)
        (samples, _min, _max, mean) = map(int, test_output[2:6])
        total_tests += 1
        total_min += _min
        total_max += _max
        total_mean += mean
    if not output:
        return
    formatted_output = '\n'.join([','.join(l) for l in output])
    totals = map(str, ['Totals', total_tests, total_min, total_max,
                       total_mean, '0', '0', '0'])
    totals_output = '\n\n' + ','.join(totals)
    if verbose:
        if log_directory:
            print(line_format.format(*([''] + totals)))
        else:
            print(totals_output[1:])
    formatted_output += totals_output
    if log_directory:
        log_results(log_directory, driver, formatted_output, swift_repo)
    return formatted_output


def submit(args):
    print("SVN revision:\t", args.revision)
    print("Machine name:\t", args.machine)
    print("Iterations:\t", args.iterations)
    print("Optimizations:\t", ','.join(args.optimization))
    print("LNT host:\t", args.lnt_host)
    starttime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    print("Start time:\t", starttime)
    data = {}
    data['Tests'] = []
    data['Machine'] = {'Info': {'name': args.machine}, 'Name': args.machine}
    print("\nRunning benchmarks...")
    for optset in args.optimization:
        print("Opt level:\t", optset)
        file = os.path.join(args.tests, "Benchmark_" + optset)
        try:
            res = run_benchmarks(
                file, benchmarks=get_tests(file, args),
                num_samples=args.iterations)
            data['Tests'].extend(parse_results(res, optset))
        except subprocess.CalledProcessError as e:
            print("Execution failed. Test results are empty.")
            print("Process output:\n", e.output)

    endtime = datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
    data['Run'] = {'End Time': endtime,
                   'Info': {'inferred_run_order': str(args.revision),
                            'run_order': str(args.revision),
                            'tag': 'nts',
                            'test_suite_revision': 'None'},
                   'Start Time': starttime}
    print("End time:\t", endtime)

    submit_to_lnt(data, args.lnt_host)
    return 0


def run(args):
    optset = args.optimization
    file = os.path.join(args.tests, "Benchmark_" + optset)
    run_benchmarks(
        file, benchmarks=get_tests(file, args),
        num_samples=args.iterations, verbose=True,
        log_directory=args.output_dir,
        swift_repo=args.swift_repo)
    return 0


def format_name(log_path):
    """Return the filename and directory for a log file"""
    return '/'.join(log_path.split('/')[-2:])


def compare_logs(compare_script, new_log, old_log, log_dir, opt):
    """Write a markdown diff of the log files at paths `new_log` and
    `old_log` to `log_dir`"""
    print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
    subprocess.call([compare_script, '--old-file', old_log,
                     '--new-file', new_log, '--format', 'markdown',
                     '--output', os.path.join(log_dir, 'latest_compare_{0}.md'
                                              .format(opt))])


def compare(args):
    log_dir = args.log_dir
    swift_repo = args.swift_repo
    compare_script = args.compare_script
    baseline_branch = args.baseline_branch
    current_branch = get_current_git_branch(swift_repo)
    current_branch_dir = os.path.join(log_dir, current_branch)
    baseline_branch_dir = os.path.join(log_dir, baseline_branch)

    if current_branch != baseline_branch and \
       not os.path.isdir(baseline_branch_dir):
        print(('Unable to find benchmark logs for {baseline_branch} branch. ' +
               'Set a baseline benchmark log by passing --benchmark to ' +
               'build-script while on {baseline_branch} branch.')
              .format(baseline_branch=baseline_branch))
        return 1

    recent_logs = {}
    for branch_dir in [current_branch_dir, baseline_branch_dir]:
        for opt in ['O', 'Onone']:
            recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted(
                glob.glob(os.path.join(
                    branch_dir, 'Benchmark_' + opt + '-*.log')),
                key=os.path.getctime, reverse=True)

    if current_branch == baseline_branch:
        if len(recent_logs[baseline_branch + '_O']) > 1 and \
           len(recent_logs[baseline_branch + '_Onone']) > 1:
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][1],
                         log_dir, 'Onone')
        else:
            print(('{baseline_branch}/{baseline_branch} comparison ' +
                   'skipped: no previous {baseline_branch} logs')
                  .format(baseline_branch=baseline_branch))
    else:
        # TODO: Check for outdated baseline branch log
        if len(recent_logs[current_branch + '_O']) == 0 or \
           len(recent_logs[current_branch + '_Onone']) == 0:
            print('branch sanity failure: missing branch logs')
            return 1

        if len(recent_logs[current_branch + '_O']) == 1 or \
           len(recent_logs[current_branch + '_Onone']) == 1:
            print('branch/branch comparison skipped: no previous branch logs')
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[current_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[current_branch + '_Onone'][1],
                         log_dir, 'Onone')

        if len(recent_logs[baseline_branch + '_O']) == 0 or \
           len(recent_logs[baseline_branch + '_Onone']) == 0:
            print(('branch/{baseline_branch} failure: no {baseline_branch} ' +
                   'logs')
                  .format(baseline_branch=baseline_branch))
            return 1
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][0],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][0],
                         log_dir, 'Onone')

    # TODO: Fail on large regressions

    return 0


def positive_int(value):
    ivalue = int(value)
    if ivalue <= 0:
        raise ValueError
    return ivalue


def main():
    parser = argparse.ArgumentParser(
        epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*'
    )
    subparsers = parser.add_subparsers(
        title='Swift benchmark driver commands',
        help='See COMMAND -h for additional arguments', metavar='<command>')

    parent_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = parent_parser.add_mutually_exclusive_group()
    benchmarks_group.add_argument(
        'benchmarks',
        default=[],
        help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK")
    benchmarks_group.add_argument(
        '-f', '--filter', dest='filters', action='append',
        help='run all tests whose name matches regular expression PATTERN, ' +
             'multiple filters are supported', metavar="PATTERN")
    parent_parser.add_argument(
        '-t', '--tests',
        help='directory containing Benchmark_O{,none,size} ' +
             '(default: DRIVER_DIR)',
        default=DRIVER_DIR)

    submit_parser = subparsers.add_parser(
        'submit',
        help='Run benchmarks and submit results to LNT',
        parents=[parent_parser])
    submit_parser.add_argument(
        '-o', '--optimization', nargs='+',
        help='optimization levels to use (default: O Onone Osize)',
        default=['O', 'Onone', 'Osize'])
    submit_parser.add_argument(
        '-i', '--iterations',
        help='number of times to run each test (default: 10)',
        type=positive_int, default=10)
    submit_parser.add_argument(
        '-m', '--machine', required=True,
        help='LNT machine name')
    submit_parser.add_argument(
        '-r', '--revision', required=True,
        help='SVN revision of compiler to identify the LNT run', type=int)
    submit_parser.add_argument(
        '-l', '--lnt_host', required=True,
        help='LNT host to submit results to')
    submit_parser.set_defaults(func=submit)

    run_parser = subparsers.add_parser(
        'run',
        help='Run benchmarks and output results to stdout',
        parents=[parent_parser])
    run_parser.add_argument(
        '-o', '--optimization',
        metavar='OPT',
        choices=['O', 'Onone', 'Osize'],
        help='optimization level to use: {O,Onone,Osize}, (default: O)',
        default='O')
    run_parser.add_argument(
        '-i', '--iterations',
        help='number of times to run each test (default: 1)',
        type=positive_int, default=1)
    run_parser.add_argument(
        '--output-dir',
        help='log results to directory (default: no logging)')
    run_parser.add_argument(
        '--swift-repo',
        help='absolute path to Swift source repo for branch comparison')
    run_parser.set_defaults(func=run)

    compare_parser = subparsers.add_parser(
        'compare',
        help='Compare benchmark results')
    compare_parser.add_argument(
        '--log-dir', required=True,
        help='directory containing benchmark logs')
    compare_parser.add_argument(
        '--swift-repo', required=True,
        help='absolute path to Swift source repo')
    compare_parser.add_argument(
        '--compare-script', required=True,
        help='absolute path to compare script')
    compare_parser.add_argument(
        '--baseline-branch', default='master',
        help='attempt to compare results to baseline results for specified '
             'branch (default: master)')
    compare_parser.set_defaults(func=compare)

    args = parser.parse_args()
    if args.func != compare and isinstance(args.optimization, list):
        args.optimization = sorted(list(set(args.optimization)))
    return args.func(args)


if __name__ == '__main__':
    sys.exit(main())