#!/usr/bin/env python
# -*- coding: utf-8 -*-

# ===--- Benchmark_Driver ------------------------------------------------===//
#
#  This source file is part of the Swift.org open source project
#
#  Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
#  Licensed under Apache License v2.0 with Runtime Library Exception
#
#  See https://swift.org/LICENSE.txt for license information
#  See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ===---------------------------------------------------------------------===//
"""
Benchmark_Driver is a tool for running and analysing Swift Benchmarking Suite.

Example:
    $ Benchmark_Driver run

Use `Benchmark_Driver -h` for help on available commands and options.

class `BenchmarkDriver` runs performance tests and impements the `run` COMMAND.
class `BenchmarkDoctor` analyzes performance tests, implements `check` COMMAND.

"""

import argparse
import glob
import logging
import math
import os
import re
import subprocess
import sys
import time

from compare_perf_tests import LogParser

DRIVER_DIR = os.path.dirname(os.path.realpath(__file__))


class BenchmarkDriver(object):
    """Executes tests from Swift Benchmark Suite.

    It's a higher level wrapper for the Benchmark_X family of binaries
    (X = [O, Onone, Osize]).
    """

    def __init__(self, args, tests=None, _subprocess=None, parser=None):
        """Initialize with command line arguments.

        Optional parameters are for injecting dependencies -- used for testing.
        """
        self.args = args
        self._subprocess = _subprocess or subprocess
        self.all_tests = []
        self.tests = tests or self._get_tests()
        self.parser = parser or LogParser()
        self.results = {}
        # Set a constant hash seed. Some tests are currently sensitive to
        # fluctuations in the number of hash collisions.
        os.environ['SWIFT_DETERMINISTIC_HASHING'] = '1'

    def _invoke(self, cmd):
        return self._subprocess.check_output(
            cmd, stderr=self._subprocess.STDOUT)

    @property
    def test_harness(self):
        """Full path to test harness binary."""
        suffix = (self.args.optimization if hasattr(self.args, 'optimization')
                  else 'O')
        return os.path.join(self.args.tests, "Benchmark_" + suffix)

    def _git(self, cmd):
        """Execute the Git command in the `swift-repo`."""
        return self._invoke(
            ('git -C {0} '.format(self.args.swift_repo) + cmd).split()).strip()

    @property
    def log_file(self):
        """Full path to log file.

        If `swift-repo` is set, log file is tied to Git branch and revision.
        """
        if not self.args.output_dir:
            return None
        log_dir = self.args.output_dir
        harness_name = os.path.basename(self.test_harness)
        suffix = '-' + time.strftime('%Y%m%d%H%M%S', time.localtime())
        if self.args.swift_repo:
            log_dir = os.path.join(
                log_dir, self._git('rev-parse --abbrev-ref HEAD'))  # branch
            suffix += '-' + self._git('rev-parse --short HEAD')  # revision
        return os.path.join(log_dir, harness_name + suffix + '.log')

    @property
    def _cmd_list_benchmarks(self):
        # Use tab delimiter for easier parsing to override the default comma.
        # (The third 'column' is always comma-separated list of tags in square
        # brackets -- currently unused here.)
        return [self.test_harness, '--list', '--delim=\t'] + (
            ['--skip-tags='] if (self.args.benchmarks or
                                 self.args.filters) else [])

    def _get_tests(self):
        """Return a list of performance tests to run."""
        index_name_pairs = [
            line.split('\t')[:2] for line in
            self._invoke(self._cmd_list_benchmarks).split('\n')[1:-1]
        ]
        # unzip list of pairs into 2 lists
        indices, self.all_tests = map(list, zip(*index_name_pairs))
        if self.args.filters:
            return self._tests_matching_patterns()
        if self.args.benchmarks:
            return self._tests_by_name_or_index(indices)
        return self.all_tests

    def _tests_matching_patterns(self):
        regexes = [re.compile(pattern) for pattern in self.args.filters]
        return sorted(list(set([name for pattern in regexes
                                for name in self.all_tests
                                if pattern.match(name)])))

    def _tests_by_name_or_index(self, indices):
        benchmarks = set(self.args.benchmarks)
        index_to_name = dict(zip(indices, self.all_tests))
        indexed_names = [index_to_name[i]
                         for i in benchmarks.intersection(set(indices))]
        return sorted(list(
            benchmarks.intersection(set(self.all_tests)).union(indexed_names)))

    def run(self, test, num_samples=None, num_iters=None,
            verbose=None, measure_memory=False):
        """Execute benchmark and gather results."""
        num_samples = num_samples or 1
        num_iters = num_iters or 0  # automatically determine N to run for 1s

        cmd = self._cmd_run(
            test, num_samples, num_iters, verbose, measure_memory)
        output = self._invoke(cmd)
        result = self.parser.results_from_string(output).items()[0][1]
        return result

    def _cmd_run(self, test, num_samples, num_iters, verbose, measure_memory):
        cmd = [self.test_harness, test]
        if num_samples > 1:
            cmd.append('--num-samples={0}'.format(num_samples))
        if num_iters > 0:
            cmd.append('--num-iters={0}'.format(num_iters))
        if verbose:
            cmd.append('--verbose')
        if measure_memory:
            cmd.append('--memory')
        return cmd

    def run_independent_samples(self, test):
        """Run benchmark multiple times, gathering independent samples.

        Returns the aggregated result of independent benchmark invocations.
        """
        def merge_results(a, b):
            a.merge(b)
            return a

        return reduce(merge_results,
                      [self.run(test, measure_memory=True)
                       for _ in range(self.args.independent_samples)])

    def log_results(self, output, log_file=None):
        """Log output to `log_file`.

        Creates `args.output_dir` if it doesn't exist yet.
        """
        log_file = log_file or self.log_file
        dir = os.path.dirname(log_file)
        if not os.path.exists(dir):
            os.makedirs(dir)
        print('Logging results to: %s' % log_file)
        with open(log_file, 'w') as f:
            f.write(output)

    RESULT = '{:>3} {:<25} {:>7} {:>7} {:>7} {:>8} {:>6} {:>10} {:>10}'

    def run_and_log(self, csv_console=True):
        """Run benchmarks and continuously log results to the console.

        There are two console log formats: CSV and justified columns. Both are
        compatible with `LogParser`. Depending on the `csv_console` parameter,
        the CSV log format is either printed to console or returned as a string
        from this method. When `csv_console` is False, the console output
        format is justified columns.
        """

        format = (
            (lambda values: ','.join(values)) if csv_console else
            (lambda values: self.RESULT.format(*values)))  # justified columns

        def console_log(values):
            print(format(values))

        console_log(['#', 'TEST', 'SAMPLES', 'MIN(μs)', 'MAX(μs)',  # header
                    'MEAN(μs)', 'SD(μs)', 'MEDIAN(μs)', 'MAX_RSS(B)'])

        def result_values(r):
            return map(str, [r.test_num, r.name, r.num_samples, r.min, r.max,
                             int(r.mean), int(r.sd), r.median, r.max_rss])

        results = []
        for test in self.tests:
            result = result_values(self.run_independent_samples(test))
            console_log(result)
            results.append(result)

        print(
            '\nTotal performance tests executed: {0}'.format(len(self.tests)))
        return (None if csv_console else
                ('\n'.join([','.join(r) for r in results]) + '\n'))  # csv_log

    @staticmethod
    def run_benchmarks(args):
        """Run benchmarks and log results."""
        driver = BenchmarkDriver(args)
        csv_log = driver.run_and_log(csv_console=(args.output_dir is None))
        if csv_log:
            driver.log_results(csv_log)
        return 0


class LoggingReportFormatter(logging.Formatter):
    """Format logs as plain text or with colors on the terminal.

    Plain text outputs level, category and massage: 'DEBUG category: Hi!'
    Colored output uses color coding based on the level.
    """

    import logging as log
    colors = {log.DEBUG: '9', log.INFO: '2', log.WARNING: '3', log.ERROR: '1',
              log.CRITICAL: '5'}

    def __init__(self, use_color=False):
        """Specify if report should use colors; defaults to False."""
        super(LoggingReportFormatter, self).__init__('%(message)s')
        self.use_color = use_color

    def format(self, record):
        """Format the log record with level and category."""
        msg = super(LoggingReportFormatter, self).format(record)
        category = ((record.name.split('.')[-1] + ': ') if '.' in record.name
                    else '')
        return ('\033[1;3{0}m{1}{2}\033[1;0m'.format(
            self.colors[record.levelno], category, msg) if self.use_color else
            '{0} {1}{2}'.format(record.levelname, category, msg))


class BenchmarkDoctor(object):
    """Checks that the benchmark conforms to the standard set of requirements.

    Benchmarks that are part of Swift Benchmark Suite are required to follow
    a set of rules that ensure quality measurements. These include naming
    convention, robustness when varying execution parameters like
    `num-iters` and `num-samples` (no setup overhead, constant memory
    consumption).
    """

    log = logging.getLogger('BenchmarkDoctor')
    log_naming = log.getChild('naming')
    log_runtime = log.getChild('runtime')
    log_memory = log.getChild('memory')
    log.setLevel(logging.DEBUG)

    def __init__(self, args, driver=None):
        """Initialize with command line parameters.

        Optional `driver` parameter for injecting dependency; used for testing.
        """
        super(BenchmarkDoctor, self).__init__()
        self.driver = driver or BenchmarkDriver(args)
        self.results = {}
        self.console_handler = logging.StreamHandler(sys.stdout)
        self.console_handler.setLevel(logging.DEBUG if args.verbose else
                                      logging.INFO)
        self.console_handler.setFormatter(
            LoggingReportFormatter(use_color=sys.stdout.isatty()))
        self.log.addHandler(self.console_handler)
        self.log.debug('Checking tests: %s', ', '.join(self.driver.tests))
        self.requirements = [
            self._name_matches_capital_words_convention,
            self._name_is_at_most_40_chars_long,
            self._no_setup_overhead,
            self._optimized_runtime_in_range,
            self._constant_memory_use
        ]

    def __del__(self):
        """Unregister handler on exit."""
        self.log.removeHandler(self.console_handler)

    capital_words_re = re.compile('[A-Z][a-zA-Z0-9]+')

    @staticmethod
    def _name_matches_capital_words_convention(measurements):
        name = measurements['name']
        match = BenchmarkDoctor.capital_words_re.match(name)
        matched = match.group(0) if match else ''

        if name != matched:
            BenchmarkDoctor.log_naming.error(
                "'%s' name doesn't conform to UpperCamelCase convention.",
                name)
            BenchmarkDoctor.log_naming.info(
                'See http://bit.ly/UpperCamelCase')

    @staticmethod
    def _name_is_at_most_40_chars_long(measurements):
        name = measurements['name']

        if len(name) > 40:
            BenchmarkDoctor.log_naming.error(
                "'%s' name is %d characters long.", name, len(name))
            BenchmarkDoctor.log_naming.info(
                'Benchmark name should not be longer than 40 characters.')

    @staticmethod
    def _select(measurements, num_iters=None, opt_level='O'):
        prefix = measurements['name'] + ' ' + opt_level
        prefix += '' if num_iters is None else (' i' + str(num_iters))
        return [series for name, series in measurements.items()
                if name.startswith(prefix)]

    @staticmethod
    def _optimized_runtime_in_range(measurements):
        name = measurements['name']
        setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
        setup = 0 if ratio < 0.05 else setup
        runtime = min(
            [(result.min - correction) for i_series in
             [BenchmarkDoctor._select(measurements, num_iters=i)
              for correction in [(setup / i) for i in [1, 2]]
              ] for result in i_series])

        if 2500 < runtime:
            log = (BenchmarkDoctor.log_runtime.warning if runtime < 500000 else
                   BenchmarkDoctor.log_runtime.error)
            caveat = '' if setup == 0 else ' (excluding the setup overhead)'
            log("'%s' execution took at least %d μs%s.", name, runtime, caveat)
            factor = int(pow(2, math.ceil(math.log(runtime / 2500.0, 2))))
            BenchmarkDoctor.log_runtime.info(
                "Decrease the workload of '%s' by a factor of %d, "
                "to be less than 2500 μs.", name, factor)

    @staticmethod
    def _setup_overhead(measurements):
        select = BenchmarkDoctor._select
        ti1, ti2 = [float(min(mins)) for mins in
                    [[result.min for result in i_series] for i_series in
                     [select(measurements, num_iters=i) for i in [1, 2]]]]
        setup = int(round(2.0 * (ti1 - ti2)))
        ratio = (setup / ti1) if ti1 > 0 else 0
        return (setup, ratio)

    @staticmethod
    def _no_setup_overhead(measurements):
        setup, ratio = BenchmarkDoctor._setup_overhead(measurements)
        if ratio > 0.05:
            BenchmarkDoctor.log_runtime.error(
                "'%s' has setup overhead of %d μs (%.1f%%).",
                measurements['name'], setup, round((100 * ratio), 1))
            BenchmarkDoctor.log_runtime.info(
                'Move initialization of benchmark data to the `setUpFunction` '
                'registered in `BenchmarkInfo`.')

    @staticmethod
    def _constant_memory_use(measurements):
        select = BenchmarkDoctor._select
        (min_i1, max_i1), (min_i2, max_i2) = [
            (min(memory_use), max(memory_use)) for memory_use in
            [[r.mem_pages for r in i_series] for i_series in
             [select(measurements, num_iters=i) for i in
              [1, 2]]]]
        range_i1, range_i2 = max_i1 - min_i1, max_i2 - min_i2
        normal_range = 15  # pages
        name = measurements['name']

        if abs(min_i1 - min_i2) > max(range_i1, range_i2, normal_range):
            BenchmarkDoctor.log_memory.error(
                "'%s' varies the memory footprint of the base "
                "workload depending on the `num-iters`.", name)

        if max(range_i1, range_i2) > normal_range:
            BenchmarkDoctor.log_memory.warning(
                "'%s' has very wide range of memory used between "
                "independent, repeated measurements.", name)

        BenchmarkDoctor.log_memory.debug(
            "%s mem_pages [i1, i2]:  min=[%d, %d] 𝚫=%d R=[%d, %d]", name,
            *[min_i1, min_i2, abs(min_i1 - min_i2), range_i1, range_i2])

    @staticmethod
    def _adjusted_1s_samples(runtime):
        u"""Return sample count that can be taken in approximately 1 second.

        Based on the runtime (μs) of one sample taken with num-iters=1.
        """
        if runtime == 0:
            return 2
        s = 1000000 / float(runtime)  # samples for 1s run
        s = int(pow(2, round(math.log(s, 2))))  # rounding to power of 2
        return s if s > 2 else 2  # always take at least 2 samples

    def measure(self, benchmark):
        """Measure benchmark with varying iterations and optimization levels.

        Returns a dictionary with benchmark name and `PerformanceTestResult`s.
        """
        self.log.debug('Calibrating num-samples for {0}:'.format(benchmark))
        r = self.driver.run(benchmark, num_samples=3, num_iters=1)  # calibrate
        num_samples = self._adjusted_1s_samples(r.min)

        def capped(s):
            return min(s, 2048)
        run_args = [(capped(num_samples), 1), (capped(num_samples / 2), 2)]
        opts = self.driver.args.optimization
        opts = opts if isinstance(opts, list) else [opts]
        self.log.debug(
            'Runtime {0} μs yields {1} adjusted samples per second.'.format(
                r.min, num_samples))
        self.log.debug(
            'Measuring {0}, 5 x i1 ({1} samples), 5 x i2 ({2} samples)'.format(
                benchmark, run_args[0][0], run_args[1][0]))

        measurements = dict(
            [('{0} {1} i{2}{3}'.format(benchmark, o, i, suffix),
              self.driver.run(benchmark, num_samples=s, num_iters=i,
                              verbose=True, measure_memory=True))
             for o in opts
             for s, i in run_args
             for suffix in list('abcde')
             ]
        )
        measurements['name'] = benchmark
        return measurements

    def analyze(self, benchmark_measurements):
        """Analyze whether benchmark fullfills all requirtements."""
        self.log.debug('Analyzing %s', benchmark_measurements['name'])
        for rule in self.requirements:
            rule(benchmark_measurements)

    def check(self):
        """Measure and analyse all enabled tests."""
        for test in self.driver.tests:
            self.analyze(self.measure(test))

    @staticmethod
    def run_check(args):
        """Validate benchmarks conform to health rules, report violations."""
        doctor = BenchmarkDoctor(args)
        doctor.check()
        # TODO non-zero error code when errors are logged
        # See https://stackoverflow.com/a/31142078/41307
        return 0


def format_name(log_path):
    """Return the filename and directory for a log file."""
    return '/'.join(log_path.split('/')[-2:])


def compare_logs(compare_script, new_log, old_log, log_dir, opt):
    """Return diff of log files at paths `new_log` and `old_log`."""
    print('Comparing %s %s ...' % (format_name(old_log), format_name(new_log)))
    subprocess.call([compare_script, '--old-file', old_log,
                    '--new-file', new_log, '--format', 'markdown',
                     '--output', os.path.join(log_dir, 'latest_compare_{0}.md'
                                              .format(opt))])


def compare(args):
    log_dir = args.log_dir
    compare_script = args.compare_script
    baseline_branch = args.baseline_branch
    current_branch = \
        BenchmarkDriver(args, tests=[''])._git('rev-parse --abbrev-ref HEAD')
    current_branch_dir = os.path.join(log_dir, current_branch)
    baseline_branch_dir = os.path.join(log_dir, baseline_branch)

    if current_branch != baseline_branch and \
       not os.path.isdir(baseline_branch_dir):
        print(('Unable to find benchmark logs for {baseline_branch} branch. ' +
               'Set a baseline benchmark log by passing --benchmark to ' +
               'build-script while on {baseline_branch} branch.')
              .format(baseline_branch=baseline_branch))
        return 1

    recent_logs = {}
    for branch_dir in [current_branch_dir, baseline_branch_dir]:
        for opt in ['O', 'Onone']:
            recent_logs[os.path.basename(branch_dir) + '_' + opt] = sorted(
                glob.glob(os.path.join(
                    branch_dir, 'Benchmark_' + opt + '-*.log')),
                key=os.path.getctime, reverse=True)

    if current_branch == baseline_branch:
        if len(recent_logs[baseline_branch + '_O']) > 1 and \
           len(recent_logs[baseline_branch + '_Onone']) > 1:
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[baseline_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][1],
                         log_dir, 'Onone')
        else:
            print(('{baseline_branch}/{baseline_branch} comparison ' +
                   'skipped: no previous {baseline_branch} logs')
                  .format(baseline_branch=baseline_branch))
    else:
        # TODO: Check for outdated baseline branch log
        if len(recent_logs[current_branch + '_O']) == 0 or \
           len(recent_logs[current_branch + '_Onone']) == 0:
            print('branch sanity failure: missing branch logs')
            return 1

        if len(recent_logs[current_branch + '_O']) == 1 or \
           len(recent_logs[current_branch + '_Onone']) == 1:
            print('branch/branch comparison skipped: no previous branch logs')
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[current_branch + '_O'][1],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[current_branch + '_Onone'][1],
                         log_dir, 'Onone')

        if len(recent_logs[baseline_branch + '_O']) == 0 or \
           len(recent_logs[baseline_branch + '_Onone']) == 0:
            print(('branch/{baseline_branch} failure: no {baseline_branch} ' +
                   'logs')
                  .format(baseline_branch=baseline_branch))
            return 1
        else:
            compare_logs(compare_script,
                         recent_logs[current_branch + '_O'][0],
                         recent_logs[baseline_branch + '_O'][0],
                         log_dir, 'O')
            compare_logs(compare_script,
                         recent_logs[current_branch + '_Onone'][0],
                         recent_logs[baseline_branch + '_Onone'][0],
                         log_dir, 'Onone')

        # TODO: Fail on large regressions

    return 0


def positive_int(value):
    """Verify the value is a positive integer."""
    ivalue = int(value)
    if not (ivalue > 0):
        raise ValueError
    return ivalue


def parse_args(args):
    """Parse command line arguments and set default values."""
    parser = argparse.ArgumentParser(
        epilog='Example: ./Benchmark_Driver run -i 5 -f Prefix -f .*Suffix.*'
    )
    subparsers = parser.add_subparsers(
        title='Swift benchmark driver commands',
        help='See COMMAND -h for additional arguments', metavar='COMMAND')

    shared_benchmarks_parser = argparse.ArgumentParser(add_help=False)
    benchmarks_group = shared_benchmarks_parser.add_mutually_exclusive_group()
    benchmarks_group.add_argument(
        'benchmarks',
        default=[],
        help='benchmark to run (default: all)', nargs='*', metavar="BENCHMARK")
    benchmarks_group.add_argument(
        '-f', '--filter', dest='filters', action='append',
        help='run all tests whose name match regular expression PATTERN, ' +
        'multiple filters are supported', metavar="PATTERN")
    shared_benchmarks_parser.add_argument(
        '-t', '--tests',
        help='directory containing Benchmark_O{,none,size} ' +
        '(default: DRIVER_DIR)',
        default=DRIVER_DIR)
    shared_benchmarks_parser.add_argument(
        '-o', '--optimization',
        metavar='OPT',
        choices=['O', 'Onone', 'Osize'],
        help='optimization level to use: {O,Onone,Osize}, (default: O)',
        default='O')

    run_parser = subparsers.add_parser(
        'run',
        help='Run benchmarks and output results to stdout',
        parents=[shared_benchmarks_parser])
    run_parser.add_argument(
        '-i', '--independent-samples',
        help='number of times to run each test (default: 1)',
        type=positive_int, default=1)
    run_parser.add_argument(
        '--output-dir',
        help='log results to directory (default: no logging)')
    run_parser.add_argument(
        '--swift-repo',
        help='absolute path to the Swift source repository')
    run_parser.set_defaults(func=BenchmarkDriver.run_benchmarks)

    check_parser = subparsers.add_parser(
        'check',
        help='',
        parents=[shared_benchmarks_parser])
    check_parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='show more details during benchmark analysis',)
    check_parser.set_defaults(func=BenchmarkDoctor.run_check)

    compare_parser = subparsers.add_parser(
        'compare',
        help='Compare benchmark results')
    compare_parser.add_argument(
        '--log-dir', required=True,
        help='directory containing benchmark logs')
    compare_parser.add_argument(
        '--swift-repo', required=True,
        help='absolute path to the Swift source repository')
    compare_parser.add_argument(
        '--compare-script', required=True,
        help='absolute path to compare script')
    compare_parser.add_argument(
        '--baseline-branch', default='master',
        help='attempt to compare results to baseline results for specified '
             'branch (default: master)')
    compare_parser.set_defaults(func=compare)

    return parser.parse_args(args)


def main():
    """Parse command line arguments and execute the specified COMMAND."""
    args = parse_args(sys.argv[1:])
    return args.func(args)


if __name__ == '__main__':
    exit(main())