swift-mirror/utils/process-stats-dir.py

#!/usr/bin/python
#
# ==-- process-stats-dir - summarize one or more Swift -stats-output-dirs --==#
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ==------------------------------------------------------------------------==#
#
# This file processes the contents of one or more directories generated by
# `swiftc -stats-output-dir` and emits summary data, traces etc. for analysis.

import argparse
import json
import os
import random
import re
import sys
import csv


class JobStats:

    def __init__(self, jobkind, jobid, module, start_usec, dur_usec,
                 jobargs, stats):
        self.jobkind = jobkind
        self.jobid = jobid
        self.module = module
        self.start_usec = start_usec
        self.dur_usec = dur_usec
        self.jobargs = jobargs
        self.stats = stats

    def is_driver_job(self):
        return self.jobkind == 'driver'

    def is_frontend_job(self):
        return self.jobkind == 'frontend'

    def driver_jobs_ran(self):
        assert(self.is_driver_job())
        return self.stats.get("Driver.NumDriverJobsRun", 0)

    def driver_jobs_skipped(self):
        assert(self.is_driver_job())
        return self.stats.get("Driver.NumDriverJobsSkipped", 0)

    def driver_jobs_total(self):
        assert(self.is_driver_job())
        return self.driver_jobs_ran() + self.driver_jobs_skipped()

    def merged_with(self, other):
        merged_stats = {}
        for k, v in self.stats.items() + other.stats.items():
            merged_stats[k] = v + merged_stats.get(k, 0.0)
        merged_kind = self.jobkind
        if other.jobkind != merged_kind:
            merged_kind = "<merged>"
        merged_module = self.module
        if other.module != merged_module:
            merged_module = "<merged>"
        merged_start = min(self.start_usec, other.start_usec)
        merged_end = max(self.start_usec + self.dur_usec,
                         other.start_usec + other.dur_usec)
        merged_dur = merged_end - merged_start
        return JobStats(merged_kind, random.randint(0, 1000000000),
                        merged_module, merged_start, merged_dur,
                        self.jobargs + other.jobargs, merged_stats)

    def incrementality_percentage(self):
        assert(self.is_driver_job())
        ran = self.driver_jobs_ran()
        total = self.driver_jobs_total()
        return (float(ran) / float(total)) * 100.0

    # Return a JSON-formattable object of the form preferred by google chrome's
    # 'catapult' trace-viewer.
    def to_catapult_trace_obj(self):
        return {"name": self.module,
                "cat": self.jobkind,
                "ph": "X",              # "X" == "complete event"
                "pid": self.jobid,
                "tid": 1,
                "ts": self.start_usec,
                "dur": self.dur_usec,
                "args": self.jobargs}


# Return an array of JobStats objects
def load_stats_dir(path):
    jobstats = []
    fpat = r"^stats-(?P<start>\d+)-swift-(?P<kind>\w+)-(?P<pid>\d+).json$"
    for root, dirs, files in os.walk(path):
        for f in files:
            m = re.match(fpat, f)
            if m:
                # NB: "pid" in fpat is a random number, not unix pid.
                mg = m.groupdict()
                jobkind = mg['kind']
                jobid = int(mg['pid'])
                start_usec = int(mg['start'])

                j = json.load(open(os.path.join(root, f)))
                dur_usec = 1
                jobargs = None
                module = "module"
                patstr = (r"time\.swift-" + jobkind +
                          r"\.(?P<module>[^\.]+)(?P<filename>.*)\.wall$")
                pat = re.compile(patstr)
                for (k, v) in j.items():
                    tm = re.match(pat, k)
                    if tm:
                        tmg = tm.groupdict()
                        dur_usec = int(1000000.0 * float(v))
                        module = tmg['module']
                        if 'filename' in tmg:
                            ff = tmg['filename']
                            if ff.startswith('.'):
                                ff = ff[1:]
                            jobargs = [ff]
                        break
                e = JobStats(jobkind=jobkind, jobid=jobid,
                             module=module, start_usec=start_usec,
                             dur_usec=dur_usec, jobargs=jobargs,
                             stats=j)
                jobstats.append(e)
    return jobstats


# Passed args with 2-element remainder ["old", "new"], return a list of tuples
# of the form [(name, (oldstats, newstats))] where each name is a common subdir
# of each of "old" and "new", and the stats are those found in the respective
# dirs.
def load_paired_stats_dirs(args):
    assert(len(args.remainder) == 2)
    paired_stats = []
    (old, new) = args.remainder
    for p in sorted(os.listdir(old)):
        full_old = os.path.join(old, p)
        full_new = os.path.join(new, p)
        if not (os.path.exists(full_old) and os.path.isdir(full_old) and
                os.path.exists(full_new) and os.path.isdir(full_new)):
            continue
        old_stats = load_stats_dir(full_old)
        new_stats = load_stats_dir(full_new)
        if len(old_stats) == 0 or len(new_stats) == 0:
            continue
        paired_stats.append((p, (old_stats, new_stats)))
    return paired_stats


def write_catapult_trace(args):
    allstats = []
    for path in args.remainder:
        allstats += load_stats_dir(path)
    json.dump([s.to_catapult_trace_obj() for s in allstats], args.output)


def merge_all_jobstats(jobstats):
    m = None
    for j in jobstats:
        if m is None:
            m = j
        else:
            m = m.merged_with(j)
    return m


def show_paired_incrementality(args):
    fieldnames = ["old_pct", "old_skip",
                  "new_pct", "new_skip",
                  "delta_pct", "delta_skip",
                  "name"]
    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
    out.writeheader()

    for (name, (oldstats, newstats)) in load_paired_stats_dirs(args):
        olddriver = merge_all_jobstats([x for x in oldstats
                                        if x.is_driver_job()])
        newdriver = merge_all_jobstats([x for x in newstats
                                        if x.is_driver_job()])
        if olddriver is None or newdriver is None:
            continue
        oldpct = olddriver.incrementality_percentage()
        newpct = newdriver.incrementality_percentage()
        deltapct = newpct - oldpct
        oldskip = olddriver.driver_jobs_skipped()
        newskip = newdriver.driver_jobs_skipped()
        deltaskip = newskip - oldskip
        out.writerow(dict(name=name,
                          old_pct=oldpct, old_skip=oldskip,
                          new_pct=newpct, new_skip=newskip,
                          delta_pct=deltapct, delta_skip=deltaskip))


def show_incrementality(args):
    fieldnames = ["incrementality", "name"]
    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
    out.writeheader()

    for path in args.remainder:
        stats = load_stats_dir(path)
        for s in stats:
            if s.is_driver_job():
                pct = s.incrementality_percentage()
                out.writerow(dict(name=os.path.basename(path),
                                  incrementality=pct))


def compare_frontend_stats(args):
    assert(len(args.remainder) == 2)
    (olddir, newdir) = args.remainder

    fieldnames = ["old", "new", "delta_pct", "name"]
    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
    out.writeheader()

    old_stats = load_stats_dir(olddir)
    new_stats = load_stats_dir(newdir)
    old_merged = merge_all_jobstats([x for x in old_stats
                                     if x.is_frontend_job()])
    new_merged = merge_all_jobstats([x for x in new_stats
                                     if x.is_frontend_job()])
    if old_merged is None or new_merged is None:
        return
    for stat_name in sorted(old_merged.stats.keys()):
        if stat_name in new_merged.stats:
            old = float(old_merged.stats[stat_name])
            new = float(new_merged.stats[stat_name])
            delta = (new - old)
            delta_pct = 0
            if old != 0:
                delta_pct = -100.0
            if new != 0:
                delta_pct = (delta / new) * 100.0
            if abs(delta_pct) >= args.delta_pct_thresh:
                out.writerow(dict(name=stat_name, old=old, new=new,
                                  delta_pct=delta_pct))


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true",
                        help="Report activity verbosely")
    parser.add_argument("--output", default="-",
                        type=argparse.FileType('wb', 0),
                        help="Write output to file")
    parser.add_argument("--paired", action="store_true",
                        help="Process two dirs-of-stats-dirs, pairwise")
    parser.add_argument("--delta-pct-thresh", type=float, default=0.0,
                        help="Percentage change required to report")
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument("--catapult", action="store_true",
                       help="emit a 'catapult'-compatible trace of events")
    modes.add_argument("--incrementality", action="store_true",
                       help="summarize the 'incrementality' of a build")
    modes.add_argument("--compare-frontend-stats", action="store_true",
                       help="Compare frontend stats from two stats-dirs")
    parser.add_argument('remainder', nargs=argparse.REMAINDER,
                        help="stats-dirs to process")

    args = parser.parse_args()
    if len(args.remainder) == 0:
        parser.print_help()
        sys.exit(1)
    if args.catapult:
        write_catapult_trace(args)
    elif args.compare_frontend_stats:
        compare_frontend_stats(args)
    elif args.incrementality:
        if args.paired:
            show_paired_incrementality(args)
        else:
            show_incrementality(args)

main()