#!/usr/bin/python
#
# ==-- process-stats-dir - summarize one or more Swift -stats-output-dirs --==#
#
# This source file is part of the Swift.org open source project
#
# Copyright (c) 2014-2017 Apple Inc. and the Swift project authors
# Licensed under Apache License v2.0 with Runtime Library Exception
#
# See https://swift.org/LICENSE.txt for license information
# See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
#
# ==------------------------------------------------------------------------==#
#
# This file processes the contents of one or more directories generated by
# `swiftc -stats-output-dir` and emits summary data, traces etc. for analysis.
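#
# Example invocations (illustrative; see the argument parser in main() below
# for the full set of modes and flags):
#
#   process-stats-dir.py --compare-stats-dirs old-stats-dir new-stats-dir
#   process-stats-dir.py --set-csv-baseline baseline.csv stats-dir
#   process-stats-dir.py --catapult --output trace.json stats-dir
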
import argparse
import csv
import json
import os
import platform
import re
import sys
import time
import urllib
import urllib2
from collections import namedtuple
from operator import attrgetter

from jobstats import load_stats_dir, merge_all_jobstats


# Passed args with 2-element remainder ["old", "new"], return a list of tuples
# of the form [(name, (oldstats, newstats))] where each name is a common subdir
# of each of "old" and "new", and the stats are those found in the respective
# dirs.
def load_paired_stats_dirs(args):
    assert(len(args.remainder) == 2)
    paired_stats = []
    (old, new) = args.remainder
    for p in sorted(os.listdir(old)):
        full_old = os.path.join(old, p)
        full_new = os.path.join(new, p)
        if not (os.path.exists(full_old) and os.path.isdir(full_old) and
                os.path.exists(full_new) and os.path.isdir(full_new)):
            continue
        old_stats = load_stats_dir(full_old, **vars(args))
        new_stats = load_stats_dir(full_new, **vars(args))
        if len(old_stats) == 0 or len(new_stats) == 0:
            continue
        paired_stats.append((p, (old_stats, new_stats)))
    return paired_stats
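
# For example (illustrative), given directory trees like:
#
#   old/Foo/*.json    new/Foo/*.json
#   old/Bar/*.json    new/Bar/*.json
#
# this returns [("Bar", (old_stats, new_stats)), ("Foo", (old_stats,
# new_stats))] in sorted subdirectory order, skipping any subdir that is
# missing or empty on either side.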


def write_catapult_trace(args):
    allstats = []
    for path in args.remainder:
        allstats += load_stats_dir(path, **vars(args))
    json.dump([s.to_catapult_trace_obj() for s in allstats], args.output)
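
# A sketch of the result (assuming jobstats emits standard Chrome trace-event
# objects): a JSON array that can typically be loaded into a trace viewer
# such as chrome://tracing.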


def write_lnt_values(args):
    for d in args.remainder:
        stats = load_stats_dir(d, **vars(args))
        merged = merge_all_jobstats(stats, **vars(args))
        j = merged.to_lnt_test_obj(args)
        if args.lnt_submit is None:
            json.dump(j, args.output, indent=4)
        else:
            url = args.lnt_submit
            print "\nsubmitting to LNT server: " + url
            json_report = {'input_data': json.dumps(j), 'commit': '1'}
            data = urllib.urlencode(json_report)
            response_str = urllib2.urlopen(urllib2.Request(url, data))
            response = json.loads(response_str.read())
            print "### response:"
            print response
            if 'success' in response:
                print "server response:\tSuccess"
            else:
                print "server response:\tError"
                print "error:\t", response['error']
                sys.exit(1)
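
# For example (illustrative):
#
#   process-stats-dir.py --lnt --lnt-machine mybox stats-dir
#
# prints the LNT JSON report to --output; adding something like
# --lnt-submit http://lnt.example.org/submit (URL hypothetical) POSTs the
# report to that server instead.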


def show_paired_incrementality(args):
    fieldnames = ["old_pct", "old_skip",
                  "new_pct", "new_skip",
                  "delta_pct", "delta_skip",
                  "name"]
    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
    out.writeheader()

    for (name, (oldstats, newstats)) in load_paired_stats_dirs(args):
        olddriver = merge_all_jobstats((x for x in oldstats
                                        if x.is_driver_job()), **vars(args))
        newdriver = merge_all_jobstats((x for x in newstats
                                        if x.is_driver_job()), **vars(args))
        if olddriver is None or newdriver is None:
            continue
        oldpct = olddriver.incrementality_percentage()
        newpct = newdriver.incrementality_percentage()
        deltapct = newpct - oldpct
        oldskip = olddriver.driver_jobs_skipped()
        newskip = newdriver.driver_jobs_skipped()
        deltaskip = newskip - oldskip
        out.writerow(dict(name=name,
                          old_pct=oldpct, old_skip=oldskip,
                          new_pct=newpct, new_skip=newskip,
                          delta_pct=deltapct, delta_skip=deltaskip))
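
# Illustrative output (tab-separated; values made up):
#
#   old_pct  old_skip  new_pct  new_skip  delta_pct  delta_skip  name
#   80.0     8         90.0     9         10.0       1           Foo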


def show_incrementality(args):
    fieldnames = ["incrementality", "name"]
    out = csv.DictWriter(args.output, fieldnames, dialect='excel-tab')
    out.writeheader()

    for path in args.remainder:
        stats = load_stats_dir(path, **vars(args))
        for s in stats:
            if s.is_driver_job():
                pct = s.incrementality_percentage()
                out.writerow(dict(name=os.path.basename(path),
                                  incrementality=pct))


def diff_and_pct(old, new):
    if old == 0:
        if new == 0:
            return (0, 0.0)
        else:
            return (new, 100.0)
    delta = (new - old)
    delta_pct = round((float(delta) / float(old)) * 100.0, 2)
    return (delta, delta_pct)
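
# For example: diff_and_pct(200, 150) == (-50, -25.0), and
# diff_and_pct(0, 10) == (10, 100.0) -- growth from zero is reported as 100%.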


def update_epoch_value(d, name, epoch, value):
    changed = 0
    if name in d:
        (existing_epoch, existing_value) = d[name]
        if existing_epoch > epoch:
            print("note: keeping newer value %d from epoch %d for %s"
                  % (existing_value, existing_epoch, name))
            epoch = existing_epoch
            value = existing_value
        elif existing_value == value:
            epoch = existing_epoch
        else:
            (_, delta_pct) = diff_and_pct(existing_value, value)
            print ("note: changing value %d -> %d (%.2f%%) for %s" %
                   (existing_value, value, delta_pct, name))
            changed = 1
    d[name] = (epoch, value)
    return (epoch, value, changed)
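
# For example (illustrative), starting from d == {}:
#
#   update_epoch_value(d, "x", 10, 5)   # -> (10, 5, 0), first sighting
#   update_epoch_value(d, "x", 20, 7)   # -> (20, 7, 1), value changed
#   update_epoch_value(d, "x", 15, 9)   # -> (20, 7, 0), older epoch ignored
#
# leaving d == {"x": (20, 7)}.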


def read_stats_dict_from_csv(f, select_stat=''):
    infieldnames = ["epoch", "name", "value"]
    c = csv.DictReader(f, infieldnames,
                       dialect='excel-tab',
                       quoting=csv.QUOTE_NONNUMERIC)
    d = {}
    sre = re.compile('.*' if len(select_stat) == 0 else
                     '|'.join(select_stat))
    for row in c:
        epoch = int(row["epoch"])
        name = row["name"]
        if sre.search(name) is None:
            continue
        value = int(row["value"])
        update_epoch_value(d, name, epoch, value)
    return d
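
# For example (illustrative): with select_stat=["NumDecls", "NumSILFunctions"]
# the filter regex is 'NumDecls|NumSILFunctions', so any stat whose name
# contains either substring is kept; with the default '' every row matches.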


# The idea here is that a "baseline" is a (tab-separated) CSV file full of
# the counters you want to track, each prefixed by an epoch timestamp of
# the last time the value was reset.
#
# When you set a fresh baseline, all stats in the provided stats dir are
# written to the baseline. When you set against an _existing_ baseline,
# only the counters mentioned in the existing baseline are updated, and
# only if their values differ.
#
# Finally, since it's a line-oriented CSV file, you can put:
#
#    mybaseline.csv merge=union
#
# in your .gitattributes file, and forget about merge conflicts. The reader
# function above will take the later epoch anytime it detects duplicates,
# so union-merging is harmless. Duplicates will be eliminated whenever the
# next baseline-set is done.
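#
# An illustrative baseline file (fields are tab-separated; names are quoted
# and numbers unquoted, per QUOTE_NONNUMERIC; stat names invented here):
#
#   1514764800  "AST.NumSourceLines"        2043
#   1514764800  "IRModule.NumIRFunctions"   161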
def set_csv_baseline(args):
    existing = None
    if os.path.exists(args.set_csv_baseline):
        with open(args.set_csv_baseline, "r") as f:
            existing = read_stats_dict_from_csv(f,
                                                select_stat=args.select_stat)
            print ("updating %d baseline entries in %s" %
                   (len(existing), args.set_csv_baseline))
    else:
        print "making new baseline " + args.set_csv_baseline
    fieldnames = ["epoch", "name", "value"]
    with open(args.set_csv_baseline, "wb") as f:
        out = csv.DictWriter(f, fieldnames, dialect='excel-tab',
                             quoting=csv.QUOTE_NONNUMERIC)
        m = merge_all_jobstats((s for d in args.remainder
                                for s in load_stats_dir(d, **vars(args))),
                               **vars(args))
        if m is None:
            print "no stats found"
            return 1
        changed = 0
        newepoch = int(time.time())
        for name in sorted(m.stats.keys()):
            epoch = newepoch
            value = m.stats[name]
            if existing is not None:
                if name not in existing:
                    continue
                (epoch, value, chg) = update_epoch_value(existing, name,
                                                         epoch, value)
                changed += chg
            out.writerow(dict(epoch=int(epoch),
                              name=name,
                              value=int(value)))
        if existing is not None:
            print "changed %d entries in baseline" % changed
    return 0
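
# For example (illustrative):
#
#   process-stats-dir.py --set-csv-baseline baseline.csv stats-dir
#
# creates baseline.csv from the merged stats if it does not exist, or updates
# only the counters already present in it otherwise.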


OutputRow = namedtuple("OutputRow",
                       ["name", "old", "new",
                        "delta", "delta_pct"])


def compare_stats(args, old_stats, new_stats):
    for name in sorted(old_stats.keys()):
        old = old_stats[name]
        new = new_stats.get(name, 0)
        (delta, delta_pct) = diff_and_pct(old, new)
        if ((name.startswith("time.") or '.time.' in name) and
                abs(delta) < args.delta_usec_thresh):
            continue
        if abs(delta_pct) < args.delta_pct_thresh:
            continue
        yield OutputRow(name=name,
                        old=int(old), new=int(new),
                        delta=int(delta),
                        delta_pct=delta_pct)
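
# For example, with the default thresholds a "time." stat must move by at
# least --delta-usec-thresh (100000) _and_ --delta-pct-thresh (0.01%) to be
# reported, while other counters only need to clear the percentage threshold.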


def write_comparison(args, old_stats, new_stats):
    rows = list(compare_stats(args, old_stats, new_stats))
    sort_key = (attrgetter('delta_pct')
                if args.sort_by_delta_pct
                else attrgetter('name'))
    rows.sort(key=sort_key, reverse=args.sort_descending)
    regressions = sum(1 for row in rows if row.delta > 0)

    if args.markdown:

        def format_field(field, row, args):
            if field == 'name' and args.group_by_module:
                return re.sub(r'^(\w+)\.', r'\1<br/>', row.name)
            elif field == 'delta_pct' and args.github_emoji:
                if row.delta_pct > 0:
                    return str(row.delta_pct) + " :no_entry:"
                else:
                    return str(row.delta_pct) + " :white_check_mark:"
            else:
                return str(vars(row)[field])

        out = args.output
        out.write(' | '.join(OutputRow._fields))
        out.write('\n')
        out.write(' | '.join('---:' for _ in OutputRow._fields))
        out.write('\n')
        for row in rows:
            out.write(' | '.join(format_field(f, row, args)
                                 for f in OutputRow._fields))
            out.write('\n')
    else:
        out = csv.DictWriter(args.output, OutputRow._fields,
                             dialect='excel-tab')
        out.writeheader()
        for row in rows:
            out.writerow(row._asdict())

    return regressions
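
# With --markdown, the emitted table looks roughly like (values illustrative):
#
#   name | old | new | delta | delta_pct
#   ---: | ---: | ---: | ---: | ---:
#   AST.NumDecls | 100 | 110 | 10 | 10.0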


def compare_to_csv_baseline(args):
    old_stats = read_stats_dict_from_csv(args.compare_to_csv_baseline,
                                         select_stat=args.select_stat)
    m = merge_all_jobstats((s for d in args.remainder
                            for s in load_stats_dir(d, **vars(args))),
                           **vars(args))
    old_stats = dict((k, v) for (k, (_, v)) in old_stats.items())
    new_stats = m.stats

    return write_comparison(args, old_stats, new_stats)
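
# For example (illustrative):
#
#   process-stats-dir.py --compare-to-csv-baseline baseline.csv stats-dir
#
# merges the stats under stats-dir and diffs them against the baseline values.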


# Summarize the immediate difference between two stats-dirs, optionally
# restricting the reported stats to those named in a CSV baseline.
def compare_stats_dirs(args):
    if len(args.remainder) != 2:
        raise ValueError("Expected exactly 2 stats-dirs")

    vargs = vars(args)
    if args.select_stats_from_csv_baseline is not None:
        b = read_stats_dict_from_csv(args.select_stats_from_csv_baseline)
        if args.group_by_module:
            pat = re.compile(r'^\w+\.')
            vargs['select_stat'] = set(re.sub(pat, '', k) for k in b.keys())
        else:
            vargs['select_stat'] = b.keys()

    (old, new) = args.remainder
    old_stats = merge_all_jobstats(load_stats_dir(old, **vargs), **vargs)
    new_stats = merge_all_jobstats(load_stats_dir(new, **vargs), **vargs)

    return write_comparison(args, old_stats.stats, new_stats.stats)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true",
                        help="Report activity verbosely")
    parser.add_argument("--output", default="-",
                        type=argparse.FileType('wb', 0),
                        help="Write output to file")
    parser.add_argument("--paired", action="store_true",
                        help="Process two dirs-of-stats-dirs, pairwise")
    parser.add_argument("--delta-pct-thresh", type=float, default=0.01,
                        help="Percentage change required to report")
    parser.add_argument("--delta-usec-thresh", type=int, default=100000,
                        help="Absolute delta on times required to report")
    parser.add_argument("--lnt-machine", type=str, default=platform.node(),
                        help="Machine name for LNT submission")
    parser.add_argument("--lnt-run-info", action='append', default=[],
                        type=lambda kv: kv.split("="),
                        help="Extra key=value pairs for LNT run-info")
    parser.add_argument("--lnt-machine-info", action='append', default=[],
                        type=lambda kv: kv.split("="),
                        help="Extra key=value pairs for LNT machine-info")
    parser.add_argument("--lnt-order", type=str,
                        default=str(int(time.time())),
                        help="Order for LNT submission")
    parser.add_argument("--lnt-tag", type=str, default="swift-compile",
                        help="Tag for LNT submission")
    parser.add_argument("--lnt-submit", type=str, default=None,
                        help="URL to submit LNT data to (rather than print)")
    parser.add_argument("--select-module",
                        default=[],
                        action="append",
                        help="Select specific modules")
    parser.add_argument("--group-by-module",
                        default=False,
                        action="store_true",
                        help="Group stats by module")
    parser.add_argument("--select-stat",
                        default=[],
                        action="append",
                        help="Select specific statistics")
    parser.add_argument("--select-stats-from-csv-baseline",
                        type=argparse.FileType('rb', 0), default=None,
                        help="Select statistics present in a CSV baseline")
    parser.add_argument("--exclude-timers",
                        default=False,
                        action="store_true",
                        help="Only select counters, exclude timers")
    parser.add_argument("--sort-by-delta-pct",
                        default=False,
                        action="store_true",
                        help="Sort comparison results by delta-%%, not stat")
    parser.add_argument("--sort-descending",
                        default=False,
                        action="store_true",
                        help="Sort comparison results in descending order")
    parser.add_argument("--merge-by",
                        default="sum",
                        type=str,
                        help="Merge identical metrics by (sum|min|max)")
    parser.add_argument("--markdown",
                        default=False,
                        action="store_true",
                        help="Write output in markdown table format")
    parser.add_argument("--github-emoji",
                        default=False,
                        action="store_true",
                        help="Add github-emoji indicators to markdown")
    modes = parser.add_mutually_exclusive_group(required=True)
    modes.add_argument("--catapult", action="store_true",
                       help="Emit a 'catapult'-compatible trace of events")
    modes.add_argument("--incrementality", action="store_true",
                       help="Summarize the 'incrementality' of a build")
    modes.add_argument("--set-csv-baseline", type=str, default=None,
                       help="Merge stats from a stats-dir into a CSV baseline")
    modes.add_argument("--compare-to-csv-baseline",
                       type=argparse.FileType('rb', 0), default=None,
                       metavar="BASELINE.csv",
                       help="Compare stats dir to named CSV baseline")
    modes.add_argument("--compare-stats-dirs",
                       action="store_true",
                       help="Compare two stats dirs directly")
    modes.add_argument("--lnt", action="store_true",
                       help="Emit an LNT-compatible test summary")
    parser.add_argument('remainder', nargs=argparse.REMAINDER,
                        help="Stats-dirs to process")

    args = parser.parse_args()
    if len(args.remainder) == 0:
        parser.print_help()
        return 1
    if args.catapult:
        write_catapult_trace(args)
    elif args.compare_stats_dirs:
        return compare_stats_dirs(args)
    elif args.set_csv_baseline is not None:
        return set_csv_baseline(args)
    elif args.compare_to_csv_baseline is not None:
        return compare_to_csv_baseline(args)
    elif args.incrementality:
        if args.paired:
            show_paired_incrementality(args)
        else:
            show_incrementality(args)
    elif args.lnt:
        write_lnt_values(args)
    return None


sys.exit(main())