#!/usr/bin/env python3 # # ==-- jobstats - support for reading the contents of stats dirs --==# # # This source file is part of the Swift.org open source project # # Copyright (c) 2014-2017 Apple Inc. and the Swift project authors # Licensed under Apache License v2.0 with Runtime Library Exception # # See https://swift.org/LICENSE.txt for license information # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors # # ==------------------------------------------------------------------------==# # # This file contains subroutines for loading object-representations of one or # more directories generated by `swiftc -stats-output-dir`. import datetime import itertools import json import os import platform import random import re class JobData(object): def __init__(self, jobkind, jobid, module, jobargs): self.jobkind = jobkind self.jobid = jobid self.module = module self.jobargs = jobargs (self.input, self.triple, self.out, self.opt) = jobargs[0:4] def is_driver_job(self): """Return true iff self measures a driver job""" return self.jobkind == 'driver' def is_frontend_job(self): """Return true iff self measures a frontend job""" return self.jobkind == 'frontend' class JobProfs(JobData): """Object denoting the profile of a single job run during a compilation, corresponding to a single directory of profiles produced by a single job process passed -stats-output-dir.""" def __init__(self, jobkind, jobid, module, jobargs, profiles): self.profiles = profiles super(JobProfs, self).__init__(jobkind, jobid, module, jobargs) class JobStats(JobData): """Object holding the stats of a single job run during a compilation, corresponding to a single JSON file produced by a single job process passed -stats-output-dir.""" def __init__(self, jobkind, jobid, module, start_usec, dur_usec, jobargs, stats): self.start_usec = start_usec self.dur_usec = dur_usec self.stats = stats super(JobStats, self).__init__(jobkind, jobid, module, jobargs) def driver_jobs_ran(self): """Return the count of a driver job's ran sub-jobs""" assert self.is_driver_job() return self.stats.get("Driver.NumDriverJobsRun", 0) def driver_jobs_skipped(self): """Return the count of a driver job's skipped sub-jobs""" assert self.is_driver_job() return self.stats.get("Driver.NumDriverJobsSkipped", 0) def driver_jobs_total(self): """Return the total count of a driver job's ran + skipped sub-jobs""" assert self.is_driver_job() return self.driver_jobs_ran() + self.driver_jobs_skipped() def merged_with(self, other, merge_by="sum"): """Return a new JobStats, holding the merger of self and other""" merged_stats = {} ops = {"sum": lambda a, b: a + b, # Because 0 is also a sentinel on counters we do a modified # "nonzero-min" here. Not ideal but best we can do. "min": lambda a, b: (min(a, b) if a != 0 and b != 0 else max(a, b)), "max": lambda a, b: max(a, b)} op = ops[merge_by] for k, v in list(self.stats.items()) + list(other.stats.items()): if k in merged_stats: merged_stats[k] = op(v, merged_stats[k]) else: merged_stats[k] = v merged_kind = self.jobkind if other.jobkind != merged_kind: merged_kind = "" merged_module = self.module if other.module != merged_module: merged_module = "" merged_start = min(self.start_usec, other.start_usec) merged_end = max(self.start_usec + self.dur_usec, other.start_usec + other.dur_usec) merged_dur = merged_end - merged_start return JobStats(merged_kind, random.randint(0, 1000000000), merged_module, merged_start, merged_dur, self.jobargs + other.jobargs, merged_stats) def prefixed_by(self, prefix): prefixed_stats = dict([((prefix + "." + k), v) for (k, v) in self.stats.items()]) return JobStats(self.jobkind, random.randint(0, 1000000000), self.module, self.start_usec, self.dur_usec, self.jobargs, prefixed_stats) def divided_by(self, n): divided_stats = dict([(k, v / n) for (k, v) in self.stats.items()]) return JobStats(self.jobkind, random.randint(0, 1000000000), self.module, self.start_usec, self.dur_usec, self.jobargs, divided_stats) def incrementality_percentage(self): """Assuming the job is a driver job, return the amount of jobs that actually ran, as a percentage of the total number.""" assert self.is_driver_job() ran = self.driver_jobs_ran() total = self.driver_jobs_total() return round((float(ran) / float(total)) * 100.0, 2) def to_catapult_trace_obj(self): """Return a JSON-formattable object fitting chrome's 'catapult' trace format""" return {"name": self.module, "cat": self.jobkind, "ph": "X", # "X" == "complete event" "pid": self.jobid, "tid": 1, "ts": self.start_usec, "dur": self.dur_usec, "args": self.jobargs} def start_timestr(self): """Return a formatted timestamp of the job's start-time""" t = datetime.datetime.fromtimestamp(self.start_usec / 1000000.0) return t.strftime("%Y-%m-%d %H:%M:%S") def end_timestr(self): """Return a formatted timestamp of the job's end-time""" t = datetime.datetime.fromtimestamp((self.start_usec + self.dur_usec) / 1000000.0) return t.strftime("%Y-%m-%d %H:%M:%S") def pick_lnt_metric_suffix(self, metric_name): """Guess an appropriate LNT metric type for a given metric name""" if "BytesOutput" in metric_name: return "code_size" if "RSS" in metric_name or "BytesAllocated" in metric_name: return "mem" return "compile" def to_lnt_test_obj(self, args): """Return a JSON-formattable object fitting LNT's 'submit' format""" run_info = { "run_order": str(args.lnt_order), "tag": str(args.lnt_tag), } run_info.update(dict(args.lnt_run_info)) stats = self.stats return { "Machine": { "Name": args.lnt_machine, "Info": dict(args.lnt_machine_info) }, "Run": { "Start Time": self.start_timestr(), "End Time": self.end_timestr(), "Info": run_info }, "Tests": [ { "Data": [v], "Info": {}, "Name": "%s.%s.%s.%s" % (args.lnt_tag, self.module, k, self.pick_lnt_metric_suffix(k)) } for (k, v) in stats.items() ] } AUXPATSTR = (r"(?P[^-]+)-(?P[^-]+)-(?P[^-]+)" + r"-(?P[^-]*)-(?P[^-]+)") AUXPAT = re.compile(AUXPATSTR) TIMERPATSTR = (r"time\.swift-(?P\w+)\." + AUXPATSTR + r"\.(?P\w+)$") TIMERPAT = re.compile(TIMERPATSTR) FILEPATSTR = (r"^stats-(?P\d+)-swift-(?P\w+)-" + AUXPATSTR + r"-(?P\d+)(-.*)?.json$") FILEPAT = re.compile(FILEPATSTR) PROFILEPATSTR = (r"^profile-(?P\d+)-swift-(?P\w+)-" + AUXPATSTR + r"-(?P\d+)(-.*)?.dir$") PROFILEPAT = re.compile(PROFILEPATSTR) def match_auxpat(s): m = AUXPAT.match(s) if m is not None: return m.groupdict() else: return None def match_timerpat(s): m = TIMERPAT.match(s) if m is not None: return m.groupdict() else: return None def match_filepat(s): m = FILEPAT.match(s) if m is not None: return m.groupdict() else: return None def match_profilepat(s): m = PROFILEPAT.match(s) if m is not None: return m.groupdict() else: return None def find_profiles_in(profiledir, select_stat=[]): sre = re.compile('.*' if len(select_stat) == 0 else '|'.join(select_stat)) profiles = None for profile in os.listdir(profiledir): if profile.endswith(".svg"): continue if sre.search(profile) is None: continue fullpath = os.path.join(profiledir, profile) s = os.stat(fullpath) if s.st_size != 0: if profiles is None: profiles = dict() try: (counter, profiletype) = os.path.splitext(profile) # drop leading period from extension profiletype = profiletype[1:] if profiletype not in profiles: profiles[profiletype] = dict() profiles[profiletype][counter] = fullpath except Exception: pass return profiles def list_stats_dir_profiles(path, select_module=[], select_stat=[], **kwargs): """Finds all stats-profiles in path, returning list of JobProfs objects""" jobprofs = [] for root, dirs, files in os.walk(path): for d in dirs: mg = match_profilepat(d) if not mg: continue # NB: "pid" in fpat is a random number, not unix pid. jobkind = mg['kind'] jobid = int(mg['pid']) module = mg["module"] if len(select_module) != 0 and module not in select_module: continue jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]] e = JobProfs(jobkind=jobkind, jobid=jobid, module=module, jobargs=jobargs, profiles=find_profiles_in(os.path.join(root, d), select_stat)) jobprofs.append(e) return jobprofs def load_stats_dir(path, select_module=[], select_stat=[], exclude_timers=False, merge_timers=False, **kwargs): """Loads all stats-files found in path into a list of JobStats objects""" jobstats = [] sre = re.compile('.*' if len(select_stat) == 0 else '|'.join(select_stat)) for root, dirs, files in os.walk(path): for f in files: mg = match_filepat(f) if not mg: continue # NB: "pid" in fpat is a random number, not unix pid. jobkind = mg['kind'] jobid = int(mg['pid']) start_usec = int(mg['start']) module = mg["module"] if len(select_module) != 0 and module not in select_module: continue jobargs = [mg["input"], mg["triple"], mg["out"], mg["opt"]] if platform.system() == 'Windows': p = str(u"\\\\?\\%s" % os.path.abspath(os.path.join(root, f))) else: p = os.path.join(root, f) with open(p) as fp: j = json.load(fp) dur_usec = 1 stats = dict() for (k, v) in j.items(): if sre.search(k) is None: continue if k.startswith('time.'): v = int(1000000.0 * float(v)) if k.startswith('time.') and exclude_timers: continue tm = match_timerpat(k) if tm: if tm['jobkind'] == jobkind and \ tm['timerkind'] == 'wall': dur_usec = v if merge_timers: k = "time.swift-%s.%s" % (tm['jobkind'], tm['timerkind']) stats[k] = v e = JobStats(jobkind=jobkind, jobid=jobid, module=module, start_usec=start_usec, dur_usec=dur_usec, jobargs=jobargs, stats=stats) jobstats.append(e) return jobstats def merge_all_jobstats(jobstats, select_module=[], group_by_module=False, merge_by="sum", divide_by=1, **kwargs): """Does a pairwise merge of the elements of list of jobs""" m = None if len(select_module) > 0: jobstats = filter(lambda j: j.module in select_module, jobstats) if group_by_module: def keyfunc(j): return j.module jobstats = list(jobstats) jobstats.sort(key=keyfunc) prefixed = [] for mod, group in itertools.groupby(jobstats, keyfunc): groupmerge = merge_all_jobstats(group, merge_by=merge_by, divide_by=divide_by) prefixed.append(groupmerge.prefixed_by(mod)) jobstats = prefixed for j in jobstats: if m is None: m = j else: m = m.merged_with(j, merge_by=merge_by) if m is None: return m return m.divided_by(divide_by)