# ===--- protocol_graph.py ----------------------------*- coding: utf-8 -*-===// # # This source file is part of the Swift.org open source project # # Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors # Licensed under Apache License v2.0 with Runtime Library Exception # # See http://swift.org/LICENSE.txt for license information # See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors # # ===----------------------------------------------------------------------===// # # Create a graph of the protocol refinement relationships, associated # types, operator requirements, and defaulted generic operators. # # run as follows to view the Nth-largest connected component in a web browser: # # N=0 && rm -f /tmp/protocols.dot && \ # python protocol_graph.py stdlib.swift > /tmp/p0.dot && \ # (ccomps -zX#$N -o /tmp/protocols.dot /tmp/p0.dot || true) \ # && dot -Tsvg /tmp/protocols.dot > /tmp/protocols.svg \ # && open /tmp/protocols.svg # # ===----------------------------------------------------------------------===// from __future__ import print_function import cgi import os import re import sys # Open 'stdlib.swift' in this directory if no path specified. args = list(sys.argv) + [os.path.join(os.path.dirname(__file__), 'stdlib.swift')] re_flags = re.MULTILINE | re.VERBOSE # Pattern to recognize stdlib identifiers (FIXME: doesn't handle Unicode). identifier = '[A-Za-z_][A-Za-z0-9_]*' # Pattern to recognize a (possibly-generic) operator decl. operator = r''' (?:(?:prefix|postfix).*)? func \s* (?=\S)[^A-Za-z_] # non-space, non-identifier: begins an operator name (?:(?=\S)[^(){])* # rest of operator name \s* (<[^>{]+>)? # generic parameter list \s* \([^)]*\) # function parameter list ''' # substitute local variables into the string def interpolate(string): import inspect frame = inspect.currentframe() return string % frame.f_back.f_locals # Given the body_text of a protocol definition, return a list of # associated type and operator requirements. def body_lines(body_text): return [ cgi.escape(b.group(0)) for b in re.finditer( r'(typealias\s*' + identifier + r'(\s*[:,]\s*' + identifier + ')?|' + operator + '.*)', body_text, re_flags) ] # Mapping from protocol to associated type / operator requirements body = {} # Mapping from a parent protocol to set of children. graph = {} # Mapping from protocol to generic operators taking instances as arguments generic_operators = {} comments = r'//.* | /[*] (.|\n)*? [*]/' # FIXME: doesn't respect strings or comment nesting) # read source, stripping all comments with open(args[1]) as src: source_sans_comments = re.sub(comments, '', src.read(), flags=re_flags) generic_parameter_constraint = interpolate(r' (%(identifier)s) \s* : \s* (%(identifier)s) ') def parse_generic_operator(m): generic_params = m.group(5) generic_operator = cgi.escape(m.group(0).strip()) function_param_start = m.end(5) - m.start(0) function_params = generic_operator[function_param_start:] for m2 in re.finditer(generic_parameter_constraint, generic_params, re_flags): type_parameter = m2.group(1) protocol = m2.group(2) # we're only interested if we can find a function parameter of that type if not re.search(r':\s*%s\s*[,)]' % type_parameter, function_params): continue # Make some replacements in the signature to limit the graph size letter_tau = 'τ' letter_pi = 'π' abbreviated_signature = re.sub( r'\b%s\b' % protocol, letter_pi, re.sub(r'\b%s\b' % type_parameter, letter_tau, generic_operator)) generic_operators.setdefault(protocol, set()).add(abbreviated_signature) def parse_protocol(m): child = m.group(1) # skip irrelevant protocols if re.match(r'_Builtin.*Convertible', child): return graph.setdefault(child, set()) body[child] = body_lines(m.group(3)) if m.group(2): for parent in m.group(2).strip().split(","): if re.match(r'_Builtin.*Convertible', parent): return graph.setdefault(parent.strip(), set()).add(child) protocols_and_operators = interpolate(r''' \bprotocol \s+ (%(identifier)s) \s* (?::\s*([^{]+))? # refinements {([^{}\n]*(.*\n)*?)} # body | %(operator)s [^{]*(?={) # operator definition up to the open brace ''') # Main parsing loop for m in re.finditer(protocols_and_operators, source_sans_comments, re_flags): if m.group(1): parse_protocol(m) elif m.group(5): parse_generic_operator(m) # otherwise we matched some non-generic operator # Find clusters of protocols that have the same name when underscores # are stripped # map from potential cluster name to nodes in the cluster cluster_builder = {} for n in graph: cluster_builder.setdefault(n.translate(None, '_'), set()).add(n) # Grab the clusters with more than one member. clusters = dict((c, nodes) for (c, nodes) in cluster_builder.items() if len(nodes) > 1) # A set of all intra-cluster edges cluster_edges = set( (s, t) for (c, elements) in clusters.items() for s in elements for t in graph[s] if t in elements) print('digraph ProtocolHierarchies {') # ; packmode="array1" print(' mclimit = 100; ranksep=1.5; ') print(' edge [dir="back"];') print(' node [shape = box, fontname = Helvetica, fontsize = 10];') for c in sorted(clusters): print(' subgraph "cluster_%s" {' % c) for (s, t) in sorted(cluster_edges): if s in clusters[c]: print('%s -> %s [weight=100];' % (s, t)) print('}') for node in sorted(graph.keys()): requirements = body.get(node, []) generics = sorted(generic_operators.get(node, set())) style = 'solid' if node.startswith('_') else 'bold' divider = '
| \n%s\n |