#!/usr/bin/env python3 # blockifyasm ----- Split disassembly into basic blocks ---------*- python -*- # # This source file is part of the Swift.org open source project # # Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors # Licensed under Apache License v2.0 with Runtime Library Exception # # See https://swift.org/LICENSE.txt for license information # See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors # # ---------------------------------------------------------------------------- # # Splits a disassembled function from lldb into basic blocks. # # Useful to show the control flow graph of a disassembled function. # The control flow graph can the be viewed with the viewcfg utility: # # (lldb) disassemble # # $ blockifyasm < file.s | viewcfg # # ---------------------------------------------------------------------------- import re import sys from collections import defaultdict def help(): print( """\ Usage: blockifyasm [-] < file -: only match significant digits of relative branch addresses """ ) def main(): addr_len = 16 if len(sys.argv) >= 2: m = re.match("^-([0-9]+)$", sys.argv[1]) if m: addr_len = int(m.group(1)) else: help() return lines = [] block_starts = {} branch_re1 = re.compile(r"^\s[-\s>]*0x.*:\s.* 0x([0-9a-f]+)\s*;\s*<[+-]") branch_re2 = re.compile(r"^\s[-\s>]*0x.*:\s+tb.* 0x([0-9a-f]+)\s*(;.*)?") inst_re = re.compile(r"^\s[-\s>]*0x([0-9a-f]+)[\s<>0-9+-]*:\s+([a-z0-9.]+)\s") non_fall_through_insts = ["b", "ret", "brk", "jmp", "retq", "ud2"] def get_branch_addr(line): bm = branch_re1.match(line) if bm: return bm.group(1)[-addr_len:] bm = branch_re2.match(line) if bm: return bm.group(1)[-addr_len:] return None def print_function(): if not lines: return predecessors = defaultdict(list) block_num = -1 next_is_block = True prev_is_fallthrough = False # Collect predecessors for all blocks for line in lines: m = inst_re.match(line) assert m, "non instruction line in function" addr = m.group(1)[-addr_len:] inst = m.group(2) if next_is_block or addr in block_starts: if prev_is_fallthrough: predecessors[addr].append(block_num) block_num += 1 block_starts[addr] = block_num next_is_block = False prev_is_fallthrough = True br_addr = get_branch_addr(line) if br_addr: next_is_block = True predecessors[br_addr].append(block_num) prev_is_fallthrough = inst not in non_fall_through_insts # Print the function with basic block labels print("{") for line in lines: m = inst_re.match(line) if m: addr = m.group(1)[-addr_len:] if addr in block_starts: blockstr = "bb" + str(block_starts[addr]) + ":" if predecessors[addr]: print( blockstr + " " * (55 - len(blockstr)) + "; preds = ", end="" ) print( ", ".join("bb" + str(pred) for pred in predecessors[addr]) ) else: print(blockstr) br_addr = get_branch_addr(line) if br_addr and block_starts[br_addr] >= 0: line = re.sub(r";\s<[+-].*", "; bb" + str(block_starts[br_addr]), line) print(line, end="") print("}") # Read disassembly code from stdin for line in sys.stdin: # let the line with the instruction pointer begin with a space line = re.sub("^-> ", " ->", line) if inst_re.match(line): lines.append(line) br_addr = get_branch_addr(line) if br_addr: if len(br_addr) < addr_len: addr_len = len(br_addr) block_starts[br_addr] = -1 else: print_function() lines = [] block_starts = {} print(line, end="") print_function() if __name__ == "__main__": main()