#!/usr/bin/env python from __future__ import print_function import re import subprocess import sys def run(): if len(sys.argv) > 1: print(""" ns-html2rst - Convert Cocoa HTML documentation into ReST usage: nshtml2rst < NSString.html > NSString.rst """) sys.exit(0) html = sys.stdin.read() # Treat
\1', html, flags=re.MULTILINE | re.DOTALL) # Strip all attributes from
...containing class="..." # The resulting classes confound ReST html = re.sub( r'
]*class=[^>]*>(.*?)', r'
\1', html, flags=re.MULTILINE | re.DOTALL) # Remove links from
..., which doesn't have a rendering in
# ReST
html = re.sub(
r'(.*?)]*?>(.*?)(.*?)',
r'\1\2\3',
html, flags=re.MULTILINE | re.DOTALL)
# Let pandoc do most of the hard work
p = subprocess.Popen(
args=['pandoc', '--reference-links', '-f', 'html', '-t', 'rst'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE
)
rst, stderr = p.communicate(html)
# HACKETY HACK HACK: Our html documents apparently contain some
# bogus heading level nesting. Just fix up the one we know about
# so that ReST doesn't complain later.
rst = re.sub("(^|\n)('+)($|\n)",
lambda m: m.group(1) + len(m.group(2)) * '^' + m.group(3),
rst, flags=re.MULTILINE)
sys.stdout.write(rst)
# Run the converter only when executed as a script, not when imported.
if __name__ == '__main__':
    run()