219 lines
6.2 KiB
Python
219 lines
6.2 KiB
Python
|
#!/usr/bin/env python
|
||
|
##
|
||
|
## Name: mkdoc.py
|
||
|
## Purpose: Extract documentation from header files.
|
||
|
##
|
||
|
## Copyright (C) 2018 Michael J. Fromberger. All Rights Reserved.
|
||
|
##
|
||
|
## Usage: mkdoc.py <template> <output>
|
||
|
##
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import collections, re, sys
|
||
|
|
||
|
# A regular expression to match commented declarations.
|
||
|
# This is specific to C and not very general; it should work fine for the imath
|
||
|
# headers but will not adapt well to arbitrary code or to C++.
|
||
|
doc = re.compile(
|
||
|
r"""(?mx)/\*\* # open /**
|
||
|
(?P<text>(?:[^*]|\*[^/])*) # text Does a thing
|
||
|
\*/\n # close */
|
||
|
(?P<decl>[^;{]*(?:;$|\{))"""
|
||
|
) # decl void f(x);
|
||
|
|
||
|
# A regular expression matching up to 4 spaces at the head of a line.
|
||
|
spc = re.compile(r"(?m)^ {1,4}")
|
||
|
|
||
|
# A regular expression matching an insertion point. An insertion point has the
|
||
|
# form {{include "header" name ...}}. If no names are given, all the names in
|
||
|
# the given header are inserted.
|
||
|
ins = re.compile(r'{{insert "(?P<file>[^"]*)"(?P<names>(?:\s+\w+)+)?\s*}}')
|
||
|
|
||
|
# A regular expression matching non-identifier characters, for splitting.
|
||
|
nid = re.compile(r"\W+")
|
||
|
|
||
|
# A cache of already-parsed files, maps filename to declarations.
|
||
|
CACHE = {}
|
||
|
|
||
|
|
||
|
def last_word(s):
|
||
|
"""Returns the last identifier-shaped word in s."""
|
||
|
return nid.split(s.strip())[-1]
|
||
|
|
||
|
|
||
|
def typeset(text):
|
||
|
"""Renders text with verbatim sections into markdown."""
|
||
|
lines = []
|
||
|
fence = False
|
||
|
for line in text.split("\n"):
|
||
|
if fence != line.startswith(" "):
|
||
|
lines.append("```")
|
||
|
fence = not fence
|
||
|
lines.append(line)
|
||
|
if fence:
|
||
|
lines.append("```")
|
||
|
for i, line in enumerate(lines):
|
||
|
if i == 0:
|
||
|
lines[i] = " - " + line
|
||
|
elif line:
|
||
|
lines[i] = " " + line
|
||
|
return "\n".join(lines)
|
||
|
|
||
|
|
||
|
class LIndex(object):
|
||
|
"""Represents a line offset index for text."""
|
||
|
|
||
|
def __init__(self, text):
|
||
|
pos = 0
|
||
|
|
||
|
# An array of ending offsets for each line, with a sentinel at position
|
||
|
# 0 to make the index arithmetic easier.
|
||
|
idx = [0]
|
||
|
|
||
|
# Scan forward for newlines or EOF, and push the offsets of the line
|
||
|
# breaks onto the list so we can binary search them later.
|
||
|
while pos < len(text):
|
||
|
next = text.find("\n", pos)
|
||
|
if next < 0:
|
||
|
break
|
||
|
idx.append(next)
|
||
|
pos = next + 1
|
||
|
if idx[-1] < len(text):
|
||
|
idx.append(len(text))
|
||
|
self._len = len(text)
|
||
|
self._index = idx
|
||
|
|
||
|
def linecol(self, pos):
|
||
|
"""Returns the (line, col) corresponding to pos.
|
||
|
|
||
|
Line numbers are 1-based, columns are 0-based.
|
||
|
"""
|
||
|
if pos < 0 or pos > self._len:
|
||
|
raise IndexError("position %d out of range" % pos)
|
||
|
|
||
|
# Binary search for the largest line number whose end marker is at or
|
||
|
# after pos and whose previous line's end is before pos.
|
||
|
idx = self._index
|
||
|
i, j = 1, len(idx)
|
||
|
while i < j:
|
||
|
m = (i + j) / 2
|
||
|
if idx[m] < pos:
|
||
|
i = m + 1
|
||
|
elif idx[m - 1] < pos:
|
||
|
return m, pos - idx[m - 1]
|
||
|
else:
|
||
|
j = m
|
||
|
|
||
|
# This happens if (and only if) the whole file is one line.
|
||
|
return 1, pos
|
||
|
|
||
|
|
||
|
class Decl(object):
|
||
|
"""Represents a single documented declaration."""
|
||
|
|
||
|
def __init__(self, com, decl, line=None):
|
||
|
"""Initialize a new documented declaration.
|
||
|
|
||
|
Params:
|
||
|
com: the raw text of the comment
|
||
|
decl: the raw text of the declaration
|
||
|
line: the line number of the declaration
|
||
|
"""
|
||
|
lp = decl.find("(")
|
||
|
if lp < 0:
|
||
|
self.name = last_word(decl.rstrip(";"))
|
||
|
else:
|
||
|
self.name = last_word(decl[:lp])
|
||
|
self.decl = " ".join(decl.rstrip(";{").strip().split())
|
||
|
self.comment = spc.sub("", com.rstrip())
|
||
|
self.line = line
|
||
|
|
||
|
def __repr__(self):
|
||
|
return '#Decl["%s"]' % self.decl
|
||
|
|
||
|
def markdown(self, path):
|
||
|
pos = self.decl.index(self.name)
|
||
|
decl = '%s<a href="%s#L%d">%s</a>%s' % (
|
||
|
self.decl[:pos],
|
||
|
path,
|
||
|
self.line,
|
||
|
self.name,
|
||
|
self.decl[pos + len(self.name) :],
|
||
|
)
|
||
|
return """------------
|
||
|
<a id="{name}"></a><pre>
|
||
|
{decl};
|
||
|
</pre>
|
||
|
{comment}
|
||
|
""".format(
|
||
|
name=self.name, decl=decl, comment=typeset(self.comment)
|
||
|
)
|
||
|
|
||
|
|
||
|
def parse_decls(text):
|
||
|
"""Parse a dictionary of declarations from text."""
|
||
|
decls = collections.OrderedDict()
|
||
|
idx = LIndex(text)
|
||
|
for m in doc.finditer(text):
|
||
|
line, _ = idx.linecol(m.span("decl")[0])
|
||
|
d = Decl(m.group("text"), m.group("decl"), line)
|
||
|
decls[d.name] = d
|
||
|
return decls
|
||
|
|
||
|
|
||
|
def load_file(path):
|
||
|
"""Load declarations from path, or use cached results."""
|
||
|
if path not in CACHE:
|
||
|
with file(path, "rU") as fp:
|
||
|
CACHE[path] = parse_decls(fp.read())
|
||
|
return CACHE[path]
|
||
|
|
||
|
|
||
|
def main(args):
|
||
|
if len(args) != 2:
|
||
|
print("Usage: mkdoc.py <input> <output>", file=sys.stderr)
|
||
|
sys.exit(1)
|
||
|
|
||
|
doc_template = args[0]
|
||
|
doc_markdown = args[1]
|
||
|
|
||
|
with file(doc_template, "rU") as input:
|
||
|
template = input.read()
|
||
|
|
||
|
with file(doc_markdown, "wt") as output:
|
||
|
print(
|
||
|
"""<!--
|
||
|
This file was generated from "{0}" by mkdoc.py
|
||
|
DO NOT EDIT
|
||
|
-->
|
||
|
""".format(
|
||
|
doc_template
|
||
|
),
|
||
|
file=output,
|
||
|
)
|
||
|
|
||
|
pos = 0 # last position of input copied
|
||
|
|
||
|
# Look for substitution markers in the template, and replace them with
|
||
|
# their content.
|
||
|
for ip in ins.finditer(template):
|
||
|
output.write(template[pos : ip.start()])
|
||
|
pos = ip.end()
|
||
|
|
||
|
decls = load_file(ip.group("file"))
|
||
|
if ip.group("names"): # pick the selected names, in order
|
||
|
decls = collections.OrderedDict(
|
||
|
(key, decls[key]) for key in ip.group("names").strip().split()
|
||
|
)
|
||
|
|
||
|
# Render the selected declarations.
|
||
|
for decl in decls.values():
|
||
|
print(decl.markdown(ip.group("file")), file=output)
|
||
|
|
||
|
# Clean up any remaining template bits
|
||
|
output.write(template[pos:])
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
main(sys.argv[1:])
|