409 lines
12 KiB
Python
Raw Normal View History

from __future__ import division
import re, json
##### Parsing utilities #####
def split_delimited(delimiters, split_by, text):
"""
Generator that walks the ``text`` and splits it into an array on
``split_by``, being careful not to break inside a delimiter pair.
``delimiters`` should be an even-length string with each pair of matching
delimiters listed together, open first.
>>> list(split_delimited('{}[]', ',', ''))
['']
>>> list(split_delimited('', ',', 'foo,bar'))
['foo', 'bar']
>>> list(split_delimited('[]', ',', 'foo,[bar, baz]'))
['foo', '[bar, baz]']
>>> list(split_delimited('{}', ' ', '{Type Name} name Desc'))
['{Type Name}', 'name', 'Desc']
>>> list(split_delimited('[]{}', ',', '[{foo,[bar, baz]}]'))
['[{foo,[bar, baz]}]']
Two adjacent delimiters result in a zero-length string between them:
>>> list(split_delimited('{}', ' ', '{Type Name} Desc'))
['{Type Name}', '', 'Desc']
``split_by`` may be a predicate function instead of a string, in which
case it should return true on a character to split.
>>> list(split_delimited('', lambda c: c in '[]{}, ', '[{foo,[bar, baz]}]'))
['', '', 'foo', '', 'bar', '', 'baz', '', '', '']
"""
delims = [0] * (len(delimiters) // 2)
actions = {}
for i in range(0, len(delimiters), 2):
actions[delimiters[i]] = (i // 2, 1)
actions[delimiters[i + 1]] = (i // 2, -1)
if isinstance(split_by, str):
def split_fn(c):
return c == split_by
else:
split_fn = split_by
last = 0
for i in range(len(text)):
c = text[i]
if split_fn(c) and not any(delims):
yield text[last:i]
last = i + 1
try:
which, dir = actions[c]
delims[which] = delims[which] + dir
except KeyError:
pass # Normal character
yield text[last:]
def strip_stars(doc_comment):
r"""
Strip leading stars from a doc comment.
>>> strip_stars('/** This is a comment. */')
'This is a comment.'
>>> strip_stars('/**\n * This is a\n * multiline comment. */')
'This is a\n multiline comment.'
>>> strip_stars('/** \n\t * This is a\n\t * multiline comment. \n*/')
'This is a\n multiline comment.'
"""
return re.sub('\n\s*?\*\s*?', '\n', doc_comment[3:-2]).strip()
def split_tag(section):
"""
Split the JSDoc tag text (everything following the @) at the first
whitespace. Returns a tuple of (tagname, body).
"""
splitval = re.split('\s+', section, 1)
tag, body = len(splitval) > 1 and splitval or (splitval[0], '')
return tag.strip(), body.strip()
FUNCTION_REGEXPS = [
'function (\w+)',
'(\w+):\sfunction',
'\.(\w+)\s*=\s*function',
]
def guess_function_name(next_line, regexps=FUNCTION_REGEXPS):
"""
Attempt to determine the function name from the first code line
following the comment. The patterns recognized are described by
`regexps`, which defaults to FUNCTION_REGEXPS. If a match is successful,
returns the function name. Otherwise, returns None.
"""
for regexp in regexps:
match = re.search(regexp, next_line)
if match:
return match.group(1)
return None
def guess_parameters(next_line):
"""
Attempt to guess parameters based on the presence of a parenthesized
group of identifiers. If successful, returns a list of parameter names;
otherwise, returns None.
"""
match = re.search('\(([\w\s,]+)\)', next_line)
if match:
return [arg.strip() for arg in match.group(1).split(',')]
else:
return None
def parse_comment(doc_comment, next_line):
r"""
Split the raw comment text into a dictionary of tags. The main comment
body is included as 'doc'.
>>> comment = get_doc_comments(read_file('examples/module.js'))[4][0]
>>> parse_comment(strip_stars(comment), '')['doc']
'This is the documentation for the fourth function.\n\n Since the function being documented is itself generated from another\n function, its name needs to be specified explicitly. using the @function tag'
>>> parse_comment(strip_stars(comment), '')['function']
'not_auto_discovered'
If there are multiple tags with the same name, they're included as a list:
>>> parse_comment(strip_stars(comment), '')['param']
['{String} arg1 The first argument.', '{Int} arg2 The second argument.']
"""
sections = re.split('\n\s*@', doc_comment)
tags = {
'doc': sections[0].strip(),
'guessed_function': guess_function_name(next_line),
'guessed_params': guess_parameters(next_line)
}
for section in sections[1:]:
tag, body = split_tag(section)
if tag in tags:
existing = tags[tag]
try:
existing.append(body)
except AttributeError:
tags[tag] = [existing, body]
else:
tags[tag] = body
return tags
#### Classes #####
class CommentDoc(object):
"""
Base class for all classes that represent a parsed comment of some sort.
"""
def __init__(self, parsed_comment):
self.parsed = parsed_comment
def __str__(self):
return "Docs for " + self.name
def __repr__(self):
return str(self)
def __contains__(self, tag_name):
return tag_name in self.parsed
def __getitem__(self, tag_name):
return self.get(tag_name)
def get(self, tag_name, default=''):
"""
Return the value of a particular tag, or None if that tag doesn't
exist. Use 'doc' for the comment body itself.
"""
return self.parsed.get(tag_name, default)
def get_as_list(self, tag_name):
"""
Return the value of a tag, making sure that it's a list. Absent
tags are returned as an empty-list; single tags are returned as a
one-element list.
The returned list is a copy, and modifications do not affect the
original object.
"""
val = self.get(tag_name, [])
if isinstance(val, list):
return val[:]
else:
return [val]
@property
def doc(self):
"""
Return the comment body.
"""
return self.get('doc')
@property
def url(self):
"""
Return a URL for the comment, within the page.
"""
return '#' + self.name
@property
def see(self):
"""
Return a list of all @see tags on the comment.
"""
return self.get_as_list('see')
def to_json(self):
"""
Return a JSON representation of the CommentDoc. Keys are as per
to_dict.
"""
return json.dumps(self.to_dict())
def to_dict(self):
"""
Return a dictionary representation of the CommentDoc. The keys of
this correspond to the tags in the comment, with the comment body in
`doc`.
"""
return self.parsed.copy()
class ParamDoc(object):
"""
Represents a parameter, option, or parameter-like object, basically
anything that has a name, a type, and a description, separated by spaces.
This is also used for return types and exceptions, which use an empty
string for the name.
>>> param = ParamDoc('{Array<DOM>} elems The elements to act upon')
>>> param.name
'elems'
>>> param.doc
'The elements to act upon'
>>> param.type
'Array<DOM>'
You can also omit the type: if the first element is not surrounded by
curly braces, it's assumed to be the name instead:
>>> param2 = ParamDoc('param1 The first param')
>>> param2.type
''
>>> param2.name
'param1'
>>> param2.doc
'The first param'
"""
def __init__(self, text):
parsed = list(split_delimited('{}', ' ', text))
if parsed[0].startswith('{') and parsed[0].endswith('}'):
self.type = parsed[0][1:-1]
self.name = parsed[1]
self.doc = ' '.join(parsed[2:])
else:
self.type = ''
self.name = parsed[0]
self.doc = ' '.join(parsed[1:])
def to_dict(self):
"""
Convert this to a dict. Keys (all strings) are:
- **name**: Parameter name
- **type**: Parameter type
- **doc**: Parameter description
"""
return {
'name': self.name,
'type': self.type,
'doc': self.doc
}
def to_html(self, css_class=''):
"""
Returns the parameter as a dt/dd pair.
"""
if self.name and self.type:
header_text = '%s (%s)' % (self.name, self.type)
elif self.type:
header_text = self.type
else:
header_text = self.name
return '<dt>%s</dt><dd>%s</dd>' % (header_text, self.doc)
##### DEPENDENCIES #####
class CyclicDependency(Exception):
"""
Exception raised if there is a cyclic dependency.
"""
def __init__(self, remaining_dependencies):
self.values = remaining_dependencies
def __str__(self):
return ('The following dependencies result in a cycle: '
+ ', '.join(self.values))
class MissingDependency(Exception):
"""
Exception raised if a file references a dependency that doesn't exist.
"""
def __init__(self, file, dependency):
self.file = file
self.dependency = dependency
def __str__(self):
return "Couldn't find dependency %s when processing %s" % \
(self.dependency, self.file)
def build_dependency_graph(start_nodes, js_doc):
"""
Build a graph where nodes are filenames and edges are reverse dependencies
(so an edge from jquery.js to jquery.dimensions.js indicates that jquery.js
must be included before jquery.dimensions.js). The graph is represented
as a dictionary from filename to (in-degree, edges) pair, for ease of
topological sorting. Also returns a list of nodes of degree zero.
"""
queue = []
dependencies = {}
start_sort = []
def add_vertex(file):
in_degree = len(js_doc[file].module.dependencies)
dependencies[file] = [in_degree, []]
queue.append(file)
if in_degree == 0:
start_sort.append(file)
def add_edge(from_file, to_file):
dependencies[from_file][1].append(to_file)
def is_in_graph(file):
return file in dependencies
for file in start_nodes:
add_vertex(file)
for file in queue:
for dependency in js_doc[file].module.dependencies:
if dependency not in js_doc:
raise MissingDependency(file, dependency)
if not is_in_graph(dependency):
add_vertex(dependency)
add_edge(dependency, file)
return dependencies, start_sort
def topological_sort(dependencies, start_nodes):
"""
Perform a topological sort on the dependency graph `dependencies`, starting
from list `start_nodes`.
"""
retval = []
def edges(node):
return dependencies[node][1]
def in_degree(node):
return dependencies[node][0]
def remove_incoming(node):
dependencies[node][0] = in_degree(node) - 1
while start_nodes:
node = start_nodes.pop()
retval.append(node)
for child in edges(node):
remove_incoming(child)
if not in_degree(child):
start_nodes.append(child)
leftover_nodes = [node for node in dependencies.keys()
if in_degree(node) > 0]
if leftover_nodes:
raise CyclicDependency(leftover_nodes)
else:
return retval
def find_dependencies(start_nodes, js_doc):
"""
Sort the dependency graph, taking in a list of starting module names and a
CodeBaseDoc (or equivalent dictionary). Returns an ordered list of
transitive dependencies such that no module appears before its
dependencies.
"""
return topological_sort(*build_dependency_graph(start_nodes, js_doc))