from __future__ import division import re, json ##### Parsing utilities ##### def split_delimited(delimiters, split_by, text): """ Generator that walks the ``text`` and splits it into an array on ``split_by``, being careful not to break inside a delimiter pair. ``delimiters`` should be an even-length string with each pair of matching delimiters listed together, open first. >>> list(split_delimited('{}[]', ',', '')) [''] >>> list(split_delimited('', ',', 'foo,bar')) ['foo', 'bar'] >>> list(split_delimited('[]', ',', 'foo,[bar, baz]')) ['foo', '[bar, baz]'] >>> list(split_delimited('{}', ' ', '{Type Name} name Desc')) ['{Type Name}', 'name', 'Desc'] >>> list(split_delimited('[]{}', ',', '[{foo,[bar, baz]}]')) ['[{foo,[bar, baz]}]'] Two adjacent delimiters result in a zero-length string between them: >>> list(split_delimited('{}', ' ', '{Type Name} Desc')) ['{Type Name}', '', 'Desc'] ``split_by`` may be a predicate function instead of a string, in which case it should return true on a character to split. >>> list(split_delimited('', lambda c: c in '[]{}, ', '[{foo,[bar, baz]}]')) ['', '', 'foo', '', 'bar', '', 'baz', '', '', ''] """ delims = [0] * (len(delimiters) // 2) actions = {} for i in range(0, len(delimiters), 2): actions[delimiters[i]] = (i // 2, 1) actions[delimiters[i + 1]] = (i // 2, -1) if isinstance(split_by, str): def split_fn(c): return c == split_by else: split_fn = split_by last = 0 for i in range(len(text)): c = text[i] if split_fn(c) and not any(delims): yield text[last:i] last = i + 1 try: which, dir = actions[c] delims[which] = delims[which] + dir except KeyError: pass # Normal character yield text[last:] def strip_stars(doc_comment): r""" Strip leading stars from a doc comment. >>> strip_stars('/** This is a comment. */') 'This is a comment.' >>> strip_stars('/**\n * This is a\n * multiline comment. */') 'This is a\n multiline comment.' >>> strip_stars('/** \n\t * This is a\n\t * multiline comment. \n*/') 'This is a\n multiline comment.' """ return re.sub('\n\s*?\*\s*?', '\n', doc_comment[3:-2]).strip() def split_tag(section): """ Split the JSDoc tag text (everything following the @) at the first whitespace. Returns a tuple of (tagname, body). """ splitval = re.split('\s+', section, 1) tag, body = len(splitval) > 1 and splitval or (splitval[0], '') return tag.strip(), body.strip() FUNCTION_REGEXPS = [ 'function (\w+)', '(\w+):\sfunction', '\.(\w+)\s*=\s*function', ] def guess_function_name(next_line, regexps=FUNCTION_REGEXPS): """ Attempt to determine the function name from the first code line following the comment. The patterns recognized are described by `regexps`, which defaults to FUNCTION_REGEXPS. If a match is successful, returns the function name. Otherwise, returns None. """ for regexp in regexps: match = re.search(regexp, next_line) if match: return match.group(1) return None def guess_parameters(next_line): """ Attempt to guess parameters based on the presence of a parenthesized group of identifiers. If successful, returns a list of parameter names; otherwise, returns None. """ match = re.search('\(([\w\s,]+)\)', next_line) if match: return [arg.strip() for arg in match.group(1).split(',')] else: return None def parse_comment(doc_comment, next_line): r""" Split the raw comment text into a dictionary of tags. The main comment body is included as 'doc'. >>> comment = get_doc_comments(read_file('examples/module.js'))[4][0] >>> parse_comment(strip_stars(comment), '')['doc'] 'This is the documentation for the fourth function.\n\n Since the function being documented is itself generated from another\n function, its name needs to be specified explicitly. using the @function tag' >>> parse_comment(strip_stars(comment), '')['function'] 'not_auto_discovered' If there are multiple tags with the same name, they're included as a list: >>> parse_comment(strip_stars(comment), '')['param'] ['{String} arg1 The first argument.', '{Int} arg2 The second argument.'] """ sections = re.split('\n\s*@', doc_comment) tags = { 'doc': sections[0].strip(), 'guessed_function': guess_function_name(next_line), 'guessed_params': guess_parameters(next_line) } for section in sections[1:]: tag, body = split_tag(section) if tag in tags: existing = tags[tag] try: existing.append(body) except AttributeError: tags[tag] = [existing, body] else: tags[tag] = body return tags #### Classes ##### class CommentDoc(object): """ Base class for all classes that represent a parsed comment of some sort. """ def __init__(self, parsed_comment): self.parsed = parsed_comment def __str__(self): return "Docs for " + self.name def __repr__(self): return str(self) def __contains__(self, tag_name): return tag_name in self.parsed def __getitem__(self, tag_name): return self.get(tag_name) def get(self, tag_name, default=''): """ Return the value of a particular tag, or None if that tag doesn't exist. Use 'doc' for the comment body itself. """ return self.parsed.get(tag_name, default) def get_as_list(self, tag_name): """ Return the value of a tag, making sure that it's a list. Absent tags are returned as an empty-list; single tags are returned as a one-element list. The returned list is a copy, and modifications do not affect the original object. """ val = self.get(tag_name, []) if isinstance(val, list): return val[:] else: return [val] @property def doc(self): """ Return the comment body. """ return self.get('doc') @property def url(self): """ Return a URL for the comment, within the page. """ return '#' + self.name @property def see(self): """ Return a list of all @see tags on the comment. """ return self.get_as_list('see') def to_json(self): """ Return a JSON representation of the CommentDoc. Keys are as per to_dict. """ return json.dumps(self.to_dict()) def to_dict(self): """ Return a dictionary representation of the CommentDoc. The keys of this correspond to the tags in the comment, with the comment body in `doc`. """ return self.parsed.copy() class ParamDoc(object): """ Represents a parameter, option, or parameter-like object, basically anything that has a name, a type, and a description, separated by spaces. This is also used for return types and exceptions, which use an empty string for the name. >>> param = ParamDoc('{Array} elems The elements to act upon') >>> param.name 'elems' >>> param.doc 'The elements to act upon' >>> param.type 'Array' You can also omit the type: if the first element is not surrounded by curly braces, it's assumed to be the name instead: >>> param2 = ParamDoc('param1 The first param') >>> param2.type '' >>> param2.name 'param1' >>> param2.doc 'The first param' """ def __init__(self, text): parsed = list(split_delimited('{}', ' ', text)) if parsed[0].startswith('{') and parsed[0].endswith('}'): self.type = parsed[0][1:-1] self.name = parsed[1] self.doc = ' '.join(parsed[2:]) else: self.type = '' self.name = parsed[0] self.doc = ' '.join(parsed[1:]) def to_dict(self): """ Convert this to a dict. Keys (all strings) are: - **name**: Parameter name - **type**: Parameter type - **doc**: Parameter description """ return { 'name': self.name, 'type': self.type, 'doc': self.doc } def to_html(self, css_class=''): """ Returns the parameter as a dt/dd pair. """ if self.name and self.type: header_text = '%s (%s)' % (self.name, self.type) elif self.type: header_text = self.type else: header_text = self.name return '
%s
%s
' % (header_text, self.doc) ##### DEPENDENCIES ##### class CyclicDependency(Exception): """ Exception raised if there is a cyclic dependency. """ def __init__(self, remaining_dependencies): self.values = remaining_dependencies def __str__(self): return ('The following dependencies result in a cycle: ' + ', '.join(self.values)) class MissingDependency(Exception): """ Exception raised if a file references a dependency that doesn't exist. """ def __init__(self, file, dependency): self.file = file self.dependency = dependency def __str__(self): return "Couldn't find dependency %s when processing %s" % \ (self.dependency, self.file) def build_dependency_graph(start_nodes, js_doc): """ Build a graph where nodes are filenames and edges are reverse dependencies (so an edge from jquery.js to jquery.dimensions.js indicates that jquery.js must be included before jquery.dimensions.js). The graph is represented as a dictionary from filename to (in-degree, edges) pair, for ease of topological sorting. Also returns a list of nodes of degree zero. """ queue = [] dependencies = {} start_sort = [] def add_vertex(file): in_degree = len(js_doc[file].module.dependencies) dependencies[file] = [in_degree, []] queue.append(file) if in_degree == 0: start_sort.append(file) def add_edge(from_file, to_file): dependencies[from_file][1].append(to_file) def is_in_graph(file): return file in dependencies for file in start_nodes: add_vertex(file) for file in queue: for dependency in js_doc[file].module.dependencies: if dependency not in js_doc: raise MissingDependency(file, dependency) if not is_in_graph(dependency): add_vertex(dependency) add_edge(dependency, file) return dependencies, start_sort def topological_sort(dependencies, start_nodes): """ Perform a topological sort on the dependency graph `dependencies`, starting from list `start_nodes`. """ retval = [] def edges(node): return dependencies[node][1] def in_degree(node): return dependencies[node][0] def remove_incoming(node): dependencies[node][0] = in_degree(node) - 1 while start_nodes: node = start_nodes.pop() retval.append(node) for child in edges(node): remove_incoming(child) if not in_degree(child): start_nodes.append(child) leftover_nodes = [node for node in dependencies.keys() if in_degree(node) > 0] if leftover_nodes: raise CyclicDependency(leftover_nodes) else: return retval def find_dependencies(start_nodes, js_doc): """ Sort the dependency graph, taking in a list of starting module names and a CodeBaseDoc (or equivalent dictionary). Returns an ordered list of transitive dependencies such that no module appears before its dependencies. """ return topological_sort(*build_dependency_graph(start_nodes, js_doc))