diff options
Diffstat (limited to 'websdk/mercurial/minirst.py')
-rw-r--r--[l---------] | websdk/mercurial/minirst.py | 688 |
1 files changed, 687 insertions, 1 deletions
diff --git a/websdk/mercurial/minirst.py b/websdk/mercurial/minirst.py index b09835a..f375def 120000..100644 --- a/websdk/mercurial/minirst.py +++ b/websdk/mercurial/minirst.py @@ -1 +1,687 @@ -/usr/share/pyshared/mercurial/minirst.py
\ No newline at end of file +# minirst.py - minimal reStructuredText parser +# +# Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +"""simplified reStructuredText parser. + +This parser knows just enough about reStructuredText to parse the +Mercurial docstrings. + +It cheats in a major way: nested blocks are not really nested. They +are just indented blocks that look like they are nested. This relies +on the user to keep the right indentation for the blocks. + +Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide +when adding support for new constructs. +""" + +import re +import util, encoding +from i18n import _ + +def replace(text, substs): + ''' + Apply a list of (find, replace) pairs to a text. + + >>> replace("foo bar", [('f', 'F'), ('b', 'B')]) + 'Foo Bar' + >>> encoding.encoding = 'latin1' + >>> replace('\\x81\\\\', [('\\\\', '/')]) + '\\x81/' + >>> encoding.encoding = 'shiftjis' + >>> replace('\\x81\\\\', [('\\\\', '/')]) + '\\x81\\\\' + ''' + + # some character encodings (cp932 for Japanese, at least) use + # ASCII characters other than control/alphabet/digit as a part of + # multi-bytes characters, so direct replacing with such characters + # on strings in local encoding causes invalid byte sequences. + utext = text.decode(encoding.encoding) + for f, t in substs: + utext = utext.replace(f, t) + return utext.encode(encoding.encoding) + +_blockre = re.compile(r"\n(?:\s*\n)+") + +def findblocks(text): + """Find continuous blocks of lines in text. + + Returns a list of dictionaries representing the blocks. Each block + has an 'indent' field and a 'lines' field. + """ + blocks = [] + for b in _blockre.split(text.lstrip('\n').rstrip()): + lines = b.splitlines() + if lines: + indent = min((len(l) - len(l.lstrip())) for l in lines) + lines = [l[indent:] for l in lines] + blocks.append(dict(indent=indent, lines=lines)) + return blocks + +def findliteralblocks(blocks): + """Finds literal blocks and adds a 'type' field to the blocks. + + Literal blocks are given the type 'literal', all other blocks are + given type the 'paragraph'. + """ + i = 0 + while i < len(blocks): + # Searching for a block that looks like this: + # + # +------------------------------+ + # | paragraph | + # | (ends with "::") | + # +------------------------------+ + # +---------------------------+ + # | indented literal block | + # +---------------------------+ + blocks[i]['type'] = 'paragraph' + if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks): + indent = blocks[i]['indent'] + adjustment = blocks[i + 1]['indent'] - indent + + if blocks[i]['lines'] == ['::']: + # Expanded form: remove block + del blocks[i] + i -= 1 + elif blocks[i]['lines'][-1].endswith(' ::'): + # Partially minimized form: remove space and both + # colons. + blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3] + else: + # Fully minimized form: remove just one colon. + blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1] + + # List items are formatted with a hanging indent. We must + # correct for this here while we still have the original + # information on the indentation of the subsequent literal + # blocks available. + m = _bulletre.match(blocks[i]['lines'][0]) + if m: + indent += m.end() + adjustment -= m.end() + + # Mark the following indented blocks. + while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent: + blocks[i + 1]['type'] = 'literal' + blocks[i + 1]['indent'] -= adjustment + i += 1 + i += 1 + return blocks + +_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ') +_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)' + r'((.*) +)(.*)$') +_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)') +_definitionre = re.compile(r'[^ ]') +_tablere = re.compile(r'(=+\s+)*=+') + +def splitparagraphs(blocks): + """Split paragraphs into lists.""" + # Tuples with (list type, item regexp, single line items?). Order + # matters: definition lists has the least specific regexp and must + # come last. + listtypes = [('bullet', _bulletre, True), + ('option', _optionre, True), + ('field', _fieldre, True), + ('definition', _definitionre, False)] + + def match(lines, i, itemre, singleline): + """Does itemre match an item at line i? + + A list item can be followed by an idented line or another list + item (but only if singleline is True). + """ + line1 = lines[i] + line2 = i + 1 < len(lines) and lines[i + 1] or '' + if not itemre.match(line1): + return False + if singleline: + return line2 == '' or line2[0] == ' ' or itemre.match(line2) + else: + return line2.startswith(' ') + + i = 0 + while i < len(blocks): + if blocks[i]['type'] == 'paragraph': + lines = blocks[i]['lines'] + for type, itemre, singleline in listtypes: + if match(lines, 0, itemre, singleline): + items = [] + for j, line in enumerate(lines): + if match(lines, j, itemre, singleline): + items.append(dict(type=type, lines=[], + indent=blocks[i]['indent'])) + items[-1]['lines'].append(line) + blocks[i:i + 1] = items + break + i += 1 + return blocks + +_fieldwidth = 12 + +def updatefieldlists(blocks): + """Find key and maximum key width for field lists.""" + i = 0 + while i < len(blocks): + if blocks[i]['type'] != 'field': + i += 1 + continue + + keywidth = 0 + j = i + while j < len(blocks) and blocks[j]['type'] == 'field': + m = _fieldre.match(blocks[j]['lines'][0]) + key, rest = m.groups() + blocks[j]['lines'][0] = rest + blocks[j]['key'] = key + keywidth = max(keywidth, len(key)) + j += 1 + + for block in blocks[i:j]: + block['keywidth'] = keywidth + i = j + 1 + + return blocks + +def updateoptionlists(blocks): + i = 0 + while i < len(blocks): + if blocks[i]['type'] != 'option': + i += 1 + continue + + optstrwidth = 0 + j = i + while j < len(blocks) and blocks[j]['type'] == 'option': + m = _optionre.match(blocks[j]['lines'][0]) + + shortoption = m.group(2) + group3 = m.group(3) + longoption = group3[2:].strip() + desc = m.group(6).strip() + longoptionarg = m.group(5).strip() + blocks[j]['lines'][0] = desc + + noshortop = '' + if not shortoption: + noshortop = ' ' + + opt = "%s%s" % (shortoption and "-%s " % shortoption or '', + ("%s--%s %s") % (noshortop, longoption, + longoptionarg)) + opt = opt.rstrip() + blocks[j]['optstr'] = opt + optstrwidth = max(optstrwidth, encoding.colwidth(opt)) + j += 1 + + for block in blocks[i:j]: + block['optstrwidth'] = optstrwidth + i = j + 1 + return blocks + +def prunecontainers(blocks, keep): + """Prune unwanted containers. + + The blocks must have a 'type' field, i.e., they should have been + run through findliteralblocks first. + """ + pruned = [] + i = 0 + while i + 1 < len(blocks): + # Searching for a block that looks like this: + # + # +-------+---------------------------+ + # | ".. container ::" type | + # +---+ | + # | blocks | + # +-------------------------------+ + if (blocks[i]['type'] == 'paragraph' and + blocks[i]['lines'][0].startswith('.. container::')): + indent = blocks[i]['indent'] + adjustment = blocks[i + 1]['indent'] - indent + containertype = blocks[i]['lines'][0][15:] + prune = containertype not in keep + if prune: + pruned.append(containertype) + + # Always delete "..container:: type" block + del blocks[i] + j = i + i -= 1 + while j < len(blocks) and blocks[j]['indent'] > indent: + if prune: + del blocks[j] + else: + blocks[j]['indent'] -= adjustment + j += 1 + i += 1 + return blocks, pruned + +_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""") + +def findtables(blocks): + '''Find simple tables + + Only simple one-line table elements are supported + ''' + + for block in blocks: + # Searching for a block that looks like this: + # + # === ==== === + # A B C + # === ==== === <- optional + # 1 2 3 + # x y z + # === ==== === + if (block['type'] == 'paragraph' and + len(block['lines']) > 2 and + _tablere.match(block['lines'][0]) and + block['lines'][0] == block['lines'][-1]): + block['type'] = 'table' + block['header'] = False + div = block['lines'][0] + + # column markers are ASCII so we can calculate column + # position in bytes + columns = [x for x in xrange(len(div)) + if div[x] == '=' and (x == 0 or div[x - 1] == ' ')] + rows = [] + for l in block['lines'][1:-1]: + if l == div: + block['header'] = True + continue + row = [] + # we measure columns not in bytes or characters but in + # colwidth which makes things tricky + pos = columns[0] # leading whitespace is bytes + for n, start in enumerate(columns): + if n + 1 < len(columns): + width = columns[n + 1] - start + v = encoding.getcols(l, pos, width) # gather columns + pos += len(v) # calculate byte position of end + row.append(v.strip()) + else: + row.append(l[pos:].strip()) + rows.append(row) + + block['table'] = rows + + return blocks + +def findsections(blocks): + """Finds sections. + + The blocks must have a 'type' field, i.e., they should have been + run through findliteralblocks first. + """ + for block in blocks: + # Searching for a block that looks like this: + # + # +------------------------------+ + # | Section title | + # | ------------- | + # +------------------------------+ + if (block['type'] == 'paragraph' and + len(block['lines']) == 2 and + encoding.colwidth(block['lines'][0]) == len(block['lines'][1]) and + _sectionre.match(block['lines'][1])): + block['underline'] = block['lines'][1][0] + block['type'] = 'section' + del block['lines'][1] + return blocks + +def inlineliterals(blocks): + substs = [('``', '"')] + for b in blocks: + if b['type'] in ('paragraph', 'section'): + b['lines'] = [replace(l, substs) for l in b['lines']] + return blocks + +def hgrole(blocks): + substs = [(':hg:`', '"hg '), ('`', '"')] + for b in blocks: + if b['type'] in ('paragraph', 'section'): + # Turn :hg:`command` into "hg command". This also works + # when there is a line break in the command and relies on + # the fact that we have no stray back-quotes in the input + # (run the blocks through inlineliterals first). + b['lines'] = [replace(l, substs) for l in b['lines']] + return blocks + +def addmargins(blocks): + """Adds empty blocks for vertical spacing. + + This groups bullets, options, and definitions together with no vertical + space between them, and adds an empty block between all other blocks. + """ + i = 1 + while i < len(blocks): + if (blocks[i]['type'] == blocks[i - 1]['type'] and + blocks[i]['type'] in ('bullet', 'option', 'field')): + i += 1 + else: + blocks.insert(i, dict(lines=[''], indent=0, type='margin')) + i += 2 + return blocks + +def prunecomments(blocks): + """Remove comments.""" + i = 0 + while i < len(blocks): + b = blocks[i] + if b['type'] == 'paragraph' and (b['lines'][0].startswith('.. ') or + b['lines'] == ['..']): + del blocks[i] + if i < len(blocks) and blocks[i]['type'] == 'margin': + del blocks[i] + else: + i += 1 + return blocks + +_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|" + r"error|hint|important|note|tip|warning)::", + flags=re.IGNORECASE) + +def findadmonitions(blocks): + """ + Makes the type of the block an admonition block if + the first line is an admonition directive + """ + i = 0 + while i < len(blocks): + m = _admonitionre.match(blocks[i]['lines'][0]) + if m: + blocks[i]['type'] = 'admonition' + admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower() + + firstline = blocks[i]['lines'][0][m.end() + 1:] + if firstline: + blocks[i]['lines'].insert(1, ' ' + firstline) + + blocks[i]['admonitiontitle'] = admonitiontitle + del blocks[i]['lines'][0] + i = i + 1 + return blocks + +_admonitiontitles = {'attention': _('Attention:'), + 'caution': _('Caution:'), + 'danger': _('!Danger!') , + 'error': _('Error:'), + 'hint': _('Hint:'), + 'important': _('Important:'), + 'note': _('Note:'), + 'tip': _('Tip:'), + 'warning': _('Warning!')} + +def formatoption(block, width): + desc = ' '.join(map(str.strip, block['lines'])) + colwidth = encoding.colwidth(block['optstr']) + usablewidth = width - 1 + hanging = block['optstrwidth'] + initindent = '%s%s ' % (block['optstr'], ' ' * ((hanging - colwidth))) + hangindent = ' ' * (encoding.colwidth(initindent) + 1) + return ' %s\n' % (util.wrap(desc, usablewidth, + initindent=initindent, + hangindent=hangindent)) + +def formatblock(block, width): + """Format a block according to width.""" + if width <= 0: + width = 78 + indent = ' ' * block['indent'] + if block['type'] == 'admonition': + admonition = _admonitiontitles[block['admonitiontitle']] + hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip()) + + defindent = indent + hang * ' ' + text = ' '.join(map(str.strip, block['lines'])) + return '%s\n%s\n' % (indent + admonition, + util.wrap(text, width=width, + initindent=defindent, + hangindent=defindent)) + if block['type'] == 'margin': + return '\n' + if block['type'] == 'literal': + indent += ' ' + return indent + ('\n' + indent).join(block['lines']) + '\n' + if block['type'] == 'section': + underline = encoding.colwidth(block['lines'][0]) * block['underline'] + return "%s%s\n%s%s\n" % (indent, block['lines'][0],indent, underline) + if block['type'] == 'table': + table = block['table'] + # compute column widths + widths = [max([encoding.colwidth(e) for e in c]) for c in zip(*table)] + text = '' + span = sum(widths) + len(widths) - 1 + indent = ' ' * block['indent'] + hang = ' ' * (len(indent) + span - widths[-1]) + + for row in table: + l = [] + for w, v in zip(widths, row): + pad = ' ' * (w - encoding.colwidth(v)) + l.append(v + pad) + l = ' '.join(l) + l = util.wrap(l, width=width, initindent=indent, hangindent=hang) + if not text and block['header']: + text = l + '\n' + indent + '-' * (min(width, span)) + '\n' + else: + text += l + "\n" + return text + if block['type'] == 'definition': + term = indent + block['lines'][0] + hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip()) + defindent = indent + hang * ' ' + text = ' '.join(map(str.strip, block['lines'][1:])) + return '%s\n%s\n' % (term, util.wrap(text, width=width, + initindent=defindent, + hangindent=defindent)) + subindent = indent + if block['type'] == 'bullet': + if block['lines'][0].startswith('| '): + # Remove bullet for line blocks and add no extra + # indention. + block['lines'][0] = block['lines'][0][2:] + else: + m = _bulletre.match(block['lines'][0]) + subindent = indent + m.end() * ' ' + elif block['type'] == 'field': + keywidth = block['keywidth'] + key = block['key'] + + subindent = indent + _fieldwidth * ' ' + if len(key) + 2 > _fieldwidth: + # key too large, use full line width + key = key.ljust(width) + elif keywidth + 2 < _fieldwidth: + # all keys are small, add only two spaces + key = key.ljust(keywidth + 2) + subindent = indent + (keywidth + 2) * ' ' + else: + # mixed sizes, use fieldwidth for this one + key = key.ljust(_fieldwidth) + block['lines'][0] = key + block['lines'][0] + elif block['type'] == 'option': + return formatoption(block, width) + + text = ' '.join(map(str.strip, block['lines'])) + return util.wrap(text, width=width, + initindent=indent, + hangindent=subindent) + '\n' + +def formathtml(blocks): + """Format RST blocks as HTML""" + + out = [] + headernest = '' + listnest = [] + + def openlist(start, level): + if not listnest or listnest[-1][0] != start: + listnest.append((start, level)) + out.append('<%s>\n' % start) + + blocks = [b for b in blocks if b['type'] != 'margin'] + + for pos, b in enumerate(blocks): + btype = b['type'] + level = b['indent'] + lines = b['lines'] + + if btype == 'admonition': + admonition = _admonitiontitles[b['admonitiontitle']] + text = ' '.join(map(str.strip, lines)) + out.append('<p>\n<b>%s</b> %s\n</p>\n' % (admonition, text)) + elif btype == 'paragraph': + out.append('<p>\n%s\n</p>\n' % '\n'.join(lines)) + elif btype == 'margin': + pass + elif btype == 'literal': + out.append('<pre>\n%s\n</pre>\n' % '\n'.join(lines)) + elif btype == 'section': + i = b['underline'] + if i not in headernest: + headernest += i + level = headernest.index(i) + 1 + out.append('<h%d>%s</h%d>\n' % (level, lines[0], level)) + elif btype == 'table': + table = b['table'] + t = [] + for row in table: + l = [] + for v in zip(row): + if not t: + l.append('<th>%s</th>' % v) + else: + l.append('<td>%s</td>' % v) + t.append(' <tr>%s</tr>\n' % ''.join(l)) + out.append('<table>\n%s</table>\n' % ''.join(t)) + elif btype == 'definition': + openlist('dl', level) + term = lines[0] + text = ' '.join(map(str.strip, lines[1:])) + out.append(' <dt>%s\n <dd>%s\n' % (term, text)) + elif btype == 'bullet': + bullet, head = lines[0].split(' ', 1) + if bullet == '-': + openlist('ul', level) + else: + openlist('ol', level) + out.append(' <li> %s\n' % ' '.join([head] + lines[1:])) + elif btype == 'field': + openlist('dl', level) + key = b['key'] + text = ' '.join(map(str.strip, lines)) + out.append(' <dt>%s\n <dd>%s\n' % (key, text)) + elif btype == 'option': + openlist('dl', level) + opt = b['optstr'] + desc = ' '.join(map(str.strip, lines)) + out.append(' <dt>%s\n <dd>%s\n' % (opt, desc)) + + # close lists if indent level of next block is lower + if listnest: + start, level = listnest[-1] + if pos == len(blocks) - 1: + out.append('</%s>\n' % start) + listnest.pop() + else: + nb = blocks[pos + 1] + ni = nb['indent'] + if (ni < level or + (ni == level and + nb['type'] not in 'definition bullet field option')): + out.append('</%s>\n' % start) + listnest.pop() + + return ''.join(out) + +def parse(text, indent=0, keep=None): + """Parse text into a list of blocks""" + pruned = [] + blocks = findblocks(text) + for b in blocks: + b['indent'] += indent + blocks = findliteralblocks(blocks) + blocks = findtables(blocks) + blocks, pruned = prunecontainers(blocks, keep or []) + blocks = findsections(blocks) + blocks = inlineliterals(blocks) + blocks = hgrole(blocks) + blocks = splitparagraphs(blocks) + blocks = updatefieldlists(blocks) + blocks = updateoptionlists(blocks) + blocks = addmargins(blocks) + blocks = prunecomments(blocks) + blocks = findadmonitions(blocks) + return blocks, pruned + +def formatblocks(blocks, width): + text = ''.join(formatblock(b, width) for b in blocks) + return text + +def format(text, width=80, indent=0, keep=None, style='plain'): + """Parse and format the text according to width.""" + blocks, pruned = parse(text, indent, keep or []) + if style == 'html': + text = formathtml(blocks) + else: + text = ''.join(formatblock(b, width) for b in blocks) + if keep is None: + return text + else: + return text, pruned + +def getsections(blocks): + '''return a list of (section name, nesting level, blocks) tuples''' + nest = "" + level = 0 + secs = [] + for b in blocks: + if b['type'] == 'section': + i = b['underline'] + if i not in nest: + nest += i + level = nest.index(i) + 1 + nest = nest[:level] + secs.append((b['lines'][0], level, [b])) + else: + if not secs: + # add an initial empty section + secs = [('', 0, [])] + secs[-1][2].append(b) + return secs + +def decorateblocks(blocks, width): + '''generate a list of (section name, line text) pairs for search''' + lines = [] + for s in getsections(blocks): + section = s[0] + text = formatblocks(s[2], width) + lines.append([(section, l) for l in text.splitlines(True)]) + return lines + +def maketable(data, indent=0, header=False): + '''Generate an RST table for the given table data''' + + widths = [max(encoding.colwidth(e) for e in c) for c in zip(*data)] + indent = ' ' * indent + div = indent + ' '.join('=' * w for w in widths) + '\n' + + out = [div] + for row in data: + l = [] + for w, v in zip(widths, row): + pad = ' ' * (w - encoding.colwidth(v)) + l.append(v + pad) + out.append(indent + ' '.join(l) + "\n") + if header and len(data) > 1: + out.insert(2, div) + out.append(div) + return ''.join(out) |