From 1030dc837b10a03a02a85d5504cbeec168ce49e2 Mon Sep 17 00:00:00 2001
From: Bernie Innocenti <bernie@codewiz.org>
Date: Mon, 03 May 2010 21:53:47 +0000
Subject: Import XaoS r489 (trunk after version 3.5)

---
(limited to 'doc/texi2rest.py')

diff --git a/doc/texi2rest.py b/doc/texi2rest.py
new file mode 100755
index 0000000..58fd5cd
--- /dev/null
+++ b/doc/texi2rest.py
@@ -0,0 +1,593 @@
+#!/usr/bin/python
+"""
+NAME
+====
+
+texi2rest - Convert texinfo xml representation to reStructuredText
+
+SYNOPSIS
+========
+
+texi2rest *xmlfile* > *restfile*
+
+DESCRIPTION
+===========
+
+``texi2rest`` is based on ``xhtml2rest`` by Antonios Christofides. He
+included the following disclaimer for his program, and it applies
+equally to mine: far from being a decent and complete program, this is
+only something to begin with, which hopefully processes the given UTF-8
+texinfo xml file and produces reStructuredText "source code" in the
+standard output.
+
+Before processing the texinfo file, you must convert it to xml using
+the makeinfo command:
+
+    makeinfo --xml *texifile*
+
+    texi2rest *xmlfile* > *restfile*
+
+LIMITATIONS
+===========
+
+I am writing this specifically to convert the XaoS project's
+documentation. I do not plan to implement full conversion of every 
+texinfo directive--only the ones used in the documentation I am trying
+to convert. Hopefully other interested parties will contribute further
+improvements.
+
+
+META
+====
+
+``texi2rest`` was hacked together by J.B. Langston, 
+jb-langston@austin.rr.com, based on ``xhtml2rest`` by
+Antonios Christofides, anthony@itia.ntua.gr.
+
+Revision: $Revision: 3753 $
+
+The code and this text is hereby placed in the public domain.
+"""
+
+import xml.dom.minidom
+import re
+import sys
+import textwrap
+import math
+import UserList
+import warnings
+import codecs
+
+###############################################################################
+# Configuration: these values change the behavior of the conversion
+
+# Texinfo commands that generate emphasis markup (i.e., *text*)
+EMPHASIS_COMMANDS = ('emph', 'i', 'slanted', 'var')
+
+# Texinfo commands that generate strong markup (i.e., **text**)
+STRONG_COMMANDS = ('strong', 'b')
+
+# Texinfo commands that generate literal markup (i.e.,``text``)
+LITERAL_COMMANDS = ('code', 'verb' 'tt')
+
+# Texinfo commands that map to reST roles of the same name (i.e., :role:`text`)
+VERBATIM_COMMANDS = ('dfn', 'file', 'command', 'option', 'kbd', 'samp', 'math')
+
+# Texinfo commands that map to different reStructuredText roles (i.e., :role:`text`)
+MAPPED_COMMANDS = {
+    'env': 'envvar', 
+    'key': 'kbd', 
+    'cite': 'title' 
+}
+
+# Texinfo commands that do not generate any markup, but preserve nested text
+IGNORED_COMMANDS = ('url', 'sc', 'r', 'sansserif', 'titlefont', 'dmn', 'logo', 'punct')
+
+# Texinfo commands that are deleted from output, including nested text
+DELETED_COMMANDS = ()
+
+# Map of Texinfo section commands to section underline/overline characters
+# Single character indicates underline only; double characters indicates overline+underline
+SECTION_COMMANDS = {
+    # level 1
+    'top': '**',
+    'chapter': '**',
+    'unnumbered': '**',
+    'appendix': '**',
+    # level 2
+    'section': '=',
+    'unnumberedsec': '=',
+    'appendixsec': '=',
+    # level 3
+    'subsection': '-',
+    'unnumberedsubsec': '-',
+    'appendixsubsec': '-',
+    # level 4
+    'subsubsection': '^',
+    'unnumberedsubsubsec': '^',
+    'appendixsubsubsec': '^',
+}
+
+
+###############################################################################
+
+###############################################################################
+# Global variables. I know. I'm terribly sorry. Please get rid of them.
+
+# 'unindent' is used by list items. A li list item is always indented, but its
+# first line is "unindented" and contains the number or bullet. However, it was
+# difficult for the li node to tell its #text contents (which may be deeply
+# nested) to use that.  So it just places the number or bullet, which must be 4
+# characters, like " 1. ", in "unindent". The first text to be rendered uses
+# the unindent and then sets it to empty again.
+
+unindent = ''
+hyperlinks = {} # text-target pairs found in "a href" elements
+###############################################################################
+
+class Ditem:
+    """A document item; usually a node, but can be a block of text
+    resulting from processing adjacent inline items. If it is a node,
+    it is usually the BlockDitem subclass; if it is text, it is
+    normally a plain Ditem."""
+    def __init__(self, text):
+        self.text = text    # Contained text (empty for BlockDitem)
+        self.type = ''      # tag for block node, empty for inline
+        self.indentlevel = 0  # 0 - unindented; 1 - indented; etc.
+    def __repr__(self):
+        return self.__class__.__name__+'("""'+self.text+'""")'
+    def propagate_indents(self):
+        "Propagates indent level recursively to children"
+        pass
+    def maxwidth(self):
+        "Width it will occupy if allowed to render on infinite width"
+        self.remove_white_space()
+        return len(self.text) + 4*self.indentlevel
+    def minwidth(self):
+        "Width it will occupy if wrapped as much as possible"
+        wordlens = [len(x) for x in self.text.split()]
+        if wordlens: return max(wordlens) + 4*self.indentlevel
+        else: return 0
+    def format(self, width):
+        """Returns contents formatted so as not to exceed specified
+        width, if possible"""
+        global unindent
+        if(self.type=='pre'): raise Exception, "What are we doing here?"
+        self.remove_white_space()
+        # Quick hack to fix a problem. Do we begin with '* '?
+        while len(self.text)>=2 and self.text[1]==' ' and self.text[0] in '*-':
+            # It may be mistaken for a bullet list. Strip it.
+            self.text = self.text[2:]
+        if width < self.minwidth(): width = self.minwidth()
+        # The textwrap module has the nasty habit of breaking at hyphens. So
+        # we'll do a nasty hack: find a character that does not exist in the
+        # text, replace all hyphens with that character, ok, you get the point.
+        hyphensurrogate = ''
+        for c in '!@#$%^&*~':
+            if self.text.find(c)<0:
+                hyphensurrogate = c
+                break
+        if not hyphensurrogate: raise Exception, "Houston we have a problem"
+        text = self.text.replace('-', hyphensurrogate)
+        wrapper = textwrap.TextWrapper(
+            initial_indent=((4*self.indentlevel)-len(unindent))*' '+unindent,
+            subsequent_indent=4*self.indentlevel*' ',
+            width=width, break_long_words = False)
+        unindent = ''
+        text = wrapper.fill(text)
+        text = text.replace(hyphensurrogate, '-')
+        return text
+    def empty(self):
+        "Returns true if contains nothing"
+        return not self.text
+    def remove_white_space(self):
+        "Removes extra white space"
+        self.text = re.sub('\s+', ' ', self.text).strip()
+    def canmerge(self):
+        "Tells whether it's possible to merge this Ditem with adjacent ones"
+        return True
+    def merge(self, aditem):
+        """If possible, merges aditem, which should be an adjacent Ditem that
+        comes after this one."""
+        if not self.canmerge() or not aditem.canmerge(): return False
+        if len(self.text)>0 and self.text[-1] == '_' and len(aditem.text)>0 \
+            and aditem.text[0] not in """ \n\t:.,!=/|;"'?<>[]{}()""":
+            # Leave space after link if not followed by punctuation
+            self.text = self.text + ' ' + aditem.text
+        else:
+            self.text = self.text + aditem.text
+        return True
+
+class BlockDitem(Ditem):
+    "A Ditem which contains other Ditems"
+    def __init__(self, type):
+        Ditem.__init__(self, '')
+        self.type = type
+        self.children = []  # Contained Ditems
+    def __repr__(self):
+        return self.__class__.__name__+'("'+self.type+'"); children = '+repr(self.children)
+    def maxwidth(self):
+        childmaxwidths = [x.maxwidth() for x in self.children]
+        return childmaxwidths and max(childmaxwidths) or 0  # widest child; 0 if childless
+    def minwidth(self):
+        childminwidths = [x.minwidth() for x in self.children]
+        return childminwidths and max(childminwidths) or 0  # same rule as maxwidth
+    def propagate_indents(self):
+        for x in self.children:
+            x.indentlevel = self.indentlevel  # children start at this block's level
+            x.propagate_indents()
+    def format(self, width):
+        if width < self.minwidth(): width = self.minwidth()  # never narrower than needed
+        results = [x.format(width) for x in self.children]
+        results = [x for x in results if x]  # drop children that rendered to nothing
+        return "\n\n".join(results)  # one blank line between rendered children
+    def empty(self):
+        return not (self.children)
+    def canmerge(self):
+        return False  # a block is never merged with adjacent items
+
+class PreDitem(Ditem):
+    "A Ditem representing a literal block"
+    def maxwidth(self):
+        return max([len(x) for x in self.text.split('\n')])
+    def minwidth(self):
+        return self.maxwidth() # Literal block; width's given
+    def remove_white_space(self):
+        pass
+    def format(self, width):
+        result = '::\n\n'
+        for x in self.text.split('\n'):
+            result = result + '    ' + x + '\n'
+        result = result + '..\n\n'
+        return result
+    def canmerge(self):
+        return False
+
+class HeadingDitem(BlockDitem):
+    "A Ditem representing an h1, h2, ..., h9"
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+    def minwidth(self):
+        return self.maxwidth()  # Headings don't wrap
+    def format(self, width):
+        assert(len(self.children)==1)
+        text = self.children[0].format(32767)
+        uldict = {
+            'chapter':    '**', 'section':       '=', 'subsection':       '-', 'subsubsection':       '^',
+            'unnumbered': '**', 'unnumberedsec': '=', 'unnumberedsubsec': '-', 'unnumberedsubsubsec': '^',
+            'appendix':   '**', 'appendixsec':   '=', 'appendixsubsec':   '-', 'appendixsubsubsec':   '^'
+        }
+        underliner = uldict[self.type]
+        if len(underliner) == 2:
+            return len(text)*underliner[0] + '\n' + text + '\n' + len(text)*underliner[0]
+        else:
+            return text + '\n' + len(text)*underliner[0]
+
+class BlockQuoteDitem(BlockDitem):
+    "A Ditem representing a blockquote"
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+    def propagate_indents(self):
+        self.indentlevel = self.indentlevel + 1
+        BlockDitem.propagate_indents(self)
+
+class ListDitem(BlockDitem):
+    "A Ditem representing an ol, ul, or dl"
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+    def format(self, width):
+        # First pass the list type and order to the children
+        order = 1
+        for x in self.children:
+            if isinstance(x, ListItemDitem):
+                x.listtype = self.type
+                x.order = order
+                order = order+1
+        # And then process normally
+        return BlockDitem.format(self, width)
+        
+class ListItemDitem(BlockDitem):
+    "A Ditem representing a li, dt, or dd"
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+        self.listtype = None  # type of the enclosing list; set by ListDitem.format
+        self.order = 0  # 1-based position in the list; set by ListDitem.format
+    def minwidth(self):
+        if self.type == 'definitionterm': return self.maxwidth()  # Don't wrap dt
+        else: return BlockDitem.minwidth(self)
+    def propagate_indents(self):
+        if self.type in ('item', 'definitionitem'):  # only these go one level deeper
+            self.indentlevel = self.indentlevel + 1
+        BlockDitem.propagate_indents(self)
+    def format(self, width):
+        global unindent
+        if self.type == 'item' and self.listtype == 'enumerate':
+            unindent = ('%d. ' % (self.order)).ljust(4)  # e.g. "1. ", padded to 4 chars
+        elif self.type == 'item' and self.listtype == 'itemize':
+            unindent = '*   '  # bullet marker, also 4 characters wide
+        return BlockDitem.format(self, width)  # Ditem.format below consumes 'unindent'
+
+class RenderedColumn:
+    "Width information about a column being rendered"
+    def __init__(self, minwidth, maxwidth):
+        self.minwidth = minwidth
+        self.maxwidth = maxwidth
+        self.curwidth = maxwidth
+        self.fixedwidth = 0
+    def logwidth(self):
+        if self.maxwidth==0: return 0
+        else: return math.log(self.maxwidth)
+    def update(self, minwidth, maxwidth):
+        "Replaces minwidth/maxwidth if greater"
+        self.minwidth = minwidth>self.minwidth and minwidth or self.minwidth
+        self.maxwidth = maxwidth>self.maxwidth and maxwidth or self.maxwidth
+        self.curwidth = self.maxwidth
+
+class RenderedColumns(UserList.UserList):
+    "A list of RenderedColumn"
+    def __init__(self, alist):
+        self.data = alist
+    def totalWidth(self):
+        "Returns total table width"
+        return reduce(lambda x,y: x+y, [z.curwidth for z in self.data]) \
+            + len(self.data) + 1
+    def sumLogWidth(self):
+        "Returns sum of logwidth for nonfixed columns"
+        return reduce(lambda x,y: x+y,
+            [x.logwidth()*(1-x.fixedwidth) for x in self.data])
+    def distributeWidthDifference(self, width):
+        "Step 4 of w3m table rendering algorithm"
+        # Note: The use of math.ceil below is because I'd rather have a
+        # suboptimal width (a few characters less than requested width) rather
+        # than go find what to do with rounding.
+        w = self.totalWidth() - width
+        assert(w>0)
+        repeat_distribution = 1
+        while repeat_distribution:
+            repeat_distribution = 0
+            for x in self.data:
+                if x.fixedwidth: continue
+                if x.curwidth - math.ceil(w*x.logwidth()/self.sumLogWidth()) < \
+                    x.minwidth:
+                        x.curwidth = x.minwidth
+                        x.fixedwidth = 1
+                        w = self.totalWidth() - width
+                        repeat_distribution=1
+                        break
+        # Now that the we finished finding which columns need to be fixed to
+        # their minimum width, perform the distribution once again, without
+        # checking, and actually change remaining column widths
+        for x in self.data:
+            if x.fixedwidth: continue
+            x.curwidth = x.curwidth - math.ceil(w*x.logwidth()/self.sumLogWidth())
+    
+def tablehrule(colwidths, rule='-'):
+    "Returns a horizontal table separator for given column widths"
+    result = '+'
+    for x in colwidths:
+        result = result + rule * x + '+'
+    return result
+
+class TableDitem(BlockDitem):
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+    def format(self, width):
+        # Uses table rendering algorithm of w3m
+        # (http://www.w3m.org/story.html), but ignoring width attribute
+        # Step 1 
+        columns = RenderedColumns([RenderedColumn(x.minwidth(),
+            max(x.maxwidth(), 1)    # A column can't be smaller than 1 character
+            ) for x in self.children[0].children])
+        for x in self.children:
+            for i in range(len(columns)):
+                if (len(x.children)<=i): continue # Skip empty columns
+                columns[i].update(x.children[i].minwidth(), x.children[i].maxwidth())
+        # Step 2 (width attribute) ignored
+        # Step 3 (already done - list was created with maxwidth)
+        # Step 4
+        if columns.totalWidth() > width: columns.distributeWidthDifference(width)
+        # OK, column widths are now calculated
+        colwidths = [int(x.curwidth) for x in columns]
+        result = tablehrule(colwidths) + '\n'
+        usedheadbodysep = False
+        for tr in self.children:
+            result = result + tr.format(colwidths)
+            rule = '-'
+            if not usedheadbodysep and tr.children[0].type == 'th' \
+                                        and tr!=self.children[-1]:
+                rule = '='
+                usedheadbodysep = True
+            result = result + tablehrule(colwidths, rule) + '\n'
+        return result
+
+class TrDitem(BlockDitem):
+    def __init__(self, type):
+        BlockDitem.__init__(self, type)
+    def maxwidth(self):
+        return reduce(lambda x,y: x+y,
+            [x.maxwidth() for x in self.children]) + len(self.children) + 1
+    def minwidth(self):
+        return reduce(lambda x,y: x+y,
+            [x.minwidth() for x in self.children]) + len(self.children) + 1
+    def format(self, colwidths):
+        columns = []       # List of lists of lines
+        maxlinecount = 0   # Num of lines in vertically largest column
+        for i in range(len(colwidths)):
+            if len(self.children)<=i: lines = [ '' ]
+            else: lines = self.children[i].format(colwidths[i]).split('\n')
+            lines = [x + ' ' * (colwidths[i]-len(x)) for x in lines] # Pad to col len
+            maxlinecount = max(maxlinecount, len(lines))
+            columns.append(lines)
+        # Pad vertically
+        for i in range(len(columns)):
+            for j in range(maxlinecount-len(columns[i])):
+                columns[i].append(' ' * colwidths[i])
+        result = '' 
+        # Add vertical separators
+        for i in range(maxlinecount):
+            result = result + '|'
+            for j in range(len(columns)):
+                result = result + columns[j][i] + '|'
+            result = result + '\n'
+        return result
+
+def handleNodeList(nodelist):
+    "Processes given nodes; merges them if possible; returns ditem list"
+    ditems = []
+    curditem = Ditem('')
+    for node in nodelist:
+        aditem = handleNode(node)
+        if curditem.merge(aditem): continue
+        ditems.append(curditem)
+        curditem = aditem
+    if not curditem.empty(): ditems.append(curditem)
+    return ditems
+
+def handleNode(node):
+    if node.nodeType == node.TEXT_NODE:
+        return handleText(node)
+    elif node.nodeName in EMPHASIS_COMMANDS:
+        return handleEmphasis(node)
+    elif node.nodeName in STRONG_COMMANDS:
+        return handleStrong(code)
+    elif node.nodeName in LITERAL_COMMANDS:
+        return handleLiteral(node)
+    elif node.nodeName in VERBATIM_COMMANDS:
+        return handleVerbatimCommand(node)
+    elif node.nodeName in MAPPED_COMMANDS:
+        return handleMappedCommand(node)
+    elif node.nodeName in IGNORED_COMMANDS:
+        return handleIgnoredCommand(node)
+    elif node.nodeName in DELETED_COMMANDS:
+        return handleDeletedCommand(node)
+    elif node.hasChildNodes():
+        contents = handleNodeList(node.childNodes)
+        if len(contents) == 1: return contents[0]
+        if len(contents) == 0: return Ditem('')
+        result = BlockDitem(node.nodeName)
+        result.children = contents
+        return result
+    return Ditem('')
+
+def processChildren(node):
+    if node.hasChildNodes():
+        return handleNodeList(node.childNodes)
+    else:
+        return ()
+
+def mergeChildren(node):
+    contents = processChildren(node)
+    if len(contents)>1: raise Exception('Unexpected block elements')
+    if contents: return contents[0]
+    else: return Ditem('')
+
+def handleEmphasis(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    if result.text:
+        result.text = '*' + result.text + '*'
+    return result
+
+def handleStrong(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    if result.text:
+        result.text = '**' + result.text + '**'
+    return result
+
+def handleLiteral(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    if result.text:
+        result.text = '``' + result.text + '``'
+    return result
+
+def handleVerbatimCommand(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    if result.text:
+        result.text = ':' + node.nodeName + ':`' + result.text + '`'
+    return result
+
+def handleMappedCommand(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    if result.text:
+        result.text = ':' + MAPPED_COMMANDS[node.nodeName] + ':`' + result.text + '`'
+    return result
+
+def handleIgnoredCommand(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    return result
+
+def handleDeletedCommand(node):
+    result = ''
+    result.type = node.nodeName
+    return result
+
+def handleText(node):
+    return Ditem(node.data)  # raw character data; Ditem.format normalises white space
+
+def handleAnchor(node):
+    result = mergeChildren(node)
+    result.type = node.nodeName
+    result.text = result.text.strip()
+    if result.text == '': return result
+    target = node.getAttribute('href').strip()
+    result.text = re.sub('\s+', ' ', result.text)
+    result.text = ':ref:`'+result.text+' <'+target+'>`'
+    return result
+
+def handleHeading(node):
+    contents = mergeChildren(node)
+    if contents.empty(): return contents
+    result = HeadingDitem(node.parentNode.nodeName)
+    result.children.append(contents)
+    return result
+
+def handleGenericBlock(node):
+    result = BlockDitem(node.nodeName)
+    result.children = processChildren(node)
+    return result
+
+def handleBlockQuote(node):
+    result = BlockQuoteDitem(node.nodeName)
+    result.children = processChildren(node)
+    return result
+
+def handleList(node):
+    result = ListDitem(node.nodeName)
+    result.children = processChildren(node)
+    return result
+
+def handleListItem(node):
+    result = ListItemDitem(node.nodeName)
+    result.children = processChildren(node)
+    return result
+
+def handleTable(node):
+    result = TableDitem(node.nodeName)
+    # Ignore table contents that are not tr
+    result.children = [x
+        for x in processChildren(node) if x.type=='tr']
+    return result
+
+def handleTr(node):
+    result = TrDitem(node.nodeName)
+    # Ignore tr contents that are not th or td
+    result.children = [x
+        for x in processChildren(node) if x.type in ('th', 'td')]
+    return result
+
+def handlePre(node):
+    return PreDitem(mergeChildren(node).text)  # PreDitem skips the white space cleanup
+
+dom1 = xml.dom.minidom.parse(sys.argv[1])  # argv[1]: the XML file to convert
+ditem = handleNode(dom1.getElementsByTagName("texinfo")[0])  # first <texinfo> element
+ditem.propagate_indents()
+(utf8_encode, utf8_decode, utf8_reader, utf8_writer) = codecs.lookup('utf-8')
+outf = utf8_writer(sys.stdout)  # everything written to stdout is encoded as UTF-8
+outf.write(ditem.format(79) + '\n')  # wrap the document at 79 columns
+for h in hyperlinks.keys():  # NOTE(review): nothing visible here fills 'hyperlinks'
+    outf.write('\n.. _`' + h + '`:\n    ' + hyperlinks[h] + '\n')
--
cgit v0.9.1