1 files changed, 555 insertions, 0 deletions
diff --git a/groupthink/stringtree.py b/groupthink/stringtree.py
new file mode 100644
index 0000000..5701636
--- /dev/null
+++ b/groupthink/stringtree.py
@@ -0,0 +1,555 @@
+import random
+random.seed() #Works around some mysterious bug in the Journal or Rainbow that
+              #causes the random number generator to be seeded with the same
+              #constant value if a Journal entry is re-launched from the
+              #Journal.
+from collections import defaultdict
+import dbus # We do dbus (de)serialization in this file to minimize abstraction breakage
+import logging
+import aatree
+
+inf = float('inf')
+
+class Change:
+    """Each Change represents a chanage to the StringTree.
+    """
+    def __init__(self, unique_id, parent, edit):
+        """unique_id is a unique identifier for this Change
+        parent is the unique_id that is affected by this
+            Change.  Parent unique_id are always associated with an Insertion edit.
+        edit is an Insertion, Deletion, or Removal.  It's what is to be done to
+            the parent"""
+        self.unique_id = unique_id
+        self.parent = parent
+        self.edit = edit
+    
+    def __repr__(self):
+        return "Change(%d, %d, %s)" % (self.unique_id, self.parent, str(self.edit))
+
+class Insertion:
+    """Represents the action of inserting a particular bit of text
+    """
+    def __init__(self, position, text):
+        """position is the point at which to insert text, in the unmodified
+            coordinates of the parent node (other insertions and deletions
+            to that node do not affect these coordinates)
+        text is the string to insert"""
+        self.position = position
+        self.text = text
+    
+    def __repr__(self):
+        return "Insertion(%d, %s)" % (self.position, self.text)
+
+class Deletion:
+    """Represents the deletion of only those characters present in the parent in
+        this range.  Characters that have been inserted into the parent by
+        an Insertion are not affected.
+    """
+    def __init__(self, position, length):
+        """position is the point, in the unmodified coordinates of the parent
+            node, at which to start deletion
+        length is an integer greater than 0 representing the number of
+            characters to delete"""
+        self.position = position
+        self.length = length
+    
+    def __repr__(self):
+        return "Deletion(%d, %d)" % (self.position, self.length)
+        
+class Removal:
+    """Represents the deletion of all characters ever inserted in this range.
+        Insertions at the endpoints are _not_ included in the Removal, and must
+        be Removed separately.
+    """
+    def __init__(self, position, length):
+        """position is the point at which to start Removal
+        length is the number of points in the unmodified parent coordinate to
+            the end of the Removal.  Note that many more than length characters
+            can be removed if there are Insertions in this range."""
+        self.position = position
+        self.length = length
+    
+    def __repr__(self):
+        return "Removal(%d, %d)" % (self.position, self.length)
+
+class Record:
+    """Each Record is used to store one Change inside the StringTree.
+    The purpose of a Record is contain both the Change itself, and any cached
+    information about the current effect of that Change.
+    """
+    def __init__(self, change, depth):
+        self.change = change
+        self.depth = depth
+    
+    def __str__(self):
+        return "Record(%s, %d)" % (str(self.change), self.depth)
+    
+    __repr__ = __str__
+
+def flatten(L):
+    o = []
+    for x in L:
+        if isinstance(x,list):
+            o.extend(flatten(x)) #recursive
+        else:
+            o.append(x)
+    return o
+    
+def my_rand():
+    return random.getrandbits(63)
+
+def translator(c, pack):
+    if pack:
+        if isinstance(c.edit, Insertion):
+            return dbus.Struct((dbus.Int64(c.unique_id),
+                               dbus.Int64(c.parent),
+                               dbus.Int16(0),
+                               dbus.Int64(c.edit.position),
+                               dbus.UTF8String(c.edit.text)),
+                               signature='xxnxs')
+        elif isinstance(c.edit, Deletion):
+            return dbus.Struct((dbus.Int64(c.unique_id),
+                               dbus.Int64(c.parent),
+                               dbus.Int16(1),
+                               dbus.Int64(c.edit.position),
+                               dbus.Int64(c.edit.length)),
+                               signature='xxnxx')
+        elif isinstance(c.edit, Removal):
+            return dbus.Struct((dbus.Int64(c.unique_id),
+                               dbus.Int64(c.parent),
+                               dbus.Int16(2),
+                               dbus.Int64(c.edit.position),
+                               dbus.Int64(c.edit.length)),
+                               signature='xxnxx')
+        else:
+            raise Unimplemented
+    else:
+        if c[2] == 0:
+            ed = Insertion(int(c[3]),str(c[4]))
+        elif c[2] == 1:
+            ed = Deletion(int(c[3]),int(c[4]))
+        elif c[2] == 2:
+            ed = Removal(int(c[3]),int(c[4]))
+        else:
+            raise "unknown type"
+        return Change(int(c[0]), int(c[1]), ed)
+
+class EagerHideList:
+    """EagerHideList provides a list with hidden elements.  The standard index
+    considers only the visible elements, but the 'all' index considers all
+    elements.  The standard index of an invisible element is considered to be
+    the index of the next visible element."""
+    def __init__(self):
+        self._sourcelist = []
+        self._poslist = []
+        self._posmap = {}
+    def __len__(self):
+        return len(self._sourcelist)
+    def __iter__(self):
+        return self._sourcelist.__iter__()
+    def __getitem__(self, s):
+        return self._sourcelist[s]
+    def index(self, item):
+        x = self._poslist[self._posmap[item]]
+        return x[2]
+    def hide(self, position, length):
+        """Given the position in _sourcelist, and the length of the deletion,
+        perform the deletion in the lists"""
+        pfirst = self._sourcelist[position]
+        plast = self._sourcelist[position+length-1]
+        ifirst = self._posmap[pfirst]
+        ilast = self._posmap[plast]
+        for i in xrange(ifirst, ilast+1):
+            L = self._poslist[i]
+            L[1] = False #No longer visible, if it was visible before
+            L[2] = position #collapse positions
+        for L in self._poslist[ilast+1:]:
+            L[2] -= length #move all subsequent character up by length
+            
+        del self._sourcelist[position:(position+length)]
+        #self._check_invariants()
+    def getitem_all(self, s):
+        if isinstance(s, int):
+            return self._poslist[s][0]
+        else:
+            return [x[0] for x in self._poslist[s]]
+    def index_all(self, item):
+        return self._posmap[item]
+    def is_visible(self, i):
+        return self._poslist[i][1]
+    def is_visible_item(self, item):
+        return self._poslist[self._posmap[item]][1]
+    def insert_sequence_all(self, position, sequence, visibility):
+        """Insert sequence with visibility into the all-coordinates at position"""
+        if position > 0:
+            psource = self._poslist[position][2]
+        else:
+            psource = 0
+        length = len(sequence)
+        newlist = []
+        newlistsource = []
+        i = psource
+        for elt, viz in zip(sequence, visibility):
+            newlist.append([elt, viz, i])
+            if viz:
+                newlistsource.append(elt)
+                i += 1
+        self._poslist[position:position] = newlist
+        for i in xrange(position,position+length):
+            L = self._poslist[i]
+            self._posmap[L[0]] = i
+        num_viz = len(newlistsource)
+        for i in xrange(position+length,len(self._poslist)):
+            L = self._poslist[i]
+            L[2] += num_viz
+            self._posmap[L[0]] = i
+        self._sourcelist[psource:psource] = newlistsource
+        #self._check_invariants()
+    def insert_sequence_leftof(self, target, sequence, visibility):
+        self.insert_sequence_all(self._posmap[target], sequence, visibility)
+        
+    def _check_invariants(self):
+        assert len(self._posmap) == len(self._poslist)
+        for i in xrange(len(self._poslist)):
+            assert self._posmap[self._poslist[i][0]] == i
+            if self._poslist[i][1]:
+                assert self._sourcelist[self._poslist[i][2]] == self._poslist[i][0]
+            if i > 0:
+                if self._poslist[i-1][1]:
+                    assert self._poslist[i-1][2] + 1 == self._poslist[i][2]
+                else:
+                    assert self._poslist[i-1][2] == self._poslist[i][2]
+
+class SimpleStringTree:
+    """SimpleStringTree is a StringTree that supports only Insertions and
+    Deletions.  Handling References, while valuable, has proven quite difficult,
+    and so will not be addressed by this data structure.
+    
+    Code for handling Removals will be left in for the moment, since it is
+    easy enough, even though it is not presently used."""
+
+    def __init__(self, initstring=""):
+        self.ROOT = Record(Change(-1, -1, Insertion(0, "")), 0)
+        self._id2rec = dict() #unique_id: Record(change) | change.unique_id = unique_id
+        self._id2rec[-1] = self.ROOT
+        self._parent2children = defaultdict(set) # unique_id: {Record(change) | change.parent == unique_id}
+        self._cursor = 0
+        self._listing = aatree.AATreeHideList()
+        self._listing.insert_sequence_all(0,[(-1,0)],[False])
+        if initstring:
+            self.insert(initstring, 0)
+    
+    def __repr__(self):
+        return "\n".join((str(v) for v in self._id2rec.values()))
+    
+    def getvalue(self, r = None):
+        if r is None:
+            r = self.ROOT
+        s = "".join(self._id2rec[x[0]].change.edit.text[x[1]] for x in self._listing)
+        return s
+    
+    def next(self):
+        raise
+    
+    def flush(self):
+        # This could be used to implement lazy evaluation of input by not
+        # re-evaluating the string until flush (or read?)
+        pass
+    
+    def close(self):
+        raise
+    
+    def read(self, size=float('inf')):
+        #Efficiency: This method should use size to avoid calling getvalue and rendering the entire string
+        s = self.getvalue()
+        outpoint = min(len(s), self._cursor + size)
+        inpoint = self._cursor
+        self._cursor = outpoint
+        return s[inpoint:outpoint]
+        
+    def readline(self, size=float('inf')):
+        #Efficiency: This method should use size to avoid rendering the whole string
+        s = self.getvalue()
+        outpoint = min(len(s), self._cursor + size)
+        inpoint = self._cursor
+        i = s.find("\n", inpoint, outpoint)
+        if i == -1 or i >= outpoint:
+            self._cursor = outpoint
+            return s[inpoint:outpoint]
+        else:
+            self._cursor = i + 1
+            return s[inpoint:(i+1)]
+    
+    def readlines(self, sizehint=None):
+        #Efficiency: use sizehint
+        s = self.getvalue()
+        t = s[self._cursor:]
+        self._cursor = len(s)
+        return t.splitlines(True)
+    
+    def seek(self, offset, whence=0):
+        if whence == 0:
+            self._cursor = offset
+        elif whence == 1:
+            self._cursor += offset
+        elif whence == 2:
+            self._cursor = len(self._listing) + offset
+    
+    def tell(self):
+        return self._cursor
+        
+    def truncate(self, size=None):
+        if size is None:
+            size = self._cursor
+        return self.delete(size, len(self._listing) - size)
+        
+    def write(self, text):
+        L = min(len(self._listing) - self._cursor, len(text))
+        changelist = []
+        if L > 0:
+            changelist.extend(self.delete(self._cursor, L))
+        changelist.extend(self.insert(text))
+        return changelist
+    
+    def writelines(self, sequence):
+        s = "".join(sequence)
+        return self.write(s)
+    
+    # Non-filelike text editing methods
+
+    def insert(self, text, k=None):
+        if k is None:
+            k = self._cursor
+
+        if len(self._listing) == 0:
+            r = self.ROOT
+            uid = -1
+            inspoint = 0
+        elif k == 0:
+            (uid, inspoint) = self._listing[k]
+            r = self._id2rec[uid]
+        elif k < len(self._listing):
+            # When not inserting at the endpoints, we have to be sure to
+            # check if we are at the boundary between a parent and one of its
+            # descendants.  If so, we must make our insertion in the descendant,
+            # not the parent, because the insertions would "conflict" (occur at
+            # the same location) in the parent, which would produce an ordering
+            # ambiguity, which will resolve in our favor only 50% of the time.
+            pL, pR = self._listing[(k-1):(k+1)]
+            (uidL, inspointL) = pL
+            (uidR, inspointR) = pR
+            rR = self._id2rec[uidR]
+            if uidL == uidR: #There's no boundary here at all (at least not one
+                # of any importance to us.  Therefore, we have to insert to the
+                # left of the character at k, as usual.
+                r = rR
+                uid = uidR
+                inspoint = inspointR
+            else: #There's a boundary of some sort here.  We always choose to
+                # insert at the deeper node (breaking left in case of a tie).
+                # (In the case that neither node is the ancestor of the other,
+                # either choice would be acceptable, regardless of depth.  This
+                # logic is therefore acceptable, and has the advantage of being
+                # simple and fast.)
+                rL = self._id2rec[uidL]
+                if rR.depth > rL.depth:
+                    r = rR
+                    uid = uidR
+                    inspoint = inspointR
+                else:
+                    r = rL
+                    uid = uidL
+                    inspoint = inspointL + 1
+        elif k == len(self._listing):
+            (uid,i) = self._listing[k-1]
+            r = self._id2rec[uid]
+            inspoint = i + 1
+        else:
+            raise
+        
+        e = Insertion(inspoint, text)
+        c = Change(my_rand(), r.change.unique_id, e)
+        self._add_change_treeonly(c)
+        target = (uid, inspoint)
+        self._insert_listonly(c.unique_id, target, len(text))
+        self._cursor = k + len(text)
+        return [c]
+    
+    def delete(self, k, n):
+        """Starting at a point k (0-indexed), delete n characters"""
+        if k + n > len(self._listing):
+            raise
+        p = self._listing[k]
+        contigs = [[p[0],p[1],p[1]]]
+        for (uid, index) in self._listing[(k+1):(k+n)]:
+            #This logic produces deletions that span any missing chunks.  This
+            #produces a smaller number of deletions than making sure that they
+            #are actually "contiguous", but it might interact badly with a 
+            #hypothetical undo system.
+            if contigs[-1][0] == uid:
+                contigs[-1][2] = index
+            else:
+                contigs.append([uid,index,index])
+        changelist = [Change(my_rand(), c[0], Deletion(c[1],1 + c[2]-c[1])) for c in contigs]
+        for c in changelist:
+            self._add_change_treeonly(c)
+        self._delete_listonly(k,n)
+        return changelist
+
+    def get_range(self, rec, point, m):
+        todo = [(rec, point, m, None)] # None is a dummy value since p is unused
+        ranges = []
+        while len(todo) > 0:
+            (rec, point, m, p) = todo[0]
+            h = self._range_helper(point, m)
+            self._step(h, rec)
+            if h.outpoint is not None:
+                ranges.append((rec, h.point_parent, h.outpoint - h.point_parent))
+                #print rec, h.point_parent, h.outpoint - h.point_parent
+            todo.extend(h.todo)
+            #print todo
+            del todo[0]
+        return ranges
+    
+    def move(self, rempoint, n, inspoint):
+        """In StringTree, move() should coherently copy a section of text,
+        such that any conflicting edits appear in the new location, not the old.
+        In SimpleStringTree, this is not possible, so move() just falls back to
+        Deletion and Insertion."""
+        self.seek(rempoint)
+        t = self.read(n)
+        
+        if rempoint > inspoint:
+            L = self.delete(rempoint,n)
+            L.extend(self.insert(t, inspoint))
+        else:
+            L = self.insert(t, inspoint)
+            L.extend(self.delete(rempoint,n))
+        return L
+                        
+    # Patch management methods
+    
+    def add_change(self, c):
+        if c.unique_id in self._id2rec:
+            return []
+        if isinstance(c.edit, Insertion):
+            p = self._effective_parent(c.unique_id, c.parent, c.edit.position)
+            i = self._listing.index(p)
+            d = len(c.edit.text)
+            self._insert_listonly(c.unique_id, p, d)
+            flist = [Insertion(i, c.edit.text)]
+        elif isinstance(c.edit, Deletion):
+            flist = []
+            start = None
+            end = None
+            for i in xrange(c.edit.position,c.edit.position + c.edit.length):
+                p = (c.parent, i)
+                if self._listing.is_visible_item(p):
+                    q = self._listing.index(p)
+                    if end == q:
+                        end += 1
+                    else:
+                        if end is not None:
+                            n = end - start
+                            flist.append(Deletion(start,n))
+                            self._delete_listonly(start,n)
+                        start = q
+                        end = start + 1
+            if end is not None: # the last contig won't be handled by the loop
+                n = end - start
+                flist.append(Deletion(start,n))
+                self._delete_listonly(start,n)                
+        else:
+            raise
+        self._add_change_treeonly(c)
+        return flist
+    
+    def _effective_parent(self, uid, parentid, position):
+        """The effective parent of an insertion is defined as the (uid, loc) pair
+        that causes it to be inserted in the right location in the string.  This
+        is only relevant in the event of a conflict, in which case the conflict
+        edits are required to be ordered from least uid to greatest uid.  The
+        effective parent of a conflicted edit, then, is either the original pair,
+        or (u_next, 0), where u_next is the uid of the sibling directly to the right
+        of the input uid.  That is to say, it is the least u greater than uid
+        that has the same position."""
+        if parentid in self._parent2children:
+            u_next = None
+            for r in self._parent2children[parentid]:
+                u = r.change.unique_id
+                p = r.change.edit.position
+                if (p == position) and isinstance(r.change.edit, Insertion) and (u > uid):
+                    if (u_next is None) or (u < u_next):
+                        u_next = u
+            if u_next is not None:
+                return (u_next, 0)
+        return (parentid, position)
+        
+    def _delete_listonly(self, position, length):
+        """Given the position in _sourcelist, and the length of the deletion,
+        perform the deletion in the lists"""
+        self._listing.hide(position,length)
+        
+    def _insert_listonly(self, uid, target, length):
+        """Make a new insertion into the lists with uid and length at position
+        in _poslist"""
+        elts = [(uid,i) for i in xrange(length+1)]
+        visibility = [True] * length
+        visibility.append(False)
+        self._listing.insert_sequence_leftof(target, elts, visibility)
+    
+    def _add_change_treeonly(self, c):
+        if c.unique_id in self._id2rec:
+            return
+        d = self._id2rec[c.parent].depth + 1
+        r = Record(c,d)
+        self._id2rec[c.unique_id] = r
+        if c.parent not in self._parent2children:
+            self._parent2children[c.parent] = set()
+        self._parent2children[c.parent].add(r)
+
+    def get_insert(self, k, n = 0, parent=None):
+        #FIXME: This method could be useful, but it no longer works
+        """get_insert finds and returns the youngest insertion containing positions
+        k to k + n in the total coordinates of parent.  If parent is unspecified,
+        then it is taken to be the root, meaning the coordinates in question
+        are the current document coordinates.
+        
+        The return value is a tuple (rec, k_unmodified, k_modified), meaning
+        the record containing k and k + n, the position of k in rec's
+        unmodified coordinates, and the position of k in rec's
+        modified coordinates.
+        
+        "containing" is defined so that a record with n characters
+        (labeled 0...n-1) can still be returned as containing k...k+n.  In
+        other words, each Insertion contains both endpoints.  However, an
+        Insertion that has been totally deleted is ignored entirely."""
+        if parent is None:
+            parent = self.ROOT
+        
+        h = self._get_insert_helper(k, n)
+        self._step(h, parent)
+        while h.child is not None:
+            parent = h.child
+            h = self._get_insert_helper(h.child_k, n)
+            self._step(h, parent)
+                
+        return (parent, h.k_in_parent, h.k)
+
+    def get_changes(self): #TODO: add arguments to get only changes in a certain range
+        """get_changes provides a depth-first topologically sorted list of all
+        Changes in this Tree"""
+        L = []
+        stack = [-1]
+        while stack:
+            x = self._id2rec[stack.pop()].change
+            L.append(x)
+            if x.unique_id in self._parent2children:
+                stack.extend(r.change.unique_id 
+                                   for r in self._parent2children[x.unique_id])
+        return L
+    
+    def is_ready(self, c):
+        """Returns a boolean indicating whether a Change c may safely be added
+        to the Tree.  Specifically, it checks whether c.parent is already known."""
+        return c.parent in self._id2rec