Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/groupthink/stringtree.py
diff options
context:
space:
mode:
Diffstat (limited to 'groupthink/stringtree.py')
-rw-r--r--groupthink/stringtree.py555
1 files changed, 555 insertions, 0 deletions
diff --git a/groupthink/stringtree.py b/groupthink/stringtree.py
new file mode 100644
index 0000000..5701636
--- /dev/null
+++ b/groupthink/stringtree.py
@@ -0,0 +1,555 @@
+import random
+random.seed() #Works around some mysterious bug in the Journal or Rainbow that
+ #causes the random number generator to be seeded with the same
+ #constant value if a Journal entry is re-launched from the
+ #Journal.
+from collections import defaultdict
+import dbus # We do dbus (de)serialization in this file to minimize abstraction breakage
+import logging
+import aatree
+
+inf = float('inf')
+
+class Change:
+ """Each Change represents a chanage to the StringTree.
+ """
+ def __init__(self, unique_id, parent, edit):
+ """unique_id is a unique identifier for this Change
+ parent is the unique_id that is affected by this
+ Change. Parent unique_id are always associated with an Insertion edit.
+ edit is an Insertion, Deletion, or Removal. It's what is to be done to
+ the parent"""
+ self.unique_id = unique_id
+ self.parent = parent
+ self.edit = edit
+
+ def __repr__(self):
+ return "Change(%d, %d, %s)" % (self.unique_id, self.parent, str(self.edit))
+
+class Insertion:
+ """Represents the action of inserting a particular bit of text
+ """
+ def __init__(self, position, text):
+ """position is the point at which to insert text, in the unmodified
+ coordinates of the parent node (other insertions and deletions
+ to that node do not affect these coordinates)
+ text is the string to insert"""
+ self.position = position
+ self.text = text
+
+ def __repr__(self):
+ return "Insertion(%d, %s)" % (self.position, self.text)
+
+class Deletion:
+ """Represents the deletion of only those characters present in the parent in
+ this range. Characters that have been inserted into the parent by
+ an Insertion are not affected.
+ """
+ def __init__(self, position, length):
+ """position is the point, in the unmodified coordinates of the parent
+ node, at which to start deletion
+ length is an integer greater than 0 representing the number of
+ characters to delete"""
+ self.position = position
+ self.length = length
+
+ def __repr__(self):
+ return "Deletion(%d, %d)" % (self.position, self.length)
+
+class Removal:
+ """Represents the deletion of all characters ever inserted in this range.
+ Insertions at the endpoints are _not_ included in the Removal, and must
+ be Removed separately.
+ """
+ def __init__(self, position, length):
+ """position is the point at which to start Removal
+ length is the number of points in the unmodified parent coordinate to
+ the end of the Removal. Note that many more than length characters
+ can be removed if there are Insertions in this range."""
+ self.position = position
+ self.length = length
+
+ def __repr__(self):
+ return "Removal(%d, %d)" % (self.position, self.length)
+
+class Record:
+ """Each Record is used to store one Change inside the StringTree.
+ The purpose of a Record is contain both the Change itself, and any cached
+ information about the current effect of that Change.
+ """
+ def __init__(self, change, depth):
+ self.change = change
+ self.depth = depth
+
+ def __str__(self):
+ return "Record(%s, %d)" % (str(self.change), self.depth)
+
+ __repr__ = __str__
+
+def flatten(L):
+ o = []
+ for x in L:
+ if isinstance(x,list):
+ o.extend(flatten(x)) #recursive
+ else:
+ o.append(x)
+ return o
+
+def my_rand():
+ return random.getrandbits(63)
+
+def translator(c, pack):
+ if pack:
+ if isinstance(c.edit, Insertion):
+ return dbus.Struct((dbus.Int64(c.unique_id),
+ dbus.Int64(c.parent),
+ dbus.Int16(0),
+ dbus.Int64(c.edit.position),
+ dbus.UTF8String(c.edit.text)),
+ signature='xxnxs')
+ elif isinstance(c.edit, Deletion):
+ return dbus.Struct((dbus.Int64(c.unique_id),
+ dbus.Int64(c.parent),
+ dbus.Int16(1),
+ dbus.Int64(c.edit.position),
+ dbus.Int64(c.edit.length)),
+ signature='xxnxx')
+ elif isinstance(c.edit, Removal):
+ return dbus.Struct((dbus.Int64(c.unique_id),
+ dbus.Int64(c.parent),
+ dbus.Int16(2),
+ dbus.Int64(c.edit.position),
+ dbus.Int64(c.edit.length)),
+ signature='xxnxx')
+ else:
+ raise Unimplemented
+ else:
+ if c[2] == 0:
+ ed = Insertion(int(c[3]),str(c[4]))
+ elif c[2] == 1:
+ ed = Deletion(int(c[3]),int(c[4]))
+ elif c[2] == 2:
+ ed = Removal(int(c[3]),int(c[4]))
+ else:
+ raise "unknown type"
+ return Change(int(c[0]), int(c[1]), ed)
+
+class EagerHideList:
+ """EagerHideList provides a list with hidden elements. The standard index
+ considers only the visible elements, but the 'all' index considers all
+ elements. The standard index of an invisible element is considered to be
+ the index of the next visible element."""
+ def __init__(self):
+ self._sourcelist = []
+ self._poslist = []
+ self._posmap = {}
+ def __len__(self):
+ return len(self._sourcelist)
+ def __iter__(self):
+ return self._sourcelist.__iter__()
+ def __getitem__(self, s):
+ return self._sourcelist[s]
+ def index(self, item):
+ x = self._poslist[self._posmap[item]]
+ return x[2]
+ def hide(self, position, length):
+ """Given the position in _sourcelist, and the length of the deletion,
+ perform the deletion in the lists"""
+ pfirst = self._sourcelist[position]
+ plast = self._sourcelist[position+length-1]
+ ifirst = self._posmap[pfirst]
+ ilast = self._posmap[plast]
+ for i in xrange(ifirst, ilast+1):
+ L = self._poslist[i]
+ L[1] = False #No longer visible, if it was visible before
+ L[2] = position #collapse positions
+ for L in self._poslist[ilast+1:]:
+ L[2] -= length #move all subsequent character up by length
+
+ del self._sourcelist[position:(position+length)]
+ #self._check_invariants()
+ def getitem_all(self, s):
+ if isinstance(s, int):
+ return self._poslist[s][0]
+ else:
+ return [x[0] for x in self._poslist[s]]
+ def index_all(self, item):
+ return self._posmap[item]
+ def is_visible(self, i):
+ return self._poslist[i][1]
+ def is_visible_item(self, item):
+ return self._poslist[self._posmap[item]][1]
+ def insert_sequence_all(self, position, sequence, visibility):
+ """Insert sequence with visibility into the all-coordinates at position"""
+ if position > 0:
+ psource = self._poslist[position][2]
+ else:
+ psource = 0
+ length = len(sequence)
+ newlist = []
+ newlistsource = []
+ i = psource
+ for elt, viz in zip(sequence, visibility):
+ newlist.append([elt, viz, i])
+ if viz:
+ newlistsource.append(elt)
+ i += 1
+ self._poslist[position:position] = newlist
+ for i in xrange(position,position+length):
+ L = self._poslist[i]
+ self._posmap[L[0]] = i
+ num_viz = len(newlistsource)
+ for i in xrange(position+length,len(self._poslist)):
+ L = self._poslist[i]
+ L[2] += num_viz
+ self._posmap[L[0]] = i
+ self._sourcelist[psource:psource] = newlistsource
+ #self._check_invariants()
+ def insert_sequence_leftof(self, target, sequence, visibility):
+ self.insert_sequence_all(self._posmap[target], sequence, visibility)
+
+ def _check_invariants(self):
+ assert len(self._posmap) == len(self._poslist)
+ for i in xrange(len(self._poslist)):
+ assert self._posmap[self._poslist[i][0]] == i
+ if self._poslist[i][1]:
+ assert self._sourcelist[self._poslist[i][2]] == self._poslist[i][0]
+ if i > 0:
+ if self._poslist[i-1][1]:
+ assert self._poslist[i-1][2] + 1 == self._poslist[i][2]
+ else:
+ assert self._poslist[i-1][2] == self._poslist[i][2]
+
+class SimpleStringTree:
+ """SimpleStringTree is a StringTree that supports only Insertions and
+ Deletions. Handling References, while valuable, has proven quite difficult,
+ and so will not be addressed by this data structure.
+
+ Code for handling Removals will be left in for the moment, since it is
+ easy enough, even though it is not presently used."""
+
+ def __init__(self, initstring=""):
+ self.ROOT = Record(Change(-1, -1, Insertion(0, "")), 0)
+ self._id2rec = dict() #unique_id: Record(change) | change.unique_id = unique_id
+ self._id2rec[-1] = self.ROOT
+ self._parent2children = defaultdict(set) # unique_id: {Record(change) | change.parent == unique_id}
+ self._cursor = 0
+ self._listing = aatree.AATreeHideList()
+ self._listing.insert_sequence_all(0,[(-1,0)],[False])
+ if initstring:
+ self.insert(initstring, 0)
+
+ def __repr__(self):
+ return "\n".join((str(v) for v in self._id2rec.values()))
+
+ def getvalue(self, r = None):
+ if r is None:
+ r = self.ROOT
+ s = "".join(self._id2rec[x[0]].change.edit.text[x[1]] for x in self._listing)
+ return s
+
+ def next(self):
+ raise
+
+ def flush(self):
+ # This could be used to implement lazy evaluation of input by not
+ # re-evaluating the string until flush (or read?)
+ pass
+
+ def close(self):
+ raise
+
+ def read(self, size=float('inf')):
+ #Efficiency: This method should use size to avoid calling getvalue and rendering the entire string
+ s = self.getvalue()
+ outpoint = min(len(s), self._cursor + size)
+ inpoint = self._cursor
+ self._cursor = outpoint
+ return s[inpoint:outpoint]
+
+ def readline(self, size=float('inf')):
+ #Efficiency: This method should use size to avoid rendering the whole string
+ s = self.getvalue()
+ outpoint = min(len(s), self._cursor + size)
+ inpoint = self._cursor
+ i = s.find("\n", inpoint, outpoint)
+ if i == -1 or i >= outpoint:
+ self._cursor = outpoint
+ return s[inpoint:outpoint]
+ else:
+ self._cursor = i + 1
+ return s[inpoint:(i+1)]
+
+ def readlines(self, sizehint=None):
+ #Efficiency: use sizehint
+ s = self.getvalue()
+ t = s[self._cursor:]
+ self._cursor = len(s)
+ return t.splitlines(True)
+
+ def seek(self, offset, whence=0):
+ if whence == 0:
+ self._cursor = offset
+ elif whence == 1:
+ self._cursor += offset
+ elif whence == 2:
+ self._cursor = len(self._listing) + offset
+
+ def tell(self):
+ return self._cursor
+
+ def truncate(self, size=None):
+ if size is None:
+ size = self._cursor
+ return self.delete(size, len(self._listing) - size)
+
+ def write(self, text):
+ L = min(len(self._listing) - self._cursor, len(text))
+ changelist = []
+ if L > 0:
+ changelist.extend(self.delete(self._cursor, L))
+ changelist.extend(self.insert(text))
+ return changelist
+
+ def writelines(self, sequence):
+ s = "".join(sequence)
+ return self.write(s)
+
+ # Non-filelike text editing methods
+
+ def insert(self, text, k=None):
+ if k is None:
+ k = self._cursor
+
+ if len(self._listing) == 0:
+ r = self.ROOT
+ uid = -1
+ inspoint = 0
+ elif k == 0:
+ (uid, inspoint) = self._listing[k]
+ r = self._id2rec[uid]
+ elif k < len(self._listing):
+ # When not inserting at the endpoints, we have to be sure to
+ # check if we are at the boundary between a parent and one of its
+ # descendants. If so, we must make our insertion in the descendant,
+ # not the parent, because the insertions would "conflict" (occur at
+ # the same location) in the parent, which would produce an ordering
+ # ambiguity, which will resolve in our favor only 50% of the time.
+ pL, pR = self._listing[(k-1):(k+1)]
+ (uidL, inspointL) = pL
+ (uidR, inspointR) = pR
+ rR = self._id2rec[uidR]
+ if uidL == uidR: #There's no boundary here at all (at least not one
+ # of any importance to us. Therefore, we have to insert to the
+ # left of the character at k, as usual.
+ r = rR
+ uid = uidR
+ inspoint = inspointR
+ else: #There's a boundary of some sort here. We always choose to
+ # insert at the deeper node (breaking left in case of a tie).
+ # (In the case that neither node is the ancestor of the other,
+ # either choice would be acceptable, regardless of depth. This
+ # logic is therefore acceptable, and has the advantage of being
+ # simple and fast.)
+ rL = self._id2rec[uidL]
+ if rR.depth > rL.depth:
+ r = rR
+ uid = uidR
+ inspoint = inspointR
+ else:
+ r = rL
+ uid = uidL
+ inspoint = inspointL + 1
+ elif k == len(self._listing):
+ (uid,i) = self._listing[k-1]
+ r = self._id2rec[uid]
+ inspoint = i + 1
+ else:
+ raise
+
+ e = Insertion(inspoint, text)
+ c = Change(my_rand(), r.change.unique_id, e)
+ self._add_change_treeonly(c)
+ target = (uid, inspoint)
+ self._insert_listonly(c.unique_id, target, len(text))
+ self._cursor = k + len(text)
+ return [c]
+
+ def delete(self, k, n):
+ """Starting at a point k (0-indexed), delete n characters"""
+ if k + n > len(self._listing):
+ raise
+ p = self._listing[k]
+ contigs = [[p[0],p[1],p[1]]]
+ for (uid, index) in self._listing[(k+1):(k+n)]:
+ #This logic produces deletions that span any missing chunks. This
+ #produces a smaller number of deletions than making sure that they
+ #are actually "contiguous", but it might interact badly with a
+ #hypothetical undo system.
+ if contigs[-1][0] == uid:
+ contigs[-1][2] = index
+ else:
+ contigs.append([uid,index,index])
+ changelist = [Change(my_rand(), c[0], Deletion(c[1],1 + c[2]-c[1])) for c in contigs]
+ for c in changelist:
+ self._add_change_treeonly(c)
+ self._delete_listonly(k,n)
+ return changelist
+
+ def get_range(self, rec, point, m):
+ todo = [(rec, point, m, None)] # None is a dummy value since p is unused
+ ranges = []
+ while len(todo) > 0:
+ (rec, point, m, p) = todo[0]
+ h = self._range_helper(point, m)
+ self._step(h, rec)
+ if h.outpoint is not None:
+ ranges.append((rec, h.point_parent, h.outpoint - h.point_parent))
+ #print rec, h.point_parent, h.outpoint - h.point_parent
+ todo.extend(h.todo)
+ #print todo
+ del todo[0]
+ return ranges
+
+ def move(self, rempoint, n, inspoint):
+ """In StringTree, move() should coherently copy a section of text,
+ such that any conflicting edits appear in the new location, not the old.
+ In SimpleStringTree, this is not possible, so move() just falls back to
+ Deletion and Insertion."""
+ self.seek(rempoint)
+ t = self.read(n)
+
+ if rempoint > inspoint:
+ L = self.delete(rempoint,n)
+ L.extend(self.insert(t, inspoint))
+ else:
+ L = self.insert(t, inspoint)
+ L.extend(self.delete(rempoint,n))
+ return L
+
+ # Patch management methods
+
+ def add_change(self, c):
+ if c.unique_id in self._id2rec:
+ return []
+ if isinstance(c.edit, Insertion):
+ p = self._effective_parent(c.unique_id, c.parent, c.edit.position)
+ i = self._listing.index(p)
+ d = len(c.edit.text)
+ self._insert_listonly(c.unique_id, p, d)
+ flist = [Insertion(i, c.edit.text)]
+ elif isinstance(c.edit, Deletion):
+ flist = []
+ start = None
+ end = None
+ for i in xrange(c.edit.position,c.edit.position + c.edit.length):
+ p = (c.parent, i)
+ if self._listing.is_visible_item(p):
+ q = self._listing.index(p)
+ if end == q:
+ end += 1
+ else:
+ if end is not None:
+ n = end - start
+ flist.append(Deletion(start,n))
+ self._delete_listonly(start,n)
+ start = q
+ end = start + 1
+ if end is not None: # the last contig won't be handled by the loop
+ n = end - start
+ flist.append(Deletion(start,n))
+ self._delete_listonly(start,n)
+ else:
+ raise
+ self._add_change_treeonly(c)
+ return flist
+
+ def _effective_parent(self, uid, parentid, position):
+ """The effective parent of an insertion is defined as the (uid, loc) pair
+ that causes it to be inserted in the right location in the string. This
+ is only relevant in the event of a conflict, in which case the conflict
+ edits are required to be ordered from least uid to greatest uid. The
+ effective parent of a conflicted edit, then, is either the original pair,
+ or (u_next, 0), where u_next is the uid of the sibling directly to the right
+ of the input uid. That is to say, it is the least u greater than uid
+ that has the same position."""
+ if parentid in self._parent2children:
+ u_next = None
+ for r in self._parent2children[parentid]:
+ u = r.change.unique_id
+ p = r.change.edit.position
+ if (p == position) and isinstance(r.change.edit, Insertion) and (u > uid):
+ if (u_next is None) or (u < u_next):
+ u_next = u
+ if u_next is not None:
+ return (u_next, 0)
+ return (parentid, position)
+
+ def _delete_listonly(self, position, length):
+ """Given the position in _sourcelist, and the length of the deletion,
+ perform the deletion in the lists"""
+ self._listing.hide(position,length)
+
+ def _insert_listonly(self, uid, target, length):
+ """Make a new insertion into the lists with uid and length at position
+ in _poslist"""
+ elts = [(uid,i) for i in xrange(length+1)]
+ visibility = [True] * length
+ visibility.append(False)
+ self._listing.insert_sequence_leftof(target, elts, visibility)
+
+ def _add_change_treeonly(self, c):
+ if c.unique_id in self._id2rec:
+ return
+ d = self._id2rec[c.parent].depth + 1
+ r = Record(c,d)
+ self._id2rec[c.unique_id] = r
+ if c.parent not in self._parent2children:
+ self._parent2children[c.parent] = set()
+ self._parent2children[c.parent].add(r)
+
+ def get_insert(self, k, n = 0, parent=None):
+ #FIXME: This method could be useful, but it no longer works
+ """get_insert finds and returns the youngest insertion containing positions
+ k to k + n in the total coordinates of parent. If parent is unspecified,
+ then it is taken to be the root, meaning the coordinates in question
+ are the current document coordinates.
+
+ The return value is a tuple (rec, k_unmodified, k_modified), meaning
+ the record containing k and k + n, the position of k in rec's
+ unmodified coordinates, and the position of k in rec's
+ modified coordinates.
+
+ "containing" is defined so that a record with n characters
+ (labeled 0...n-1) can still be returned as containing k...k+n. In
+ other words, each Insertion contains both endpoints. However, an
+ Insertion that has been totally deleted is ignored entirely."""
+ if parent is None:
+ parent = self.ROOT
+
+ h = self._get_insert_helper(k, n)
+ self._step(h, parent)
+ while h.child is not None:
+ parent = h.child
+ h = self._get_insert_helper(h.child_k, n)
+ self._step(h, parent)
+
+ return (parent, h.k_in_parent, h.k)
+
+ def get_changes(self): #TODO: add arguments to get only changes in a certain range
+ """get_changes provides a depth-first topologically sorted list of all
+ Changes in this Tree"""
+ L = []
+ stack = [-1]
+ while stack:
+ x = self._id2rec[stack.pop()].change
+ L.append(x)
+ if x.unique_id in self._parent2children:
+ stack.extend(r.change.unique_id
+ for r in self._parent2children[x.unique_id])
+ return L
+
+ def is_ready(self, c):
+ """Returns a boolean indicating whether a Change c may safely be added
+ to the Tree. Specifically, it checks whether c.parent is already known."""
+ return c.parent in self._id2rec