author     Tomeu Vizoso <tomeu@tomeuvizoso.net>  2007-11-29 12:12:21 (GMT)
committer  Tomeu Vizoso <tomeu@tomeuvizoso.net>  2007-11-29 12:12:21 (GMT)
commit     8dd37b6e966a4f328749637e73063bbf45617244 (patch)
tree       d85b4427c83ee9f738a9a9929107247f8ddf2eca
parent     dbace3e768def38aa5fb4e328eb91c8932eccd49 (diff)
#5127 Index files from usb sticks in the idle callback.
-rw-r--r--  src/olpc/datastore/backingstore.py  146
1 file changed, 70 insertions, 76 deletions
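This commit replaces the background walker thread with a GLib idle callback: the mount-time scan collects candidate files synchronously, then gobject.idle_add() schedules a callback that indexes one file per invocation and returns True while work remains. A minimal sketch of that pattern, assuming the classic static gobject bindings; process_one, do_index and work_queue are illustrative names, not part of the datastore code:

    import gobject

    def do_index(path):
        # Hypothetical stand-in for the real per-file work (create()/update()).
        pass

    def process_one(work_queue, loop):
        # Handle a single item per idle invocation so the main loop stays responsive.
        do_index(work_queue.pop())
        if work_queue:
            return True      # True keeps the idle source scheduled
        loop.quit()
        return False         # False removes the idle source

    work_queue = ['report.txt', 'photo.jpg']
    loop = gobject.MainLoop()
    source_id = gobject.idle_add(process_one, work_queue, loop)
    # gobject.source_remove(source_id) would cancel the remaining work,
    # which is what stop() does below on unmount.
    loop.run()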
diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py
index 47f2ee4..12dc582 100644
--- a/src/olpc/datastore/backingstore.py
+++ b/src/olpc/datastore/backingstore.py
@@ -25,6 +25,7 @@ import urllib
import dbus
import xapian
+import gobject
from olpc.datastore.xapianindex import IndexManager
from olpc.datastore import bin_copy
@@ -177,7 +178,6 @@ class AsyncCopy:
logger.debug("AC: will copy %s -> %s (%d bytes)" % (self.src, self.dest, self.size))
self.tstart = time.time()
- import gobject
sid = gobject.idle_add(self._copy_block)
class FileBackingStore(BackingStore):
@@ -660,7 +660,7 @@ class InplaceFileBackingStore(FileBackingStore):
super(InplaceFileBackingStore, self).__init__(uri, **kwargs)
# use the original uri
self.uri = uri
- self.walker = None
+ self._walk_source = None
@staticmethod
def parse(uri):
@@ -694,76 +694,71 @@ class InplaceFileBackingStore(FileBackingStore):
# now map/update the existing data into the indexes
# but do it async
- self.walker = threading.Thread(target=self._walk)
- self._runWalker = True
- self.walker.setDaemon(True)
- self.walker.start()
-
- def _walk(self):
- # XXX: a version that checked xattr for uid would be simple
- # and faster
- # scan the uri for all non self.base files and update their
- # records in the db
+ files_to_check = []
for dirpath, dirname, filenames in os.walk(self.uri):
- try:
- # see if there is an entry for the filename
- if self.base in dirpath: continue
- if self.STORE_NAME in dirname:
- dirname.remove(self.STORE_NAME)
-
- # blacklist all the hidden directories
- if '/.' in dirpath: continue
-
- for fn in filenames:
- try:
- # give the thread a chance to exit
- if not self._runWalker: break
- # blacklist files
- # ignore conventionally hidden files
- if fn.startswith("."): continue
-
- source = os.path.join(dirpath, fn)
- relative = source[len(self.uri)+1:]
-
- result, count = self.indexmanager.search(dict(filename=relative))
- mime_type = gnomevfs.get_mime_type(urllib.quote(source))
- stat = os.stat(source)
- ctime = datetime.fromtimestamp(stat.st_ctime).isoformat()
- mtime = datetime.fromtimestamp(stat.st_mtime).isoformat()
- title = os.path.splitext(os.path.split(source)[1])[0]
- metadata = dict(filename=relative,
- mime_type=mime_type,
- ctime=ctime,
- mtime=mtime,
- title=title)
- if not count:
- # create a new record
- self.create(metadata, source)
- else:
- # update the object with the new content iif the
- # checksum is different
- # XXX: what if there is more than one? (shouldn't
- # happen)
-
- # FIXME This is throwing away all the entry metadata.
- # Disabled for trial-3. We are not doing indexing
- # anyway so it would just update the mtime which is
- # not that useful. Also the journal is currently
- # setting the mime type before saving the file making
- # the mtime check useless.
- #
- # content = result.next()
- # uid = content.id
- # saved_mtime = content.get_property('mtime')
- # if mtime != saved_mtime:
- # self.update(uid, metadata, source)
- pass
- except Exception, e:
- logging.exception('Error while processing %r: %r' % (fn, e))
- except Exception, e:
- logging.exception('Error while indexing mount point %r: %r' % (self.uri, e))
- self.indexmanager.flush()
- return
+ if self.base in dirpath: continue
+ if self.STORE_NAME in dirname:
+ dirname.remove(self.STORE_NAME)
+
+ # blacklist all the hidden directories
+ if '/.' in dirpath: continue
+
+ for fn in filenames:
+ # ignore conventionally hidden files
+ if fn.startswith("."):
+ continue
+ files_to_check.append((dirpath, fn))
+
+ self._walk_source = gobject.idle_add(self._walk, files_to_check)
+
+ def _walk(self, files_to_check):
+ dirpath, fn = files_to_check.pop()
+ logging.debug('InplaceFileBackingStore._walk(): %r' % fn)
+ try:
+ source = os.path.join(dirpath, fn)
+ relative = source[len(self.uri)+1:]
+
+ result, count = self.indexmanager.search(dict(filename=relative))
+ mime_type = gnomevfs.get_mime_type(source)
+ stat = os.stat(source)
+ ctime = datetime.fromtimestamp(stat.st_ctime).isoformat()
+ mtime = datetime.fromtimestamp(stat.st_mtime).isoformat()
+ title = os.path.splitext(os.path.split(source)[1])[0]
+ metadata = dict(filename=relative,
+ mime_type=mime_type,
+ ctime=ctime,
+ mtime=mtime,
+ title=title)
+ if not count:
+ # create a new record
+ self.create(metadata, source)
+ else:
+ # update the object with the new content iif the
+ # checksum is different
+ # XXX: what if there is more than one? (shouldn't
+ # happen)
+
+ # FIXME This is throwing away all the entry metadata.
+ # Disabled for trial-3. We are not doing indexing
+ # anyway so it would just update the mtime which is
+ # not that useful. Also the journal is currently
+ # setting the mime type before saving the file making
+ # the mtime check useless.
+ #
+ # content = result.next()
+ # uid = content.id
+ # saved_mtime = content.get_property('mtime')
+ # if mtime != saved_mtime:
+ # self.update(uid, metadata, source)
+ pass
+ except Exception, e:
+ logging.exception('Error while processing %r: %r' % (fn, e))
+
+ if files_to_check:
+ return True
+ else:
+ self._walk_source = None
+ return False
def _translatePath(self, uid):
try: content = self.indexmanager.get(uid)
@@ -898,12 +893,11 @@ class InplaceFileBackingStore(FileBackingStore):
os.unlink(path)
def stop(self):
- if self.walker and self.walker.isAlive():
- # XXX: just force the unmount, flush the index queue
- self._runWalker = False
+ if self._walk_source is not None:
+ gobject.source_remove(self._walk_source)
self.indexmanager.stop(force=True)
def complete_indexing(self):
- if self.walker and self.walker.isAlive():
- self.walker.join()
+ # TODO: Perhaps we should move the inplace indexing to be sync here?
self.indexmanager.complete_indexing()
+
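The TODO left in complete_indexing() points at one possible follow-up: draining the remaining work synchronously instead of waiting for the main loop to service the idle source. In the idle-callback style sketched above, that amounts to calling the callback until it reports completion; drain below is a hypothetical helper, not part of this commit:

    def drain(idle_callback, *args):
        # Invoke an idle-style callback repeatedly until it returns False,
        # i.e. finish the remaining work synchronously on the calling thread.
        while idle_callback(*args):
            pass

Applying this to InplaceFileBackingStore would also require removing the idle source first and keeping a reference to the files_to_check list, which this commit only passes to gobject.idle_add().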