diff options
author | Marco Pesenti Gritti <mpg@redhat.com> | 2007-10-23 23:39:29 (GMT) |
---|---|---|
committer | Marco Pesenti Gritti <mpg@redhat.com> | 2007-10-23 23:39:29 (GMT) |
commit | 7072db5ffd9dd8812523fee940a95f73f73ad753 (patch) | |
tree | e797529122ab16115b20bac68fa1caa01eb791f8 | |
parent | 51ed7d53e4f8444d44584ca9e9163bdd2255accd (diff) | |
parent | 1cea0bceb21de7a52325778ff70540ec4f7dcfbb (diff) |
Merge branch 'master' of git+ssh://dev.laptop.org/git/projects/datastore
Conflicts:
NEWS
-rw-r--r-- | NEWS | 1 | ||||
-rw-r--r-- | src/olpc/datastore/backingstore.py | 112 | ||||
-rw-r--r-- | src/olpc/datastore/datastore.py | 19 | ||||
-rw-r--r-- | src/olpc/datastore/model.py | 1 | ||||
-rw-r--r-- | src/olpc/datastore/xapianindex.py | 4 |
5 files changed, 81 insertions, 56 deletions
@@ -1,4 +1,5 @@ * #3768 Fix copy of clipboard objects to usb stick (marco) +* #4235: Keep indexing an usb stick after an error in a single file. (tomeu) Snapshot 89ae26ced4 diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py index b76b587..d37c553 100644 --- a/src/olpc/datastore/backingstore.py +++ b/src/olpc/datastore/backingstore.py @@ -583,6 +583,9 @@ class FileBackingStore(BackingStore): if not limit: limit = 4069 return self.indexmanager.search(query, start_index=offset, end_index=limit, order_by=order_by) + def ids(self): + return self.indexmanager.get_all_ids() + def stop(self): self.indexmanager.stop() @@ -654,58 +657,63 @@ class InplaceFileBackingStore(FileBackingStore): # scan the uri for all non self.base files and update their # records in the db for dirpath, dirname, filenames in os.walk(self.uri): - # see if there is an entry for the filename - if self.base in dirpath: continue - if self.STORE_NAME in dirname: - dirname.remove(self.STORE_NAME) - - # blacklist all the hidden directories - if '/.' in dirpath: continue - - for fn in filenames: - # give the thread a chance to exit - if not self._runWalker: break - # blacklist files - # ignore conventionally hidden files - if fn.startswith("."): continue - - source = os.path.join(dirpath, fn) - relative = source[len(self.uri)+1:] - - result, count = self.indexmanager.search(dict(filename=relative)) - mime_type = gnomevfs.get_mime_type(source) - stat = os.stat(source) - ctime = datetime.fromtimestamp(stat.st_ctime).isoformat() - mtime = datetime.fromtimestamp(stat.st_mtime).isoformat() - title = os.path.splitext(os.path.split(source)[1])[0] - metadata = dict(filename=relative, - mime_type=mime_type, - ctime=ctime, - mtime=mtime, - title=title) - if not count: - # create a new record - self.create(metadata, source) - else: - # update the object with the new content iif the - # checksum is different - # XXX: what if there is more than one? (shouldn't - # happen) - - # FIXME This is throwing away all the entry metadata. - # Disabled for trial-3. We are not doing indexing - # anyway so it would just update the mtime which is - # not that useful. Also the journal is currently - # setting the mime type before saving the file making - # the mtime check useless. - # - # content = result.next() - # uid = content.id - # saved_mtime = content.get_property('mtime') - # if mtime != saved_mtime: - # self.update(uid, metadata, source) - pass - + try: + # see if there is an entry for the filename + if self.base in dirpath: continue + if self.STORE_NAME in dirname: + dirname.remove(self.STORE_NAME) + + # blacklist all the hidden directories + if '/.' in dirpath: continue + + for fn in filenames: + try: + # give the thread a chance to exit + if not self._runWalker: break + # blacklist files + # ignore conventionally hidden files + if fn.startswith("."): continue + + source = os.path.join(dirpath, fn) + relative = source[len(self.uri)+1:] + + result, count = self.indexmanager.search(dict(filename=relative)) + mime_type = gnomevfs.get_mime_type(source) + stat = os.stat(source) + ctime = datetime.fromtimestamp(stat.st_ctime).isoformat() + mtime = datetime.fromtimestamp(stat.st_mtime).isoformat() + title = os.path.splitext(os.path.split(source)[1])[0] + metadata = dict(filename=relative, + mime_type=mime_type, + ctime=ctime, + mtime=mtime, + title=title) + if not count: + # create a new record + self.create(metadata, source) + else: + # update the object with the new content iif the + # checksum is different + # XXX: what if there is more than one? (shouldn't + # happen) + + # FIXME This is throwing away all the entry metadata. + # Disabled for trial-3. We are not doing indexing + # anyway so it would just update the mtime which is + # not that useful. Also the journal is currently + # setting the mime type before saving the file making + # the mtime check useless. + # + # content = result.next() + # uid = content.id + # saved_mtime = content.get_property('mtime') + # if mtime != saved_mtime: + # self.update(uid, metadata, source) + pass + except Exception, e: + logging.exception('Error while processing %r: %r' % (fn, e)) + except Exception, e: + logging.exception('Error while indexing mount point %r: %r' % (self.uri, e)) self.indexmanager.flush() return diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py index b5d67b9..8ed843a 100644 --- a/src/olpc/datastore/datastore.py +++ b/src/olpc/datastore/datastore.py @@ -253,11 +253,23 @@ class DataStore(dbus.service.Object): d[hit.id] = hit return d, len(d), len(results) + + @dbus.service.method(DS_DBUS_INTERFACE, + in_signature='s', + out_signature='as') + def ids(self, mountpoint=None): + """return all the ids of objects living on a given + mountpoint""" + if str(mountpoint) == "": mountpoint=None + mp = self._resolveMountpoint(mountpoint) + return mp.ids() + + #@utils.sanitize_dbus @dbus.service.method(DS_DBUS_INTERFACE, - in_signature='a{sv}as', + in_signature='a{sv}', out_signature='aa{sv}u') - def find(self, query=None, properties=None, **kwargs): + def find(self, query=None, **kwargs): """find(query) takes a dict of parameters and returns data in the following format @@ -294,7 +306,8 @@ class DataStore(dbus.service.Object): else: if 'query' not in kwargs: kwargs['query'] = query - + + properties = kwargs.pop("properties", []) include_files = kwargs.pop('include_files', False) order_by = kwargs.pop('order_by', []) diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index 773fdcf..9b00e2c 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -252,6 +252,7 @@ class Content(object): ext = mime.get_primary_extension(mt) # .ksh is a strange ext for plain text if ext and ext == '.ksh': ext = '.txt' + if ext and ext == '.jpe': ext = '.jpg' # fixes #3163 if ext: return None, ext return None, None diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py index c1217d5..82e8644 100644 --- a/src/olpc/datastore/xapianindex.py +++ b/src/olpc/datastore/xapianindex.py @@ -431,7 +431,9 @@ class IndexManager(object): # map the result set to model.Content items return ContentMappingIter(results, self.backingstore, self.datamodel), count - + + def get_all_ids(self): + return [ti.term[1:] for ti in self.read_index._index.allterms('Q')] def get_uniquevaluesfor(self, property): # XXX: this is very sketchy code |