diff options
author | Sascha Silbe <sascha@silbe.org> | 2009-07-10 14:21:08 (GMT) |
---|---|---|
committer | Sascha Silbe <sascha@silbe.org> | 2009-07-10 14:21:08 (GMT) |
commit | 5ee65db4ab134a90e7d7de47531383d6fa8f0a1a (patch) | |
tree | 74e49415a0e16980cd32a14194703c33467e120d /src | |
parent | f6eb91101a3d5afcd8ab96f1f44c50f6c08b656b (diff) |
change names to match new API: uid -> tree_id, vid -> version_id, activity -> bundle_id
Diffstat (limited to 'src')
-rw-r--r-- | src/carquinyol/datastore.py | 154 | ||||
-rw-r--r-- | src/carquinyol/filestore.py | 30 | ||||
-rw-r--r-- | src/carquinyol/indexstore.py | 82 | ||||
-rw-r--r-- | src/carquinyol/layoutmanager.py | 10 | ||||
-rw-r--r-- | src/carquinyol/metadatastore.py | 24 | ||||
-rw-r--r-- | src/carquinyol/migration.py | 76 | ||||
-rw-r--r-- | src/carquinyol/optimizer.py | 42 |
7 files changed, 211 insertions, 207 deletions
diff --git a/src/carquinyol/datastore.py b/src/carquinyol/datastore.py index 6acdd44..8603af3 100644 --- a/src/carquinyol/datastore.py +++ b/src/carquinyol/datastore.py @@ -86,27 +86,27 @@ class DataStore(dbus.service.Object): self._optimizer = Optimizer(self._file_store, self._metadata_store) def _rebuild_index(self): - uvids = layoutmanager.get_instance().find_all() - logging.debug('Going to update the index with uvids %r' % uvids) - gobject.idle_add(lambda: self.__rebuild_index_cb(uvids), + tvids = layoutmanager.get_instance().find_all() + logging.debug('Going to update the index with tvids %r' % tvids) + gobject.idle_add(lambda: self.__rebuild_index_cb(tvids), priority=gobject.PRIORITY_LOW) - def __rebuild_index_cb(self, uvids): - if uvids: - (uid,vid) = uvids.pop() + def __rebuild_index_cb(self, tvids): + if tvids: + (tree_id,version_id) = tvids.pop() logging.debug('Updating entry (%r,%r) in index. %d to go.' % \ - (uid, vid, len(uvids))) + (tree_id, version_id, len(tvids))) - if not self._index_store.contains(uid,vid): + if not self._index_store.contains(tree_id,version_id): try: - props = self._metadata_store.retrieve(uid,vid) - self._index_store.store(uid, vid, props) + props = self._metadata_store.retrieve(tree_id,version_id) + self._index_store.store(tree_id, version_id, props) except Exception: logging.error('Error processing (%r,%r)\n%s.' \ - % (uid, vid, traceback.format_exc())) + % (tree_id, version_id, traceback.format_exc())) - if not uvids: + if not tvids: logging.debug('Finished updating index.') layoutmanager.get_instance().index_updated = True return False @@ -114,15 +114,15 @@ class DataStore(dbus.service.Object): return True @trace() - def _create_completion_cb(self, async_cb, async_err_cb, uid, vid, exc=None): + def _create_completion_cb(self, async_cb, async_err_cb, tree_id, version_id, exc=None): if exc is not None: async_err_cb(exc) return - self.Created(uid, vid) - self._optimizer.optimize(uid, vid) - logger.debug("created (%s,%s)" % (uid,vid)) - async_cb(uid,vid) + self.Created(tree_id, version_id) + self._optimizer.optimize(tree_id, version_id) + logger.debug("created (%s,%s)" % (tree_id,version_id)) + async_cb(tree_id,version_id) @dbus.service.method(DS_DBUS_INTERFACE, in_signature='a{sv}sb', @@ -131,37 +131,37 @@ class DataStore(dbus.service.Object): byte_arrays=True) def create(self, props, file_path, transfer_ownership, async_cb, async_err_cb): - uid = str(uuid.uuid4()) - vid = str(uuid.uuid4()) # use fake for now - logging.debug('datastore.create %r %r' % (uid, vid)) + tree_id = str(uuid.uuid4()) + version_id = str(uuid.uuid4()) # use fake for now + logging.debug('datastore.create %r %r' % (tree_id, version_id)) if not props.get('timestamp', ''): props['timestamp'] = int(time.time()) - self._metadata_store.store(uid, vid, props) - self._index_store.store(uid, vid, props) - self._file_store.store(uid, vid, file_path, transfer_ownership, + self._metadata_store.store(tree_id, version_id, props) + self._index_store.store(tree_id, version_id, props) + self._file_store.store(tree_id, version_id, file_path, transfer_ownership, lambda *args: self._create_completion_cb(async_cb, async_err_cb, - uid, - vid, + tree_id, + version_id, *args)) - return (uid, vid) + return (tree_id, version_id) @dbus.service.signal(DS_DBUS_INTERFACE, signature="ss") - def Created(self, uid, vid): + def Created(self, tree_id, version_id): pass @trace() - def _update_completion_cb(self, async_cb, async_err_cb, uid, vid, exc=None): + def _update_completion_cb(self, async_cb, async_err_cb, tree_id, version_id, exc=None): if exc is not None: async_err_cb(exc) return - self.Updated(uid,vid) - self._optimizer.optimize(uid,vid) - logger.debug("updated %s %s" % (uid, vid)) - async_cb(uid, vid) + self.Updated(tree_id,version_id) + self._optimizer.optimize(tree_id,version_id) + logger.debug("updated %s %s" % (tree_id, version_id)) + async_cb(tree_id, version_id) @dbus.service.method(DS_DBUS_INTERFACE, in_signature='ssa{sv}sb', @@ -169,7 +169,7 @@ class DataStore(dbus.service.Object): async_callbacks=('async_cb', 'async_err_cb'), byte_arrays=True) @trace(skip_args=[3], skip_kwargs=["props"]) # "preview" metadata will clutter logfile otherwise - def update(self, uid, vid, props, file_path, transfer_ownership, + def update(self, tree_id, version_id, props, file_path, transfer_ownership, async_cb, async_err_cb): if not props.get('timestamp', ''): @@ -178,24 +178,24 @@ class DataStore(dbus.service.Object): # TODO: create branch if required (inside some abstraction layer) if file_path : # only for data updates - vid = str(uuid.uuid4()) # use fake for now + version_id = str(uuid.uuid4()) # use fake for now - self._metadata_store.store(uid, vid, props) - self._index_store.store(uid, vid, props) + self._metadata_store.store(tree_id, version_id, props) + self._index_store.store(tree_id, version_id, props) -# if os.path.exists(self._file_store.get_file_path(uid, vid)) and \ +# if os.path.exists(self._file_store.get_file_path(tree_id, version_id)) and \ # (not file_path or os.path.exists(file_path)): -# self._optimizer.remove(uid, vid) - self._file_store.store(uid, vid, file_path, transfer_ownership, +# self._optimizer.remove(tree_id, version_id) + self._file_store.store(tree_id, version_id, file_path, transfer_ownership, lambda *args: self._update_completion_cb(async_cb, async_err_cb, - uid, - vid, + tree_id, + version_id, *args)) - return (uid, vid) + return (tree_id, version_id) @dbus.service.signal(DS_DBUS_INTERFACE, signature="ss") - def Updated(self, uid, vid): + def Updated(self, tree_id, version_id): pass @dbus.service.method(DS_DBUS_INTERFACE, @@ -205,10 +205,10 @@ class DataStore(dbus.service.Object): def find(self, query, properties): t = time.time() - uvids = None + tvids = None if layoutmanager.get_instance().index_updated: try: - uvids, count = self._index_store.find(query) + tvids, count = self._index_store.find(query) except Exception: logging.error('Failed to query index, will rebuild\n%s' \ % traceback.format_exc()) @@ -218,29 +218,29 @@ class DataStore(dbus.service.Object): self._index_store.open_index() self._rebuild_index() - if uvids is None : + if tvids is None : logging.warning('Index updating, returning all entries') - uvids = layoutmanager.get_instance().find_all() + tvids = layoutmanager.get_instance().find_all() if not query.get('all_versions', False) : # only return latest version for each entry - uids_vtime = {} - for (uid, vid) in uvids : - uids_vtime.setdefault(uid, []).append((vid, self._metadata_store.retrieve(uid, vid, 'timestamp'))) + tids_vtime = {} + for (tree_id, version_id) in tvids : + tids_vtime.setdefault(tree_id, []).append((version_id, self._metadata_store.retrieve(tree_id, version_id, 'timestamp'))) - uvids = [(uid, sorted(candidates, key=lambda e: e[1], reverse=True)[0][0]) - for (uid, candidates) in uids_vtime.items()] + tvids = [(tree_id, sorted(candidates, key=lambda e: e[1], reverse=True)[0][0]) + for (tree_id, candidates) in tids_vtime.items()] - count = len(uvids) + count = len(tvids) offset = query.get('offset', 0) limit = query.get('limit', MAX_QUERY_LIMIT) - uvids = uvids[offset:offset + limit] + tvids = tvids[offset:offset + limit] -# logger.debug('uvids=%r' % (uvids,)) +# logger.debug('tvids=%r' % (tvids,)) entries = [] - for (uid,vid) in uvids: - metadata = self._metadata_store.retrieve(uid, vid, properties) + for (tree_id,version_id) in tvids: + metadata = self._metadata_store.retrieve(tree_id, version_id, properties) entries.append(metadata) logger.debug('find(): %r' % (time.time() - t)) @@ -253,13 +253,13 @@ class DataStore(dbus.service.Object): out_signature='s', sender_keyword='sender') @trace() - def get_filename(self, uid, vid, sender=None): + def get_filename(self, tree_id, version_id, sender=None): user_id = dbus.Bus().get_unix_user(sender) - extension = self._get_extension(uid,vid) - return self._file_store.retrieve(uid, vid, user_id, extension) + extension = self._get_extension(tree_id,version_id) + return self._file_store.retrieve(tree_id, version_id, user_id, extension) - def _get_extension(self, uid, vid): - mime_type = self._metadata_store.get_property(uid, vid, 'mime_type') + def _get_extension(self, tree_id, version_id): + mime_type = self._metadata_store.get_property(tree_id, version_id, 'mime_type') if mime_type is None or not mime_type: return '' return mime.get_primary_extension(mime_type) @@ -268,20 +268,20 @@ class DataStore(dbus.service.Object): in_signature='ss', out_signature='a{sv}') @trace() - def get_properties(self, uid, vid): - metadata = self._metadata_store.retrieve(uid,vid) + def get_properties(self, tree_id, version_id): + metadata = self._metadata_store.retrieve(tree_id,version_id) return metadata @dbus.service.method(DS_DBUS_INTERFACE, in_signature='sa{sv}', out_signature='as') def get_uniquevaluesfor(self, propertyname, query=None): - if propertyname != 'activity': - raise ValueError('Only ''activity'' is a supported property name') + if propertyname != 'bundle_id': + raise ValueError('Only ''bundle_id'' is a supported property name') if query: raise ValueError('The query parameter is not supported') if layoutmanager.get_instance().index_updated: - return self._index_store.get_activities() + return self._index_store.get_bundle_ids() else: logging.warning('Index updating, returning an empty list') return [] @@ -289,22 +289,22 @@ class DataStore(dbus.service.Object): @dbus.service.method(DS_DBUS_INTERFACE, in_signature='ss', out_signature='') - def delete(self, uid, vid): - # TODO: vid=None/'' => remove all versions - self._optimizer.remove(uid, vid) + def delete(self, tree_id, version_id): + # TODO: version_id=None/'' => remove all versions + self._optimizer.remove(tree_id, version_id) - self._index_store.delete(uid, vid) - self._file_store.delete(uid, vid) - self._metadata_store.delete(uid, vid) + self._index_store.delete(tree_id, version_id) + self._file_store.delete(tree_id, version_id) + self._metadata_store.delete(tree_id, version_id) - entry_path = layoutmanager.get_instance().get_entry_path(uid, vid) + entry_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) os.removedirs(entry_path) - self.Deleted(uid, vid) - logger.debug("deleted (%r,%r)" % (uid,vid)) + self.Deleted(tree_id, version_id) + logger.debug("deleted (%r,%r)" % (tree_id,version_id)) @dbus.service.signal(DS_DBUS_INTERFACE, signature="ss") - def Deleted(self, uid, vid): + def Deleted(self, tree_id, version_id): pass def stop(self): diff --git a/src/carquinyol/filestore.py b/src/carquinyol/filestore.py index 7a4591c..edf06a6 100644 --- a/src/carquinyol/filestore.py +++ b/src/carquinyol/filestore.py @@ -29,11 +29,11 @@ class FileStore(object): # TODO: add protection against store and retrieve operations on entries # that are being processed async. - def store(self, uid, vid, file_path, transfer_ownership, completion_cb): + def store(self, tree_id, version_id, file_path, transfer_ownership, completion_cb): """Store a file for a given entry. """ - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -75,15 +75,15 @@ class FileStore(object): async_copy = AsyncCopy(file_path, destination_path, completion_cb) async_copy.start() - def retrieve(self, uid, vid, user_id, extension): + def retrieve(self, tree_id, version_id, user_id, extension): """Place the file associated to a given entry into a directory where the user can read it. The caller is reponsible for deleting this file. """ - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) file_path = os.path.join(dir_path, 'data') if not os.path.exists(file_path): - logging.debug('Entry (%r,%r) doesnt have any file' % (uid,vid)) + logging.debug('Entry (%r,%r) doesnt have any file' % (tree_id,version_id)) return '' use_instance_dir = os.path.exists('/etc/olpc-security') and \ @@ -105,19 +105,19 @@ class FileStore(object): elif extension: extension = '.' + extension - destination_path = os.path.join(destination_dir, "%s-%s%s" % (uid, vid, extension)) + destination_path = os.path.join(destination_dir, "%s-%s%s" % (tree_id, version_id, extension)) attempt = 1 while os.path.exists(destination_path): if attempt > 10: - fd_, destination_path = tempfile.mkstemp(prefix="%s-%s" % (uid,vid), + fd_, destination_path = tempfile.mkstemp(prefix="%s-%s" % (tree_id,version_id), suffix=extension, dir=destination_dir) del fd_ os.unlink(destination_path) break else: - file_name = '%s-%s_%s%s' % (uid, vid, attempt, extension) + file_name = '%s-%s_%s%s' % (tree_id, version_id, attempt, extension) destination_path = os.path.join(destination_dir, file_name) attempt += 1 @@ -141,25 +141,25 @@ class FileStore(object): return destination_path - def get_file_path(self, uid, vid): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def get_file_path(self, tree_id, version_id): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) return os.path.join(dir_path, 'data') - def delete(self, uid, vid): + def delete(self, tree_id, version_id): """Remove the file associated to a given entry. """ - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) file_path = os.path.join(dir_path, 'data') if os.path.exists(file_path): os.remove(file_path) - def hard_link_entry(self, new_uid, new_vid, existing_uid, existing_vid): + def hard_link_entry(self, new_tree_id, new_version_id, existing_tree_id, existing_version_id): existing_file = os.path.join( - layoutmanager.get_instance().get_entry_path(existing_uid, existing_vid), + layoutmanager.get_instance().get_entry_path(existing_tree_id, existing_version_id), 'data') new_file = os.path.join( - layoutmanager.get_instance().get_entry_path(new_uid, new_vid), + layoutmanager.get_instance().get_entry_path(new_tree_id, new_version_id), 'data') logging.debug('removing %r' % new_file) diff --git a/src/carquinyol/indexstore.py b/src/carquinyol/indexstore.py index a814f7a..ba76da6 100644 --- a/src/carquinyol/indexstore.py +++ b/src/carquinyol/indexstore.py @@ -24,13 +24,13 @@ from xapian import WritableDatabase, Document, Enquire, Query, QueryParser from carquinyol import layoutmanager from carquinyol.layoutmanager import MAX_QUERY_LIMIT -_VALUE_UID = 0 +_VALUE_TID = 0 _VALUE_TIMESTAMP = 1 _VALUE_VID = 2 -_PREFIX_UID = 'Q' +_PREFIX_TID = 'Q' _PREFIX_VID = 'V' -_PREFIX_ACTIVITY = 'A' +_PREFIX_BUNDLE_ID = 'A' _PREFIX_ACTIVITY_ID = 'I' _PREFIX_MIME_TYPE = 'M' _PREFIX_KEEP = 'K' @@ -68,27 +68,27 @@ class IndexStore(object): for f in os.listdir(index_path): os.remove(os.path.join(index_path, f)) - def contains(self, uid, vid): - postings = self._database.postlist(_PREFIX_UID + uid + _PREFIX_VID + vid) + def contains(self, tree_id, version_id): + postings = self._database.postlist(_PREFIX_TID + tree_id + _PREFIX_VID + version_id) try: postlist_item = postings.next() except StopIteration: return False return True - def store(self, uid, vid, properties): + def store(self, tree_id, version_id, properties): document = Document() - document.add_term("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid)) - document.add_term(_PREFIX_UID + uid) - document.add_term(_PREFIX_VID + vid) - document.add_term(_PREFIX_ACTIVITY + properties.get('activity', '')) + document.add_term("%s%s%s%s" % (_PREFIX_TID, tree_id, _PREFIX_VID, version_id)) + document.add_term(_PREFIX_TID + tree_id) + document.add_term(_PREFIX_VID + version_id) + document.add_term(_PREFIX_BUNDLE_ID + properties.get('bundle_id', '')) document.add_term(_PREFIX_MIME_TYPE + properties.get('mime_type', '')) document.add_term(_PREFIX_ACTIVITY_ID + properties.get('activity_id', '')) document.add_term(_PREFIX_KEEP + str(properties.get('keep', 0))) - document.add_value(_VALUE_UID, uid) - document.add_value(_VALUE_VID, vid) + document.add_value(_VALUE_TID, tree_id) + document.add_value(_VALUE_VID, version_id) document.add_value(_VALUE_TIMESTAMP, str(properties['timestamp'])) term_generator = xapian.TermGenerator() @@ -108,10 +108,10 @@ class IndexStore(object): term_generator.index_text_without_positions( self._extract_text(properties), 1, '') - if not self.contains(uid, vid): + if not self.contains(tree_id, version_id): self._database.add_document(document) else: - self._database.replace_document("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid), document) + self._database.replace_document("%s%s%s%s" % (_PREFIX_TID, tree_id, _PREFIX_VID, version_id), document) self._flush() def _extract_text(self, properties): @@ -144,25 +144,25 @@ class IndexStore(object): query_result = enquire.get_mset(offset, limit, check_at_least) total_count = query_result.get_matches_estimated() - uvids = [] + tvids = [] timestamps = [] for hit in query_result: - uvids.append((hit.document.get_value(_VALUE_UID), hit.document.get_value(_VALUE_VID))) + tvids.append((hit.document.get_value(_VALUE_TID), hit.document.get_value(_VALUE_VID))) if not all_versions : # only return latest version for each entry - # result set is already sorted by time so we only need to take the first entry for each uid - uids_vid = {} - uvids_new = [] - for (uid, vid) in uvids : - if uid not in uids_vid : - uids_vid[uid] = vid - uvids_new.append((uid,vid)) + # result set is already sorted by time so we only need to take the first entry for each tree_id + tids_vid = {} + tvids_new = [] + for (tree_id, version_id) in tvids : + if tree_id not in tids_vid : + tids_vid[tree_id] = version_id + tvids_new.append((tree_id,version_id)) - uvids = uvids_new + tvids = tvids_new # TODO: total_count will be totally off if all_versions is not set - return (uvids, total_count) + return (tvids, total_count) def _parse_query(self, query_dict): logging.debug('_parse_query %r' % query_dict) @@ -196,17 +196,17 @@ class IndexStore(object): queries.append(query) # TODO: refactor / simplify - uid = query_dict.pop('uid', None) - if uid is not None: - queries.append(Query(_PREFIX_UID + uid)) + tree_id = query_dict.pop('tree_id', None) + if tree_id is not None: + queries.append(Query(_PREFIX_TID + tree_id)) - vid = query_dict.pop('vid', None) - if vid is not None: - queries.append(Query(_PREFIX_VID + vid)) + version_id = query_dict.pop('version_id', None) + if version_id is not None: + queries.append(Query(_PREFIX_VID + version_id)) - activity = query_dict.pop('activity', None) - if activity is not None: - queries.append(Query(_PREFIX_ACTIVITY + activity)) + bundle_id = query_dict.pop('bundle_id', None) + if bundle_id is not None: + queries.append(Query(_PREFIX_BUNDLE_ID + bundle_id)) activity_id = query_dict.pop('activity_id', None) if activity_id is not None: @@ -233,14 +233,14 @@ class IndexStore(object): return Query(Query.OP_AND, queries) - def delete(self, uid, vid): - self._database.delete_document("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid)) + def delete(self, tree_id, version_id): + self._database.delete_document("%s%s%s%s" % (_PREFIX_TID, tree_id, _PREFIX_VID, version_id)) - def get_activities(self): - activities = [] - for term in self._database.allterms(_PREFIX_ACTIVITY): - activities.append(term.term[len(_PREFIX_ACTIVITY):]) - return activities + def get_bundle_ids(self): + bundle_ids = [] + for term in self._database.allterms(_PREFIX_BUNDLE_ID): + bundle_ids.append(term.term[len(_PREFIX_BUNDLE_ID):]) + return bundle_ids def _flush_timeout_cb(self): self._flush(True) diff --git a/src/carquinyol/layoutmanager.py b/src/carquinyol/layoutmanager.py index 1a75052..abc8376 100644 --- a/src/carquinyol/layoutmanager.py +++ b/src/carquinyol/layoutmanager.py @@ -53,9 +53,9 @@ class LayoutManager(object): version_path = os.path.join(self._root_path, 'version') open(version_path, 'w').write(str(version)) - def get_entry_path(self, uid, vid): + def get_entry_path(self, tree_id, version_id): # os.path.join() is just too slow - return '%s/%s/%s-%s' % (self._root_path, uid[:2], uid, vid) + return '%s/%s/%s-%s' % (self._root_path, tree_id[:2], tree_id, version_id) def get_root_path(self): return self._root_path @@ -85,13 +85,13 @@ class LayoutManager(object): index_updated = property(_is_index_updated, _set_index_updated) def find_all(self): - uvids = [] + tvids = [] for f in os.listdir(self._root_path): if os.path.isdir(os.path.join(self._root_path, f)) and len(f) == 2: for g in os.listdir(os.path.join(self._root_path, f)): if len(g) == 73: - uvids.append((g[:36], g[37:])) - return uvids + tvids.append((g[:36], g[37:])) + return tvids _instance = None def get_instance(): diff --git a/src/carquinyol/metadatastore.py b/src/carquinyol/metadatastore.py index b5bfeb1..0e062e4 100644 --- a/src/carquinyol/metadatastore.py +++ b/src/carquinyol/metadatastore.py @@ -6,8 +6,8 @@ from carquinyol import metadatareader MAX_SIZE = 256 class MetadataStore(object): - def store(self, uid, vid, metadata): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def store(self, tree_id, version_id, metadata): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) if not os.path.exists(dir_path): os.makedirs(dir_path) @@ -18,8 +18,8 @@ class MetadataStore(object): for key in os.listdir(metadata_path): os.remove(os.path.join(metadata_path, key)) - metadata['uid'] = uid - metadata['vid'] = vid + metadata['tree_id'] = tree_id + metadata['version_id'] = version_id for key, value in metadata.items(): # Hack to support activities that still pass properties named as for @@ -37,19 +37,19 @@ class MetadataStore(object): finally: f.close() - def retrieve(self, uid, vid, properties=None): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def retrieve(self, tree_id, version_id, properties=None): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) return metadatareader.retrieve(dir_path, properties) - def delete(self, uid, vid): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def delete(self, tree_id, version_id): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) metadata_path = os.path.join(dir_path, 'metadata') for key in os.listdir(metadata_path): os.remove(os.path.join(metadata_path, key)) os.rmdir(metadata_path) - def get_property(self, uid, vid, key): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def get_property(self, tree_id, version_id, key): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) metadata_path = os.path.join(dir_path, 'metadata') property_path = os.path.join(metadata_path, key) if os.path.exists(property_path): @@ -57,8 +57,8 @@ class MetadataStore(object): else: return None - def set_property(self, uid, vid, key, value): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) + def set_property(self, tree_id, version_id, key, value): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) metadata_path = os.path.join(dir_path, 'metadata') property_path = os.path.join(metadata_path, key) open(property_path, 'w').write(value) diff --git a/src/carquinyol/migration.py b/src/carquinyol/migration.py index 0722e43..6502c2a 100644 --- a/src/carquinyol/migration.py +++ b/src/carquinyol/migration.py @@ -39,19 +39,19 @@ def migrate_from_0(): return for f in os.listdir(old_root_path): - uid, ext = os.path.splitext(f) + tree_id, ext = os.path.splitext(f) if ext != '.metadata': continue - logging.debug('Migrating entry %r' % uid) - vid = str(uuid.uuid4()) + logging.debug('Migrating entry %r' % tree_id) + version_id = str(uuid.uuid4()) try: - _migrate_metadata_0(root_path, old_root_path, uid, vid) - _migrate_file_0(root_path, old_root_path, uid, vid) - _migrate_preview_0(root_path, old_root_path, uid, vid) + _migrate_metadata_0(root_path, old_root_path, tree_id, version_id) + _migrate_file_0(root_path, old_root_path, tree_id, version_id) + _migrate_preview_0(root_path, old_root_path, tree_id, version_id) except Exception: logging.error('Error while migrating entry %r: %s\n' % \ - (uid, traceback.format_exc())) + (tree_id, traceback.format_exc())) # Just be paranoid, it's cheap. if old_root_path.endswith('datastore/store'): @@ -59,18 +59,18 @@ def migrate_from_0(): logging.info('Migration finished') -def _migrate_metadata_0(root_path, old_root_path, uid, vid): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) +def _migrate_metadata_0(root_path, old_root_path, tree_id, version_id): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) metadata_path = os.path.join(dir_path, 'metadata') os.makedirs(metadata_path) - old_metadata_path = os.path.join(old_root_path, uid + '.metadata') + old_metadata_path = os.path.join(old_root_path, tree_id + '.metadata') metadata = cjson.decode(open(old_metadata_path, 'r').read()) - if 'uid' not in metadata: - metadata['uid'] = uid + if 'tree_id' not in metadata: + metadata['tree_id'] = tree_id - metadata['vid'] = vid + metadata['version_id'] = version_id if 'timestamp' not in metadata and 'mtime' in metadata: metadata['timestamp'] = \ @@ -90,18 +90,18 @@ def _migrate_metadata_0(root_path, old_root_path, uid, vid): except Exception: logging.error( 'Error while migrating property %s of entry %s: %s\n' % \ - (key, uid, traceback.format_exc())) + (key, tree_id, traceback.format_exc())) -def _migrate_file_0(root_path, old_root_path, uid, vid): - if os.path.exists(os.path.join(old_root_path, uid)): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) - os.rename(os.path.join(old_root_path, uid), +def _migrate_file_0(root_path, old_root_path, tree_id, version_id): + if os.path.exists(os.path.join(old_root_path, tree_id)): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) + os.rename(os.path.join(old_root_path, tree_id), os.path.join(dir_path, 'data')) -def _migrate_preview_0(root_path, old_root_path, uid, vid): - dir_path = layoutmanager.get_instance().get_entry_path(uid, vid) +def _migrate_preview_0(root_path, old_root_path, tree_id, version_id): + dir_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) metadata_path = os.path.join(dir_path, 'metadata') - os.rename(os.path.join(old_root_path, 'preview', uid), + os.rename(os.path.join(old_root_path, 'preview', tree_id), os.path.join(metadata_path, 'preview')) @@ -111,42 +111,46 @@ def migrate_from_1(): root_path = layoutmanager.get_instance().get_root_path() checksum_path = os.path.join(root_path, "checksums") - vids = {} + version_ids = {} for hash02 in os.listdir(root_path): if len(hash02) != 2 : continue - for uid in os.listdir(os.path.join(root_path, hash02)) : - if (len(uid) != 36) : + for tree_id in os.listdir(os.path.join(root_path, hash02)) : + if (len(tree_id) != 36) : continue - logging.debug('Migrating entry %r' % uid) + logging.debug('Migrating entry %r' % tree_id) - vid = str(uuid.uuid4()) - vids[uid] = vid + version_id = str(uuid.uuid4()) + version_ids[tree_id] = version_id try: - new_path = layoutmanager.get_instance().get_entry_path(uid, vid) - os.rename(os.path.join(root_path, hash02, uid), + new_path = layoutmanager.get_instance().get_entry_path(tree_id, version_id) + os.rename(os.path.join(root_path, hash02, tree_id), new_path) - file(os.path.join(new_path, "metadata", "vid"), "w").write(vid) + file(os.path.join(new_path, "metadata", "version_id"), "w").write(version_id) + uid_path = os.path.join(new_path, "metadata", "uid") + tid_path = os.path.join(new_path, "metadata", "tree_id") + if os.path.exists(uid_path) : + os.rename(uid_path, tid_path) except Exception: logging.error('Error while migrating entry %r: %s\n' % \ - (uid, traceback.format_exc())) + (tree_id, traceback.format_exc())) for checksum in os.listdir(checksum_path) : entries_path = os.path.join(checksum_path, checksum) - for uid in os.listdir(entries_path) : - if len(uid) != 36 : + for tree_id in os.listdir(entries_path) : + if len(tree_id) != 36 : continue try : - os.rename(os.path.join(entries_path, uid), - os.path.join(entries_path, "%s-%s" % (uid,vids[uid]))) + os.rename(os.path.join(entries_path, tree_id), + os.path.join(entries_path, "%s-%s" % (tree_id,version_ids[tree_id]))) except Exception: logging.error('Error while migrating checksum entry %r / %r: %s\n' % \ - (checksum, uid, traceback.format_exc())) + (checksum, tree_id, traceback.format_exc())) logging.info('Migration finished') diff --git a/src/carquinyol/optimizer.py b/src/carquinyol/optimizer.py index c4a0681..7aed8fb 100644 --- a/src/carquinyol/optimizer.py +++ b/src/carquinyol/optimizer.py @@ -32,15 +32,15 @@ class Optimizer(object): self._metadata_store = metadata_store self._enqueue_checksum_id = None - def optimize(self, uid, vid): + def optimize(self, tree_id, version_id): """Add an entry to a queue of entries to be checked for duplicates. """ - if not os.path.exists(self._file_store.get_file_path(uid, vid)): + if not os.path.exists(self._file_store.get_file_path(tree_id, version_id)): return queue_path = layoutmanager.get_instance().get_queue_path() - fname = os.path.join(queue_path, "%s-%s" % (uid, vid)) + fname = os.path.join(queue_path, "%s-%s" % (tree_id, version_id)) open(fname, 'w').close() logging.debug('optimize %r' % fname) @@ -49,17 +49,17 @@ class Optimizer(object): gobject.idle_add(self._process_entry_cb, priority=gobject.PRIORITY_LOW) - def remove(self, uid, vid): + def remove(self, tree_id, version_id): """Remove any structures left from space optimization """ - checksum = self._metadata_store.get_property(uid, vid, 'checksum') + checksum = self._metadata_store.get_property(tree_id, version_id, 'checksum') if checksum is None: return checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - checksum_entry_path = os.path.join(checksum_path, "%s-%s" % (uid,vid)) + checksum_entry_path = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id)) if os.path.exists(checksum_entry_path): logging.debug('remove %r' % checksum_entry_path) @@ -99,25 +99,25 @@ class Optimizer(object): logging.debug('create dir %r' % checksum_path) os.mkdir(checksum_path) - def _add_checksum_entry(self, uid, vid, checksum): - """Create a file in the checksum dir with the uid and vid of the entry + def _add_checksum_entry(self, tree_id, version_id, checksum): + """Create a file in the checksum dir with the tree_id and version_id of the entry """ checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - fname = os.path.join(checksum_path, "%s-%s" % (uid,vid)) + fname = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id)) logging.debug('touch %r' % fname) open(fname, 'w').close() - def _already_linked(self, uid, vid, checksum): + def _already_linked(self, tree_id, version_id, checksum): """Check if this entry's file is already a hard link to the checksums dir. """ checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - return os.path.exists(os.path.join(checksum_path, "%s-%s" % (uid,vid))) + return os.path.exists(os.path.join(checksum_path, "%s-%s" % (tree_id,version_id))) def _process_entry_cb(self): """Process one item in the checksums queue by calculating its checksum, @@ -128,30 +128,30 @@ class Optimizer(object): queue_path = layoutmanager.get_instance().get_queue_path() queue = os.listdir(queue_path) if queue: - (uid,vid) = queue[0][:36], queue[0][37:] - logging.debug('_process_entry_cb processing (%r,%r)' % (uid,vid)) + (tree_id,version_id) = queue[0][:36], queue[0][37:] + logging.debug('_process_entry_cb processing (%r,%r)' % (tree_id,version_id)) - file_in_entry_path = self._file_store.get_file_path(uid,vid) + file_in_entry_path = self._file_store.get_file_path(tree_id,version_id) if not os.path.exists(file_in_entry_path): - logging.info('non-existent entry in queue: (%r,%r)' % (uid,vid)) + logging.info('non-existent entry in queue: (%r,%r)' % (tree_id,version_id)) else: checksum = self._calculate_md5sum(file_in_entry_path) - self._metadata_store.set_property(uid, vid, 'checksum', checksum) + self._metadata_store.set_property(tree_id, version_id, 'checksum', checksum) if self._identical_file_already_exists(checksum): - if not self._already_linked(uid, vid, checksum): + if not self._already_linked(tree_id, version_id, checksum): existing_entry_uvid = \ self._get_uvid_from_checksum(checksum) - self._file_store.hard_link_entry(uid, vid, + self._file_store.hard_link_entry(tree_id, version_id, *existing_entry_uvid) - self._add_checksum_entry(uid, vid, checksum) + self._add_checksum_entry(tree_id, version_id, checksum) else: self._create_checksum_dir(checksum) - self._add_checksum_entry(uid, vid, checksum) + self._add_checksum_entry(tree_id, version_id, checksum) - os.remove(os.path.join(queue_path, "%s-%s" % (uid,vid))) + os.remove(os.path.join(queue_path, "%s-%s" % (tree_id,version_id))) if len(queue) <= 1: self._enqueue_checksum_id = None |