diff options
Diffstat (limited to 'src/carquinyol/optimizer.py')
-rw-r--r-- | src/carquinyol/optimizer.py | 62 |
1 files changed, 33 insertions, 29 deletions
diff --git a/src/carquinyol/optimizer.py b/src/carquinyol/optimizer.py index f8a2e3e..19849e6 100644 --- a/src/carquinyol/optimizer.py +++ b/src/carquinyol/optimizer.py @@ -18,11 +18,13 @@ import os import errno import logging import subprocess +import uuid import gobject from carquinyol import layoutmanager +# TODO: use layoutmanager for entries in 'checksums' directory class Optimizer(object): """Optimizes disk space usage by detecting duplicates and sharing storage. """ @@ -31,33 +33,34 @@ class Optimizer(object): self._metadata_store = metadata_store self._enqueue_checksum_id = None - def optimize(self, uid): + def optimize(self, tree_id, version_id): """Add an entry to a queue of entries to be checked for duplicates. """ - if not os.path.exists(self._file_store.get_file_path(uid)): + if not os.path.exists(self._file_store.get_file_path(tree_id, version_id)): return queue_path = layoutmanager.get_instance().get_queue_path() - open(os.path.join(queue_path, uid), 'w').close() - logging.debug('optimize %r' % os.path.join(queue_path, uid)) + fname = os.path.join(queue_path, "%s-%s" % (tree_id, version_id)) + open(fname, 'w').close() + logging.debug('optimize %r' % fname) if self._enqueue_checksum_id is None: self._enqueue_checksum_id = \ gobject.idle_add(self._process_entry_cb, priority=gobject.PRIORITY_LOW) - def remove(self, uid): + def remove(self, tree_id, version_id): """Remove any structures left from space optimization """ - checksum = self._metadata_store.get_property(uid, 'checksum') + checksum = self._metadata_store.get_property(tree_id, version_id, 'checksum') if checksum is None: return checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - checksum_entry_path = os.path.join(checksum_path, uid) + checksum_entry_path = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id)) if os.path.exists(checksum_entry_path): logging.debug('remove %r' % checksum_entry_path) @@ -79,14 +82,14 @@ class Optimizer(object): checksum_path = os.path.join(checksums_dir, checksum) return os.path.exists(checksum_path) - def _get_uid_from_checksum(self, checksum): + def _get_uvid_from_checksum(self, checksum): """Get an existing entry which file matches checksum. """ checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - first_uid = os.listdir(checksum_path)[0] - return first_uid + first_uvid = os.listdir(checksum_path)[0] + return (first_uvid[:36], first_uvid[37:]) def _create_checksum_dir(self, checksum): """Create directory that tracks files with this same checksum. @@ -97,24 +100,25 @@ class Optimizer(object): logging.debug('create dir %r' % checksum_path) os.mkdir(checksum_path) - def _add_checksum_entry(self, uid, checksum): - """Create a file in the checksum dir with the uid of the entry + def _add_checksum_entry(self, tree_id, version_id, checksum): + """Create a file in the checksum dir with the tree_id and version_id of the entry """ checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - logging.debug('touch %r' % os.path.join(checksum_path, uid)) - open(os.path.join(checksum_path, uid), 'w').close() + fname = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id)) + logging.debug('touch %r' % fname) + open(fname, 'w').close() - def _already_linked(self, uid, checksum): + def _already_linked(self, tree_id, version_id, checksum): """Check if this entry's file is already a hard link to the checksums dir. """ checksums_dir = layoutmanager.get_instance().get_checksums_dir() checksum_path = os.path.join(checksums_dir, checksum) - return os.path.exists(os.path.join(checksum_path, uid)) + return os.path.exists(os.path.join(checksum_path, "%s-%s" % (tree_id,version_id))) def _process_entry_cb(self): """Process one item in the checksums queue by calculating its checksum, @@ -125,30 +129,30 @@ class Optimizer(object): queue_path = layoutmanager.get_instance().get_queue_path() queue = os.listdir(queue_path) if queue: - uid = queue[0] - logging.debug('_process_entry_cb processing %r' % uid) + (tree_id,version_id) = queue[0][:36], queue[0][37:] + logging.debug('_process_entry_cb processing (%r,%r)' % (tree_id,version_id)) - file_in_entry_path = self._file_store.get_file_path(uid) + file_in_entry_path = self._file_store.get_file_path(tree_id,version_id) if not os.path.exists(file_in_entry_path): - logging.info('non-existent entry in queue: %r' % uid) + logging.info('non-existent entry in queue: (%r,%r)' % (tree_id,version_id)) else: checksum = self._calculate_md5sum(file_in_entry_path) - self._metadata_store.set_property(uid, 'checksum', checksum) + self._metadata_store.set_property(tree_id, version_id, 'checksum', checksum) if self._identical_file_already_exists(checksum): - if not self._already_linked(uid, checksum): - existing_entry_uid = \ - self._get_uid_from_checksum(checksum) + if not self._already_linked(tree_id, version_id, checksum): + existing_entry_uvid = \ + self._get_uvid_from_checksum(checksum) - self._file_store.hard_link_entry(uid, - existing_entry_uid) + self._file_store.hard_link_entry(tree_id, version_id, + *existing_entry_uvid) - self._add_checksum_entry(uid, checksum) + self._add_checksum_entry(tree_id, version_id, checksum) else: self._create_checksum_dir(checksum) - self._add_checksum_entry(uid, checksum) + self._add_checksum_entry(tree_id, version_id, checksum) - os.remove(os.path.join(queue_path, uid)) + os.remove(os.path.join(queue_path, "%s-%s" % (tree_id,version_id))) if len(queue) <= 1: self._enqueue_checksum_id = None |