Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/src/carquinyol/optimizer.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/carquinyol/optimizer.py')
-rw-r--r-- src/carquinyol/optimizer.py 62
1 files changed, 33 insertions, 29 deletions
diff --git a/src/carquinyol/optimizer.py b/src/carquinyol/optimizer.py
index f8a2e3e..19849e6 100644
--- a/src/carquinyol/optimizer.py
+++ b/src/carquinyol/optimizer.py
@@ -18,11 +18,13 @@ import os
import errno
import logging
import subprocess
+import uuid
import gobject
from carquinyol import layoutmanager
+# TODO: use layoutmanager for entries in 'checksums' directory
class Optimizer(object):
"""Optimizes disk space usage by detecting duplicates and sharing storage.
"""
@@ -31,33 +33,34 @@ class Optimizer(object):
self._metadata_store = metadata_store
self._enqueue_checksum_id = None
- def optimize(self, uid):
+ def optimize(self, tree_id, version_id):
"""Add an entry to a queue of entries to be checked for duplicates.
"""
- if not os.path.exists(self._file_store.get_file_path(uid)):
+ if not os.path.exists(self._file_store.get_file_path(tree_id, version_id)):
return
queue_path = layoutmanager.get_instance().get_queue_path()
- open(os.path.join(queue_path, uid), 'w').close()
- logging.debug('optimize %r' % os.path.join(queue_path, uid))
+ fname = os.path.join(queue_path, "%s-%s" % (tree_id, version_id))
+ open(fname, 'w').close()
+ logging.debug('optimize %r' % fname)
if self._enqueue_checksum_id is None:
self._enqueue_checksum_id = \
gobject.idle_add(self._process_entry_cb,
priority=gobject.PRIORITY_LOW)
- def remove(self, uid):
+ def remove(self, tree_id, version_id):
"""Remove any structures left from space optimization
"""
- checksum = self._metadata_store.get_property(uid, 'checksum')
+ checksum = self._metadata_store.get_property(tree_id, version_id, 'checksum')
if checksum is None:
return
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- checksum_entry_path = os.path.join(checksum_path, uid)
+ checksum_entry_path = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id))
if os.path.exists(checksum_entry_path):
logging.debug('remove %r' % checksum_entry_path)
@@ -79,14 +82,14 @@ class Optimizer(object):
checksum_path = os.path.join(checksums_dir, checksum)
return os.path.exists(checksum_path)
- def _get_uid_from_checksum(self, checksum):
+ def _get_uvid_from_checksum(self, checksum):
"""Get an existing entry which file matches checksum.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- first_uid = os.listdir(checksum_path)[0]
- return first_uid
+ first_uvid = os.listdir(checksum_path)[0]
+ return (first_uvid[:36], first_uvid[37:])
def _create_checksum_dir(self, checksum):
"""Create directory that tracks files with this same checksum.
@@ -97,24 +100,25 @@ class Optimizer(object):
logging.debug('create dir %r' % checksum_path)
os.mkdir(checksum_path)
- def _add_checksum_entry(self, uid, checksum):
- """Create a file in the checksum dir with the uid of the entry
+ def _add_checksum_entry(self, tree_id, version_id, checksum):
+ """Create a file in the checksum dir with the tree_id and version_id of the entry
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- logging.debug('touch %r' % os.path.join(checksum_path, uid))
- open(os.path.join(checksum_path, uid), 'w').close()
+ fname = os.path.join(checksum_path, "%s-%s" % (tree_id,version_id))
+ logging.debug('touch %r' % fname)
+ open(fname, 'w').close()
- def _already_linked(self, uid, checksum):
+ def _already_linked(self, tree_id, version_id, checksum):
"""Check if this entry's file is already a hard link to the checksums
dir.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- return os.path.exists(os.path.join(checksum_path, uid))
+ return os.path.exists(os.path.join(checksum_path, "%s-%s" % (tree_id,version_id)))
def _process_entry_cb(self):
"""Process one item in the checksums queue by calculating its checksum,
@@ -125,30 +129,30 @@ class Optimizer(object):
queue_path = layoutmanager.get_instance().get_queue_path()
queue = os.listdir(queue_path)
if queue:
- uid = queue[0]
- logging.debug('_process_entry_cb processing %r' % uid)
+ (tree_id,version_id) = queue[0][:36], queue[0][37:]
+ logging.debug('_process_entry_cb processing (%r,%r)' % (tree_id,version_id))
- file_in_entry_path = self._file_store.get_file_path(uid)
+ file_in_entry_path = self._file_store.get_file_path(tree_id,version_id)
if not os.path.exists(file_in_entry_path):
- logging.info('non-existent entry in queue: %r' % uid)
+ logging.info('non-existent entry in queue: (%r,%r)' % (tree_id,version_id))
else:
checksum = self._calculate_md5sum(file_in_entry_path)
- self._metadata_store.set_property(uid, 'checksum', checksum)
+ self._metadata_store.set_property(tree_id, version_id, 'checksum', checksum)
if self._identical_file_already_exists(checksum):
- if not self._already_linked(uid, checksum):
- existing_entry_uid = \
- self._get_uid_from_checksum(checksum)
+ if not self._already_linked(tree_id, version_id, checksum):
+ existing_entry_uvid = \
+ self._get_uvid_from_checksum(checksum)
- self._file_store.hard_link_entry(uid,
- existing_entry_uid)
+ self._file_store.hard_link_entry(tree_id, version_id,
+ *existing_entry_uvid)
- self._add_checksum_entry(uid, checksum)
+ self._add_checksum_entry(tree_id, version_id, checksum)
else:
self._create_checksum_dir(checksum)
- self._add_checksum_entry(uid, checksum)
+ self._add_checksum_entry(tree_id, version_id, checksum)
- os.remove(os.path.join(queue_path, uid))
+ os.remove(os.path.join(queue_path, "%s-%s" % (tree_id,version_id)))
if len(queue) <= 1:
self._enqueue_checksum_id = None