Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTomeu Vizoso <tomeu@tomeuvizoso.net>2008-09-30 17:16:49 (GMT)
committer Tomeu Vizoso <tomeu@tomeuvizoso.net>2008-09-30 17:16:49 (GMT)
commit6414ee65348f3da257f5b1eae1edfff4c62defbc (patch)
tree565dc3639c40888fe739654eb5ef88c57779bc50
parent2f325039969506164a2c79f6e73ea7ec56ee71c1 (diff)
Use a simple file instead of a link to keep the count of which entries share the same checksum
-rw-r--r--src/olpc/datastore/filestore.py14
-rw-r--r--src/olpc/datastore/optimizer.py31
2 files changed, 24 insertions, 21 deletions
diff --git a/src/olpc/datastore/filestore.py b/src/olpc/datastore/filestore.py
index 0640403..c521380 100644
--- a/src/olpc/datastore/filestore.py
+++ b/src/olpc/datastore/filestore.py
@@ -114,6 +114,20 @@ class FileStore(object):
if os.path.exists(file_path):
os.remove(file_path)
+ def hard_link_entry(self, new_uid, existing_uid):
+ existing_file = os.path.join(
+ layoutmanager.get_instance().get_entry_path(existing_uid),
+ 'data')
+ new_file = os.path.join(
+ layoutmanager.get_instance().get_entry_path(new_uid),
+ 'data')
+
+ logging.debug('removing %r' % new_file)
+ os.remove(new_file)
+
+ logging.debug('hard linking %r -> %r' % (new_file, existing_file))
+ os.link(existing_file, new_file)
+
class AsyncCopy(object):
"""Copy a file in chunks in the idle loop.
diff --git a/src/olpc/datastore/optimizer.py b/src/olpc/datastore/optimizer.py
index 0eedda8..78e47c2 100644
--- a/src/olpc/datastore/optimizer.py
+++ b/src/olpc/datastore/optimizer.py
@@ -59,15 +59,14 @@ class Optimizer(object):
checksum_path = os.path.join(checksums_dir, checksum)
return os.path.exists(checksum_path)
- def _get_file_from_checksum(self, checksum):
- """Get a file that matches checksum.
+ def _get_uid_from_checksum(self, checksum):
+ """Get an existing entry which file matches checksum.
"""
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- first_file_link = os.listdir(checksum_path)[0]
- first_file = os.readlink(os.path.join(checksum_path, first_file_link))
- return first_file
+ first_uid = os.listdir(checksum_path)[0]
+ return first_uid
def _create_checksum_dir(self, checksum):
"""Create directory that tracks files with this same checksum.
@@ -79,18 +78,14 @@ class Optimizer(object):
os.mkdir(checksum_path)
def _add_checksum_entry(self, uid, checksum):
- """Create a symbolic link in the checksum dir to the file in the entry
- dir.
+ """Create a file in the checksum dir with the uid of the entry
"""
- entry_path = layoutmanager.get_instance().get_entry_path(uid)
checksums_dir = layoutmanager.get_instance().get_checksums_dir()
checksum_path = os.path.join(checksums_dir, checksum)
- logging.debug('symlink %r -> %r' % (os.path.join(checksum_path, uid),
- os.path.join(entry_path, 'data')))
- os.symlink(os.path.join(entry_path, 'data'),
- os.path.join(checksum_path, uid))
+ logging.debug('touch %r' % os.path.join(checksum_path, uid))
+ open(os.path.join(checksum_path, uid), 'w').close()
def _already_linked(self, uid, checksum):
"""Check if this entry's file is already a hard link to the checksums
@@ -112,20 +107,14 @@ class Optimizer(object):
if queue:
uid = queue[0]
logging.debug('_process_entry_cb processing %r' % uid)
- entry_path = layoutmanager.get_instance().get_entry_path(uid)
- file_in_entry_path = os.path.join(entry_path, 'data')
+ file_in_entry_path = self._file_store.get_file_path(uid)
checksum = self._calculate_md5sum(file_in_entry_path)
self._metadata_store.set_property(uid, 'checksum', checksum)
if self._identical_file_already_exists(checksum):
if not self._already_linked(uid, checksum):
- logging.debug('delete %r' % file_in_entry_path)
- os.remove(file_in_entry_path)
-
- existing_file = self._get_file_from_checksum(checksum)
- logging.debug('link %r -> %r' % \
- (existing_file, file_in_entry_path))
- os.link(existing_file, file_in_entry_path)
+ existing_entry_uid = self._get_uid_from_checksum(checksum)
+ self._file_store.hard_link_entry(uid, existing_entry_uid)
self._add_checksum_entry(uid, checksum)
else: