Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Schampijer <simon@schampijer.de>2010-12-14 19:22:44 (GMT)
committer Simon Schampijer <simon@schampijer.de>2010-12-14 19:22:44 (GMT)
commitc7f625e951df6b679092058b267e86c77b9f04a8 (patch)
treedb9da153b1cd235351aebb77195ffc565d6af061
parentc6068ef3d6df54e777d059be889122fd838f73ed (diff)
Make scanning of storage devices more robust OLPC #10140 (James Cameron)
-rw-r--r--src/jarabe/journal/model.py168
1 files changed, 112 insertions, 56 deletions
diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
index 521cd8b..a93321e 100644
--- a/src/jarabe/journal/model.py
+++ b/src/jarabe/journal/model.py
@@ -16,12 +16,12 @@
import logging
import os
+import errno
from datetime import datetime
import time
import shutil
import tempfile
-from stat import S_IFMT, S_IFDIR, S_IFREG
-import traceback
+from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
import re
import json
from gettext import gettext as _
@@ -262,7 +262,9 @@ class InplaceResultSet(BaseResultSet):
BaseResultSet.__init__(self, query, cache_limit)
self._mount_point = mount_point
self._file_list = None
- self._pending_directories = 0
+ self._pending_directories = []
+ self._visited_directories = []
+ self._pending_files = []
self._stopped = False
query_text = query.get('query', '')
@@ -287,7 +289,10 @@ class InplaceResultSet(BaseResultSet):
def setup(self):
self._file_list = []
- self._recurse_dir(self._mount_point)
+ self._pending_directories = [self._mount_point]
+ self._visited_directories = []
+ self._pending_files = []
+ gobject.idle_add(self._scan)
def stop(self):
self._stopped = True
@@ -314,7 +319,6 @@ class InplaceResultSet(BaseResultSet):
entries = []
for file_path, stat, mtime_, metadata in files:
if metadata is None:
- # FIXME: the find should fetch metadata
metadata = _get_file_metadata(file_path, stat)
metadata['mountpoint'] = self._mount_point
entries.append(metadata)
@@ -323,62 +327,114 @@ class InplaceResultSet(BaseResultSet):
return entries, total_count
- def _recurse_dir(self, dir_path):
+ def _scan(self):
if self._stopped:
+ return False
+
+ self.progress.send(self)
+
+ if self._pending_files:
+ self._scan_a_file()
+ return True
+
+ if self._pending_directories:
+ self._scan_a_directory()
+ return True
+
+ self.setup_ready()
+ self._visited_directories = []
+ return False
+
+ def _scan_a_file(self):
+ full_path = self._pending_files.pop(0)
+ metadata = None
+
+ try:
+ stat = os.lstat(full_path)
+ except OSError, e:
+ if e.errno != errno.ENOENT:
+ logging.exception(
+ 'Error reading metadata of file %r', full_path)
return
- for entry in os.listdir(dir_path):
- if entry.startswith('.'):
- continue
- full_path = dir_path + '/' + entry
+ if S_IFMT(stat.st_mode) == S_IFLNK:
+ try:
+ link = os.readlink(full_path)
+ except OSError, e:
+ logging.exception(
+ 'Error reading target of link %r', full_path)
+ return
+
+ if not os.path.abspath(link).startswith(self._mount_point):
+ return
+
try:
stat = os.stat(full_path)
- if S_IFMT(stat.st_mode) == S_IFDIR:
- self._pending_directories += 1
- gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
-
- elif S_IFMT(stat.st_mode) == S_IFREG:
- add_to_list = True
- metadata = None
-
- if self._regex is not None and \
- not self._regex.match(full_path):
- add_to_list = False
- metadata = _get_file_metadata_from_json( \
- dir_path, entry, preview=False)
- if metadata is not None:
- for f in ['fulltext', 'title',
- 'description', 'tags']:
- if f in metadata and \
- self._regex.match(metadata[f]):
- add_to_list = True
- break
-
- if None not in [self._date_start, self._date_end] and \
- (stat.st_mtime < self._date_start or
- stat.st_mtime > self._date_end):
- add_to_list = False
-
- if self._mime_types:
- mime_type = gio.content_type_guess(filename=full_path)
- if mime_type not in self._mime_types:
- add_to_list = False
-
- if add_to_list:
- file_info = (full_path, stat, int(stat.st_mtime),
- metadata)
- self._file_list.append(file_info)
-
- self.progress.send(self)
-
- except Exception:
- logging.error('Error reading file %r: %s' % \
- (full_path, traceback.format_exc()))
-
- if self._pending_directories == 0:
- self.setup_ready()
- else:
- self._pending_directories -= 1
+
+ except OSError, e:
+ if e.errno != errno.ENOENT:
+ logging.exception(
+ 'Error reading metadata of linked file %r', full_path)
+ return
+
+ if S_IFMT(stat.st_mode) == S_IFDIR:
+ id_tuple = stat.st_ino, stat.st_dev
+ if not id_tuple in self._visited_directories:
+ self._visited_directories.append(id_tuple)
+ self._pending_directories.append(full_path)
+ return
+
+ if S_IFMT(stat.st_mode) != S_IFREG:
+ return
+
+ if self._regex is not None and \
+ not self._regex.match(full_path):
+ filename = os.path.basename(full_path)
+ dir_path = os.path.dirname(full_path)
+ metadata = _get_file_metadata_from_json( \
+ dir_path, filename, preview=False)
+ add_to_list = False
+ if metadata is not None:
+ for f in ['fulltext', 'title',
+ 'description', 'tags']:
+ if f in metadata and \
+ self._regex.match(metadata[f]):
+ add_to_list = True
+ break
+ if not add_to_list:
+ return
+
+ if self._date_start is not None and stat.st_mtime < self._date_start:
+ return
+
+ if self._date_end is not None and stat.st_mtime > self._date_end:
+ return
+
+ if self._mime_types:
+ mime_type = gio.content_type_guess(filename=full_path)
+ if mime_type not in self._mime_types:
+ return
+
+ file_info = (full_path, stat, int(stat.st_mtime), metadata)
+ self._file_list.append(file_info)
+
+ return
+
+ def _scan_a_directory(self):
+ dir_path = self._pending_directories.pop(0)
+
+ try:
+ entries = os.listdir(dir_path)
+ except OSError, e:
+ if e.errno != errno.EACCES:
+ logging.exception('Error reading directory %r', dir_path)
+ return
+
+ for entry in entries:
+ if entry.startswith('.'):
+ continue
+ self._pending_files.append(dir_path + '/' + entry)
+ return
def _get_file_metadata(path, stat):
"""Returns the metadata from the corresponding file