From c7f625e951df6b679092058b267e86c77b9f04a8 Mon Sep 17 00:00:00 2001 From: Simon Schampijer Date: Tue, 14 Dec 2010 19:22:44 +0000 Subject: Make scanning of storage devices more robust OLPC #10140 (James Cameron) --- diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py index 521cd8b..a93321e 100644 --- a/src/jarabe/journal/model.py +++ b/src/jarabe/journal/model.py @@ -16,12 +16,12 @@ import logging import os +import errno from datetime import datetime import time import shutil import tempfile -from stat import S_IFMT, S_IFDIR, S_IFREG -import traceback +from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG import re import json from gettext import gettext as _ @@ -262,7 +262,9 @@ class InplaceResultSet(BaseResultSet): BaseResultSet.__init__(self, query, cache_limit) self._mount_point = mount_point self._file_list = None - self._pending_directories = 0 + self._pending_directories = [] + self._visited_directories = [] + self._pending_files = [] self._stopped = False query_text = query.get('query', '') @@ -287,7 +289,10 @@ class InplaceResultSet(BaseResultSet): def setup(self): self._file_list = [] - self._recurse_dir(self._mount_point) + self._pending_directories = [self._mount_point] + self._visited_directories = [] + self._pending_files = [] + gobject.idle_add(self._scan) def stop(self): self._stopped = True @@ -314,7 +319,6 @@ class InplaceResultSet(BaseResultSet): entries = [] for file_path, stat, mtime_, metadata in files: if metadata is None: - # FIXME: the find should fetch metadata metadata = _get_file_metadata(file_path, stat) metadata['mountpoint'] = self._mount_point entries.append(metadata) @@ -323,62 +327,114 @@ class InplaceResultSet(BaseResultSet): return entries, total_count - def _recurse_dir(self, dir_path): + def _scan(self): if self._stopped: + return False + + self.progress.send(self) + + if self._pending_files: + self._scan_a_file() + return True + + if self._pending_directories: + self._scan_a_directory() + return True + + self.setup_ready() + self._visited_directories = [] + return False + + def _scan_a_file(self): + full_path = self._pending_files.pop(0) + metadata = None + + try: + stat = os.lstat(full_path) + except OSError, e: + if e.errno != errno.ENOENT: + logging.exception( + 'Error reading metadata of file %r', full_path) return - for entry in os.listdir(dir_path): - if entry.startswith('.'): - continue - full_path = dir_path + '/' + entry + if S_IFMT(stat.st_mode) == S_IFLNK: + try: + link = os.readlink(full_path) + except OSError, e: + logging.exception( + 'Error reading target of link %r', full_path) + return + + if not os.path.abspath(link).startswith(self._mount_point): + return + try: stat = os.stat(full_path) - if S_IFMT(stat.st_mode) == S_IFDIR: - self._pending_directories += 1 - gobject.idle_add(lambda s=full_path: self._recurse_dir(s)) - - elif S_IFMT(stat.st_mode) == S_IFREG: - add_to_list = True - metadata = None - - if self._regex is not None and \ - not self._regex.match(full_path): - add_to_list = False - metadata = _get_file_metadata_from_json( \ - dir_path, entry, preview=False) - if metadata is not None: - for f in ['fulltext', 'title', - 'description', 'tags']: - if f in metadata and \ - self._regex.match(metadata[f]): - add_to_list = True - break - - if None not in [self._date_start, self._date_end] and \ - (stat.st_mtime < self._date_start or - stat.st_mtime > self._date_end): - add_to_list = False - - if self._mime_types: - mime_type = gio.content_type_guess(filename=full_path) - if mime_type not in self._mime_types: - add_to_list = False - - if add_to_list: - file_info = (full_path, stat, int(stat.st_mtime), - metadata) - self._file_list.append(file_info) - - self.progress.send(self) - - except Exception: - logging.error('Error reading file %r: %s' % \ - (full_path, traceback.format_exc())) - - if self._pending_directories == 0: - self.setup_ready() - else: - self._pending_directories -= 1 + + except OSError, e: + if e.errno != errno.ENOENT: + logging.exception( + 'Error reading metadata of linked file %r', full_path) + return + + if S_IFMT(stat.st_mode) == S_IFDIR: + id_tuple = stat.st_ino, stat.st_dev + if not id_tuple in self._visited_directories: + self._visited_directories.append(id_tuple) + self._pending_directories.append(full_path) + return + + if S_IFMT(stat.st_mode) != S_IFREG: + return + + if self._regex is not None and \ + not self._regex.match(full_path): + filename = os.path.basename(full_path) + dir_path = os.path.dirname(full_path) + metadata = _get_file_metadata_from_json( \ + dir_path, filename, preview=False) + add_to_list = False + if metadata is not None: + for f in ['fulltext', 'title', + 'description', 'tags']: + if f in metadata and \ + self._regex.match(metadata[f]): + add_to_list = True + break + if not add_to_list: + return + + if self._date_start is not None and stat.st_mtime < self._date_start: + return + + if self._date_end is not None and stat.st_mtime > self._date_end: + return + + if self._mime_types: + mime_type = gio.content_type_guess(filename=full_path) + if mime_type not in self._mime_types: + return + + file_info = (full_path, stat, int(stat.st_mtime), metadata) + self._file_list.append(file_info) + + return + + def _scan_a_directory(self): + dir_path = self._pending_directories.pop(0) + + try: + entries = os.listdir(dir_path) + except OSError, e: + if e.errno != errno.EACCES: + logging.exception('Error reading directory %r', dir_path) + return + + for entry in entries: + if entry.startswith('.'): + continue + self._pending_files.append(dir_path + '/' + entry) + return def _get_file_metadata(path, stat): """Returns the metadata from the corresponding file -- cgit v0.9.1