From f1eca6f36024cff7cc4a61cfb2a52eae219b0d17 Mon Sep 17 00:00:00 2001
From: James Cameron
Date: Tue, 30 Nov 2010 02:08:40 +0000
Subject: fix journal scan of external media, dev.laptop.org #10140

Update the progress bar regularly and prevent the UI from stalling
during a scan.

Avoid following recursive symlinks, and symlinks that point outside
the filesystem being scanned.

Do not check for MIME type if file is excluded for other filter
reasons.

Do not report permission denied errors.

Tested on Sugar 0.84 using OLPC OS 10.1.2.

Reviewed-By: Sascha Silbe
---
Review notes (ignored when the patch is applied):
 - _scan_a_file: the date filters now compare stat.st_mtime; the patch as
   posted read self.st_mtime, which the visible code never sets on the
   result set.
 - One-line docstrings were added to the three new methods, so the last
   hunk's new-side line count is 103 instead of 100.

diff --git a/src/jarabe/journal/model.py b/src/jarabe/journal/model.py
index f4186f0..112f097 100644
--- a/src/jarabe/journal/model.py
+++ b/src/jarabe/journal/model.py
@@ -16,10 +16,11 @@
 
 import logging
 import os
+import errno
 from datetime import datetime
 import time
 import shutil
-from stat import S_IFMT, S_IFDIR, S_IFREG
+from stat import S_IFLNK, S_IFMT, S_IFDIR, S_IFREG
 import traceback
 import re
 
@@ -258,7 +259,9 @@ class InplaceResultSet(BaseResultSet):
         BaseResultSet.__init__(self, query, cache_limit)
         self._mount_point = mount_point
         self._file_list = None
-        self._pending_directories = 0
+        self._pending_directories = []
+        self._visited_directories = []
+        self._pending_files = []
         self._stopped = False
 
         query_text = query.get('query', '')
@@ -283,7 +286,10 @@ class InplaceResultSet(BaseResultSet):
 
     def setup(self):
         self._file_list = []
-        self._recurse_dir(self._mount_point)
+        self._pending_directories = [self._mount_point]
+        self._visited_directories = []
+        self._pending_files = []
+        gobject.idle_add(self._scan)
 
     def stop(self):
         self._stopped = True
@@ -317,51 +323,103 @@ class InplaceResultSet(BaseResultSet):
 
         return entries, total_count
 
-    def _recurse_dir(self, dir_path):
+    def _scan(self):
+        """Idle callback doing one step of the scan; False when done."""
         if self._stopped:
+            return False
+
+        self.progress.send(self)
+
+        if self._pending_files:
+            self._scan_a_file()
+            return True
+
+        if self._pending_directories:
+            self._scan_a_directory()
+            return True
+
+        self.setup_ready()
+        self._visited_directories = []
+        return False
+
+    def _scan_a_file(self):
+        """Check the next pending path; queue directories, list files."""
+        full_path = self._pending_files.pop(0)
+
+        try:
+            stat = os.lstat(full_path)
+        except OSError, e:
+            if e.errno != errno.ENOENT:
+                logging.exception(
+                    'Error reading metadata of file %r', full_path)
             return
 
-        for entry in os.listdir(dir_path):
-            if entry.startswith('.'):
-                continue
-            full_path = dir_path + '/' + entry
+        if S_IFMT(stat.st_mode) == S_IFLNK:
+            try:
+                link = os.readlink(full_path)
+            except OSError, e:
+                logging.exception(
+                    'Error reading target of link %r', full_path)
+                return
+
+            if not os.path.abspath(link).startswith(self._mount_point):
+                return
+
             try:
                 stat = os.stat(full_path)
 
-                if S_IFMT(stat.st_mode) == S_IFDIR:
-                    self._pending_directories += 1
-                    gobject.idle_add(lambda s=full_path: self._recurse_dir(s))
-                elif S_IFMT(stat.st_mode) == S_IFREG:
-                    add_to_list = True
+            except OSError, e:
+                if e.errno != errno.ENOENT:
+                    logging.exception(
+                        'Error reading metadata of linked file %r', full_path)
+                return
+
+        if S_IFMT(stat.st_mode) == S_IFDIR:
+            id_tuple = stat.st_ino, stat.st_dev
+            if not id_tuple in self._visited_directories:
+                self._visited_directories.append(id_tuple)
+                self._pending_directories.append(full_path)
+            return
+
+        if S_IFMT(stat.st_mode) != S_IFREG:
+            return
 
-                    if self._regex is not None and \
-                            not self._regex.match(full_path):
-                        add_to_list = False
+        if self._regex is not None and \
+                not self._regex.match(full_path):
+            return
 
-                    if None not in [self._date_start, self._date_end] and \
-                            (stat.st_mtime < self._date_start or
-                             stat.st_mtime > self._date_end):
-                        add_to_list = False
+        if self._date_start is not None and stat.st_mtime < self._date_start:
+            return
 
-                    if self._mime_types:
-                        mime_type = gio.content_type_guess(filename=full_path)
-                        if mime_type not in self._mime_types:
-                            add_to_list = False
+        if self._date_end is not None and stat.st_mtime > self._date_end:
+            return
 
-                    if add_to_list:
-                        file_info = (full_path, stat, int(stat.st_mtime))
-                        self._file_list.append(file_info)
+        if self._mime_types:
+            mime_type = gio.content_type_guess(filename=full_path)
+            if mime_type not in self._mime_types:
+                return
 
-                    self.progress.send(self)
+        file_info = (full_path, stat, int(stat.st_mtime))
+        self._file_list.append(file_info)
 
-            except Exception:
-                logging.error('Error reading file %r: %s' % \
-                              (full_path, traceback.format_exc()))
+        return
 
-        if self._pending_directories == 0:
-            self.setup_ready()
-        else:
-            self._pending_directories -= 1
+    def _scan_a_directory(self):
+        """Queue the non-hidden entries of the next pending directory."""
+        dir_path = self._pending_directories.pop(0)
+
+        try:
+            entries = os.listdir(dir_path)
+        except OSError, e:
+            if e.errno != errno.EACCES:
+                logging.exception('Error reading directory %r', dir_path)
+            return
+
+        for entry in entries:
+            if entry.startswith('.'):
+                continue
+            self._pending_files.append(dir_path + '/' + entry)
+        return
 
 def _get_file_metadata(path, stat):
     client = gconf.client_get_default()
-- 
cgit v0.9.1