From 96e12d2411dbf103832fbe3dcdad026f3fcd5180 Mon Sep 17 00:00:00 2001 From: Sascha Silbe Date: Mon, 12 Aug 2013 16:58:43 +0000 Subject: Add search support Add a simple search field accepting Xapian queries. The actual search is done by fsemulation in SearchResultDirectory which will merge the user-provided query with the root query securely. The SearchResultDirectory doesn't persist across queries and PyWebDAV doesn't allow us to add custom response headers to relay the object title as filename. In order to provide a user-friendly name to the browser for saving the file and yet identify the entry uniquely, we encode both the title and the object id in the URL for each search result. --- diff --git a/journal2webdav b/journal2webdav index 9db5042..094b97a 100755 --- a/journal2webdav +++ b/journal2webdav @@ -25,7 +25,7 @@ import ssl import sys import time import urllib -from urlparse import urljoin, urlparse +from urlparse import urljoin, urlparse, parse_qs __pychecker__ = 'no-miximport' try: @@ -71,6 +71,12 @@ INVALID_XML_CHARS = [unichr(i) for i in range(0, 0x20) if i not in [0x09, 0x0A, 0x0D]] CHUNK_SIZE = 65536 +_SEARCH_HTML = """ +
+Query: + +
+""" log = None @@ -245,7 +251,7 @@ class ObjectListHtmlResource(BufferResource): if name == '.': continue - lines.append(self._generate_html_entry(name, fs_object)) + lines.append(self._generate_html_entry(name, fs_object, directory)) lines += [''] @@ -257,8 +263,22 @@ class ObjectListHtmlResource(BufferResource): lines += [''] return '\n'.join(lines) - def _generate_html_entry(self, name, fs_object): - url = urllib.quote(name.encode('utf-8')) + def _generate_html_entry(self, name, fs_object, directory): + if (isinstance(directory, fsemulation.SearchResultDirectory) and + isinstance(fs_object, fsemulation.Symlink)): + # This directory doesn't persist across queries, so we + # have to encode the object_id in the URI. Since PyWebDAV + # doesn't allow us to provide additional headers, we can't + # just use the object_id for the path and provide the file + # name to the client as a header. The URI must be + # specially crafted so a) the client uses the title for + # saving and b) the server has access to the object id. 
+ object_id = fs_object.target.rsplit('/', 1)[-1] + url = ('../searchresult/%s?object_id=%s' + % (urllib.quote(name.encode('utf-8')), + urllib.quote(object_id.encode('us-ascii')))) + else: + url = urllib.quote(name.encode('utf-8')) escaped_name = cgi.escape(name.encode('utf-8')) if isinstance(fs_object, fsemulation.Directory): return '%s/' % (url, @@ -268,6 +288,16 @@ class ObjectListHtmlResource(BufferResource): escaped_name) +class SearchPageResource(BufferResource): + def __init__(self): + super(SearchPageResource, self).__init__() + self._buffer = self._generate_html() + + def _generate_html(self): + lines = ['', 'Journal search', + '', _SEARCH_HTML, ''] + return '\n'.join(lines) + class JournalHandler(dav_interface): @@ -361,13 +391,40 @@ class JournalHandler(dav_interface): def get_data(self, uri, byte_range=None): __pychecker__ = 'no-returnvalues' log.debug('get_data %r %r', uri, byte_range) - fs_object = self._lookup_uri(uri) - if isinstance(fs_object, fsemulation.Directory): + path, params_, query = urlparse(str(uri))[2:5] + path = unicode(path, 'utf-8').strip('/') + if (path + u'/').startswith('search/'): + query = unicode(query, 'utf-8') + return self._get_data_search(query) + + fs_object = self._lookup_uri(uri) + if isinstance(fs_object, fsemulation.RootDirectory): + return ObjectListHtmlResource(fs_object, _SEARCH_HTML) + elif isinstance(fs_object, fsemulation.Directory): return ObjectListHtmlResource(fs_object) else: return JournalObjectResource(fs_object) + def _get_data_search(self, url_query): + __pychecker__ = 'no-returnvalues' + query = parse_qs(url_query).get('query', [''])[0] + if not query: + return SearchPageResource() + + if query.startswith('datastore:'): + query = self._parse_datastore_uri(query) + + search_dir = self._fs.search({'query': query}) + try: + return ObjectListHtmlResource(search_dir) + except ValueError: + # FIXME: better error mapping + raise DAV_NotFound + + def _parse_datastore_uri(self, query): + return 'uid:' + query[10:] 
+ def _get_dav_resourcetype(self, uri): log.debug('_get_dav_resourcetype %r', uri) fs_object = self._lookup_uri(uri) @@ -394,6 +451,10 @@ class JournalHandler(dav_interface): def _get_dav_getcontenttype(self, uri): log.debug('_get_dav_getcontenttype %r', uri) + path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/') + if (path + u'/').startswith('search/'): + return self._get_dav_getcontenttype_search(path) + fs_object = self._lookup_uri(uri) if isinstance(fs_object, fsemulation.Directory): return 'text/html; charset=utf-8' @@ -403,6 +464,9 @@ class JournalHandler(dav_interface): raise DAV_NotFound + def _get_dav_getcontenttype_search(self, path): + return 'text/html; charset=utf-8' + def get_creationdate(self, uri): log.debug('get_creationdate %r', uri) fs_object = self._lookup_uri(uri) @@ -438,7 +502,15 @@ class JournalHandler(dav_interface): return isinstance(fs_object, fsemulation.Directory) def _lookup_uri(self, uri): - path = unicode(urlparse(str(uri))[2], 'utf-8') + parsed_uri = urlparse(str(uri)) + path = unicode(parsed_uri.path, 'utf-8') + query_qs = parse_qs(parsed_uri.query) + object_id = query_qs.get('object_id', [''])[0] + if path.startswith(u'/searchresult/') and object_id: + # The path component is for the client to use as file name + # when saving. The query references the server-side object + # using the object_id. + path = u'/by-id/' + object_id.encode('us-ascii') return self._lookup_path(path) def _lookup_path(self, path): -- cgit v0.9.1