diff options
author | Sascha Silbe <sascha-pgp@silbe.org> | 2013-08-12 16:58:43 (GMT) |
---|---|---|
committer | Sascha Silbe <sascha-pgp@silbe.org> | 2013-08-12 16:58:43 (GMT) |
commit | 96e12d2411dbf103832fbe3dcdad026f3fcd5180 (patch) | |
tree | 889b83c907fde06a7583174dbd8b024156e44e55 | |
parent | d376573d35e336aa5d05deb95cd209304357c092 (diff) |
Add search support
Add a simple search field accepting Xapian queries. The actual search
is done by fsemulation in SearchResultDirectory, which securely merges
the user-provided query with the root query.
The SearchResultDirectory doesn't persist across queries, and PyWebDAV
doesn't allow us to add custom response headers to relay the object
title as the file name. In order to provide a user-friendly name to the
browser for saving the file and yet identify the entry uniquely, we
encode both the title and the object id in the URL for each search
result (../searchresult/<title>?object_id=<object id>).
-rwxr-xr-x | journal2webdav | 86 |
1 files changed, 79 insertions, 7 deletions
diff --git a/journal2webdav b/journal2webdav index 9db5042..094b97a 100755 --- a/journal2webdav +++ b/journal2webdav @@ -25,7 +25,7 @@ import ssl import sys import time import urllib -from urlparse import urljoin, urlparse +from urlparse import urljoin, urlparse, parse_qs __pychecker__ = 'no-miximport' try: @@ -71,6 +71,12 @@ INVALID_XML_CHARS = [unichr(i) for i in range(0, 0x20) if i not in [0x09, 0x0A, 0x0D]] CHUNK_SIZE = 65536 +_SEARCH_HTML = """ +<form action="/search/" method="GET"> +Query: <input type="text" name="query" /> +<input type="submit" value="Search" /> +</form> +""" log = None @@ -245,7 +251,7 @@ class ObjectListHtmlResource(BufferResource): if name == '.': continue - lines.append(self._generate_html_entry(name, fs_object)) + lines.append(self._generate_html_entry(name, fs_object, directory)) lines += ['</table>'] @@ -257,8 +263,22 @@ class ObjectListHtmlResource(BufferResource): lines += ['</html>'] return '\n'.join(lines) - def _generate_html_entry(self, name, fs_object): - url = urllib.quote(name.encode('utf-8')) + def _generate_html_entry(self, name, fs_object, directory): + if (isinstance(directory, fsemulation.SearchResultDirectory) and + isinstance(fs_object, fsemulation.Symlink)): + # This directory doesn't persistent across queries, so we + # have to encode the object_id in the URI. Since PyWebDAV + # doesn't allow us to provide additional headers, we can't + # just use the object_id for the path and provide the file + # name to the client as a header. The URI must be + # specially crafted so a) the client uses the title for + # saving and b) the server has access to the object id. 
+ object_id = fs_object.target.rsplit('/', 1)[-1] + url = ('../searchresult/%s?object_id=%s' + % (urllib.quote(name.encode('utf-8')), + urllib.quote(object_id.encode('us-ascii')))) + else: + url = urllib.quote(name.encode('utf-8')) escaped_name = cgi.escape(name.encode('utf-8')) if isinstance(fs_object, fsemulation.Directory): return '<tr><td><a href="%s/">%s/</a></td></tr>' % (url, @@ -268,6 +288,16 @@ class ObjectListHtmlResource(BufferResource): escaped_name) +class SearchPageResource(BufferResource): + def __init__(self): + super(SearchPageResource, self).__init__() + self._buffer = self._generate_html() + + def _generate_html(self): + lines = ['<html>', '<head><title>Journal search</title></head>', + '<body>', _SEARCH_HTML, '</html>'] + return '\n'.join(lines) + class JournalHandler(dav_interface): @@ -361,13 +391,40 @@ class JournalHandler(dav_interface): def get_data(self, uri, byte_range=None): __pychecker__ = 'no-returnvalues' log.debug('get_data %r %r', uri, byte_range) - fs_object = self._lookup_uri(uri) - if isinstance(fs_object, fsemulation.Directory): + path, params_, query = urlparse(str(uri))[2:5] + path = unicode(path, 'utf-8').strip('/') + if (path + u'/').startswith('search/'): + query = unicode(query, 'utf-8') + return self._get_data_search(query) + + fs_object = self._lookup_uri(uri) + if isinstance(fs_object, fsemulation.RootDirectory): + return ObjectListHtmlResource(fs_object, _SEARCH_HTML) + elif isinstance(fs_object, fsemulation.Directory): return ObjectListHtmlResource(fs_object) else: return JournalObjectResource(fs_object) + def _get_data_search(self, url_query): + __pychecker__ = 'no-returnvalues' + query = parse_qs(url_query).get('query', [''])[0] + if not query: + return SearchPageResource() + + if query.startswith('datastore:'): + query = self._parse_datastore_uri(query) + + search_dir = self._fs.search({'query': query}) + try: + return ObjectListHtmlResource(search_dir) + except ValueError: + # FIXME: better error mapping + raise 
DAV_NotFound + + def _parse_datastore_uri(self, query): + return 'uid:' + query[10:] + def _get_dav_resourcetype(self, uri): log.debug('_get_dav_resourcetype %r', uri) fs_object = self._lookup_uri(uri) @@ -394,6 +451,10 @@ class JournalHandler(dav_interface): def _get_dav_getcontenttype(self, uri): log.debug('_get_dav_getcontenttype %r', uri) + path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/') + if (path + u'/').startswith('search/'): + return self._get_dav_getcontenttype_search(path) + fs_object = self._lookup_uri(uri) if isinstance(fs_object, fsemulation.Directory): return 'text/html; charset=utf-8' @@ -403,6 +464,9 @@ class JournalHandler(dav_interface): raise DAV_NotFound + def _get_dav_getcontenttype_search(self, path): + return 'text/html; charset=utf-8' + def get_creationdate(self, uri): log.debug('get_creationdate %r', uri) fs_object = self._lookup_uri(uri) @@ -438,7 +502,15 @@ class JournalHandler(dav_interface): return isinstance(fs_object, fsemulation.Directory) def _lookup_uri(self, uri): - path = unicode(urlparse(str(uri))[2], 'utf-8') + parsed_uri = urlparse(str(uri)) + path = unicode(parsed_uri.path, 'utf-8') + query_qs = parse_qs(parsed_uri.query) + object_id = query_qs.get('object_id', [''])[0] + if path.startswith(u'/searchresult/') and object_id: + # The path component is for the client to use as file name + # when saving. The query references the server-side object + # using the object_id. + path = u'/by-id/' + object_id.encode('us-ascii') return self._lookup_path(path) def _lookup_path(self, path): |