Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
diff options
context:
space:
mode:
author Sascha Silbe <sascha-pgp@silbe.org> 2013-08-12 16:58:43 (GMT)
committer Sascha Silbe <sascha-pgp@silbe.org> 2013-08-12 16:58:43 (GMT)
commit 96e12d2411dbf103832fbe3dcdad026f3fcd5180 (patch)
tree 889b83c907fde06a7583174dbd8b024156e44e55
parent d376573d35e336aa5d05deb95cd209304357c092 (diff)
Add search support
Add a simple search field accepting Xapian queries. The actual search is done by fsemulation in SearchResultDirectory which will merge the user-provided query with the root query securely. The SearchResultDirectory doesn't persist across queries and PyWebDAV doesn't allow us to add custom response headers to relay the object title as filename. In order to provide a user-friendly name to the browser for saving the file and yet identify the entry uniquely, we encode both the title and the object id in the URL for each search result.
-rwxr-xr-x journal2webdav 86
1 files changed, 79 insertions, 7 deletions
diff --git a/journal2webdav b/journal2webdav
index 9db5042..094b97a 100755
--- a/journal2webdav
+++ b/journal2webdav
@@ -25,7 +25,7 @@ import ssl
import sys
import time
import urllib
-from urlparse import urljoin, urlparse
+from urlparse import urljoin, urlparse, parse_qs
__pychecker__ = 'no-miximport'
try:
@@ -71,6 +71,12 @@ INVALID_XML_CHARS = [unichr(i) for i in range(0, 0x20)
if i not in [0x09, 0x0A, 0x0D]]
CHUNK_SIZE = 65536
+_SEARCH_HTML = """
+<form action="/search/" method="GET">
+Query: <input type="text" name="query" />
+<input type="submit" value="Search" />
+</form>
+"""
log = None
@@ -245,7 +251,7 @@ class ObjectListHtmlResource(BufferResource):
if name == '.':
continue
- lines.append(self._generate_html_entry(name, fs_object))
+ lines.append(self._generate_html_entry(name, fs_object, directory))
lines += ['</table>']
@@ -257,8 +263,22 @@ class ObjectListHtmlResource(BufferResource):
lines += ['</html>']
return '\n'.join(lines)
- def _generate_html_entry(self, name, fs_object):
- url = urllib.quote(name.encode('utf-8'))
+ def _generate_html_entry(self, name, fs_object, directory):
+ if (isinstance(directory, fsemulation.SearchResultDirectory) and
+ isinstance(fs_object, fsemulation.Symlink)):
+ # This directory doesn't persist across queries, so we
+ # have to encode the object_id in the URI. Since PyWebDAV
+ # doesn't allow us to provide additional headers, we can't
+ # just use the object_id for the path and provide the file
+ # name to the client as a header. The URI must be
+ # specially crafted so a) the client uses the title for
+ # saving and b) the server has access to the object id.
+ object_id = fs_object.target.rsplit('/', 1)[-1]
+ url = ('../searchresult/%s?object_id=%s'
+ % (urllib.quote(name.encode('utf-8')),
+ urllib.quote(object_id.encode('us-ascii'))))
+ else:
+ url = urllib.quote(name.encode('utf-8'))
escaped_name = cgi.escape(name.encode('utf-8'))
if isinstance(fs_object, fsemulation.Directory):
return '<tr><td><a href="%s/">%s/</a></td></tr>' % (url,
@@ -268,6 +288,16 @@ class ObjectListHtmlResource(BufferResource):
escaped_name)
+class SearchPageResource(BufferResource):
+ def __init__(self):
+ super(SearchPageResource, self).__init__()
+ self._buffer = self._generate_html()
+
+ def _generate_html(self):
+ lines = ['<html>', '<head><title>Journal search</title></head>',
+ '<body>', _SEARCH_HTML, '</html>']
+ return '\n'.join(lines)
+
class JournalHandler(dav_interface):
@@ -361,13 +391,40 @@ class JournalHandler(dav_interface):
def get_data(self, uri, byte_range=None):
__pychecker__ = 'no-returnvalues'
log.debug('get_data %r %r', uri, byte_range)
- fs_object = self._lookup_uri(uri)
- if isinstance(fs_object, fsemulation.Directory):
+ path, params_, query = urlparse(str(uri))[2:5]
+ path = unicode(path, 'utf-8').strip('/')
+ if (path + u'/').startswith('search/'):
+ query = unicode(query, 'utf-8')
+ return self._get_data_search(query)
+
+ fs_object = self._lookup_uri(uri)
+ if isinstance(fs_object, fsemulation.RootDirectory):
+ return ObjectListHtmlResource(fs_object, _SEARCH_HTML)
+ elif isinstance(fs_object, fsemulation.Directory):
return ObjectListHtmlResource(fs_object)
else:
return JournalObjectResource(fs_object)
+ def _get_data_search(self, url_query):
+ __pychecker__ = 'no-returnvalues'
+ query = parse_qs(url_query).get('query', [''])[0]
+ if not query:
+ return SearchPageResource()
+
+ if query.startswith('datastore:'):
+ query = self._parse_datastore_uri(query)
+
+ search_dir = self._fs.search({'query': query})
+ try:
+ return ObjectListHtmlResource(search_dir)
+ except ValueError:
+ # FIXME: better error mapping
+ raise DAV_NotFound
+
+ def _parse_datastore_uri(self, query):
+ return 'uid:' + query[10:]
+
def _get_dav_resourcetype(self, uri):
log.debug('_get_dav_resourcetype %r', uri)
fs_object = self._lookup_uri(uri)
@@ -394,6 +451,10 @@ class JournalHandler(dav_interface):
def _get_dav_getcontenttype(self, uri):
log.debug('_get_dav_getcontenttype %r', uri)
+ path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/')
+ if (path + u'/').startswith('search/'):
+ return self._get_dav_getcontenttype_search(path)
+
fs_object = self._lookup_uri(uri)
if isinstance(fs_object, fsemulation.Directory):
return 'text/html; charset=utf-8'
@@ -403,6 +464,9 @@ class JournalHandler(dav_interface):
raise DAV_NotFound
+ def _get_dav_getcontenttype_search(self, path):
+ return 'text/html; charset=utf-8'
+
def get_creationdate(self, uri):
log.debug('get_creationdate %r', uri)
fs_object = self._lookup_uri(uri)
@@ -438,7 +502,15 @@ class JournalHandler(dav_interface):
return isinstance(fs_object, fsemulation.Directory)
def _lookup_uri(self, uri):
- path = unicode(urlparse(str(uri))[2], 'utf-8')
+ parsed_uri = urlparse(str(uri))
+ path = unicode(parsed_uri.path, 'utf-8')
+ query_qs = parse_qs(parsed_uri.query)
+ object_id = query_qs.get('object_id', [''])[0]
+ if path.startswith(u'/searchresult/') and object_id:
+ # The path component is for the client to use as file name
+ # when saving. The query references the server-side object
+ # using the object_id.
+ path = u'/by-id/' + object_id.encode('us-ascii')
return self._lookup_path(path)
def _lookup_path(self, path):