Add search support

Add a simple search field accepting Xapian queries. The actual search is done by fsemulation in SearchResultDirectory, which will merge the user-provided query with the root query securely. The SearchResultDirectory doesn't persist across queries, and PyWebDAV doesn't allow us to add custom response headers to relay the object title as the file name. In order to provide a user-friendly name to the browser for saving the file and yet identify the entry uniquely, we encode both the title and the object id in the URL for each search result.
author: Sascha Silbe <sascha-pgp@silbe.org> 2013-08-12 16:58:43 (GMT)
committer: Sascha Silbe <sascha-pgp@silbe.org> 2013-08-12 16:58:43 (GMT)
commit: 96e12d2411dbf103832fbe3dcdad026f3fcd5180 (patch)
tree: 889b83c907fde06a7583174dbd8b024156e44e55
parent: d376573d35e336aa5d05deb95cd209304357c092 (diff)
1 files changed, 79 insertions, 7 deletions
diff --git a/journal2webdav b/journal2webdav
index 9db5042..094b97a 100755
--- a/journal2webdav
+++ b/journal2webdav
@@ -25,7 +25,7 @@ import ssl
 import sys
 import time
 import urllib
-from urlparse import urljoin, urlparse
+from urlparse import urljoin, urlparse, parse_qs
 
 __pychecker__ = 'no-miximport'
 try:
@@ -71,6 +71,12 @@ INVALID_XML_CHARS = [unichr(i) for i in range(0, 0x20)
                      if i not in [0x09, 0x0A, 0x0D]]
 CHUNK_SIZE = 65536
 
+_SEARCH_HTML = """
+<form action="/search/" method="GET">
+Query: <input type="text" name="query" />
+<input type="submit" value="Search" />
+</form>
+"""
 
 log = None
 
@@ -245,7 +251,7 @@ class ObjectListHtmlResource(BufferResource):
             if name == '.':
                 continue
 
-            lines.append(self._generate_html_entry(name, fs_object))
+            lines.append(self._generate_html_entry(name, fs_object, directory))
 
         lines += ['</table>']
 
@@ -257,8 +263,22 @@ class ObjectListHtmlResource(BufferResource):
         lines += ['</html>']
         return '\n'.join(lines)
 
-    def _generate_html_entry(self, name, fs_object):
-        url = urllib.quote(name.encode('utf-8'))
+    def _generate_html_entry(self, name, fs_object, directory):
+        if (isinstance(directory, fsemulation.SearchResultDirectory) and
+            isinstance(fs_object, fsemulation.Symlink)):
+            # This directory doesn't persist across queries, so we
+            # have to encode the object_id in the URI. Since PyWebDAV
+            # doesn't allow us to provide additional headers, we can't
+            # just use the object_id for the path and provide the file
+            # name to the client as a header. The URI must be
+            # specially crafted so a) the client uses the title for
+            # saving and b) the server has access to the object id.
+            object_id = fs_object.target.rsplit('/', 1)[-1]
+            url = ('../searchresult/%s?object_id=%s'
+                   % (urllib.quote(name.encode('utf-8')),
+                      urllib.quote(object_id.encode('us-ascii'))))
+        else:
+            url = urllib.quote(name.encode('utf-8'))
         escaped_name = cgi.escape(name.encode('utf-8'))
         if isinstance(fs_object, fsemulation.Directory):
             return '<tr><td><a href="%s/">%s/</a></td></tr>' % (url,
@@ -268,6 +288,16 @@ class ObjectListHtmlResource(BufferResource):
                                                               escaped_name)
 
 
+class SearchPageResource(BufferResource):
+    def __init__(self):
+        super(SearchPageResource, self).__init__()
+        self._buffer = self._generate_html()
+
+    def _generate_html(self):
+        lines = ['<html>', '<head><title>Journal search</title></head>',
+                 '<body>', _SEARCH_HTML, '</html>']
+        return '\n'.join(lines)
+
 
 class JournalHandler(dav_interface):
 
@@ -361,13 +391,40 @@ class JournalHandler(dav_interface):
     def get_data(self, uri, byte_range=None):
         __pychecker__ = 'no-returnvalues'
         log.debug('get_data %r %r', uri, byte_range)
-        fs_object = self._lookup_uri(uri)
 
-        if isinstance(fs_object, fsemulation.Directory):
+        path, params_, query = urlparse(str(uri))[2:5]
+        path = unicode(path, 'utf-8').strip('/')
+        if (path + u'/').startswith('search/'):
+            query = unicode(query, 'utf-8')
+            return self._get_data_search(query)
+
+        fs_object = self._lookup_uri(uri)
+        if isinstance(fs_object, fsemulation.RootDirectory):
+            return ObjectListHtmlResource(fs_object, _SEARCH_HTML)
+        elif isinstance(fs_object, fsemulation.Directory):
             return ObjectListHtmlResource(fs_object)
         else:
             return JournalObjectResource(fs_object)
 
+    def _get_data_search(self, url_query):
+        __pychecker__ = 'no-returnvalues'
+        query = parse_qs(url_query).get('query', [''])[0]
+        if not query:
+            return SearchPageResource()
+
+        if query.startswith('datastore:'):
+            query = self._parse_datastore_uri(query)
+
+        search_dir = self._fs.search({'query': query})
+        try:
+            return ObjectListHtmlResource(search_dir)
+        except ValueError:
+            # FIXME: better error mapping
+            raise DAV_NotFound
+
+    def _parse_datastore_uri(self, query):
+        return 'uid:' + query[10:]
+
     def _get_dav_resourcetype(self, uri):
         log.debug('_get_dav_resourcetype %r', uri)
         fs_object = self._lookup_uri(uri)
@@ -394,6 +451,10 @@ class JournalHandler(dav_interface):
 
     def _get_dav_getcontenttype(self, uri):
         log.debug('_get_dav_getcontenttype %r', uri)
+        path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/')
+        if (path + u'/').startswith('search/'):
+            return self._get_dav_getcontenttype_search(path)
+
         fs_object = self._lookup_uri(uri)
         if isinstance(fs_object, fsemulation.Directory):
             return 'text/html; charset=utf-8'
@@ -403,6 +464,9 @@ class JournalHandler(dav_interface):
 
         raise DAV_NotFound
 
+    def _get_dav_getcontenttype_search(self, path):
+        return 'text/html; charset=utf-8'
+
     def get_creationdate(self, uri):
         log.debug('get_creationdate %r', uri)
         fs_object = self._lookup_uri(uri)
@@ -438,7 +502,15 @@ class JournalHandler(dav_interface):
         return isinstance(fs_object, fsemulation.Directory)
 
     def _lookup_uri(self, uri):
-        path = unicode(urlparse(str(uri))[2], 'utf-8')
+        parsed_uri = urlparse(str(uri))
+        path = unicode(parsed_uri.path, 'utf-8')
+        query_qs = parse_qs(parsed_uri.query)
+        object_id = query_qs.get('object_id', [''])[0]
+        if path.startswith(u'/searchresult/') and object_id:
+            # The path component is for the client to use as file name
+            # when saving. The query references the server-side object
+            # using the object_id.
+            path = u'/by-id/' + object_id.encode('us-ascii')
         return self._lookup_path(path)
 
     def _lookup_path(self, path):
author	Sascha Silbe <sascha-pgp@silbe.org>	2013-08-12 16:58:43 (GMT)
committer	Sascha Silbe <sascha-pgp@silbe.org>	2013-08-12 16:58:43 (GMT)
commit	96e12d2411dbf103832fbe3dcdad026f3fcd5180 (patch)
tree	889b83c907fde06a7583174dbd8b024156e44e55
parent	d376573d35e336aa5d05deb95cd209304357c092 (diff)