#!/usr/bin/env python
#
# Author: Sascha Silbe (OpenPGP signed mails only)
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see .
"""Serve the Sugar data store ("Journal") over WebDAV/HTTP.

Entries are exposed through the local ``fsemulation`` module as a virtual
file system and published via PyWebDAV.  Plain HTTP clients get HTML
directory listings, a details page per entry and a search page; WebDAV
clients get PROPFIND/GET access, including entry metadata in a custom
namespace (SUGAR_NS).

NOTE(review): this copy of the file has been mangled by an extraction
step: physical line breaks were lost and all HTML markup inside string
literals was stripped (several ``%`` format strings now have fewer
placeholders than arguments).  The code below restores conventional
formatting without changing any token; the lossy string literals are kept
as found and flagged with NOTE(review) comments.  Restore them from the
upstream source before running.
"""

from BaseHTTPServer import HTTPServer
from SocketServer import ThreadingMixIn
import cgi
import cStringIO as StringIO
import gzip
import logging
import optparse
import os
import ssl
import sys
import time
import urllib
from urlparse import urljoin, urlparse, parse_qs

__pychecker__ = 'no-miximport'
# Support both pywebdav >= 0.9.8 (pywebdav.lib.*) and older releases
# (DAV.* / DAVServer.*); record which known upstream bugs need local
# workarounds for the imported version.
try:
    from pywebdav.lib import propfind
except ImportError:
    # pywebdav < 0.9.8
    __pychecker__ = 'no-reimport'
    from DAV import propfind
    from DAV.constants import COLLECTION, OBJECT
    from DAV.iface import dav_interface
    from DAV.errors import DAV_Error, DAV_NotFound, DAV_Requested_Range_Not_Satisfiable
    from DAVServer.fileauth import DAVAuthHandler
    from xml.parsers.expat import ExpatError
    _PYWEBDAV_BUGS = set(['PROPFIND_NS', 'ALLPROP_RECURSE'])
else:
    from pywebdav.lib.constants import COLLECTION, OBJECT
    from pywebdav.lib.iface import dav_interface
    from pywebdav.lib.errors import DAV_Error, DAV_NotFound, DAV_Requested_Range_Not_Satisfiable
    from pywebdav.lib.WebDAVServer import DAVRequestHandler as DAVAuthHandler
    __pychecker__ = 'no-reimport'
    from xml.parsers.expat import ExpatError
    _PYWEBDAV_BUGS = set(['ALLPROP_RECURSE', 'HTTP10_KEEPALIVE'])

if 'ALLPROP_RECURSE' in _PYWEBDAV_BUGS:
    # Needed by the PROPFIND.create_prop() replacement below.
    import xml.dom.minidom
    domimpl = xml.dom.minidom.getDOMImplementation()

# from sugar.logger import trace
import fsemulation


# D-Bus names of the Sugar data store services (v1 and v2 APIs).
DS_DBUS_SERVICE = 'org.laptop.sugar.DataStore'
DS_DBUS_INTERFACE1 = 'org.laptop.sugar.DataStore'
DS_DBUS_PATH1 = '/org/laptop/sugar/DataStore'
DS_DBUS_INTERFACE2 = 'org.laptop.sugar.DataStore2'
DS_DBUS_PATH2 = '/org/laptop/sugar/DataStore2'

# XML namespace used for exposing Sugar entry metadata via PROPFIND.
SUGAR_NS = 'http://people.sugarlabs.org/silbe/webdavns/sugar'
#SCHEMA_NS = 'http://www.w3.org/2001/XMLSchema'
# Control characters that are not representable in XML 1.0 text nodes
# (everything below 0x20 except TAB, LF, CR).
INVALID_XML_CHARS = [unichr(i) for i in range(0, 0x20)
                     if i not in [0x09, 0x0A, 0x0D]]
# Read/stream buffer size in bytes.
CHUNK_SIZE = 65536
# NOTE(review): originally an HTML search form; the markup was stripped
# during extraction, only the label text survives.
_SEARCH_HTML = """
Query:
"""

# Module-level logger; initialised in main() after parsing options.
log = None


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""


class PROPFIND(propfind.PROPFIND):
    """PROPFIND implementation with workarounds for pywebdav bugs.

    Which workaround is active depends on the _PYWEBDAV_BUGS set filled
    in at import time for the detected pywebdav version.
    """

    __pychecker__ = 'no-override'

    if 'PROPFIND_NS' in _PYWEBDAV_BUGS:
        # pylint: disable=C0324,C0322
        def mk_propname_response(self,uri,propnames,doc):
            """Build a <D:response> for a propname request.

            Copy of the original, but with bug fix for multiple
            namespaces (the upstream version reused one namespace prefix
            for all property namespaces).
            """
            re=doc.createElement("D:response")
            # write href information
            uparts=urlparse(uri)
            fileloc=uparts[2]
            href=doc.createElement("D:href")
            huri=doc.createTextNode(uparts[0]+'://'+'/'.join(uparts[1:2])
                                    + urllib.quote(fileloc))
            href.appendChild(huri)
            re.appendChild(href)

            ps=doc.createElement("D:propstat")
            nsnum=0

            for ns,plist in propnames.items():
                # write prop element; each namespace gets its own "nsN"
                # prefix declaration
                pr=doc.createElement("D:prop")
                nsp="ns"+str(nsnum)
                pr.setAttribute("xmlns:"+nsp,ns)
                nsnum=nsnum+1

                # write propertynames
                for p in plist:
                    pe=doc.createElement(nsp+":"+p)
                    pr.appendChild(pe)

                ps.appendChild(pr)
            re.appendChild(ps)

            return re

    if 'ALLPROP_RECURSE' in _PYWEBDAV_BUGS:
        # work-around for Debian#710690: upstream create_allprop() did
        # not refresh the property list per resource while recursing.
        def create_allprop(self):
            return self.create_prop(True)

        def create_prop(self, allprop=False):
            """Build the multistatus document for Depth 0/1/infinity.

            When ``allprop`` is true, the property list is re-fetched
            for every visited resource instead of only once.
            """
            # create the document generator
            doc = domimpl.createDocument(None, "multistatus", None)
            ms = doc.documentElement
            ms.setAttribute("xmlns:D", "DAV:")
            ms.tagName = 'D:multistatus'

            if self._depth == "0":
                if allprop:
                    self.proplist = self._dataclass.get_propnames(self._uri)
                    self.namespaces = self.proplist.keys()
                gp, bp = self.get_propvalues(self._uri)
                res = self.mk_prop_response(self._uri, gp, bp, doc)
                ms.appendChild(res)

            elif self._depth == "1":
                if allprop:
                    self.proplist = self._dataclass.get_propnames(self._uri)
                    self.namespaces = self.proplist.keys()
                gp, bp = self.get_propvalues(self._uri)
                res = self.mk_prop_response(self._uri, gp, bp, doc)
                ms.appendChild(res)

                for newuri in self._dataclass.get_childs(self._uri):
                    if allprop:
                        self.proplist = self._dataclass.get_propnames(newuri)
                        self.namespaces = self.proplist.keys()
                    gp, bp = self.get_propvalues(newuri)
                    res = self.mk_prop_response(newuri, gp, bp, doc)
                    ms.appendChild(res)
            elif self._depth == 'infinity':
                # Iterative depth-first walk over the whole subtree.
                uri_list = [self._uri]
                while uri_list:
                    uri = uri_list.pop()
                    if allprop:
                        self.proplist = self._dataclass.get_propnames(uri)
                        self.namespaces = self.proplist.keys()
                    gp, bp = self.get_propvalues(uri)
                    res = self.mk_prop_response(uri, gp, bp, doc)
                    ms.appendChild(res)
                    uri_childs = self._dataclass.get_childs(uri)
                    if uri_childs:
                        uri_list.extend(uri_childs)

            return doc.toxml(encoding="utf-8")


class WebdavResource(object):
    """Abstract GET body: sized, iterable and readable."""

    def __len__(self):
        raise NotImplementedError('Not implemented by subclass')

    def __iter__(self):
        raise NotImplementedError('Not implemented by subclass')

    def read(self, length=0):
        # length=0 means "read everything".
        __pychecker__ = 'no-argsused'
        raise NotImplementedError('Not implemented by subclass')


class JournalObjectResource(WebdavResource):
    """Stream the content of a single data store entry.

    The temporary file handed out by the data store is unlinked right
    after opening so it is cleaned up automatically when closed.
    """

    def __init__(self, fs_object):
        path = fs_object.get_data()
        if path:
            self._file = file(path)
            # Unlink immediately; the open fd keeps the data alive.
            os.remove(path)
            self._size = os.fstat(self._file.fileno()).st_size
        else:
            # Entry without content payload.
            self._file = None
            self._size = 0

    def __len__(self):
        return self._size

    def __iter__(self):
        # Yield CHUNK_SIZE pieces until EOF, then close the file.
        while self._size:
            data = self._file.read(CHUNK_SIZE)
            if not data:
                break
            yield data
        if self._file is not None:
            self._file.close()
            self._file = None

    def read(self, length=0):
        return self._file.read(length or self._size)


class BufferResource(WebdavResource):
    """In-memory resource; subclasses fill self._buffer with the body."""

    def __init__(self):
        self._position = 0
        self._buffer = ''

    def __len__(self):
        return len(self._buffer)

    def __iter__(self):
        while self._position < len(self._buffer):
            yield self._buffer[self._position:self._position + CHUNK_SIZE]
            self._position += CHUNK_SIZE

    def read(self, length=0):
        old_position = self._position
        self._position += length or len(self._buffer)
        return self._buffer[old_position:self._position]


class ObjectListHtmlResource(BufferResource):
    """HTML listing of a directory of Journal entries.

    ``footer`` (a string or list of lines) is appended verbatim before
    the closing markup; the root directory passes the search form here.
    """

    def __init__(self, directory, footer=None):
        super(ObjectListHtmlResource, self).__init__()
        self._buffer = self._generate_html(directory, footer)

    def _generate_html(self, directory, footer=None):
        # NOTE(review): the literals below originally contained the
        # page/table markup; it was stripped during extraction.
        lines = ['',
                 'Journal listing', '','',
                 '']
        for name, fs_object in directory.readdir():
            if name == '.':
                continue
            lines.append(self._generate_html_entry(name, fs_object,
                                                   directory))
        lines += ['NameDetails']
        if isinstance(footer, list):
            lines += footer
        elif isinstance(footer, basestring):
            lines += [footer]
        lines += ['']
        return '\n'.join(lines)

    def _generate_html_entry(self, name, fs_object, directory):
        """Return one listing row (entry link plus details link)."""
        if (isinstance(directory, fsemulation.SearchResultDirectory) and
            isinstance(fs_object, fsemulation.Symlink)):
            # This directory doesn't persist across queries, so we
            # have to encode the object_id in the URI. Since PyWebDAV
            # doesn't allow us to provide additional headers, we can't
            # just use the object_id for the path and provide the file
            # name to the client as a header. The URI must be
            # specially crafted so a) the client uses the title for
            # saving and b) the server has access to the object id.
            object_id = fs_object.object_id
            object_url = ('../searchresult/%s?object_id=%s'
                          % (urllib.quote(name.encode('utf-8')),
                             urllib.quote(object_id.encode('us-ascii'))))
            details_url = ''.join(['../' * directory.level, 'details/',
                                   urllib.quote(object_id.encode('us-ascii'))])
        elif isinstance(fs_object, fsemulation.Directory):
            object_url = urllib.quote(name.encode('utf-8'))
            details_url = ''
        else:
            object_id = fs_object.object_id
            object_url = urllib.quote(name.encode('utf-8'))
            details_url = ''.join(['../' * directory.level, 'details/',
                                   urllib.quote(object_id.encode('us-ascii'))])

        escaped_name = cgi.escape(name.encode('utf-8'))
        # NOTE(review): both format strings below lost their anchor/row
        # markup (placeholder count no longer matches the argument
        # tuples); restore from upstream before running.
        if isinstance(fs_object, fsemulation.Directory):
            return ('%s/' % (object_url, escaped_name))
        else:
            return ('%sShow'
                    ' details' % (object_url, escaped_name, details_url))


class DetailsPageResource(BufferResource):
    """HTML metadata page for one entry.

    Properties listed in ``important_properties`` are rendered first and
    highlighted; all remaining properties follow, sorted by name.
    """

    def __init__(self, fs_object, important_properties):
        super(DetailsPageResource, self).__init__()
        self._important_props = important_properties
        self._buffer = self._generate_html(fs_object)

    def _generate_html(self, fs_object):
        props = fs_object.get_properties()
        # NOTE(review): page/table markup stripped from these literals.
        lines = ['', 'Details', '', '']
        for name in self._important_props:
            lines += self._generate_html_prop(name, props.get(name), True)
        lines += ['', 'Additional metadata:', '']
        for name, value in sorted(props.items()):
            if name in self._important_props:
                continue
            lines += self._generate_html_prop(name, value)
        lines += ['', '']
        return '\n'.join(lines)

    def _generate_html_prop(self, name, value, important=False):
        """Return the row for one property, or [] if it is empty."""
        if not value:
            return []
        escaped_name = cgi.escape(name.encode('utf-8'))
        escaped_value = cgi.escape(value.encode('utf-8'))
        if important:
            # NOTE(review): originally wrapped the value in emphasis
            # markup; stripped during extraction.
            escaped_value = '%s' % (escaped_value, )
        return ['%s%s' % (escaped_name, escaped_value)]


class SearchPageResource(BufferResource):
    """HTML page containing only the search form."""

    def __init__(self):
        super(SearchPageResource, self).__init__()
        self._buffer = self._generate_html()

    def _generate_html(self):
        # NOTE(review): surrounding page markup stripped from literals.
        lines = ['', 'Journal search', '',
                 _SEARCH_HTML, '']
        return '\n'.join(lines)


class JournalHandler(dav_interface):
    """pywebdav ``dav_interface`` backend mapping URIs to the Journal.

    URI layout (relative to the base URI):
      /                    root listing (plus search form)
      /details/<id>        metadata page for an entry
      /search/?query=...   search form / search results
      /searchresult/<name>?object_id=<id>   entry from a search result
    Everything else is resolved through fsemulation.
    """

    # Standard DAV properties served for every resource.
    PROPS={"DAV:" : ('creationdate', 'getcontentlength', 'getcontenttype',
                     'getlastmodified', 'resourcetype')}

    def __init__(self, file_system, base_uri, verbose, important_properties):
        self._fs = file_system
        # required by dav_interface resp. PROPFIND
        self.baseuri = base_uri
        self.baseurl = base_uri
        self.verbose = verbose
        self._important_properties = important_properties

    def exists(self, uri):
        """Return True if the URI resolves to an object."""
        log.debug('exists %r', uri)
        try:
            self._lookup_uri(uri)
        except DAV_Error:
            return False
        else:
            return True

    def get_propnames(self, uri):
        """Return {namespace: property names}; adds SUGAR_NS for entries."""
        log.debug('get_propnames %r', uri)
        props_by_ns = dict(self.PROPS)
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.DSObject):
            props_by_ns[SUGAR_NS] = fs_object.list_properties()
        log.debug('props_by_ns=%r', props_by_ns)
        return props_by_ns

    def get_prop(self, uri, ns, propname):
        """Return a property value; SUGAR_NS values come from metadata.

        Unicode values that contain XML-invalid control characters, and
        all byte-string values, are returned base64 encoded with a
        'base64:' prefix.
        """
        if ns != SUGAR_NS:
            return dav_interface.get_prop(self, uri, ns, propname)
        log.debug('get_prop %r %r %r', uri, ns, propname)
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.DSObject):
            metadata = fs_object.get_properties([propname])
            if propname not in metadata:
                raise DAV_NotFound
            value = metadata[propname]
            if isinstance(value, unicode):
                if not [c for c in value if c in INVALID_XML_CHARS]:
                    return value
                else:
                    # contains control characters => return as binary string
                    # (base64 encoded)
                    value = value.encode('utf-8')
            # binary data (e.g. PNG previews)
            # FIXME: We can't add an XML element containing a text
            # node since the xml.dom.minidom implementation requires
            # the Document object for instantiating Nodes, but
            # propfind.PROPFIND does not pass the Document object down
            # (and there's not even private API to access it as it's a
            # local variable). So the only thing we can return is a
            # plain string.
            #element = document.Element('base64Binary', SCHEMA_NS)
            #text_node = document.createTextNode(value.encode('base64'))
            #element.appendChild(text_node)
            return 'base64:' + value.encode('base64')
        raise DAV_NotFound

    def get_childs(self, uri):
        """Return the child objects of the given URI as absolute URIs."""
        scheme, netloc = urlparse(str(uri))[:2]
        path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/')
        log.debug('get_childs %r', uri)
        fs_object = self._lookup_uri(uri)
        if not isinstance(fs_object, fsemulation.Directory):
            # PROPFIND.create_prop() recurses over all entities without
            # checking is_collection() first.
            return []
        if path:
            path += u'/'
        return ['%s://%s/%s%s' % (scheme, netloc, path.encode('utf-8'),
                                  child_name.encode('utf-8'))
                for child_name in fs_object.listdir()
                if not child_name in ['.', '..']]

    def get_data(self, uri, byte_range=None):
        """Return a WebdavResource for the GET body of ``uri``.

        NOTE(review): ``byte_range`` is accepted but not applied here;
        presumably handled by the caller — confirm against pywebdav.
        """
        __pychecker__ = 'no-returnvalues'
        log.debug('get_data %r %r', uri, byte_range)
        path, params_, query = urlparse(str(uri))[2:5]
        path = unicode(path, 'utf-8').strip('/')
        if (path + u'/').startswith('search/'):
            query = unicode(query, 'utf-8')
            return self._get_data_search(query)
        elif path.startswith('details/'):
            object_id = path.split(u'/')[1]
            path = u'/by-id/' + object_id.encode('us-ascii')
            fs_object = self._lookup_path(path)
            return DetailsPageResource(fs_object, self._important_properties)
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.RootDirectory):
            # Root listing gets the search form appended.
            return ObjectListHtmlResource(fs_object, _SEARCH_HTML)
        elif isinstance(fs_object, fsemulation.Directory):
            return ObjectListHtmlResource(fs_object)
        else:
            return JournalObjectResource(fs_object)

    def _get_data_search(self, url_query):
        """Serve the search form or the result listing for a query."""
        __pychecker__ = 'no-returnvalues'
        query = parse_qs(url_query).get('query', [''])[0]
        if not query:
            return SearchPageResource()
        if query.startswith('datastore:'):
            query = self._parse_datastore_uri(query)
        search_dir = self._fs.search({'query': query})
        try:
            return ObjectListHtmlResource(search_dir)
        except ValueError:
            # FIXME: better error mapping
            raise DAV_NotFound

    def _parse_datastore_uri(self, query):
        # 'datastore:<id>' -> 'uid:<id>' (len('datastore:') == 10).
        return 'uid:' + query[10:]

    def _get_dav_resourcetype(self, uri):
        log.debug('_get_dav_resourcetype %r', uri)
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.Directory):
            return COLLECTION
        else:
            return OBJECT

    def _get_dav_getcontentlength(self, uri):
        """Return the content length as a string (DAV convention)."""
        log.debug('_get_dav_getcontentlength %r', uri)
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.DSObject):
            return str(fs_object.get_size())
        else:
            # In theory, we should return the size of the HTML block
            # we return on a GET request on this directory. However,
            # native WebDAV clients are usually not used to download
            # the GET representation of a collection and non-WebDAV
            # clients don't request a WebDAV listing containing this
            # property. So as it's expensive to calculate (we need to
            # construct the full HTML block to determine the length),
            # we just claim it is empty and hope nothing breaks.
            return '0'

    def _get_dav_getcontenttype(self, uri):
        """Return the MIME type for ``uri`` (HTML for generated pages)."""
        log.debug('_get_dav_getcontenttype %r', uri)
        path = unicode(urlparse(str(uri))[2], 'utf-8').strip('/')
        if (path + u'/').startswith('search/'):
            return self._get_dav_getcontenttype_search(path)
        elif path.startswith(u'details/'):
            return 'text/html; charset=utf-8'
        fs_object = self._lookup_uri(uri)
        if isinstance(fs_object, fsemulation.Directory):
            return 'text/html; charset=utf-8'
        elif isinstance(fs_object, fsemulation.DSObject):
            metadata = fs_object.get_properties(['mime_type'])
            return str(metadata.get('mime_type', 'application/octet-stream'))
        raise DAV_NotFound

    def _get_dav_getcontenttype_search(self, path):
        return 'text/html; charset=utf-8'

    def get_creationdate(self, uri):
        """Return creation time (epoch float); falls back to now."""
        log.debug('get_creationdate %r', uri)
        fs_object = self._lookup_uri(uri)
        if not isinstance(fs_object, fsemulation.DSObject):
            return time.time()
        props = fs_object.get_properties(['creation_time', 'timestamp'])
        # Prefer creation_time, then timestamp, then current time.
        try:
            return float(props['creation_time'])
        except (KeyError, ValueError, TypeError):
            pass
        try:
            return float(props['timestamp'])
        except (KeyError, ValueError, TypeError):
            return time.time()

    def get_lastmodified(self, uri):
        """Return modification time (epoch float); falls back to now."""
        log.debug('get_lastmodified %r', uri)
        fs_object = self._lookup_uri(uri)
        if not isinstance(fs_object, fsemulation.DSObject):
            return time.time()
        props = fs_object.get_properties(['timestamp'])
        try:
            return float(props['timestamp'])
        except (KeyError, ValueError, TypeError):
            return time.time()

    def is_collection(self, uri):
        log.debug('is_collection %r', uri)
        fs_object = self._lookup_uri(uri)
        return isinstance(fs_object, fsemulation.Directory)

    def _lookup_uri(self, uri):
        """Resolve a full URI to an fsemulation object."""
        parsed_uri = urlparse(str(uri))
        path = unicode(parsed_uri.path, 'utf-8')
        query_qs = parse_qs(parsed_uri.query)
        object_id = query_qs.get('object_id', [''])[0]
        if path.startswith(u'/searchresult/') and object_id:
            # The path component is for the client to use as file name
            # when saving. The query references the server-side object
            # using the object_id.
            path = u'/by-id/' + object_id.encode('us-ascii')
        return self._lookup_path(path)

    def _lookup_path(self, path):
        """Resolve a path, mapping lookup failures to DAV_NotFound."""
        try:
            # WebDAV doesn't support symlinks :-/
            fs_object = self._fs.resolve(path, follow_links=True)
        except (IOError, ValueError):
            # FIXME: better error mapping
            raise DAV_NotFound
        return fs_object

    def _parse_range(self, byte_range, size):
        """Clamp a (start, end) byte range pair against ``size``."""
        if not byte_range or not byte_range[0]:
            start = 0
        else:
            start = int(byte_range[0])
        if not byte_range or not byte_range[1]:
            end = size
        else:
            end = min(int(byte_range[1]), size)
        if start > size:
            raise DAV_Requested_Range_Not_Satisfiable
        return start, end


class _DummyConfigDAV:
    """Attribute bag standing in for pywebdav's DAV config section."""

    def __init__(self, **kw):
        self.__dict__.update(**kw)

    def getboolean(self, name):
        # Mirror ConfigParser.getboolean() truthy spellings; missing
        # options default to false.
        return (str(getattr(self, name, 0)) in
                ('1', "yes", "true", "on", "True"))


def setupDummyConfig(**kw):
    """Return a minimal config object as expected by pywebdav."""
    class DummyConfig:
        DAV = _DummyConfigDAV(**kw)

    return DummyConfig()


class RequestHandler(DAVAuthHandler):
    """DAV request handler wired to JournalHandler, with bug workarounds."""

    protocol_version = 'HTTP/1.1'

    # These class attributes need to be overridden at run-time.
    IFACE_CLASS = None
    _config = None

    if 'ALLPROP_RECURSE' in _PYWEBDAV_BUGS or 'PROPFIND_NS' in _PYWEBDAV_BUGS:
        # pylint: disable=W0402,W0404,C0324,W0612
        def do_PROPFIND(self):
            """Handle PROPFIND using the patched PROPFIND class above."""
            from string import atoi
            # exact copy of original, just to override the PROPFIND class

            dc = self.IFACE_CLASS

            # read the body containing the xml request
            # iff there is no body then this is an ALLPROP request
            body = None
            if 'Content-Length' in self.headers:
                l = self.headers['Content-Length']
                body = self.rfile.read(atoi(l))

            uri = urljoin(self.get_baseuri(dc), self.path)
            uri = urllib.unquote(uri)

            try:
                pf = PROPFIND(uri, dc, self.headers.get('Depth', 'infinity'),
                              body)
            except ExpatError:
                # parse error
                return self.send_status(400)

            try:
                DATA = '%s\n' % pf.createResponse()
            except DAV_Error, (ec,dd):
                return self.send_status(ec)

            # work around MSIE DAV bug for creation and modified date
            # taken from Resource.py @ Zope webdav
            # NOTE(review): the replace() arguments originally contained
            # XML tags; they were stripped during extraction, leaving
            # no-op ('', '') pairs. Restore from upstream.
            if (self.headers.get('User-Agent') ==
                    'Microsoft Data Access Internet Publishing Provider'
                    ' DAV 1.1'):
                DATA = DATA.replace('', '')
                DATA = DATA.replace('', '')

            self.send_body_chunks_if_http11(DATA, 207, 'Multi-Status',
                                            'Multiple responses')

    if 'HTTP10_KEEPALIVE' in _PYWEBDAV_BUGS:
        # work-around for Debian#710672: upstream kept HTTP/1.0
        # connections alive without announcing a length.
        def send_body(self, DATA, code=None, msg=None, desc=None,
                      ctype='application/octet-stream', headers={}):
            """ send a body in one part """
            __pychecker__ = 'no-argsused no-moddefvalue no-shadowbuiltin'
            log.debug("Use send_body method")

            # Drop keep-alive headers for pre-1.1 clients.
            if self.request_version != 'HTTP/1.1':
                headers.pop('Keep-Alive', None)
                headers.pop('Connection', None)

            self.send_response(code, message=msg)
            if 'Connection' not in headers:
                self.send_header("Connection", "close")
            self.send_header("Accept-Ranges", "bytes")
            self._send_dav_version()

            for a, v in headers.items():
                self.send_header(a, v)

            if DATA:
                # Compress large bodies if the client accepts gzip.
                if 'gzip' in self.headers.get('Accept-Encoding',
                                              '').split(',') \
                        and len(DATA) > self.encode_threshold:
                    buffer = StringIO.StringIO()
                    output = gzip.GzipFile(mode='wb', fileobj=buffer)
                    if isinstance(DATA, str) or isinstance(DATA, unicode):
                        output.write(DATA)
                    else:
                        for buf in DATA:
                            output.write(buf)
                    output.close()
                    buffer.seek(0)
                    DATA = buffer.getvalue()
                    self.send_header('Content-Encoding', 'gzip')

                self.send_header('Content-Length', len(DATA))
                self.send_header('Content-Type', ctype)
            else:
                self.send_header('Content-Length', 0)

            self.end_headers()
            if DATA:
                if isinstance(DATA, str) or isinstance(DATA, unicode):
                    log.debug("Don't use iterator")
                    self.wfile.write(DATA)
                else:
                    if self._config.DAV.getboolean(
                            'http_response_use_iterator'):
                        # Use iterator to reduce using memory
                        log.debug("Use iterator")
                        for buf in DATA:
                            self.wfile.write(buf)
                            self.wfile.flush()
                    else:
                        # Don't use iterator, it's a compatibility option
                        log.debug("Don't use iterator")
                        self.wfile.write(DATA.read())

    def log_message(self, format, *args):
        """Route BaseHTTPServer access logging through our logger."""
        # pylint: disable=W0622
        log.info('%s - - [%s] %s', self.address_string(),
                 self.log_date_time_string(), format % args)


def main():
    """Parse options, configure logging/TLS and run the server.

    Returns the process exit code (0 on clean shutdown).
    """
    global log
    parser = optparse.OptionParser()
    parser.add_option('-d', '--debug', action='store_true', default=False,
                      help='enable additional debugging output')
    parser.add_option('-H', '--host', default='localhost', metavar='HOST',
                      help='bind to HOST; use empty string to listen on all'
                      ' interfaces [default: %default]')
    parser.add_option('-p', '--port', default=8009, metavar='PORT',
                      type='int', help='listen on PORT [default: %default]')
    parser.add_option('-r', '--root-query', default="{'keep': '1'}",
                      metavar='QUERY', help='publish all data store entries'
                      ' matching the data store query QUERY'
                      ' [default: %default]')
    parser.add_option('-i', '--important-properties',
                      default="description tags title", metavar='PROPERTIES',
                      help='Space separated list of'
                      ' properties to highlight on the details page'
                      ' [default: %default]')
    parser.add_option('-q', '--quiet', action='store_false', dest='verbose',
                      help='only output warnings and errors')
    parser.add_option('-v', '--verbose', action='store_true', default=True,
                      help='override a previous -q or --quiet option')
    parser.add_option('--private-key', metavar='FILE',
                      help='private key file for TLS (enables TLS)')
    parser.add_option('--certificate', metavar='FILE',
                      help='X.509 certificate file for TLS (required for TLS)')
    parser.add_option('--tls-protocol', metavar='MODE', default='TLSv1',
                      choices=('SSLv23', 'TLSv1'),
                      help='TLS protocol to use (affects compatibility with'
                      ' clients) [choices: %choices, default: %default]')
    parser.add_option('--ca-certificates', metavar='FILE',
                      help='X.509 CA certificates to verify client'
                      ' certificates against (required when requesting client'
                      ' certificates)')
    parser.add_option('--client-certificate', metavar='MODE',
                      default='none', choices=('none', 'optional', 'required'),
                      help='whether to request and/or require a client'
                      ' certificate [choices: %choices, default: %default]')
    options, args = parser.parse_args()
    if args:
        parser.error('extra arguments passed')

    if options.private_key and not options.certificate:
        parser.error('Need server certificate in TLS mode')

    if options.client_certificate != 'none' and not options.ca_certificates:
        parser.error('Need CA certificates when requesting client'
                     ' certificates')

    # NOTE(review): eval() of a command line option; acceptable only
    # because the operator controls the command line, but a safer
    # parser (e.g. ast.literal_eval) would be preferable.
    root_query = eval(options.root_query)
    if options.debug:
        logging.basicConfig(level=0)
    elif options.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    log = logging.getLogger('journal2webdav')

    emulated_fs = fsemulation.FSEmulation(root_query)
    handler = RequestHandler
    if options.private_key:
        base_url = 'https://%s:%d' % (options.host, options.port)
    else:
        base_url = 'http://%s:%d' % (options.host, options.port)
    important_properties = options.important_properties.split(' ')
    # Wire the backend and pseudo-configuration into the handler class.
    handler.IFACE_CLASS = JournalHandler(emulated_fs, base_url,
                                         options.debug, important_properties)
    handler.DO_AUTH = False
    handler.IFACE_CLASS.mimecheck = True
    # pylint: disable=W0201
    handler._config = setupDummyConfig(verbose=options.debug,
                                       port=options.port, host=options.host,
                                       noauth=True,
                                       chunked_http_response=True)
    runner = ThreadedHTTPServer((options.host, options.port), handler)
    if options.private_key:
        cert_reqs = getattr(ssl, 'CERT_' + options.client_certificate.upper())
        ssl_version = getattr(ssl, 'PROTOCOL_' + options.tls_protocol)
        runner.socket = ssl.wrap_socket(
            runner.socket, server_side=True, certfile=options.certificate,
            keyfile=options.private_key, ssl_version=ssl_version,
            ca_certs=options.ca_certificates, cert_reqs=cert_reqs)

    log.info('Server running on %s', base_url)
    try:
        runner.serve_forever()
    except KeyboardInterrupt:
        log.info('Killed by user')
    return 0


if __name__ == '__main__':
    sys.exit(main())