diff options
Diffstat (limited to 'cherrypy/lib')
-rwxr-xr-x | cherrypy/lib/__init__.py | 45 | ||||
-rwxr-xr-x | cherrypy/lib/auth.py | 87 | ||||
-rwxr-xr-x | cherrypy/lib/auth_basic.py | 87 | ||||
-rwxr-xr-x | cherrypy/lib/auth_digest.py | 365 | ||||
-rwxr-xr-x | cherrypy/lib/caching.py | 465 | ||||
-rwxr-xr-x | cherrypy/lib/covercp.py | 365 | ||||
-rwxr-xr-x | cherrypy/lib/cpstats.py | 661 | ||||
-rwxr-xr-x | cherrypy/lib/cptools.py | 611 | ||||
-rwxr-xr-x | cherrypy/lib/encoding.py | 388 | ||||
-rwxr-xr-x | cherrypy/lib/http.py | 7 | ||||
-rwxr-xr-x | cherrypy/lib/httpauth.py | 354 | ||||
-rwxr-xr-x | cherrypy/lib/httputil.py | 469 | ||||
-rwxr-xr-x | cherrypy/lib/jsontools.py | 87 | ||||
-rwxr-xr-x | cherrypy/lib/profiler.py | 208 | ||||
-rwxr-xr-x | cherrypy/lib/reprconf.py | 351 | ||||
-rwxr-xr-x | cherrypy/lib/sessions.py | 832 | ||||
-rwxr-xr-x | cherrypy/lib/static.py | 352 | ||||
-rwxr-xr-x | cherrypy/lib/xmlrpc.py | 49 |
18 files changed, 5783 insertions, 0 deletions
# NOTE: this listing holds several *separate* modules that were added in one
# commit.  Each banner below marks the start of a different file; names that
# repeat across sections (basic_auth, digest_auth, get, ...) belong to
# different modules and are not meant to shadow one another.

# ===== cherrypy/lib/__init__.py (new file, mode 100755, index 0000000..611350c) =====
"""CherryPy Library"""

# Deprecated in CherryPy 3.2 -- remove in CherryPy 3.3
from cherrypy.lib.reprconf import _Builder, unrepr, modules, attributes


class file_generator(object):
    """Yield the given input (a file object) in chunks (default 64k). (Core)"""

    def __init__(self, input, chunkSize=65536):
        self.input = input
        self.chunkSize = chunkSize

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next chunk; close the input and stop when it is empty."""
        chunk = self.input.read(self.chunkSize)
        if chunk:
            return chunk
        else:
            if hasattr(self.input, 'close'):
                self.input.close()
            raise StopIteration()
    # Python 2 spelling of the iterator protocol.
    next = __next__


def file_generator_limited(fileobj, count, chunk_size=65536):
    """Yield the given file object in chunks, stopping after `count`
    bytes has been emitted.  Default chunk size is 64kB. (Core)
    """
    remaining = count
    while remaining > 0:
        chunk = fileobj.read(min(chunk_size, remaining))
        chunklen = len(chunk)
        if chunklen == 0:
            # The file ended before `count` bytes were read.
            return
        remaining -= chunklen
        yield chunk


def set_vary_header(response, header_name):
    "Add a Vary header to a response"
    varies = response.headers.get("Vary", "")
    varies = [x.strip() for x in varies.split(",") if x.strip()]
    if header_name not in varies:
        varies.append(header_name)
    response.headers['Vary'] = ", ".join(varies)


# ===== cherrypy/lib/auth.py (new file, mode 100755, index 0000000..7d2f6dc) =====
import cherrypy
from cherrypy.lib import httpauth


def check_auth(users, encrypt=None, realm=None):
    """If an authorization header contains credentials, return True, else False."""
    request = cherrypy.serving.request
    if 'authorization' in request.headers:
        # make sure the provided credentials are correctly set
        ah = httpauth.parseAuthorization(request.headers['authorization'])
        if ah is None:
            raise cherrypy.HTTPError(400, 'Bad Request')

        if not encrypt:
            encrypt = httpauth.DIGEST_AUTH_ENCODERS[httpauth.MD5]

        if hasattr(users, '__call__'):
            try:
                # backward compatibility
                users = users()  # expect it to return a dictionary

                if not isinstance(users, dict):
                    raise ValueError("Authentication users must be a dictionary")

                # fetch the user password
                password = users.get(ah["username"], None)
            except TypeError:
                # returns a password (encrypted or clear text)
                password = users(ah["username"])
        else:
            if not isinstance(users, dict):
                raise ValueError("Authentication users must be a dictionary")

            # fetch the user password
            password = users.get(ah["username"], None)

        # validate the authorization by re-computing it here
        # and compare it with what the user-agent provided
        if httpauth.checkResponse(ah, password, method=request.method,
                                  encrypt=encrypt, realm=realm):
            request.login = ah["username"]
            return True

        request.login = False
    return False


def basic_auth(realm, users, encrypt=None, debug=False):
    """If auth fails, raise 401 with a basic authentication header.

    realm
        A string containing the authentication realm.

    users
        A dict of the form: {username: password} or a callable returning a dict.

    encrypt
        callable used to encrypt the password returned from the user-agent.
        if None it defaults to a md5 encryption.

    """
    if check_auth(users, encrypt):
        if debug:
            cherrypy.log('Auth successful', 'TOOLS.BASIC_AUTH')
        return

    # inform the user-agent this path is protected
    cherrypy.serving.response.headers['www-authenticate'] = httpauth.basicAuth(realm)

    raise cherrypy.HTTPError(401, "You are not authorized to access that resource")


def digest_auth(realm, users, debug=False):
    """If auth fails, raise 401 with a digest authentication header.

    realm
        A string containing the authentication realm.
    users
        A dict of the form: {username: password} or a callable returning a dict.
    """
    if check_auth(users, realm=realm):
        if debug:
            cherrypy.log('Auth successful', 'TOOLS.DIGEST_AUTH')
        return

    # inform the user-agent this path is protected
    cherrypy.serving.response.headers['www-authenticate'] = httpauth.digestAuth(realm)

    raise cherrypy.HTTPError(401, "You are not authorized to access that resource")


# ===== cherrypy/lib/auth_basic.py (new file, mode 100755, index 0000000..2c05e01) =====
# This file is part of CherryPy <http://www.cherrypy.org/>
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8

__doc__ = """This module provides a CherryPy 3.x tool which implements
the server-side of HTTP Basic Access Authentication, as described in :rfc:`2617`.

Example usage, using the built-in checkpassword_dict function which uses a dict
as the credentials store::

    userpassdict = {'bird' : 'bebop', 'ornette' : 'wayout'}
    checkpassword = cherrypy.lib.auth_basic.checkpassword_dict(userpassdict)
    basic_auth = {'tools.auth_basic.on': True,
                  'tools.auth_basic.realm': 'earth',
                  'tools.auth_basic.checkpassword': checkpassword,
    }
    app_config = { '/' : basic_auth }

"""

__author__ = 'visteya'
__date__ = 'April 2009'

import binascii
from cherrypy._cpcompat import base64_decode
import cherrypy


def checkpassword_dict(user_password_dict):
    """Returns a checkpassword function which checks credentials
    against a dictionary of the form: {username : password}.

    If you want a simple dictionary-based authentication scheme, use
    checkpassword_dict(my_credentials_dict) as the value for the
    checkpassword argument to basic_auth().
    """
    def checkpassword(realm, user, password):
        p = user_password_dict.get(user)
        # A missing user or an empty stored password never authenticates.
        return bool(p) and p == password

    return checkpassword


def basic_auth(realm, checkpassword, debug=False):
    """A CherryPy tool which hooks at before_handler to perform
    HTTP Basic Access Authentication, as specified in :rfc:`2617`.

    If the request has an 'authorization' header with a 'Basic' scheme, this
    tool attempts to authenticate the credentials supplied in that header.  If
    the request has no 'authorization' header, or if it does but the scheme is
    not 'Basic', or if authentication fails, the tool sends a 401 response with
    a 'WWW-Authenticate' Basic header.

    realm
        A string containing the authentication realm.

    checkpassword
        A callable which checks the authentication credentials.
        Its signature is checkpassword(realm, username, password). where
        username and password are the values obtained from the request's
        'authorization' header.  If authentication succeeds, checkpassword
        returns True, else it returns False.

    """

    if '"' in realm:
        raise ValueError('Realm cannot contain the " (quote) character.')
    request = cherrypy.serving.request

    auth_header = request.headers.get('authorization')
    if auth_header is not None:
        try:
            scheme, params = auth_header.split(' ', 1)
            if scheme.lower() == 'basic':
                username, password = base64_decode(params).split(':', 1)
                if checkpassword(realm, username, password):
                    if debug:
                        cherrypy.log('Auth succeeded', 'TOOLS.AUTH_BASIC')
                    request.login = username
                    return  # successful authentication
        except (ValueError, binascii.Error):  # split() error, base64.decodestring() error
            raise cherrypy.HTTPError(400, 'Bad Request')

    # Respond with 401 status and a WWW-Authenticate header
    cherrypy.serving.response.headers['www-authenticate'] = 'Basic realm="%s"' % realm
    raise cherrypy.HTTPError(401, "You are not authorized to access that resource")


# ===== cherrypy/lib/auth_digest.py (new file, mode 100755, index 0000000..67578e0) =====
# This file is part of CherryPy <http://www.cherrypy.org/>
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:expandtab:fileencoding=utf-8

__doc__ = """An implementation of the server-side of HTTP Digest Access
Authentication, which is described in :rfc:`2617`.

Example usage, using the built-in get_ha1_dict_plain function which uses a dict
of plaintext passwords as the credentials store::

    userpassdict = {'alice' : '4x5istwelve'}
    get_ha1 = cherrypy.lib.auth_digest.get_ha1_dict_plain(userpassdict)
    digest_auth = {'tools.auth_digest.on': True,
                   'tools.auth_digest.realm': 'wonderland',
                   'tools.auth_digest.get_ha1': get_ha1,
                   'tools.auth_digest.key': 'a565c27146791cfb',
    }
    app_config = { '/' : digest_auth }
"""

__author__ = 'visteya'
__date__ = 'April 2009'


import time
from cherrypy._cpcompat import parse_http_list, parse_keqv_list

import cherrypy
from cherrypy._cpcompat import md5, ntob
md5_hex = lambda s: md5(ntob(s)).hexdigest()

qop_auth = 'auth'
qop_auth_int = 'auth-int'
valid_qops = (qop_auth, qop_auth_int)

valid_algorithms = ('MD5', 'MD5-sess')


def TRACE(msg):
    """Log a debug message under the TOOLS.AUTH_DIGEST context."""
    cherrypy.log(msg, context='TOOLS.AUTH_DIGEST')


# Three helper functions for users of the tool, providing three variants
# of get_ha1() functions for three different kinds of credential stores.
def get_ha1_dict_plain(user_password_dict):
    """Returns a get_ha1 function which obtains a plaintext password from a
    dictionary of the form: {username : password}.

    If you want a simple dictionary-based authentication scheme, with plaintext
    passwords, use get_ha1_dict_plain(my_userpass_dict) as the value for the
    get_ha1 argument to digest_auth().
    """
    def get_ha1(realm, username):
        password = user_password_dict.get(username)
        if password:
            return md5_hex('%s:%s:%s' % (username, realm, password))
        return None

    return get_ha1


def get_ha1_dict(user_ha1_dict):
    """Returns a get_ha1 function which obtains a HA1 password hash from a
    dictionary of the form: {username : HA1}.

    If you want a dictionary-based authentication scheme, but with
    pre-computed HA1 hashes instead of plain-text passwords, use
    get_ha1_dict(my_userha1_dict) as the value for the get_ha1
    argument to digest_auth().
    """
    def get_ha1(realm, username):
        # Look up by the `username` parameter (the original referenced an
        # undefined name and raised NameError on every call).
        return user_ha1_dict.get(username)

    return get_ha1


def get_ha1_file_htdigest(filename):
    """Returns a get_ha1 function which obtains a HA1 password hash from a
    flat file with lines of the same format as that produced by the Apache
    htdigest utility. For example, for realm 'wonderland', username 'alice',
    and password '4x5istwelve', the htdigest line would be::

        alice:wonderland:3238cdfe91a8b2ed8e39646921a02d4c

    If you want to use an Apache htdigest file as the credentials store,
    then use get_ha1_file_htdigest(my_htdigest_file) as the value for the
    get_ha1 argument to digest_auth().  It is recommended that the filename
    argument be an absolute path, to avoid problems.
    """
    def get_ha1(realm, username):
        result = None
        f = open(filename, 'r')
        # try/finally so the handle is released even if a malformed line
        # makes the three-way unpacking below raise ValueError.
        try:
            for line in f:
                u, r, ha1 = line.rstrip().split(':')
                if u == username and r == realm:
                    result = ha1
                    break
        finally:
            f.close()
        return result

    return get_ha1


def synthesize_nonce(s, key, timestamp=None):
    """Synthesize a nonce value which resists spoofing and can be checked for staleness.
    Returns a string suitable as the value for 'nonce' in the www-authenticate header.

    s
        A string related to the resource, such as the hostname of the server.

    key
        A secret string known only to the server.

    timestamp
        An integer seconds-since-the-epoch timestamp

    """
    if timestamp is None:
        timestamp = int(time.time())
    h = md5_hex('%s:%s:%s' % (timestamp, s, key))
    nonce = '%s:%s' % (timestamp, h)
    return nonce


def H(s):
    """The hash function H"""
    return md5_hex(s)


class HttpDigestAuthorization (object):
    """Class to parse a Digest Authorization header and perform re-calculation
    of the digest.
    """

    def errmsg(self, s):
        """Prefix `s` so errors identify the header they refer to."""
        return 'Digest Authorization header: %s' % s

    def __init__(self, auth_header, http_method, debug=False):
        """Parse `auth_header`; raise ValueError if it is not a usable Digest header."""
        self.http_method = http_method
        self.debug = debug
        scheme, params = auth_header.split(" ", 1)
        self.scheme = scheme.lower()
        if self.scheme != 'digest':
            raise ValueError('Authorization scheme is not "Digest"')

        self.auth_header = auth_header

        # make a dict of the params
        items = parse_http_list(params)
        paramsd = parse_keqv_list(items)

        self.realm = paramsd.get('realm')
        self.username = paramsd.get('username')
        self.nonce = paramsd.get('nonce')
        self.uri = paramsd.get('uri')
        self.method = paramsd.get('method')
        self.response = paramsd.get('response')  # the response digest
        self.algorithm = paramsd.get('algorithm', 'MD5')
        self.cnonce = paramsd.get('cnonce')
        self.opaque = paramsd.get('opaque')
        self.qop = paramsd.get('qop')  # qop
        self.nc = paramsd.get('nc')  # nonce count

        # perform some correctness checks
        if self.algorithm not in valid_algorithms:
            raise ValueError(self.errmsg("Unsupported value for algorithm: '%s'" % self.algorithm))

        has_reqd = self.username and \
                   self.realm and \
                   self.nonce and \
                   self.uri and \
                   self.response
        if not has_reqd:
            raise ValueError(self.errmsg("Not all required parameters are present."))

        if self.qop:
            if self.qop not in valid_qops:
                raise ValueError(self.errmsg("Unsupported value for qop: '%s'" % self.qop))
            if not (self.cnonce and self.nc):
                raise ValueError(self.errmsg("If qop is sent then cnonce and nc MUST be present"))
        else:
            if self.cnonce or self.nc:
                raise ValueError(self.errmsg("If qop is not sent, neither cnonce nor nc can be present"))

    def __str__(self):
        return 'authorization : %s' % self.auth_header

    def validate_nonce(self, s, key):
        """Validate the nonce.
        Returns True if nonce was generated by synthesize_nonce() and the timestamp
        is not spoofed, else returns False.

        s
            A string related to the resource, such as the hostname of the server.

        key
            A secret string known only to the server.

        Both s and key must be the same values which were used to synthesize the nonce
        we are trying to validate.
        """
        try:
            timestamp, hashpart = self.nonce.split(':', 1)
            s_timestamp, s_hashpart = synthesize_nonce(s, key, timestamp).split(':', 1)
            is_valid = s_hashpart == hashpart
            if self.debug:
                TRACE('validate_nonce: %s' % is_valid)
            return is_valid
        except ValueError:  # split() error
            pass
        return False

    def is_nonce_stale(self, max_age_seconds=600):
        """Returns True if a validated nonce is stale. The nonce contains a
        timestamp in plaintext and also a secure hash of the timestamp. You should
        first validate the nonce to ensure the plaintext timestamp is not spoofed.
        """
        try:
            timestamp, hashpart = self.nonce.split(':', 1)
            if int(timestamp) + max_age_seconds > int(time.time()):
                return False
        except ValueError:  # int() error
            pass
        if self.debug:
            TRACE("nonce is stale")
        return True

    def HA2(self, entity_body=''):
        """Returns the H(A2) string. See :rfc:`2617` section 3.2.2.3."""
        # RFC 2617 3.2.2.3
        # If the "qop" directive's value is "auth" or is unspecified, then A2 is:
        #    A2 = method ":" digest-uri-value
        #
        # If the "qop" value is "auth-int", then A2 is:
        #    A2 = method ":" digest-uri-value ":" H(entity-body)
        if self.qop is None or self.qop == "auth":
            a2 = '%s:%s' % (self.http_method, self.uri)
        elif self.qop == "auth-int":
            a2 = "%s:%s:%s" % (self.http_method, self.uri, H(entity_body))
        else:
            # in theory, this should never happen, since I validate qop in __init__()
            raise ValueError(self.errmsg("Unrecognized value for qop!"))
        return H(a2)

    def request_digest(self, ha1, entity_body=''):
        """Calculates the Request-Digest. See :rfc:`2617` section 3.2.2.1.

        ha1
            The HA1 string obtained from the credentials store.

        entity_body
            If 'qop' is set to 'auth-int', then A2 includes a hash
            of the "entity body".  The entity body is the part of the
            message which follows the HTTP headers. See :rfc:`2617` section
            4.3.  This refers to the entity the user agent sent in the request which
            has the Authorization header. Typically GET requests don't have an entity,
            and POST requests do.

        """
        ha2 = self.HA2(entity_body)
        # Request-Digest -- RFC 2617 3.2.2.1
        if self.qop:
            req = "%s:%s:%s:%s:%s" % (self.nonce, self.nc, self.cnonce, self.qop, ha2)
        else:
            req = "%s:%s" % (self.nonce, ha2)

        # RFC 2617 3.2.2.2
        #
        # If the "algorithm" directive's value is "MD5" or is unspecified, then A1 is:
        # A1 = unq(username-value) ":" unq(realm-value) ":" passwd
        #
        # If the "algorithm" directive's value is "MD5-sess", then A1 is
        # calculated only once - on the first request by the client following
        # receipt of a WWW-Authenticate challenge from the server.
        # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd )
        #         ":" unq(nonce-value) ":" unq(cnonce-value)
        if self.algorithm == 'MD5-sess':
            ha1 = H('%s:%s:%s' % (ha1, self.nonce, self.cnonce))

        digest = H('%s:%s' % (ha1, req))
        return digest


def www_authenticate(realm, key, algorithm='MD5', nonce=None, qop=qop_auth, stale=False):
    """Constructs a WWW-Authenticate header for Digest authentication."""
    if qop not in valid_qops:
        raise ValueError("Unsupported value for qop: '%s'" % qop)
    if algorithm not in valid_algorithms:
        raise ValueError("Unsupported value for algorithm: '%s'" % algorithm)

    if nonce is None:
        nonce = synthesize_nonce(realm, key)
    s = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"' % (
        realm, nonce, algorithm, qop)
    if stale:
        s += ', stale="true"'
    return s


def digest_auth(realm, get_ha1, key, debug=False):
    """A CherryPy tool which hooks at before_handler to perform
    HTTP Digest Access Authentication, as specified in :rfc:`2617`.

    If the request has an 'authorization' header with a 'Digest' scheme, this
    tool authenticates the credentials supplied in that header.  If
    the request has no 'authorization' header, or if it does but the scheme is
    not "Digest", or if authentication fails, the tool sends a 401 response with
    a 'WWW-Authenticate' Digest header.

    realm
        A string containing the authentication realm.

    get_ha1
        A callable which looks up a username in a credentials store
        and returns the HA1 string, which is defined in the RFC to be
        MD5(username : realm : password).  The function's signature is:
        ``get_ha1(realm, username)``
        where username is obtained from the request's 'authorization' header.
        If username is not found in the credentials store, get_ha1() returns
        None.

    key
        A secret string known only to the server, used in the synthesis of nonces.

    """
    request = cherrypy.serving.request

    auth_header = request.headers.get('authorization')
    nonce_is_stale = False
    if auth_header is not None:
        try:
            auth = HttpDigestAuthorization(auth_header, request.method, debug=debug)
        except ValueError:
            raise cherrypy.HTTPError(400, "The Authorization header could not be parsed.")

        if debug:
            TRACE(str(auth))

        if auth.validate_nonce(realm, key):
            ha1 = get_ha1(realm, auth.username)
            if ha1 is not None:
                # note that for request.body to be available we need to hook in at
                # before_handler, not on_start_resource like 3.1.x digest_auth does.
                digest = auth.request_digest(ha1, entity_body=request.body)
                if digest == auth.response:  # authenticated
                    if debug:
                        TRACE("digest matches auth.response")
                    # Now check if nonce is stale.
                    # The choice of ten minutes' lifetime for nonce is somewhat arbitrary
                    nonce_is_stale = auth.is_nonce_stale(max_age_seconds=600)
                    if not nonce_is_stale:
                        request.login = auth.username
                        if debug:
                            TRACE("authentication of %s successful" % auth.username)
                        return

    # Respond with 401 status and a WWW-Authenticate header
    header = www_authenticate(realm, key, stale=nonce_is_stale)
    if debug:
        TRACE(header)
    cherrypy.serving.response.headers['WWW-Authenticate'] = header
    raise cherrypy.HTTPError(401, "You are not authorized to access that resource")


# ===== cherrypy/lib/caching.py (new file, mode 100755, index 0000000..435b9dc) =====
"""
CherryPy implements a simple caching system as a pluggable Tool. This tool tries
to be an (in-process) HTTP/1.1-compliant cache. It's not quite there yet, but
it's probably good enough for most sites.

In general, GET responses are cached (along with selecting headers) and, if
another request arrives for the same resource, the caching Tool will return 304
Not Modified if possible, or serve the cached response otherwise. It also sets
request.cached to True if serving a cached representation, and sets
request.cacheable to False (so it doesn't get cached again).

If POST, PUT, or DELETE requests are made for a cached resource, they invalidate
(delete) any cached response.

Usage
=====

Configuration file example::

    [/]
    tools.caching.on = True
    tools.caching.delay = 3600

You may use a class other than the default
:class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config
entry ``cache_class``; supply the full dotted name of the replacement class
as the config value. It must implement the basic methods ``get``, ``put``,
``delete``, and ``clear``.

You may set any attribute, including overriding methods, on the cache
instance by providing them in config. The above sets the
:attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example.
"""

import datetime
import sys
import threading
import time

import cherrypy
from cherrypy.lib import cptools, httputil
from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted

# On Python 2 threading.Event is a factory function and the class is the
# private threading._Event; on Python 3 threading.Event is the class itself
# and _Event does not exist.  Resolve the class once for isinstance() checks.
_EventClass = getattr(threading, '_Event', threading.Event)


class Cache(object):
    """Base class for Cache implementations."""

    def get(self):
        """Return the current variant if in the cache, else None."""
        raise NotImplementedError

    def put(self, obj, size):
        """Store the current variant in the cache."""
        raise NotImplementedError

    def delete(self):
        """Remove ALL cached variants of the current resource."""
        raise NotImplementedError

    def clear(self):
        """Reset the cache to its initial, empty state."""
        raise NotImplementedError


# ------------------------------- Memory Cache ------------------------------- #


class AntiStampedeCache(dict):
    """A storage system for cached items which reduces stampede collisions."""

    def wait(self, key, timeout=5, debug=False):
        """Return the cached value for the given key, or None.

        If timeout is not None, and the value is already
        being calculated by another thread, wait until the given timeout has
        elapsed. If the value is available before the timeout expires, it is
        returned. If not, None is returned, and a sentinel placed in the cache
        to signal other threads to wait.

        If timeout is None, no waiting is performed nor sentinels used.
        """
        value = self.get(key)
        if isinstance(value, _EventClass):
            if timeout is None:
                # Ignore the other thread and recalc it ourselves.
                if debug:
                    cherrypy.log('No timeout', 'TOOLS.CACHING')
                return None

            # Wait until it's done or times out.
            if debug:
                cherrypy.log('Waiting up to %s seconds' % timeout, 'TOOLS.CACHING')
            value.wait(timeout)
            if value.result is not None:
                # The other thread finished its calculation. Use it.
                if debug:
                    cherrypy.log('Result!', 'TOOLS.CACHING')
                return value.result
            # Timed out. Stick an Event in the slot so other threads wait
            # on this one to finish calculating the value.
            if debug:
                cherrypy.log('Timed out', 'TOOLS.CACHING')
            e = threading.Event()
            e.result = None
            dict.__setitem__(self, key, e)

            return None
        elif value is None:
            # Stick an Event in the slot so other threads wait
            # on this one to finish calculating the value.
            if debug:
                cherrypy.log('Timed out', 'TOOLS.CACHING')
            e = threading.Event()
            e.result = None
            dict.__setitem__(self, key, e)
        return value

    def __setitem__(self, key, value):
        """Set the cached value for the given key."""
        existing = self.get(key)
        dict.__setitem__(self, key, value)
        if isinstance(existing, _EventClass):
            # Set Event.result so other threads waiting on it have
            # immediate access without needing to poll the cache again.
            existing.result = value
            existing.set()


class MemoryCache(Cache):
    """An in-memory cache for varying response content.

    Each key in self.store is a URI, and each value is an AntiStampedeCache.
    The response for any given URI may vary based on the values of
    "selecting request headers"; that is, those named in the Vary
    response header. We assume the list of header names to be constant
    for each URI throughout the lifetime of the application, and store
    that list in ``self.store[uri].selecting_headers``.

    The items contained in ``self.store[uri]`` have keys which are tuples of
    request header values (in the same order as the names in its
    selecting_headers), and values which are the actual responses.
    """

    maxobjects = 1000
    """The maximum number of cached objects; defaults to 1000."""

    maxobj_size = 100000
    """The maximum size of each cached object in bytes; defaults to 100 KB."""

    maxsize = 10000000
    """The maximum size of the entire cache in bytes; defaults to 10 MB."""

    delay = 600
    """Seconds until the cached content expires; defaults to 600 (10 minutes)."""

    antistampede_timeout = 5
    """Seconds to wait for other threads to release a cache lock."""

    expire_freq = 0.1
    """Seconds to sleep between cache expiration sweeps."""

    debug = False

    def __init__(self):
        self.clear()

        # Run self.expire_cache in a separate daemon thread.
        t = threading.Thread(target=self.expire_cache, name='expire_cache')
        self.expiration_thread = t
        set_daemon(t, True)
        t.start()

    def clear(self):
        """Reset the cache to its initial, empty state."""
        self.store = {}
        self.expirations = {}
        self.tot_puts = 0
        self.tot_gets = 0
        self.tot_hist = 0
        self.tot_expires = 0
        self.tot_non_modified = 0
        self.cursize = 0

    def expire_cache(self):
        """Continuously examine cached objects, expiring stale ones.

        This function is designed to be run in its own daemon thread,
        referenced at ``self.expiration_thread``.
        """
        # It's possible that "time" will be set to None
        # arbitrarily, so we check "while time" to avoid exceptions.
        # See tickets #99 and #180 for more information.
        while time:
            now = time.time()
            # Must make a copy of expirations so it doesn't change size
            # during iteration
            for expiration_time, objects in copyitems(self.expirations):
                if expiration_time <= now:
                    # Each bucket entry carries the variant key recorded by put().
                    for obj_size, uri, sel_header_values in objects:
                        try:
                            del self.store[uri][tuple(sel_header_values)]
                            self.tot_expires += 1
                            self.cursize -= obj_size
                        except KeyError:
                            # the key may have been deleted elsewhere
                            pass
                    del self.expirations[expiration_time]
            time.sleep(self.expire_freq)

    def get(self):
        """Return the current variant if in the cache, else None."""
        request = cherrypy.serving.request
        self.tot_gets += 1

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            return None

        header_values = [request.headers.get(h, '')
                         for h in uricache.selecting_headers]
        variant = uricache.wait(key=tuple(sorted(header_values)),
                                timeout=self.antistampede_timeout,
                                debug=self.debug)
        if variant is not None:
            self.tot_hist += 1
        return variant

    def put(self, variant, size):
        """Store the current variant in the cache."""
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        uri = cherrypy.url(qs=request.query_string)
        uricache = self.store.get(uri)
        if uricache is None:
            uricache = AntiStampedeCache()
            uricache.selecting_headers = [
                e.value for e in response.headers.elements('Vary')]
            self.store[uri] = uricache

        if len(self.store) < self.maxobjects:
            total_size = self.cursize + size

            # checks if there's space for the object
            if (size < self.maxobj_size and total_size < self.maxsize):
                # The variant is keyed by the sorted selecting-header VALUES.
                header_values = [request.headers.get(h, '')
                                 for h in uricache.selecting_headers]
                variant_key = tuple(sorted(header_values))

                # add to the expirations list; record the same key the
                # variant is stored under so expire_cache() can delete it
                # (recording the header *names* made the delete miss).
                expiration_time = response.time + self.delay
                bucket = self.expirations.setdefault(expiration_time, [])
                bucket.append((size, uri, variant_key))

                # add to the cache
                uricache[variant_key] = variant
                self.tot_puts += 1
                self.cursize = total_size

    def delete(self):
        """Remove ALL cached variants of the current resource."""
        uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
        self.store.pop(uri, None)


def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
    """Try to obtain cached output. If fresh enough, raise HTTPError(304).

    If POST, PUT, or DELETE:
        * invalidates (deletes) any cached response for this resource
        * sets request.cached = False
        * sets request.cacheable = False

    else if a cached copy exists:
        * sets request.cached = True
        * sets request.cacheable = False
        * sets response.headers to the cached values
        * checks the cached Last-Modified response header against the
          current If-(Un)Modified-Since request headers; raises 304
          if necessary.
        * sets response.status and response.body to the cached values
        * returns True

    otherwise:
        * sets request.cached = False
        * sets request.cacheable = True
        * returns False
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    if not hasattr(cherrypy, "_cache"):
        # Make a process-wide Cache object.
        cherrypy._cache = kwargs.pop("cache_class", MemoryCache)()

        # Take all remaining kwargs and set them on the Cache object.
        for k, v in kwargs.items():
            setattr(cherrypy._cache, k, v)
    cherrypy._cache.debug = debug

    # POST, PUT, DELETE should invalidate (delete) the cached copy.
    # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
    if request.method in invalid_methods:
        if debug:
            cherrypy.log('request.method %r in invalid_methods %r' %
                         (request.method, invalid_methods), 'TOOLS.CACHING')
        cherrypy._cache.delete()
        request.cached = False
        request.cacheable = False
        return False

    if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]:
        request.cached = False
        request.cacheable = True
        return False

    cache_data = cherrypy._cache.get()
    request.cached = bool(cache_data)
    request.cacheable = not request.cached
    if request.cached:
        # Serve the cached copy.
        max_age = cherrypy._cache.delay
        for v in [e.value for e in request.headers.elements('Cache-Control')]:
            atoms = v.split('=', 1)
            directive = atoms.pop(0)
            if directive == 'max-age':
                if len(atoms) != 1 or not atoms[0].isdigit():
                    raise cherrypy.HTTPError(400, "Invalid Cache-Control header")
                max_age = int(atoms[0])
                break
            elif directive == 'no-cache':
                if debug:
                    cherrypy.log('Ignoring cache due to Cache-Control: no-cache',
                                 'TOOLS.CACHING')
                request.cached = False
                request.cacheable = True
                return False

        if debug:
            cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
        s, h, b, create_time = cache_data
        age = int(response.time - create_time)
        if (age > max_age):
            if debug:
                cherrypy.log('Ignoring cache due to age > %d' % max_age,
                             'TOOLS.CACHING')
            request.cached = False
            request.cacheable = True
            return False

        # Copy the response headers. See http://www.cherrypy.org/ticket/721.
        response.headers = rh = httputil.HeaderMap()
        for k in h:
            dict.__setitem__(rh, k, dict.__getitem__(h, k))

        # Add the required Age header
        response.headers["Age"] = str(age)

        try:
            # Note that validate_since depends on a Last-Modified header;
            # this was put into the cached copy, and should have been
            # resurrected just above (response.headers = cache_data[1]).
            cptools.validate_since()
        except cherrypy.HTTPRedirect:
            x = sys.exc_info()[1]
            if x.status == 304:
                cherrypy._cache.tot_non_modified += 1
            raise

        # serve it & get out from the request
        response.status = s
        response.body = b
    else:
        if debug:
            cherrypy.log('request is not cached', 'TOOLS.CACHING')
    return request.cached


def tee_output():
    """Tee response output to cache storage. Internal."""
    # Used by CachingTool by attaching to request.hooks

    request = cherrypy.serving.request
    if 'no-store' in request.headers.values('Cache-Control'):
        return

    def tee(body):
        """Tee response.body into a list."""
        if ('no-cache' in response.headers.values('Pragma') or
                'no-store' in response.headers.values('Cache-Control')):
            # Pass the body through untouched; nothing is cached.
            for chunk in body:
                yield chunk
            return

        output = []
        for chunk in body:
            output.append(chunk)
            yield chunk

        # save the cache data
        body = ntob('').join(output)
        cherrypy._cache.put((response.status, response.headers or {},
                             body, response.time), len(body))

    response = cherrypy.serving.response
    response.body = tee(response.body)


def expires(secs=0, force=False, debug=False):
    """Tool for influencing cache mechanisms using the 'Expires' header.

    secs
        Must be either an int or a datetime.timedelta, and indicates the
        number of seconds between response.time and when the response should
        expire. The 'Expires' header will be set to response.time + secs.
        If secs is zero, the 'Expires' header is set one year in the past, and
        the following "cache prevention" headers are also set:

            * Pragma: no-cache
            * Cache-Control: no-cache, must-revalidate

    force
        If False, the following headers are checked:

            * Etag
            * Last-Modified
            * Age
            * Expires

        If any are already present, none of the above response headers are set.

    """

    response = cherrypy.serving.response
    headers = response.headers

    cacheable = False
    if not force:
        # some header names that indicate that the response can be cached
        for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
            if indicator in headers:
                cacheable = True
                break

    if not cacheable and not force:
        if debug:
            cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
    else:
        if debug:
            cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')
        if isinstance(secs, datetime.timedelta):
            secs = (86400 * secs.days) + secs.seconds

        if secs == 0:
            if force or ("Pragma" not in headers):
                headers["Pragma"] = "no-cache"
            if cherrypy.serving.request.protocol >= (1, 1):
                if force or "Cache-Control" not in headers:
                    headers["Cache-Control"] = "no-cache, must-revalidate"
            # Set an explicit Expires date in the past.
            expiry = httputil.HTTPDate(1169942400.0)
        else:
            expiry = httputil.HTTPDate(response.time + secs)
        if force or "Expires" not in headers:
            headers["Expires"] = expiry


# ===== cherrypy/lib/covercp.py (new file, mode 100755, index 0000000..9b701b5) =====
# NOTE: the listing is cut off inside this module's opening docstring; the
# visible text is preserved verbatim below as comments.
#
# """Code-coverage tools for CherryPy.
#
# To use this module, or the coverage tools in the test suite,
# you need to download 'coverage.py', either Gareth Rees' `original
# implementation <http://www.garethrees.org/2001/12/04/python-coverage/>`_
# or Ned Batchelder's `enhanced version:
# <http://www.nedbatchelder.com/code/modules/coverage.html>`_
#
# To turn on coverage tracing, use the following code::
#
#     cherrypy.engine.subscribe('start', covercp.start)
#
# DO NOT subscribe anything on the 'start_thread' channel, as previously
# recommended. Calling start once in the main thread should be sufficient
# to start coverage on all threads. Calling start again in each thread
# effectively clears any coverage data gathered up to that point.
+ +Run your code, then use the ``covercp.serve()`` function to browse the +results in a web browser. If you run this module from the command line, +it will call ``serve()`` for you. +""" + +import re +import sys +import cgi +from cherrypy._cpcompat import quote_plus +import os, os.path +localFile = os.path.join(os.path.dirname(__file__), "coverage.cache") + +the_coverage = None +try: + from coverage import coverage + the_coverage = coverage(data_file=localFile) + def start(): + the_coverage.start() +except ImportError: + # Setting the_coverage to None will raise errors + # that need to be trapped downstream. + the_coverage = None + + import warnings + warnings.warn("No code coverage will be performed; coverage.py could not be imported.") + + def start(): + pass +start.priority = 20 + +TEMPLATE_MENU = """<html> +<head> + <title>CherryPy Coverage Menu</title> + <style> + body {font: 9pt Arial, serif;} + #tree { + font-size: 8pt; + font-family: Andale Mono, monospace; + white-space: pre; + } + #tree a:active, a:focus { + background-color: black; + padding: 1px; + color: white; + border: 0px solid #9999FF; + -moz-outline-style: none; + } + .fail { color: red;} + .pass { color: #888;} + #pct { text-align: right;} + h3 { + font-size: small; + font-weight: bold; + font-style: italic; + margin-top: 5px; + } + input { border: 1px solid #ccc; padding: 2px; } + .directory { + color: #933; + font-style: italic; + font-weight: bold; + font-size: 10pt; + } + .file { + color: #400; + } + a { text-decoration: none; } + #crumbs { + color: white; + font-size: 8pt; + font-family: Andale Mono, monospace; + width: 100%; + background-color: black; + } + #crumbs a { + color: #f88; + } + #options { + line-height: 2.3em; + border: 1px solid black; + background-color: #eee; + padding: 4px; + } + #exclude { + width: 100%; + margin-bottom: 3px; + border: 1px solid #999; + } + #submit { + background-color: black; + color: white; + border: 0; + margin-bottom: -9px; + } + </style> +</head> 
+<body> +<h2>CherryPy Coverage</h2>""" + +TEMPLATE_FORM = """ +<div id="options"> +<form action='menu' method=GET> + <input type='hidden' name='base' value='%(base)s' /> + Show percentages <input type='checkbox' %(showpct)s name='showpct' value='checked' /><br /> + Hide files over <input type='text' id='pct' name='pct' value='%(pct)s' size='3' />%%<br /> + Exclude files matching<br /> + <input type='text' id='exclude' name='exclude' value='%(exclude)s' size='20' /> + <br /> + + <input type='submit' value='Change view' id="submit"/> +</form> +</div>""" + +TEMPLATE_FRAMESET = """<html> +<head><title>CherryPy coverage data</title></head> +<frameset cols='250, 1*'> + <frame src='menu?base=%s' /> + <frame name='main' src='' /> +</frameset> +</html> +""" + +TEMPLATE_COVERAGE = """<html> +<head> + <title>Coverage for %(name)s</title> + <style> + h2 { margin-bottom: .25em; } + p { margin: .25em; } + .covered { color: #000; background-color: #fff; } + .notcovered { color: #fee; background-color: #500; } + .excluded { color: #00f; background-color: #fff; } + table .covered, table .notcovered, table .excluded + { font-family: Andale Mono, monospace; + font-size: 10pt; white-space: pre; } + + .lineno { background-color: #eee;} + .notcovered .lineno { background-color: #000;} + table { border-collapse: collapse; + </style> +</head> +<body> +<h2>%(name)s</h2> +<p>%(fullpath)s</p> +<p>Coverage: %(pc)s%%</p>""" + +TEMPLATE_LOC_COVERED = """<tr class="covered"> + <td class="lineno">%s </td> + <td>%s</td> +</tr>\n""" +TEMPLATE_LOC_NOT_COVERED = """<tr class="notcovered"> + <td class="lineno">%s </td> + <td>%s</td> +</tr>\n""" +TEMPLATE_LOC_EXCLUDED = """<tr class="excluded"> + <td class="lineno">%s </td> + <td>%s</td> +</tr>\n""" + +TEMPLATE_ITEM = "%s%s<a class='file' href='report?name=%s' target='main'>%s</a>\n" + +def _percent(statements, missing): + s = len(statements) + e = s - len(missing) + if s > 0: + return int(round(100.0 * e / s)) + return 0 + +def _show_branch(root, 
base, path, pct=0, showpct=False, exclude="", + coverage=the_coverage): + + # Show the directory name and any of our children + dirs = [k for k, v in root.items() if v] + dirs.sort() + for name in dirs: + newpath = os.path.join(path, name) + + if newpath.lower().startswith(base): + relpath = newpath[len(base):] + yield "| " * relpath.count(os.sep) + yield "<a class='directory' href='menu?base=%s&exclude=%s'>%s</a>\n" % \ + (newpath, quote_plus(exclude), name) + + for chunk in _show_branch(root[name], base, newpath, pct, showpct, exclude, coverage=coverage): + yield chunk + + # Now list the files + if path.lower().startswith(base): + relpath = path[len(base):] + files = [k for k, v in root.items() if not v] + files.sort() + for name in files: + newpath = os.path.join(path, name) + + pc_str = "" + if showpct: + try: + _, statements, _, missing, _ = coverage.analysis2(newpath) + except: + # Yes, we really want to pass on all errors. + pass + else: + pc = _percent(statements, missing) + pc_str = ("%3d%% " % pc).replace(' ',' ') + if pc < float(pct) or pc == -1: + pc_str = "<span class='fail'>%s</span>" % pc_str + else: + pc_str = "<span class='pass'>%s</span>" % pc_str + + yield TEMPLATE_ITEM % ("| " * (relpath.count(os.sep) + 1), + pc_str, newpath, name) + +def _skip_file(path, exclude): + if exclude: + return bool(re.search(exclude, path)) + +def _graft(path, tree): + d = tree + + p = path + atoms = [] + while True: + p, tail = os.path.split(p) + if not tail: + break + atoms.append(tail) + atoms.append(p) + if p != "/": + atoms.append("/") + + atoms.reverse() + for node in atoms: + if node: + d = d.setdefault(node, {}) + +def get_tree(base, exclude, coverage=the_coverage): + """Return covered module names as a nested dict.""" + tree = {} + runs = coverage.data.executed_files() + for path in runs: + if not _skip_file(path, exclude) and not os.path.isdir(path): + _graft(path, tree) + return tree + +class CoverStats(object): + + def __init__(self, coverage, root=None): 
+ self.coverage = coverage + if root is None: + # Guess initial depth. Files outside this path will not be + # reachable from the web interface. + import cherrypy + root = os.path.dirname(cherrypy.__file__) + self.root = root + + def index(self): + return TEMPLATE_FRAMESET % self.root.lower() + index.exposed = True + + def menu(self, base="/", pct="50", showpct="", + exclude=r'python\d\.\d|test|tut\d|tutorial'): + + # The coverage module uses all-lower-case names. + base = base.lower().rstrip(os.sep) + + yield TEMPLATE_MENU + yield TEMPLATE_FORM % locals() + + # Start by showing links for parent paths + yield "<div id='crumbs'>" + path = "" + atoms = base.split(os.sep) + atoms.pop() + for atom in atoms: + path += atom + os.sep + yield ("<a href='menu?base=%s&exclude=%s'>%s</a> %s" + % (path, quote_plus(exclude), atom, os.sep)) + yield "</div>" + + yield "<div id='tree'>" + + # Then display the tree + tree = get_tree(base, exclude, self.coverage) + if not tree: + yield "<p>No modules covered.</p>" + else: + for chunk in _show_branch(tree, base, "/", pct, + showpct=='checked', exclude, coverage=self.coverage): + yield chunk + + yield "</div>" + yield "</body></html>" + menu.exposed = True + + def annotated_file(self, filename, statements, excluded, missing): + source = open(filename, 'r') + buffer = [] + for lineno, line in enumerate(source.readlines()): + lineno += 1 + line = line.strip("\n\r") + empty_the_buffer = True + if lineno in excluded: + template = TEMPLATE_LOC_EXCLUDED + elif lineno in missing: + template = TEMPLATE_LOC_NOT_COVERED + elif lineno in statements: + template = TEMPLATE_LOC_COVERED + else: + empty_the_buffer = False + buffer.append((lineno, line)) + if empty_the_buffer: + for lno, pastline in buffer: + yield template % (lno, cgi.escape(pastline)) + buffer = [] + yield template % (lineno, cgi.escape(line)) + + def report(self, name): + filename, statements, excluded, missing, _ = self.coverage.analysis2(name) + pc = _percent(statements, missing) 
+ yield TEMPLATE_COVERAGE % dict(name=os.path.basename(name), + fullpath=name, + pc=pc) + yield '<table>\n' + for line in self.annotated_file(filename, statements, excluded, + missing): + yield line + yield '</table>' + yield '</body>' + yield '</html>' + report.exposed = True + + +def serve(path=localFile, port=8080, root=None): + if coverage is None: + raise ImportError("The coverage module could not be imported.") + from coverage import coverage + cov = coverage(data_file = path) + cov.load() + + import cherrypy + cherrypy.config.update({'server.socket_port': int(port), + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(CoverStats(cov, root)) + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) + diff --git a/cherrypy/lib/cpstats.py b/cherrypy/lib/cpstats.py new file mode 100755 index 0000000..79d5c3a --- /dev/null +++ b/cherrypy/lib/cpstats.py @@ -0,0 +1,661 @@ +"""CPStats, a package for collecting and reporting on program statistics. + +Overview +======== + +Statistics about program operation are an invaluable monitoring and debugging +tool. Unfortunately, the gathering and reporting of these critical values is +usually ad-hoc. This package aims to add a centralized place for gathering +statistical performance data, a structure for recording that data which +provides for extrapolation of that data into more useful information, +and a method of serving that data to both human investigators and +monitoring software. Let's examine each of those in more detail. + +Data Gathering +-------------- + +Just as Python's `logging` module provides a common importable for gathering +and sending messages, performance statistics would benefit from a similar +common mechanism, and one that does *not* require each package which wishes +to collect stats to import a third-party module. Therefore, we choose to +re-use the `logging` module by adding a `statistics` object to it. + +That `logging.statistics` object is a nested dict. 
It is not a custom class, +because that would 1) require libraries and applications to import a third- +party module in order to participate, 2) inhibit innovation in extrapolation +approaches and in reporting tools, and 3) be slow. There are, however, some +specifications regarding the structure of the dict. + + { + +----"SQLAlchemy": { + | "Inserts": 4389745, + | "Inserts per Second": + | lambda s: s["Inserts"] / (time() - s["Start"]), + | C +---"Table Statistics": { + | o | "widgets": {-----------+ + N | l | "Rows": 1.3M, | Record + a | l | "Inserts": 400, | + m | e | },---------------------+ + e | c | "froobles": { + s | t | "Rows": 7845, + p | i | "Inserts": 0, + a | o | }, + c | n +---}, + e | "Slow Queries": + | [{"Query": "SELECT * FROM widgets;", + | "Processing Time": 47.840923343, + | }, + | ], + +----}, + } + +The `logging.statistics` dict has four levels. The topmost level is nothing +more than a set of names to introduce modularity, usually along the lines of +package names. If the SQLAlchemy project wanted to participate, for example, +it might populate the item `logging.statistics['SQLAlchemy']`, whose value +would be a second-layer dict we call a "namespace". Namespaces help multiple +packages to avoid collisions over key names, and make reports easier to read, +to boot. The maintainers of SQLAlchemy should feel free to use more than one +namespace if needed (such as 'SQLAlchemy ORM'). Note that there are no case +or other syntax constraints on the namespace names; they should be chosen +to be maximally readable by humans (neither too short nor too long). + +Each namespace, then, is a dict of named statistical values, such as +'Requests/sec' or 'Uptime'. You should choose names which will look +good on a report: spaces and capitalization are just fine. + +In addition to scalars, values in a namespace MAY be a (third-layer) +dict, or a list, called a "collection". 
For example, the CherryPy StatsTool +keeps track of what each request is doing (or has most recently done) +in a 'Requests' collection, where each key is a thread ID; each +value in the subdict MUST be a fourth dict (whew!) of statistical data about +each thread. We call each subdict in the collection a "record". Similarly, +the StatsTool also keeps a list of slow queries, where each record contains +data about each slow query, in order. + +Values in a namespace or record may also be functions, which brings us to: + +Extrapolation +------------- + +The collection of statistical data needs to be fast, as close to unnoticeable +as possible to the host program. That requires us to minimize I/O, for example, +but in Python it also means we need to minimize function calls. So when you +are designing your namespace and record values, try to insert the most basic +scalar values you already have on hand. + +When it comes time to report on the gathered data, however, we usually have +much more freedom in what we can calculate. Therefore, whenever reporting +tools (like the provided StatsPage CherryPy class) fetch the contents of +`logging.statistics` for reporting, they first call `extrapolate_statistics` +(passing the whole `statistics` dict as the only argument). This makes a +deep copy of the statistics dict so that the reporting tool can both iterate +over it and even change it without harming the original. But it also expands +any functions in the dict by calling them. For example, you might have a +'Current Time' entry in the namespace with the value "lambda scope: time.time()". +The "scope" parameter is the current namespace dict (or record, if we're +currently expanding one of those instead), allowing you access to existing +static entries. If you're truly evil, you can even modify more than one entry +at a time. 
+ +However, don't try to calculate an entry and then use its value in further +extrapolations; the order in which the functions are called is not guaranteed. +This can lead to a certain amount of duplicated work (or a redesign of your +schema), but that's better than complicating the spec. + +After the whole thing has been extrapolated, it's time for: + +Reporting +--------- + +The StatsPage class grabs the `logging.statistics` dict, extrapolates it all, +and then transforms it to HTML for easy viewing. Each namespace gets its own +header and attribute table, plus an extra table for each collection. This is +NOT part of the statistics specification; other tools can format how they like. + +You can control which columns are output and how they are formatted by updating +StatsPage.formatting, which is a dict that mirrors the keys and nesting of +`logging.statistics`. The difference is that, instead of data values, it has +formatting values. Use None for a given key to indicate to the StatsPage that a +given column should not be output. Use a string with formatting (such as '%.3f') +to interpolate the value(s), or use a callable (such as lambda v: v.isoformat()) +for more advanced formatting. Any entry which is not mentioned in the formatting +dict is output unchanged. + +Monitoring +---------- + +Although the HTML output takes pains to assign unique id's to each <td> with +statistical data, you're probably better off fetching /cpstats/data, which +outputs the whole (extrapolated) `logging.statistics` dict in JSON format. +That is probably easier to parse, and doesn't have any formatting controls, +so you get the "original" data in a consistently-serialized format. +Note: there's no treatment yet for datetime objects. Try time.time() instead +for now if you can. Nagios will probably thank you. 
+ +Turning Collection Off +---------------------- + +It is recommended each namespace have an "Enabled" item which, if False, +stops collection (but not reporting) of statistical data. Applications +SHOULD provide controls to pause and resume collection by setting these +entries to False or True, if present. + + +Usage +===== + +To collect statistics on CherryPy applications: + + from cherrypy.lib import cpstats + appconfig['/']['tools.cpstats.on'] = True + +To collect statistics on your own code: + + import logging + # Initialize the repository + if not hasattr(logging, 'statistics'): logging.statistics = {} + # Initialize my namespace + mystats = logging.statistics.setdefault('My Stuff', {}) + # Initialize my namespace's scalars and collections + mystats.update({ + 'Enabled': True, + 'Start Time': time.time(), + 'Important Events': 0, + 'Events/Second': lambda s: ( + (s['Important Events'] / (time.time() - s['Start Time']))), + }) + ... + for event in events: + ... + # Collect stats + if mystats.get('Enabled', False): + mystats['Important Events'] += 1 + +To report statistics: + + root.cpstats = cpstats.StatsPage() + +To format statistics reports: + + See 'Reporting', above. 
+ +""" + +# -------------------------------- Statistics -------------------------------- # + +import logging +if not hasattr(logging, 'statistics'): logging.statistics = {} + +def extrapolate_statistics(scope): + """Return an extrapolated copy of the given scope.""" + c = {} + for k, v in list(scope.items()): + if isinstance(v, dict): + v = extrapolate_statistics(v) + elif isinstance(v, (list, tuple)): + v = [extrapolate_statistics(record) for record in v] + elif hasattr(v, '__call__'): + v = v(scope) + c[k] = v + return c + + +# --------------------- CherryPy Applications Statistics --------------------- # + +import threading +import time + +import cherrypy + +appstats = logging.statistics.setdefault('CherryPy Applications', {}) +appstats.update({ + 'Enabled': True, + 'Bytes Read/Request': lambda s: (s['Total Requests'] and + (s['Total Bytes Read'] / float(s['Total Requests'])) or 0.0), + 'Bytes Read/Second': lambda s: s['Total Bytes Read'] / s['Uptime'](s), + 'Bytes Written/Request': lambda s: (s['Total Requests'] and + (s['Total Bytes Written'] / float(s['Total Requests'])) or 0.0), + 'Bytes Written/Second': lambda s: s['Total Bytes Written'] / s['Uptime'](s), + 'Current Time': lambda s: time.time(), + 'Current Requests': 0, + 'Requests/Second': lambda s: float(s['Total Requests']) / s['Uptime'](s), + 'Server Version': cherrypy.__version__, + 'Start Time': time.time(), + 'Total Bytes Read': 0, + 'Total Bytes Written': 0, + 'Total Requests': 0, + 'Total Time': 0, + 'Uptime': lambda s: time.time() - s['Start Time'], + 'Requests': {}, + }) + +proc_time = lambda s: time.time() - s['Start Time'] + + +class ByteCountWrapper(object): + """Wraps a file-like object, counting the number of bytes read.""" + + def __init__(self, rfile): + self.rfile = rfile + self.bytes_read = 0 + + def read(self, size=-1): + data = self.rfile.read(size) + self.bytes_read += len(data) + return data + + def readline(self, size=-1): + data = self.rfile.readline(size) + self.bytes_read += 
len(data) + return data + + def readlines(self, sizehint=0): + # Shamelessly stolen from StringIO + total = 0 + lines = [] + line = self.readline() + while line: + lines.append(line) + total += len(line) + if 0 < sizehint <= total: + break + line = self.readline() + return lines + + def close(self): + self.rfile.close() + + def __iter__(self): + return self + + def next(self): + data = self.rfile.next() + self.bytes_read += len(data) + return data + + +average_uriset_time = lambda s: s['Count'] and (s['Sum'] / s['Count']) or 0 + + +class StatsTool(cherrypy.Tool): + """Record various information about the current request.""" + + def __init__(self): + cherrypy.Tool.__init__(self, 'on_end_request', self.record_stop) + + def _setup(self): + """Hook this tool into cherrypy.request. + + The standard CherryPy request object will automatically call this + method when the tool is "turned on" in config. + """ + if appstats.get('Enabled', False): + cherrypy.Tool._setup(self) + self.record_start() + + def record_start(self): + """Record the beginning of a request.""" + request = cherrypy.serving.request + if not hasattr(request.rfile, 'bytes_read'): + request.rfile = ByteCountWrapper(request.rfile) + request.body.fp = request.rfile + + r = request.remote + + appstats['Current Requests'] += 1 + appstats['Total Requests'] += 1 + appstats['Requests'][threading._get_ident()] = { + 'Bytes Read': None, + 'Bytes Written': None, + # Use a lambda so the ip gets updated by tools.proxy later + 'Client': lambda s: '%s:%s' % (r.ip, r.port), + 'End Time': None, + 'Processing Time': proc_time, + 'Request-Line': request.request_line, + 'Response Status': None, + 'Start Time': time.time(), + } + + def record_stop(self, uriset=None, slow_queries=1.0, slow_queries_count=100, + debug=False, **kwargs): + """Record the end of a request.""" + w = appstats['Requests'][threading._get_ident()] + + r = cherrypy.request.rfile.bytes_read + w['Bytes Read'] = r + appstats['Total Bytes Read'] += r + + if 
cherrypy.response.stream: + w['Bytes Written'] = 'chunked' + else: + cl = int(cherrypy.response.headers.get('Content-Length', 0)) + w['Bytes Written'] = cl + appstats['Total Bytes Written'] += cl + + w['Response Status'] = cherrypy.response.status + + w['End Time'] = time.time() + p = w['End Time'] - w['Start Time'] + w['Processing Time'] = p + appstats['Total Time'] += p + + appstats['Current Requests'] -= 1 + + if debug: + cherrypy.log('Stats recorded: %s' % repr(w), 'TOOLS.CPSTATS') + + if uriset: + rs = appstats.setdefault('URI Set Tracking', {}) + r = rs.setdefault(uriset, { + 'Min': None, 'Max': None, 'Count': 0, 'Sum': 0, + 'Avg': average_uriset_time}) + if r['Min'] is None or p < r['Min']: + r['Min'] = p + if r['Max'] is None or p > r['Max']: + r['Max'] = p + r['Count'] += 1 + r['Sum'] += p + + if slow_queries and p > slow_queries: + sq = appstats.setdefault('Slow Queries', []) + sq.append(w.copy()) + if len(sq) > slow_queries_count: + sq.pop(0) + + +import cherrypy +cherrypy.tools.cpstats = StatsTool() + + +# ---------------------- CherryPy Statistics Reporting ---------------------- # + +import os +thisdir = os.path.abspath(os.path.dirname(__file__)) + +try: + import json +except ImportError: + try: + import simplejson as json + except ImportError: + json = None + + +missing = object() + +locale_date = lambda v: time.strftime('%c', time.gmtime(v)) +iso_format = lambda v: time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(v)) + +def pause_resume(ns): + def _pause_resume(enabled): + pause_disabled = '' + resume_disabled = '' + if enabled: + resume_disabled = 'disabled="disabled" ' + else: + pause_disabled = 'disabled="disabled" ' + return """ + <form action="pause" method="POST" style="display:inline"> + <input type="hidden" name="namespace" value="%s" /> + <input type="submit" value="Pause" %s/> + </form> + <form action="resume" method="POST" style="display:inline"> + <input type="hidden" name="namespace" value="%s" /> + <input type="submit" value="Resume" 
%s/> + </form> + """ % (ns, pause_disabled, ns, resume_disabled) + return _pause_resume + + +class StatsPage(object): + + formatting = { + 'CherryPy Applications': { + 'Enabled': pause_resume('CherryPy Applications'), + 'Bytes Read/Request': '%.3f', + 'Bytes Read/Second': '%.3f', + 'Bytes Written/Request': '%.3f', + 'Bytes Written/Second': '%.3f', + 'Current Time': iso_format, + 'Requests/Second': '%.3f', + 'Start Time': iso_format, + 'Total Time': '%.3f', + 'Uptime': '%.3f', + 'Slow Queries': { + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': iso_format, + }, + 'URI Set Tracking': { + 'Avg': '%.3f', + 'Max': '%.3f', + 'Min': '%.3f', + 'Sum': '%.3f', + }, + 'Requests': { + 'Bytes Read': '%s', + 'Bytes Written': '%s', + 'End Time': None, + 'Processing Time': '%.3f', + 'Start Time': None, + }, + }, + 'CherryPy WSGIServer': { + 'Enabled': pause_resume('CherryPy WSGIServer'), + 'Connections/second': '%.3f', + 'Start time': iso_format, + }, + } + + + def index(self): + # Transform the raw data into pretty output for HTML + yield """ +<html> +<head> + <title>Statistics</title> +<style> + +th, td { + padding: 0.25em 0.5em; + border: 1px solid #666699; +} + +table { + border-collapse: collapse; +} + +table.stats1 { + width: 100%; +} + +table.stats1 th { + font-weight: bold; + text-align: right; + background-color: #CCD5DD; +} + +table.stats2, h2 { + margin-left: 50px; +} + +table.stats2 th { + font-weight: bold; + text-align: center; + background-color: #CCD5DD; +} + +</style> +</head> +<body> +""" + for title, scalars, collections in self.get_namespaces(): + yield """ +<h1>%s</h1> + +<table class='stats1'> + <tbody> +""" % title + for i, (key, value) in enumerate(scalars): + colnum = i % 3 + if colnum == 0: yield """ + <tr>""" + yield """ + <th>%(key)s</th><td id='%(title)s-%(key)s'>%(value)s</td>""" % vars() + if colnum == 2: yield """ + </tr>""" + + if colnum == 0: yield """ + <th></th><td></td> + <th></th><td></td> + </tr>""" + elif colnum == 1: yield 
""" + <th></th><td></td> + </tr>""" + yield """ + </tbody> +</table>""" + + for subtitle, headers, subrows in collections: + yield """ +<h2>%s</h2> +<table class='stats2'> + <thead> + <tr>""" % subtitle + for key in headers: + yield """ + <th>%s</th>""" % key + yield """ + </tr> + </thead> + <tbody>""" + for subrow in subrows: + yield """ + <tr>""" + for value in subrow: + yield """ + <td>%s</td>""" % value + yield """ + </tr>""" + yield """ + </tbody> +</table>""" + yield """ +</body> +</html> +""" + index.exposed = True + + def get_namespaces(self): + """Yield (title, scalars, collections) for each namespace.""" + s = extrapolate_statistics(logging.statistics) + for title, ns in sorted(s.items()): + scalars = [] + collections = [] + ns_fmt = self.formatting.get(title, {}) + for k, v in sorted(ns.items()): + fmt = ns_fmt.get(k, {}) + if isinstance(v, dict): + headers, subrows = self.get_dict_collection(v, fmt) + collections.append((k, ['ID'] + headers, subrows)) + elif isinstance(v, (list, tuple)): + headers, subrows = self.get_list_collection(v, fmt) + collections.append((k, headers, subrows)) + else: + format = ns_fmt.get(k, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v = format(v) + elif format is not missing: + v = format % v + scalars.append((k, v)) + yield title, scalars, collections + + def get_dict_collection(self, v, formatting): + """Return ([headers], [rows]) for the given collection.""" + # E.g., the 'Requests' dict. + headers = [] + for record in v.itervalues(): + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for k2, record in sorted(v.items()): + subrow = [k2] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. 
+ continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + def get_list_collection(self, v, formatting): + """Return ([headers], [subrows]) for the given collection.""" + # E.g., the 'Slow Queries' list. + headers = [] + for record in v: + for k3 in record: + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if k3 not in headers: + headers.append(k3) + headers.sort() + + subrows = [] + for record in v: + subrow = [] + for k3 in headers: + v3 = record.get(k3, '') + format = formatting.get(k3, missing) + if format is None: + # Don't output this column. + continue + if hasattr(format, '__call__'): + v3 = format(v3) + elif format is not missing: + v3 = format % v3 + subrow.append(v3) + subrows.append(subrow) + + return headers, subrows + + if json is not None: + def data(self): + s = extrapolate_statistics(logging.statistics) + cherrypy.response.headers['Content-Type'] = 'application/json' + return json.dumps(s, sort_keys=True, indent=4) + data.exposed = True + + def pause(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = False + raise cherrypy.HTTPRedirect('./') + pause.exposed = True + pause.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + + def resume(self, namespace): + logging.statistics.get(namespace, {})['Enabled'] = True + raise cherrypy.HTTPRedirect('./') + resume.exposed = True + resume.cp_config = {'tools.allow.on': True, + 'tools.allow.methods': ['POST']} + diff --git a/cherrypy/lib/cptools.py b/cherrypy/lib/cptools.py new file mode 100755 index 0000000..3eedf97 --- /dev/null +++ b/cherrypy/lib/cptools.py @@ -0,0 +1,611 @@ +"""Functions for builtin CherryPy tools.""" + +import logging +import re + +import cherrypy +from cherrypy._cpcompat import basestring, ntob, md5, set +from cherrypy.lib import httputil as _httputil + + +# 
def validate_etags(autotags=False, debug=False):
    """Check the response ETag against If-Match / If-None-Match headers.

    When ``autotags`` is true and no ETag response header has been set,
    one is derived from an MD5 hash of the collapsed response body, but
    only for a 200 status.  With ``autotags`` false (the default) no ETag
    is ever generated here.

    WARNING: automatic tags are not designed for URLs that accept methods
    other than GET.  If, say, a POST to the same URL returns no content,
    the generated ETag is wrong, which can break conditional requests in
    a destructive way.  Likewise a 304 Not Modified has an empty body, so
    the hash would be incorrect.  See :rfc:`2616` Section 14.24.

    Raises ``HTTPError(412)`` when a precondition fails, and
    ``HTTPRedirect([], 304)`` when If-None-Match matches on GET/HEAD.
    """
    response = cherrypy.serving.response

    # The ETag attribute doubles as an "already ran" marker.
    if hasattr(response, "ETag"):
        return

    status, reason, msg = _httputil.valid_status(response.status)
    etag = response.headers.get('ETag')

    # Automatic ETag generation.  See the warning in the docstring.
    if not etag and autotags and status == 200:
        body = response.collapse_body()
        etag = '"%s"' % md5(body).hexdigest()
        if debug:
            cherrypy.log('Setting ETag: %s' % etag, 'TOOLS.ETAGS')
        response.headers['ETag'] = etag
    elif debug:
        if etag:
            cherrypy.log('ETag already set: %s' % etag, 'TOOLS.ETAGS')
        elif not autotags:
            cherrypy.log('Autotags off', 'TOOLS.ETAGS')
        else:
            cherrypy.log('Status not 200', 'TOOLS.ETAGS')

    response.ETag = etag

    # "If the request would, without the If-Match header field, result in
    # anything other than a 2xx or 412 status, then the If-Match header
    # MUST be ignored."
    if debug:
        cherrypy.log('Status: %s' % status, 'TOOLS.ETAGS')
    if not (200 <= status <= 299):
        return

    request = cherrypy.serving.request

    if_match = [str(x) for x in (request.headers.elements('If-Match') or [])]
    if debug:
        cherrypy.log('If-Match conditions: %s' % repr(if_match),
                     'TOOLS.ETAGS')
    if if_match and if_match != ["*"] and etag not in if_match:
        raise cherrypy.HTTPError(412, "If-Match failed: ETag %r did "
                                 "not match %r" % (etag, if_match))

    if_none_match = [str(x) for x in
                     (request.headers.elements('If-None-Match') or [])]
    if debug:
        cherrypy.log('If-None-Match conditions: %s' % repr(if_none_match),
                     'TOOLS.ETAGS')
    if if_none_match != ["*"] and etag not in if_none_match:
        # Nothing matched: the request proceeds normally.
        return

    if debug:
        cherrypy.log('request.method: %s' % request.method, 'TOOLS.ETAGS')
    if request.method in ("GET", "HEAD"):
        raise cherrypy.HTTPRedirect([], 304)
    raise cherrypy.HTTPError(412, "If-None-Match failed: ETag %r "
                             "matched %r" % (etag, if_none_match))
def proxy(base=None, local='X-Forwarded-Host', remote='X-Forwarded-For',
          scheme='X-Forwarded-Proto', debug=False):
    """Change the base URL (scheme://host[:port][/path]).

    For running a CP server behind Apache, lighttpd, or other HTTP server.

    If the new request.base must include path info (not just the host),
    set 'base' to the full base path and ALSO set 'local' to '', so that
    the X-Forwarded-Host request header (which never includes path info)
    does not override it.  The value for 'base' MUST NOT end in a slash.

    request.remote.ip is rewritten from the header named by 'remote'
    (default 'X-Forwarded-For') when that header has a value; pass an
    empty string for 'remote' to leave remote.ip alone.

    'scheme' names the request header carrying the original scheme; a
    value of 'on' in a header whose name contains 'ssl' means 'https'.
    When no scheme can be read, the scheme of the current request.base
    is reused.
    """
    request = cherrypy.serving.request
    headers = request.headers

    # 1. Work out the scheme.
    if scheme:
        forwarded_scheme = headers.get(scheme, None)
        if debug:
            cherrypy.log('Testing scheme %r:%r' % (scheme, forwarded_scheme),
                         'TOOLS.PROXY')
        if forwarded_scheme == 'on' and 'ssl' in scheme.lower():
            # This handles e.g. webfaction's 'X-Forwarded-Ssl: on' header
            scheme = 'https'
        else:
            # This is for lighttpd/pound/Mongrel's 'X-Forwarded-Proto: https'
            scheme = forwarded_scheme
    if not scheme:
        current_base = request.base
        scheme = current_base[:current_base.find("://")]

    # 2. Work out the host part.
    if local:
        forwarded_host = headers.get(local, None)
        if debug:
            cherrypy.log('Testing local %r:%r' % (local, forwarded_host),
                         'TOOLS.PROXY')
        if forwarded_host is not None:
            # Only the first entry of a comma-separated list is used.
            base = forwarded_host.split(',')[0]
    if not base:
        port = request.local.port
        base = '127.0.0.1'
        if port != 80:
            base = '127.0.0.1:%s' % port

    if "://" not in base:
        # add http:// or https:// if needed
        base = scheme + "://" + base

    request.base = base

    # 3. Optionally rewrite the client address.
    if not remote:
        return
    xff = headers.get(remote)
    if debug:
        cherrypy.log('Testing remote %r:%r' % (remote, xff), 'TOOLS.PROXY')
    if not xff:
        return
    if remote == 'X-Forwarded-For':
        # See http://bob.pythonmac.org/archives/2005/09/23/apache-x-forwarded-for-caveat/
        xff = xff.split(',')[-1].strip()
    request.remote.ip = xff
def response_headers(headers=None, debug=False):
    """Set headers on the response.

    ``headers`` is an iterable of ``(name, value)`` pairs; ``None`` (the
    default) or any other false value sets nothing.  A later pair with
    the same name overwrites an earlier one.
    """
    pairs = headers or []
    if debug:
        cherrypy.log('Setting response headers: %s' % repr(headers),
                     'TOOLS.RESPONSE_HEADERS')
    for header_name, header_value in pairs:
        cherrypy.serving.response.headers[header_name] = header_value
response_headers.failsafe = True
class SessionAuth(object):
    """Assert that the user is logged in.

    ``run`` dispatches on the end of the request path: ``login_screen``,
    ``do_login`` and ``do_logout`` are handled here, every other path goes
    through ``do_check``.  The ``on_*`` hooks, ``anonymous`` and
    ``check_username_and_password`` are no-ops meant to be overridden.
    """

    # Session slot in which the authenticated user name is stored.
    session_key = "username"
    # When true, routing decisions are written to the CherryPy log.
    debug = False

    def check_username_and_password(self, username, password):
        """Return an error message for bad credentials, or a false value."""
        pass

    def anonymous(self):
        """Provide a temporary user name for anonymous users."""
        pass

    def on_login(self, username):
        """Hook called by do_login after the user name has been stored."""
        pass

    def on_logout(self, username):
        """Hook called by do_logout when a user name was in the session."""
        pass

    def on_check(self, username):
        """Hook called by do_check once request.login has been set."""
        pass

    def _login_form_html(self, from_page, username, error_msg):
        """Return the login page as a native string, HTML-escaped.

        All three values can originate from the request (query string,
        form fields, the requested URL), so they are escaped, quotes
        included, before being placed in the markup.
        """
        try:
            from html import escape
        except ImportError:
            # Python 2 keeps the equivalent helper in the cgi module.
            from cgi import escape

        def quote(value):
            # '%s' first, so non-string values render as they did before.
            return escape("%s" % (value,), True)

        return """<html><body>
Message: %(error_msg)s
<form method="post" action="do_login">
    Login: <input type="text" name="username" value="%(username)s" size="10" /><br />
    Password: <input type="password" name="password" size="10" /><br />
    <input type="hidden" name="from_page" value="%(from_page)s" /><br />
    <input type="submit" />
</form>
</body></html>""" % {'from_page': quote(from_page),
                     'username': quote(username),
                     'error_msg': quote(error_msg)}

    def login_screen(self, from_page='..', username='', error_msg='', **kwargs):
        """Return the login form, encoded as UTF-8 bytes.

        Extra keyword arguments are accepted and ignored so that the
        request parameters can be passed straight through.
        """
        return ntob(self._login_form_html(from_page, username, error_msg),
                    "utf-8")

    def do_login(self, username, password, from_page='..', **kwargs):
        """Login. May raise redirect, or return True if request handled."""
        response = cherrypy.serving.response
        error_msg = self.check_username_and_password(username, password)
        if error_msg:
            # Bad credentials: show the form again with the message.
            body = self.login_screen(from_page, username, error_msg)
            response.body = body
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]
            return True
        else:
            cherrypy.serving.request.login = username
            cherrypy.session[self.session_key] = username
            self.on_login(username)
            raise cherrypy.HTTPRedirect(from_page or "/")

    def do_logout(self, from_page='..', **kwargs):
        """Logout. May raise redirect, or return True if request handled."""
        sess = cherrypy.session
        username = sess.get(self.session_key)
        sess[self.session_key] = None
        if username:
            cherrypy.serving.request.login = None
            self.on_logout(username)
        raise cherrypy.HTTPRedirect(from_page)

    def do_check(self):
        """Assert username. May raise redirect, or return True if request handled."""
        sess = cherrypy.session
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        username = sess.get(self.session_key)
        if not username:
            # Nobody logged in: fall back to the anonymous user, if any.
            sess[self.session_key] = username = self.anonymous()
            if self.debug:
                cherrypy.log('No session[username], trying anonymous', 'TOOLS.SESSAUTH')
        if not username:
            # Still nobody: answer with the login form; the current URL
            # becomes the form's from_page value.
            url = cherrypy.url(qs=request.query_string)
            if self.debug:
                cherrypy.log('No username, routing to login_screen with '
                             'from_page %r' % url, 'TOOLS.SESSAUTH')
            response.body = self.login_screen(url)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]
            return True
        if self.debug:
            cherrypy.log('Setting request.login to %r' % username, 'TOOLS.SESSAUTH')
        request.login = username
        self.on_check(username)

    def run(self):
        """Dispatch on the request path; see the class docstring."""
        request = cherrypy.serving.request
        response = cherrypy.serving.response

        path = request.path_info
        if path.endswith('login_screen'):
            if self.debug:
                cherrypy.log('routing %r to login_screen' % path, 'TOOLS.SESSAUTH')
            return self.login_screen(**request.params)
        elif path.endswith('do_login'):
            if request.method != 'POST':
                response.headers['Allow'] = "POST"
                if self.debug:
                    cherrypy.log('do_login requires POST', 'TOOLS.SESSAUTH')
                raise cherrypy.HTTPError(405)
            if self.debug:
                cherrypy.log('routing %r to do_login' % path, 'TOOLS.SESSAUTH')
            return self.do_login(**request.params)
        elif path.endswith('do_logout'):
            if request.method != 'POST':
                response.headers['Allow'] = "POST"
                raise cherrypy.HTTPError(405)
            if self.debug:
                cherrypy.log('routing %r to do_logout' % path, 'TOOLS.SESSAUTH')
            return self.do_logout(**request.params)
        else:
            if self.debug:
                cherrypy.log('No special path, running do_check', 'TOOLS.SESSAUTH')
            return self.do_check()
def log_hooks(debug=False):
    """Write request.hooks to the cherrypy error log.

    Hook points are listed in the order given by
    ``cherrypy._cprequest.hookpoints``, followed by any other points
    present on the current request.  Under each point its hooks are
    listed in sorted order.  ``debug`` is accepted but not used.
    """
    request = cherrypy.serving.request

    # Sort by the standard points if possible.
    from cherrypy import _cprequest
    # Work on a copy: appending to the module-level list would leak the
    # extra points of this request into every later request.
    points = list(_cprequest.hookpoints)
    for k in request.hooks:
        if k not in points:
            points.append(k)

    msg = []
    for k in points:
        msg.append(" %s:" % k)
        # sorted() leaves the request's own hook lists untouched.
        for h in sorted(request.hooks.get(k, [])):
            msg.append(" %r" % h)
    cherrypy.log('\nRequest Hooks for ' + cherrypy.url() +
                 ':\n' + '\n'.join(msg), "HTTP")
def flatten(debug=False):
    """Wrap response.body in a generator that recursively iterates over body.

    This allows cherrypy.response.body to consist of 'nested generators';
    that is, a set of generators that yield generators.  Only generator
    objects are descended into; every other item is passed through as a
    chunk.  With ``debug`` set, each (nested) generator logs its chunk
    count once it has been exhausted.
    """
    import types
    generator_type = types.GeneratorType

    def flattener(input):
        emitted = 0
        for item in input:
            if isinstance(item, generator_type):
                # Recurse; inner chunks count towards this level as well.
                for inner_chunk in flattener(item):
                    emitted += 1
                    yield inner_chunk
            else:
                emitted += 1
                yield item
        if debug:
            cherrypy.log('Flattened %d chunks' % emitted, 'TOOLS.FLATTEN')

    response = cherrypy.serving.response
    response.body = flattener(response.body)
+ + Matching types are checked in order of client preference first, + and then in the order of the given 'media' values. + + Note that this function does not honor accept-params (other than "q"). + """ + if not media: + return + if isinstance(media, basestring): + media = [media] + request = cherrypy.serving.request + + # Parse the Accept request header, and try to match one + # of the requested media-ranges (in order of preference). + ranges = request.headers.elements('Accept') + if not ranges: + # Any media type is acceptable. + if debug: + cherrypy.log('No Accept header elements', 'TOOLS.ACCEPT') + return media[0] + else: + # Note that 'ranges' is sorted in order of preference + for element in ranges: + if element.qvalue > 0: + if element.value == "*/*": + # Matches any type or subtype + if debug: + cherrypy.log('Match due to */*', 'TOOLS.ACCEPT') + return media[0] + elif element.value.endswith("/*"): + # Matches any subtype + mtype = element.value[:-1] # Keep the slash + for m in media: + if m.startswith(mtype): + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') + return m + else: + # Matches exact value + if element.value in media: + if debug: + cherrypy.log('Match due to %s' % element.value, + 'TOOLS.ACCEPT') + return element.value + + # No suitable media-range found. + ah = request.headers.get('Accept') + if ah is None: + msg = "Your client did not send an Accept header." + else: + msg = "Your client sent this Accept header: %s." % ah + msg += (" But this resource only emits these media types: %s." 
def autovary(ignore=None, debug=False):
    """Auto-populate the Vary response header based on request.header access.

    request.headers is replaced by a MonitoredHeaderMap holding the same
    entries.  A 'before_finalize' hook (priority 95) then merges the
    header names read through it into the Vary response header, minus
    the names in ``ignore``.  ``ignore`` defaults to Content-Disposition,
    Content-Length and Content-Type.  The resulting names are sorted and
    joined with ', '.
    """
    request = cherrypy.serving.request

    original_headers = request.headers
    request.headers = MonitoredHeaderMap()
    request.headers.update(original_headers)

    if ignore is None:
        ignore = set(['Content-Disposition', 'Content-Length', 'Content-Type'])

    def set_response_header():
        outgoing = cherrypy.serving.response.headers
        already_listed = set([e.value for e in outgoing.elements('Vary')])
        if debug:
            cherrypy.log('Accessed headers: %s' % request.headers.accessed_headers,
                         'TOOLS.AUTOVARY')
        names = already_listed.union(request.headers.accessed_headers)
        names = names.difference(ignore)
        outgoing['Vary'] = ', '.join(sorted(names))

    request.hooks.attach('before_finalize', set_response_header, 95)
+ + Either argument may be a single string or a list of strings. + + encoding + If not None, restricts the set of charsets attempted while decoding + a request entity to the given set (even if a different charset is given in + the Content-Type request header). + + default_encoding + Only in effect if the 'encoding' argument is not given. + If given, the set of charsets attempted while decoding a request entity is + *extended* with the given value(s). + + """ + body = cherrypy.request.body + if encoding is not None: + if not isinstance(encoding, list): + encoding = [encoding] + body.attempt_charsets = encoding + elif default_encoding: + if not isinstance(default_encoding, list): + default_encoding = [default_encoding] + body.attempt_charsets = body.attempt_charsets + default_encoding + + +class ResponseEncoder: + + default_encoding = 'utf-8' + failmsg = "Response body could not be encoded with %r." + encoding = None + errors = 'strict' + text_only = True + add_charset = True + debug = False + + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + self.attempted_charsets = set() + request = cherrypy.serving.request + if request.handler is not None: + # Replace request.handler with self + if self.debug: + cherrypy.log('Replacing request.handler', 'TOOLS.ENCODE') + self.oldhandler = request.handler + request.handler = self + + def encode_stream(self, encoding): + """Encode a streaming response body. + + Use a generator wrapper, and just pray it works as the stream is + being written out. 
+ """ + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + def encoder(body): + for chunk in body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + yield chunk + self.body = encoder(self.body) + return True + + def encode_string(self, encoding): + """Encode a buffered response body.""" + if encoding in self.attempted_charsets: + return False + self.attempted_charsets.add(encoding) + + try: + body = [] + for chunk in self.body: + if isinstance(chunk, unicodestr): + chunk = chunk.encode(encoding, self.errors) + body.append(chunk) + self.body = body + except (LookupError, UnicodeError): + return False + else: + return True + + def find_acceptable_charset(self): + request = cherrypy.serving.request + response = cherrypy.serving.response + + if self.debug: + cherrypy.log('response.stream %r' % response.stream, 'TOOLS.ENCODE') + if response.stream: + encoder = self.encode_stream + else: + encoder = self.encode_string + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + # Encoded strings may be of different lengths from their + # unicode equivalents, and even from each other. For example: + # >>> t = u"\u7007\u3040" + # >>> len(t) + # 2 + # >>> len(t.encode("UTF-8")) + # 6 + # >>> len(t.encode("utf7")) + # 8 + del response.headers["Content-Length"] + + # Parse the Accept-Charset request header, and try to provide one + # of the requested charsets (in order of user preference). + encs = request.headers.elements('Accept-Charset') + charsets = [enc.value.lower() for enc in encs] + if self.debug: + cherrypy.log('charsets %s' % repr(charsets), 'TOOLS.ENCODE') + + if self.encoding is not None: + # If specified, force this encoding to be used, or fail. 
+ encoding = self.encoding.lower() + if self.debug: + cherrypy.log('Specified encoding %r' % encoding, 'TOOLS.ENCODE') + if (not charsets) or "*" in charsets or encoding in charsets: + if self.debug: + cherrypy.log('Attempting encoding %r' % encoding, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + else: + if not encs: + if self.debug: + cherrypy.log('Attempting default encoding %r' % + self.default_encoding, 'TOOLS.ENCODE') + # Any character-set is acceptable. + if encoder(self.default_encoding): + return self.default_encoding + else: + raise cherrypy.HTTPError(500, self.failmsg % self.default_encoding) + else: + for element in encs: + if element.qvalue > 0: + if element.value == "*": + # Matches any charset. Try our default. + if self.debug: + cherrypy.log('Attempting default encoding due ' + 'to %r' % element, 'TOOLS.ENCODE') + if encoder(self.default_encoding): + return self.default_encoding + else: + encoding = element.value + if self.debug: + cherrypy.log('Attempting encoding %s (qvalue >' + '0)' % element, 'TOOLS.ENCODE') + if encoder(encoding): + return encoding + + if "*" not in charsets: + # If no "*" is present in an Accept-Charset field, then all + # character sets not explicitly mentioned get a quality + # value of 0, except for ISO-8859-1, which gets a quality + # value of 1 if not explicitly mentioned. + iso = 'iso-8859-1' + if iso not in charsets: + if self.debug: + cherrypy.log('Attempting ISO-8859-1 encoding', + 'TOOLS.ENCODE') + if encoder(iso): + return iso + + # No suitable encoding found. + ac = request.headers.get('Accept-Charset') + if ac is None: + msg = "Your client did not send an Accept-Charset header." + else: + msg = "Your client sent this Accept-Charset header: %s." % ac + msg += " We tried these charsets: %s." 
% ", ".join(self.attempted_charsets) + raise cherrypy.HTTPError(406, msg) + + def __call__(self, *args, **kwargs): + response = cherrypy.serving.response + self.body = self.oldhandler(*args, **kwargs) + + if isinstance(self.body, basestring): + # strings get wrapped in a list because iterating over a single + # item list is much faster than iterating over every character + # in a long string. + if self.body: + self.body = [self.body] + else: + # [''] doesn't evaluate to False, so replace it with []. + self.body = [] + elif hasattr(self.body, 'read'): + self.body = file_generator(self.body) + elif self.body is None: + self.body = [] + + ct = response.headers.elements("Content-Type") + if self.debug: + cherrypy.log('Content-Type: %r' % [str(h) for h in ct], 'TOOLS.ENCODE') + if ct: + ct = ct[0] + if self.text_only: + if ct.value.lower().startswith("text/"): + if self.debug: + cherrypy.log('Content-Type %s starts with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = True + else: + if self.debug: + cherrypy.log('Not finding because Content-Type %s does ' + 'not start with "text/"' % ct, + 'TOOLS.ENCODE') + do_find = False + else: + if self.debug: + cherrypy.log('Finding because not text_only', 'TOOLS.ENCODE') + do_find = True + + if do_find: + # Set "charset=..." 
def _gzip_mime_type_matches(ct, mime_types):
    """Return True if ``ct`` matches a wildcard pattern in ``mime_types``.

    Supported patterns are 'type/*' and 'type/*+suffix'.  Exact matches
    are the caller's business.  Splits are bounded, so a content type or
    pattern with an extra '/' or '+' is simply compared instead of
    raising ValueError from tuple unpacking.
    """
    if '/' not in ct:
        return False
    ct_media_type, ct_sub_type = ct.split('/', 1)
    ct_plus = ct_sub_type.rfind('+')

    for mime_type in mime_types:
        if '/' not in mime_type:
            continue
        media_type, sub_type = mime_type.split('/', 1)
        if ct_media_type != media_type:
            continue
        if sub_type == '*':
            return True
        plus = sub_type.rfind('+')
        if plus != -1 and ct_plus != -1:
            # Compare what follows the last '+' on both sides.
            if (sub_type[:plus] == '*' and
                    ct_sub_type[ct_plus + 1:] == sub_type[plus + 1:]):
                return True
    return False


def gzip(compress_level=5, mime_types=['text/html', 'text/plain'], debug=False):
    """Try to gzip the response body if Content-Type in mime_types.

    cherrypy.response.headers['Content-Type'] must be set to one of the
    values in the mime_types arg before calling this function.

    The provided list of mime-types must be of one of the following form:
        * type/subtype
        * type/*
        * type/*+subtype

    The 'Vary: Accept-Encoding' header is always added.  The body is left
    uncompressed when any of the following hold:
        * The response body is empty
        * The request is flagged as served from cache (request.cached)
        * The client sends no Accept-Encoding request header
        * An 'identity' coding with a non-zero qvalue is reached before
          a usable 'gzip'/'x-gzip' coding
        * 'gzip' or 'x-gzip' is listed with a qvalue of 0
        * The Content-Type matches none of the mime_types

    If the Accept-Encoding header offers neither 'gzip'/'x-gzip' nor a
    usable 'identity', the response is turned into a 406.
    """
    request = cherrypy.serving.request
    response = cherrypy.serving.response

    set_vary_header(response, "Accept-Encoding")

    if not response.body:
        # Response body is empty (might be a 304 for instance)
        if debug:
            cherrypy.log('No response body', context='TOOLS.GZIP')
        return

    # If returning cached content (which should already have been gzipped),
    # don't re-zip.
    if getattr(request, "cached", False):
        if debug:
            cherrypy.log('Not gzipping cached response', context='TOOLS.GZIP')
        return

    acceptable = request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        if debug:
            cherrypy.log('No Accept-Encoding', context='TOOLS.GZIP')
        return

    ct = response.headers.get('Content-Type', '').split(';')[0]
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            if debug:
                cherrypy.log('Non-zero identity qvalue: %s' % coding,
                             context='TOOLS.GZIP')
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                if debug:
                    cherrypy.log('Zero gzip qvalue: %s' % coding,
                                 context='TOOLS.GZIP')
                return

            # Exact match first; otherwise fall back to the wildcard
            # patterns ('text/*', 'application/*+xml', ...).  The matching
            # only covers these common cases and ignores parameters.
            if (ct not in mime_types and
                    not _gzip_mime_type_matches(ct, mime_types)):
                if debug:
                    cherrypy.log('Content-Type %s not in mime_types %r' %
                                 (ct, mime_types), context='TOOLS.GZIP')
                return

            if debug:
                cherrypy.log('Gzipping', context='TOOLS.GZIP')
            # Return a generator that compresses the page
            response.headers['Content-Encoding'] = 'gzip'
            response.body = compress(response.body, compress_level)
            if "Content-Length" in response.headers:
                # Delete Content-Length header so finalize() recalcs it.
                del response.headers["Content-Length"]

            return

    if debug:
        cherrypy.log('No acceptable encoding found.', context='GZIP')
    cherrypy.HTTPError(406, "identity, gzip").set_response()
+ +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Sylvain Hellegouarch nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
def digestAuth (realm, algorithm = MD5, nonce = None, qop = AUTH):
    """Build the challenge string for a Digest authentication request.

    realm
        The protection realm to advertise; inserted verbatim between quotes.

    algorithm
        Must be a member of SUPPORTED_ALGORITHM.

    nonce
        The server nonce to send. When None, a fresh one is obtained from
        calculateNonce(realm, algorithm).

    qop
        Must be a member of SUPPORTED_QOP.

    Returns the 'Digest ...' challenge as a string. An AssertionError is
    raised when 'algorithm' or 'qop' is not supported.
    """
    assert algorithm in SUPPORTED_ALGORITHM
    assert qop in SUPPORTED_QOP

    # Only mint a nonce when the caller did not bring their own.
    if nonce is None:
        nonce = calculateNonce (realm, algorithm)

    template = 'Digest realm="%s", nonce="%s", algorithm="%s", qop="%s"'
    return template % (realm, nonce, algorithm, qop)
def parseAuthorization (credentials):
    """Convert the value of an 'Authorization' request header into a dict.

    credentials
        The raw header value, of the form '<scheme> <params>'.

    Returns the dict produced by the scheme's parser in AUTH_SCHEMES, with
    the lower-cased scheme name added under the 'auth_scheme' key.

    If the parsing fails, None is returned. That covers a header without
    a space between scheme and parameters, a scheme that has no entry in
    AUTH_SCHEMES, a parser that returns None, and a parser that raises
    ValueError on malformed parameters.
    """
    # A header with no space has no parameters to parse at all.
    try:
        auth_scheme, auth_params = credentials.split(" ", 1)
    except ValueError:
        return None
    auth_scheme = auth_scheme.lower ()

    # Unknown schemes are a parse failure, not a server error.
    try:
        parser = AUTH_SCHEMES[auth_scheme]
    except KeyError:
        return None

    # The scheme parsers unpack split() results, which raises ValueError
    # when the parameters are malformed (e.g. Basic credentials without
    # a ':' separator).
    try:
        params = parser (auth_params)
    except ValueError:
        return None

    if params is None:
        return None

    assert "auth_scheme" not in params
    params["auth_scheme"] = auth_scheme
    return params
+""" + + keys = ("username", "realm", "nonce", "cnonce") + params_copy = {} + for key in keys: + params_copy[key] = params[key] + + params_copy["algorithm"] = MD5_SESS + return _A1 (params_copy, password) + +def _A1(params, password): + algorithm = params.get ("algorithm", MD5) + H = DIGEST_AUTH_ENCODERS[algorithm] + + if algorithm == MD5: + # If the "algorithm" directive's value is "MD5" or is + # unspecified, then A1 is: + # A1 = unq(username-value) ":" unq(realm-value) ":" passwd + return "%s:%s:%s" % (params["username"], params["realm"], password) + + elif algorithm == MD5_SESS: + + # This is A1 if qop is set + # A1 = H( unq(username-value) ":" unq(realm-value) ":" passwd ) + # ":" unq(nonce-value) ":" unq(cnonce-value) + h_a1 = H ("%s:%s:%s" % (params["username"], params["realm"], password)) + return "%s:%s:%s" % (h_a1, params["nonce"], params["cnonce"]) + + +def _A2(params, method, kwargs): + # If the "qop" directive's value is "auth" or is unspecified, then A2 is: + # A2 = Method ":" digest-uri-value + + qop = params.get ("qop", "auth") + if qop == "auth": + return method + ":" + params["uri"] + elif qop == "auth-int": + # If the "qop" value is "auth-int", then A2 is: + # A2 = Method ":" digest-uri-value ":" H(entity-body) + entity_body = kwargs.get ("entity_body", "") + H = kwargs["H"] + + return "%s:%s:%s" % ( + method, + params["uri"], + H(entity_body) + ) + + else: + raise NotImplementedError ("The 'qop' method is unknown: %s" % qop) + +def _computeDigestResponse(auth_map, password, method = "GET", A1 = None,**kwargs): + """ + Generates a response respecting the algorithm defined in RFC 2617 + """ + params = auth_map + + algorithm = params.get ("algorithm", MD5) + + H = DIGEST_AUTH_ENCODERS[algorithm] + KD = lambda secret, data: H(secret + ":" + data) + + qop = params.get ("qop", None) + + H_A2 = H(_A2(params, method, kwargs)) + + if algorithm == MD5_SESS and A1 is not None: + H_A1 = H(A1) + else: + H_A1 = H(_A1(params, password)) + + if qop in ("auth", 
"auth-int"): + # If the "qop" value is "auth" or "auth-int": + # request-digest = <"> < KD ( H(A1), unq(nonce-value) + # ":" nc-value + # ":" unq(cnonce-value) + # ":" unq(qop-value) + # ":" H(A2) + # ) <"> + request = "%s:%s:%s:%s:%s" % ( + params["nonce"], + params["nc"], + params["cnonce"], + params["qop"], + H_A2, + ) + elif qop is None: + # If the "qop" directive is not present (this construction is + # for compatibility with RFC 2069): + # request-digest = + # <"> < KD ( H(A1), unq(nonce-value) ":" H(A2) ) > <"> + request = "%s:%s" % (params["nonce"], H_A2) + + return KD(H_A1, request) + +def _checkDigestResponse(auth_map, password, method = "GET", A1 = None, **kwargs): + """This function is used to verify the response given by the client when + he tries to authenticate. + Optional arguments: + entity_body - when 'qop' is set to 'auth-int' you MUST provide the + raw data you are going to send to the client (usually the + HTML page. + request_uri - the uri from the request line compared with the 'uri' + directive of the authorization map. They must represent + the same resource (unused at this time). + """ + + if auth_map['realm'] != kwargs.get('realm', None): + return False + + response = _computeDigestResponse(auth_map, password, method, A1,**kwargs) + + return response == auth_map["response"] + +def _checkBasicResponse (auth_map, password, method='GET', encrypt=None, **kwargs): + # Note that the Basic response doesn't provide the realm value so we cannot + # test it + try: + return encrypt(auth_map["password"], auth_map["username"]) == password + except TypeError: + return encrypt(auth_map["password"]) == password + +AUTH_RESPONSES = { + "basic": _checkBasicResponse, + "digest": _checkDigestResponse, +} + +def checkResponse (auth_map, password, method = "GET", encrypt=None, **kwargs): + """'checkResponse' compares the auth_map with the password and optionally + other arguments that each implementation might need. 
+ + If the response is of type 'Basic' then the function has the following + signature:: + + checkBasicResponse (auth_map, password) -> bool + + If the response is of type 'Digest' then the function has the following + signature:: + + checkDigestResponse (auth_map, password, method = 'GET', A1 = None) -> bool + + The 'A1' argument is only used in MD5_SESS algorithm based responses. + Check md5SessionKey() for more info. + """ + checker = AUTH_RESPONSES[auth_map["auth_scheme"]] + return checker (auth_map, password, method=method, encrypt=encrypt, **kwargs) + + + + diff --git a/cherrypy/lib/httputil.py b/cherrypy/lib/httputil.py new file mode 100755 index 0000000..e005875 --- /dev/null +++ b/cherrypy/lib/httputil.py @@ -0,0 +1,469 @@ +"""HTTP library functions. + +This module contains functions for building an HTTP application +framework: any one, not just one whose name starts with "Ch". ;) If you +reference any modules from some popular framework inside *this* module, +FuManChu will personally hang you up by your thumbs and submit you +to a public caning. +""" + +from binascii import b2a_base64 +from cherrypy._cpcompat import BaseHTTPRequestHandler, HTTPDate, ntob, ntou, reversed, sorted +from cherrypy._cpcompat import basestring, iteritems, unicodestr, unquote_qs +response_codes = BaseHTTPRequestHandler.responses.copy() + +# From http://www.cherrypy.org/ticket/361 +response_codes[500] = ('Internal Server Error', + 'The server encountered an unexpected condition ' + 'which prevented it from fulfilling the request.') +response_codes[503] = ('Service Unavailable', + 'The server is currently unable to handle the ' + 'request due to a temporary overloading or ' + 'maintenance of the server.') + +import re +import urllib + + + +def urljoin(*atoms): + """Return the given path \*atoms, joined into a single URL. + + This will correctly join a SCRIPT_NAME and PATH_INFO into the + original URL, even if either atom is blank. 
def get_ranges(headervalue, content_length):
    """Return a list of (start, stop) indices from a Range header, or None.

    Each (start, stop) tuple will be composed of two ints, which are suitable
    for use in a slicing operation. That is, the header "Range: bytes=3-6",
    if applied against a Python string, is requesting resource[3:7]. This
    function will return the list [(3, 7)].

    headervalue
        The raw value of the Range header, e.g. "bytes=0-99,-20".

    content_length
        The length of the entity the ranges apply to.

    None is returned when the header is empty or when a byte-range-spec
    is syntactically invalid, in which case the header should be ignored.

    If this function returns an empty list, you should return HTTP 416.
    """

    if not headervalue:
        return None

    result = []
    bytesunit, byteranges = headervalue.split("=", 1)
    for brange in byteranges.split(","):
        start, stop = [x.strip() for x in brange.split("-", 1)]
        if start:
            if not stop:
                stop = content_length - 1
            start, stop = int(start), int(stop)
            if start >= content_length:
                # From rfc 2616 sec 14.16:
                # "If the server receives a request (other than one
                # including an If-Range request-header field) with an
                # unsatisfiable Range request-header field (that is,
                # all of whose byte-range-spec values have a first-byte-pos
                # value greater than the current length of the selected
                # resource), it SHOULD return a response code of 416
                # (Requested range not satisfiable)."
                continue
            if stop < start:
                # From rfc 2616 sec 14.16:
                # "If the server ignores a byte-range-spec because it
                # is syntactically invalid, the server SHOULD treat
                # the request as if the invalid Range header field
                # did not exist. (Normally, this means return a 200
                # response containing the full entity)."
                return None
            result.append((start, stop + 1))
        else:
            if not stop:
                # See rfc quote above.
                return None
            # Negative subscript (last N bytes).
            #
            # From rfc 2616 sec 14.35.1:
            # "If the entity is shorter than the specified suffix-length,
            # the entire entity-body is used."
            # Without the clamp the start index would go negative and
            # slice the wrong part of the resource.
            suffix_start = max(0, content_length - int(stop))
            result.append((suffix_start, content_length))

    return result
def decode_TEXT(value):
    r"""Decode :rfc:`2047` TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr").

    value
        The header text, possibly containing RFC 2047 encoded-words.

    Returns the concatenation of all atoms found by
    email's decode_header(), with each atom that carries a charset decoded
    using that charset.
    """
    # The module was renamed from 'email.Header' to 'email.header'; try the
    # current name first and keep the old one as a fallback for
    # interpreters that only ship the capitalised module.
    try:
        from email.header import decode_header
    except ImportError:
        from email.Header import decode_header

    decoded_atoms = []
    for atom, charset in decode_header(value):
        if charset is not None:
            atom = atom.decode(charset)
        decoded_atoms.append(atom)
    return "".join(decoded_atoms)
+ """ + + if not status: + status = 200 + + status = str(status) + parts = status.split(" ", 1) + if len(parts) == 1: + # No reason supplied. + code, = parts + reason = None + else: + code, reason = parts + reason = reason.strip() + + try: + code = int(code) + except ValueError: + raise ValueError("Illegal response status from server " + "(%s is non-numeric)." % repr(code)) + + if code < 100 or code > 599: + raise ValueError("Illegal response status from server " + "(%s is out of range)." % repr(code)) + + if code not in response_codes: + # code is unknown but not illegal + default_reason, message = "", "" + else: + default_reason, message = response_codes[code] + + if reason is None: + reason = default_reason + + return code, reason, message + + +def _parse_qs(qs, keep_blank_values=0, strict_parsing=0, encoding='utf-8'): + """Parse a query given as a string argument. + + Arguments: + + qs: URL-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + URL encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + Returns a dict, as G-d intended. 
class CaseInsensitiveDict(dict):
    """A case-insensitive dict subclass.

    Each key is changed on entry to str(key).title(), and every lookup
    applies the same transformation, so 'content-type', 'Content-Type'
    and 'CONTENT-TYPE' all address the same item.
    """

    # Sentinel marking "no default supplied" for pop(); None cannot be used
    # because it is a legitimate default value.
    _no_default = object()

    def __getitem__(self, key):
        return dict.__getitem__(self, str(key).title())

    def __setitem__(self, key, value):
        dict.__setitem__(self, str(key).title(), value)

    def __delitem__(self, key):
        dict.__delitem__(self, str(key).title())

    def __contains__(self, key):
        return dict.__contains__(self, str(key).title())

    def get(self, key, default=None):
        return dict.get(self, str(key).title(), default)

    def has_key(self, key):
        # Implemented via __contains__ so it does not depend on
        # dict.has_key(), which newer interpreters no longer provide.
        return dict.__contains__(self, str(key).title())

    def update(self, E):
        """Copy every item of the mapping E into self, normalizing keys."""
        for k in E.keys():
            self[str(k).title()] = E[k]

    def fromkeys(cls, seq, value=None):
        """Return a new instance with the keys of seq, all set to value."""
        newdict = cls()
        for k in seq:
            newdict[str(k).title()] = value
        return newdict
    fromkeys = classmethod(fromkeys)

    def setdefault(self, key, x=None):
        key = str(key).title()
        try:
            return self[key]
        except KeyError:
            self[key] = x
            return x

    def pop(self, key, default=_no_default):
        """Remove key and return its value.

        Like dict.pop(): when key is missing, 'default' is returned if it
        was given, otherwise KeyError is raised.
        """
        key = str(key).title()
        if default is CaseInsensitiveDict._no_default:
            return dict.pop(self, key)
        return dict.pop(self, key, default)
class Host(object):
    """An internet address.

    name
        Should be the client's host name. If not available (because no DNS
        lookup is performed), the IP address should be used instead.

    """

    # Class-level fallbacks; __init__ always overrides them per instance.
    ip = "0.0.0.0"
    port = 80
    name = "unknown.tld"

    def __init__(self, ip, port, name=None):
        """Store the address; 'name' falls back to 'ip' when not given."""
        self.ip, self.port = ip, port
        if name is not None:
            self.name = name
        else:
            self.name = ip

    def __repr__(self):
        fields = (self.ip, self.port, self.name)
        return "httputil.Host(%r, %r, %r)" % fields
def json_out(content_type='application/json', debug=False, handler=json_handler):
    """Wrap request.handler to serialize its output to JSON. Sets Content-Type.

    content_type
        Value for the Content-Type response header. If the given
        content_type is None, the Content-Type response header is not set.

    debug
        When true, the handler replacement and the Content-Type change
        are written to the CherryPy log.

    handler
        The replacement for request.handler; the original handler is kept
        on the request as '_json_inner_handler'.

    Provide your own handler to use a custom encoder.  For example
    cherrypy.config['tools.json_out.handler'] = <function>, or
    @json_out(handler=function).

    You must be using Python 2.6 or greater, or have the 'simplejson'
    package importable; otherwise, ValueError is raised during processing.
    """
    request = cherrypy.serving.request
    if debug:
        cherrypy.log('Replacing %s with JSON handler' % request.handler,
                     'TOOLS.JSON_OUT')
    # Keep the real page handler reachable for the JSON wrapper.
    request._json_inner_handler = request.handler
    request.handler = handler
    if content_type is not None:
        if debug:
            # Log the value actually being set; no other name holds it.
            cherrypy.log('Setting Content-Type to %s' % content_type,
                         'TOOLS.JSON_OUT')
        cherrypy.serving.response.headers['Content-Type'] = content_type
def new_func_strip_path(func_name):
    """Shorten a profiler (filename, line, name) entry for display.

    The filename is reduced to its base name. For ``__init__.py`` files
    the name of the containing directory is kept in front of it, so that
    different packages' ``__init__`` modules can be told apart.
    """
    filename, line, name = func_name
    short_name = os.path.basename(filename)
    if filename.endswith("__init__.py"):
        # The last 12 characters are the path separator plus '__init__.py';
        # what precedes them is the package directory.
        package_dir = filename[:-12]
        short_name = os.path.basename(package_dir) + filename[-12:]
    return short_name, line, name
class ProfileAggregator(Profiler):
    """A Profiler which accumulates all runs into one profile.

    A single profile.Profile object is created per instance and reused
    for every run() call, and its stats are dumped to the same file
    each time.
    """

    def __init__(self, path=None):
        Profiler.__init__(self, path)
        global _count
        # Reserve one sequence number for the lifetime of this aggregator;
        # it names the single output file.
        self.count = _count = _count + 1
        self.profiler = profile.Profile()

    def run(self, func, *args, **params):
        """Profile func(*args, **params) and dump the aggregated stats.

        Keyword arguments are accepted and forwarded, matching the
        signature of Profiler.run. Returns whatever 'func' returns.
        """
        path = os.path.join(self.path, "cp_%04d.prof" % self.count)
        result = self.profiler.runcall(func, *args, **params)
        self.profiler.dump_stats(path)
        return result
" + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + + self.nextapp = nextapp + self.aggregate = aggregate + if aggregate: + self.profiler = ProfileAggregator(path) + else: + self.profiler = Profiler(path) + + def __call__(self, environ, start_response): + def gather(): + result = [] + for line in self.nextapp(environ, start_response): + result.append(line) + return result + return self.profiler.run(gather) + + +def serve(path=None, port=8080): + if profile is None or pstats is None: + msg = ("Your installation of Python does not have a profile module. " + "If you're on Debian, try `sudo apt-get install python-profiler`. " + "See http://www.cherrypy.org/wiki/ProfilingOnDebian for details.") + warnings.warn(msg) + + import cherrypy + cherrypy.config.update({'server.socket_port': int(port), + 'server.thread_pool': 10, + 'environment': "production", + }) + cherrypy.quickstart(Profiler(path)) + + +if __name__ == "__main__": + serve(*tuple(sys.argv[1:])) + diff --git a/cherrypy/lib/reprconf.py b/cherrypy/lib/reprconf.py new file mode 100755 index 0000000..e18949e --- /dev/null +++ b/cherrypy/lib/reprconf.py @@ -0,0 +1,351 @@ +"""Generic configuration system using unrepr. + +Configuration data may be supplied as a Python dictionary, as a filename, +or as an open file object. When you supply a filename or file, Python's +builtin ConfigParser is used (with some extensions). + +Namespaces +---------- + +Configuration keys are separated into namespaces by the first "." in the key. + +The only key that cannot exist in a namespace is the "environment" entry. +This special entry 'imports' other config entries from a template stored in +the Config.environments dict. + +You can define your own namespaces to be called when new config is merged +by adding a named handler to Config.namespaces. The name can be any string, +and the handler must be either a callable or a context manager. 
class NamespaceSet(dict):
    """A dict mapping config namespace names to their handlers.

    A config key such as ``'tools.gzip.on'`` belongs to the namespace named
    by the text before its first dot (``'tools'``).  The handler registered
    under that name is called once per entry with two arguments: the rest
    of the key (``'gzip.on'``) and the config value.

    A handler is either a plain callable, or an object with ``__enter__``
    and ``__exit__`` methods; in the latter case the object returned by
    ``__enter__`` is the callable that receives the entries.
    """

    def __call__(self, config):
        """Pass every namespaced entry of ``config`` to its handler.

        config
            A flat dict whose keys use dots to separate namespaces.
            Keys without a dot belong to no namespace and are skipped.
        """
        # Bucket the entries by namespace, stripping the namespace prefix.
        grouped = {}
        for key in config:
            if "." not in key:
                continue
            prefix, remainder = key.split(".", 1)
            grouped.setdefault(prefix, {})[remainder] = config[key]

        for ns, handler in self.items():
            leave = getattr(handler, "__exit__", None)
            if not leave:
                # Plain callable: feed it the entries directly.
                for k, v in grouped.get(ns, {}).items():
                    handler(k, v)
                continue

            # Context-manager style handler.  This spells out by hand what
            # a 'with handler as target:' block would do.
            target = handler.__enter__()
            finished_cleanly = True
            try:
                try:
                    for k, v in grouped.get(ns, {}).items():
                        target(k, v)
                except:
                    # Exceptional exit: __exit__ sees the exception and may
                    # swallow it by returning a true value.
                    finished_cleanly = False
                    if not leave(*sys.exc_info()):
                        raise
            finally:
                # Normal exit (including non-local jumps out of the loop).
                if finished_cleanly:
                    leave(None, None, None)

    def __repr__(self):
        cls = self.__class__
        return "%s.%s(%s)" % (self.__module__, cls.__name__,
                              dict.__repr__(self))

    def __copy__(self):
        """Return a shallow copy of the same class."""
        duplicate = self.__class__()
        duplicate.update(self)
        return duplicate
    copy = __copy__
+ """ + + defaults = {} + environments = {} + namespaces = NamespaceSet() + + def __init__(self, file=None, **kwargs): + self.reset() + if file is not None: + self.update(file) + if kwargs: + self.update(kwargs) + + def reset(self): + """Reset self to default values.""" + self.clear() + dict.update(self, self.defaults) + + def update(self, config): + """Update self from a dict, file or filename.""" + if isinstance(config, basestring): + # Filename + config = Parser().dict_from_file(config) + elif hasattr(config, 'read'): + # Open file object + config = Parser().dict_from_file(config) + else: + config = config.copy() + self._apply(config) + + def _apply(self, config): + """Update self from a dict.""" + which_env = config.get('environment') + if which_env: + env = self.environments[which_env] + for k in env: + if k not in config: + config[k] = env[k] + + dict.update(self, config) + self.namespaces(config) + + def __setitem__(self, k, v): + dict.__setitem__(self, k, v) + self.namespaces({k: v}) + + +class Parser(ConfigParser): + """Sub-class of ConfigParser that keeps the case of options and that + raises an exception if the file cannot be read. + """ + + def optionxform(self, optionstr): + return optionstr + + def read(self, filenames): + if isinstance(filenames, basestring): + filenames = [filenames] + for filename in filenames: + # try: + # fp = open(filename) + # except IOError: + # continue + fp = open(filename) + try: + self._read(fp, filename) + finally: + fp.close() + + def as_dict(self, raw=False, vars=None): + """Convert an INI file to a dictionary""" + # Load INI file into a dict + result = {} + for section in self.sections(): + if section not in result: + result[section] = {} + for option in self.options(section): + value = self.get(section, option, raw, vars) + try: + value = unrepr(value) + except Exception, x: + msg = ("Config error in section: %r, option: %r, " + "value: %r. Config values must be valid Python." 
def build(self, o):
    """Convert the node ``o`` by dispatching on its class name.

    The method ``build_<ClassName>`` of this builder is looked up and
    called with the node; TypeError is raised when no such method exists.
    """
    kind = o.__class__.__name__
    visitor = getattr(self, 'build_' + kind, None)
    if visitor is not None:
        return visitor(o)
    raise TypeError("unrepr does not recognize %s" %
                    repr(kind))
def modules(modulePath):
    """Load a module and retrieve a reference to that module."""
    # A missing entry and a None placeholder in sys.modules are treated
    # alike: both mean the module still has to be imported.
    mod = sys.modules.get(modulePath)
    if mod is None:
        # The last [''] is important: the non-empty fromlist makes
        # __import__ return the module named by the full dotted path.
        mod = __import__(modulePath, globals(), locals(), [''])
    return mod
+ return attr + + diff --git a/cherrypy/lib/sessions.py b/cherrypy/lib/sessions.py new file mode 100755 index 0000000..42c2800 --- /dev/null +++ b/cherrypy/lib/sessions.py @@ -0,0 +1,832 @@ +"""Session implementation for CherryPy. + +You need to edit your config file to use sessions. Here's an example:: + + [/] + tools.sessions.on = True + tools.sessions.storage_type = "file" + tools.sessions.storage_path = "/home/site/sessions" + tools.sessions.timeout = 60 + +This sets the session to be stored in files in the directory /home/site/sessions, +and the session timeout to 60 minutes. If you omit ``storage_type`` the sessions +will be saved in RAM. ``tools.sessions.on`` is the only required line for +working sessions, the rest are optional. + +By default, the session ID is passed in a cookie, so the client's browser must +have cookies enabled for your site. + +To set data for the current session, use +``cherrypy.session['fieldname'] = 'fieldvalue'``; +to get data use ``cherrypy.session.get('fieldname')``. + +================ +Locking sessions +================ + +By default, the ``'locking'`` mode of sessions is ``'implicit'``, which means +the session is locked early and unlocked late. If you want to control when the +session data is locked and unlocked, set ``tools.sessions.locking = 'explicit'``. +Then call ``cherrypy.session.acquire_lock()`` and ``cherrypy.session.release_lock()``. +Regardless of which mode you use, the session is guaranteed to be unlocked when +the request is complete. + +================= +Expiring Sessions +================= + +You can force a session to expire with :func:`cherrypy.lib.sessions.expire`. +Simply call that function at the point you want the session to expire, and it +will cause the session cookie to expire client-side. 
+ +=========================== +Session Fixation Protection +=========================== + +If CherryPy receives, via a request cookie, a session id that it does not +recognize, it will reject that id and create a new one to return in the +response cookie. This `helps prevent session fixation attacks +<http://en.wikipedia.org/wiki/Session_fixation#Regenerate_SID_on_each_request>`_. +However, CherryPy "recognizes" a session id by looking up the saved session +data for that id. Therefore, if you never save any session data, +**you will get a new session id for every request**. + +================ +Sharing Sessions +================ + +If you run multiple instances of CherryPy (for example via mod_python behind +Apache prefork), you most likely cannot use the RAM session backend, since each +instance of CherryPy will have its own memory space. Use a different backend +instead, and verify that all instances are pointing at the same file or db +location. Alternately, you might try a load balancer which makes sessions +"sticky". Google is your friend, there. + +================ +Expiration Dates +================ + +The response cookie will possess an expiration date to inform the client at +which point to stop sending the cookie back in requests. If the server time +and client time differ, expect sessions to be unreliable. **Make sure the +system time of your server is accurate**. + +CherryPy defaults to a 60-minute session timeout, which also applies to the +cookie which is sent to the client. Unfortunately, some versions of Safari +("4 public beta" on Windows XP at least) appear to have a bug in their parsing +of the GMT expiration date--they appear to interpret the date as one hour in +the past. Sixty minutes minus one hour is pretty close to zero, so you may +experience this bug as a new session id for every request, unless the requests +are less than one second apart. To fix, try increasing the session.timeout. 
+ +On the other extreme, some users report Firefox sending cookies after their +expiration date, although this was on a system with an inaccurate system time. +Maybe FF doesn't trust system time. +""" + +import datetime +import os +import random +import time +import threading +import types +from warnings import warn + +import cherrypy +from cherrypy._cpcompat import copyitems, pickle, random20 +from cherrypy.lib import httputil + + +missing = object() + +class Session(object): + """A CherryPy dict-like Session object (one per request).""" + + _id = None + + id_observers = None + "A list of callbacks to which to pass new id's." + + def _get_id(self): + return self._id + def _set_id(self, value): + self._id = value + for o in self.id_observers: + o(value) + id = property(_get_id, _set_id, doc="The current session ID.") + + timeout = 60 + "Number of minutes after which to delete session data." + + locked = False + """ + If True, this session instance has exclusive read/write access + to session data.""" + + loaded = False + """ + If True, data has been retrieved from storage. This should happen + automatically on the first attempt to access session data.""" + + clean_thread = None + "Class-level Monitor which calls self.clean_up." + + clean_freq = 5 + "The poll rate for expired session cleanup in minutes." + + originalid = None + "The session id passed by the client. May be missing or unsafe." + + missing = False + "True if the session requested by the client did not exist." + + regenerated = False + """ + True if the application called session.regenerate(). 
This is not set by + internal calls to regenerate the session id.""" + + debug=False + + def __init__(self, id=None, **kwargs): + self.id_observers = [] + self._data = {} + + for k, v in kwargs.items(): + setattr(self, k, v) + + self.originalid = id + self.missing = False + if id is None: + if self.debug: + cherrypy.log('No id given; making a new one', 'TOOLS.SESSIONS') + self._regenerate() + else: + self.id = id + if not self._exists(): + if self.debug: + cherrypy.log('Expired or malicious session %r; ' + 'making a new one' % id, 'TOOLS.SESSIONS') + # Expired or malicious session. Make a new one. + # See http://www.cherrypy.org/ticket/709. + self.id = None + self.missing = True + self._regenerate() + + def regenerate(self): + """Replace the current session (with a new id).""" + self.regenerated = True + self._regenerate() + + def _regenerate(self): + if self.id is not None: + self.delete() + + old_session_was_locked = self.locked + if old_session_was_locked: + self.release_lock() + + self.id = None + while self.id is None: + self.id = self.generate_id() + # Assert that the generated id is not already stored. 
def save(self):
    """Save session data and release the session lock if it is held.

    Nothing is written unless the data has been loaded: data that was
    never loaded was never accessed, so there is nothing to save.
    """
    try:
        if not self.loaded:
            return

        lifetime = datetime.timedelta(seconds=self.timeout * 60)
        expiration_time = datetime.datetime.now() + lifetime
        if self.debug:
            cherrypy.log('Saving with expiry %s' % expiration_time,
                         'TOOLS.SESSIONS')
        self._save(expiration_time)
    finally:
        # Always release the lock if the user didn't release it,
        # whether or not anything was saved.
        if self.locked:
            self.release_lock()
+ t = cherrypy.process.plugins.Monitor( + cherrypy.engine, self.clean_up, self.clean_freq * 60, + name='Session cleanup') + t.subscribe() + cls.clean_thread = t + t.start() + + def delete(self): + """Delete stored session data.""" + self._delete() + + def __getitem__(self, key): + if not self.loaded: self.load() + return self._data[key] + + def __setitem__(self, key, value): + if not self.loaded: self.load() + self._data[key] = value + + def __delitem__(self, key): + if not self.loaded: self.load() + del self._data[key] + + def pop(self, key, default=missing): + """Remove the specified key and return the corresponding value. + If key is not found, default is returned if given, + otherwise KeyError is raised. + """ + if not self.loaded: self.load() + if default is missing: + return self._data.pop(key) + else: + return self._data.pop(key, default) + + def __contains__(self, key): + if not self.loaded: self.load() + return key in self._data + + def has_key(self, key): + """D.has_key(k) -> True if D has a key k, else False.""" + if not self.loaded: self.load() + return key in self._data + + def get(self, key, default=None): + """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.""" + if not self.loaded: self.load() + return self._data.get(key, default) + + def update(self, d): + """D.update(E) -> None. Update D from E: for k in E: D[k] = E[k].""" + if not self.loaded: self.load() + self._data.update(d) + + def setdefault(self, key, default=None): + """D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D.""" + if not self.loaded: self.load() + return self._data.setdefault(key, default) + + def clear(self): + """D.clear() -> None. 
class RamSession(Session):
    """Session backend that keeps all session data in process memory."""

    # Class-level objects. Don't rebind these!
    # cache maps a session id to a (data, expiration_time) tuple;
    # locks maps a session id to the RLock guarding that session.
    cache = {}
    locks = {}

    def clean_up(self):
        """Clean up expired sessions."""
        cutoff = datetime.datetime.now()
        # Walk a snapshot of the items so that entries can be removed
        # while looping.
        for sid, (data, expiration_time) in copyitems(self.cache):
            if expiration_time > cutoff:
                continue
            # The entry (or its lock) may already be gone; that's fine.
            self.cache.pop(sid, None)
            self.locks.pop(sid, None)

    def _exists(self):
        return self.id in self.cache

    def _load(self):
        return self.cache.get(self.id)

    def _save(self, expiration_time):
        self.cache[self.id] = (self._data, expiration_time)

    def _delete(self):
        self.cache.pop(self.id, None)

    def acquire_lock(self):
        """Acquire an exclusive lock on the currently-loaded session data."""
        self.locked = True
        # Create this session's lock on first use.
        session_lock = self.locks.setdefault(self.id, threading.RLock())
        session_lock.acquire()

    def release_lock(self):
        """Release the lock on the currently-loaded session data."""
        session_lock = self.locks[self.id]
        session_lock.release()
        self.locked = False

    def __len__(self):
        """Return the number of active sessions."""
        return len(self.cache)
+ + """ + + SESSION_PREFIX = 'session-' + LOCK_SUFFIX = '.lock' + pickle_protocol = pickle.HIGHEST_PROTOCOL + + def __init__(self, id=None, **kwargs): + # The 'storage_path' arg is required for file-based sessions. + kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) + Session.__init__(self, id=id, **kwargs) + + def setup(cls, **kwargs): + """Set up the storage system for file-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + # The 'storage_path' arg is required for file-based sessions. + kwargs['storage_path'] = os.path.abspath(kwargs['storage_path']) + + for k, v in kwargs.items(): + setattr(cls, k, v) + + # Warn if any lock files exist at startup. + lockfiles = [fname for fname in os.listdir(cls.storage_path) + if (fname.startswith(cls.SESSION_PREFIX) + and fname.endswith(cls.LOCK_SUFFIX))] + if lockfiles: + plural = ('', 's')[len(lockfiles) > 1] + warn("%s session lockfile%s found at startup. If you are " + "only running one process, then you may need to " + "manually delete the lockfiles found at %r." 
+ % (len(lockfiles), plural, cls.storage_path)) + setup = classmethod(setup) + + def _get_file_path(self): + f = os.path.join(self.storage_path, self.SESSION_PREFIX + self.id) + if not os.path.abspath(f).startswith(self.storage_path): + raise cherrypy.HTTPError(400, "Invalid session id in cookie.") + return f + + def _exists(self): + path = self._get_file_path() + return os.path.exists(path) + + def _load(self, path=None): + if path is None: + path = self._get_file_path() + try: + f = open(path, "rb") + try: + return pickle.load(f) + finally: + f.close() + except (IOError, EOFError): + return None + + def _save(self, expiration_time): + f = open(self._get_file_path(), "wb") + try: + pickle.dump((self._data, expiration_time), f, self.pickle_protocol) + finally: + f.close() + + def _delete(self): + try: + os.unlink(self._get_file_path()) + except OSError: + pass + + def acquire_lock(self, path=None): + """Acquire an exclusive lock on the currently-loaded session data.""" + if path is None: + path = self._get_file_path() + path += self.LOCK_SUFFIX + while True: + try: + lockfd = os.open(path, os.O_CREAT|os.O_WRONLY|os.O_EXCL) + except OSError: + time.sleep(0.1) + else: + os.close(lockfd) + break + self.locked = True + + def release_lock(self, path=None): + """Release the lock on the currently-loaded session data.""" + if path is None: + path = self._get_file_path() + os.unlink(path + self.LOCK_SUFFIX) + self.locked = False + + def clean_up(self): + """Clean up expired sessions.""" + now = datetime.datetime.now() + # Iterate over all session files in self.storage_path + for fname in os.listdir(self.storage_path): + if (fname.startswith(self.SESSION_PREFIX) + and not fname.endswith(self.LOCK_SUFFIX)): + # We have a session file: lock and load it and check + # if it's expired. If it fails, nevermind. 
def setup(cls, **kwargs):
    """Set up the storage system for Postgres-based sessions.

    Every keyword argument is stored as a class attribute, then the
    database connection returned by ``cls.get_db()`` is stored as the
    class attribute ``db`` (each instance opens its cursor on it).

    This should only be called once per process; this will be done
    automatically when using sessions.init (as the built-in Tool does).
    """
    for k, v in kwargs.items():
        setattr(cls, k, v)

    # This is a classmethod: there is no 'self' here, so the connection
    # has to be looked up on, and bound to, the class.
    cls.db = cls.get_db()
setup = classmethod(setup)
+ locks = {} + + servers = ['127.0.0.1:11211'] + + def setup(cls, **kwargs): + """Set up the storage system for memcached-based sessions. + + This should only be called once per process; this will be done + automatically when using sessions.init (as the built-in Tool does). + """ + for k, v in kwargs.items(): + setattr(cls, k, v) + + import memcache + cls.cache = memcache.Client(cls.servers) + setup = classmethod(setup) + + def _exists(self): + self.mc_lock.acquire() + try: + return bool(self.cache.get(self.id)) + finally: + self.mc_lock.release() + + def _load(self): + self.mc_lock.acquire() + try: + return self.cache.get(self.id) + finally: + self.mc_lock.release() + + def _save(self, expiration_time): + # Send the expiration time as "Unix time" (seconds since 1/1/1970) + td = int(time.mktime(expiration_time.timetuple())) + self.mc_lock.acquire() + try: + if not self.cache.set(self.id, (self._data, expiration_time), td): + raise AssertionError("Session data for id %r not set." % self.id) + finally: + self.mc_lock.release() + + def _delete(self): + self.cache.delete(self.id) + + def acquire_lock(self): + """Acquire an exclusive lock on the currently-loaded session data.""" + self.locked = True + self.locks.setdefault(self.id, threading.RLock()).acquire() + + def release_lock(self): + """Release the lock on the currently-loaded session data.""" + self.locks[self.id].release() + self.locked = False + + def __len__(self): + """Return the number of active sessions.""" + raise NotImplementedError + + +# Hook functions (for CherryPy tools) + +def save(): + """Save any changed session data.""" + + if not hasattr(cherrypy.serving, "session"): + return + request = cherrypy.serving.request + response = cherrypy.serving.response + + # Guard against running twice + if hasattr(request, "_sessionsaved"): + return + request._sessionsaved = True + + if response.stream: + # If the body is being streamed, we have to save the data + # *after* the response has been written out + 
def close():
    """Close the session object for this request.

    Releases the session lock if the session attached to this request
    is still holding it; does nothing when there is no session.
    """
    current = getattr(cherrypy.serving, "session", None)
    still_locked = getattr(current, "locked", False)
    if not still_locked:
        return
    # If the session is still locked we release the lock
    current.release_lock()
close.failsafe = True
close.priority = 90
See the subclass of Session + you're using for more information. + """ + + request = cherrypy.serving.request + + # Guard against running twice + if hasattr(request, "_session_init_flag"): + return + request._session_init_flag = True + + # Check if request came with a session ID + id = None + if name in request.cookie: + id = request.cookie[name].value + if debug: + cherrypy.log('ID obtained from request.cookie: %r' % id, + 'TOOLS.SESSIONS') + + # Find the storage class and call setup (first time only). + storage_class = storage_type.title() + 'Session' + storage_class = globals()[storage_class] + if not hasattr(cherrypy, "session"): + if hasattr(storage_class, "setup"): + storage_class.setup(**kwargs) + + # Create and attach a new Session instance to cherrypy.serving. + # It will possess a reference to (and lock, and lazily load) + # the requested session data. + kwargs['timeout'] = timeout + kwargs['clean_freq'] = clean_freq + cherrypy.serving.session = sess = storage_class(id, **kwargs) + sess.debug = debug + def update_cookie(id): + """Update the cookie every time the session id changes.""" + cherrypy.serving.response.cookie[name] = id + sess.id_observers.append(update_cookie) + + # Create cherrypy.session which will proxy to cherrypy.serving.session + if not hasattr(cherrypy, "session"): + cherrypy.session = cherrypy._ThreadLocalProxy('session') + + if persistent: + cookie_timeout = timeout + else: + # See http://support.microsoft.com/kb/223799/EN-US/ + # and http://support.mozilla.com/en-US/kb/Cookies + cookie_timeout = None + set_response_cookie(path=path, path_header=path_header, name=name, + timeout=cookie_timeout, domain=domain, secure=secure) + + +def set_response_cookie(path=None, path_header=None, name='session_id', + timeout=60, domain=None, secure=False): + """Set a response cookie for the client. + + path + the 'path' value to stick in the response cookie metadata. 
+ + path_header + if 'path' is None (the default), then the response + cookie 'path' will be pulled from request.headers[path_header]. + + name + the name of the cookie. + + timeout + the expiration timeout for the cookie. If 0 or other boolean + False, no 'expires' param will be set, and the cookie will be a + "session cookie" which expires when the browser is closed. + + domain + the cookie domain. + + secure + if False (the default) the cookie 'secure' value will not + be set. If True, the cookie 'secure' value will be set (to 1). + + """ + # Set response cookie + cookie = cherrypy.serving.response.cookie + cookie[name] = cherrypy.serving.session.id + cookie[name]['path'] = (path or cherrypy.serving.request.headers.get(path_header) + or '/') + + # We'd like to use the "max-age" param as indicated in + # http://www.faqs.org/rfcs/rfc2109.html but IE doesn't + # save it to disk and the session is lost if people close + # the browser. So we have to use the old "expires" ... sigh ... +## cookie[name]['max-age'] = timeout * 60 + if timeout: + e = time.time() + (timeout * 60) + cookie[name]['expires'] = httputil.HTTPDate(e) + if domain is not None: + cookie[name]['domain'] = domain + if secure: + cookie[name]['secure'] = 1 + + +def expire(): + """Expire the current session cookie.""" + name = cherrypy.serving.request.config.get('tools.sessions.name', 'session_id') + one_year = 60 * 60 * 24 * 365 + e = time.time() - one_year + cherrypy.serving.response.cookie[name]['expires'] = httputil.HTTPDate(e) + + diff --git a/cherrypy/lib/static.py b/cherrypy/lib/static.py new file mode 100755 index 0000000..cb9a68c --- /dev/null +++ b/cherrypy/lib/static.py @@ -0,0 +1,352 @@ +import logging +import mimetypes +mimetypes.init() +mimetypes.types_map['.dwg']='image/x-dwg' +mimetypes.types_map['.ico']='image/x-icon' +mimetypes.types_map['.bz2']='application/x-bzip2' +mimetypes.types_map['.gz']='application/x-gzip' + +import os +import re +import stat +import time + +import cherrypy 
+from cherrypy._cpcompat import ntob, unquote +from cherrypy.lib import cptools, httputil, file_generator_limited + + +def serve_file(path, content_type=None, disposition=None, name=None, debug=False): + """Set status, headers, and body in order to serve the given path. + + The Content-Type header will be set to the content_type arg, if provided. + If not provided, the Content-Type will be guessed by the file extension + of the 'path' argument. + + If disposition is not None, the Content-Disposition header will be set + to "<disposition>; filename=<name>". If name is None, it will be set + to the basename of path. If disposition is None, no Content-Disposition + header will be written. + """ + + response = cherrypy.serving.response + + # If path is relative, users should fix it by making path absolute. + # That is, CherryPy should not guess where the application root is. + # It certainly should *not* use cwd (since CP may be invoked from a + # variety of paths). If using tools.staticdir, you can make your relative + # paths become absolute by supplying a value for "tools.staticdir.root". + if not os.path.isabs(path): + msg = "'%s' is not an absolute path." % path + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') + raise ValueError(msg) + + try: + st = os.stat(path) + except OSError: + if debug: + cherrypy.log('os.stat(%r) failed' % path, 'TOOLS.STATIC') + raise cherrypy.NotFound() + + # Check if path is a directory. + if stat.S_ISDIR(st.st_mode): + # Let the caller deal with it as they like. + if debug: + cherrypy.log('%r is a directory' % path, 'TOOLS.STATIC') + raise cherrypy.NotFound() + + # Set the Last-Modified response header, so that + # modified-since validation code can work. 
+ response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + + if content_type is None: + # Set content-type based on filename extension + ext = "" + i = path.rfind('.') + if i != -1: + ext = path[i:].lower() + content_type = mimetypes.types_map.get(ext, None) + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + name = os.path.basename(path) + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + content_length = st.st_size + fileobj = open(path, 'rb') + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + +def serve_fileobj(fileobj, content_type=None, disposition=None, name=None, + debug=False): + """Set status, headers, and body in order to serve the given file object. + + The Content-Type header will be set to the content_type arg, if provided. + + If disposition is not None, the Content-Disposition header will be set + to "<disposition>; filename=<name>". If name is None, 'filename' will + not be set. If disposition is None, no Content-Disposition header will + be written. + + CAUTION: If the request contains a 'Range' header, one or more seek()s will + be performed on the file object. This may cause undesired behavior if + the file object is not seekable. It could also produce undesired results + if the caller set the read position of the file object prior to calling + serve_fileobj(), expecting that the data would be served starting from that + position. 
+ """ + + response = cherrypy.serving.response + + try: + st = os.fstat(fileobj.fileno()) + except AttributeError: + if debug: + cherrypy.log('os has no fstat attribute', 'TOOLS.STATIC') + content_length = None + else: + # Set the Last-Modified response header, so that + # modified-since validation code can work. + response.headers['Last-Modified'] = httputil.HTTPDate(st.st_mtime) + cptools.validate_since() + content_length = st.st_size + + if content_type is not None: + response.headers['Content-Type'] = content_type + if debug: + cherrypy.log('Content-Type: %r' % content_type, 'TOOLS.STATIC') + + cd = None + if disposition is not None: + if name is None: + cd = disposition + else: + cd = '%s; filename="%s"' % (disposition, name) + response.headers["Content-Disposition"] = cd + if debug: + cherrypy.log('Content-Disposition: %r' % cd, 'TOOLS.STATIC') + + return _serve_fileobj(fileobj, content_type, content_length, debug=debug) + +def _serve_fileobj(fileobj, content_type, content_length, debug=False): + """Internal. Set response.body to the given file object, perhaps ranged.""" + response = cherrypy.serving.response + + # HTTP/1.0 didn't have Range/Accept-Ranges headers, or the 206 code + request = cherrypy.serving.request + if request.protocol >= (1, 1): + response.headers["Accept-Ranges"] = "bytes" + r = httputil.get_ranges(request.headers.get('Range'), content_length) + if r == []: + response.headers['Content-Range'] = "bytes */%s" % content_length + message = "Invalid Range (first-byte-pos greater than Content-Length)" + if debug: + cherrypy.log(message, 'TOOLS.STATIC') + raise cherrypy.HTTPError(416, message) + + if r: + if len(r) == 1: + # Return a single-part response. 
+ start, stop = r[0] + if stop > content_length: + stop = content_length + r_len = stop - start + if debug: + cherrypy.log('Single part; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') + response.status = "206 Partial Content" + response.headers['Content-Range'] = ( + "bytes %s-%s/%s" % (start, stop - 1, content_length)) + response.headers['Content-Length'] = r_len + fileobj.seek(start) + response.body = file_generator_limited(fileobj, r_len) + else: + # Return a multipart/byteranges response. + response.status = "206 Partial Content" + from mimetools import choose_boundary + boundary = choose_boundary() + ct = "multipart/byteranges; boundary=%s" % boundary + response.headers['Content-Type'] = ct + if "Content-Length" in response.headers: + # Delete Content-Length header so finalize() recalcs it. + del response.headers["Content-Length"] + + def file_ranges(): + # Apache compatibility: + yield ntob("\r\n") + + for start, stop in r: + if debug: + cherrypy.log('Multipart; start: %r, stop: %r' % (start, stop), + 'TOOLS.STATIC') + yield ntob("--" + boundary, 'ascii') + yield ntob("\r\nContent-type: %s" % content_type, 'ascii') + yield ntob("\r\nContent-range: bytes %s-%s/%s\r\n\r\n" + % (start, stop - 1, content_length), 'ascii') + fileobj.seek(start) + for chunk in file_generator_limited(fileobj, stop-start): + yield chunk + yield ntob("\r\n") + # Final boundary + yield ntob("--" + boundary + "--", 'ascii') + + # Apache compatibility: + yield ntob("\r\n") + response.body = file_ranges() + return response.body + else: + if debug: + cherrypy.log('No byteranges requested', 'TOOLS.STATIC') + + # Set Content-Length and use an iterable (file object) + # this way CP won't load the whole file in memory + response.headers['Content-Length'] = content_length + response.body = fileobj + return response.body + +def serve_download(path, name=None): + """Serve 'path' as an application/x-download attachment.""" + # This is such a common idiom I felt it deserved its own 
wrapper. + return serve_file(path, "application/x-download", "attachment", name) + + +def _attempt(filename, content_types, debug=False): + if debug: + cherrypy.log('Attempting %r (content_types %r)' % + (filename, content_types), 'TOOLS.STATICDIR') + try: + # you can set the content types for a + # complete directory per extension + content_type = None + if content_types: + r, ext = os.path.splitext(filename) + content_type = content_types.get(ext[1:], None) + serve_file(filename, content_type=content_type, debug=debug) + return True + except cherrypy.NotFound: + # If we didn't find the static file, continue handling the + # request. We might find a dynamic handler instead. + if debug: + cherrypy.log('NotFound', 'TOOLS.STATICFILE') + return False + +def staticdir(section, dir, root="", match="", content_types=None, index="", + debug=False): + """Serve a static resource from the given (root +) dir. + + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + + index + If provided, it should be the (relative) name of a file to + serve for directory requests. For example, if the dir argument is + '/home/me', the Request-URI is 'myapp', and the index arg is + 'index.html', the file '/home/me/myapp/index.html' will be sought. 
+ """ + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICDIR') + return False + + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICDIR') + return False + + # Allow the use of '~' to refer to a user's home directory. + dir = os.path.expanduser(dir) + + # If dir is relative, make absolute using "root". + if not os.path.isabs(dir): + if not root: + msg = "Static dir requires an absolute dir (or root)." + if debug: + cherrypy.log(msg, 'TOOLS.STATICDIR') + raise ValueError(msg) + dir = os.path.join(root, dir) + + # Determine where we are in the object tree relative to 'section' + # (where the static tool was defined). + if section == 'global': + section = "/" + section = section.rstrip(r"\/") + branch = request.path_info[len(section) + 1:] + branch = unquote(branch.lstrip(r"\/")) + + # If branch is "", filename will end in a slash + filename = os.path.join(dir, branch) + if debug: + cherrypy.log('Checking file %r to fulfill %r' % + (filename, request.path_info), 'TOOLS.STATICDIR') + + # There's a chance that the branch pulled from the URL might + # have ".." or similar uplevel attacks in it. Check that the final + # filename is a child of dir. + if not os.path.normpath(filename).startswith(os.path.normpath(dir)): + raise cherrypy.HTTPError(403) # Forbidden + + handled = _attempt(filename, content_types) + if not handled: + # Check for an index file if a folder was requested. + if index: + handled = _attempt(os.path.join(filename, index), content_types) + if handled: + request.is_index = filename[-1] in (r"\/") + return handled + +def staticfile(filename, root=None, match="", content_types=None, debug=False): + """Serve a static resource from the given (root +) filename. 
+ + match + If given, request.path_info will be searched for the given + regular expression before attempting to serve static content. + + content_types + If given, it should be a Python dictionary of + {file-extension: content-type} pairs, where 'file-extension' is + a string (e.g. "gif") and 'content-type' is the value to write + out in the Content-Type response header (e.g. "image/gif"). + + """ + request = cherrypy.serving.request + if request.method not in ('GET', 'HEAD'): + if debug: + cherrypy.log('request.method not GET or HEAD', 'TOOLS.STATICFILE') + return False + + if match and not re.search(match, request.path_info): + if debug: + cherrypy.log('request.path_info %r does not match pattern %r' % + (request.path_info, match), 'TOOLS.STATICFILE') + return False + + # If filename is relative, make absolute using "root". + if not os.path.isabs(filename): + if not root: + msg = "Static tool requires an absolute filename (got '%s')." % filename + if debug: + cherrypy.log(msg, 'TOOLS.STATICFILE') + raise ValueError(msg) + filename = os.path.join(root, filename) + + return _attempt(filename, content_types, debug=debug) diff --git a/cherrypy/lib/xmlrpc.py b/cherrypy/lib/xmlrpc.py new file mode 100755 index 0000000..8a5ef54 --- /dev/null +++ b/cherrypy/lib/xmlrpc.py @@ -0,0 +1,49 @@ +import sys + +import cherrypy + + +def process_body(): + """Return (params, method) from request body.""" + try: + import xmlrpclib + return xmlrpclib.loads(cherrypy.request.body.read()) + except Exception: + return ('ERROR PARAMS', ), 'ERRORMETHOD' + + +def patched_path(path): + """Return 'path', doctored for RPC.""" + if not path.endswith('/'): + path += '/' + if path.startswith('/RPC2/'): + # strip the first /rpc2 + path = path[5:] + return path + + +def _set_response(body): + # The XML-RPC spec (http://www.xmlrpc.com/spec) says: + # "Unless there's a lower-level error, always return 200 OK." 
+ # Since Python's xmlrpclib interprets a non-200 response + # as a "Protocol Error", we'll just return 200 every time. + response = cherrypy.response + response.status = '200 OK' + response.body = body + response.headers['Content-Type'] = 'text/xml' + response.headers['Content-Length'] = len(body) + + +def respond(body, encoding='utf-8', allow_none=0): + from xmlrpclib import Fault, dumps + if not isinstance(body, Fault): + body = (body,) + _set_response(dumps(body, methodresponse=1, + encoding=encoding, + allow_none=allow_none)) + +def on_error(*args, **kwargs): + body = str(sys.exc_info()[1]) + from xmlrpclib import Fault, dumps + _set_response(dumps(Fault(1, body))) + |