Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/cherrypy/lib/caching.py
diff options
context:
space:
mode:
Diffstat (limited to 'cherrypy/lib/caching.py')
-rwxr-xr-xcherrypy/lib/caching.py465
1 files changed, 465 insertions, 0 deletions
diff --git a/cherrypy/lib/caching.py b/cherrypy/lib/caching.py
new file mode 100755
index 0000000..435b9dc
--- /dev/null
+++ b/cherrypy/lib/caching.py
@@ -0,0 +1,465 @@
+"""
+CherryPy implements a simple caching system as a pluggable Tool. This tool tries
+to be an (in-process) HTTP/1.1-compliant cache. It's not quite there yet, but
+it's probably good enough for most sites.
+
+In general, GET responses are cached (along with selecting headers) and, if
+another request arrives for the same resource, the caching Tool will return 304
+Not Modified if possible, or serve the cached response otherwise. It also sets
+request.cached to True if serving a cached representation, and sets
+request.cacheable to False (so it doesn't get cached again).
+
+If POST, PUT, or DELETE requests are made for a cached resource, they invalidate
+(delete) any cached response.
+
+Usage
+=====
+
+Configuration file example::
+
+ [/]
+ tools.caching.on = True
+ tools.caching.delay = 3600
+
+You may use a class other than the default
+:class:`MemoryCache<cherrypy.lib.caching.MemoryCache>` by supplying the config
+entry ``cache_class``; supply the full dotted name of the replacement class
+as the config value. It must implement the basic methods ``get``, ``put``,
+``delete``, and ``clear``.
+
+You may set any attribute, including overriding methods, on the cache
+instance by providing them in config. The above sets the
+:attr:`delay<cherrypy.lib.caching.MemoryCache.delay>` attribute, for example.
+"""
+
+import datetime
+import sys
+import threading
+import time
+
+import cherrypy
+from cherrypy.lib import cptools, httputil
+from cherrypy._cpcompat import copyitems, ntob, set_daemon, sorted
+
+
+class Cache(object):
+ """Base class for Cache implementations."""
+
+ def get(self):
+ """Return the current variant if in the cache, else None."""
+ raise NotImplemented
+
+ def put(self, obj, size):
+ """Store the current variant in the cache."""
+ raise NotImplemented
+
+ def delete(self):
+ """Remove ALL cached variants of the current resource."""
+ raise NotImplemented
+
+ def clear(self):
+ """Reset the cache to its initial, empty state."""
+ raise NotImplemented
+
+
+
+# ------------------------------- Memory Cache ------------------------------- #
+
+
+class AntiStampedeCache(dict):
+ """A storage system for cached items which reduces stampede collisions."""
+
+ def wait(self, key, timeout=5, debug=False):
+ """Return the cached value for the given key, or None.
+
+ If timeout is not None, and the value is already
+ being calculated by another thread, wait until the given timeout has
+ elapsed. If the value is available before the timeout expires, it is
+ returned. If not, None is returned, and a sentinel placed in the cache
+ to signal other threads to wait.
+
+ If timeout is None, no waiting is performed nor sentinels used.
+ """
+ value = self.get(key)
+ if isinstance(value, threading._Event):
+ if timeout is None:
+ # Ignore the other thread and recalc it ourselves.
+ if debug:
+ cherrypy.log('No timeout', 'TOOLS.CACHING')
+ return None
+
+ # Wait until it's done or times out.
+ if debug:
+ cherrypy.log('Waiting up to %s seconds' % timeout, 'TOOLS.CACHING')
+ value.wait(timeout)
+ if value.result is not None:
+ # The other thread finished its calculation. Use it.
+ if debug:
+ cherrypy.log('Result!', 'TOOLS.CACHING')
+ return value.result
+ # Timed out. Stick an Event in the slot so other threads wait
+ # on this one to finish calculating the value.
+ if debug:
+ cherrypy.log('Timed out', 'TOOLS.CACHING')
+ e = threading.Event()
+ e.result = None
+ dict.__setitem__(self, key, e)
+
+ return None
+ elif value is None:
+ # Stick an Event in the slot so other threads wait
+ # on this one to finish calculating the value.
+ if debug:
+ cherrypy.log('Timed out', 'TOOLS.CACHING')
+ e = threading.Event()
+ e.result = None
+ dict.__setitem__(self, key, e)
+ return value
+
+ def __setitem__(self, key, value):
+ """Set the cached value for the given key."""
+ existing = self.get(key)
+ dict.__setitem__(self, key, value)
+ if isinstance(existing, threading._Event):
+ # Set Event.result so other threads waiting on it have
+ # immediate access without needing to poll the cache again.
+ existing.result = value
+ existing.set()
+
+
+class MemoryCache(Cache):
+ """An in-memory cache for varying response content.
+
+ Each key in self.store is a URI, and each value is an AntiStampedeCache.
+ The response for any given URI may vary based on the values of
+ "selecting request headers"; that is, those named in the Vary
+ response header. We assume the list of header names to be constant
+ for each URI throughout the lifetime of the application, and store
+ that list in ``self.store[uri].selecting_headers``.
+
+ The items contained in ``self.store[uri]`` have keys which are tuples of
+ request header values (in the same order as the names in its
+ selecting_headers), and values which are the actual responses.
+ """
+
+ maxobjects = 1000
+ """The maximum number of cached objects; defaults to 1000."""
+
+ maxobj_size = 100000
+ """The maximum size of each cached object in bytes; defaults to 100 KB."""
+
+ maxsize = 10000000
+ """The maximum size of the entire cache in bytes; defaults to 10 MB."""
+
+ delay = 600
+ """Seconds until the cached content expires; defaults to 600 (10 minutes)."""
+
+ antistampede_timeout = 5
+ """Seconds to wait for other threads to release a cache lock."""
+
+ expire_freq = 0.1
+ """Seconds to sleep between cache expiration sweeps."""
+
+ debug = False
+
+ def __init__(self):
+ self.clear()
+
+ # Run self.expire_cache in a separate daemon thread.
+ t = threading.Thread(target=self.expire_cache, name='expire_cache')
+ self.expiration_thread = t
+ set_daemon(t, True)
+ t.start()
+
+ def clear(self):
+ """Reset the cache to its initial, empty state."""
+ self.store = {}
+ self.expirations = {}
+ self.tot_puts = 0
+ self.tot_gets = 0
+ self.tot_hist = 0
+ self.tot_expires = 0
+ self.tot_non_modified = 0
+ self.cursize = 0
+
+ def expire_cache(self):
+ """Continuously examine cached objects, expiring stale ones.
+
+ This function is designed to be run in its own daemon thread,
+ referenced at ``self.expiration_thread``.
+ """
+ # It's possible that "time" will be set to None
+ # arbitrarily, so we check "while time" to avoid exceptions.
+ # See tickets #99 and #180 for more information.
+ while time:
+ now = time.time()
+ # Must make a copy of expirations so it doesn't change size
+ # during iteration
+ for expiration_time, objects in copyitems(self.expirations):
+ if expiration_time <= now:
+ for obj_size, uri, sel_header_values in objects:
+ try:
+ del self.store[uri][tuple(sel_header_values)]
+ self.tot_expires += 1
+ self.cursize -= obj_size
+ except KeyError:
+ # the key may have been deleted elsewhere
+ pass
+ del self.expirations[expiration_time]
+ time.sleep(self.expire_freq)
+
+ def get(self):
+ """Return the current variant if in the cache, else None."""
+ request = cherrypy.serving.request
+ self.tot_gets += 1
+
+ uri = cherrypy.url(qs=request.query_string)
+ uricache = self.store.get(uri)
+ if uricache is None:
+ return None
+
+ header_values = [request.headers.get(h, '')
+ for h in uricache.selecting_headers]
+ variant = uricache.wait(key=tuple(sorted(header_values)),
+ timeout=self.antistampede_timeout,
+ debug=self.debug)
+ if variant is not None:
+ self.tot_hist += 1
+ return variant
+
+ def put(self, variant, size):
+ """Store the current variant in the cache."""
+ request = cherrypy.serving.request
+ response = cherrypy.serving.response
+
+ uri = cherrypy.url(qs=request.query_string)
+ uricache = self.store.get(uri)
+ if uricache is None:
+ uricache = AntiStampedeCache()
+ uricache.selecting_headers = [
+ e.value for e in response.headers.elements('Vary')]
+ self.store[uri] = uricache
+
+ if len(self.store) < self.maxobjects:
+ total_size = self.cursize + size
+
+ # checks if there's space for the object
+ if (size < self.maxobj_size and total_size < self.maxsize):
+ # add to the expirations list
+ expiration_time = response.time + self.delay
+ bucket = self.expirations.setdefault(expiration_time, [])
+ bucket.append((size, uri, uricache.selecting_headers))
+
+ # add to the cache
+ header_values = [request.headers.get(h, '')
+ for h in uricache.selecting_headers]
+ uricache[tuple(sorted(header_values))] = variant
+ self.tot_puts += 1
+ self.cursize = total_size
+
+ def delete(self):
+ """Remove ALL cached variants of the current resource."""
+ uri = cherrypy.url(qs=cherrypy.serving.request.query_string)
+ self.store.pop(uri, None)
+
+
+def get(invalid_methods=("POST", "PUT", "DELETE"), debug=False, **kwargs):
+ """Try to obtain cached output. If fresh enough, raise HTTPError(304).
+
+ If POST, PUT, or DELETE:
+ * invalidates (deletes) any cached response for this resource
+ * sets request.cached = False
+ * sets request.cacheable = False
+
+ else if a cached copy exists:
+ * sets request.cached = True
+ * sets request.cacheable = False
+ * sets response.headers to the cached values
+ * checks the cached Last-Modified response header against the
+ current If-(Un)Modified-Since request headers; raises 304
+ if necessary.
+ * sets response.status and response.body to the cached values
+ * returns True
+
+ otherwise:
+ * sets request.cached = False
+ * sets request.cacheable = True
+ * returns False
+ """
+ request = cherrypy.serving.request
+ response = cherrypy.serving.response
+
+ if not hasattr(cherrypy, "_cache"):
+ # Make a process-wide Cache object.
+ cherrypy._cache = kwargs.pop("cache_class", MemoryCache)()
+
+ # Take all remaining kwargs and set them on the Cache object.
+ for k, v in kwargs.items():
+ setattr(cherrypy._cache, k, v)
+ cherrypy._cache.debug = debug
+
+ # POST, PUT, DELETE should invalidate (delete) the cached copy.
+ # See http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.10.
+ if request.method in invalid_methods:
+ if debug:
+ cherrypy.log('request.method %r in invalid_methods %r' %
+ (request.method, invalid_methods), 'TOOLS.CACHING')
+ cherrypy._cache.delete()
+ request.cached = False
+ request.cacheable = False
+ return False
+
+ if 'no-cache' in [e.value for e in request.headers.elements('Pragma')]:
+ request.cached = False
+ request.cacheable = True
+ return False
+
+ cache_data = cherrypy._cache.get()
+ request.cached = bool(cache_data)
+ request.cacheable = not request.cached
+ if request.cached:
+ # Serve the cached copy.
+ max_age = cherrypy._cache.delay
+ for v in [e.value for e in request.headers.elements('Cache-Control')]:
+ atoms = v.split('=', 1)
+ directive = atoms.pop(0)
+ if directive == 'max-age':
+ if len(atoms) != 1 or not atoms[0].isdigit():
+ raise cherrypy.HTTPError(400, "Invalid Cache-Control header")
+ max_age = int(atoms[0])
+ break
+ elif directive == 'no-cache':
+ if debug:
+ cherrypy.log('Ignoring cache due to Cache-Control: no-cache',
+ 'TOOLS.CACHING')
+ request.cached = False
+ request.cacheable = True
+ return False
+
+ if debug:
+ cherrypy.log('Reading response from cache', 'TOOLS.CACHING')
+ s, h, b, create_time = cache_data
+ age = int(response.time - create_time)
+ if (age > max_age):
+ if debug:
+ cherrypy.log('Ignoring cache due to age > %d' % max_age,
+ 'TOOLS.CACHING')
+ request.cached = False
+ request.cacheable = True
+ return False
+
+ # Copy the response headers. See http://www.cherrypy.org/ticket/721.
+ response.headers = rh = httputil.HeaderMap()
+ for k in h:
+ dict.__setitem__(rh, k, dict.__getitem__(h, k))
+
+ # Add the required Age header
+ response.headers["Age"] = str(age)
+
+ try:
+ # Note that validate_since depends on a Last-Modified header;
+ # this was put into the cached copy, and should have been
+ # resurrected just above (response.headers = cache_data[1]).
+ cptools.validate_since()
+ except cherrypy.HTTPRedirect:
+ x = sys.exc_info()[1]
+ if x.status == 304:
+ cherrypy._cache.tot_non_modified += 1
+ raise
+
+ # serve it & get out from the request
+ response.status = s
+ response.body = b
+ else:
+ if debug:
+ cherrypy.log('request is not cached', 'TOOLS.CACHING')
+ return request.cached
+
+
+def tee_output():
+ """Tee response output to cache storage. Internal."""
+ # Used by CachingTool by attaching to request.hooks
+
+ request = cherrypy.serving.request
+ if 'no-store' in request.headers.values('Cache-Control'):
+ return
+
+ def tee(body):
+ """Tee response.body into a list."""
+ if ('no-cache' in response.headers.values('Pragma') or
+ 'no-store' in response.headers.values('Cache-Control')):
+ for chunk in body:
+ yield chunk
+ return
+
+ output = []
+ for chunk in body:
+ output.append(chunk)
+ yield chunk
+
+ # save the cache data
+ body = ntob('').join(output)
+ cherrypy._cache.put((response.status, response.headers or {},
+ body, response.time), len(body))
+
+ response = cherrypy.serving.response
+ response.body = tee(response.body)
+
+
+def expires(secs=0, force=False, debug=False):
+ """Tool for influencing cache mechanisms using the 'Expires' header.
+
+ secs
+ Must be either an int or a datetime.timedelta, and indicates the
+ number of seconds between response.time and when the response should
+ expire. The 'Expires' header will be set to response.time + secs.
+ If secs is zero, the 'Expires' header is set one year in the past, and
+ the following "cache prevention" headers are also set:
+
+ * Pragma: no-cache
+ * Cache-Control': no-cache, must-revalidate
+
+ force
+ If False, the following headers are checked:
+
+ * Etag
+ * Last-Modified
+ * Age
+ * Expires
+
+ If any are already present, none of the above response headers are set.
+
+ """
+
+ response = cherrypy.serving.response
+ headers = response.headers
+
+ cacheable = False
+ if not force:
+ # some header names that indicate that the response can be cached
+ for indicator in ('Etag', 'Last-Modified', 'Age', 'Expires'):
+ if indicator in headers:
+ cacheable = True
+ break
+
+ if not cacheable and not force:
+ if debug:
+ cherrypy.log('request is not cacheable', 'TOOLS.EXPIRES')
+ else:
+ if debug:
+ cherrypy.log('request is cacheable', 'TOOLS.EXPIRES')
+ if isinstance(secs, datetime.timedelta):
+ secs = (86400 * secs.days) + secs.seconds
+
+ if secs == 0:
+ if force or ("Pragma" not in headers):
+ headers["Pragma"] = "no-cache"
+ if cherrypy.serving.request.protocol >= (1, 1):
+ if force or "Cache-Control" not in headers:
+ headers["Cache-Control"] = "no-cache, must-revalidate"
+ # Set an explicit Expires date in the past.
+ expiry = httputil.HTTPDate(1169942400.0)
+ else:
+ expiry = httputil.HTTPDate(response.time + secs)
+ if force or "Expires" not in headers:
+ headers["Expires"] = expiry