diff options
Diffstat (limited to 'cherrypy/_cpreqbody.py')
-rwxr-xr-x | cherrypy/_cpreqbody.py | 941 |
1 files changed, 0 insertions, 941 deletions
"""Request body processing for CherryPy.

.. versionadded:: 3.2

Application authors have complete control over the parsing of HTTP request
entities. In short, :attr:`cherrypy.request.body<cherrypy._cprequest.Request.body>`
is now always set to an instance of :class:`RequestBody<cherrypy._cpreqbody.RequestBody>`,
and *that* class is a subclass of :class:`Entity<cherrypy._cpreqbody.Entity>`.

When an HTTP request includes an entity body, it is often desirable to
provide that information to applications in a form other than the raw bytes.
Different content types demand different approaches. Examples:

 * For a GIF file, we want the raw bytes in a stream.
 * An HTML form is better parsed into its component fields, and each text field
   decoded from bytes to unicode.
 * A JSON body should be deserialized into a Python dict or list.

When the request contains a Content-Type header, the media type is used as a
key to look up a value in the
:attr:`request.body.processors<cherrypy._cpreqbody.Entity.processors>` dict.
If the full media
type is not found, then the major type is tried; for example, if no processor
is found for the 'image/jpeg' type, then we look for a processor for the 'image'
types altogether. If neither the full type nor the major type has a matching
processor, then a default processor is used
(:func:`default_proc<cherrypy._cpreqbody.Entity.default_proc>`). For most
types, this means no processing is done, and the body is left unread as a
raw byte stream. Processors are configurable in an 'on_start_resource' hook.

Some processors, especially those for the 'text' types, attempt to decode bytes
to unicode. If the Content-Type request header includes a 'charset' parameter,
this is used to decode the entity. Otherwise, one or more default charsets may
be attempted, although this decision is up to each processor. If a processor
successfully decodes an Entity or Part, it should set the
:attr:`charset<cherrypy._cpreqbody.Entity.charset>` attribute
on the Entity or Part to the name of the successful charset, so that
applications can easily re-encode or transcode the value if they wish.

If the Content-Type of the request entity is of major type 'multipart', then
the above parsing process, and possibly a decoding process, is performed for
each part.

For both the full entity and multipart parts, a Content-Disposition header may
be used to fill :attr:`name<cherrypy._cpreqbody.Entity.name>` and
:attr:`filename<cherrypy._cpreqbody.Entity.filename>` attributes on the
request.body or the Part.

.. _custombodyprocessors:

Custom Processors
=================

You can add your own processors for any specific or major MIME type. Simply add
it to the :attr:`processors<cherrypy._cpreqbody.Entity.processors>` dict in a
hook/tool that runs at ``on_start_resource`` or ``before_request_body``.
Here's the built-in JSON tool for an example::

    def json_in(force=True, debug=False):
        request = cherrypy.serving.request
        def json_processor(entity):
            \"""Read application/json data into request.json.\"""
            if not entity.headers.get("Content-Length", ""):
                raise cherrypy.HTTPError(411)

            body = entity.fp.read()
            try:
                request.json = json_decode(body)
            except ValueError:
                raise cherrypy.HTTPError(400, 'Invalid JSON document')
        if force:
            request.body.processors.clear()
            request.body.default_proc = cherrypy.HTTPError(
                415, 'Expected an application/json content type')
        request.body.processors['application/json'] = json_processor

We begin by defining a new ``json_processor`` function to stick in the ``processors``
dictionary. All processor functions take a single argument, the ``Entity`` instance
they are to process. It will be called whenever a request is received (for those
URIs where the tool is turned on) which has a ``Content-Type`` of
"application/json".

First, it checks for a valid ``Content-Length`` (raising 411 if not valid), then
reads the remaining bytes on the socket. The ``fp`` object knows its own length, so
it won't hang waiting for data that never arrives. It will return when all data
has been read. Then, we decode those bytes using Python's built-in ``json`` module,
and stick the decoded result onto ``request.json`` . If it cannot be decoded, we
raise 400.

If the "force" argument is True (the default), the ``Tool`` clears the ``processors``
dict so that request entities of other ``Content-Types`` aren't parsed at all. Since
there's no entry for those invalid MIME types, the ``default_proc`` method of ``cherrypy.request.body``
is called. But this does nothing by default (usually to provide the page handler an opportunity to handle it.)
But in our case, we want to raise 415, so we replace ``request.body.default_proc``
with the error (``HTTPError`` instances, when called, raise themselves).

If we were defining a custom processor, we can do so without making a ``Tool``. Just add the config entry::

    request.body.processors = {'application/json': json_processor}

Note that you can only replace the ``processors`` dict wholesale this way, not update the existing one.
"""

import re
import sys
import tempfile
from urllib import unquote_plus

import cherrypy
from cherrypy._cpcompat import basestring, ntob, ntou
from cherrypy.lib import httputil


# -------------------------------- Processors -------------------------------- #

def process_urlencoded(entity):
    """Read application/x-www-form-urlencoded data into entity.params.

    Each charset in ``entity.attempt_charsets`` is tried in order; the first
    one that decodes every key and value is stored in ``entity.charset``.
    Raises ``cherrypy.HTTPError`` (400) if none of them succeeds.
    """
    qs = entity.fp.read()
    for charset in entity.attempt_charsets:
        try:
            params = {}
            for aparam in qs.split(ntob('&')):
                for pair in aparam.split(ntob(';')):
                    if not pair:
                        continue

                    atoms = pair.split(ntob('='), 1)
                    if len(atoms) == 1:
                        atoms.append(ntob(''))

                    key = unquote_plus(atoms[0]).decode(charset)
                    value = unquote_plus(atoms[1]).decode(charset)

                    if key in params:
                        if not isinstance(params[key], list):
                            params[key] = [params[key]]
                        params[key].append(value)
                    else:
                        params[key] = value
        except UnicodeDecodeError:
            pass
        else:
            entity.charset = charset
            break
    else:
        raise cherrypy.HTTPError(
            400, "The request entity could not be decoded. The following "
            "charsets were attempted: %s" % repr(entity.attempt_charsets))

    # Now that all values have been successfully parsed and decoded,
    # apply them to the entity.params dict.
    for key, value in params.items():
        if key in entity.params:
            if not isinstance(entity.params[key], list):
                entity.params[key] = [entity.params[key]]
            entity.params[key].append(value)
        else:
            entity.params[key] = value


def process_multipart(entity):
    """Read all multipart parts into entity.parts.

    Raises ``ValueError`` if the Content-Type boundary parameter is missing
    or does not match the allowed character set. Returns silently if the
    body ends before the first boundary marker is seen.
    """
    ib = ""
    if 'boundary' in entity.content_type.params:
        # http://tools.ietf.org/html/rfc2046#section-5.1.1
        # "The grammar for parameters on the Content-type field is such that it
        # is often necessary to enclose the boundary parameter values in quotes
        # on the Content-type line"
        ib = entity.content_type.params['boundary'].strip('"')

    if not re.match("^[ -~]{0,200}[!-~]$", ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))

    ib = ('--' + ib).encode('ascii')

    # Find the first marker
    while True:
        b = entity.readline()
        if not b:
            return

        b = b.strip()
        if b == ib:
            break

    # Read all parts
    while True:
        part = entity.part_class.from_fp(entity.fp, ib)
        entity.parts.append(part)
        part.process()
        if part.fp.done:
            break


def process_multipart_form_data(entity):
    """Read all multipart/form-data parts into entity.parts or entity.params.

    Named parts are moved into ``entity.params`` (as their full value for
    regular fields, or as the Part itself for file uploads); unnamed parts
    remain in ``entity.parts``.
    """
    process_multipart(entity)

    kept_parts = []
    for part in entity.parts:
        if part.name is None:
            kept_parts.append(part)
        else:
            if part.filename is None:
                # It's a regular field
                value = part.fullvalue()
            else:
                # It's a file upload. Retain the whole part so consumer code
                # has access to its .file and .filename attributes.
                value = part

            if part.name in entity.params:
                if not isinstance(entity.params[part.name], list):
                    entity.params[part.name] = [entity.params[part.name]]
                entity.params[part.name].append(value)
            else:
                entity.params[part.name] = value

    entity.parts = kept_parts


def _old_process_multipart(entity):
    """The behavior of 3.2 and lower. Deprecated and will be changed in 3.3."""
    process_multipart(entity)

    params = entity.params

    for part in entity.parts:
        if part.name is None:
            # Unnamed parts are collected under the generic "parts" key.
            key = ntou('parts')
        else:
            key = part.name

        if part.filename is None:
            # It's a regular field
            value = part.fullvalue()
        else:
            # It's a file upload. Retain the whole part so consumer code
            # has access to its .file and .filename attributes.
            value = part

        if key in params:
            if not isinstance(params[key], list):
                params[key] = [params[key]]
            params[key].append(value)
        else:
            params[key] = value


# --------------------------------- Entities --------------------------------- #


class Entity(object):
    """An HTTP request body, or MIME multipart body.

    This class collects information about the HTTP request entity. When a
    given entity is of MIME type "multipart", each part is parsed into its own
    Entity instance, and the set of parts stored in
    :attr:`entity.parts<cherrypy._cpreqbody.Entity.parts>`.

    Between the ``before_request_body`` and ``before_handler`` tools, CherryPy
    tries to process the request body (if any) by calling
    :func:`request.body.process<cherrypy._cpreqbody.RequestBody.process>`.
    This uses the ``content_type`` of the Entity to look up a suitable processor
    in :attr:`Entity.processors<cherrypy._cpreqbody.Entity.processors>`, a dict.
    If a matching processor cannot be found for the complete Content-Type,
    it tries again using the major type. For example, if a request with an
    entity of type "image/jpeg" arrives, but no processor can be found for
    that complete type, then one is sought for the major type "image". If a
    processor is still not found, then the
    :func:`default_proc<cherrypy._cpreqbody.Entity.default_proc>` method of the
    Entity is called (which does nothing by default; you can override this too).

    CherryPy includes processors for the "application/x-www-form-urlencoded"
    type, the "multipart/form-data" type, and the "multipart" major type.
    CherryPy 3.2 processes these types almost exactly as older versions.
    Parts are passed as arguments to the page handler using their
    ``Content-Disposition.name`` if given, otherwise in a generic "parts"
    argument. Each such part is either a string, or the
    :class:`Part<cherrypy._cpreqbody.Part>` itself if it's a file. (In this
    case it will have ``file`` and ``filename`` attributes, or possibly a
    ``value`` attribute). Each Part is itself a subclass of
    Entity, and has its own ``process`` method and ``processors`` dict.

    There is a separate processor for the "multipart" major type which is more
    flexible, and simply stores all multipart parts in
    :attr:`request.body.parts<cherrypy._cpreqbody.Entity.parts>`. You can
    enable it with::

        cherrypy.request.body.processors['multipart'] = _cpreqbody.process_multipart

    in an ``on_start_resource`` tool.
    """

    # http://tools.ietf.org/html/rfc2046#section-4.1.2:
    # "The default character set, which must be assumed in the
    # absence of a charset parameter, is US-ASCII."
    # However, many browsers send data in utf-8 with no charset.
    attempt_charsets = ['utf-8']
    """A list of strings, each of which should be a known encoding.

    When the Content-Type of the request body warrants it, each of the given
    encodings will be tried in order. The first one to successfully decode the
    entity without raising an error is stored as
    :attr:`entity.charset<cherrypy._cpreqbody.Entity.charset>`. This defaults
    to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by
    `HTTP/1.1 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1>`_),
    but ``['us-ascii', 'utf-8']`` for multipart parts.
    """

    charset = None
    """The successful decoding; see "attempt_charsets" above."""

    content_type = None
    """The value of the Content-Type request header.

    If the Entity is part of a multipart payload, this will be the Content-Type
    given in the MIME headers for this part.
    """

    default_content_type = 'application/x-www-form-urlencoded'
    """This defines a default ``Content-Type`` to use if no Content-Type header
    is given. The empty string is used for RequestBody, which results in the
    request body not being read or parsed at all. This is by design; a missing
    ``Content-Type`` header in the HTTP request entity is an error at best,
    and a security hole at worst. For multipart parts, however, the MIME spec
    declares that a part with no Content-Type defaults to "text/plain"
    (see :class:`Part<cherrypy._cpreqbody.Part>`).
    """

    filename = None
    """The ``Content-Disposition.filename`` header, if available."""

    fp = None
    """The readable socket file object."""

    headers = None
    """A dict of request/multipart header names and values.

    This is a copy of the ``request.headers`` for the ``request.body``;
    for multipart parts, it is the set of headers for that part.
    """

    length = None
    """The value of the ``Content-Length`` header, if provided."""

    name = None
    """The "name" parameter of the ``Content-Disposition`` header, if any."""

    params = None
    """
    If the request Content-Type is 'application/x-www-form-urlencoded' or
    multipart, this will be a dict of the params pulled from the entity
    body; that is, it will be the portion of request.params that come
    from the message body (sometimes called "POST params", although they
    can be sent with various HTTP method verbs). This value is set between
    the 'before_request_body' and 'before_handler' hooks (assuming that
    process_request_body is True)."""

    processors = {'application/x-www-form-urlencoded': process_urlencoded,
                  'multipart/form-data': process_multipart_form_data,
                  'multipart': process_multipart,
                  }
    """A dict of Content-Type names to processor methods."""

    parts = None
    """A list of Part instances if ``Content-Type`` is of major type "multipart"."""

    part_class = None
    """The class used for multipart parts.

    You can replace this with custom subclasses to alter the processing of
    multipart parts.
    """

    def __init__(self, fp, headers, params=None, parts=None):
        # Make an instance-specific copy of the class processors
        # so Tools, etc. can replace them per-request.
        self.processors = self.processors.copy()

        self.fp = fp
        self.headers = headers

        if params is None:
            params = {}
        self.params = params

        if parts is None:
            parts = []
        self.parts = parts

        # Content-Type
        self.content_type = headers.elements('Content-Type')
        if self.content_type:
            self.content_type = self.content_type[0]
        else:
            self.content_type = httputil.HeaderElement.from_str(
                self.default_content_type)

        # Copy the class 'attempt_charsets', prepending any Content-Type charset
        dec = self.content_type.params.get("charset", None)
        if dec:
            #dec = dec.decode('ISO-8859-1')
            self.attempt_charsets = [dec] + [c for c in self.attempt_charsets
                                             if c != dec]
        else:
            self.attempt_charsets = self.attempt_charsets[:]

        # Length
        self.length = None
        clen = headers.get('Content-Length', None)
        # If Transfer-Encoding is 'chunked', ignore any Content-Length.
        if clen is not None and 'chunked' not in headers.get('Transfer-Encoding', ''):
            try:
                self.length = int(clen)
            except ValueError:
                # A non-integer Content-Length leaves self.length as None.
                pass

        # Content-Disposition
        self.name = None
        self.filename = None
        disp = headers.elements('Content-Disposition')
        if disp:
            disp = disp[0]
            if 'name' in disp.params:
                self.name = disp.params['name']
                if self.name.startswith('"') and self.name.endswith('"'):
                    self.name = self.name[1:-1]
            if 'filename' in disp.params:
                self.filename = disp.params['filename']
                if self.filename.startswith('"') and self.filename.endswith('"'):
                    self.filename = self.filename[1:-1]

    # The 'type' attribute is deprecated in 3.2; remove it in 3.3.
    type = property(lambda self: self.content_type,
        doc="""A deprecated alias for :attr:`content_type<cherrypy._cpreqbody.Entity.content_type>`.""")

    def read(self, size=None, fp_out=None):
        """Delegate to ``self.fp.read(size, fp_out)`` and return its result."""
        return self.fp.read(size, fp_out)

    def readline(self, size=None):
        """Delegate to ``self.fp.readline(size)`` and return its result."""
        return self.fp.readline(size)

    def readlines(self, sizehint=None):
        """Delegate to ``self.fp.readlines(sizehint)`` and return its result."""
        return self.fp.readlines(sizehint)

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration when readline is empty."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def read_into_file(self, fp_out=None):
        """Read the request body into fp_out (or make_file() if None). Return fp_out."""
        if fp_out is None:
            fp_out = self.make_file()
        self.read(fp_out=fp_out)
        return fp_out

    def make_file(self):
        """Return a file-like object into which the request body will be read.

        By default, this will return a TemporaryFile. Override as needed.
        See also :attr:`cherrypy._cpreqbody.Part.maxrambytes`."""
        return tempfile.TemporaryFile()

    def fullvalue(self):
        """Return this entity as a string, whether stored in a file or not."""
        if self.file:
            # It was stored in a tempfile. Read it.
            self.file.seek(0)
            value = self.file.read()
            self.file.seek(0)
        else:
            value = self.value
        return value

    def process(self):
        """Execute the best-match processor for the given media type."""
        proc = None
        ct = self.content_type.value
        try:
            proc = self.processors[ct]
        except KeyError:
            # No exact match; fall back to the major type (e.g. "image").
            toptype = ct.split('/', 1)[0]
            try:
                proc = self.processors[toptype]
            except KeyError:
                pass
        if proc is None:
            self.default_proc()
        else:
            proc(self)

    def default_proc(self):
        """Called if a more-specific processor is not found for the ``Content-Type``."""
        # Leave the fp alone for someone else to read. This works fine
        # for request.body, but the Part subclasses need to override this
        # so they can move on to the next part.
        pass


class Part(Entity):
    """A MIME part entity, part of a multipart entity."""

    # "The default character set, which must be assumed in the absence of a
    # charset parameter, is US-ASCII."
    attempt_charsets = ['us-ascii', 'utf-8']
    """A list of strings, each of which should be a known encoding.

    When the Content-Type of the request body warrants it, each of the given
    encodings will be tried in order. The first one to successfully decode the
    entity without raising an error is stored as
    :attr:`entity.charset<cherrypy._cpreqbody.Entity.charset>`. This defaults
    to ``['utf-8']`` (plus 'ISO-8859-1' for "text/\*" types, as required by
    `HTTP/1.1 <http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1>`_),
    but ``['us-ascii', 'utf-8']`` for multipart parts.
    """

    boundary = None
    """The MIME multipart boundary."""

    default_content_type = 'text/plain'
    """This defines a default ``Content-Type`` to use if no Content-Type header
    is given. The empty string is used for RequestBody, which results in the
    request body not being read or parsed at all. This is by design; a missing
    ``Content-Type`` header in the HTTP request entity is an error at best,
    and a security hole at worst. For multipart parts, however (this class),
    the MIME spec declares that a part with no Content-Type defaults to
    "text/plain".
    """

    # This is the default in stdlib cgi. We may want to increase it.
    maxrambytes = 1000
    """The threshold of bytes after which point the ``Part`` will store its data
    in a file (generated by :func:`make_file<cherrypy._cpreqbody.Entity.make_file>`)
    instead of a string. Defaults to 1000, just like the :mod:`cgi` module in
    Python's standard library.
    """

    def __init__(self, fp, headers, boundary):
        Entity.__init__(self, fp, headers)
        self.boundary = boundary
        self.file = None
        self.value = None

    def from_fp(cls, fp, boundary):
        """Read the part headers from fp and return a new instance of cls."""
        headers = cls.read_headers(fp)
        return cls(fp, headers, boundary)
    from_fp = classmethod(from_fp)

    def read_headers(cls, fp):
        """Read MIME header lines from fp and return them as a HeaderMap.

        Reading stops at the first bare CRLF line. Raises ``EOFError`` if fp
        is exhausted first, and ``ValueError`` if a line lacks a CRLF
        terminator.
        """
        headers = httputil.HeaderMap()
        while True:
            line = fp.readline()
            if not line:
                # No more data--illegal end of headers
                raise EOFError("Illegal end of headers.")

            if line == ntob('\r\n'):
                # Normal end of headers
                break
            if not line.endswith(ntob('\r\n')):
                raise ValueError("MIME requires CRLF terminators: %r" % line)

            if line[0] in ntob(' \t'):
                # It's a continuation line.
                v = line.strip().decode('ISO-8859-1')
            else:
                k, v = line.split(ntob(":"), 1)
                k = k.strip().decode('ISO-8859-1')
                v = v.strip().decode('ISO-8859-1')

            existing = headers.get(k)
            if existing:
                v = ", ".join((existing, v))
            headers[k] = v

        return headers
    read_headers = classmethod(read_headers)

    def read_lines_to_boundary(self, fp_out=None):
        """Read bytes from self.fp and return or write them to a file.

        If the 'fp_out' argument is None (the default), all bytes read are
        returned in a single byte string.

        If the 'fp_out' argument is not None, it must be a file-like object that
        supports the 'write' method; all bytes read will be written to the fp,
        and that fp is returned.
        """
        endmarker = self.boundary + ntob("--")
        delim = ntob("")
        prev_lf = True
        lines = []
        seen = 0
        while True:
            line = self.fp.readline(1<<16)
            if not line:
                raise EOFError("Illegal end of multipart body.")
            if line.startswith(ntob("--")) and prev_lf:
                strippedline = line.strip()
                if strippedline == self.boundary:
                    break
                if strippedline == endmarker:
                    self.fp.finish()
                    break

            # The line terminator before a boundary belongs to the boundary,
            # so each terminator is held back and prepended to the next line.
            line = delim + line

            if line.endswith(ntob("\r\n")):
                delim = ntob("\r\n")
                line = line[:-2]
                prev_lf = True
            elif line.endswith(ntob("\n")):
                delim = ntob("\n")
                line = line[:-1]
                prev_lf = True
            else:
                delim = ntob("")
                prev_lf = False

            if fp_out is None:
                lines.append(line)
                seen += len(line)
                if seen > self.maxrambytes:
                    # Too big to keep in memory; spill what we have to a file.
                    fp_out = self.make_file()
                    for line in lines:
                        fp_out.write(line)
            else:
                fp_out.write(line)

        if fp_out is None:
            result = ntob('').join(lines)
            for charset in self.attempt_charsets:
                try:
                    result = result.decode(charset)
                except UnicodeDecodeError:
                    pass
                else:
                    self.charset = charset
                    return result
            else:
                raise cherrypy.HTTPError(
                    400, "The request entity could not be decoded. The following "
                    "charsets were attempted: %s" % repr(self.attempt_charsets))
        else:
            fp_out.seek(0)
            return fp_out

    def default_proc(self):
        """Called if a more-specific processor is not found for the ``Content-Type``."""
        if self.filename:
            # Always read into a file if a .filename was given.
            self.file = self.read_into_file()
        else:
            result = self.read_lines_to_boundary()
            if isinstance(result, basestring):
                self.value = result
            else:
                self.file = result

    def read_into_file(self, fp_out=None):
        """Read the request body into fp_out (or make_file() if None). Return fp_out."""
        if fp_out is None:
            fp_out = self.make_file()
        self.read_lines_to_boundary(fp_out=fp_out)
        return fp_out

Entity.part_class = Part


class Infinity(object):
    """A stand-in for an unbounded byte count.

    Relies on Python 2 ``__cmp__`` to compare greater than anything, and
    stays itself when something is subtracted from it.
    """

    def __cmp__(self, other):
        return 1

    def __sub__(self, other):
        return self
inf = Infinity()


comma_separated_headers = ['Accept', 'Accept-Charset', 'Accept-Encoding',
    'Accept-Language', 'Accept-Ranges', 'Allow', 'Cache-Control', 'Connection',
    'Content-Encoding', 'Content-Language', 'Expect', 'If-Match',
    'If-None-Match', 'Pragma', 'Proxy-Authenticate', 'Te', 'Trailer',
    'Transfer-Encoding', 'Upgrade', 'Vary', 'Via', 'Warning', 'Www-Authenticate']


class SizedReader:
    """A reader around ``fp`` that tracks and limits the bytes it reads.

    Reads stop once ``length`` bytes have been consumed (when ``length`` is
    not None), and ``cherrypy.HTTPError`` (413) is raised once more than
    ``maxbytes`` bytes have been read (when ``maxbytes`` is truthy).
    """

    def __init__(self, fp, length, maxbytes, bufsize=8192, has_trailers=False):
        # Wrap our fp in a buffer so peek() works
        self.fp = fp
        self.length = length
        self.maxbytes = maxbytes
        self.buffer = ntob('')
        self.bufsize = bufsize
        self.bytes_read = 0
        self.done = False
        self.has_trailers = has_trailers

    def read(self, size=None, fp_out=None):
        """Read bytes from the request body and return or write them to a file.

        A number of bytes less than or equal to the 'size' argument are read
        off the socket. The actual number of bytes read are tracked in
        self.bytes_read. The number may be smaller than 'size' when 1) the
        client sends fewer bytes, 2) the 'Content-Length' request header
        specifies fewer bytes than requested, or 3) the number of bytes read
        exceeds self.maxbytes (in which case, 413 is raised).

        If the 'fp_out' argument is None (the default), all bytes read are
        returned in a single byte string.

        If the 'fp_out' argument is not None, it must be a file-like object that
        supports the 'write' method; all bytes read will be written to the fp,
        and None is returned.
        """

        if self.length is None:
            if size is None:
                remaining = inf
            else:
                remaining = size
        else:
            remaining = self.length - self.bytes_read
            if size and size < remaining:
                remaining = size
        if remaining == 0:
            self.finish()
            if fp_out is None:
                return ntob('')
            else:
                return None

        chunks = []

        # Read bytes from the buffer.
        if self.buffer:
            if remaining is inf:
                data = self.buffer
                self.buffer = ntob('')
            else:
                data = self.buffer[:remaining]
                self.buffer = self.buffer[remaining:]
            datalen = len(data)
            remaining -= datalen

            # Check lengths.
            self.bytes_read += datalen
            if self.maxbytes and self.bytes_read > self.maxbytes:
                raise cherrypy.HTTPError(413)

            # Store the data.
            if fp_out is None:
                chunks.append(data)
            else:
                fp_out.write(data)

        # Read bytes from the socket.
        while remaining > 0:
            chunksize = min(remaining, self.bufsize)
            try:
                data = self.fp.read(chunksize)
            except Exception:
                e = sys.exc_info()[1]
                if e.__class__.__name__ == 'MaxSizeExceeded':
                    # Post data is too big
                    raise cherrypy.HTTPError(
                        413, "Maximum request length: %r" % e.args[1])
                else:
                    raise
            if not data:
                self.finish()
                break
            datalen = len(data)
            remaining -= datalen

            # Check lengths.
            self.bytes_read += datalen
            if self.maxbytes and self.bytes_read > self.maxbytes:
                raise cherrypy.HTTPError(413)

            # Store the data.
            if fp_out is None:
                chunks.append(data)
            else:
                fp_out.write(data)

        if fp_out is None:
            return ntob('').join(chunks)

    def readline(self, size=None):
        """Read a line from the request body and return it."""
        chunks = []
        while size is None or size > 0:
            chunksize = self.bufsize
            if size is not None and size < self.bufsize:
                chunksize = size
            data = self.read(chunksize)
            if not data:
                break
            pos = data.find(ntob('\n')) + 1
            if pos:
                chunks.append(data[:pos])
                # Push back whatever was read past the newline, and undo
                # its contribution to the byte count.
                remainder = data[pos:]
                self.buffer += remainder
                self.bytes_read -= len(remainder)
                break
            else:
                chunks.append(data)
        return ntob('').join(chunks)

    def readlines(self, sizehint=None):
        """Read lines from the request body and return them.

        Reading stops once at least 'sizehint' bytes have been collected.
        When the length is known, 'sizehint' is capped at the number of
        bytes remaining. When neither a length nor a 'sizehint' is
        available, lines are read until the body is exhausted.
        """
        if self.length is not None:
            if sizehint is None:
                sizehint = self.length - self.bytes_read
            else:
                sizehint = min(sizehint, self.length - self.bytes_read)

        lines = []
        seen = 0
        while True:
            line = self.readline()
            if not line:
                break
            lines.append(line)
            seen += len(line)
            # sizehint stays None when no length and no hint were given;
            # comparing an int against None would end the loop after a
            # single line on Python 2, so only apply the limit if we have one.
            if sizehint is not None and seen >= sizehint:
                break
        return lines

    def finish(self):
        """Mark the reader as done and collect any trailer headers.

        Trailers are only read when ``has_trailers`` is set and the wrapped
        fp provides ``read_trailer_lines``; they are stored in
        ``self.trailers``. A 'MaxSizeExceeded' error from the fp is
        re-raised as ``cherrypy.HTTPError`` (413).
        """
        self.done = True
        if self.has_trailers and hasattr(self.fp, 'read_trailer_lines'):
            self.trailers = {}

            try:
                for line in self.fp.read_trailer_lines():
                    if line[0] in ntob(' \t'):
                        # It's a continuation line.
                        v = line.strip()
                    else:
                        try:
                            k, v = line.split(ntob(":"), 1)
                        except ValueError:
                            raise ValueError("Illegal header line.")
                        k = k.strip().title()
                        v = v.strip()

                    if k in comma_separated_headers:
                        # Merge repeated headers under the same key that is
                        # used for storage below.
                        existing = self.trailers.get(k)
                        if existing:
                            v = ntob(", ").join((existing, v))
                    self.trailers[k] = v
            except Exception:
                e = sys.exc_info()[1]
                if e.__class__.__name__ == 'MaxSizeExceeded':
                    # Post data is too big
                    raise cherrypy.HTTPError(
                        413, "Maximum request length: %r" % e.args[1])
                else:
                    raise


class RequestBody(Entity):
    """The entity of the HTTP request."""

    bufsize = 8 * 1024
    """The buffer size used when reading the socket."""

    # Don't parse the request body at all if the client didn't provide
    # a Content-Type header. See http://www.cherrypy.org/ticket/790
    default_content_type = ''
    """This defines a default ``Content-Type`` to use if no Content-Type header
    is given. The empty string is used for RequestBody, which results in the
    request body not being read or parsed at all. This is by design; a missing
    ``Content-Type`` header in the HTTP request entity is an error at best,
    and a security hole at worst. For multipart parts, however, the MIME spec
    declares that a part with no Content-Type defaults to "text/plain"
    (see :class:`Part<cherrypy._cpreqbody.Part>`).
    """

    maxbytes = None
    """Raise ``MaxSizeExceeded`` if more bytes than this are read from the socket."""

    def __init__(self, fp, headers, params=None, request_params=None):
        Entity.__init__(self, fp, headers, params)

        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
        # When no explicit charset parameter is provided by the
        # sender, media subtypes of the "text" type are defined
        # to have a default charset value of "ISO-8859-1" when
        # received via HTTP.
        if self.content_type.value.startswith('text/'):
            for c in ('ISO-8859-1', 'iso-8859-1', 'Latin-1', 'latin-1'):
                if c in self.attempt_charsets:
                    break
            else:
                self.attempt_charsets.append('ISO-8859-1')

        # Temporary fix while deprecating passing .parts as .params.
        self.processors['multipart'] = _old_process_multipart

        if request_params is None:
            request_params = {}
        self.request_params = request_params

    def process(self):
        """Process the request entity based on its Content-Type."""
        # "The presence of a message-body in a request is signaled by the
        # inclusion of a Content-Length or Transfer-Encoding header field in
        # the request's message-headers."
        # It is possible to send a POST request with no body, for example;
        # however, app developers are responsible in that case to set
        # cherrypy.request.process_body to False so this method isn't called.
        h = cherrypy.serving.request.headers
        if 'Content-Length' not in h and 'Transfer-Encoding' not in h:
            raise cherrypy.HTTPError(411)

        self.fp = SizedReader(self.fp, self.length,
                              self.maxbytes, bufsize=self.bufsize,
                              has_trailers='Trailer' in h)
        super(RequestBody, self).process()

        # Body params should also be a part of the request_params
        # add them in here.
        request_params = self.request_params
        for key, value in self.params.items():
            # Python 2 only: keyword arguments must be byte strings (type 'str').
            if isinstance(key, unicode):
                key = key.encode('ISO-8859-1')

            if key in request_params:
                if not isinstance(request_params[key], list):
                    request_params[key] = [request_params[key]]
                request_params[key].append(value)
            else:
                request_params[key] = value