diff options
author | florent <florent.pigout@gmail.com> | 2011-07-09 00:33:26 (GMT) |
---|---|---|
committer | florent <florent.pigout@gmail.com> | 2011-07-09 00:33:26 (GMT) |
commit | 0767eedcd06485f30ee6b00df348b22847c7c7ad (patch) | |
tree | de339586453b0b638889ec607f4ded7de2edc05a /werkzeug/formparser.py | |
parent | 89198c864831bea0a17f136b897aebc59f606166 (diff) |
make the flask based tools more clean for a nicer use -> move requirement to lib dir + limit import code to the minimum (HEAD, master)
Diffstat (limited to 'werkzeug/formparser.py')
-rw-r--r-- | werkzeug/formparser.py | 352 |
1 file changed, 0 insertions, 352 deletions
diff --git a/werkzeug/formparser.py b/werkzeug/formparser.py deleted file mode 100644 index 54e952f..0000000 --- a/werkzeug/formparser.py +++ /dev/null @@ -1,352 +0,0 @@ -# -*- coding: utf-8 -*- -""" - werkzeug.formparser - ~~~~~~~~~~~~~~~~~~~ - - This module implements the form parsing. It supports url-encoded forms - as well as non-nested multipart uploads. - - :copyright: (c) 2010 by the Werkzeug Team, see AUTHORS for more details. - :license: BSD, see LICENSE for more details. -""" -import re -from cStringIO import StringIO -from tempfile import TemporaryFile -from itertools import chain, repeat - -from werkzeug._internal import _decode_unicode, _empty_stream - - -#: an iterator that yields empty strings -_empty_string_iter = repeat('') - -#: a regular expression for multipart boundaries -_multipart_boundary_re = re.compile('^[ -~]{0,200}[!-~]$') - -#: supported http encodings that are also available in python we support -#: for multipart messages. -_supported_multipart_encodings = frozenset(['base64', 'quoted-printable']) - - -def default_stream_factory(total_content_length, filename, content_type, - content_length=None): - """The stream factory that is used per default.""" - if total_content_length > 1024 * 500: - return TemporaryFile('wb+') - return StringIO() - - -def parse_form_data(environ, stream_factory=None, charset='utf-8', - errors='ignore', max_form_memory_size=None, - max_content_length=None, cls=None, - silent=True): - """Parse the form data in the environ and return it as tuple in the form - ``(stream, form, files)``. You should only call this method if the - transport method is `POST` or `PUT`. - - If the mimetype of the data transmitted is `multipart/form-data` the - files multidict will be filled with `FileStorage` objects. If the - mimetype is unknown the input stream is wrapped and returned as first - argument, else the stream is empty. - - This function does not raise exceptions, even if the input data is - malformed. 
- - Have a look at :ref:`dealing-with-request-data` for more details. - - .. versionadded:: 0.5 - The `max_form_memory_size`, `max_content_length` and - `cls` parameters were added. - - .. versionadded:: 0.5.1 - The optional `silent` flag was added. - - :param environ: the WSGI environment to be used for parsing. - :param stream_factory: An optional callable that returns a new read and - writeable file descriptor. This callable works - the same as :meth:`~BaseResponse._get_file_stream`. - :param charset: The character set for URL and url encoded form data. - :param errors: The encoding error behavior. - :param max_form_memory_size: the maximum number of bytes to be accepted for - in-memory stored form data. If the data - exceeds the value specified an - :exc:`~exceptions.RequestURITooLarge` - exception is raised. - :param max_content_length: If this is provided and the transmitted data - is longer than this value an - :exc:`~exceptions.RequestEntityTooLarge` - exception is raised. - :param cls: an optional dict class to use. If this is not specified - or `None` the default :class:`MultiDict` is used. - :param silent: If set to False parsing errors will not be caught. - :return: A tuple in the form ``(stream, form, files)``. 
- """ - content_type, extra = parse_options_header(environ.get('CONTENT_TYPE', '')) - try: - content_length = int(environ['CONTENT_LENGTH']) - except (KeyError, ValueError): - content_length = 0 - - if cls is None: - cls = MultiDict - - if max_content_length is not None and content_length > max_content_length: - raise RequestEntityTooLarge() - - stream = _empty_stream - files = () - - if content_type == 'multipart/form-data': - try: - form, files = parse_multipart(environ['wsgi.input'], - extra.get('boundary'), - content_length, stream_factory, - charset, errors, - max_form_memory_size=max_form_memory_size) - except ValueError, e: - if not silent: - raise - form = cls() - else: - form = cls(form) - elif content_type == 'application/x-www-form-urlencoded' or \ - content_type == 'application/x-url-encoded': - if max_form_memory_size is not None and \ - content_length > max_form_memory_size: - raise RequestEntityTooLarge() - form = url_decode(environ['wsgi.input'].read(content_length), - charset, errors=errors, cls=cls) - else: - form = cls() - stream = LimitedStream(environ['wsgi.input'], content_length) - - return stream, form, cls(files) - - -def _fix_ie_filename(filename): - """Internet Explorer 6 transmits the full file name if a file is - uploaded. This function strips the full path if it thinks the - filename is Windows-like absolute. - """ - if filename[1:3] == ':\\' or filename[:2] == '\\\\': - return filename.split('\\')[-1] - return filename - - -def _line_parse(line): - """Removes line ending characters and returns a tuple (`stripped_line`, - `is_terminated`). - """ - if line[-2:] == '\r\n': - return line[:-2], True - elif line[-1:] in '\r\n': - return line[:-1], True - return line, False - - -def _find_terminator(iterator): - """The terminator might have some additional newlines before it. - There is at least one application that sends additional newlines - before headers (the python setuptools package). 
- """ - for line in iterator: - if not line: - break - line = line.strip() - if line: - return line - return '' - - -def is_valid_multipart_boundary(boundary): - """Checks if the string given is a valid multipart boundary.""" - return _multipart_boundary_re.match(boundary) is not None - - -def parse_multipart(file, boundary, content_length, stream_factory=None, - charset='utf-8', errors='ignore', buffer_size=10 * 1024, - max_form_memory_size=None): - """Parse a multipart/form-data stream. This is invoked by - :func:`utils.parse_form_data` if the content type matches. Currently it - exists for internal usage only, but could be exposed as separate - function if it turns out to be useful and if we consider the API stable. - """ - # XXX: this function does not support multipart/mixed. I don't know of - # any browser that supports this, but it should be implemented - # nonetheless. - - # make sure the buffer size is divisible by four so that we can base64 - # decode chunk by chunk - assert buffer_size % 4 == 0, 'buffer size has to be divisible by 4' - # also the buffer size has to be at least 1024 bytes long or long headers - # will freak out the system - assert buffer_size >= 1024, 'buffer size has to be at least 1KB' - - if stream_factory is None: - stream_factory = default_stream_factory - - if not boundary: - raise ValueError('Missing boundary') - if not is_valid_multipart_boundary(boundary): - raise ValueError('Invalid boundary: %s' % boundary) - if len(boundary) > buffer_size: # pragma: no cover - # this should never happen because we check for a minimum size - # of 1024 and boundaries may not be longer than 200. The only - # situation when this happen is for non debug builds where - # the assert i skipped. 
- raise ValueError('Boundary longer than buffer size') - - total_content_length = content_length - next_part = '--' + boundary - last_part = next_part + '--' - - form = [] - files = [] - in_memory = 0 - - # convert the file into a limited stream with iteration capabilities - file = LimitedStream(file, content_length) - iterator = chain(make_line_iter(file, buffer_size=buffer_size), - _empty_string_iter) - - try: - terminator = _find_terminator(iterator) - if terminator != next_part: - raise ValueError('Expected boundary at start of multipart data') - - while terminator != last_part: - headers = parse_multipart_headers(iterator) - disposition = headers.get('content-disposition') - if disposition is None: - raise ValueError('Missing Content-Disposition header') - disposition, extra = parse_options_header(disposition) - name = extra.get('name') - - transfer_encoding = headers.get('content-transfer-encoding') - try_decode = transfer_encoding is not None and \ - transfer_encoding in _supported_multipart_encodings - - filename = extra.get('filename') - - # if no content type is given we stream into memory. A list is - # used as a temporary container. - if filename is None: - is_file = False - container = [] - _write = container.append - guard_memory = max_form_memory_size is not None - - # otherwise we parse the rest of the headers and ask the stream - # factory for something we can write in. 
- else: - content_type = headers.get('content-type') - content_type = parse_options_header(content_type)[0] \ - or 'text/plain' - is_file = True - guard_memory = False - if filename is not None: - filename = _fix_ie_filename(_decode_unicode(filename, - charset, - errors)) - try: - content_length = int(headers['content-length']) - except (KeyError, ValueError): - content_length = 0 - container = stream_factory(total_content_length, content_type, - filename, content_length) - _write = container.write - - buf = '' - for line in iterator: - if not line: - raise ValueError('unexpected end of stream') - - if line[:2] == '--': - terminator = line.rstrip() - if terminator in (next_part, last_part): - break - - if try_decode: - try: - line = line.decode(transfer_encoding) - except: - raise ValueError('could not decode transfer ' - 'encoded chunk') - - # we have something in the buffer from the last iteration. - # this is usually a newline delimiter. - if buf: - _write(buf) - buf = '' - - # If the line ends with windows CRLF we write everything except - # the last two bytes. In all other cases however we write - # everything except the last byte. If it was a newline, that's - # fine, otherwise it does not matter because we will write it - # the next iteration. this ensures we do not write the - # final newline into the stream. That way we do not have to - # truncate the stream. - if line[-2:] == '\r\n': - buf = '\r\n' - cutoff = -2 - else: - buf = line[-1] - cutoff = -1 - _write(line[:cutoff]) - - # if we write into memory and there is a memory size limit we - # count the number of bytes in memory and raise an exception if - # there is too much data in memory. 
- if guard_memory: - in_memory += len(line) - if in_memory > max_form_memory_size: - from werkzeug.exceptions import RequestEntityTooLarge - raise RequestEntityTooLarge() - else: # pragma: no cover - raise ValueError('unexpected end of part') - - if is_file: - container.seek(0) - files.append((name, FileStorage(container, filename, name, - content_type, - content_length, headers))) - else: - form.append((name, _decode_unicode(''.join(container), - charset, errors))) - finally: - # make sure the whole input stream is read - file.exhaust() - - return form, files - - -def parse_multipart_headers(iterable): - """Parses multipart headers from an iterable that yields lines (including - the trailing newline symbol. - """ - result = [] - for line in iterable: - line, line_terminated = _line_parse(line) - if not line_terminated: - raise ValueError('unexpected end of line in multipart header') - if not line: - break - elif line[0] in ' \t' and result: - key, value = result[-1] - result[-1] = (key, value + '\n ' + line[1:]) - else: - parts = line.split(':', 1) - if len(parts) == 2: - result.append((parts[0].strip(), parts[1].strip())) - - # we link the list to the headers, no need to create a copy, the - # list was not shared anyways. - return Headers.linked(result) - - -# circular dependencies -from werkzeug.urls import url_decode -from werkzeug.wsgi import LimitedStream, make_line_iter -from werkzeug.exceptions import RequestEntityTooLarge -from werkzeug.datastructures import Headers, FileStorage, MultiDict -from werkzeug.http import parse_options_header |