|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553 |
- from __future__ import unicode_literals
-
- import copy
- import re
- import sys
- from io import BytesIO
- from itertools import chain
-
- from django.conf import settings
- from django.core import signing
- from django.core.exceptions import DisallowedHost, ImproperlyConfigured
- from django.core.files import uploadhandler
- from django.http.multipartparser import MultiPartParser, MultiPartParserError
- from django.utils import six
- from django.utils.datastructures import ImmutableList, MultiValueDict
- from django.utils.encoding import (
- escape_uri_path, force_bytes, force_str, force_text, iri_to_uri,
- )
- from django.utils.http import is_same_domain
- from django.utils.six.moves.urllib.parse import (
- parse_qsl, quote, urlencode, urljoin, urlsplit,
- )
-
# Unique sentinel used as a parameter default to mean "no default supplied,
# so re-raise the error" (see HttpRequest.get_signed_cookie).
RAISE_ERROR = object()

# Accepts a lower-cased host: either a run of [a-z0-9.-] characters or a
# bracketed IPv6-style literal, optionally followed by ":<digits>".
host_validation_re = re.compile(r"^([a-z0-9.-]+|\[[a-f0-9]*:[a-f0-9:]+\])(:\d+)?$")
-
-
class UnreadablePostError(IOError):
    """Raised when an IOError occurs while reading the request's data stream."""
-
-
class RawPostDataException(Exception):
    """
    The raw request body can no longer be accessed because the request's
    data stream has already been read from (for example when multipart/*
    POST data was consumed through POST, FILES, etc.).
    """
-
-
class HttpRequest(object):
    """
    A basic HTTP request.

    Carries the GET/POST/COOKIES/META/FILES containers, helpers for host,
    scheme and URI reconstruction, and a file-like interface over
    ``self._stream`` for reading the request payload.
    """

    # Encoding applied to the GET/POST dicts. None means use default setting.
    _encoding = None
    # Class-level fallback; an instance-level list replaces it once handlers
    # are loaded from settings or assigned explicitly.
    _upload_handlers = []

    def __init__(self):
        # WARNING: The `WSGIRequest` subclass doesn't call `super`.
        # Any variable assignment made here should also happen in
        # `WSGIRequest.__init__()`.

        # Request data containers.
        self.GET = QueryDict(mutable=True)
        self.POST = QueryDict(mutable=True)
        self.COOKIES = {}
        self.META = {}
        self.FILES = MultiValueDict()

        # Routing / bookkeeping state.
        self.path = ''
        self.path_info = ''
        self.method = None
        self.resolver_match = None
        self._post_parse_error = False

    def __repr__(self):
        """Return ``<ClassName>`` or ``<ClassName: METHOD 'full path'>``."""
        class_name = self.__class__.__name__
        full_path = None if self.method is None else self.get_full_path()
        if not full_path:
            # Either no method is set or the full path is empty.
            return force_str('<%s>' % class_name)
        return force_str(
            '<%s: %s %r>' % (class_name, self.method, force_str(full_path))
        )

    def _get_raw_host(self):
        """
        Return the HTTP host using the environment or request headers. Skip
        allowed hosts protection, so may return an insecure host.
        """
        meta = self.META
        # Three sources are consulted, in order of decreasing preference.
        if settings.USE_X_FORWARDED_HOST and 'HTTP_X_FORWARDED_HOST' in meta:
            return meta['HTTP_X_FORWARDED_HOST']
        if 'HTTP_HOST' in meta:
            return meta['HTTP_HOST']

        # Reconstruct the host using the algorithm from PEP 333: the port is
        # appended only when it is not the default one for the scheme.
        host = meta['SERVER_NAME']
        server_port = self.get_port()
        default_port = '443' if self.is_secure() else '80'
        if server_port != default_port:
            host = '%s:%s' % (host, server_port)
        return host

    def get_host(self):
        """
        Return the HTTP host using the environment or request headers.

        Raises DisallowedHost when DEBUG is off and the host is malformed or
        not matched by ALLOWED_HOSTS.
        """
        host = self._get_raw_host()

        # There is no hostname validation when DEBUG=True
        if settings.DEBUG:
            return host

        domain, port = split_domain_port(host)
        if domain and validate_host(domain, settings.ALLOWED_HOSTS):
            return host

        # An empty domain means split_domain_port() rejected the host syntax.
        if domain:
            hint = " You may need to add %r to ALLOWED_HOSTS." % domain
        else:
            hint = " The domain name provided is not valid according to RFC 1034/1035."
        raise DisallowedHost("Invalid HTTP_HOST header: %r." % host + hint)

    def get_port(self):
        """Return the port number for the request as a string."""
        use_forwarded = (
            settings.USE_X_FORWARDED_PORT and
            'HTTP_X_FORWARDED_PORT' in self.META
        )
        meta_key = 'HTTP_X_FORWARDED_PORT' if use_forwarded else 'SERVER_PORT'
        return str(self.META[meta_key])

    def get_full_path(self, force_append_slash=False):
        """
        Return the escaped path followed by ``?<query string>`` when a query
        string is present. With ``force_append_slash``, a trailing slash is
        added to the path if it lacks one.
        """
        # RFC 3986 requires query string arguments to be in the ASCII range.
        # Rather than crash if this doesn't happen, we encode defensively.
        escaped_path = escape_uri_path(self.path)
        if force_append_slash and not self.path.endswith('/'):
            slash = '/'
        else:
            slash = ''
        query_string = self.META.get('QUERY_STRING', '')
        query = ('?' + iri_to_uri(query_string)) if query_string else ''
        return '%s%s%s' % (escaped_path, slash, query)

    def get_signed_cookie(self, key, default=RAISE_ERROR, salt='', max_age=None):
        """
        Return the unsigned value of the signed cookie ``key``.

        A missing cookie (KeyError) or a failed signature check
        (signing.BadSignature) is re-raised unless ``default`` was provided,
        in which case ``default`` is returned instead.
        """
        try:
            cookie_value = self.COOKIES[key]
        except KeyError:
            if default is RAISE_ERROR:
                raise
            return default

        signer = signing.get_cookie_signer(salt=key + salt)
        try:
            return signer.unsign(cookie_value, max_age=max_age)
        except signing.BadSignature:
            if default is RAISE_ERROR:
                raise
            return default

    def get_raw_uri(self):
        """
        Return an absolute URI from variables available in this request. Skip
        allowed hosts protection, so may return insecure URI.
        """
        scheme = self.scheme
        raw_host = self._get_raw_host()
        full_path = self.get_full_path()
        return '{scheme}://{host}{path}'.format(
            scheme=scheme, host=raw_host, path=full_path,
        )

    def build_absolute_uri(self, location=None):
        """
        Build an absolute URI from ``location`` and this request's variables.

        Without a ``location``, ``request.get_full_path()`` is used. A
        location that already has both scheme and host is only converted with
        iri_to_uri(); a relative or scheme-relative one (i.e.,
        ``//example.com/``) is urljoined to a base URL built from the
        request's scheme, host and path.
        """
        if location is None:
            # Make it an absolute url (but schemeless and domainless) for the
            # edge case that the path starts with '//'.
            location = '//%s' % self.get_full_path()
        parts = urlsplit(location)
        if not parts.scheme or not parts.netloc:
            base_uri = '{scheme}://{host}{path}'.format(
                scheme=self.scheme,
                host=self.get_host(),
                path=self.path,
            )
            # Joining lets ``location`` contribute a query string to the base
            # path, or override the host when it begins with //.
            location = urljoin(base_uri, location)
        return iri_to_uri(location)

    def _get_scheme(self):
        """
        Hook for subclasses like WSGIRequest to implement. Returns 'http' by
        default.
        """
        return 'http'

    @property
    def scheme(self):
        """
        Return 'https' when the SECURE_PROXY_SSL_HEADER header/value pair
        matches this request's META; otherwise defer to _get_scheme().
        """
        proxy_header = settings.SECURE_PROXY_SSL_HEADER
        if proxy_header:
            try:
                header, value = proxy_header
            except ValueError:
                raise ImproperlyConfigured(
                    'The SECURE_PROXY_SSL_HEADER setting must be a tuple containing two values.'
                )
            if self.META.get(header) == value:
                return 'https'
        return self._get_scheme()

    def is_secure(self):
        """Return True when the request scheme is 'https'."""
        return self.scheme == 'https'

    def is_ajax(self):
        """Return True when X-Requested-With is exactly 'XMLHttpRequest'."""
        return self.META.get('HTTP_X_REQUESTED_WITH') == 'XMLHttpRequest'

    @property
    def encoding(self):
        return self._encoding

    @encoding.setter
    def encoding(self, val):
        """
        Set the encoding used for GET/POST accesses. Any ``_get``/``_post``
        attribute already present is deleted so that it is rebuilt (and
        decoded correctly) on the next access.
        """
        self._encoding = val
        for cached_attr in ('_get', '_post'):
            if hasattr(self, cached_attr):
                delattr(self, cached_attr)

    def _initialize_handlers(self):
        """Load one upload handler per entry in settings.FILE_UPLOAD_HANDLERS."""
        handlers = []
        for handler_path in settings.FILE_UPLOAD_HANDLERS:
            handlers.append(uploadhandler.load_handler(handler_path, self))
        self._upload_handlers = handlers

    @property
    def upload_handlers(self):
        if not self._upload_handlers:
            # If there are no upload handlers defined, initialize them from settings.
            self._initialize_handlers()
        return self._upload_handlers

    @upload_handlers.setter
    def upload_handlers(self, upload_handlers):
        # The presence of _files means the upload has already been handled.
        if hasattr(self, '_files'):
            raise AttributeError("You cannot set the upload handlers after the upload has been processed.")
        self._upload_handlers = upload_handlers

    def parse_file_upload(self, META, post_data):
        """Return a tuple of (POST QueryDict, FILES MultiValueDict)."""
        # Freeze the handler list so it cannot be altered from here on.
        frozen_handlers = ImmutableList(
            self.upload_handlers,
            warning="You cannot alter upload handlers after the upload has been processed."
        )
        self.upload_handlers = frozen_handlers
        parser = MultiPartParser(META, post_data, self.upload_handlers, self.encoding)
        return parser.parse()

    @property
    def body(self):
        """
        Return the request payload, reading and caching it on first access.

        Raises RawPostDataException if the stream was already read from, and
        UnreadablePostError if reading fails with an IOError.
        """
        if hasattr(self, '_body'):
            return self._body

        if self._read_started:
            raise RawPostDataException("You cannot access body after reading from request's data stream")
        try:
            self._body = self.read()
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
        # Point the stream at the cached bytes so later reads still work.
        self._stream = BytesIO(self._body)
        return self._body

    def _mark_post_parse_error(self):
        """Install empty POST/FILES containers and flag the parse failure."""
        self._post = QueryDict('')
        self._files = MultiValueDict()
        self._post_parse_error = True

    def _load_post_and_files(self):
        """Populate self._post and self._files if the content-type is a form type"""
        if self.method != 'POST':
            self._post = QueryDict('', encoding=self._encoding)
            self._files = MultiValueDict()
            return
        # The stream was consumed without the body being cached, so there is
        # nothing left to parse.
        if self._read_started and not hasattr(self, '_body'):
            self._mark_post_parse_error()
            return

        content_type = self.META.get('CONTENT_TYPE', '')
        if content_type.startswith('multipart/form-data'):
            # Use already read data when the body has been cached; otherwise
            # let the parser read from this request directly.
            data = BytesIO(self._body) if hasattr(self, '_body') else self
            try:
                self._post, self._files = self.parse_file_upload(self.META, data)
            except MultiPartParserError:
                # An error occurred while parsing POST data. Since the
                # request handler might access self.POST while formatting
                # the error, set self._post and self._files to prevent
                # attempts to parse POST data again, and record that the
                # failure happened.
                self._mark_post_parse_error()
                raise
        elif content_type.startswith('application/x-www-form-urlencoded'):
            self._post = QueryDict(self.body, encoding=self._encoding)
            self._files = MultiValueDict()
        else:
            self._post = QueryDict('', encoding=self._encoding)
            self._files = MultiValueDict()

    def close(self):
        """Close every uploaded file held in ``self._files``, if any."""
        if not hasattr(self, '_files'):
            return
        for _name, uploaded_files in self._files.lists():
            for uploaded in uploaded_files:
                uploaded.close()

    # File-like and iterator interface.
    #
    # Expects self._stream to be set to an appropriate source of bytes by
    # a corresponding request subclass (e.g. WSGIRequest).
    # Also when request data has already been read by request.POST or
    # request.body, self._stream points to a BytesIO instance
    # containing that data.

    def read(self, *args, **kwargs):
        """Read from the underlying stream, mapping IOError to UnreadablePostError."""
        self._read_started = True
        try:
            data = self._stream.read(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
        else:
            return data

    def readline(self, *args, **kwargs):
        """Read one line from the stream, mapping IOError to UnreadablePostError."""
        self._read_started = True
        try:
            line = self._stream.readline(*args, **kwargs)
        except IOError as e:
            six.reraise(UnreadablePostError, UnreadablePostError(*e.args), sys.exc_info()[2])
        else:
            return line

    def xreadlines(self):
        """Yield lines from the stream until readline() returns a falsy value."""
        line = self.readline()
        while line:
            yield line
            line = self.readline()

    __iter__ = xreadlines

    def readlines(self):
        """Return all remaining lines of the stream as a list."""
        return [line for line in self]
-
-
class QueryDict(MultiValueDict):
    """
    A specialized MultiValueDict which represents a query string.

    A QueryDict can be used to represent GET or POST data. It subclasses
    MultiValueDict since keys in such data can be repeated, for instance
    in the data from a form with a <select multiple> field.

    By default QueryDicts are immutable, though the copy() method
    will always return a mutable copy.

    Both keys and values set on this class are converted from the given encoding
    (DEFAULT_CHARSET by default) to unicode.
    """

    # Both are reset in __init__, but are also given class-level values so
    # that unpickling will have valid values.
    _mutable = True
    _encoding = None

    def __init__(self, query_string=None, mutable=False, encoding=None):
        super(QueryDict, self).__init__()
        encoding = encoding or settings.DEFAULT_CHARSET
        self.encoding = encoding
        if six.PY3:
            if isinstance(query_string, bytes):
                # query_string normally contains URL-encoded data, a subset of ASCII.
                try:
                    query_string = query_string.decode(encoding)
                except UnicodeDecodeError:
                    # ... but some user agents are misbehaving :-(
                    query_string = query_string.decode('iso-8859-1')
            pairs = parse_qsl(query_string or '',
                              keep_blank_values=True,
                              encoding=encoding)
            for name, item in pairs:
                self.appendlist(name, item)
        else:
            pairs = parse_qsl(query_string or '', keep_blank_values=True)
            for name, item in pairs:
                # Fall back to iso-8859-1 when the value is not valid in the
                # requested encoding.
                try:
                    item = item.decode(encoding)
                except UnicodeDecodeError:
                    item = item.decode('iso-8859-1')
                self.appendlist(force_text(name, encoding, errors='replace'),
                                item)
        # Applied last: the appendlist() calls above require mutability.
        self._mutable = mutable

    @property
    def encoding(self):
        # Resolve lazily to the default charset when no encoding was stored.
        if self._encoding is None:
            self._encoding = settings.DEFAULT_CHARSET
        return self._encoding

    @encoding.setter
    def encoding(self, value):
        self._encoding = value

    def _assert_mutable(self):
        """Raise AttributeError when this instance is immutable."""
        if self._mutable:
            return
        raise AttributeError("This QueryDict instance is immutable")

    def __setitem__(self, key, value):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_value = bytes_to_text(value, self.encoding)
        super(QueryDict, self).__setitem__(text_key, text_value)

    def __delitem__(self, key):
        self._assert_mutable()
        super(QueryDict, self).__delitem__(key)

    def __copy__(self):
        """Return a mutable shallow copy sharing this instance's encoding."""
        duplicate = self.__class__('', mutable=True, encoding=self.encoding)
        for name, values in six.iterlists(self):
            duplicate.setlist(name, values)
        return duplicate

    def __deepcopy__(self, memo):
        """Return a mutable deep copy sharing this instance's encoding."""
        duplicate = self.__class__('', mutable=True, encoding=self.encoding)
        memo[id(self)] = duplicate
        for name, values in six.iterlists(self):
            duplicate.setlist(copy.deepcopy(name, memo),
                              copy.deepcopy(values, memo))
        return duplicate

    def setlist(self, key, list_):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_values = [bytes_to_text(item, self.encoding) for item in list_]
        super(QueryDict, self).setlist(text_key, text_values)

    def setlistdefault(self, key, default_list=None):
        self._assert_mutable()
        return super(QueryDict, self).setlistdefault(key, default_list)

    def appendlist(self, key, value):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_value = bytes_to_text(value, self.encoding)
        super(QueryDict, self).appendlist(text_key, text_value)

    def pop(self, key, *args):
        self._assert_mutable()
        return super(QueryDict, self).pop(key, *args)

    def popitem(self):
        self._assert_mutable()
        return super(QueryDict, self).popitem()

    def clear(self):
        self._assert_mutable()
        super(QueryDict, self).clear()

    def setdefault(self, key, default=None):
        self._assert_mutable()
        text_key = bytes_to_text(key, self.encoding)
        text_default = bytes_to_text(default, self.encoding)
        return super(QueryDict, self).setdefault(text_key, text_default)

    def copy(self):
        """Returns a mutable copy of this object."""
        return self.__deepcopy__({})

    def urlencode(self, safe=None):
        """
        Returns an encoded string of all query string arguments.

        :arg safe: Used to specify characters which do not require quoting, for
            example::

                >>> q = QueryDict('', mutable=True)
                >>> q['next'] = '/a&b/'
                >>> q.urlencode()
                'next=%2Fa%26b%2F'
                >>> q.urlencode(safe='/')
                'next=/a%26b/'
        """
        if safe:
            safe = force_bytes(safe, self.encoding)

            def encode(k, v):
                # Quote key and value separately, sparing the ``safe`` characters.
                return '%s=%s' % ((quote(k, safe), quote(v, safe)))
        else:
            def encode(k, v):
                return urlencode({k: v})

        encoded_pairs = []
        for name, values in self.lists():
            name_bytes = force_bytes(name, self.encoding)
            for item in values:
                encoded_pairs.append(
                    encode(name_bytes, force_bytes(item, self.encoding))
                )
        return '&'.join(encoded_pairs)
-
-
- # It's neither necessary nor appropriate to use
- # django.utils.encoding.smart_text for parsing URLs and form inputs. Thus,
- # this slightly more restricted function, used by QueryDict.
def bytes_to_text(s, encoding):
    """
    Decode a bytestring to text using the given encoding.

    Bytes that are invalid in ``encoding`` are replaced with the Unicode
    replacement character (U+FFFD) instead of raising.

    Any object that is not a bytestring is returned without change.
    """
    if not isinstance(s, bytes):
        return s
    return s.decode(encoding, 'replace')
-
-
def split_domain_port(host):
    """
    Split a host string into a (domain, port) tuple.

    The host is lower-cased first. A host rejected by ``host_validation_re``
    yields ``('', '')``; a host without a port yields an empty port string.
    """
    host = host.lower()

    if host_validation_re.match(host) is None:
        return '', ''

    if host.endswith(']'):
        # It's an IPv6 address without a port.
        return host, ''

    # Split on the last colon only, so everything before it stays the domain.
    domain, colon, port = host.rpartition(':')
    if colon:
        return domain, port
    return host, ''
-
-
def validate_host(host, allowed_hosts):
    """
    Validate the given host for this site.

    The host is accepted when any entry of ``allowed_hosts`` is ``*`` or is
    matched by ``is_same_domain(host, pattern)``. A single trailing dot on
    the host is dropped before comparing.

    Note: This function assumes that the given host is lower-cased and has
    already had the port, if any, stripped off.

    Return ``True`` for a valid host, ``False`` otherwise.
    """
    if host.endswith('.'):
        host = host[:-1]

    return any(
        pattern == '*' or is_same_domain(host, pattern)
        for pattern in allowed_hosts
    )
|