You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

374 lines
15 KiB

  1. """
  2. This module contains helper functions for controlling caching. It does so by
  3. managing the "Vary" header of responses. It includes functions to patch the
  4. header of response objects directly and decorators that change functions to do
  5. that header-patching themselves.
  6. For information on the Vary header, see:
  7. https://tools.ietf.org/html/rfc7231#section-7.1.4
  8. Essentially, the "Vary" HTTP header defines which headers a cache should take
  9. into account when building its cache key. Requests with the same path but
  10. different header content for headers named in "Vary" need to get different
  11. cache keys to prevent delivery of wrong content.
  12. An example: i18n middleware would need to distinguish caches by the
  13. "Accept-language" header.
  14. """
  15. from __future__ import unicode_literals
  16. import hashlib
  17. import logging
  18. import re
  19. import time
  20. from django.conf import settings
  21. from django.core.cache import caches
  22. from django.http import HttpResponse, HttpResponseNotModified
  23. from django.utils.encoding import force_bytes, force_text, iri_to_uri
  24. from django.utils.http import (
  25. http_date, parse_etags, parse_http_date_safe, quote_etag,
  26. )
  27. from django.utils.timezone import get_current_timezone_name
  28. from django.utils.translation import get_language
  29. cc_delim_re = re.compile(r'\s*,\s*')
  30. logger = logging.getLogger('django.request')
  31. def patch_cache_control(response, **kwargs):
  32. """
  33. This function patches the Cache-Control header by adding all
  34. keyword arguments to it. The transformation is as follows:
  35. * All keyword parameter names are turned to lowercase, and underscores
  36. are converted to hyphens.
  37. * If the value of a parameter is True (exactly True, not just a
  38. true value), only the parameter name is added to the header.
  39. * All other parameters are added with their value, after applying
  40. str() to it.
  41. """
  42. def dictitem(s):
  43. t = s.split('=', 1)
  44. if len(t) > 1:
  45. return (t[0].lower(), t[1])
  46. else:
  47. return (t[0].lower(), True)
  48. def dictvalue(t):
  49. if t[1] is True:
  50. return t[0]
  51. else:
  52. return '%s=%s' % (t[0], t[1])
  53. if response.has_header('Cache-Control'):
  54. cc = cc_delim_re.split(response['Cache-Control'])
  55. cc = dict(dictitem(el) for el in cc)
  56. else:
  57. cc = {}
  58. # If there's already a max-age header but we're being asked to set a new
  59. # max-age, use the minimum of the two ages. In practice this happens when
  60. # a decorator and a piece of middleware both operate on a given view.
  61. if 'max-age' in cc and 'max_age' in kwargs:
  62. kwargs['max_age'] = min(int(cc['max-age']), kwargs['max_age'])
  63. # Allow overriding private caching and vice versa
  64. if 'private' in cc and 'public' in kwargs:
  65. del cc['private']
  66. elif 'public' in cc and 'private' in kwargs:
  67. del cc['public']
  68. for (k, v) in kwargs.items():
  69. cc[k.replace('_', '-')] = v
  70. cc = ', '.join(dictvalue(el) for el in cc.items())
  71. response['Cache-Control'] = cc
  72. def get_max_age(response):
  73. """
  74. Returns the max-age from the response Cache-Control header as an integer
  75. (or ``None`` if it wasn't found or wasn't an integer.
  76. """
  77. if not response.has_header('Cache-Control'):
  78. return
  79. cc = dict(_to_tuple(el) for el in
  80. cc_delim_re.split(response['Cache-Control']))
  81. if 'max-age' in cc:
  82. try:
  83. return int(cc['max-age'])
  84. except (ValueError, TypeError):
  85. pass
  86. def set_response_etag(response):
  87. if not response.streaming:
  88. response['ETag'] = quote_etag(hashlib.md5(response.content).hexdigest())
  89. return response
  90. def _precondition_failed(request):
  91. logger.warning('Precondition Failed: %s', request.path,
  92. extra={
  93. 'status_code': 412,
  94. 'request': request,
  95. },
  96. )
  97. return HttpResponse(status=412)
  98. def _not_modified(request, response=None):
  99. if response:
  100. # We need to keep the cookies, see ticket #4994.
  101. cookies = response.cookies
  102. response = HttpResponseNotModified()
  103. response.cookies = cookies
  104. return response
  105. else:
  106. return HttpResponseNotModified()
  107. def get_conditional_response(request, etag=None, last_modified=None, response=None):
  108. # Get HTTP request headers
  109. if_modified_since = request.META.get('HTTP_IF_MODIFIED_SINCE')
  110. if if_modified_since:
  111. if_modified_since = parse_http_date_safe(if_modified_since)
  112. if_unmodified_since = request.META.get('HTTP_IF_UNMODIFIED_SINCE')
  113. if if_unmodified_since:
  114. if_unmodified_since = parse_http_date_safe(if_unmodified_since)
  115. if_none_match = request.META.get('HTTP_IF_NONE_MATCH')
  116. if_match = request.META.get('HTTP_IF_MATCH')
  117. etags = []
  118. if if_none_match or if_match:
  119. # There can be more than one ETag in the request, so we
  120. # consider the list of values.
  121. try:
  122. etags = parse_etags(if_none_match or if_match)
  123. except ValueError:
  124. # In case of an invalid ETag, ignore all ETag headers.
  125. # Apparently Opera sends invalidly quoted headers at times
  126. # (we should be returning a 400 response, but that's a
  127. # little extreme) -- this is bug #10681.
  128. if_none_match = None
  129. if_match = None
  130. # If-None-Match must be ignored if original result would be anything
  131. # other than a 2XX or 304 status. 304 status would result in no change.
  132. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.26
  133. if response and not (200 <= response.status_code < 300):
  134. if_none_match = None
  135. if_match = None
  136. # If-Modified-Since must be ignored if the original result was not a 200.
  137. # http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.25
  138. if response and response.status_code != 200:
  139. if_modified_since = None
  140. if_unmodified_since = None
  141. if not ((if_match and if_modified_since) or
  142. (if_none_match and if_unmodified_since) or
  143. (if_modified_since and if_unmodified_since) or
  144. (if_match and if_none_match)):
  145. # We only get here if no undefined combinations of headers are
  146. # specified.
  147. if ((if_none_match and (etag in etags or
  148. '*' in etags and etag)) and
  149. (not if_modified_since or
  150. (last_modified and if_modified_since and
  151. last_modified <= if_modified_since))):
  152. if request.method in ('GET', 'HEAD'):
  153. return _not_modified(request, response)
  154. else:
  155. return _precondition_failed(request)
  156. elif (if_match and ((not etag and '*' in etags) or
  157. (etag and etag not in etags) or
  158. (last_modified and if_unmodified_since and
  159. last_modified > if_unmodified_since))):
  160. return _precondition_failed(request)
  161. elif (not if_none_match and request.method in ('GET', 'HEAD') and
  162. last_modified and if_modified_since and
  163. last_modified <= if_modified_since):
  164. return _not_modified(request, response)
  165. elif (not if_match and
  166. last_modified and if_unmodified_since and
  167. last_modified > if_unmodified_since):
  168. return _precondition_failed(request)
  169. return response
  170. def patch_response_headers(response, cache_timeout=None):
  171. """
  172. Adds some useful headers to the given HttpResponse object:
  173. ETag, Last-Modified, Expires and Cache-Control
  174. Each header is only added if it isn't already set.
  175. cache_timeout is in seconds. The CACHE_MIDDLEWARE_SECONDS setting is used
  176. by default.
  177. """
  178. if cache_timeout is None:
  179. cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
  180. if cache_timeout < 0:
  181. cache_timeout = 0 # Can't have max-age negative
  182. if settings.USE_ETAGS and not response.has_header('ETag'):
  183. if hasattr(response, 'render') and callable(response.render):
  184. response.add_post_render_callback(set_response_etag)
  185. else:
  186. response = set_response_etag(response)
  187. if not response.has_header('Last-Modified'):
  188. response['Last-Modified'] = http_date()
  189. if not response.has_header('Expires'):
  190. response['Expires'] = http_date(time.time() + cache_timeout)
  191. patch_cache_control(response, max_age=cache_timeout)
  192. def add_never_cache_headers(response):
  193. """
  194. Adds headers to a response to indicate that a page should never be cached.
  195. """
  196. patch_response_headers(response, cache_timeout=-1)
  197. patch_cache_control(response, no_cache=True, no_store=True, must_revalidate=True)
  198. def patch_vary_headers(response, newheaders):
  199. """
  200. Adds (or updates) the "Vary" header in the given HttpResponse object.
  201. newheaders is a list of header names that should be in "Vary". Existing
  202. headers in "Vary" aren't removed.
  203. """
  204. # Note that we need to keep the original order intact, because cache
  205. # implementations may rely on the order of the Vary contents in, say,
  206. # computing an MD5 hash.
  207. if response.has_header('Vary'):
  208. vary_headers = cc_delim_re.split(response['Vary'])
  209. else:
  210. vary_headers = []
  211. # Use .lower() here so we treat headers as case-insensitive.
  212. existing_headers = set(header.lower() for header in vary_headers)
  213. additional_headers = [newheader for newheader in newheaders
  214. if newheader.lower() not in existing_headers]
  215. response['Vary'] = ', '.join(vary_headers + additional_headers)
  216. def has_vary_header(response, header_query):
  217. """
  218. Checks to see if the response has a given header name in its Vary header.
  219. """
  220. if not response.has_header('Vary'):
  221. return False
  222. vary_headers = cc_delim_re.split(response['Vary'])
  223. existing_headers = set(header.lower() for header in vary_headers)
  224. return header_query.lower() in existing_headers
  225. def _i18n_cache_key_suffix(request, cache_key):
  226. """If necessary, adds the current locale or time zone to the cache key."""
  227. if settings.USE_I18N or settings.USE_L10N:
  228. # first check if LocaleMiddleware or another middleware added
  229. # LANGUAGE_CODE to request, then fall back to the active language
  230. # which in turn can also fall back to settings.LANGUAGE_CODE
  231. cache_key += '.%s' % getattr(request, 'LANGUAGE_CODE', get_language())
  232. if settings.USE_TZ:
  233. # The datetime module doesn't restrict the output of tzname().
  234. # Windows is known to use non-standard, locale-dependent names.
  235. # User-defined tzinfo classes may return absolutely anything.
  236. # Hence this paranoid conversion to create a valid cache key.
  237. tz_name = force_text(get_current_timezone_name(), errors='ignore')
  238. cache_key += '.%s' % tz_name.encode('ascii', 'ignore').decode('ascii').replace(' ', '_')
  239. return cache_key
  240. def _generate_cache_key(request, method, headerlist, key_prefix):
  241. """Returns a cache key from the headers given in the header list."""
  242. ctx = hashlib.md5()
  243. for header in headerlist:
  244. value = request.META.get(header)
  245. if value is not None:
  246. ctx.update(force_bytes(value))
  247. url = hashlib.md5(force_bytes(iri_to_uri(request.build_absolute_uri())))
  248. cache_key = 'views.decorators.cache.cache_page.%s.%s.%s.%s' % (
  249. key_prefix, method, url.hexdigest(), ctx.hexdigest())
  250. return _i18n_cache_key_suffix(request, cache_key)
  251. def _generate_cache_header_key(key_prefix, request):
  252. """Returns a cache key for the header cache."""
  253. url = hashlib.md5(force_bytes(iri_to_uri(request.build_absolute_uri())))
  254. cache_key = 'views.decorators.cache.cache_header.%s.%s' % (
  255. key_prefix, url.hexdigest())
  256. return _i18n_cache_key_suffix(request, cache_key)
  257. def get_cache_key(request, key_prefix=None, method='GET', cache=None):
  258. """
  259. Returns a cache key based on the request URL and query. It can be used
  260. in the request phase because it pulls the list of headers to take into
  261. account from the global URL registry and uses those to build a cache key
  262. to check against.
  263. If there is no headerlist stored, the page needs to be rebuilt, so this
  264. function returns None.
  265. """
  266. if key_prefix is None:
  267. key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
  268. cache_key = _generate_cache_header_key(key_prefix, request)
  269. if cache is None:
  270. cache = caches[settings.CACHE_MIDDLEWARE_ALIAS]
  271. headerlist = cache.get(cache_key)
  272. if headerlist is not None:
  273. return _generate_cache_key(request, method, headerlist, key_prefix)
  274. else:
  275. return None
  276. def learn_cache_key(request, response, cache_timeout=None, key_prefix=None, cache=None):
  277. """
  278. Learns what headers to take into account for some request URL from the
  279. response object. It stores those headers in a global URL registry so that
  280. later access to that URL will know what headers to take into account
  281. without building the response object itself. The headers are named in the
  282. Vary header of the response, but we want to prevent response generation.
  283. The list of headers to use for cache key generation is stored in the same
  284. cache as the pages themselves. If the cache ages some data out of the
  285. cache, this just means that we have to build the response once to get at
  286. the Vary header and so at the list of headers to use for the cache key.
  287. """
  288. if key_prefix is None:
  289. key_prefix = settings.CACHE_MIDDLEWARE_KEY_PREFIX
  290. if cache_timeout is None:
  291. cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS
  292. cache_key = _generate_cache_header_key(key_prefix, request)
  293. if cache is None:
  294. cache = caches[settings.CACHE_MIDDLEWARE_ALIAS]
  295. if response.has_header('Vary'):
  296. is_accept_language_redundant = settings.USE_I18N or settings.USE_L10N
  297. # If i18n or l10n are used, the generated cache key will be suffixed
  298. # with the current locale. Adding the raw value of Accept-Language is
  299. # redundant in that case and would result in storing the same content
  300. # under multiple keys in the cache. See #18191 for details.
  301. headerlist = []
  302. for header in cc_delim_re.split(response['Vary']):
  303. header = header.upper().replace('-', '_')
  304. if header == 'ACCEPT_LANGUAGE' and is_accept_language_redundant:
  305. continue
  306. headerlist.append('HTTP_' + header)
  307. headerlist.sort()
  308. cache.set(cache_key, headerlist, cache_timeout)
  309. return _generate_cache_key(request, request.method, headerlist, key_prefix)
  310. else:
  311. # if there is no Vary header, we still need a cache key
  312. # for the request.build_absolute_uri()
  313. cache.set(cache_key, [], cache_timeout)
  314. return _generate_cache_key(request, request.method, [], key_prefix)
  315. def _to_tuple(s):
  316. t = s.split('=', 1)
  317. if len(t) == 2:
  318. return t[0].lower(), t[1]
  319. return t[0].lower(), True