From dd3a46295c069561b0c278a8af0db784b57a6416 Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Sun, 28 Dec 2014 19:21:45 -0800 Subject: [PATCH] Support "Cache-Control: max-stale" in requests. This allows spiders to be configured with the full RFC2616 cache policy, but to avoid revalidation on a request-by-request basis, while remaining conformant with the HTTP spec. --- scrapy/extensions/httpcache.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/scrapy/extensions/httpcache.py b/scrapy/extensions/httpcache.py index 8011581ac..665ad3439 100644 --- a/scrapy/extensions/httpcache.py +++ b/scrapy/extensions/httpcache.py @@ -94,6 +94,25 @@ class RFC2616Policy(object): currentage = self._compute_current_age(cachedresponse, request, now) if currentage < freshnesslifetime: return True + + if 'max-stale' in ccreq and 'must-revalidate' not in cc: + # From RFC2616: "Indicates that the client is willing to + # accept a response that has exceeded its expiration time. + # If max-stale is assigned a value, then the client is + # willing to accept a response that has exceeded its + # expiration time by no more than the specified number of + # seconds. If no value is assigned to max-stale, then the + # client is willing to accept a stale response of any age." + staleage = ccreq['max-stale'] + if staleage is None: + return True + + try: + if currentage < freshnesslifetime + max(0, int(staleage)): + return True + except ValueError: + pass + # Cached response is stale, try to set validators if any self._set_conditional_validators(request, cachedresponse) return False