mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 00:04:03 +00:00
Automated merge with http://hg.scrapy.org/scrapy-0.9
This commit is contained in:
commit
90a04f0530
@ -276,6 +276,17 @@ Number of seconds to use for HTTP cache expiration. Requests that were cached
|
||||
before this time will be re-downloaded. If zero, cached requests will always
|
||||
expire. A negative number means requests will never expire.
|
||||
|
||||
.. setting:: HTTPCACHE_IGNORE_HTTP_CODES
|
||||
|
||||
HTTPCACHE_IGNORE_HTTP_CODES
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
.. versionadded:: 0.10
|
||||
|
||||
Default: ``[]``
|
||||
|
||||
Don't cache responses with these HTTP codes.
|
||||
|
||||
.. setting:: HTTPCACHE_IGNORE_MISSING
|
||||
|
||||
HTTPCACHE_IGNORE_MISSING
|
||||
|
@ -2,8 +2,8 @@
|
||||
Scrapy - a screen scraping framework written in Python
|
||||
"""
|
||||
|
||||
version_info = (0, 9, 0, '')
|
||||
__version__ = "0.9"
|
||||
version_info = (0, 10, 0, 'dev')
|
||||
__version__ = "0.10-dev"
|
||||
|
||||
import sys, os, warnings
|
||||
|
||||
|
@ -123,6 +123,7 @@ HTTPCACHE_DIR = ''
|
||||
HTTPCACHE_IGNORE_MISSING = False
|
||||
HTTPCACHE_STORAGE = 'scrapy.contrib.downloadermiddleware.httpcache.FilesystemCacheStorage'
|
||||
HTTPCACHE_EXPIRATION_SECS = 0
|
||||
HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
|
||||
ITEM_PROCESSOR = 'scrapy.contrib.pipeline.ItemPipelineManager'
|
||||
|
||||
|
@ -22,6 +22,7 @@ class HttpCacheMiddleware(object):
|
||||
def __init__(self, settings=conf.settings):
|
||||
self.storage = load_object(settings['HTTPCACHE_STORAGE'])(settings)
|
||||
self.ignore_missing = settings.getbool('HTTPCACHE_IGNORE_MISSING')
|
||||
self.ignore_http_codes = map(int, settings.getlist('HTTPCACHE_IGNORE_HTTP_CODES'))
|
||||
dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
|
||||
dispatcher.connect(self.spider_closed, signal=signals.spider_closed)
|
||||
|
||||
@ -35,17 +36,20 @@ class HttpCacheMiddleware(object):
|
||||
if not self.is_cacheable(request):
|
||||
return
|
||||
response = self.storage.retrieve_response(spider, request)
|
||||
if response:
|
||||
if response and self.is_cacheable_response(response):
|
||||
response.flags.append('cached')
|
||||
return response
|
||||
elif self.ignore_missing:
|
||||
raise IgnoreRequest("Ignored request not in cache: %s" % request)
|
||||
|
||||
def process_response(self, request, response, spider):
|
||||
if self.is_cacheable(request):
|
||||
if self.is_cacheable(request) and self.is_cacheable_response(response):
|
||||
self.storage.store_response(spider, request, response)
|
||||
return response
|
||||
|
||||
def is_cacheable_response(self, response):
|
||||
return response.status not in self.ignore_http_codes
|
||||
|
||||
def is_cacheable(self, request):
|
||||
return urlparse_cached(request).scheme in ['http', 'https']
|
||||
|
||||
|
@ -24,6 +24,7 @@ class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
settings = {
|
||||
'HTTPCACHE_DIR': self.tmpdir,
|
||||
'HTTPCACHE_EXPIRATION_SECS': 1,
|
||||
'HTTPCACHE_IGNORE_HTTP_CODES': [],
|
||||
}
|
||||
settings.update(new_settings)
|
||||
return Settings(settings)
|
||||
@ -76,6 +77,22 @@ class HttpCacheMiddlewareTest(unittest.TestCase):
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
|
||||
def test_middleware_ignore_http_codes(self):
|
||||
# test response is not cached
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_HTTP_CODES=[202])
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
assert mw.storage.retrieve_response(self.spider, self.request) is None
|
||||
assert mw.process_request(self.request, self.spider) is None
|
||||
|
||||
# test response is cached
|
||||
mw = self._get_middleware(HTTPCACHE_IGNORE_HTTP_CODES=[203])
|
||||
mw.process_response(self.request, self.response, self.spider)
|
||||
response = mw.process_request(self.request, self.spider)
|
||||
assert isinstance(response, HtmlResponse)
|
||||
self.assertEqualResponse(self.response, response)
|
||||
assert 'cached' in response.flags
|
||||
|
||||
def assertEqualResponse(self, response1, response2):
|
||||
self.assertEqual(response1.url, response2.url)
|
||||
self.assertEqual(response1.status, response2.status)
|
||||
|
Loading…
x
Reference in New Issue
Block a user