mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 22:44:03 +00:00
Fix for #612 + integration-style tests for HttpErrorMiddleware
This commit is contained in:
parent
364790eb07
commit
75060d1424
@ -25,7 +25,7 @@ class HttpErrorMiddleware(object):
|
||||
self.handle_httpstatus_list = settings.getlist('HTTPERROR_ALLOWED_CODES')
|
||||
|
||||
def process_spider_input(self, response, spider):
|
||||
if 200 <= response.status < 300: # common case
|
||||
if 200 <= response.status < 300: # common case
|
||||
return
|
||||
meta = response.meta
|
||||
if 'handle_httpstatus_all' in meta:
|
||||
@ -38,11 +38,14 @@ class HttpErrorMiddleware(object):
|
||||
allowed_statuses = getattr(spider, 'handle_httpstatus_list', self.handle_httpstatus_list)
|
||||
if response.status in allowed_statuses:
|
||||
return
|
||||
log.msg(format="Ignoring HTTP response code: not handled or not allowed: %(status_code)d",
|
||||
level=log.DEBUG, spider=spider,
|
||||
status_code=response.status)
|
||||
raise HttpError(response, 'Ignoring non-200 response')
|
||||
|
||||
def process_spider_exception(self, response, exception, spider):
    """Log and absorb ``HttpError`` exceptions.

    If ``exception`` is an ``HttpError``, a DEBUG-level message naming
    the ignored ``response`` is logged for ``spider`` and an empty list
    is returned.  For any other exception type nothing is logged and
    ``None`` is returned (implicitly in the original, explicitly here).
    """
    # Guard clause: anything that is not an HttpError is left untouched.
    if not isinstance(exception, HttpError):
        return None

    log.msg(
        format="Ignoring response %(response)r: HTTP status code is not handled or not allowed",
        response=response,
        spider=spider,
        level=log.DEBUG,
    )
    return []
|
||||
|
@ -1,11 +1,51 @@
|
||||
from unittest import TestCase
|
||||
|
||||
from twisted.trial.unittest import TestCase as TrialTestCase
|
||||
from twisted.internet import defer
|
||||
|
||||
from scrapy.utils.test import docrawl, get_testlog
|
||||
from scrapy.tests.mockserver import MockServer
|
||||
from scrapy.http import Response, Request
|
||||
from scrapy.spider import Spider
|
||||
from scrapy.contrib.spidermiddleware.httperror import HttpErrorMiddleware, HttpError
|
||||
from scrapy.settings import Settings
|
||||
|
||||
|
||||
class _HttpErrorSpider(Spider):
    """Test spider that records which responses were parsed or failed.

    It requests four ``/status?n=<code>`` URLs on ``localhost:8998`` and
    files each outcome into one of three sets, keyed by the last three
    characters of the response URL (the status code in those URLs):

    * ``parsed``  -- responses that reached ``parse``
    * ``failed``  -- failures that reached ``on_error`` and were re-raised
    * ``skipped`` -- ``HttpError`` failures whose status is listed in
      ``bypass_status_codes`` and were therefore handed to ``parse``
    """

    name = 'httperror'
    start_urls = [
        "http://localhost:8998/status?n=200",
        "http://localhost:8998/status?n=404",
        "http://localhost:8998/status?n=402",
        "http://localhost:8998/status?n=500",
    ]
    # Statuses whose HttpError is redirected to ``parse`` instead of being
    # counted as a failure.  Empty by default.
    # NOTE(review): presumably overridden per instance through constructor
    # keyword arguments handled by Spider.__init__ -- confirm.
    bypass_status_codes = set()

    def __init__(self, *args, **kwargs):
        """Initialise the base spider, then create the empty result sets."""
        super(_HttpErrorSpider, self).__init__(*args, **kwargs)
        self.failed, self.skipped, self.parsed = set(), set(), set()

    def start_requests(self):
        """Yield one request per start URL, wired to ``parse``/``on_error``."""
        for target in self.start_urls:
            yield Request(target, callback=self.parse, errback=self.on_error)

    def parse(self, response):
        """Remember the trailing three characters of the response URL."""
        status_tag = response.url[-3:]
        self.parsed.add(status_tag)

    def on_error(self, failure):
        """Errback: bypass selected ``HttpError`` statuses, record the rest.

        Returns the result of ``parse`` for bypassed responses; otherwise
        records the failure and returns it unchanged.
        """
        error = failure.value
        if (isinstance(error, HttpError)
                and error.response.status in self.bypass_status_codes):
            bypassed = error.response
            self.skipped.add(bypassed.url[-3:])
            return self.parse(bypassed)

        # it assumes there is a response attached to failure
        self.failed.add(failure.value.response.url[-3:])
        return failure
|
||||
|
||||
|
||||
def _responses(request, status_codes):
|
||||
responses = []
|
||||
for code in status_codes:
|
||||
@ -48,6 +88,7 @@ class TestHttpErrorMiddleware(TestCase):
|
||||
self.assertEquals(None,
|
||||
self.mw.process_spider_input(self.res404, self.spider))
|
||||
|
||||
|
||||
class TestHttpErrorMiddlewareSettings(TestCase):
|
||||
"""Similar test, but with settings"""
|
||||
|
||||
@ -85,6 +126,7 @@ class TestHttpErrorMiddlewareSettings(TestCase):
|
||||
self.assertRaises(HttpError,
|
||||
self.mw.process_spider_input, self.res402, self.spider)
|
||||
|
||||
|
||||
class TestHttpErrorMiddlewareHandleAll(TestCase):
|
||||
|
||||
def setUp(self):
|
||||
@ -112,3 +154,34 @@ class TestHttpErrorMiddlewareHandleAll(TestCase):
|
||||
self.assertRaises(HttpError,
|
||||
self.mw.process_spider_input, res402, self.spider)
|
||||
|
||||
|
||||
class TestHttpErrorMiddlewareIntegrational(TrialTestCase):
    """Integration-style tests: crawl the mock server with
    ``_HttpErrorSpider`` and check which responses were parsed, failed
    or bypassed, and what was written to the test log.
    """

    def setUp(self):
        # Enter the MockServer context manually; tearDown performs the
        # matching exit.
        self.mockserver = MockServer()
        self.mockserver.__enter__()

    def tearDown(self):
        self.mockserver.__exit__(None, None, None)

    @defer.inlineCallbacks
    def test_middleware_works(self):
        """Without bypassed codes only the 200 response is parsed."""
        spider = _HttpErrorSpider()
        yield docrawl(spider)
        # Use a TestCase assertion rather than a bare ``assert`` so the
        # check is not stripped under ``python -O`` and reports failures
        # like the assertions around it.
        self.assertEqual(spider.skipped, set())
        self.assertEqual(spider.parsed, {'200'})
        self.assertEqual(spider.failed, {'404', '402', '500'})

    @defer.inlineCallbacks
    def test_logging(self):
        """Bypassed and successful responses must not be logged as ignored."""
        spider = _HttpErrorSpider(bypass_status_codes={402})
        yield docrawl(spider)
        self.assertEqual(spider.parsed, {'200', '402'})
        self.assertEqual(spider.skipped, {'402'})
        self.assertEqual(spider.failed, {'404', '500'})

        log = get_testlog()
        self.assertIn('Ignoring response <404', log)
        self.assertIn('Ignoring response <500', log)
        self.assertNotIn('Ignoring response <200', log)
        self.assertNotIn('Ignoring response <402', log)
|
||||
|
Loading…
x
Reference in New Issue
Block a user