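"""Tests for scrapy.downloadermiddlewares.retry.RetryMiddleware.

Covers retrying on retryable HTTP status codes (503), passing through
non-retryable responses (404), the ``dont_retry`` request meta key,
priority adjustment of retried requests, and retrying on Twisted
network-level exceptions.
"""
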
import unittest

from twisted.internet import defer
from twisted.internet.error import TimeoutError, DNSLookupError, \
        ConnectionRefusedError, ConnectionDone, ConnectError, \
        ConnectionLost, TCPTimedOutError

from scrapy import twisted_version
from scrapy.downloadermiddlewares.retry import RetryMiddleware
from scrapy.xlib.tx import ResponseFailed
from scrapy.spiders import Spider
from scrapy.http import Request, Response
from scrapy.utils.test import get_crawler

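# For context, a minimal sketch of how RetryMiddleware is tuned in a
# project's settings.py (the setting names are real Scrapy settings; the
# values below are illustrative, not asserted defaults):
#
#   RETRY_ENABLED = True
#   RETRY_TIMES = 2                 # what setUp() overrides as max_retry_times
#   RETRY_HTTP_CODES = [500, 502, 503, 504]
#   RETRY_PRIORITY_ADJUST = -1      # retries are scheduled at lower priority
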
class RetryTest(unittest.TestCase):

    def setUp(self):
        crawler = get_crawler(Spider)
        self.spider = crawler._create_spider('foo')
        self.mw = RetryMiddleware.from_crawler(crawler)
        self.mw.max_retry_times = 2

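    # Retried requests are deprioritized (RETRY_PRIORITY_ADJUST is -1 by
    # default), so the retry copy must sort below the original request.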
    def test_priority_adjust(self):
        req = Request('http://www.scrapytest.org/503')
        rsp = Response('http://www.scrapytest.org/503', body='', status=503)
        req2 = self.mw.process_response(req, rsp, self.spider)
        assert req2.priority < req.priority

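    # 404 is not in RETRY_HTTP_CODES by default, so the middleware must pass
    # the response through unchanged.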
    def test_404(self):
        req = Request('http://www.scrapytest.org/404')
        rsp = Response('http://www.scrapytest.org/404', body='', status=404)

        # don't retry 404s
        assert self.mw.process_response(req, rsp, self.spider) is rsp

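    # The dont_retry request meta key short-circuits the middleware in both
    # directions: True suppresses a retry that would otherwise happen, and
    # False leaves the normal retry logic in place.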
    def test_dont_retry(self):
        req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})
        rsp = Response('http://www.scrapytest.org/503', body='', status=503)

        # dont_retry is set: even a retryable 503 must be passed through
        r = self.mw.process_response(req, rsp, self.spider)
        assert r is rsp

        # with dont_retry explicitly False, a 503 is retried as usual
        req = Request('http://www.scrapytest.org/503', meta={'dont_retry': False})
        rsp = Response('http://www.scrapytest.org/503', body='', status=503)
        r = self.mw.process_response(req, rsp, self.spider)
        assert isinstance(r, Request)

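    # dont_retry also applies to network errors: process_exception must
    # return None so the failure propagates instead of being retried.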
    def test_dont_retry_exc(self):
        req = Request('http://www.scrapytest.org/503', meta={'dont_retry': True})

        r = self.mw.process_exception(req, DNSLookupError(), self.spider)
        assert r is None

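    # 503 is in RETRY_HTTP_CODES: each retry returns a fresh Request with an
    # incremented retry_times, and once max_retry_times (2) is exhausted the
    # response is handed back to the spider instead.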
    def test_503(self):
        req = Request('http://www.scrapytest.org/503')
        rsp = Response('http://www.scrapytest.org/503', body='', status=503)

        # first retry
        req = self.mw.process_response(req, rsp, self.spider)
        assert isinstance(req, Request)
        self.assertEqual(req.meta['retry_times'], 1)

        # second retry
        req = self.mw.process_response(req, rsp, self.spider)
        assert isinstance(req, Request)
        self.assertEqual(req.meta['retry_times'], 2)

        # discard it
        assert self.mw.process_response(req, rsp, self.spider) is rsp

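    # Network-level Twisted failures (the middleware's EXCEPTIONS_TO_RETRY)
    # are retried the same way as retryable HTTP codes. ResponseFailed is
    # only usable when the HTTP/1.1 download handler is available
    # (Twisted >= 11.1).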
    def test_twistederrors(self):
        exceptions = [defer.TimeoutError, TCPTimedOutError, TimeoutError,
                      DNSLookupError, ConnectionRefusedError, ConnectionDone,
                      ConnectError, ConnectionLost]
        if twisted_version >= (11, 1, 0):  # http11 available
            exceptions.append(ResponseFailed)

        for exc in exceptions:
            req = Request('http://www.scrapytest.org/%s' % exc.__name__)
            self._test_retry_exception(req, exc('foo'))

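    # Helper: drive a request through two retries and a final discard for a
    # given exception instance.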
    def _test_retry_exception(self, req, exception):
        # first retry
        req = self.mw.process_exception(req, exception, self.spider)
        assert isinstance(req, Request)
        self.assertEqual(req.meta['retry_times'], 1)

        # second retry
        req = self.mw.process_exception(req, exception, self.spider)
        assert isinstance(req, Request)
        self.assertEqual(req.meta['retry_times'], 2)

        # discard it
        req = self.mw.process_exception(req, exception, self.spider)
        self.assertIsNone(req)

if __name__ == "__main__":
    unittest.main()