1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 21:23:44 +00:00
scrapy/tests/test_downloadermiddleware.py

93 lines
3.4 KiB
Python
Raw Normal View History

from twisted.trial.unittest import TestCase
from twisted.python.failure import Failure
from scrapy.http import Request, Response
from scrapy.spiders import Spider
from scrapy.core.downloader.middleware import DownloaderMiddlewareManager
from scrapy.utils.test import get_crawler
class ManagerTestCase(TestCase):
settings_dict = None
def setUp(self):
self.crawler = get_crawler(Spider, self.settings_dict)
self.spider = self.crawler._create_spider('foo')
self.mwman = DownloaderMiddlewareManager.from_crawler(self.crawler)
# some mw depends on stats collector
self.crawler.stats.open_spider(self.spider)
return self.mwman.open_spider(self.spider)
def tearDown(self):
self.crawler.stats.close_spider(self.spider, '')
return self.mwman.close_spider(self.spider)
def _download(self, request, response=None):
"""Executes downloader mw manager's download method and returns
the result (Request or Response) or raise exception in case of
failure.
"""
if not response:
response = Response(request.url)
def download_func(**kwargs):
return response
dfd = self.mwman.download(download_func, request, self.spider)
# catch deferred result and return the value
results = []
dfd.addBoth(results.append)
self._wait(dfd)
ret = results[0]
if isinstance(ret, Failure):
ret.raiseException()
return ret
class DefaultsTest(ManagerTestCase):
"""Tests default behavior with default settings"""
def test_request_response(self):
req = Request('http://example.com/index.html')
resp = Response(req.url, status=200)
ret = self._download(req, resp)
self.assertTrue(isinstance(ret, Response), "Non-response returned")
def test_3xx_and_invalid_gzipped_body_must_redirect(self):
"""Regression test for a failure when redirecting a compressed
request.
This happens when httpcompression middleware is executed before redirect
middleware and attempts to decompress a non-compressed body.
In particular when some website returns a 30x response with header
'Content-Encoding: gzip' giving as result the error below:
exceptions.IOError: Not a gzipped file
"""
req = Request('http://example.com')
body = '<p>You are being redirected</p>'
resp = Response(req.url, status=302, body=body, headers={
'Content-Length': str(len(body)),
'Content-Type': 'text/html',
'Content-Encoding': 'gzip',
'Location': 'http://example.com/login',
})
ret = self._download(request=req, response=resp)
self.assertTrue(isinstance(ret, Request),
"Not redirected: {0!r}".format(ret))
self.assertEqual(ret.url, resp.headers['Location'],
"Not redirected to location header")
def test_200_and_invalid_gzipped_body_must_fail(self):
req = Request('http://example.com')
body = '<p>You are being redirected</p>'
resp = Response(req.url, status=200, body=body, headers={
'Content-Length': str(len(body)),
'Content-Type': 'text/html',
'Content-Encoding': 'gzip',
'Location': 'http://example.com/login',
})
self.assertRaises(IOError, self._download, request=req, response=resp)