2008-09-09 12:56:01 +00:00
|
|
|
from unittest import TestCase, main
|
2012-08-30 11:24:29 -03:00
|
|
|
from scrapy.http import Response, XmlResponse
|
2015-04-20 21:23:05 -03:00
|
|
|
from scrapy.downloadermiddlewares.decompression import DecompressionMiddleware
|
2015-05-09 04:20:09 -03:00
|
|
|
from scrapy.spiders import Spider
|
2014-07-30 16:53:28 -03:00
|
|
|
from tests import get_testdata
|
2012-08-30 11:24:29 -03:00
|
|
|
from scrapy.utils.test import assert_samelines
|
2008-09-09 12:56:01 +00:00
|
|
|
|
2009-10-13 16:38:49 -02:00
|
|
|
|
|
|
|
def _test_data(formats):
    """Load the reference feed and wrap each compressed variant in a Response.

    ``formats`` is an iterable of file-name suffixes; each suffix is appended
    to ``'feed-sample1.'`` to select a file from the ``compressed`` test-data
    directory.

    Returns a ``(uncompressed_body, test_responses)`` tuple in which
    ``test_responses`` maps every suffix to a ``Response`` for
    ``http://foo.com/bar`` whose body is whatever ``get_testdata`` returns
    for the matching sample file.
    """
    plain_body = get_testdata('compressed', 'feed-sample1.xml')
    responses_by_format = {
        suffix: Response(
            'http://foo.com/bar',
            body=get_testdata('compressed', 'feed-sample1.' + suffix),
        )
        for suffix in formats
    }
    return plain_body, responses_by_format
|
2009-01-04 18:23:12 +00:00
|
|
|
|
2009-10-13 16:38:49 -02:00
|
|
|
|
|
|
|
class DecompressionMiddlewareTest(TestCase):
    """Exercise ``DecompressionMiddleware.process_response`` on sample feeds.

    Checks use ``TestCase`` assertion methods rather than bare ``assert``
    statements so they are still enforced when Python runs with ``-O``.
    """

    # Suffixes of the compressed fixtures ('feed-sample1.<suffix>').
    test_formats = ['tar', 'xml.bz2', 'xml.gz', 'zip']
    # Evaluated once, at class-definition time, and shared by every test.
    uncompressed_body, test_responses = _test_data(test_formats)

    def setUp(self):
        """Create a fresh middleware and spider for each test."""
        self.mw = DecompressionMiddleware()
        self.spider = Spider('foo')

    def test_known_compression_formats(self):
        """Each compressed fixture must come back as an XmlResponse whose
        body has the same lines as the uncompressed reference feed."""
        for fmt in self.test_formats:
            rsp = self.test_responses[fmt]
            new = self.mw.process_response(None, rsp, self.spider)
            self.assertIsInstance(
                new, XmlResponse,
                'Failed %s, response type %s' % (fmt, type(new).__name__))
            assert_samelines(self, new.body, self.uncompressed_body, fmt)

    def test_plain_response(self):
        """A response carrying the uncompressed feed must be returned as the
        very same object."""
        rsp = Response(url='http://test.com', body=self.uncompressed_body)
        new = self.mw.process_response(None, rsp, self.spider)
        self.assertIs(new, rsp)
        assert_samelines(self, new.body, rsp.body)

    def test_empty_response(self):
        """A response with an empty body must be returned as the very same
        object, with its body still empty."""
        rsp = Response(url='http://test.com', body=b'')
        new = self.mw.process_response(None, rsp, self.spider)
        self.assertIs(new, rsp)
        self.assertFalse(rsp.body)
        self.assertFalse(new.body)

    def tearDown(self):
        """Drop the per-test middleware instance."""
        del self.mw
|
|
|
|
|
2009-01-04 18:23:12 +00:00
|
|
|
|
2008-09-09 12:56:01 +00:00
|
|
|
# Allow running this test module directly as a script.
if __name__ == '__main__':
    main()
|