2016-02-17 16:55:28 +01:00
|
|
|
|
# -*- coding: utf-8 -*-
|
2009-01-27 11:27:09 +00:00
|
|
|
|
import unittest
|
2011-08-07 02:49:57 -03:00
|
|
|
|
from scrapy.responsetypes import responsetypes
|
2009-01-27 11:27:09 +00:00
|
|
|
|
|
|
|
|
|
from scrapy.http import Response, TextResponse, XmlResponse, HtmlResponse, Headers
|
|
|
|
|
|
2019-11-18 17:16:09 +01:00
|
|
|
|
|
2009-01-27 11:27:09 +00:00
|
|
|
|
class ResponseTypesTest(unittest.TestCase):
    """Verify which response class ``responsetypes`` picks for various hints.

    Each test holds a table of ``(source, expected_class)`` pairs and checks
    that the matching ``responsetypes.from_*`` lookup returns exactly that
    class object.  ``assertIs`` is used instead of a bare ``assert`` so the
    checks stay active when Python runs with ``-O``.
    """

    def test_from_filename(self):
        """File names (differing here by extension) map to the expected class."""
        mappings = [
            ('data.bin', Response),
            ('file.txt', TextResponse),
            ('file.xml.gz', Response),
            ('file.xml', XmlResponse),
            ('file.html', HtmlResponse),
            ('file.unknownext', Response),
        ]
        for source, cls in mappings:
            retcls = responsetypes.from_filename(source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_content_disposition(self):
        """Raw Content-Disposition values map to the expected class.

        The table includes quoted and unquoted file names as well as file
        names containing non-ASCII characters encoded with several codecs.
        """
        mappings = [
            (b'attachment; filename="data.xml"', XmlResponse),
            (b'attachment; filename=data.xml', XmlResponse),
            (u'attachment;filename=data£.tar.gz'.encode('utf-8'), Response),
            (u'attachment;filename=dataµ.tar.gz'.encode('latin-1'), Response),
            (u'attachment;filename=data高.doc'.encode('gbk'), Response),
            (u'attachment;filename=دورهdata.html'.encode('cp720'), HtmlResponse),
            (u'attachment;filename=日本語版Wikipedia.xml'.encode('iso2022_jp'), XmlResponse),
        ]
        for source, cls in mappings:
            retcls = responsetypes.from_content_disposition(source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_content_type(self):
        """Content-Type values (with optional parameters) map to the expected class."""
        mappings = [
            ('text/html; charset=UTF-8', HtmlResponse),
            ('text/xml; charset=UTF-8', XmlResponse),
            ('application/xhtml+xml; charset=UTF-8', HtmlResponse),
            ('application/vnd.wap.xhtml+xml; charset=utf-8', HtmlResponse),
            ('application/xml; charset=UTF-8', XmlResponse),
            ('application/octet-stream', Response),
            ('application/x-json; encoding=UTF8;charset=UTF-8', TextResponse),
            ('application/json-amazonui-streaming;charset=UTF-8', TextResponse),
        ]
        for source, cls in mappings:
            retcls = responsetypes.from_content_type(source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_body(self):
        """Raw body bytes map to the expected class."""
        mappings = [
            (b'\x03\x02\xdf\xdd\x23', Response),
            (b'Some plain text\ndata with tabs\t and null bytes\0', TextResponse),
            (b'<html><head><title>Hello</title></head>', HtmlResponse),
            (b'<?xml version="1.0" encoding="utf-8"', XmlResponse),
        ]
        for source, cls in mappings:
            retcls = responsetypes.from_body(source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_headers(self):
        """Header dicts, wrapped in ``Headers``, map to the expected class."""
        mappings = [
            ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
            (
                {
                    'Content-Type': ['application/octet-stream'],
                    'Content-Disposition': ['attachment; filename=data.txt'],
                },
                TextResponse,
            ),
            (
                {
                    'Content-Type': ['text/html; charset=utf-8'],
                    'Content-Encoding': ['gzip'],
                },
                Response,
            ),
        ]
        for source, cls in mappings:
            source = Headers(source)
            retcls = responsetypes.from_headers(source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_args(self):
        """Keyword-argument combinations map to the expected class."""
        # TODO: add more tests that check precedence between the different arguments
        mappings = [
            ({'url': 'http://www.example.com/data.csv'}, TextResponse),
            # headers takes precedence over url
            (
                {
                    'headers': Headers({'Content-Type': ['text/html; charset=utf-8']}),
                    'url': 'http://www.example.com/item/',
                },
                HtmlResponse,
            ),
            (
                {
                    'headers': Headers({'Content-Disposition': ['attachment; filename="data.xml.gz"']}),
                    'url': 'http://www.example.com/page/',
                },
                Response,
            ),
        ]
        for source, cls in mappings:
            retcls = responsetypes.from_args(**source)
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_custom_mime_types_loaded(self):
        """The custom ``.scrapytest`` extension resolves to ``x-scrapy/test``."""
        # check that mime.types files shipped with scrapy are loaded
        self.assertEqual(responsetypes.mimetypes.guess_type('x.scrapytest')[0], 'x-scrapy/test')
|
|
|
|
|
|
2009-01-27 11:27:09 +00:00
|
|
|
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|