1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 20:04:09 +00:00
scrapy/tests/test_responsetypes.py

95 lines
4.2 KiB
Python
Raw Normal View History

# -*- coding: utf-8 -*-
import unittest
from scrapy.responsetypes import responsetypes
from scrapy.http import Response, TextResponse, XmlResponse, HtmlResponse, Headers
class ResponseTypesTest(unittest.TestCase):
def test_from_filename(self):
mappings = [
('data.bin', Response),
('file.txt', TextResponse),
('file.xml.gz', Response),
('file.xml', XmlResponse),
('file.html', HtmlResponse),
('file.unknownext', Response),
]
for source, cls in mappings:
retcls = responsetypes.from_filename(source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_from_content_disposition(self):
mappings = [
(b'attachment; filename="data.xml"', XmlResponse),
(b'attachment; filename=data.xml', XmlResponse),
(u'attachment;filename=data£.tar.gz'.encode('utf-8'), Response),
(u'attachment;filename=dataµ.tar.gz'.encode('latin-1'), Response),
(u'attachment;filename=data高.doc'.encode('gbk'), Response),
(u'attachment;filename=دورهdata.html'.encode('cp720'), HtmlResponse),
(u'attachment;filename=日本語版Wikipedia.xml'.encode('iso2022_jp'), XmlResponse),
]
for source, cls in mappings:
retcls = responsetypes.from_content_disposition(source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_from_content_type(self):
mappings = [
('text/html; charset=UTF-8', HtmlResponse),
('text/xml; charset=UTF-8', XmlResponse),
('application/xhtml+xml; charset=UTF-8', HtmlResponse),
('application/vnd.wap.xhtml+xml; charset=utf-8', HtmlResponse),
('application/xml; charset=UTF-8', XmlResponse),
('application/octet-stream', Response),
('application/x-json; encoding=UTF8;charset=UTF-8', TextResponse),
('application/json-amazonui-streaming;charset=UTF-8', TextResponse),
]
for source, cls in mappings:
retcls = responsetypes.from_content_type(source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_from_body(self):
mappings = [
2015-07-31 14:31:11 +02:00
(b'\x03\x02\xdf\xdd\x23', Response),
(b'Some plain text\ndata with tabs\t and null bytes\0', TextResponse),
(b'<html><head><title>Hello</title></head>', HtmlResponse),
(b'<?xml version="1.0" encoding="utf-8"', XmlResponse),
]
for source, cls in mappings:
retcls = responsetypes.from_body(source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_from_headers(self):
mappings = [
({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
({'Content-Type': ['application/octet-stream'], 'Content-Disposition': ['attachment; filename=data.txt']}, TextResponse),
({'Content-Type': ['text/html; charset=utf-8'], 'Content-Encoding': ['gzip']}, Response),
]
for source, cls in mappings:
source = Headers(source)
retcls = responsetypes.from_headers(source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_from_args(self):
# TODO: add more tests that check precedence between the different arguments
mappings = [
({'url': 'http://www.example.com/data.csv'}, TextResponse),
# headers takes precedence over url
({'headers': Headers({'Content-Type': ['text/html; charset=utf-8']}), 'url': 'http://www.example.com/item/'}, HtmlResponse),
({'headers': Headers({'Content-Disposition': ['attachment; filename="data.xml.gz"']}), 'url': 'http://www.example.com/page/'}, Response),
]
for source, cls in mappings:
retcls = responsetypes.from_args(**source)
assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
def test_custom_mime_types_loaded(self):
# check that mime.types files shipped with scrapy are loaded
self.assertEqual(responsetypes.mimetypes.guess_type('x.scrapytest')[0], 'x-scrapy/test')
if __name__ == "__main__":
unittest.main()