scrapy/tests/test_responsetypes.py

# -*- coding: utf-8 -*-
import unittest
from scrapy.responsetypes import responsetypes

from scrapy.http import Response, TextResponse, XmlResponse, HtmlResponse, Headers


class ResponseTypesTest(unittest.TestCase):
    """Check which response class ``responsetypes`` selects for given inputs.

    Every test lists ``(source, expected_class)`` pairs and verifies that the
    matching ``responsetypes.from_*`` lookup returns exactly the expected
    class object (identity, not equality or subclassing).
    """

    def _assert_response_classes(self, resolver, mappings):
        """Assert that ``resolver(source) is cls`` for each pair in *mappings*.

        :param resolver: callable that takes one source value and returns the
            response class selected for it
        :param mappings: iterable of ``(source, expected_class)`` pairs
        """
        for source, cls in mappings:
            retcls = resolver(source)
            # assertIs instead of a bare ``assert``: assert statements are
            # removed when Python runs with -O, which would make every test
            # in this class pass without checking anything.
            self.assertIs(retcls, cls, "%s ==> %s != %s" % (source, retcls, cls))

    def test_from_filename(self):
        # class is chosen from the file name alone
        mappings = [
            ('data.bin', Response),
            ('file.txt', TextResponse),
            ('file.xml.gz', Response),
            ('file.xml', XmlResponse),
            ('file.html', HtmlResponse),
            ('file.unknownext', Response),
        ]
        self._assert_response_classes(responsetypes.from_filename, mappings)

    def test_from_content_disposition(self):
        # header values are raw bytes, including non-ASCII file names encoded
        # with several different codecs
        mappings = [
            (b'attachment; filename="data.xml"', XmlResponse),
            (b'attachment; filename=data.xml', XmlResponse),
            (u'attachment;filename=data£.tar.gz'.encode('utf-8'), Response),
            (u'attachment;filename=dataµ.tar.gz'.encode('latin-1'), Response),
            (u'attachment;filename=data高.doc'.encode('gbk'), Response),
            (u'attachment;filename=دورهdata.html'.encode('cp720'), HtmlResponse),
            (u'attachment;filename=日本語版Wikipedia.xml'.encode('iso2022_jp'), XmlResponse),
        ]
        self._assert_response_classes(
            responsetypes.from_content_disposition, mappings)

    def test_from_content_type(self):
        # Content-Type values, with and without parameters such as charset
        mappings = [
            ('text/html; charset=UTF-8', HtmlResponse),
            ('text/xml; charset=UTF-8', XmlResponse),
            ('application/xhtml+xml; charset=UTF-8', HtmlResponse),
            ('application/vnd.wap.xhtml+xml; charset=utf-8', HtmlResponse),
            ('application/xml; charset=UTF-8', XmlResponse),
            ('application/octet-stream', Response),
            ('application/x-json; encoding=UTF8;charset=UTF-8', TextResponse),
            ('application/json-amazonui-streaming;charset=UTF-8', TextResponse),
        ]
        self._assert_response_classes(responsetypes.from_content_type, mappings)

    def test_from_body(self):
        # class is chosen by inspecting the raw body bytes
        mappings = [
            (b'\x03\x02\xdf\xdd\x23', Response),
            (b'Some plain text\ndata with tabs\t and null bytes\0', TextResponse),
            (b'<html><head><title>Hello</title></head>', HtmlResponse),
            (b'<?xml version="1.0" encoding="utf-8"', XmlResponse),
        ]
        self._assert_response_classes(responsetypes.from_body, mappings)

    def test_from_headers(self):
        mappings = [
            ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
            ({'Content-Type': ['application/octet-stream'], 'Content-Disposition': ['attachment; filename=data.txt']}, TextResponse),
            ({'Content-Type': ['text/html; charset=utf-8'], 'Content-Encoding': ['gzip']}, Response),
        ]
        # from_headers() is given Headers objects, so wrap the plain dicts
        # first; the wrapped object is also what a failure message shows.
        wrapped = [(Headers(raw), cls) for raw, cls in mappings]
        self._assert_response_classes(responsetypes.from_headers, wrapped)

    def test_from_args(self):
        # TODO: add more tests that check precedence between the different arguments
        mappings = [
            ({'url': 'http://www.example.com/data.csv'}, TextResponse),
            # headers take precedence over url
            ({'headers': Headers({'Content-Type': ['text/html; charset=utf-8']}), 'url': 'http://www.example.com/item/'}, HtmlResponse),
            ({'headers': Headers({'Content-Disposition': ['attachment; filename="data.xml.gz"']}), 'url': 'http://www.example.com/page/'}, Response),
        ]
        # each source is a dict of keyword arguments for from_args()
        self._assert_response_classes(
            lambda kwargs: responsetypes.from_args(**kwargs), mappings)

    def test_custom_mime_types_loaded(self):
        # check that mime.types files shipped with scrapy are loaded
        guessed_type = responsetypes.mimetypes.guess_type('x.scrapytest')[0]
        self.assertEqual(guessed_type, 'x-scrapy/test')

# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
-												More liberal Content-Disposition header parsing

Fixes #1782

											
										
										
											2016-02-17 16:55:28 +01:00
+								# -*- coding: utf-8 -*-
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								import unittest
-												moved module scrapy.core.downloader.responsetypes to scrapy.responsetypes

--HG--
rename : scrapy/core/downloader/responsetypes/mime.types => scrapy/mime.types
rename : scrapy/core/downloader/responsetypes/__init__.py => scrapy/responsetypes.py

											
										
										
											2011-08-07 02:49:57 -03:00
+								from scrapy.responsetypes import responsetypes
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
 								from scrapy.http import Response, TextResponse, XmlResponse, HtmlResponse, Headers
-												Add missing blank lines between functions and classes

Also fixed 2 unrelated Flake8 issues

											
										
										
											2019-11-18 17:16:09 +01:00
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								class ResponseTypesTest(unittest.TestCase):
 								    def test_from_filename(self):
 								        mappings = [
 								            ('data.bin', Response),
 								            ('file.txt', TextResponse),
 								            ('file.xml.gz', Response),
 								            ('file.xml', XmlResponse),
 								            ('file.html', HtmlResponse),
-												added support for unknown file extensions to ResponseTypes.from_filename

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40786

											
										
										
											2009-01-27 13:08:07 +00:00
+								            ('file.unknownext', Response),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								        ]
 								        for source, cls in mappings:
 								            retcls = responsetypes.from_filename(source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
 								    def test_from_content_disposition(self):
 								        mappings = [
-												More liberal Content-Disposition header parsing

Fixes #1782

											
										
										
											2016-02-17 16:55:28 +01:00
+								            (b'attachment; filename="data.xml"', XmlResponse),
 								            (b'attachment; filename=data.xml', XmlResponse),
 								            (u'attachment;filename=data£.tar.gz'.encode('utf-8'), Response),
 								            (u'attachment;filename=dataµ.tar.gz'.encode('latin-1'), Response),
 								            (u'attachment;filename=data高.doc'.encode('gbk'), Response),
 								            (u'attachment;filename=دورهdata.html'.encode('cp720'), HtmlResponse),
 								            (u'attachment;filename=日本語版Wikipedia.xml'.encode('iso2022_jp'), XmlResponse),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								        ]
 								        for source, cls in mappings:
 								            retcls = responsetypes.from_content_disposition(source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
 								    def test_from_content_type(self):
 								        mappings = [
 								            ('text/html; charset=UTF-8', HtmlResponse),
 								            ('text/xml; charset=UTF-8', XmlResponse),
-												core: add application/xhtml+xml support to responsetypes

more info at http://www.w3.org/TR/xhtml-media-types/#media-types

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40977

											
										
										
											2009-03-10 17:51:03 +00:00
+								            ('application/xhtml+xml; charset=UTF-8', HtmlResponse),
-												Handle as html standard mimetype defined by Open Mobile Alliance

											
										
										
											2012-02-24 16:16:35 -02:00
+								            ('application/vnd.wap.xhtml+xml; charset=utf-8', HtmlResponse),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								            ('application/xml; charset=UTF-8', XmlResponse),
 								            ('application/octet-stream', Response),
-												interpreting application/x-json as TextResponse

											
										
										
											2015-07-02 19:51:49 +02:00
+								            ('application/x-json; encoding=UTF8;charset=UTF-8', TextResponse),
-												interpreting json-amazonui-streaming as TextResponse

											
										
										
											2015-09-18 17:16:43 +09:00
+								            ('application/json-amazonui-streaming;charset=UTF-8', TextResponse),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								        ]
 								        for source, cls in mappings:
 								            retcls = responsetypes.from_content_type(source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
 								    def test_from_body(self):
 								        mappings = [
-												using bytes for body constant

											
										
										
											2015-07-31 14:31:11 +02:00
+								            (b'\x03\x02\xdf\xdd\x23', Response),
 								            (b'Some plain text\ndata with tabs\t and null bytes\0', TextResponse),
 								            (b'<html><head><title>Hello</title></head>', HtmlResponse),
 								            (b'<?xml version="1.0" encoding="utf-8"', XmlResponse),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								        ]
 								        for source, cls in mappings:
 								            retcls = responsetypes.from_body(source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
-												PY3 fix test downloadermiddleware decompression

											
										
										
											2015-07-25 20:51:27 +00:00
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								    def test_from_headers(self):
 								        mappings = [
 								            ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
-												changing extension in test from csv to txt (not all systems support the text/csv mimetype)

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40787

											
										
										
											2009-01-27 13:33:28 +00:00
+								            ({'Content-Type': ['application/octet-stream'], 'Content-Disposition': ['attachment; filename=data.txt']}, TextResponse),
-												Fixed bug in HTTP Compression middleware which was failing to properly discover the encoding when the encoding was declared inside the response body. Closes #239. Also changed responsetypes to return Response class (instead of HtmlResponse) when the response has a Content-Encoding header

											
										
										
											2010-09-14 20:22:25 -03:00
+								            ({'Content-Type': ['text/html; charset=utf-8'], 'Content-Encoding': ['gzip']}, Response),
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								        ]
 								        for source, cls in mappings:
 								            source = Headers(source)
 								            retcls = responsetypes.from_headers(source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
-												added support for unknown file extensions to ResponseTypes.from_filename

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40786

											
										
										
											2009-01-27 13:08:07 +00:00
-												bundled mime.types file with scrapy for platforms that have poor support for mime.types (windows), added from_args() convenience method to ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40797

											
										
										
											2009-01-29 18:52:46 +00:00
+								    def test_from_args(self):
 								        # TODO: add more tests that check precedence between the different arguments
 								        mappings = [
 								            ({'url': 'http://www.example.com/data.csv'}, TextResponse),
 								            # headers takes precedence over url
 								            ({'headers': Headers({'Content-Type': ['text/html; charset=utf-8']}), 'url': 'http://www.example.com/item/'}, HtmlResponse),
-												added one more case to responsetypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40801

											
										
										
											2009-01-30 11:14:08 +00:00
+								            ({'headers': Headers({'Content-Disposition': ['attachment; filename="data.xml.gz"']}), 'url': 'http://www.example.com/page/'}, Response),
-												bundled mime.types file with scrapy for platforms that have poor support for mime.types (windows), added from_args() convenience method to ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40797

											
										
										
											2009-01-29 18:52:46 +00:00
+								        ]
 								        for source, cls in mappings:
 								            retcls = responsetypes.from_args(**source)
 								            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
-												Added test to make sure custom mime.types shipped with Scrapy is loaded, and made Scrapy more egg-friendly by using pkutil.get_data()

											
										
										
											2010-12-20 15:16:21 -02:00
+								    def test_custom_mime_types_loaded(self):
 								        # check that mime.types files shipped with scrapy are loaded
 								        self.assertEqual(responsetypes.mimetypes.guess_type('x.scrapytest')[0], 'x-scrapy/test')
-												added more cases to ResponseTypes, and tests for ResponseTypes

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40781

											
										
										
											2009-01-27 11:27:09 +00:00
+								if __name__ == "__main__":
 								    unittest.main()