diff --git a/pytest.ini b/pytest.ini index da0f68e20..de0bccbf1 100644 --- a/pytest.ini +++ b/pytest.ini @@ -54,7 +54,7 @@ flake8-ignore = scrapy/core/downloader/__init__.py E501 scrapy/core/downloader/contextfactory.py E501 E128 E126 scrapy/core/downloader/middleware.py E501 - scrapy/core/downloader/tls.py E501 E241 + scrapy/core/downloader/tls.py E501 scrapy/core/downloader/webclient.py E731 E501 E128 E126 scrapy/core/downloader/handlers/__init__.py E501 scrapy/core/downloader/handlers/ftp.py E501 E128 E127 @@ -97,9 +97,9 @@ flake8-ignore = scrapy/loader/processors.py E501 # scrapy/pipelines scrapy/pipelines/__init__.py E501 - scrapy/pipelines/files.py E116 E501 E266 - scrapy/pipelines/images.py E265 E501 - scrapy/pipelines/media.py E125 E501 E266 + scrapy/pipelines/files.py E116 E501 + scrapy/pipelines/images.py E501 + scrapy/pipelines/media.py E125 E501 # scrapy/selector scrapy/selector/__init__.py F403 scrapy/selector/unified.py E501 E111 @@ -149,7 +149,7 @@ flake8-ignore = scrapy/__init__.py E402 E501 scrapy/cmdline.py E501 scrapy/crawler.py E501 - scrapy/dupefilters.py E501 E202 + scrapy/dupefilters.py E501 scrapy/exceptions.py E501 scrapy/exporters.py E501 scrapy/interfaces.py E501 @@ -178,13 +178,13 @@ flake8-ignore = tests/test_command_shell.py E501 E128 tests/test_commands.py E128 E501 tests/test_contracts.py E501 E128 - tests/test_crawl.py E501 E741 E265 + tests/test_crawl.py E501 E741 tests/test_crawler.py F841 E501 tests/test_dependencies.py F841 E501 - tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123 + tests/test_downloader_handlers.py E124 E127 E128 E501 E126 E123 tests/test_downloadermiddleware.py E501 tests/test_downloadermiddleware_ajaxcrawlable.py E501 - tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126 + tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E126 tests/test_downloadermiddleware_decompression.py E127 tests/test_downloadermiddleware_defaultheaders.py E501 tests/test_downloadermiddleware_downloadtimeout.py E501 @@ -199,15 +199,15 @@ flake8-ignore = tests/test_engine.py E401 E501 E128 tests/test_exporters.py E501 E731 E128 E124 tests/test_extension_telnet.py F841 - tests/test_feedexport.py E501 F841 E241 + tests/test_feedexport.py E501 F841 tests/test_http_cookies.py E501 tests/test_http_headers.py E501 tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123 - tests/test_http_response.py E501 E128 E265 + tests/test_http_response.py E501 E128 tests/test_item.py E128 F841 tests/test_link.py E501 tests/test_linkextractors.py E501 E128 E124 - tests/test_loader.py E501 E731 E741 E128 E117 E241 + tests/test_loader.py E501 E731 E741 E128 E117 tests/test_logformatter.py E128 E501 E122 tests/test_mail.py E128 E501 tests/test_middleware.py E501 E128 @@ -226,7 +226,7 @@ flake8-ignore = tests/test_spidermiddleware_httperror.py E128 E501 E127 E121 tests/test_spidermiddleware_offsite.py E501 E128 E111 tests/test_spidermiddleware_output_chain.py E501 - tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121 + tests/test_spidermiddleware_referer.py E501 F841 E125 E124 E501 E121 tests/test_squeues.py E501 E741 tests/test_utils_asyncio.py E501 tests/test_utils_conf.py E501 E128 @@ -235,7 +235,7 @@ flake8-ignore = tests/test_utils_defer.py E501 F841 tests/test_utils_deprecate.py F841 E501 tests/test_utils_http.py E501 E128 W504 - tests/test_utils_iterators.py E501 E128 E129 E241 + tests/test_utils_iterators.py E501 E128 E129 tests/test_utils_log.py E741 tests/test_utils_python.py E501 E731 tests/test_utils_reqser.py E501 E128 @@ -243,8 +243,8 @@ flake8-ignore = tests/test_utils_response.py E501 tests/test_utils_signal.py E741 F841 E731 tests/test_utils_sitemap.py E128 E501 E124 - tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123 - tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126 + tests/test_utils_url.py E501 E127 E125 E501 E126 E123 + tests/test_webclient.py E501 E128 E122 E402 E123 E126 tests/test_cmdline/__init__.py E501 tests/test_settings/__init__.py E501 E128 tests/test_spiderloader/__init__.py E128 E501 diff --git a/scrapy/core/downloader/tls.py b/scrapy/core/downloader/tls.py index a1c881d5e..e43a3c83e 100644 --- a/scrapy/core/downloader/tls.py +++ b/scrapy/core/downloader/tls.py @@ -20,8 +20,8 @@ METHOD_TLSv12 = 'TLSv1.2' openssl_methods = { - METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended) - METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended) + METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended) + METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended) METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only diff --git a/scrapy/dupefilters.py b/scrapy/dupefilters.py index d74c8ed36..ac5478e7c 100644 --- a/scrapy/dupefilters.py +++ b/scrapy/dupefilters.py @@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter): def log(self, request, spider): if self.debug: msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)" - args = {'request': request, 'referer': referer_str(request) } + args = {'request': request, 'referer': referer_str(request)} self.logger.debug(msg, args, extra={'spider': spider}) elif self.logdupes: msg = ("Filtered duplicate request: %(request)s" diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py index 101bf5fbc..aab645d3d 100644 --- a/scrapy/pipelines/files.py +++ b/scrapy/pipelines/files.py @@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline): spider.crawler.stats.inc_value('file_count', spider=spider) spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider) - ### Overridable Interface + # Overridable Interface def get_media_requests(self, item, info): return [Request(x) for x in item.get(self.files_urls_field, [])] diff --git a/scrapy/pipelines/images.py b/scrapy/pipelines/images.py index 2e646379c..aeb520442 100644 --- a/scrapy/pipelines/images.py +++ b/scrapy/pipelines/images.py @@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes from scrapy.http import Request from scrapy.settings import Settings from scrapy.exceptions import DropItem -#TODO: from scrapy.pipelines.media import MediaPipeline +# TODO: from scrapy.pipelines.media import MediaPipeline from scrapy.pipelines.files import FileException, FilesPipeline diff --git a/scrapy/pipelines/media.py b/scrapy/pipelines/media.py index 562d9ee32..a6d99fa99 100644 --- a/scrapy/pipelines/media.py +++ b/scrapy/pipelines/media.py @@ -166,7 +166,7 @@ class MediaPipeline: for wad in info.waiting.pop(fp): defer_result(result).chainDeferred(wad) - ### Overridable Interface + # Overridable Interface def media_to_download(self, request, info): """Check request before starting download""" pass diff --git a/tests/test_crawl.py b/tests/test_crawl.py index 3f8a7435c..c02e6a70b 100644 --- a/tests/test_crawl.py +++ b/tests/test_crawl.py @@ -147,9 +147,9 @@ class CrawlTestCase(TestCase): settings = {"CONCURRENT_REQUESTS": 1} crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider) yield crawler.crawl(mockserver=self.mockserver) - #self.assertTrue(False, crawler.spider.seedsseen) - #self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99), - # crawler.spider.seedsseen) + self.assertTrue( + crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99), + crawler.spider.seedsseen) @defer.inlineCallbacks def test_start_requests_dupes(self): diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py index 051f66680..f8e4851fc 100644 --- a/tests/test_downloadermiddleware_cookies.py +++ b/tests/test_downloadermiddleware_cookies.py @@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase): assert self.mw.process_request(req4, self.spider) is None self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce') - #cookies from hosts with port + # cookies from hosts with port req5_1 = Request('http://scrapytest.org:1104/') assert self.mw.process_request(req5_1, self.spider) is None @@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase): assert self.mw.process_request(req5_3, self.spider) is None self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1') - #skip cookie retrieval for not http request + # skip cookie retrieval for not http request req6 = Request('file:///scrapy/sometempfile') assert self.mw.process_request(req6, self.spider) is None self.assertEqual(req6.headers.get('Cookie'), None) diff --git a/tests/test_http_response.py b/tests/test_http_response.py index eafc3560e..522ec4875 100644 --- a/tests/test_http_response.py +++ b/tests/test_http_response.py @@ -438,8 +438,8 @@ class TextResponseTest(BaseResponseTest): assert u'value' in r.text, repr(r.text) # FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse - #r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX') - #assert u'\ufffd' in r.text, repr(r.text) + # r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX') + # assert u'\ufffd' in r.text, repr(r.text) def test_selector(self): body = b"Some page" diff --git a/tests/test_spidermiddleware_referer.py b/tests/test_spidermiddleware_referer.py index 4c6ede70b..742adc64f 100644 --- a/tests/test_spidermiddleware_referer.py +++ b/tests/test_spidermiddleware_referer.py @@ -24,7 +24,7 @@ class TestRefererMiddleware(TestCase): resp_headers = {} settings = {} scenarii = [ - ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'), + ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'), ] def setUp(self): @@ -54,57 +54,57 @@ class MixinDefault: with some additional filtering of s3:// """ scenarii = [ - ('https://example.com/', 'https://scrapy.org/', b'https://example.com/'), - ('http://example.com/', 'http://scrapy.org/', b'http://example.com/'), - ('http://example.com/', 'https://scrapy.org/', b'http://example.com/'), - ('https://example.com/', 'http://scrapy.org/', None), + ('https://example.com/', 'https://scrapy.org/', b'https://example.com/'), + ('http://example.com/', 'http://scrapy.org/', b'http://example.com/'), + ('http://example.com/', 'https://scrapy.org/', b'http://example.com/'), + ('https://example.com/', 'http://scrapy.org/', None), # no credentials leak - ('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'), + ('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'), # no referrer leak for local schemes - ('file:///home/path/to/somefile.html', 'https://scrapy.org/', None), - ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None), + ('file:///home/path/to/somefile.html', 'https://scrapy.org/', None), + ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None), # no referrer leak for s3 origins - ('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None), - ('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None), + ('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None), + ('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None), ] class MixinNoReferrer: scenarii = [ - ('https://example.com/page.html', 'https://example.com/', None), - ('http://www.example.com/', 'https://scrapy.org/', None), - ('http://www.example.com/', 'http://scrapy.org/', None), - ('https://www.example.com/', 'http://scrapy.org/', None), - ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None), + ('https://example.com/page.html', 'https://example.com/', None), + ('http://www.example.com/', 'https://scrapy.org/', None), + ('http://www.example.com/', 'http://scrapy.org/', None), + ('https://www.example.com/', 'http://scrapy.org/', None), + ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None), ] class MixinNoReferrerWhenDowngrade: scenarii = [ # TLS to TLS: send non-empty referrer - ('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'), - ('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'), - ('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'), - ('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'), - ('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'), + ('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'), + ('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'), + ('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'), + ('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'), + ('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'), # TLS to non-TLS: do not send referrer - ('https://example.com/page.html', 'http://not.example.com/', None), - ('https://example.com/page.html', 'http://scrapy.org/', None), - ('ftps://example.com/urls.zip', 'http://scrapy.org/', None), + ('https://example.com/page.html', 'http://not.example.com/', None), + ('https://example.com/page.html', 'http://scrapy.org/', None), + ('ftps://example.com/urls.zip', 'http://scrapy.org/', None), # non-TLS to TLS or non-TLS: send referrer - ('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'), - ('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'), - ('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'), - ('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'), - ('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'), - ('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'), - ('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'), - ('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'), + ('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'), + ('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'), + ('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'), + ('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'), + ('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'), + ('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'), + ('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'), + ('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'), # test for user/password stripping ('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'), @@ -114,43 +114,43 @@ class MixinNoReferrerWhenDowngrade: class MixinSameOrigin: scenarii = [ # Same origin (protocol, host, port): send referrer - ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), - ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), + ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), + ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), # Different host: do NOT send referrer - ('https://example.com/page.html', 'https://not.example.com/otherpage.html', None), - ('http://example.com/page.html', 'http://not.example.com/otherpage.html', None), - ('http://example.com/page.html', 'http://www.example.com/otherpage.html', None), + ('https://example.com/page.html', 'https://not.example.com/otherpage.html', None), + ('http://example.com/page.html', 'http://not.example.com/otherpage.html', None), + ('http://example.com/page.html', 'http://www.example.com/otherpage.html', None), # Different port: do NOT send referrer - ('https://example.com:444/page.html', 'https://example.com/not-page.html', None), - ('http://example.com:81/page.html', 'http://example.com/not-page.html', None), - ('http://example.com/page.html', 'http://example.com:81/not-page.html', None), + ('https://example.com:444/page.html', 'https://example.com/not-page.html', None), + ('http://example.com:81/page.html', 'http://example.com/not-page.html', None), + ('http://example.com/page.html', 'http://example.com:81/not-page.html', None), # Different protocols: do NOT send refferer - ('https://example.com/page.html', 'http://example.com/not-page.html', None), - ('https://example.com/page.html', 'http://not.example.com/', None), - ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None), - ('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None), - ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None), + ('https://example.com/page.html', 'http://example.com/not-page.html', None), + ('https://example.com/page.html', 'http://not.example.com/', None), + ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None), + ('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None), + ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None), # test for user/password stripping - ('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None), + ('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None), ] class MixinOrigin: scenarii = [ # TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades) - ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'), - ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'), - ('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'), - ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'), + ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'), + ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'), + ('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'), + ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'), # test for user/password stripping ('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'), @@ -160,129 +160,129 @@ class MixinOrigin: class MixinStrictOrigin: scenarii = [ # TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades - ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'), - ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'), - ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'), + ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'), + ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'), + ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'), # downgrade: send nothing - ('https://example.com/page.html', 'http://scrapy.org', None), + ('https://example.com/page.html', 'http://scrapy.org', None), # upgrade: send origin - ('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'), + ('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'), # test for user/password stripping - ('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'), - ('https://user:password@example.com/page.html', 'http://scrapy.org', None), + ('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'), + ('https://user:password@example.com/page.html', 'http://scrapy.org', None), ] class MixinOriginWhenCrossOrigin: scenarii = [ # Same origin (protocol, host, port): send referrer - ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), - ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), + ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), + ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), # Different host: send origin as referrer - ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'), - ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'), - ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'), + ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'), + ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'), + ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'), # exact match required - ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'), + ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'), # Different port: send origin as referrer - ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'), - ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'), + ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'), + ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'), # Different protocols: send origin as referrer - ('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'), - ('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'), - ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), - ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'), - ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), + ('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'), + ('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'), + ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), + ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'), + ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), # test for user/password stripping - ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'), + ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'), # TLS to non-TLS downgrade: send origin - ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'), + ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'), ] class MixinStrictOriginWhenCrossOrigin: scenarii = [ # Same origin (protocol, host, port): send referrer - ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), - ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), - ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), - ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), + ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'), + ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'), + ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'), + ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'), # Different host: send origin as referrer - ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'), - ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'), - ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'), + ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'), + ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'), + ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'), # exact match required - ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'), + ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'), # Different port: send origin as referrer - ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'), - ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'), + ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'), + ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'), # downgrade - ('https://example4.com/page.html', 'http://example4.com/not-page.html', None), - ('https://example4.com/page.html', 'http://not.example4.com/', None), + ('https://example4.com/page.html', 'http://example4.com/not-page.html', None), + ('https://example4.com/page.html', 'http://not.example4.com/', None), # non-TLS to non-TLS - ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'), + ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'), # upgrade - ('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'), - ('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'), + ('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'), + ('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'), # Different protocols: send origin as referrer - ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), - ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), + ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), + ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'), # test for user/password stripping - ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'), + ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'), # TLS to non-TLS downgrade: send nothing - ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None), + ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None), ] class MixinUnsafeUrl: scenarii = [ # TLS to TLS: send referrer - ('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'), - ('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'), - ('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'), - ('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'), - ('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'), - ('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'), + ('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'), + ('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'), + ('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'), + ('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'), + ('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'), + ('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'), # TLS to non-TLS: send referrer (yes, it's unsafe) - ('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'), - ('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'), - ('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'), + ('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'), + ('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'), + ('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'), # non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe) - ('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'), - ('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'), - ('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'), - ('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'), - ('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'), - ('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'), - ('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'), - ('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'), + ('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'), + ('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'), + ('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'), + ('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'), + ('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'), + ('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'), + ('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'), + ('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'), # test for user/password stripping - ('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'), - ('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'), + ('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'), + ('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'), ] @@ -339,12 +339,12 @@ class CustomPythonOrgPolicy(ReferrerPolicy): class TestSettingsCustomPolicy(TestRefererMiddleware): settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'} scenarii = [ - ('https://example.com/', 'https://scrapy.org/', b'https://python.org/'), - ('http://example.com/', 'http://scrapy.org/', b'http://python.org/'), - ('http://example.com/', 'https://scrapy.org/', b'https://python.org/'), - ('https://example.com/', 'http://scrapy.org/', b'http://python.org/'), - ('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'), - ('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'), + ('https://example.com/', 'https://scrapy.org/', b'https://python.org/'), + ('http://example.com/', 'http://scrapy.org/', b'http://python.org/'), + ('http://example.com/', 'https://scrapy.org/', b'https://python.org/'), + ('https://example.com/', 'http://scrapy.org/', b'http://python.org/'), + ('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'), + ('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'), ] @@ -541,7 +541,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware): settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'} scenarii = [ - ( 'http://scrapytest.org/1', # parent + ( + 'http://scrapytest.org/1', # parent 'http://scrapytest.org/2', # target ( # redirections: code, URL @@ -551,7 +552,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware): b'http://scrapytest.org/1', # expected initial referer b'http://scrapytest.org/1', # expected referer for the redirection request ), - ( 'https://scrapytest.org/1', + ( + 'https://scrapytest.org/1', 'https://scrapytest.org/2', ( # redirecting to non-secure URL @@ -560,7 +562,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware): b'https://scrapytest.org/1', b'https://scrapytest.org/1', ), - ( 'https://scrapytest.org/1', + ( + 'https://scrapytest.org/1', 'https://scrapytest.com/2', ( # redirecting to non-secure URL: different origin @@ -602,7 +605,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect): """ settings = {'REFERRER_POLICY': 'no-referrer'} scenarii = [ - ( 'http://scrapytest.org/1', # parent + ( + 'http://scrapytest.org/1', # parent 'http://scrapytest.org/2', # target ( # redirections: code, URL @@ -612,7 +616,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect): None, # expected initial "Referer" None, # expected "Referer" for the redirection request ), - ( 'https://scrapytest.org/1', + ( + 'https://scrapytest.org/1', 'https://scrapytest.org/2', ( (301, 'http://scrapytest.org/3'), @@ -620,7 +625,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect): None, None, ), - ( 'https://scrapytest.org/1', + ( + 'https://scrapytest.org/1', 'https://example.com/2', # different origin ( (301, 'http://scrapytest.com/3'), @@ -641,7 +647,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect): """ settings = {'REFERRER_POLICY': 'same-origin'} scenarii = [ - ( 'http://scrapytest.org/101', # origin + ( + 'http://scrapytest.org/101', # origin 'http://scrapytest.org/102', # target ( # redirections: code, URL @@ -651,7 +658,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect): b'http://scrapytest.org/101', # expected initial "Referer" b'http://scrapytest.org/101', # expected referer for the redirection request ), - ( 'https://scrapytest.org/201', + ( + 'https://scrapytest.org/201', 'https://scrapytest.org/202', ( # redirecting from secure to non-secure URL == different origin @@ -660,7 +668,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/201', None, ), - ( 'https://scrapytest.org/301', + ( + 'https://scrapytest.org/301', 'https://scrapytest.org/302', ( # different domain == different origin @@ -683,7 +692,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): """ settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN} scenarii = [ - ( 'http://scrapytest.org/101', + ( + 'http://scrapytest.org/101', 'http://scrapytest.org/102', ( (301, 'http://scrapytest.org/103'), @@ -692,7 +702,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): b'http://scrapytest.org/', # send origin b'http://scrapytest.org/', # redirects to same origin: send origin ), - ( 'https://scrapytest.org/201', + ( + 'https://scrapytest.org/201', 'https://scrapytest.org/202', ( # redirecting to non-secure URL: no referrer @@ -701,7 +712,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/', None, ), - ( 'https://scrapytest.org/301', + ( + 'https://scrapytest.org/301', 'https://scrapytest.org/302', ( # redirecting to non-secure URL (different domain): no referrer @@ -710,7 +722,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/', None, ), - ( 'http://scrapy.org/401', + ( + 'http://scrapy.org/401', 'http://example.com/402', ( (301, 'http://scrapytest.org/403'), @@ -718,7 +731,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): b'http://scrapy.org/', b'http://scrapy.org/', ), - ( 'https://scrapy.org/501', + ( + 'https://scrapy.org/501', 'https://example.com/502', ( # HTTPS all along, so origin referrer is kept as-is @@ -728,7 +742,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect): b'https://scrapy.org/', b'https://scrapy.org/', ), - ( 'https://scrapytest.org/601', + ( + 'https://scrapytest.org/601', 'http://scrapytest.org/602', # TLS to non-TLS: no referrer ( (301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer @@ -750,7 +765,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): """ settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN} scenarii = [ - ( 'http://scrapytest.org/101', # origin + ( + 'http://scrapytest.org/101', # origin 'http://scrapytest.org/102', # target + redirection ( # redirections: code, URL @@ -760,7 +776,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): b'http://scrapytest.org/101', # expected initial referer b'http://scrapytest.org/101', # expected referer for the redirection request ), - ( 'https://scrapytest.org/201', + ( + 'https://scrapytest.org/201', 'https://scrapytest.org/202', ( # redirecting to non-secure URL: send origin @@ -769,7 +786,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/201', b'https://scrapytest.org/', ), - ( 'https://scrapytest.org/301', + ( + 'https://scrapytest.org/301', 'https://scrapytest.org/302', ( # redirecting to non-secure URL (different domain): send origin @@ -778,7 +796,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/301', b'https://scrapytest.org/', ), - ( 'http://scrapy.org/401', + ( + 'http://scrapy.org/401', 'http://example.com/402', ( (301, 'http://scrapytest.org/403'), @@ -786,7 +805,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): b'http://scrapy.org/', b'http://scrapy.org/', ), - ( 'https://scrapy.org/501', + ( + 'https://scrapy.org/501', 'https://example.com/502', ( # all different domains: send origin @@ -796,7 +816,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapy.org/', b'https://scrapy.org/', ), - ( 'https://scrapytest.org/301', + ( + 'https://scrapytest.org/301', 'http://scrapytest.org/302', # TLS to non-TLS: send origin ( (301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also) @@ -820,7 +841,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): """ settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN} scenarii = [ - ( 'http://scrapytest.org/101', # origin + ( + 'http://scrapytest.org/101', # origin 'http://scrapytest.org/102', # target + redirection ( # redirections: code, URL @@ -830,7 +852,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): b'http://scrapytest.org/101', # expected initial referer b'http://scrapytest.org/101', # expected referer for the redirection request ), - ( 'https://scrapytest.org/201', + ( + 'https://scrapytest.org/201', 'https://scrapytest.org/202', ( # redirecting to non-secure URL: do not send the "Referer" header @@ -839,7 +862,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/201', None, ), - ( 'https://scrapytest.org/301', + ( + 'https://scrapytest.org/301', 'https://scrapytest.org/302', ( # redirecting to non-secure URL (different domain): send origin @@ -848,7 +872,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapytest.org/301', None, ), - ( 'http://scrapy.org/401', + ( + 'http://scrapy.org/401', 'http://example.com/402', ( (301, 'http://scrapytest.org/403'), @@ -856,7 +881,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): b'http://scrapy.org/', b'http://scrapy.org/', ), - ( 'https://scrapy.org/501', + ( + 'https://scrapy.org/501', 'https://example.com/502', ( # all different domains: send origin @@ -866,7 +892,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect): b'https://scrapy.org/', b'https://scrapy.org/', ), - ( 'https://scrapytest.org/601', + ( + 'https://scrapytest.org/601', 'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer" ( (301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py index 33fc4d570..ec8311298 100644 --- a/tests/test_utils_iterators.py +++ b/tests/test_utils_iterators.py @@ -250,10 +250,10 @@ class UtilsCsvTestCase(unittest.TestCase): result = [row for row in csv] self.assertEqual(result, - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) # explicit type check cuz' we no like stinkin' autocasting! yarrr for result_row in result: @@ -266,10 +266,10 @@ class UtilsCsvTestCase(unittest.TestCase): csv = csviter(response, delimiter='\t') self.assertEqual([row for row in csv], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) def test_csviter_quotechar(self): body1 = get_testdata('feeds', 'feed-sample6.csv') @@ -279,19 +279,19 @@ class UtilsCsvTestCase(unittest.TestCase): csv1 = csviter(response1, quotechar="'") self.assertEqual([row for row in csv1], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) response2 = TextResponse(url="http://example.com/", body=body2) csv2 = csviter(response2, delimiter="|", quotechar="'") self.assertEqual([row for row in csv2], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) def test_csviter_wrong_quotechar(self): body = get_testdata('feeds', 'feed-sample6.csv') @@ -299,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase): csv = csviter(response) self.assertEqual([row for row in csv], - [{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"}, - {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"}, - {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"}, - {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}]) + [{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"}, + {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"}, + {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"}, + {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}]) def test_csviter_delimiter_binary_response_assume_utf8_encoding(self): body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t') @@ -310,10 +310,10 @@ class UtilsCsvTestCase(unittest.TestCase): csv = csviter(response, delimiter='\t') self.assertEqual([row for row in csv], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) def test_csviter_headers(self): sample = get_testdata('feeds', 'feed-sample3.csv').splitlines() @@ -323,10 +323,10 @@ class UtilsCsvTestCase(unittest.TestCase): csv = csviter(response, headers=[h.decode('utf-8') for h in headers]) self.assertEqual([row for row in csv], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) def test_csviter_falserow(self): body = get_testdata('feeds', 'feed-sample3.csv') @@ -336,10 +336,10 @@ class UtilsCsvTestCase(unittest.TestCase): csv = csviter(response) self.assertEqual([row for row in csv], - [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, + [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'}, {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'}, - {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, - {u'id': u'4', u'name': u'empty', u'value': u''}]) + {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL}, + {u'id': u'4', u'name': u'empty', u'value': u''}]) def test_csviter_exception(self): body = get_testdata('feeds', 'feed-sample3.csv') diff --git a/tests/test_utils_url.py b/tests/test_utils_url.py index 7abff8281..72a16e9b1 100644 --- a/tests/test_utils_url.py +++ b/tests/test_utils_url.py @@ -203,29 +203,29 @@ def create_skipped_scheme_t(args): for k, args in enumerate([ - ('/index', 'file://'), - ('/index.html', 'file://'), - ('./index.html', 'file://'), - ('../index.html', 'file://'), - ('../../index.html', 'file://'), - ('./data/index.html', 'file://'), - ('.hidden/data/index.html', 'file://'), - ('/home/user/www/index.html', 'file://'), - ('//home/user/www/index.html', 'file://'), - ('file:///home/user/www/index.html', 'file://'), + ('/index', 'file://'), + ('/index.html', 'file://'), + ('./index.html', 'file://'), + ('../index.html', 'file://'), + ('../../index.html', 'file://'), + ('./data/index.html', 'file://'), + ('.hidden/data/index.html', 'file://'), + ('/home/user/www/index.html', 'file://'), + ('//home/user/www/index.html', 'file://'), + ('file:///home/user/www/index.html', 'file://'), - ('index.html', 'http://'), - ('example.com', 'http://'), - ('www.example.com', 'http://'), - ('www.example.com/index.html', 'http://'), - ('http://example.com', 'http://'), - ('http://example.com/index.html', 'http://'), - ('localhost', 'http://'), - ('localhost/index.html', 'http://'), + ('index.html', 'http://'), + ('example.com', 'http://'), + ('www.example.com', 'http://'), + ('www.example.com/index.html', 'http://'), + ('http://example.com', 'http://'), + ('http://example.com/index.html', 'http://'), + ('localhost', 'http://'), + ('localhost/index.html', 'http://'), # some corner cases (default to http://) - ('/', 'http://'), - ('.../test', 'http://'), + ('/', 'http://'), + ('.../test', 'http://'), ], start=1): t_method = create_guess_scheme_t(args) diff --git a/tests/test_webclient.py b/tests/test_webclient.py index 6253d5c3f..d4abebbfb 100644 --- a/tests/test_webclient.py +++ b/tests/test_webclient.py @@ -53,28 +53,28 @@ class ParseUrlTestCase(unittest.TestCase): def testParse(self): lip = '127.0.0.1' tests = ( - ("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')), - ("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')), - ("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')), + ("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')), + ("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')), + ("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')), ("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')), - ("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')), + ("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')), ("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')), - ("http://127.0.0.1", ('http', lip, lip, 80, '/')), - ("http://127.0.0.1/", ('http', lip, lip, 80, '/')), - ("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')), - ("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')), + ("http://127.0.0.1", ('http', lip, lip, 80, '/')), + ("http://127.0.0.1/", ('http', lip, lip, 80, '/')), + ("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')), + ("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')), ("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')), - ("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')), - ("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')), - ("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')), + ("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')), + ("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')), + ("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')), - ("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')), + ("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')), ("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')), - ("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')), + ("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')), - ("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')), - ("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')), + ("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')), + ("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')), ) for url, test in tests: