diff --git a/pytest.ini b/pytest.ini
index da0f68e20..de0bccbf1 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -54,7 +54,7 @@ flake8-ignore =
scrapy/core/downloader/__init__.py E501
scrapy/core/downloader/contextfactory.py E501 E128 E126
scrapy/core/downloader/middleware.py E501
- scrapy/core/downloader/tls.py E501 E241
+ scrapy/core/downloader/tls.py E501
scrapy/core/downloader/webclient.py E731 E501 E128 E126
scrapy/core/downloader/handlers/__init__.py E501
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
@@ -97,9 +97,9 @@ flake8-ignore =
scrapy/loader/processors.py E501
# scrapy/pipelines
scrapy/pipelines/__init__.py E501
- scrapy/pipelines/files.py E116 E501 E266
- scrapy/pipelines/images.py E265 E501
- scrapy/pipelines/media.py E125 E501 E266
+ scrapy/pipelines/files.py E116 E501
+ scrapy/pipelines/images.py E501
+ scrapy/pipelines/media.py E125 E501
# scrapy/selector
scrapy/selector/__init__.py F403
scrapy/selector/unified.py E501 E111
@@ -149,7 +149,7 @@ flake8-ignore =
scrapy/__init__.py E402 E501
scrapy/cmdline.py E501
scrapy/crawler.py E501
- scrapy/dupefilters.py E501 E202
+ scrapy/dupefilters.py E501
scrapy/exceptions.py E501
scrapy/exporters.py E501
scrapy/interfaces.py E501
@@ -178,13 +178,13 @@ flake8-ignore =
tests/test_command_shell.py E501 E128
tests/test_commands.py E128 E501
tests/test_contracts.py E501 E128
- tests/test_crawl.py E501 E741 E265
+ tests/test_crawl.py E501 E741
tests/test_crawler.py F841 E501
tests/test_dependencies.py F841 E501
- tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123
+ tests/test_downloader_handlers.py E124 E127 E128 E501 E126 E123
tests/test_downloadermiddleware.py E501
tests/test_downloadermiddleware_ajaxcrawlable.py E501
- tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126
+ tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E126
tests/test_downloadermiddleware_decompression.py E127
tests/test_downloadermiddleware_defaultheaders.py E501
tests/test_downloadermiddleware_downloadtimeout.py E501
@@ -199,15 +199,15 @@ flake8-ignore =
tests/test_engine.py E401 E501 E128
tests/test_exporters.py E501 E731 E128 E124
tests/test_extension_telnet.py F841
- tests/test_feedexport.py E501 F841 E241
+ tests/test_feedexport.py E501 F841
tests/test_http_cookies.py E501
tests/test_http_headers.py E501
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
- tests/test_http_response.py E501 E128 E265
+ tests/test_http_response.py E501 E128
tests/test_item.py E128 F841
tests/test_link.py E501
tests/test_linkextractors.py E501 E128 E124
- tests/test_loader.py E501 E731 E741 E128 E117 E241
+ tests/test_loader.py E501 E731 E741 E128 E117
tests/test_logformatter.py E128 E501 E122
tests/test_mail.py E128 E501
tests/test_middleware.py E501 E128
@@ -226,7 +226,7 @@ flake8-ignore =
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
tests/test_spidermiddleware_offsite.py E501 E128 E111
tests/test_spidermiddleware_output_chain.py E501
- tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
+ tests/test_spidermiddleware_referer.py E501 F841 E125 E124 E501 E121
tests/test_squeues.py E501 E741
tests/test_utils_asyncio.py E501
tests/test_utils_conf.py E501 E128
@@ -235,7 +235,7 @@ flake8-ignore =
tests/test_utils_defer.py E501 F841
tests/test_utils_deprecate.py F841 E501
tests/test_utils_http.py E501 E128 W504
- tests/test_utils_iterators.py E501 E128 E129 E241
+ tests/test_utils_iterators.py E501 E128 E129
tests/test_utils_log.py E741
tests/test_utils_python.py E501 E731
tests/test_utils_reqser.py E501 E128
@@ -243,8 +243,8 @@ flake8-ignore =
tests/test_utils_response.py E501
tests/test_utils_signal.py E741 F841 E731
tests/test_utils_sitemap.py E128 E501 E124
- tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123
- tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126
+ tests/test_utils_url.py E501 E127 E125 E501 E126 E123
+ tests/test_webclient.py E501 E128 E122 E402 E123 E126
tests/test_cmdline/__init__.py E501
tests/test_settings/__init__.py E501 E128
tests/test_spiderloader/__init__.py E128 E501
diff --git a/scrapy/core/downloader/tls.py b/scrapy/core/downloader/tls.py
index a1c881d5e..e43a3c83e 100644
--- a/scrapy/core/downloader/tls.py
+++ b/scrapy/core/downloader/tls.py
@@ -20,8 +20,8 @@ METHOD_TLSv12 = 'TLSv1.2'
openssl_methods = {
- METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
- METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
+ METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
+ METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
diff --git a/scrapy/dupefilters.py b/scrapy/dupefilters.py
index d74c8ed36..ac5478e7c 100644
--- a/scrapy/dupefilters.py
+++ b/scrapy/dupefilters.py
@@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
def log(self, request, spider):
if self.debug:
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
- args = {'request': request, 'referer': referer_str(request) }
+ args = {'request': request, 'referer': referer_str(request)}
self.logger.debug(msg, args, extra={'spider': spider})
elif self.logdupes:
msg = ("Filtered duplicate request: %(request)s"
diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py
index 101bf5fbc..aab645d3d 100644
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline):
spider.crawler.stats.inc_value('file_count', spider=spider)
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
- ### Overridable Interface
+ # Overridable Interface
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.files_urls_field, [])]
diff --git a/scrapy/pipelines/images.py b/scrapy/pipelines/images.py
index 2e646379c..aeb520442 100644
--- a/scrapy/pipelines/images.py
+++ b/scrapy/pipelines/images.py
@@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.exceptions import DropItem
-#TODO: from scrapy.pipelines.media import MediaPipeline
+# TODO: from scrapy.pipelines.media import MediaPipeline
from scrapy.pipelines.files import FileException, FilesPipeline
diff --git a/scrapy/pipelines/media.py b/scrapy/pipelines/media.py
index 562d9ee32..a6d99fa99 100644
--- a/scrapy/pipelines/media.py
+++ b/scrapy/pipelines/media.py
@@ -166,7 +166,7 @@ class MediaPipeline:
for wad in info.waiting.pop(fp):
defer_result(result).chainDeferred(wad)
- ### Overridable Interface
+ # Overridable Interface
def media_to_download(self, request, info):
"""Check request before starting download"""
pass
diff --git a/tests/test_crawl.py b/tests/test_crawl.py
index 3f8a7435c..c02e6a70b 100644
--- a/tests/test_crawl.py
+++ b/tests/test_crawl.py
@@ -147,9 +147,9 @@ class CrawlTestCase(TestCase):
settings = {"CONCURRENT_REQUESTS": 1}
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
yield crawler.crawl(mockserver=self.mockserver)
- #self.assertTrue(False, crawler.spider.seedsseen)
- #self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
- # crawler.spider.seedsseen)
+ self.assertTrue(
+ crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
+ crawler.spider.seedsseen)
@defer.inlineCallbacks
def test_start_requests_dupes(self):
diff --git a/tests/test_downloadermiddleware_cookies.py b/tests/test_downloadermiddleware_cookies.py
index 051f66680..f8e4851fc 100644
--- a/tests/test_downloadermiddleware_cookies.py
+++ b/tests/test_downloadermiddleware_cookies.py
@@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req4, self.spider) is None
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
- #cookies from hosts with port
+ # cookies from hosts with port
req5_1 = Request('http://scrapytest.org:1104/')
assert self.mw.process_request(req5_1, self.spider) is None
@@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req5_3, self.spider) is None
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
- #skip cookie retrieval for not http request
+ # skip cookie retrieval for non-HTTP requests
req6 = Request('file:///scrapy/sometempfile')
assert self.mw.process_request(req6, self.spider) is None
self.assertEqual(req6.headers.get('Cookie'), None)
diff --git a/tests/test_http_response.py b/tests/test_http_response.py
index eafc3560e..522ec4875 100644
--- a/tests/test_http_response.py
+++ b/tests/test_http_response.py
@@ -438,8 +438,8 @@ class TextResponseTest(BaseResponseTest):
assert u'value' in r.text, repr(r.text)
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
- #r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
- #assert u'\ufffd' in r.text, repr(r.text)
+ # r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
+ # assert u'\ufffd' in r.text, repr(r.text)
def test_selector(self):
body = b"
Some page"
diff --git a/tests/test_spidermiddleware_referer.py b/tests/test_spidermiddleware_referer.py
index 4c6ede70b..742adc64f 100644
--- a/tests/test_spidermiddleware_referer.py
+++ b/tests/test_spidermiddleware_referer.py
@@ -24,7 +24,7 @@ class TestRefererMiddleware(TestCase):
resp_headers = {}
settings = {}
scenarii = [
- ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
+ ('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
]
def setUp(self):
@@ -54,57 +54,57 @@ class MixinDefault:
with some additional filtering of s3://
"""
scenarii = [
- ('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
- ('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
- ('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
- ('https://example.com/', 'http://scrapy.org/', None),
+ ('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
+ ('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
+ ('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
+ ('https://example.com/', 'http://scrapy.org/', None),
# no credentials leak
- ('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
+ ('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
# no referrer leak for local schemes
- ('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
- ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
+ ('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
+ ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
# no referrer leak for s3 origins
- ('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
- ('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
+ ('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
+ ('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
]
class MixinNoReferrer:
scenarii = [
- ('https://example.com/page.html', 'https://example.com/', None),
- ('http://www.example.com/', 'https://scrapy.org/', None),
- ('http://www.example.com/', 'http://scrapy.org/', None),
- ('https://www.example.com/', 'http://scrapy.org/', None),
- ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
+ ('https://example.com/page.html', 'https://example.com/', None),
+ ('http://www.example.com/', 'https://scrapy.org/', None),
+ ('http://www.example.com/', 'http://scrapy.org/', None),
+ ('https://www.example.com/', 'http://scrapy.org/', None),
+ ('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
]
class MixinNoReferrerWhenDowngrade:
scenarii = [
# TLS to TLS: send non-empty referrer
- ('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
- ('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
- ('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
- ('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
- ('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
+ ('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
+ ('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
+ ('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
+ ('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
+ ('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
# TLS to non-TLS: do not send referrer
- ('https://example.com/page.html', 'http://not.example.com/', None),
- ('https://example.com/page.html', 'http://scrapy.org/', None),
- ('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
+ ('https://example.com/page.html', 'http://not.example.com/', None),
+ ('https://example.com/page.html', 'http://scrapy.org/', None),
+ ('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
# non-TLS to TLS or non-TLS: send referrer
- ('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
- ('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
- ('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
- ('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
- ('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
- ('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
- ('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
- ('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
+ ('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
+ ('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
+ ('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
+ ('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
+ ('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
+ ('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
+ ('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
+ ('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
# test for user/password stripping
('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
@@ -114,43 +114,43 @@ class MixinNoReferrerWhenDowngrade:
class MixinSameOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
- ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
- ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
+ ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: do NOT send referrer
- ('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
- ('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
- ('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
+ ('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
+ ('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
+ ('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
# Different port: do NOT send referrer
- ('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
- ('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
- ('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
+ ('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
+ ('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
+ ('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
# Different protocols: do NOT send refferer
- ('https://example.com/page.html', 'http://example.com/not-page.html', None),
- ('https://example.com/page.html', 'http://not.example.com/', None),
- ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
- ('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
- ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
+ ('https://example.com/page.html', 'http://example.com/not-page.html', None),
+ ('https://example.com/page.html', 'http://not.example.com/', None),
+ ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
+ ('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
+ ('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
# test for user/password stripping
- ('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
+ ('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
]
class MixinOrigin:
scenarii = [
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
- ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
- ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
- ('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
- ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
+ ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
+ ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
+ ('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
+ ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
# test for user/password stripping
('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
@@ -160,129 +160,129 @@ class MixinOrigin:
class MixinStrictOrigin:
scenarii = [
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
- ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
- ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
- ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
+ ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
+ ('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
+ ('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
# downgrade: send nothing
- ('https://example.com/page.html', 'http://scrapy.org', None),
+ ('https://example.com/page.html', 'http://scrapy.org', None),
# upgrade: send origin
- ('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
+ ('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
# test for user/password stripping
- ('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
- ('https://user:password@example.com/page.html', 'http://scrapy.org', None),
+ ('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
+ ('https://user:password@example.com/page.html', 'http://scrapy.org', None),
]
class MixinOriginWhenCrossOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
- ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
- ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
+ ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: send origin as referrer
- ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
- ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
- ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
+ ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
+ ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
+ ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
# exact match required
- ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
+ ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
# Different port: send origin as referrer
- ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
- ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
+ ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
+ ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
# Different protocols: send origin as referrer
- ('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
- ('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
- ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
- ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
- ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
+ ('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
+ ('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
+ ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
+ ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
+ ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
# test for user/password stripping
- ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
+ ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
# TLS to non-TLS downgrade: send origin
- ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
+ ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
]
class MixinStrictOriginWhenCrossOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
- ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
- ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
- ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
- ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
+ ('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
+ ('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
+ ('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: send origin as referrer
- ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
- ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
- ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
+ ('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
+ ('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
+ ('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
# exact match required
- ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
+ ('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
# Different port: send origin as referrer
- ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
- ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
+ ('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
+ ('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
# downgrade
- ('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
- ('https://example4.com/page.html', 'http://not.example4.com/', None),
+ ('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
+ ('https://example4.com/page.html', 'http://not.example4.com/', None),
# non-TLS to non-TLS
- ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
+ ('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
# upgrade
- ('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
- ('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
+ ('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
+ ('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
# Different protocols: send origin as referrer
- ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
- ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
+ ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
+ ('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
# test for user/password stripping
- ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
+ ('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
# TLS to non-TLS downgrade: send nothing
- ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
+ ('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
]
class MixinUnsafeUrl:
scenarii = [
# TLS to TLS: send referrer
- ('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
- ('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
- ('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
- ('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
- ('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
- ('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
+ ('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
+ ('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
+ ('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
+ ('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
+ ('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
+ ('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
# TLS to non-TLS: send referrer (yes, it's unsafe)
- ('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
- ('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
- ('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
+ ('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
+ ('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
+ ('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
# non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe)
- ('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
- ('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
- ('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
- ('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
- ('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
- ('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
- ('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
- ('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
+ ('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
+ ('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
+ ('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
+ ('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
+ ('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
+ ('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
+ ('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
+ ('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
# test for user/password stripping
- ('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
- ('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
+ ('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
+ ('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
]
@@ -339,12 +339,12 @@ class CustomPythonOrgPolicy(ReferrerPolicy):
class TestSettingsCustomPolicy(TestRefererMiddleware):
settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'}
scenarii = [
- ('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
- ('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
- ('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
- ('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
- ('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
- ('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
+ ('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
+ ('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
+ ('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
+ ('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
+ ('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
+ ('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
]
@@ -541,7 +541,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
scenarii = [
- ( 'http://scrapytest.org/1', # parent
+ (
+ 'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@@ -551,7 +552,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'http://scrapytest.org/1', # expected initial referer
b'http://scrapytest.org/1', # expected referer for the redirection request
),
- ( 'https://scrapytest.org/1',
+ (
+ 'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
# redirecting to non-secure URL
@@ -560,7 +562,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'https://scrapytest.org/1',
b'https://scrapytest.org/1',
),
- ( 'https://scrapytest.org/1',
+ (
+ 'https://scrapytest.org/1',
'https://scrapytest.com/2',
(
# redirecting to non-secure URL: different origin
@@ -602,7 +605,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'no-referrer'}
scenarii = [
- ( 'http://scrapytest.org/1', # parent
+ (
+ 'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@@ -612,7 +616,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None, # expected initial "Referer"
None, # expected "Referer" for the redirection request
),
- ( 'https://scrapytest.org/1',
+ (
+ 'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
(301, 'http://scrapytest.org/3'),
@@ -620,7 +625,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None,
None,
),
- ( 'https://scrapytest.org/1',
+ (
+ 'https://scrapytest.org/1',
'https://example.com/2', # different origin
(
(301, 'http://scrapytest.com/3'),
@@ -641,7 +647,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'same-origin'}
scenarii = [
- ( 'http://scrapytest.org/101', # origin
+ (
+ 'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target
(
# redirections: code, URL
@@ -651,7 +658,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial "Referer"
b'http://scrapytest.org/101', # expected referer for the redirection request
),
- ( 'https://scrapytest.org/201',
+ (
+ 'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting from secure to non-secure URL == different origin
@@ -660,7 +668,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
- ( 'https://scrapytest.org/301',
+ (
+ 'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# different domain == different origin
@@ -683,7 +692,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
scenarii = [
- ( 'http://scrapytest.org/101',
+ (
+ 'http://scrapytest.org/101',
'http://scrapytest.org/102',
(
(301, 'http://scrapytest.org/103'),
@@ -692,7 +702,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/', # send origin
b'http://scrapytest.org/', # redirects to same origin: send origin
),
- ( 'https://scrapytest.org/201',
+ (
+ 'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: no referrer
@@ -701,7 +712,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
- ( 'https://scrapytest.org/301',
+ (
+ 'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): no referrer
@@ -710,7 +722,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
- ( 'http://scrapy.org/401',
+ (
+ 'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@@ -718,7 +731,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
- ( 'https://scrapy.org/501',
+ (
+ 'https://scrapy.org/501',
'https://example.com/502',
(
# HTTPS all along, so origin referrer is kept as-is
@@ -728,7 +742,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
- ( 'https://scrapytest.org/601',
+ (
+ 'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
@@ -750,7 +765,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
- ( 'http://scrapytest.org/101', # origin
+ (
+ 'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@@ -760,7 +776,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
- ( 'https://scrapytest.org/201',
+ (
+ 'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: send origin
@@ -769,7 +786,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
b'https://scrapytest.org/',
),
- ( 'https://scrapytest.org/301',
+ (
+ 'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): send origin
@@ -778,7 +796,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
b'https://scrapytest.org/',
),
- ( 'http://scrapy.org/401',
+ (
+ 'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@@ -786,7 +805,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
- ( 'https://scrapy.org/501',
+ (
+ 'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@@ -796,7 +816,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
- ( 'https://scrapytest.org/301',
+ (
+ 'https://scrapytest.org/301',
'http://scrapytest.org/302', # TLS to non-TLS: send origin
(
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
@@ -820,7 +841,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
- ( 'http://scrapytest.org/101', # origin
+ (
+ 'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@@ -830,7 +852,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
- ( 'https://scrapytest.org/201',
+ (
+ 'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: do not send the "Referer" header
@@ -839,7 +862,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
- ( 'https://scrapytest.org/301',
+ (
+ 'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): send origin
@@ -848,7 +872,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
None,
),
- ( 'http://scrapy.org/401',
+ (
+ 'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@@ -856,7 +881,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
- ( 'https://scrapy.org/501',
+ (
+ 'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@@ -866,7 +892,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
- ( 'https://scrapytest.org/601',
+ (
+ 'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing
diff --git a/tests/test_utils_iterators.py b/tests/test_utils_iterators.py
index 33fc4d570..ec8311298 100644
--- a/tests/test_utils_iterators.py
+++ b/tests/test_utils_iterators.py
@@ -250,10 +250,10 @@ class UtilsCsvTestCase(unittest.TestCase):
result = [row for row in csv]
self.assertEqual(result,
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
# explicit type check cuz' we no like stinkin' autocasting! yarrr
for result_row in result:
@@ -266,10 +266,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, delimiter='\t')
self.assertEqual([row for row in csv],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_quotechar(self):
body1 = get_testdata('feeds', 'feed-sample6.csv')
@@ -279,19 +279,19 @@ class UtilsCsvTestCase(unittest.TestCase):
csv1 = csviter(response1, quotechar="'")
self.assertEqual([row for row in csv1],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
response2 = TextResponse(url="http://example.com/", body=body2)
csv2 = csviter(response2, delimiter="|", quotechar="'")
self.assertEqual([row for row in csv2],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_wrong_quotechar(self):
body = get_testdata('feeds', 'feed-sample6.csv')
@@ -299,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response)
self.assertEqual([row for row in csv],
- [{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
- {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
- {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
- {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
+ [{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
+ {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
+ {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
+ {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
@@ -310,10 +310,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, delimiter='\t')
self.assertEqual([row for row in csv],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_headers(self):
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
@@ -323,10 +323,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
self.assertEqual([row for row in csv],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_falserow(self):
body = get_testdata('feeds', 'feed-sample3.csv')
@@ -336,10 +336,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response)
self.assertEqual([row for row in csv],
- [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
+ [{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
- {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
- {u'id': u'4', u'name': u'empty', u'value': u''}])
+ {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
+ {u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_exception(self):
body = get_testdata('feeds', 'feed-sample3.csv')
diff --git a/tests/test_utils_url.py b/tests/test_utils_url.py
index 7abff8281..72a16e9b1 100644
--- a/tests/test_utils_url.py
+++ b/tests/test_utils_url.py
@@ -203,29 +203,29 @@ def create_skipped_scheme_t(args):
for k, args in enumerate([
- ('/index', 'file://'),
- ('/index.html', 'file://'),
- ('./index.html', 'file://'),
- ('../index.html', 'file://'),
- ('../../index.html', 'file://'),
- ('./data/index.html', 'file://'),
- ('.hidden/data/index.html', 'file://'),
- ('/home/user/www/index.html', 'file://'),
- ('//home/user/www/index.html', 'file://'),
- ('file:///home/user/www/index.html', 'file://'),
+ ('/index', 'file://'),
+ ('/index.html', 'file://'),
+ ('./index.html', 'file://'),
+ ('../index.html', 'file://'),
+ ('../../index.html', 'file://'),
+ ('./data/index.html', 'file://'),
+ ('.hidden/data/index.html', 'file://'),
+ ('/home/user/www/index.html', 'file://'),
+ ('//home/user/www/index.html', 'file://'),
+ ('file:///home/user/www/index.html', 'file://'),
- ('index.html', 'http://'),
- ('example.com', 'http://'),
- ('www.example.com', 'http://'),
- ('www.example.com/index.html', 'http://'),
- ('http://example.com', 'http://'),
- ('http://example.com/index.html', 'http://'),
- ('localhost', 'http://'),
- ('localhost/index.html', 'http://'),
+ ('index.html', 'http://'),
+ ('example.com', 'http://'),
+ ('www.example.com', 'http://'),
+ ('www.example.com/index.html', 'http://'),
+ ('http://example.com', 'http://'),
+ ('http://example.com/index.html', 'http://'),
+ ('localhost', 'http://'),
+ ('localhost/index.html', 'http://'),
# some corner cases (default to http://)
- ('/', 'http://'),
- ('.../test', 'http://'),
+ ('/', 'http://'),
+ ('.../test', 'http://'),
], start=1):
t_method = create_guess_scheme_t(args)
diff --git a/tests/test_webclient.py b/tests/test_webclient.py
index 6253d5c3f..d4abebbfb 100644
--- a/tests/test_webclient.py
+++ b/tests/test_webclient.py
@@ -53,28 +53,28 @@ class ParseUrlTestCase(unittest.TestCase):
def testParse(self):
lip = '127.0.0.1'
tests = (
- ("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
- ("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
- ("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
+ ("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
+ ("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
+ ("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
- ("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
+ ("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
- ("http://127.0.0.1", ('http', lip, lip, 80, '/')),
- ("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
- ("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
- ("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
+ ("http://127.0.0.1", ('http', lip, lip, 80, '/')),
+ ("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
+ ("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
+ ("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
- ("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
- ("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
- ("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
+ ("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
+ ("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
+ ("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
- ("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
+ ("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
- ("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
+ ("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
- ("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
- ("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
+ ("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
+ ("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
)
for url, test in tests: