mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-13 10:53:37 +00:00
Fix pycodestyle E2XX (whitespace) (#4468)
This commit is contained in:
parent
39b01b6892
commit
94d7ad76cb
30
pytest.ini
30
pytest.ini
@ -54,7 +54,7 @@ flake8-ignore =
|
||||
scrapy/core/downloader/__init__.py E501
|
||||
scrapy/core/downloader/contextfactory.py E501 E128 E126
|
||||
scrapy/core/downloader/middleware.py E501
|
||||
scrapy/core/downloader/tls.py E501 E241
|
||||
scrapy/core/downloader/tls.py E501
|
||||
scrapy/core/downloader/webclient.py E731 E501 E128 E126
|
||||
scrapy/core/downloader/handlers/__init__.py E501
|
||||
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
|
||||
@ -97,9 +97,9 @@ flake8-ignore =
|
||||
scrapy/loader/processors.py E501
|
||||
# scrapy/pipelines
|
||||
scrapy/pipelines/__init__.py E501
|
||||
scrapy/pipelines/files.py E116 E501 E266
|
||||
scrapy/pipelines/images.py E265 E501
|
||||
scrapy/pipelines/media.py E125 E501 E266
|
||||
scrapy/pipelines/files.py E116 E501
|
||||
scrapy/pipelines/images.py E501
|
||||
scrapy/pipelines/media.py E125 E501
|
||||
# scrapy/selector
|
||||
scrapy/selector/__init__.py F403
|
||||
scrapy/selector/unified.py E501 E111
|
||||
@ -149,7 +149,7 @@ flake8-ignore =
|
||||
scrapy/__init__.py E402 E501
|
||||
scrapy/cmdline.py E501
|
||||
scrapy/crawler.py E501
|
||||
scrapy/dupefilters.py E501 E202
|
||||
scrapy/dupefilters.py E501
|
||||
scrapy/exceptions.py E501
|
||||
scrapy/exporters.py E501
|
||||
scrapy/interfaces.py E501
|
||||
@ -178,13 +178,13 @@ flake8-ignore =
|
||||
tests/test_command_shell.py E501 E128
|
||||
tests/test_commands.py E128 E501
|
||||
tests/test_contracts.py E501 E128
|
||||
tests/test_crawl.py E501 E741 E265
|
||||
tests/test_crawl.py E501 E741
|
||||
tests/test_crawler.py F841 E501
|
||||
tests/test_dependencies.py F841 E501
|
||||
tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123
|
||||
tests/test_downloader_handlers.py E124 E127 E128 E501 E126 E123
|
||||
tests/test_downloadermiddleware.py E501
|
||||
tests/test_downloadermiddleware_ajaxcrawlable.py E501
|
||||
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126
|
||||
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E126
|
||||
tests/test_downloadermiddleware_decompression.py E127
|
||||
tests/test_downloadermiddleware_defaultheaders.py E501
|
||||
tests/test_downloadermiddleware_downloadtimeout.py E501
|
||||
@ -199,15 +199,15 @@ flake8-ignore =
|
||||
tests/test_engine.py E401 E501 E128
|
||||
tests/test_exporters.py E501 E731 E128 E124
|
||||
tests/test_extension_telnet.py F841
|
||||
tests/test_feedexport.py E501 F841 E241
|
||||
tests/test_feedexport.py E501 F841
|
||||
tests/test_http_cookies.py E501
|
||||
tests/test_http_headers.py E501
|
||||
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
|
||||
tests/test_http_response.py E501 E128 E265
|
||||
tests/test_http_response.py E501 E128
|
||||
tests/test_item.py E128 F841
|
||||
tests/test_link.py E501
|
||||
tests/test_linkextractors.py E501 E128 E124
|
||||
tests/test_loader.py E501 E731 E741 E128 E117 E241
|
||||
tests/test_loader.py E501 E731 E741 E128 E117
|
||||
tests/test_logformatter.py E128 E501 E122
|
||||
tests/test_mail.py E128 E501
|
||||
tests/test_middleware.py E501 E128
|
||||
@ -226,7 +226,7 @@ flake8-ignore =
|
||||
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
|
||||
tests/test_spidermiddleware_offsite.py E501 E128 E111
|
||||
tests/test_spidermiddleware_output_chain.py E501
|
||||
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
|
||||
tests/test_spidermiddleware_referer.py E501 F841 E125 E124 E501 E121
|
||||
tests/test_squeues.py E501 E741
|
||||
tests/test_utils_asyncio.py E501
|
||||
tests/test_utils_conf.py E501 E128
|
||||
@ -235,7 +235,7 @@ flake8-ignore =
|
||||
tests/test_utils_defer.py E501 F841
|
||||
tests/test_utils_deprecate.py F841 E501
|
||||
tests/test_utils_http.py E501 E128 W504
|
||||
tests/test_utils_iterators.py E501 E128 E129 E241
|
||||
tests/test_utils_iterators.py E501 E128 E129
|
||||
tests/test_utils_log.py E741
|
||||
tests/test_utils_python.py E501 E731
|
||||
tests/test_utils_reqser.py E501 E128
|
||||
@ -243,8 +243,8 @@ flake8-ignore =
|
||||
tests/test_utils_response.py E501
|
||||
tests/test_utils_signal.py E741 F841 E731
|
||||
tests/test_utils_sitemap.py E128 E501 E124
|
||||
tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123
|
||||
tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126
|
||||
tests/test_utils_url.py E501 E127 E125 E501 E126 E123
|
||||
tests/test_webclient.py E501 E128 E122 E402 E123 E126
|
||||
tests/test_cmdline/__init__.py E501
|
||||
tests/test_settings/__init__.py E501 E128
|
||||
tests/test_spiderloader/__init__.py E128 E501
|
||||
|
@ -20,8 +20,8 @@ METHOD_TLSv12 = 'TLSv1.2'
|
||||
|
||||
|
||||
openssl_methods = {
|
||||
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
|
||||
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
|
||||
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
|
||||
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
|
||||
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
|
||||
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
|
||||
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
|
||||
|
@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
|
||||
def log(self, request, spider):
|
||||
if self.debug:
|
||||
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
|
||||
args = {'request': request, 'referer': referer_str(request) }
|
||||
args = {'request': request, 'referer': referer_str(request)}
|
||||
self.logger.debug(msg, args, extra={'spider': spider})
|
||||
elif self.logdupes:
|
||||
msg = ("Filtered duplicate request: %(request)s"
|
||||
|
@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline):
|
||||
spider.crawler.stats.inc_value('file_count', spider=spider)
|
||||
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
|
||||
|
||||
### Overridable Interface
|
||||
# Overridable Interface
|
||||
def get_media_requests(self, item, info):
|
||||
return [Request(x) for x in item.get(self.files_urls_field, [])]
|
||||
|
||||
|
@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes
|
||||
from scrapy.http import Request
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.exceptions import DropItem
|
||||
#TODO: from scrapy.pipelines.media import MediaPipeline
|
||||
# TODO: from scrapy.pipelines.media import MediaPipeline
|
||||
from scrapy.pipelines.files import FileException, FilesPipeline
|
||||
|
||||
|
||||
|
@ -166,7 +166,7 @@ class MediaPipeline:
|
||||
for wad in info.waiting.pop(fp):
|
||||
defer_result(result).chainDeferred(wad)
|
||||
|
||||
### Overridable Interface
|
||||
# Overridable Interface
|
||||
def media_to_download(self, request, info):
|
||||
"""Check request before starting download"""
|
||||
pass
|
||||
|
@ -147,9 +147,9 @@ class CrawlTestCase(TestCase):
|
||||
settings = {"CONCURRENT_REQUESTS": 1}
|
||||
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
|
||||
yield crawler.crawl(mockserver=self.mockserver)
|
||||
#self.assertTrue(False, crawler.spider.seedsseen)
|
||||
#self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
|
||||
# crawler.spider.seedsseen)
|
||||
self.assertTrue(
|
||||
crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
|
||||
crawler.spider.seedsseen)
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_start_requests_dupes(self):
|
||||
|
@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase):
|
||||
assert self.mw.process_request(req4, self.spider) is None
|
||||
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
|
||||
|
||||
#cookies from hosts with port
|
||||
# cookies from hosts with port
|
||||
req5_1 = Request('http://scrapytest.org:1104/')
|
||||
assert self.mw.process_request(req5_1, self.spider) is None
|
||||
|
||||
@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase):
|
||||
assert self.mw.process_request(req5_3, self.spider) is None
|
||||
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
|
||||
|
||||
#skip cookie retrieval for not http request
|
||||
# skip cookie retrieval for not http request
|
||||
req6 = Request('file:///scrapy/sometempfile')
|
||||
assert self.mw.process_request(req6, self.spider) is None
|
||||
self.assertEqual(req6.headers.get('Cookie'), None)
|
||||
|
@ -438,8 +438,8 @@ class TextResponseTest(BaseResponseTest):
|
||||
assert u'<span>value</span>' in r.text, repr(r.text)
|
||||
|
||||
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
|
||||
#r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
|
||||
#assert u'\ufffd' in r.text, repr(r.text)
|
||||
# r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
|
||||
# assert u'\ufffd' in r.text, repr(r.text)
|
||||
|
||||
def test_selector(self):
|
||||
body = b"<html><head><title>Some page</title><body></body></html>"
|
||||
|
@ -24,7 +24,7 @@ class TestRefererMiddleware(TestCase):
|
||||
resp_headers = {}
|
||||
settings = {}
|
||||
scenarii = [
|
||||
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
|
||||
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
|
||||
]
|
||||
|
||||
def setUp(self):
|
||||
@ -54,57 +54,57 @@ class MixinDefault:
|
||||
with some additional filtering of s3://
|
||||
"""
|
||||
scenarii = [
|
||||
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
|
||||
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
|
||||
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||
('https://example.com/', 'http://scrapy.org/', None),
|
||||
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
|
||||
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
|
||||
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||
('https://example.com/', 'http://scrapy.org/', None),
|
||||
|
||||
# no credentials leak
|
||||
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||
|
||||
# no referrer leak for local schemes
|
||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||
|
||||
# no referrer leak for s3 origins
|
||||
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
|
||||
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
|
||||
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
|
||||
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
|
||||
]
|
||||
|
||||
|
||||
class MixinNoReferrer:
|
||||
scenarii = [
|
||||
('https://example.com/page.html', 'https://example.com/', None),
|
||||
('http://www.example.com/', 'https://scrapy.org/', None),
|
||||
('http://www.example.com/', 'http://scrapy.org/', None),
|
||||
('https://www.example.com/', 'http://scrapy.org/', None),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||
('https://example.com/page.html', 'https://example.com/', None),
|
||||
('http://www.example.com/', 'https://scrapy.org/', None),
|
||||
('http://www.example.com/', 'http://scrapy.org/', None),
|
||||
('https://www.example.com/', 'http://scrapy.org/', None),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||
]
|
||||
|
||||
|
||||
class MixinNoReferrerWhenDowngrade:
|
||||
scenarii = [
|
||||
# TLS to TLS: send non-empty referrer
|
||||
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
|
||||
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
|
||||
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
|
||||
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
|
||||
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
|
||||
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
|
||||
|
||||
# TLS to non-TLS: do not send referrer
|
||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||
('https://example.com/page.html', 'http://scrapy.org/', None),
|
||||
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
|
||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||
('https://example.com/page.html', 'http://scrapy.org/', None),
|
||||
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
|
||||
|
||||
# non-TLS to TLS or non-TLS: send referrer
|
||||
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
|
||||
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
|
||||
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
|
||||
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
|
||||
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
|
||||
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
|
||||
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
|
||||
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
|
||||
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||
|
||||
# test for user/password stripping
|
||||
('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
||||
@ -114,43 +114,43 @@ class MixinNoReferrerWhenDowngrade:
|
||||
class MixinSameOrigin:
|
||||
scenarii = [
|
||||
# Same origin (protocol, host, port): send referrer
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
|
||||
# Different host: do NOT send referrer
|
||||
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
|
||||
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
|
||||
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
|
||||
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
|
||||
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
|
||||
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
|
||||
|
||||
# Different port: do NOT send referrer
|
||||
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
|
||||
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
|
||||
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
|
||||
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
|
||||
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
|
||||
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
|
||||
|
||||
# Different protocols: do NOT send referrer
|
||||
('https://example.com/page.html', 'http://example.com/not-page.html', None),
|
||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
|
||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||
('https://example.com/page.html', 'http://example.com/not-page.html', None),
|
||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
|
||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||
|
||||
# test for user/password stripping
|
||||
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
|
||||
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
|
||||
]
|
||||
|
||||
|
||||
class MixinOrigin:
|
||||
scenarii = [
|
||||
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||
|
||||
# test for user/password stripping
|
||||
('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
||||
@ -160,129 +160,129 @@ class MixinOrigin:
|
||||
class MixinStrictOrigin:
|
||||
scenarii = [
|
||||
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||
|
||||
# downgrade: send nothing
|
||||
('https://example.com/page.html', 'http://scrapy.org', None),
|
||||
('https://example.com/page.html', 'http://scrapy.org', None),
|
||||
|
||||
# upgrade: send origin
|
||||
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
|
||||
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
|
||||
|
||||
# test for user/password stripping
|
||||
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
|
||||
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
|
||||
]
|
||||
|
||||
|
||||
class MixinOriginWhenCrossOrigin:
|
||||
scenarii = [
|
||||
# Same origin (protocol, host, port): send referrer
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
|
||||
# Different host: send origin as referrer
|
||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
# exact match required
|
||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
|
||||
# Different port: send origin as referrer
|
||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||
|
||||
# Different protocols: send origin as referrer
|
||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
|
||||
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
|
||||
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
|
||||
# test for user/password stripping
|
||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||
# TLS to non-TLS downgrade: send origin
|
||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
|
||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
|
||||
]
|
||||
|
||||
|
||||
class MixinStrictOriginWhenCrossOrigin:
|
||||
scenarii = [
|
||||
# Same origin (protocol, host, port): send referrer
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||
|
||||
# Different host: send origin as referrer
|
||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
# exact match required
|
||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||
|
||||
# Different port: send origin as referrer
|
||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||
|
||||
# downgrade
|
||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
|
||||
('https://example4.com/page.html', 'http://not.example4.com/', None),
|
||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
|
||||
('https://example4.com/page.html', 'http://not.example4.com/', None),
|
||||
|
||||
# non-TLS to non-TLS
|
||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||
|
||||
# upgrade
|
||||
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
|
||||
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
|
||||
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
|
||||
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
|
||||
|
||||
# Different protocols: send origin as referrer
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||
|
||||
# test for user/password stripping
|
||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||
|
||||
# TLS to non-TLS downgrade: send nothing
|
||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
|
||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
|
||||
]
|
||||
|
||||
|
||||
class MixinUnsafeUrl:
|
||||
scenarii = [
|
||||
# TLS to TLS: send referrer
|
||||
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
|
||||
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
|
||||
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
|
||||
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
|
||||
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
|
||||
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
|
||||
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
|
||||
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
|
||||
|
||||
# TLS to non-TLS: send referrer (yes, it's unsafe)
|
||||
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
|
||||
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
|
||||
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
|
||||
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
|
||||
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
|
||||
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
|
||||
|
||||
# non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe)
|
||||
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
|
||||
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
|
||||
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
|
||||
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
|
||||
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
|
||||
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
|
||||
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
|
||||
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
|
||||
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
|
||||
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
|
||||
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
|
||||
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
|
||||
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||
|
||||
# test for user/password stripping
|
||||
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
|
||||
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
|
||||
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
|
||||
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
|
||||
]
|
||||
|
||||
|
||||
@ -339,12 +339,12 @@ class CustomPythonOrgPolicy(ReferrerPolicy):
|
||||
class TestSettingsCustomPolicy(TestRefererMiddleware):
|
||||
settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'}
|
||||
scenarii = [
|
||||
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
|
||||
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
|
||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
|
||||
|
||||
]
|
||||
|
||||
@ -541,7 +541,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
||||
|
||||
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/1', # parent
|
||||
(
|
||||
'http://scrapytest.org/1', # parent
|
||||
'http://scrapytest.org/2', # target
|
||||
(
|
||||
# redirections: code, URL
|
||||
@ -551,7 +552,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
||||
b'http://scrapytest.org/1', # expected initial referer
|
||||
b'http://scrapytest.org/1', # expected referer for the redirection request
|
||||
),
|
||||
( 'https://scrapytest.org/1',
|
||||
(
|
||||
'https://scrapytest.org/1',
|
||||
'https://scrapytest.org/2',
|
||||
(
|
||||
# redirecting to non-secure URL
|
||||
@ -560,7 +562,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
||||
b'https://scrapytest.org/1',
|
||||
b'https://scrapytest.org/1',
|
||||
),
|
||||
( 'https://scrapytest.org/1',
|
||||
(
|
||||
'https://scrapytest.org/1',
|
||||
'https://scrapytest.com/2',
|
||||
(
|
||||
# redirecting to non-secure URL: different origin
|
||||
@ -602,7 +605,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
||||
"""
|
||||
settings = {'REFERRER_POLICY': 'no-referrer'}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/1', # parent
|
||||
(
|
||||
'http://scrapytest.org/1', # parent
|
||||
'http://scrapytest.org/2', # target
|
||||
(
|
||||
# redirections: code, URL
|
||||
@ -612,7 +616,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
||||
None, # expected initial "Referer"
|
||||
None, # expected "Referer" for the redirection request
|
||||
),
|
||||
( 'https://scrapytest.org/1',
|
||||
(
|
||||
'https://scrapytest.org/1',
|
||||
'https://scrapytest.org/2',
|
||||
(
|
||||
(301, 'http://scrapytest.org/3'),
|
||||
@ -620,7 +625,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
||||
None,
|
||||
None,
|
||||
),
|
||||
( 'https://scrapytest.org/1',
|
||||
(
|
||||
'https://scrapytest.org/1',
|
||||
'https://example.com/2', # different origin
|
||||
(
|
||||
(301, 'http://scrapytest.com/3'),
|
||||
@ -641,7 +647,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
||||
"""
|
||||
settings = {'REFERRER_POLICY': 'same-origin'}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/101', # origin
|
||||
(
|
||||
'http://scrapytest.org/101', # origin
|
||||
'http://scrapytest.org/102', # target
|
||||
(
|
||||
# redirections: code, URL
|
||||
@ -651,7 +658,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapytest.org/101', # expected initial "Referer"
|
||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||
),
|
||||
( 'https://scrapytest.org/201',
|
||||
(
|
||||
'https://scrapytest.org/201',
|
||||
'https://scrapytest.org/202',
|
||||
(
|
||||
# redirecting from secure to non-secure URL == different origin
|
||||
@ -660,7 +668,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/201',
|
||||
None,
|
||||
),
|
||||
( 'https://scrapytest.org/301',
|
||||
(
|
||||
'https://scrapytest.org/301',
|
||||
'https://scrapytest.org/302',
|
||||
(
|
||||
# different domain == different origin
|
||||
@ -683,7 +692,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
"""
|
||||
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/101',
|
||||
(
|
||||
'http://scrapytest.org/101',
|
||||
'http://scrapytest.org/102',
|
||||
(
|
||||
(301, 'http://scrapytest.org/103'),
|
||||
@ -692,7 +702,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapytest.org/', # send origin
|
||||
b'http://scrapytest.org/', # redirects to same origin: send origin
|
||||
),
|
||||
( 'https://scrapytest.org/201',
|
||||
(
|
||||
'https://scrapytest.org/201',
|
||||
'https://scrapytest.org/202',
|
||||
(
|
||||
# redirecting to non-secure URL: no referrer
|
||||
@ -701,7 +712,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/',
|
||||
None,
|
||||
),
|
||||
( 'https://scrapytest.org/301',
|
||||
(
|
||||
'https://scrapytest.org/301',
|
||||
'https://scrapytest.org/302',
|
||||
(
|
||||
# redirecting to non-secure URL (different domain): no referrer
|
||||
@ -710,7 +722,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/',
|
||||
None,
|
||||
),
|
||||
( 'http://scrapy.org/401',
|
||||
(
|
||||
'http://scrapy.org/401',
|
||||
'http://example.com/402',
|
||||
(
|
||||
(301, 'http://scrapytest.org/403'),
|
||||
@ -718,7 +731,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapy.org/',
|
||||
b'http://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapy.org/501',
|
||||
(
|
||||
'https://scrapy.org/501',
|
||||
'https://example.com/502',
|
||||
(
|
||||
# HTTPS all along, so origin referrer is kept as-is
|
||||
@ -728,7 +742,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapy.org/',
|
||||
b'https://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapytest.org/601',
|
||||
(
|
||||
'https://scrapytest.org/601',
|
||||
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
|
||||
(
|
||||
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
|
||||
@ -750,7 +765,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
"""
|
||||
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/101', # origin
|
||||
(
|
||||
'http://scrapytest.org/101', # origin
|
||||
'http://scrapytest.org/102', # target + redirection
|
||||
(
|
||||
# redirections: code, URL
|
||||
@ -760,7 +776,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapytest.org/101', # expected initial referer
|
||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||
),
|
||||
( 'https://scrapytest.org/201',
|
||||
(
|
||||
'https://scrapytest.org/201',
|
||||
'https://scrapytest.org/202',
|
||||
(
|
||||
# redirecting to non-secure URL: send origin
|
||||
@ -769,7 +786,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/201',
|
||||
b'https://scrapytest.org/',
|
||||
),
|
||||
( 'https://scrapytest.org/301',
|
||||
(
|
||||
'https://scrapytest.org/301',
|
||||
'https://scrapytest.org/302',
|
||||
(
|
||||
# redirecting to non-secure URL (different domain): send origin
|
||||
@ -778,7 +796,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/301',
|
||||
b'https://scrapytest.org/',
|
||||
),
|
||||
( 'http://scrapy.org/401',
|
||||
(
|
||||
'http://scrapy.org/401',
|
||||
'http://example.com/402',
|
||||
(
|
||||
(301, 'http://scrapytest.org/403'),
|
||||
@ -786,7 +805,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapy.org/',
|
||||
b'http://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapy.org/501',
|
||||
(
|
||||
'https://scrapy.org/501',
|
||||
'https://example.com/502',
|
||||
(
|
||||
# all different domains: send origin
|
||||
@ -796,7 +816,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapy.org/',
|
||||
b'https://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapytest.org/301',
|
||||
(
|
||||
'https://scrapytest.org/301',
|
||||
'http://scrapytest.org/302', # TLS to non-TLS: send origin
|
||||
(
|
||||
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
|
||||
@ -820,7 +841,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
"""
|
||||
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
|
||||
scenarii = [
|
||||
( 'http://scrapytest.org/101', # origin
|
||||
(
|
||||
'http://scrapytest.org/101', # origin
|
||||
'http://scrapytest.org/102', # target + redirection
|
||||
(
|
||||
# redirections: code, URL
|
||||
@ -830,7 +852,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapytest.org/101', # expected initial referer
|
||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||
),
|
||||
( 'https://scrapytest.org/201',
|
||||
(
|
||||
'https://scrapytest.org/201',
|
||||
'https://scrapytest.org/202',
|
||||
(
|
||||
# redirecting to non-secure URL: do not send the "Referer" header
|
||||
@ -839,7 +862,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/201',
|
||||
None,
|
||||
),
|
||||
( 'https://scrapytest.org/301',
|
||||
(
|
||||
'https://scrapytest.org/301',
|
||||
'https://scrapytest.org/302',
|
||||
(
|
||||
# redirecting to non-secure URL (different domain): send origin
|
||||
@ -848,7 +872,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapytest.org/301',
|
||||
None,
|
||||
),
|
||||
( 'http://scrapy.org/401',
|
||||
(
|
||||
'http://scrapy.org/401',
|
||||
'http://example.com/402',
|
||||
(
|
||||
(301, 'http://scrapytest.org/403'),
|
||||
@ -856,7 +881,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'http://scrapy.org/',
|
||||
b'http://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapy.org/501',
|
||||
(
|
||||
'https://scrapy.org/501',
|
||||
'https://example.com/502',
|
||||
(
|
||||
# all different domains: send origin
|
||||
@ -866,7 +892,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
||||
b'https://scrapy.org/',
|
||||
b'https://scrapy.org/',
|
||||
),
|
||||
( 'https://scrapytest.org/601',
|
||||
(
|
||||
'https://scrapytest.org/601',
|
||||
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
|
||||
(
|
||||
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing
|
||||
|
@ -250,10 +250,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
||||
result = [row for row in csv]
|
||||
self.assertEqual(result,
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
# explicit type check cuz' we no like stinkin' autocasting! yarrr
|
||||
for result_row in result:
|
||||
@ -266,10 +266,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv = csviter(response, delimiter='\t')
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
def test_csviter_quotechar(self):
|
||||
body1 = get_testdata('feeds', 'feed-sample6.csv')
|
||||
@ -279,19 +279,19 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv1 = csviter(response1, quotechar="'")
|
||||
|
||||
self.assertEqual([row for row in csv1],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
response2 = TextResponse(url="http://example.com/", body=body2)
|
||||
csv2 = csviter(response2, delimiter="|", quotechar="'")
|
||||
|
||||
self.assertEqual([row for row in csv2],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
def test_csviter_wrong_quotechar(self):
|
||||
body = get_testdata('feeds', 'feed-sample6.csv')
|
||||
@ -299,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv = csviter(response)
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
|
||||
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
|
||||
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
|
||||
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
|
||||
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
|
||||
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
|
||||
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
|
||||
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
|
||||
|
||||
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
|
||||
@ -310,10 +310,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv = csviter(response, delimiter='\t')
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
def test_csviter_headers(self):
|
||||
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
|
||||
@ -323,10 +323,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
def test_csviter_falserow(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv')
|
||||
@ -336,10 +336,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
||||
csv = csviter(response)
|
||||
|
||||
self.assertEqual([row for row in csv],
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||
|
||||
def test_csviter_exception(self):
|
||||
body = get_testdata('feeds', 'feed-sample3.csv')
|
||||
|
@ -203,29 +203,29 @@ def create_skipped_scheme_t(args):
|
||||
|
||||
|
||||
for k, args in enumerate([
|
||||
('/index', 'file://'),
|
||||
('/index.html', 'file://'),
|
||||
('./index.html', 'file://'),
|
||||
('../index.html', 'file://'),
|
||||
('../../index.html', 'file://'),
|
||||
('./data/index.html', 'file://'),
|
||||
('.hidden/data/index.html', 'file://'),
|
||||
('/home/user/www/index.html', 'file://'),
|
||||
('//home/user/www/index.html', 'file://'),
|
||||
('file:///home/user/www/index.html', 'file://'),
|
||||
('/index', 'file://'),
|
||||
('/index.html', 'file://'),
|
||||
('./index.html', 'file://'),
|
||||
('../index.html', 'file://'),
|
||||
('../../index.html', 'file://'),
|
||||
('./data/index.html', 'file://'),
|
||||
('.hidden/data/index.html', 'file://'),
|
||||
('/home/user/www/index.html', 'file://'),
|
||||
('//home/user/www/index.html', 'file://'),
|
||||
('file:///home/user/www/index.html', 'file://'),
|
||||
|
||||
('index.html', 'http://'),
|
||||
('example.com', 'http://'),
|
||||
('www.example.com', 'http://'),
|
||||
('www.example.com/index.html', 'http://'),
|
||||
('http://example.com', 'http://'),
|
||||
('http://example.com/index.html', 'http://'),
|
||||
('localhost', 'http://'),
|
||||
('localhost/index.html', 'http://'),
|
||||
('index.html', 'http://'),
|
||||
('example.com', 'http://'),
|
||||
('www.example.com', 'http://'),
|
||||
('www.example.com/index.html', 'http://'),
|
||||
('http://example.com', 'http://'),
|
||||
('http://example.com/index.html', 'http://'),
|
||||
('localhost', 'http://'),
|
||||
('localhost/index.html', 'http://'),
|
||||
|
||||
# some corner cases (default to http://)
|
||||
('/', 'http://'),
|
||||
('.../test', 'http://'),
|
||||
('/', 'http://'),
|
||||
('.../test', 'http://'),
|
||||
|
||||
], start=1):
|
||||
t_method = create_guess_scheme_t(args)
|
||||
|
@ -53,28 +53,28 @@ class ParseUrlTestCase(unittest.TestCase):
|
||||
def testParse(self):
|
||||
lip = '127.0.0.1'
|
||||
tests = (
|
||||
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
|
||||
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
|
||||
("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
||||
("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
|
||||
|
||||
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
|
||||
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
|
||||
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
|
||||
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
|
||||
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
|
||||
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
|
||||
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
|
||||
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
|
||||
("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
|
||||
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
|
||||
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
|
||||
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
|
||||
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
|
||||
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
|
||||
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
|
||||
|
||||
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
|
||||
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
|
||||
("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
|
||||
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
|
||||
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
|
||||
|
||||
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
|
||||
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
|
||||
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
|
||||
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
|
||||
)
|
||||
|
||||
for url, test in tests:
|
||||
|
Loading…
x
Reference in New Issue
Block a user