mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-13 16:24:52 +00:00
Fix pycodestyle E2XX (whitespace) (#4468)
This commit is contained in:
parent
39b01b6892
commit
94d7ad76cb
30
pytest.ini
30
pytest.ini
@@ -54,7 +54,7 @@ flake8-ignore =
|
|||||||
scrapy/core/downloader/__init__.py E501
|
scrapy/core/downloader/__init__.py E501
|
||||||
scrapy/core/downloader/contextfactory.py E501 E128 E126
|
scrapy/core/downloader/contextfactory.py E501 E128 E126
|
||||||
scrapy/core/downloader/middleware.py E501
|
scrapy/core/downloader/middleware.py E501
|
||||||
scrapy/core/downloader/tls.py E501 E241
|
scrapy/core/downloader/tls.py E501
|
||||||
scrapy/core/downloader/webclient.py E731 E501 E128 E126
|
scrapy/core/downloader/webclient.py E731 E501 E128 E126
|
||||||
scrapy/core/downloader/handlers/__init__.py E501
|
scrapy/core/downloader/handlers/__init__.py E501
|
||||||
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
|
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
|
||||||
@@ -97,9 +97,9 @@ flake8-ignore =
|
|||||||
scrapy/loader/processors.py E501
|
scrapy/loader/processors.py E501
|
||||||
# scrapy/pipelines
|
# scrapy/pipelines
|
||||||
scrapy/pipelines/__init__.py E501
|
scrapy/pipelines/__init__.py E501
|
||||||
scrapy/pipelines/files.py E116 E501 E266
|
scrapy/pipelines/files.py E116 E501
|
||||||
scrapy/pipelines/images.py E265 E501
|
scrapy/pipelines/images.py E501
|
||||||
scrapy/pipelines/media.py E125 E501 E266
|
scrapy/pipelines/media.py E125 E501
|
||||||
# scrapy/selector
|
# scrapy/selector
|
||||||
scrapy/selector/__init__.py F403
|
scrapy/selector/__init__.py F403
|
||||||
scrapy/selector/unified.py E501 E111
|
scrapy/selector/unified.py E501 E111
|
||||||
@@ -149,7 +149,7 @@ flake8-ignore =
|
|||||||
scrapy/__init__.py E402 E501
|
scrapy/__init__.py E402 E501
|
||||||
scrapy/cmdline.py E501
|
scrapy/cmdline.py E501
|
||||||
scrapy/crawler.py E501
|
scrapy/crawler.py E501
|
||||||
scrapy/dupefilters.py E501 E202
|
scrapy/dupefilters.py E501
|
||||||
scrapy/exceptions.py E501
|
scrapy/exceptions.py E501
|
||||||
scrapy/exporters.py E501
|
scrapy/exporters.py E501
|
||||||
scrapy/interfaces.py E501
|
scrapy/interfaces.py E501
|
||||||
@@ -178,13 +178,13 @@ flake8-ignore =
|
|||||||
tests/test_command_shell.py E501 E128
|
tests/test_command_shell.py E501 E128
|
||||||
tests/test_commands.py E128 E501
|
tests/test_commands.py E128 E501
|
||||||
tests/test_contracts.py E501 E128
|
tests/test_contracts.py E501 E128
|
||||||
tests/test_crawl.py E501 E741 E265
|
tests/test_crawl.py E501 E741
|
||||||
tests/test_crawler.py F841 E501
|
tests/test_crawler.py F841 E501
|
||||||
tests/test_dependencies.py F841 E501
|
tests/test_dependencies.py F841 E501
|
||||||
tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123
|
tests/test_downloader_handlers.py E124 E127 E128 E501 E126 E123
|
||||||
tests/test_downloadermiddleware.py E501
|
tests/test_downloadermiddleware.py E501
|
||||||
tests/test_downloadermiddleware_ajaxcrawlable.py E501
|
tests/test_downloadermiddleware_ajaxcrawlable.py E501
|
||||||
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126
|
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E126
|
||||||
tests/test_downloadermiddleware_decompression.py E127
|
tests/test_downloadermiddleware_decompression.py E127
|
||||||
tests/test_downloadermiddleware_defaultheaders.py E501
|
tests/test_downloadermiddleware_defaultheaders.py E501
|
||||||
tests/test_downloadermiddleware_downloadtimeout.py E501
|
tests/test_downloadermiddleware_downloadtimeout.py E501
|
||||||
@@ -199,15 +199,15 @@ flake8-ignore =
|
|||||||
tests/test_engine.py E401 E501 E128
|
tests/test_engine.py E401 E501 E128
|
||||||
tests/test_exporters.py E501 E731 E128 E124
|
tests/test_exporters.py E501 E731 E128 E124
|
||||||
tests/test_extension_telnet.py F841
|
tests/test_extension_telnet.py F841
|
||||||
tests/test_feedexport.py E501 F841 E241
|
tests/test_feedexport.py E501 F841
|
||||||
tests/test_http_cookies.py E501
|
tests/test_http_cookies.py E501
|
||||||
tests/test_http_headers.py E501
|
tests/test_http_headers.py E501
|
||||||
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
|
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
|
||||||
tests/test_http_response.py E501 E128 E265
|
tests/test_http_response.py E501 E128
|
||||||
tests/test_item.py E128 F841
|
tests/test_item.py E128 F841
|
||||||
tests/test_link.py E501
|
tests/test_link.py E501
|
||||||
tests/test_linkextractors.py E501 E128 E124
|
tests/test_linkextractors.py E501 E128 E124
|
||||||
tests/test_loader.py E501 E731 E741 E128 E117 E241
|
tests/test_loader.py E501 E731 E741 E128 E117
|
||||||
tests/test_logformatter.py E128 E501 E122
|
tests/test_logformatter.py E128 E501 E122
|
||||||
tests/test_mail.py E128 E501
|
tests/test_mail.py E128 E501
|
||||||
tests/test_middleware.py E501 E128
|
tests/test_middleware.py E501 E128
|
||||||
@@ -226,7 +226,7 @@ flake8-ignore =
|
|||||||
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
|
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
|
||||||
tests/test_spidermiddleware_offsite.py E501 E128 E111
|
tests/test_spidermiddleware_offsite.py E501 E128 E111
|
||||||
tests/test_spidermiddleware_output_chain.py E501
|
tests/test_spidermiddleware_output_chain.py E501
|
||||||
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
|
tests/test_spidermiddleware_referer.py E501 F841 E125 E124 E501 E121
|
||||||
tests/test_squeues.py E501 E741
|
tests/test_squeues.py E501 E741
|
||||||
tests/test_utils_asyncio.py E501
|
tests/test_utils_asyncio.py E501
|
||||||
tests/test_utils_conf.py E501 E128
|
tests/test_utils_conf.py E501 E128
|
||||||
@@ -235,7 +235,7 @@ flake8-ignore =
|
|||||||
tests/test_utils_defer.py E501 F841
|
tests/test_utils_defer.py E501 F841
|
||||||
tests/test_utils_deprecate.py F841 E501
|
tests/test_utils_deprecate.py F841 E501
|
||||||
tests/test_utils_http.py E501 E128 W504
|
tests/test_utils_http.py E501 E128 W504
|
||||||
tests/test_utils_iterators.py E501 E128 E129 E241
|
tests/test_utils_iterators.py E501 E128 E129
|
||||||
tests/test_utils_log.py E741
|
tests/test_utils_log.py E741
|
||||||
tests/test_utils_python.py E501 E731
|
tests/test_utils_python.py E501 E731
|
||||||
tests/test_utils_reqser.py E501 E128
|
tests/test_utils_reqser.py E501 E128
|
||||||
@@ -243,8 +243,8 @@ flake8-ignore =
|
|||||||
tests/test_utils_response.py E501
|
tests/test_utils_response.py E501
|
||||||
tests/test_utils_signal.py E741 F841 E731
|
tests/test_utils_signal.py E741 F841 E731
|
||||||
tests/test_utils_sitemap.py E128 E501 E124
|
tests/test_utils_sitemap.py E128 E501 E124
|
||||||
tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123
|
tests/test_utils_url.py E501 E127 E125 E501 E126 E123
|
||||||
tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126
|
tests/test_webclient.py E501 E128 E122 E402 E123 E126
|
||||||
tests/test_cmdline/__init__.py E501
|
tests/test_cmdline/__init__.py E501
|
||||||
tests/test_settings/__init__.py E501 E128
|
tests/test_settings/__init__.py E501 E128
|
||||||
tests/test_spiderloader/__init__.py E128 E501
|
tests/test_spiderloader/__init__.py E128 E501
|
||||||
|
@@ -20,8 +20,8 @@ METHOD_TLSv12 = 'TLSv1.2'
|
|||||||
|
|
||||||
|
|
||||||
openssl_methods = {
|
openssl_methods = {
|
||||||
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
|
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
|
||||||
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
|
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
|
||||||
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
|
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
|
||||||
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
|
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
|
||||||
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
|
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
|
||||||
|
@@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
|
|||||||
def log(self, request, spider):
|
def log(self, request, spider):
|
||||||
if self.debug:
|
if self.debug:
|
||||||
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
|
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
|
||||||
args = {'request': request, 'referer': referer_str(request) }
|
args = {'request': request, 'referer': referer_str(request)}
|
||||||
self.logger.debug(msg, args, extra={'spider': spider})
|
self.logger.debug(msg, args, extra={'spider': spider})
|
||||||
elif self.logdupes:
|
elif self.logdupes:
|
||||||
msg = ("Filtered duplicate request: %(request)s"
|
msg = ("Filtered duplicate request: %(request)s"
|
||||||
|
@@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline):
|
|||||||
spider.crawler.stats.inc_value('file_count', spider=spider)
|
spider.crawler.stats.inc_value('file_count', spider=spider)
|
||||||
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
|
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
|
||||||
|
|
||||||
### Overridable Interface
|
# Overridable Interface
|
||||||
def get_media_requests(self, item, info):
|
def get_media_requests(self, item, info):
|
||||||
return [Request(x) for x in item.get(self.files_urls_field, [])]
|
return [Request(x) for x in item.get(self.files_urls_field, [])]
|
||||||
|
|
||||||
|
@@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes
|
|||||||
from scrapy.http import Request
|
from scrapy.http import Request
|
||||||
from scrapy.settings import Settings
|
from scrapy.settings import Settings
|
||||||
from scrapy.exceptions import DropItem
|
from scrapy.exceptions import DropItem
|
||||||
#TODO: from scrapy.pipelines.media import MediaPipeline
|
# TODO: from scrapy.pipelines.media import MediaPipeline
|
||||||
from scrapy.pipelines.files import FileException, FilesPipeline
|
from scrapy.pipelines.files import FileException, FilesPipeline
|
||||||
|
|
||||||
|
|
||||||
|
@@ -166,7 +166,7 @@ class MediaPipeline:
|
|||||||
for wad in info.waiting.pop(fp):
|
for wad in info.waiting.pop(fp):
|
||||||
defer_result(result).chainDeferred(wad)
|
defer_result(result).chainDeferred(wad)
|
||||||
|
|
||||||
### Overridable Interface
|
# Overridable Interface
|
||||||
def media_to_download(self, request, info):
|
def media_to_download(self, request, info):
|
||||||
"""Check request before starting download"""
|
"""Check request before starting download"""
|
||||||
pass
|
pass
|
||||||
|
@@ -147,9 +147,9 @@ class CrawlTestCase(TestCase):
|
|||||||
settings = {"CONCURRENT_REQUESTS": 1}
|
settings = {"CONCURRENT_REQUESTS": 1}
|
||||||
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
|
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
|
||||||
yield crawler.crawl(mockserver=self.mockserver)
|
yield crawler.crawl(mockserver=self.mockserver)
|
||||||
#self.assertTrue(False, crawler.spider.seedsseen)
|
self.assertTrue(
|
||||||
#self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
|
crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
|
||||||
# crawler.spider.seedsseen)
|
crawler.spider.seedsseen)
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
def test_start_requests_dupes(self):
|
def test_start_requests_dupes(self):
|
||||||
|
@@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase):
|
|||||||
assert self.mw.process_request(req4, self.spider) is None
|
assert self.mw.process_request(req4, self.spider) is None
|
||||||
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
|
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
|
||||||
|
|
||||||
#cookies from hosts with port
|
# cookies from hosts with port
|
||||||
req5_1 = Request('http://scrapytest.org:1104/')
|
req5_1 = Request('http://scrapytest.org:1104/')
|
||||||
assert self.mw.process_request(req5_1, self.spider) is None
|
assert self.mw.process_request(req5_1, self.spider) is None
|
||||||
|
|
||||||
@@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase):
|
|||||||
assert self.mw.process_request(req5_3, self.spider) is None
|
assert self.mw.process_request(req5_3, self.spider) is None
|
||||||
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
|
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
|
||||||
|
|
||||||
#skip cookie retrieval for not http request
|
# skip cookie retrieval for not http request
|
||||||
req6 = Request('file:///scrapy/sometempfile')
|
req6 = Request('file:///scrapy/sometempfile')
|
||||||
assert self.mw.process_request(req6, self.spider) is None
|
assert self.mw.process_request(req6, self.spider) is None
|
||||||
self.assertEqual(req6.headers.get('Cookie'), None)
|
self.assertEqual(req6.headers.get('Cookie'), None)
|
||||||
|
@@ -438,8 +438,8 @@ class TextResponseTest(BaseResponseTest):
|
|||||||
assert u'<span>value</span>' in r.text, repr(r.text)
|
assert u'<span>value</span>' in r.text, repr(r.text)
|
||||||
|
|
||||||
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
|
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
|
||||||
#r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
|
# r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
|
||||||
#assert u'\ufffd' in r.text, repr(r.text)
|
# assert u'\ufffd' in r.text, repr(r.text)
|
||||||
|
|
||||||
def test_selector(self):
|
def test_selector(self):
|
||||||
body = b"<html><head><title>Some page</title><body></body></html>"
|
body = b"<html><head><title>Some page</title><body></body></html>"
|
||||||
|
@@ -24,7 +24,7 @@ class TestRefererMiddleware(TestCase):
|
|||||||
resp_headers = {}
|
resp_headers = {}
|
||||||
settings = {}
|
settings = {}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
|
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
|
||||||
]
|
]
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
@@ -54,57 +54,57 @@ class MixinDefault:
|
|||||||
with some additional filtering of s3://
|
with some additional filtering of s3://
|
||||||
"""
|
"""
|
||||||
scenarii = [
|
scenarii = [
|
||||||
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
|
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
|
||||||
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
|
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
|
||||||
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||||
('https://example.com/', 'http://scrapy.org/', None),
|
('https://example.com/', 'http://scrapy.org/', None),
|
||||||
|
|
||||||
# no credentials leak
|
# no credentials leak
|
||||||
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
|
||||||
|
|
||||||
# no referrer leak for local schemes
|
# no referrer leak for local schemes
|
||||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
|
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
|
||||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||||
|
|
||||||
# no referrer leak for s3 origins
|
# no referrer leak for s3 origins
|
||||||
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
|
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
|
||||||
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
|
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinNoReferrer:
|
class MixinNoReferrer:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
('https://example.com/page.html', 'https://example.com/', None),
|
('https://example.com/page.html', 'https://example.com/', None),
|
||||||
('http://www.example.com/', 'https://scrapy.org/', None),
|
('http://www.example.com/', 'https://scrapy.org/', None),
|
||||||
('http://www.example.com/', 'http://scrapy.org/', None),
|
('http://www.example.com/', 'http://scrapy.org/', None),
|
||||||
('https://www.example.com/', 'http://scrapy.org/', None),
|
('https://www.example.com/', 'http://scrapy.org/', None),
|
||||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinNoReferrerWhenDowngrade:
|
class MixinNoReferrerWhenDowngrade:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# TLS to TLS: send non-empty referrer
|
# TLS to TLS: send non-empty referrer
|
||||||
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
|
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
|
||||||
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||||
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
|
||||||
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
|
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
|
||||||
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
|
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
|
||||||
|
|
||||||
# TLS to non-TLS: do not send referrer
|
# TLS to non-TLS: do not send referrer
|
||||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||||
('https://example.com/page.html', 'http://scrapy.org/', None),
|
('https://example.com/page.html', 'http://scrapy.org/', None),
|
||||||
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
|
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
|
||||||
|
|
||||||
# non-TLS to TLS or non-TLS: send referrer
|
# non-TLS to TLS or non-TLS: send referrer
|
||||||
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
|
||||||
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
|
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
|
||||||
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
|
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
|
||||||
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
|
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
|
||||||
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
|
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||||
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
|
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
|
||||||
@@ -114,43 +114,43 @@ class MixinNoReferrerWhenDowngrade:
|
|||||||
class MixinSameOrigin:
|
class MixinSameOrigin:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# Same origin (protocol, host, port): send referrer
|
# Same origin (protocol, host, port): send referrer
|
||||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||||
|
|
||||||
# Different host: do NOT send referrer
|
# Different host: do NOT send referrer
|
||||||
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
|
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
|
||||||
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
|
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
|
||||||
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
|
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
|
||||||
|
|
||||||
# Different port: do NOT send referrer
|
# Different port: do NOT send referrer
|
||||||
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
|
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
|
||||||
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
|
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
|
||||||
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
|
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
|
||||||
|
|
||||||
# Different protocols: do NOT send referrer
|
# Different protocols: do NOT send referrer
|
||||||
('https://example.com/page.html', 'http://example.com/not-page.html', None),
|
('https://example.com/page.html', 'http://example.com/not-page.html', None),
|
||||||
('https://example.com/page.html', 'http://not.example.com/', None),
|
('https://example.com/page.html', 'http://not.example.com/', None),
|
||||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||||
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
|
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
|
||||||
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
|
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinOrigin:
|
class MixinOrigin:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
|
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
|
||||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||||
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
||||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
|
||||||
@@ -160,129 +160,129 @@ class MixinOrigin:
|
|||||||
class MixinStrictOrigin:
|
class MixinStrictOrigin:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
|
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
|
||||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
|
||||||
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||||
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
|
||||||
|
|
||||||
# downgrade: send nothing
|
# downgrade: send nothing
|
||||||
('https://example.com/page.html', 'http://scrapy.org', None),
|
('https://example.com/page.html', 'http://scrapy.org', None),
|
||||||
|
|
||||||
# upgrade: send origin
|
# upgrade: send origin
|
||||||
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
|
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
|
||||||
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
|
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinOriginWhenCrossOrigin:
|
class MixinOriginWhenCrossOrigin:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# Same origin (protocol, host, port): send referrer
|
# Same origin (protocol, host, port): send referrer
|
||||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||||
|
|
||||||
# Different host: send origin as referrer
|
# Different host: send origin as referrer
|
||||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||||
# exact match required
|
# exact match required
|
||||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||||
|
|
||||||
# Different port: send origin as referrer
|
# Different port: send origin as referrer
|
||||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||||
|
|
||||||
# Different protocols: send origin as referrer
|
# Different protocols: send origin as referrer
|
||||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
|
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
|
||||||
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
|
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
|
||||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||||
# TLS to non-TLS downgrade: send origin
|
# TLS to non-TLS downgrade: send origin
|
||||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
|
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinStrictOriginWhenCrossOrigin:
|
class MixinStrictOriginWhenCrossOrigin:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# Same origin (protocol, host, port): send referrer
|
# Same origin (protocol, host, port): send referrer
|
||||||
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
|
||||||
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
|
||||||
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
|
||||||
|
|
||||||
# Different host: send origin as referrer
|
# Different host: send origin as referrer
|
||||||
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
|
||||||
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
|
||||||
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
|
||||||
# exact match required
|
# exact match required
|
||||||
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
|
||||||
|
|
||||||
# Different port: send origin as referrer
|
# Different port: send origin as referrer
|
||||||
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
|
||||||
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
|
||||||
|
|
||||||
# downgrade
|
# downgrade
|
||||||
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
|
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
|
||||||
('https://example4.com/page.html', 'http://not.example4.com/', None),
|
('https://example4.com/page.html', 'http://not.example4.com/', None),
|
||||||
|
|
||||||
# non-TLS to non-TLS
|
# non-TLS to non-TLS
|
||||||
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
|
||||||
|
|
||||||
# upgrade
|
# upgrade
|
||||||
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
|
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
|
||||||
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
|
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
|
||||||
|
|
||||||
# Different protocols: send origin as referrer
|
# Different protocols: send origin as referrer
|
||||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||||
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
|
||||||
|
|
||||||
# TLS to non-TLS downgrade: send nothing
|
# TLS to non-TLS downgrade: send nothing
|
||||||
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
|
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
class MixinUnsafeUrl:
|
class MixinUnsafeUrl:
|
||||||
scenarii = [
|
scenarii = [
|
||||||
# TLS to TLS: send referrer
|
# TLS to TLS: send referrer
|
||||||
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
|
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
|
||||||
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
|
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
|
||||||
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||||
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
|
||||||
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
|
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
|
||||||
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
|
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
|
||||||
|
|
||||||
# TLS to non-TLS: send referrer (yes, it's unsafe)
|
# TLS to non-TLS: send referrer (yes, it's unsafe)
|
||||||
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
|
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
|
||||||
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
|
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
|
||||||
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
|
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
|
||||||
|
|
||||||
# non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe)
|
# non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe)
|
||||||
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
|
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
|
||||||
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
|
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
|
||||||
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
|
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
|
||||||
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
|
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
|
||||||
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
|
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
|
||||||
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
|
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
|
||||||
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||||
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
|
||||||
|
|
||||||
# test for user/password stripping
|
# test for user/password stripping
|
||||||
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
|
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
|
||||||
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
|
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -339,12 +339,12 @@ class CustomPythonOrgPolicy(ReferrerPolicy):
|
|||||||
class TestSettingsCustomPolicy(TestRefererMiddleware):
|
class TestSettingsCustomPolicy(TestRefererMiddleware):
|
||||||
settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'}
|
settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||||
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||||
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
|
||||||
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
|
||||||
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
|
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
|
||||||
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
|
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -541,7 +541,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
|||||||
|
|
||||||
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
|
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/1', # parent
|
(
|
||||||
|
'http://scrapytest.org/1', # parent
|
||||||
'http://scrapytest.org/2', # target
|
'http://scrapytest.org/2', # target
|
||||||
(
|
(
|
||||||
# redirections: code, URL
|
# redirections: code, URL
|
||||||
@ -551,7 +552,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
|||||||
b'http://scrapytest.org/1', # expected initial referer
|
b'http://scrapytest.org/1', # expected initial referer
|
||||||
b'http://scrapytest.org/1', # expected referer for the redirection request
|
b'http://scrapytest.org/1', # expected referer for the redirection request
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/1',
|
(
|
||||||
|
'https://scrapytest.org/1',
|
||||||
'https://scrapytest.org/2',
|
'https://scrapytest.org/2',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL
|
# redirecting to non-secure URL
|
||||||
@ -560,7 +562,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
|
|||||||
b'https://scrapytest.org/1',
|
b'https://scrapytest.org/1',
|
||||||
b'https://scrapytest.org/1',
|
b'https://scrapytest.org/1',
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/1',
|
(
|
||||||
|
'https://scrapytest.org/1',
|
||||||
'https://scrapytest.com/2',
|
'https://scrapytest.com/2',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL: different origin
|
# redirecting to non-secure URL: different origin
|
||||||
@ -602,7 +605,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
|||||||
"""
|
"""
|
||||||
settings = {'REFERRER_POLICY': 'no-referrer'}
|
settings = {'REFERRER_POLICY': 'no-referrer'}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/1', # parent
|
(
|
||||||
|
'http://scrapytest.org/1', # parent
|
||||||
'http://scrapytest.org/2', # target
|
'http://scrapytest.org/2', # target
|
||||||
(
|
(
|
||||||
# redirections: code, URL
|
# redirections: code, URL
|
||||||
@ -612,7 +616,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
|||||||
None, # expected initial "Referer"
|
None, # expected initial "Referer"
|
||||||
None, # expected "Referer" for the redirection request
|
None, # expected "Referer" for the redirection request
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/1',
|
(
|
||||||
|
'https://scrapytest.org/1',
|
||||||
'https://scrapytest.org/2',
|
'https://scrapytest.org/2',
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.org/3'),
|
(301, 'http://scrapytest.org/3'),
|
||||||
@ -620,7 +625,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
|
|||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/1',
|
(
|
||||||
|
'https://scrapytest.org/1',
|
||||||
'https://example.com/2', # different origin
|
'https://example.com/2', # different origin
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.com/3'),
|
(301, 'http://scrapytest.com/3'),
|
||||||
@ -641,7 +647,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
|||||||
"""
|
"""
|
||||||
settings = {'REFERRER_POLICY': 'same-origin'}
|
settings = {'REFERRER_POLICY': 'same-origin'}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/101', # origin
|
(
|
||||||
|
'http://scrapytest.org/101', # origin
|
||||||
'http://scrapytest.org/102', # target
|
'http://scrapytest.org/102', # target
|
||||||
(
|
(
|
||||||
# redirections: code, URL
|
# redirections: code, URL
|
||||||
@ -651,7 +658,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapytest.org/101', # expected initial "Referer"
|
b'http://scrapytest.org/101', # expected initial "Referer"
|
||||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/201',
|
(
|
||||||
|
'https://scrapytest.org/201',
|
||||||
'https://scrapytest.org/202',
|
'https://scrapytest.org/202',
|
||||||
(
|
(
|
||||||
# redirecting from secure to non-secure URL == different origin
|
# redirecting from secure to non-secure URL == different origin
|
||||||
@ -660,7 +668,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/201',
|
b'https://scrapytest.org/201',
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/301',
|
(
|
||||||
|
'https://scrapytest.org/301',
|
||||||
'https://scrapytest.org/302',
|
'https://scrapytest.org/302',
|
||||||
(
|
(
|
||||||
# different domain == different origin
|
# different domain == different origin
|
||||||
@ -683,7 +692,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
"""
|
"""
|
||||||
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
|
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/101',
|
(
|
||||||
|
'http://scrapytest.org/101',
|
||||||
'http://scrapytest.org/102',
|
'http://scrapytest.org/102',
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.org/103'),
|
(301, 'http://scrapytest.org/103'),
|
||||||
@ -692,7 +702,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapytest.org/', # send origin
|
b'http://scrapytest.org/', # send origin
|
||||||
b'http://scrapytest.org/', # redirects to same origin: send origin
|
b'http://scrapytest.org/', # redirects to same origin: send origin
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/201',
|
(
|
||||||
|
'https://scrapytest.org/201',
|
||||||
'https://scrapytest.org/202',
|
'https://scrapytest.org/202',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL: no referrer
|
# redirecting to non-secure URL: no referrer
|
||||||
@ -701,7 +712,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/',
|
b'https://scrapytest.org/',
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/301',
|
(
|
||||||
|
'https://scrapytest.org/301',
|
||||||
'https://scrapytest.org/302',
|
'https://scrapytest.org/302',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL (different domain): no referrer
|
# redirecting to non-secure URL (different domain): no referrer
|
||||||
@ -710,7 +722,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/',
|
b'https://scrapytest.org/',
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'http://scrapy.org/401',
|
(
|
||||||
|
'http://scrapy.org/401',
|
||||||
'http://example.com/402',
|
'http://example.com/402',
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.org/403'),
|
(301, 'http://scrapytest.org/403'),
|
||||||
@ -718,7 +731,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapy.org/501',
|
(
|
||||||
|
'https://scrapy.org/501',
|
||||||
'https://example.com/502',
|
'https://example.com/502',
|
||||||
(
|
(
|
||||||
# HTTPS all along, so origin referrer is kept as-is
|
# HTTPS all along, so origin referrer is kept as-is
|
||||||
@ -728,7 +742,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/601',
|
(
|
||||||
|
'https://scrapytest.org/601',
|
||||||
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
|
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
|
||||||
(
|
(
|
||||||
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
|
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
|
||||||
@ -750,7 +765,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
"""
|
"""
|
||||||
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
|
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/101', # origin
|
(
|
||||||
|
'http://scrapytest.org/101', # origin
|
||||||
'http://scrapytest.org/102', # target + redirection
|
'http://scrapytest.org/102', # target + redirection
|
||||||
(
|
(
|
||||||
# redirections: code, URL
|
# redirections: code, URL
|
||||||
@ -760,7 +776,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapytest.org/101', # expected initial referer
|
b'http://scrapytest.org/101', # expected initial referer
|
||||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/201',
|
(
|
||||||
|
'https://scrapytest.org/201',
|
||||||
'https://scrapytest.org/202',
|
'https://scrapytest.org/202',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL: send origin
|
# redirecting to non-secure URL: send origin
|
||||||
@ -769,7 +786,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/201',
|
b'https://scrapytest.org/201',
|
||||||
b'https://scrapytest.org/',
|
b'https://scrapytest.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/301',
|
(
|
||||||
|
'https://scrapytest.org/301',
|
||||||
'https://scrapytest.org/302',
|
'https://scrapytest.org/302',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL (different domain): send origin
|
# redirecting to non-secure URL (different domain): send origin
|
||||||
@ -778,7 +796,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/301',
|
b'https://scrapytest.org/301',
|
||||||
b'https://scrapytest.org/',
|
b'https://scrapytest.org/',
|
||||||
),
|
),
|
||||||
( 'http://scrapy.org/401',
|
(
|
||||||
|
'http://scrapy.org/401',
|
||||||
'http://example.com/402',
|
'http://example.com/402',
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.org/403'),
|
(301, 'http://scrapytest.org/403'),
|
||||||
@ -786,7 +805,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapy.org/501',
|
(
|
||||||
|
'https://scrapy.org/501',
|
||||||
'https://example.com/502',
|
'https://example.com/502',
|
||||||
(
|
(
|
||||||
# all different domains: send origin
|
# all different domains: send origin
|
||||||
@ -796,7 +816,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/301',
|
(
|
||||||
|
'https://scrapytest.org/301',
|
||||||
'http://scrapytest.org/302', # TLS to non-TLS: send origin
|
'http://scrapytest.org/302', # TLS to non-TLS: send origin
|
||||||
(
|
(
|
||||||
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
|
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
|
||||||
@ -820,7 +841,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
"""
|
"""
|
||||||
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
|
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
|
||||||
scenarii = [
|
scenarii = [
|
||||||
( 'http://scrapytest.org/101', # origin
|
(
|
||||||
|
'http://scrapytest.org/101', # origin
|
||||||
'http://scrapytest.org/102', # target + redirection
|
'http://scrapytest.org/102', # target + redirection
|
||||||
(
|
(
|
||||||
# redirections: code, URL
|
# redirections: code, URL
|
||||||
@ -830,7 +852,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapytest.org/101', # expected initial referer
|
b'http://scrapytest.org/101', # expected initial referer
|
||||||
b'http://scrapytest.org/101', # expected referer for the redirection request
|
b'http://scrapytest.org/101', # expected referer for the redirection request
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/201',
|
(
|
||||||
|
'https://scrapytest.org/201',
|
||||||
'https://scrapytest.org/202',
|
'https://scrapytest.org/202',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL: do not send the "Referer" header
|
# redirecting to non-secure URL: do not send the "Referer" header
|
||||||
@ -839,7 +862,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/201',
|
b'https://scrapytest.org/201',
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/301',
|
(
|
||||||
|
'https://scrapytest.org/301',
|
||||||
'https://scrapytest.org/302',
|
'https://scrapytest.org/302',
|
||||||
(
|
(
|
||||||
# redirecting to non-secure URL (different domain): send origin
|
# redirecting to non-secure URL (different domain): send origin
|
||||||
@ -848,7 +872,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapytest.org/301',
|
b'https://scrapytest.org/301',
|
||||||
None,
|
None,
|
||||||
),
|
),
|
||||||
( 'http://scrapy.org/401',
|
(
|
||||||
|
'http://scrapy.org/401',
|
||||||
'http://example.com/402',
|
'http://example.com/402',
|
||||||
(
|
(
|
||||||
(301, 'http://scrapytest.org/403'),
|
(301, 'http://scrapytest.org/403'),
|
||||||
@ -856,7 +881,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
b'http://scrapy.org/',
|
b'http://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapy.org/501',
|
(
|
||||||
|
'https://scrapy.org/501',
|
||||||
'https://example.com/502',
|
'https://example.com/502',
|
||||||
(
|
(
|
||||||
# all different domains: send origin
|
# all different domains: send origin
|
||||||
@ -866,7 +892,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
|
|||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
b'https://scrapy.org/',
|
b'https://scrapy.org/',
|
||||||
),
|
),
|
||||||
( 'https://scrapytest.org/601',
|
(
|
||||||
|
'https://scrapytest.org/601',
|
||||||
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
|
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
|
||||||
(
|
(
|
||||||
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing
|
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing
|
||||||
|
@ -250,10 +250,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
|
|
||||||
result = [row for row in csv]
|
result = [row for row in csv]
|
||||||
self.assertEqual(result,
|
self.assertEqual(result,
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
# explicit type check cuz' we no like stinkin' autocasting! yarrr
|
# explicit type check cuz' we no like stinkin' autocasting! yarrr
|
||||||
for result_row in result:
|
for result_row in result:
|
||||||
@ -266,10 +266,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv = csviter(response, delimiter='\t')
|
csv = csviter(response, delimiter='\t')
|
||||||
|
|
||||||
self.assertEqual([row for row in csv],
|
self.assertEqual([row for row in csv],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
def test_csviter_quotechar(self):
|
def test_csviter_quotechar(self):
|
||||||
body1 = get_testdata('feeds', 'feed-sample6.csv')
|
body1 = get_testdata('feeds', 'feed-sample6.csv')
|
||||||
@ -279,19 +279,19 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv1 = csviter(response1, quotechar="'")
|
csv1 = csviter(response1, quotechar="'")
|
||||||
|
|
||||||
self.assertEqual([row for row in csv1],
|
self.assertEqual([row for row in csv1],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
response2 = TextResponse(url="http://example.com/", body=body2)
|
response2 = TextResponse(url="http://example.com/", body=body2)
|
||||||
csv2 = csviter(response2, delimiter="|", quotechar="'")
|
csv2 = csviter(response2, delimiter="|", quotechar="'")
|
||||||
|
|
||||||
self.assertEqual([row for row in csv2],
|
self.assertEqual([row for row in csv2],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
def test_csviter_wrong_quotechar(self):
|
def test_csviter_wrong_quotechar(self):
|
||||||
body = get_testdata('feeds', 'feed-sample6.csv')
|
body = get_testdata('feeds', 'feed-sample6.csv')
|
||||||
@ -299,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv = csviter(response)
|
csv = csviter(response)
|
||||||
|
|
||||||
self.assertEqual([row for row in csv],
|
self.assertEqual([row for row in csv],
|
||||||
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
|
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
|
||||||
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
|
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
|
||||||
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
|
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
|
||||||
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
|
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
|
||||||
|
|
||||||
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
|
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
|
||||||
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
|
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
|
||||||
@ -310,10 +310,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv = csviter(response, delimiter='\t')
|
csv = csviter(response, delimiter='\t')
|
||||||
|
|
||||||
self.assertEqual([row for row in csv],
|
self.assertEqual([row for row in csv],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
def test_csviter_headers(self):
|
def test_csviter_headers(self):
|
||||||
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
|
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
|
||||||
@ -323,10 +323,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
|
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
|
||||||
|
|
||||||
self.assertEqual([row for row in csv],
|
self.assertEqual([row for row in csv],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
|
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
def test_csviter_falserow(self):
|
def test_csviter_falserow(self):
|
||||||
body = get_testdata('feeds', 'feed-sample3.csv')
|
body = get_testdata('feeds', 'feed-sample3.csv')
|
||||||
@ -336,10 +336,10 @@ class UtilsCsvTestCase(unittest.TestCase):
|
|||||||
csv = csviter(response)
|
csv = csviter(response)
|
||||||
|
|
||||||
self.assertEqual([row for row in csv],
|
self.assertEqual([row for row in csv],
|
||||||
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
|
||||||
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
|
||||||
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
|
||||||
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
{u'id': u'4', u'name': u'empty', u'value': u''}])
|
||||||
|
|
||||||
def test_csviter_exception(self):
|
def test_csviter_exception(self):
|
||||||
body = get_testdata('feeds', 'feed-sample3.csv')
|
body = get_testdata('feeds', 'feed-sample3.csv')
|
||||||
|
@ -203,29 +203,29 @@ def create_skipped_scheme_t(args):
|
|||||||
|
|
||||||
|
|
||||||
for k, args in enumerate([
|
for k, args in enumerate([
|
||||||
('/index', 'file://'),
|
('/index', 'file://'),
|
||||||
('/index.html', 'file://'),
|
('/index.html', 'file://'),
|
||||||
('./index.html', 'file://'),
|
('./index.html', 'file://'),
|
||||||
('../index.html', 'file://'),
|
('../index.html', 'file://'),
|
||||||
('../../index.html', 'file://'),
|
('../../index.html', 'file://'),
|
||||||
('./data/index.html', 'file://'),
|
('./data/index.html', 'file://'),
|
||||||
('.hidden/data/index.html', 'file://'),
|
('.hidden/data/index.html', 'file://'),
|
||||||
('/home/user/www/index.html', 'file://'),
|
('/home/user/www/index.html', 'file://'),
|
||||||
('//home/user/www/index.html', 'file://'),
|
('//home/user/www/index.html', 'file://'),
|
||||||
('file:///home/user/www/index.html', 'file://'),
|
('file:///home/user/www/index.html', 'file://'),
|
||||||
|
|
||||||
('index.html', 'http://'),
|
('index.html', 'http://'),
|
||||||
('example.com', 'http://'),
|
('example.com', 'http://'),
|
||||||
('www.example.com', 'http://'),
|
('www.example.com', 'http://'),
|
||||||
('www.example.com/index.html', 'http://'),
|
('www.example.com/index.html', 'http://'),
|
||||||
('http://example.com', 'http://'),
|
('http://example.com', 'http://'),
|
||||||
('http://example.com/index.html', 'http://'),
|
('http://example.com/index.html', 'http://'),
|
||||||
('localhost', 'http://'),
|
('localhost', 'http://'),
|
||||||
('localhost/index.html', 'http://'),
|
('localhost/index.html', 'http://'),
|
||||||
|
|
||||||
# some corner cases (default to http://)
|
# some corner cases (default to http://)
|
||||||
('/', 'http://'),
|
('/', 'http://'),
|
||||||
('.../test', 'http://'),
|
('.../test', 'http://'),
|
||||||
|
|
||||||
], start=1):
|
], start=1):
|
||||||
t_method = create_guess_scheme_t(args)
|
t_method = create_guess_scheme_t(args)
|
||||||
|
@ -53,28 +53,28 @@ class ParseUrlTestCase(unittest.TestCase):
|
|||||||
def testParse(self):
|
def testParse(self):
|
||||||
lip = '127.0.0.1'
|
lip = '127.0.0.1'
|
||||||
tests = (
|
tests = (
|
||||||
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||||
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
|
||||||
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
|
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
|
||||||
("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
||||||
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
|
||||||
("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
|
("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
|
||||||
|
|
||||||
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
|
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
|
||||||
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
|
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
|
||||||
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
|
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
|
||||||
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
|
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
|
||||||
("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
|
("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
|
||||||
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
|
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
|
||||||
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
|
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
|
||||||
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
|
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
|
||||||
|
|
||||||
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
|
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
|
||||||
("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
|
("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
|
||||||
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
|
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
|
||||||
|
|
||||||
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
|
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
|
||||||
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
|
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
|
||||||
)
|
)
|
||||||
|
|
||||||
for url, test in tests:
|
for url, test in tests:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user