1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-13 08:13:55 +00:00

Fix pycodestyle E2XX (whitespace) (#4468)

This commit is contained in:
Eugenio Lacuesta 2020-04-15 09:11:37 -03:00 committed by GitHub
parent 39b01b6892
commit 94d7ad76cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 268 additions and 241 deletions

View File

@ -54,7 +54,7 @@ flake8-ignore =
scrapy/core/downloader/__init__.py E501
scrapy/core/downloader/contextfactory.py E501 E128 E126
scrapy/core/downloader/middleware.py E501
scrapy/core/downloader/tls.py E501 E241
scrapy/core/downloader/tls.py E501
scrapy/core/downloader/webclient.py E731 E501 E128 E126
scrapy/core/downloader/handlers/__init__.py E501
scrapy/core/downloader/handlers/ftp.py E501 E128 E127
@ -97,9 +97,9 @@ flake8-ignore =
scrapy/loader/processors.py E501
# scrapy/pipelines
scrapy/pipelines/__init__.py E501
scrapy/pipelines/files.py E116 E501 E266
scrapy/pipelines/images.py E265 E501
scrapy/pipelines/media.py E125 E501 E266
scrapy/pipelines/files.py E116 E501
scrapy/pipelines/images.py E501
scrapy/pipelines/media.py E125 E501
# scrapy/selector
scrapy/selector/__init__.py F403
scrapy/selector/unified.py E501 E111
@ -149,7 +149,7 @@ flake8-ignore =
scrapy/__init__.py E402 E501
scrapy/cmdline.py E501
scrapy/crawler.py E501
scrapy/dupefilters.py E501 E202
scrapy/dupefilters.py E501
scrapy/exceptions.py E501
scrapy/exporters.py E501
scrapy/interfaces.py E501
@ -178,13 +178,13 @@ flake8-ignore =
tests/test_command_shell.py E501 E128
tests/test_commands.py E128 E501
tests/test_contracts.py E501 E128
tests/test_crawl.py E501 E741 E265
tests/test_crawl.py E501 E741
tests/test_crawler.py F841 E501
tests/test_dependencies.py F841 E501
tests/test_downloader_handlers.py E124 E127 E128 E265 E501 E126 E123
tests/test_downloader_handlers.py E124 E127 E128 E501 E126 E123
tests/test_downloadermiddleware.py E501
tests/test_downloadermiddleware_ajaxcrawlable.py E501
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E265 E126
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E126
tests/test_downloadermiddleware_decompression.py E127
tests/test_downloadermiddleware_defaultheaders.py E501
tests/test_downloadermiddleware_downloadtimeout.py E501
@ -199,15 +199,15 @@ flake8-ignore =
tests/test_engine.py E401 E501 E128
tests/test_exporters.py E501 E731 E128 E124
tests/test_extension_telnet.py F841
tests/test_feedexport.py E501 F841 E241
tests/test_feedexport.py E501 F841
tests/test_http_cookies.py E501
tests/test_http_headers.py E501
tests/test_http_request.py E402 E501 E127 E128 E128 E126 E123
tests/test_http_response.py E501 E128 E265
tests/test_http_response.py E501 E128
tests/test_item.py E128 F841
tests/test_link.py E501
tests/test_linkextractors.py E501 E128 E124
tests/test_loader.py E501 E731 E741 E128 E117 E241
tests/test_loader.py E501 E731 E741 E128 E117
tests/test_logformatter.py E128 E501 E122
tests/test_mail.py E128 E501
tests/test_middleware.py E501 E128
@ -226,7 +226,7 @@ flake8-ignore =
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
tests/test_spidermiddleware_offsite.py E501 E128 E111
tests/test_spidermiddleware_output_chain.py E501
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
tests/test_spidermiddleware_referer.py E501 F841 E125 E124 E501 E121
tests/test_squeues.py E501 E741
tests/test_utils_asyncio.py E501
tests/test_utils_conf.py E501 E128
@ -235,7 +235,7 @@ flake8-ignore =
tests/test_utils_defer.py E501 F841
tests/test_utils_deprecate.py F841 E501
tests/test_utils_http.py E501 E128 W504
tests/test_utils_iterators.py E501 E128 E129 E241
tests/test_utils_iterators.py E501 E128 E129
tests/test_utils_log.py E741
tests/test_utils_python.py E501 E731
tests/test_utils_reqser.py E501 E128
@ -243,8 +243,8 @@ flake8-ignore =
tests/test_utils_response.py E501
tests/test_utils_signal.py E741 F841 E731
tests/test_utils_sitemap.py E128 E501 E124
tests/test_utils_url.py E501 E127 E125 E501 E241 E126 E123
tests/test_webclient.py E501 E128 E122 E402 E241 E123 E126
tests/test_utils_url.py E501 E127 E125 E501 E126 E123
tests/test_webclient.py E501 E128 E122 E402 E123 E126
tests/test_cmdline/__init__.py E501
tests/test_settings/__init__.py E501 E128
tests/test_spiderloader/__init__.py E128 E501

View File

@ -20,8 +20,8 @@ METHOD_TLSv12 = 'TLSv1.2'
openssl_methods = {
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
METHOD_TLS: SSL.SSLv23_METHOD, # protocol negotiation (recommended)
METHOD_SSLv3: SSL.SSLv3_METHOD, # SSL 3 (NOT recommended)
METHOD_TLSv10: SSL.TLSv1_METHOD, # TLS 1.0 only
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only

View File

@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
def log(self, request, spider):
if self.debug:
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
args = {'request': request, 'referer': referer_str(request) }
args = {'request': request, 'referer': referer_str(request)}
self.logger.debug(msg, args, extra={'spider': spider})
elif self.logdupes:
msg = ("Filtered duplicate request: %(request)s"

View File

@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline):
spider.crawler.stats.inc_value('file_count', spider=spider)
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
### Overridable Interface
# Overridable Interface
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.files_urls_field, [])]

View File

@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.exceptions import DropItem
#TODO: from scrapy.pipelines.media import MediaPipeline
# TODO: from scrapy.pipelines.media import MediaPipeline
from scrapy.pipelines.files import FileException, FilesPipeline

View File

@ -166,7 +166,7 @@ class MediaPipeline:
for wad in info.waiting.pop(fp):
defer_result(result).chainDeferred(wad)
### Overridable Interface
# Overridable Interface
def media_to_download(self, request, info):
"""Check request before starting download"""
pass

View File

@ -147,9 +147,9 @@ class CrawlTestCase(TestCase):
settings = {"CONCURRENT_REQUESTS": 1}
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
yield crawler.crawl(mockserver=self.mockserver)
#self.assertTrue(False, crawler.spider.seedsseen)
#self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
# crawler.spider.seedsseen)
self.assertTrue(
crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
crawler.spider.seedsseen)
@defer.inlineCallbacks
def test_start_requests_dupes(self):

View File

@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req4, self.spider) is None
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
#cookies from hosts with port
# cookies from hosts with port
req5_1 = Request('http://scrapytest.org:1104/')
assert self.mw.process_request(req5_1, self.spider) is None
@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req5_3, self.spider) is None
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
#skip cookie retrieval for not http request
# skip cookie retrieval for not http request
req6 = Request('file:///scrapy/sometempfile')
assert self.mw.process_request(req6, self.spider) is None
self.assertEqual(req6.headers.get('Cookie'), None)

View File

@ -438,8 +438,8 @@ class TextResponseTest(BaseResponseTest):
assert u'<span>value</span>' in r.text, repr(r.text)
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
#r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
#assert u'\ufffd' in r.text, repr(r.text)
# r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
# assert u'\ufffd' in r.text, repr(r.text)
def test_selector(self):
body = b"<html><head><title>Some page</title><body></body></html>"

View File

@ -24,7 +24,7 @@ class TestRefererMiddleware(TestCase):
resp_headers = {}
settings = {}
scenarii = [
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
('http://scrapytest.org', 'http://scrapytest.org/', b'http://scrapytest.org'),
]
def setUp(self):
@ -54,57 +54,57 @@ class MixinDefault:
with some additional filtering of s3://
"""
scenarii = [
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
('https://example.com/', 'http://scrapy.org/', None),
('https://example.com/', 'https://scrapy.org/', b'https://example.com/'),
('http://example.com/', 'http://scrapy.org/', b'http://example.com/'),
('http://example.com/', 'https://scrapy.org/', b'http://example.com/'),
('https://example.com/', 'http://scrapy.org/', None),
# no credentials leak
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
('http://user:password@example.com/', 'https://scrapy.org/', b'http://example.com/'),
# no referrer leak for local schemes
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
('file:///home/path/to/somefile.html', 'https://scrapy.org/', None),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
# no referrer leak for s3 origins
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
('s3://mybucket/path/to/data.csv', 'https://scrapy.org/', None),
('s3://mybucket/path/to/data.csv', 'http://scrapy.org/', None),
]
class MixinNoReferrer:
scenarii = [
('https://example.com/page.html', 'https://example.com/', None),
('http://www.example.com/', 'https://scrapy.org/', None),
('http://www.example.com/', 'http://scrapy.org/', None),
('https://www.example.com/', 'http://scrapy.org/', None),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
('https://example.com/page.html', 'https://example.com/', None),
('http://www.example.com/', 'https://scrapy.org/', None),
('http://www.example.com/', 'http://scrapy.org/', None),
('https://www.example.com/', 'http://scrapy.org/', None),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', None),
]
class MixinNoReferrerWhenDowngrade:
scenarii = [
# TLS to TLS: send non-empty referrer
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
('https://example.com/page.html', 'https://not.example.com/', b'https://example.com/page.html'),
('https://example.com/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
('https://example.com:443/page.html', 'https://scrapy.org/', b'https://example.com/page.html'),
('https://example.com:444/page.html', 'https://scrapy.org/', b'https://example.com:444/page.html'),
('ftps://example.com/urls.zip', 'https://scrapy.org/', b'ftps://example.com/urls.zip'),
# TLS to non-TLS: do not send referrer
('https://example.com/page.html', 'http://not.example.com/', None),
('https://example.com/page.html', 'http://scrapy.org/', None),
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
('https://example.com/page.html', 'http://not.example.com/', None),
('https://example.com/page.html', 'http://scrapy.org/', None),
('ftps://example.com/urls.zip', 'http://scrapy.org/', None),
# non-TLS to TLS or non-TLS: send referrer
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
('http://example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
('http://example.com/page.html', 'https://scrapy.org/', b'http://example.com/page.html'),
('http://example.com:8080/page.html', 'https://scrapy.org/', b'http://example.com:8080/page.html'),
('http://example.com:80/page.html', 'http://not.example.com/', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://scrapy.org/', b'http://example.com/page.html'),
('http://example.com:443/page.html', 'http://scrapy.org/', b'http://example.com:443/page.html'),
('ftp://example.com/urls.zip', 'http://scrapy.org/', b'ftp://example.com/urls.zip'),
('ftp://example.com/urls.zip', 'https://scrapy.org/', b'ftp://example.com/urls.zip'),
# test for user/password stripping
('http://user:password@example.com/page.html', 'https://not.example.com/', b'http://example.com/page.html'),
@ -114,43 +114,43 @@ class MixinNoReferrerWhenDowngrade:
class MixinSameOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: do NOT send referrer
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
('https://example.com/page.html', 'https://not.example.com/otherpage.html', None),
('http://example.com/page.html', 'http://not.example.com/otherpage.html', None),
('http://example.com/page.html', 'http://www.example.com/otherpage.html', None),
# Different port: do NOT send referrer
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
('https://example.com:444/page.html', 'https://example.com/not-page.html', None),
('http://example.com:81/page.html', 'http://example.com/not-page.html', None),
('http://example.com/page.html', 'http://example.com:81/not-page.html', None),
# Different protocols: do NOT send referrer
('https://example.com/page.html', 'http://example.com/not-page.html', None),
('https://example.com/page.html', 'http://not.example.com/', None),
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
('https://example.com/page.html', 'http://example.com/not-page.html', None),
('https://example.com/page.html', 'http://not.example.com/', None),
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
('ftp://example.com/urls.zip', 'http://example.com/not-page.html', None),
('ftps://example.com/urls.zip', 'https://example.com/not-page.html', None),
# test for user/password stripping
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
('https://user:password@example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('https://user:password@example.com/page.html', 'http://example.com/not-page.html', None),
]
class MixinOrigin:
scenarii = [
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('https://example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
# test for user/password stripping
('https://user:password@example.com/page.html', 'http://scrapy.org', b'https://example.com/'),
@ -160,129 +160,129 @@ class MixinOrigin:
class MixinStrictOrigin:
scenarii = [
# TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/'),
('https://example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('http://example.com/page.html', 'http://scrapy.org', b'http://example.com/'),
# downgrade: send nothing
('https://example.com/page.html', 'http://scrapy.org', None),
('https://example.com/page.html', 'http://scrapy.org', None),
# upgrade: send origin
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
('http://example.com/page.html', 'https://scrapy.org', b'http://example.com/'),
# test for user/password stripping
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
('https://user:password@example.com/page.html', 'https://scrapy.org', b'https://example.com/'),
('https://user:password@example.com/page.html', 'http://scrapy.org', None),
]
class MixinOriginWhenCrossOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: send origin as referrer
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
# exact match required
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
# Different port: send origin as referrer
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
# Different protocols: send origin as referrer
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('https://example4.com/page.html', 'http://example4.com/not-page.html', b'https://example4.com/'),
('https://example4.com/page.html', 'http://not.example4.com/', b'https://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
# test for user/password stripping
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
# TLS to non-TLS downgrade: send origin
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', b'https://example5.com/'),
]
class MixinStrictOriginWhenCrossOrigin:
scenarii = [
# Same origin (protocol, host, port): send referrer
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
('https://example.com/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('https://example.com:443/page.html', 'https://example.com/not-page.html', b'https://example.com/page.html'),
('http://example.com:80/page.html', 'http://example.com/not-page.html', b'http://example.com/page.html'),
('http://example.com/page.html', 'http://example.com:80/not-page.html', b'http://example.com/page.html'),
('http://example.com:8888/page.html', 'http://example.com:8888/not-page.html', b'http://example.com:8888/page.html'),
# Different host: send origin as referrer
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
('https://example2.com/page.html', 'https://scrapy.org/otherpage.html', b'https://example2.com/'),
('https://example2.com/page.html', 'https://not.example2.com/otherpage.html', b'https://example2.com/'),
('http://example2.com/page.html', 'http://not.example2.com/otherpage.html', b'http://example2.com/'),
# exact match required
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
('http://example2.com/page.html', 'http://www.example2.com/otherpage.html', b'http://example2.com/'),
# Different port: send origin as referrer
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
('https://example3.com:444/page.html', 'https://example3.com/not-page.html', b'https://example3.com:444/'),
('http://example3.com:81/page.html', 'http://example3.com/not-page.html', b'http://example3.com:81/'),
# downgrade
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
('https://example4.com/page.html', 'http://not.example4.com/', None),
('https://example4.com/page.html', 'http://example4.com/not-page.html', None),
('https://example4.com/page.html', 'http://not.example4.com/', None),
# non-TLS to non-TLS
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
('ftp://example4.com/urls.zip', 'http://example4.com/not-page.html', b'ftp://example4.com/'),
# upgrade
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
('http://example4.com/page.html', 'https://example4.com/not-page.html', b'http://example4.com/'),
('http://example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/'),
# Different protocols: send origin as referrer
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
('ftps://example4.com/urls.zip', 'https://example4.com/not-page.html', b'ftps://example4.com/'),
# test for user/password stripping
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
('https://user:password@example5.com/page.html', 'https://example5.com/not-page.html', b'https://example5.com/page.html'),
# TLS to non-TLS downgrade: send nothing
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
('https://user:password@example5.com/page.html', 'http://example5.com/not-page.html', None),
]
class MixinUnsafeUrl:
scenarii = [
# TLS to TLS: send referrer
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
('https://example.com/sekrit.html', 'http://not.example.com/', b'https://example.com/sekrit.html'),
('https://example1.com/page.html', 'https://not.example1.com/', b'https://example1.com/page.html'),
('https://example1.com/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
('https://example1.com:443/page.html', 'https://scrapy.org/', b'https://example1.com/page.html'),
('https://example1.com:444/page.html', 'https://scrapy.org/', b'https://example1.com:444/page.html'),
('ftps://example1.com/urls.zip', 'https://scrapy.org/', b'ftps://example1.com/urls.zip'),
# TLS to non-TLS: send referrer (yes, it's unsafe)
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
('https://example2.com/page.html', 'http://not.example2.com/', b'https://example2.com/page.html'),
('https://example2.com/page.html', 'http://scrapy.org/', b'https://example2.com/page.html'),
('ftps://example2.com/urls.zip', 'http://scrapy.org/', b'ftps://example2.com/urls.zip'),
# non-TLS to TLS or non-TLS: send referrer (yes, it's unsafe)
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
('http://example3.com/page.html', 'https://not.example3.com/', b'http://example3.com/page.html'),
('http://example3.com/page.html', 'https://scrapy.org/', b'http://example3.com/page.html'),
('http://example3.com:8080/page.html', 'https://scrapy.org/', b'http://example3.com:8080/page.html'),
('http://example3.com:80/page.html', 'http://not.example3.com/', b'http://example3.com/page.html'),
('http://example3.com/page.html', 'http://scrapy.org/', b'http://example3.com/page.html'),
('http://example3.com:443/page.html', 'http://scrapy.org/', b'http://example3.com:443/page.html'),
('ftp://example3.com/urls.zip', 'http://scrapy.org/', b'ftp://example3.com/urls.zip'),
('ftp://example3.com/urls.zip', 'https://scrapy.org/', b'ftp://example3.com/urls.zip'),
# test for user/password stripping
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
('http://user:password@example4.com/page.html', 'https://not.example4.com/', b'http://example4.com/page.html'),
('https://user:password@example4.com/page.html', 'http://scrapy.org/', b'https://example4.com/page.html'),
]
@ -339,12 +339,12 @@ class CustomPythonOrgPolicy(ReferrerPolicy):
class TestSettingsCustomPolicy(TestRefererMiddleware):
settings = {'REFERRER_POLICY': 'tests.test_spidermiddleware_referer.CustomPythonOrgPolicy'}
scenarii = [
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
('https://example.com/', 'https://scrapy.org/', b'https://python.org/'),
('http://example.com/', 'http://scrapy.org/', b'http://python.org/'),
('http://example.com/', 'https://scrapy.org/', b'https://python.org/'),
('https://example.com/', 'http://scrapy.org/', b'http://python.org/'),
('file:///home/path/to/somefile.html', 'https://scrapy.org/', b'https://python.org/'),
('file:///home/path/to/somefile.html', 'http://scrapy.org/', b'http://python.org/'),
]
@ -541,7 +541,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
scenarii = [
( 'http://scrapytest.org/1', # parent
(
'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@ -551,7 +552,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'http://scrapytest.org/1', # expected initial referer
b'http://scrapytest.org/1', # expected referer for the redirection request
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
# redirecting to non-secure URL
@ -560,7 +562,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'https://scrapytest.org/1',
b'https://scrapytest.org/1',
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.com/2',
(
# redirecting to non-secure URL: different origin
@ -602,7 +605,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'no-referrer'}
scenarii = [
( 'http://scrapytest.org/1', # parent
(
'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@ -612,7 +616,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None, # expected initial "Referer"
None, # expected "Referer" for the redirection request
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
(301, 'http://scrapytest.org/3'),
@ -620,7 +625,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None,
None,
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://example.com/2', # different origin
(
(301, 'http://scrapytest.com/3'),
@ -641,7 +647,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'same-origin'}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target
(
# redirections: code, URL
@ -651,7 +658,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial "Referer"
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting from secure to non-secure URL == different origin
@ -660,7 +668,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# different domain == different origin
@ -683,7 +692,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101',
(
'http://scrapytest.org/101',
'http://scrapytest.org/102',
(
(301, 'http://scrapytest.org/103'),
@ -692,7 +702,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/', # send origin
b'http://scrapytest.org/', # redirects to same origin: send origin
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: no referrer
@ -701,7 +712,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): no referrer
@ -710,7 +722,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -718,7 +731,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# HTTPS all along, so origin referrer is kept as-is
@ -728,7 +742,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/601',
(
'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
@ -750,7 +765,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@ -760,7 +776,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: send origin
@ -769,7 +786,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
b'https://scrapytest.org/',
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): send origin
@ -778,7 +796,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
b'https://scrapytest.org/',
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -786,7 +805,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@ -796,7 +816,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'http://scrapytest.org/302', # TLS to non-TLS: send origin
(
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
@ -820,7 +841,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@ -830,7 +852,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: do not send the "Referer" header
@ -839,7 +862,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): send origin
@ -848,7 +872,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
None,
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -856,7 +881,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@ -866,7 +892,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/601',
(
'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing

View File

@ -250,10 +250,10 @@ class UtilsCsvTestCase(unittest.TestCase):
result = [row for row in csv]
self.assertEqual(result,
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
# explicit type check cuz' we no like stinkin' autocasting! yarrr
for result_row in result:
@ -266,10 +266,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, delimiter='\t')
self.assertEqual([row for row in csv],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_quotechar(self):
body1 = get_testdata('feeds', 'feed-sample6.csv')
@ -279,19 +279,19 @@ class UtilsCsvTestCase(unittest.TestCase):
csv1 = csviter(response1, quotechar="'")
self.assertEqual([row for row in csv1],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
response2 = TextResponse(url="http://example.com/", body=body2)
csv2 = csviter(response2, delimiter="|", quotechar="'")
self.assertEqual([row for row in csv2],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_wrong_quotechar(self):
body = get_testdata('feeds', 'feed-sample6.csv')
@ -299,10 +299,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response)
self.assertEqual([row for row in csv],
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
[{u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
{u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
{u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
{u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""}])
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
body = get_testdata('feeds', 'feed-sample3.csv').replace(b',', b'\t')
@ -310,10 +310,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, delimiter='\t')
self.assertEqual([row for row in csv],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_headers(self):
sample = get_testdata('feeds', 'feed-sample3.csv').splitlines()
@ -323,10 +323,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response, headers=[h.decode('utf-8') for h in headers])
self.assertEqual([row for row in csv],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': u'foo\nbar'},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_falserow(self):
body = get_testdata('feeds', 'feed-sample3.csv')
@ -336,10 +336,10 @@ class UtilsCsvTestCase(unittest.TestCase):
csv = csviter(response)
self.assertEqual([row for row in csv],
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
[{u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
{u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
{u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
{u'id': u'4', u'name': u'empty', u'value': u''}])
def test_csviter_exception(self):
body = get_testdata('feeds', 'feed-sample3.csv')

View File

@ -203,29 +203,29 @@ def create_skipped_scheme_t(args):
for k, args in enumerate([
('/index', 'file://'),
('/index.html', 'file://'),
('./index.html', 'file://'),
('../index.html', 'file://'),
('../../index.html', 'file://'),
('./data/index.html', 'file://'),
('.hidden/data/index.html', 'file://'),
('/home/user/www/index.html', 'file://'),
('//home/user/www/index.html', 'file://'),
('file:///home/user/www/index.html', 'file://'),
('/index', 'file://'),
('/index.html', 'file://'),
('./index.html', 'file://'),
('../index.html', 'file://'),
('../../index.html', 'file://'),
('./data/index.html', 'file://'),
('.hidden/data/index.html', 'file://'),
('/home/user/www/index.html', 'file://'),
('//home/user/www/index.html', 'file://'),
('file:///home/user/www/index.html', 'file://'),
('index.html', 'http://'),
('example.com', 'http://'),
('www.example.com', 'http://'),
('www.example.com/index.html', 'http://'),
('http://example.com', 'http://'),
('http://example.com/index.html', 'http://'),
('localhost', 'http://'),
('localhost/index.html', 'http://'),
('index.html', 'http://'),
('example.com', 'http://'),
('www.example.com', 'http://'),
('www.example.com/index.html', 'http://'),
('http://example.com', 'http://'),
('http://example.com/index.html', 'http://'),
('localhost', 'http://'),
('localhost/index.html', 'http://'),
# some corner cases (default to http://)
('/', 'http://'),
('.../test', 'http://'),
('/', 'http://'),
('.../test', 'http://'),
], start=1):
t_method = create_guess_scheme_t(args)

View File

@ -53,28 +53,28 @@ class ParseUrlTestCase(unittest.TestCase):
def testParse(self):
lip = '127.0.0.1'
tests = (
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
("http://127.0.0.1?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
("http://127.0.0.1/?c=v&c2=v2#fragment", ('http', lip, lip, 80, '/?c=v&c2=v2')),
("http://127.0.0.1/foo?c=v&c2=v2#frag", ('http', lip, lip, 80, '/foo?c=v&c2=v2')),
("http://127.0.0.1:100?c=v&c2=v2#fragment", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
("http://127.0.0.1:100/?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/?c=v&c2=v2')),
("http://127.0.0.1:100/foo?c=v&c2=v2#frag", ('http', lip + ':100', lip, 100, '/foo?c=v&c2=v2')),
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
("http://127.0.0.1", ('http', lip, lip, 80, '/')),
("http://127.0.0.1/", ('http', lip, lip, 80, '/')),
("http://127.0.0.1/foo", ('http', lip, lip, 80, '/foo')),
("http://127.0.0.1?param=value", ('http', lip, lip, 80, '/?param=value')),
("http://127.0.0.1/?param=value", ('http', lip, lip, 80, '/?param=value')),
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
("http://127.0.0.1:12345/foo", ('http', lip + ':12345', lip, 12345, '/foo')),
("http://spam:12345/foo", ('http', 'spam:12345', 'spam', 12345, '/foo')),
("http://spam.test.org/foo", ('http', 'spam.test.org', 'spam.test.org', 80, '/foo')),
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
("https://127.0.0.1/foo", ('https', lip, lip, 443, '/foo')),
("https://127.0.0.1/?param=value", ('https', lip, lip, 443, '/?param=value')),
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
("https://127.0.0.1:12345/", ('https', lip + ':12345', lip, 12345, '/')),
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
("http://scrapytest.org/foo ", ('http', 'scrapytest.org', 'scrapytest.org', 80, '/foo')),
("http://egg:7890 ", ('http', 'egg:7890', 'egg', 7890, '/')),
)
for url, test in tests: