1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-14 16:58:20 +00:00

Merge branch 'master' into flake8-max-line-length

This commit is contained in:
Eugenio Lacuesta 2020-04-15 09:24:33 -03:00
commit 01d73dd3f0
No known key found for this signature in database
GPG Key ID: DA3EF2D0913E9810
13 changed files with 241 additions and 216 deletions

View File

@ -45,7 +45,6 @@ flake8-ignore =
scrapy/core/scraper.py E128 W504
scrapy/core/spidermw.py E731 E126
scrapy/core/downloader/contextfactory.py E128 E126
scrapy/core/downloader/tls.py E241
scrapy/core/downloader/webclient.py E731 E128 E126
scrapy/core/downloader/handlers/ftp.py E128 E127
scrapy/core/downloader/handlers/s3.py E128 E126
@ -69,9 +68,9 @@ flake8-ignore =
# scrapy/loader
scrapy/loader/__init__.py E128
# scrapy/pipelines
scrapy/pipelines/files.py E116 E266
scrapy/pipelines/images.py E265
scrapy/pipelines/media.py E125 E266
scrapy/pipelines/files.py E116
scrapy/pipelines/media.py E125
# scrapy/selector
scrapy/selector/__init__.py F403
scrapy/selector/unified.py E111
@ -96,7 +95,6 @@ flake8-ignore =
scrapy/utils/url.py F403 E128 F405
# scrapy
scrapy/__init__.py E402
scrapy/dupefilters.py E202
scrapy/item.py E128
scrapy/mail.py E402 E128
scrapy/middleware.py E128
@ -113,11 +111,11 @@ flake8-ignore =
tests/test_command_shell.py E128
tests/test_commands.py E128
tests/test_contracts.py E128
tests/test_crawl.py E741 E265
tests/test_crawl.py E741
tests/test_crawler.py F841
tests/test_dependencies.py F841
tests/test_downloader_handlers.py E124 E127 E128 E265 E126 E123
tests/test_downloadermiddleware_cookies.py E731 E741 E128 E265 E126
tests/test_downloader_handlers.py E124 E127 E128 E126 E123
tests/test_downloadermiddleware_cookies.py E731 E741 E128 E126
tests/test_downloadermiddleware_decompression.py E127
tests/test_downloadermiddleware_httpcompression.py E126 E123
tests/test_downloadermiddleware_httpproxy.py E128
@ -127,12 +125,12 @@ flake8-ignore =
tests/test_engine.py E401 E128
tests/test_exporters.py E731 E128 E124
tests/test_extension_telnet.py F841
tests/test_feedexport.py F841 E241
tests/test_feedexport.py F841
tests/test_http_request.py E402 E127 E128 E128 E126 E123
tests/test_http_response.py E128 E265
tests/test_http_response.py E128
tests/test_item.py E128 F841
tests/test_linkextractors.py E128 E124
tests/test_loader.py E731 E741 E128 E117 E241
tests/test_loader.py E731 E741 E128 E117
tests/test_logformatter.py E128 E122
tests/test_mail.py E128
tests/test_middleware.py E128
@ -144,21 +142,21 @@ flake8-ignore =
tests/test_selector.py E127
tests/test_spidermiddleware_httperror.py E128 E127 E121
tests/test_spidermiddleware_offsite.py E128 E111
tests/test_spidermiddleware_referer.py F841 E125 E201 E124 E241 E121
tests/test_spidermiddleware_referer.py F841 E125 E124 E121
tests/test_squeues.py E741
tests/test_utils_conf.py E128
tests/test_utils_datatypes.py E402
tests/test_utils_defer.py F841
tests/test_utils_deprecate.py F841
tests/test_utils_http.py E128 W504
tests/test_utils_iterators.py E128 E129 E241
tests/test_utils_iterators.py E128 E129
tests/test_utils_log.py E741
tests/test_utils_python.py E731
tests/test_utils_reqser.py E128
tests/test_utils_request.py E128
tests/test_utils_signal.py E741 F841 E731
tests/test_utils_sitemap.py E128 E124
tests/test_utils_url.py E127 E125 E241 E126 E123
tests/test_webclient.py E128 E122 E402 E241 E123 E126
tests/test_utils_url.py E127 E125 E126 E123
tests/test_webclient.py E128 E122 E402 E123 E126
tests/test_settings/__init__.py E128
tests/test_spiderloader/__init__.py E128

View File

@ -61,7 +61,7 @@ class RFPDupeFilter(BaseDupeFilter):
def log(self, request, spider):
if self.debug:
msg = "Filtered duplicate request: %(request)s (referer: %(referer)s)"
args = {'request': request, 'referer': referer_str(request) }
args = {'request': request, 'referer': referer_str(request)}
self.logger.debug(msg, args, extra={'spider': spider})
elif self.logdupes:
msg = ("Filtered duplicate request: %(request)s"

View File

@ -500,7 +500,7 @@ class FilesPipeline(MediaPipeline):
spider.crawler.stats.inc_value('file_count', spider=spider)
spider.crawler.stats.inc_value('file_status_count/%s' % status, spider=spider)
### Overridable Interface
# Overridable Interface
def get_media_requests(self, item, info):
return [Request(x) for x in item.get(self.files_urls_field, [])]

View File

@ -14,7 +14,7 @@ from scrapy.utils.python import to_bytes
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.exceptions import DropItem
#TODO: from scrapy.pipelines.media import MediaPipeline
# TODO: from scrapy.pipelines.media import MediaPipeline
from scrapy.pipelines.files import FileException, FilesPipeline

View File

@ -166,7 +166,7 @@ class MediaPipeline:
for wad in info.waiting.pop(fp):
defer_result(result).chainDeferred(wad)
### Overridable Interface
# Overridable Interface
def media_to_download(self, request, info):
"""Check request before starting download"""
pass

View File

@ -147,9 +147,9 @@ class CrawlTestCase(TestCase):
settings = {"CONCURRENT_REQUESTS": 1}
crawler = CrawlerRunner(settings).create_crawler(BrokenStartRequestsSpider)
yield crawler.crawl(mockserver=self.mockserver)
#self.assertTrue(False, crawler.spider.seedsseen)
#self.assertTrue(crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
# crawler.spider.seedsseen)
self.assertTrue(
crawler.spider.seedsseen.index(None) < crawler.spider.seedsseen.index(99),
crawler.spider.seedsseen)
@defer.inlineCallbacks
def test_start_requests_dupes(self):

View File

@ -202,7 +202,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req4, self.spider) is None
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
#cookies from hosts with port
# cookies from hosts with port
req5_1 = Request('http://scrapytest.org:1104/')
assert self.mw.process_request(req5_1, self.spider) is None
@ -218,7 +218,7 @@ class CookiesMiddlewareTest(TestCase):
assert self.mw.process_request(req5_3, self.spider) is None
self.assertEqual(req5_3.headers.get('Cookie'), b'C1=value1')
#skip cookie retrieval for not http request
# skip cookie retrieval for non-HTTP requests
req6 = Request('file:///scrapy/sometempfile')
assert self.mw.process_request(req6, self.spider) is None
self.assertEqual(req6.headers.get('Cookie'), None)

View File

@ -450,8 +450,8 @@ class TextResponseTest(BaseResponseTest):
assert u'<span>value</span>' in r.text, repr(r.text)
# FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
#r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
#assert u'\ufffd' in r.text, repr(r.text)
# r = self.response_class("http://www.example.com", body=b'PREFIX\xe3\xabSUFFIX')
# assert u'\ufffd' in r.text, repr(r.text)
def test_selector(self):
body = b"<html><head><title>Some page</title><body></body></html>"

View File

@ -577,7 +577,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
settings = {'REFERRER_POLICY': 'scrapy.spidermiddlewares.referer.UnsafeUrlPolicy'}
scenarii = [
( 'http://scrapytest.org/1', # parent
(
'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@ -587,7 +588,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'http://scrapytest.org/1', # expected initial referer
b'http://scrapytest.org/1', # expected referer for the redirection request
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
# redirecting to non-secure URL
@ -596,7 +598,8 @@ class TestReferrerOnRedirect(TestRefererMiddleware):
b'https://scrapytest.org/1',
b'https://scrapytest.org/1',
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.com/2',
(
# redirecting to non-secure URL: different origin
@ -638,7 +641,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'no-referrer'}
scenarii = [
( 'http://scrapytest.org/1', # parent
(
'http://scrapytest.org/1', # parent
'http://scrapytest.org/2', # target
(
# redirections: code, URL
@ -648,7 +652,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None, # expected initial "Referer"
None, # expected "Referer" for the redirection request
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://scrapytest.org/2',
(
(301, 'http://scrapytest.org/3'),
@ -656,7 +661,8 @@ class TestReferrerOnRedirectNoReferrer(TestReferrerOnRedirect):
None,
None,
),
( 'https://scrapytest.org/1',
(
'https://scrapytest.org/1',
'https://example.com/2', # different origin
(
(301, 'http://scrapytest.com/3'),
@ -677,7 +683,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': 'same-origin'}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target
(
# redirections: code, URL
@ -687,7 +694,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial "Referer"
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting from secure to non-secure URL == different origin
@ -696,7 +704,8 @@ class TestReferrerOnRedirectSameOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# different domain == different origin
@ -719,7 +728,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101',
(
'http://scrapytest.org/101',
'http://scrapytest.org/102',
(
(301, 'http://scrapytest.org/103'),
@ -728,7 +738,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/', # send origin
b'http://scrapytest.org/', # redirects to same origin: send origin
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: no referrer
@ -737,7 +748,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): no referrer
@ -746,7 +758,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/',
None,
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -754,7 +767,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# HTTPS all along, so origin referrer is kept as-is
@ -764,7 +778,8 @@ class TestReferrerOnRedirectStrictOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/601',
(
'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: no referrer
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) no referrer
@ -786,7 +801,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@ -796,7 +812,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: send origin
@ -805,7 +822,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
b'https://scrapytest.org/',
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): send origin
@ -814,7 +832,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
b'https://scrapytest.org/',
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -822,7 +841,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@ -832,7 +852,8 @@ class TestReferrerOnRedirectOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'http://scrapytest.org/302', # TLS to non-TLS: send origin
(
(301, 'https://scrapytest.org/303'), # TLS URL again: send origin (also)
@ -856,7 +877,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
"""
settings = {'REFERRER_POLICY': POLICY_STRICT_ORIGIN_WHEN_CROSS_ORIGIN}
scenarii = [
( 'http://scrapytest.org/101', # origin
(
'http://scrapytest.org/101', # origin
'http://scrapytest.org/102', # target + redirection
(
# redirections: code, URL
@ -866,7 +888,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapytest.org/101', # expected initial referer
b'http://scrapytest.org/101', # expected referer for the redirection request
),
( 'https://scrapytest.org/201',
(
'https://scrapytest.org/201',
'https://scrapytest.org/202',
(
# redirecting to non-secure URL: do not send the "Referer" header
@ -875,7 +898,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/201',
None,
),
( 'https://scrapytest.org/301',
(
'https://scrapytest.org/301',
'https://scrapytest.org/302',
(
# redirecting to non-secure URL (different domain): do not send the "Referer" header
@ -884,7 +908,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapytest.org/301',
None,
),
( 'http://scrapy.org/401',
(
'http://scrapy.org/401',
'http://example.com/402',
(
(301, 'http://scrapytest.org/403'),
@ -892,7 +917,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'http://scrapy.org/',
b'http://scrapy.org/',
),
( 'https://scrapy.org/501',
(
'https://scrapy.org/501',
'https://example.com/502',
(
# all different domains: send origin
@ -902,7 +928,8 @@ class TestReferrerOnRedirectStrictOriginWhenCrossOrigin(TestReferrerOnRedirect):
b'https://scrapy.org/',
b'https://scrapy.org/',
),
( 'https://scrapytest.org/601',
(
'https://scrapytest.org/601',
'http://scrapytest.org/602', # TLS to non-TLS: do not send "Referer"
(
(301, 'https://scrapytest.org/603'), # TLS URL again: (still) send nothing