
Merge remote-tracking branch 'upstream/master' into remove-six-code

Eugenio Lacuesta 2019-11-08 12:29:15 -03:00
commit 342e3b5bd5
27 changed files with 271 additions and 34 deletions

.travis.yml

@@ -7,6 +7,8 @@ branches:
     - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
 matrix:
   include:
+    - env: TOXENV=flake8
+      python: 3.8
     - env: TOXENV=pypy3
       python: 3.5
     - env: TOXENV=py35
@@ -25,13 +27,6 @@ matrix:
       python: 3.6
 install:
   - |
-    if [ "$TOXENV" = "pypy" ]; then
-      export PYPY_VERSION="pypy-6.0.0-linux_x86_64-portable"
-      wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
-      tar -jxf ${PYPY_VERSION}.tar.bz2
-      virtualenv --python="$PYPY_VERSION/bin/pypy" "$HOME/virtualenvs/$PYPY_VERSION"
-      source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
-    fi
     if [ "$TOXENV" = "pypy3" ]; then
       export PYPY_VERSION="pypy3.5-5.9-beta-linux_x86_64-portable"
       wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"

conftest.py

@@ -17,3 +17,13 @@ for line in open('tests/ignores.txt'):
 def chdir(tmpdir):
     """Change to pytest-provided temporary directory"""
     tmpdir.chdir()
+
+
+def pytest_collection_modifyitems(session, config, items):
+    # Avoid executing tests when running with the `--flake8` flag (pytest-flake8)
+    try:
+        from pytest_flake8 import Flake8Item
+        if config.getoption('--flake8'):
+            items[:] = [item for item in items if isinstance(item, Flake8Item)]
+    except ImportError:
+        pass
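
A note on the hook just added: pytest calls pytest_collection_modifyitems after test collection, and mutating items in place changes what runs. A hedged variant (not part of this commit) would also report the dropped tests through pytest's standard pytest_deselected hook, so the run summary stays accurate:

def pytest_collection_modifyitems(session, config, items):
    # Keep only flake8 items when --flake8 is passed; report the rest as
    # deselected rather than silently dropping them. Assumes pytest-flake8.
    try:
        from pytest_flake8 import Flake8Item
    except ImportError:
        return
    if config.getoption('--flake8'):
        selected = [item for item in items if isinstance(item, Flake8Item)]
        deselected = [item for item in items if not isinstance(item, Flake8Item)]
        if deselected:
            config.hook.pytest_deselected(items=deselected)
        items[:] = selected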

extras/scrapy_zsh_completion

@@ -61,7 +61,7 @@ _scrapy() {
         '-c[evaluate the code in the shell, print the result and exit]:code:(CODE)'
         '--no-redirect[do not handle HTTP 3xx status codes and print response as-is]'
         '--spider[use this spider]:spider:_scrapy_spiders'
-        '::file:_files -g \*.http'
+        '::file:_files -g \*.html'
         '::URL:_httpie_urls'
     )
     _scrapy_glb_opts $options

pytest.ini

@@ -4,3 +4,251 @@ python_files=test_*.py __init__.py
python_classes=
addopts = --doctest-modules --assert=plain
twisted = 1
flake8-ignore =
# extras
extras/qps-bench-server.py E261 E501
extras/qpsclient.py E501 E261
# scrapy/commands
scrapy/commands/__init__.py E128 E501
scrapy/commands/check.py F401 E501
scrapy/commands/crawl.py E501
scrapy/commands/edit.py E501
scrapy/commands/fetch.py E401 E302 E501 E128 E502 E731
scrapy/commands/genspider.py E128 E501 E502
scrapy/commands/list.py E302
scrapy/commands/parse.py E128 E501 E731 E226
scrapy/commands/runspider.py E501
scrapy/commands/settings.py E302 E128
scrapy/commands/shell.py E128 E501 E502
scrapy/commands/startproject.py E502 E127 E501 E128
scrapy/commands/version.py E501 E128
scrapy/commands/view.py F401 E302
# scrapy/contracts
scrapy/contracts/__init__.py E501 W504
scrapy/contracts/default.py E502 E128
# scrapy/core
scrapy/core/engine.py E261 E501 E128 E127 E306 E502
scrapy/core/scheduler.py E501
scrapy/core/scraper.py E501 E306 E261 E128 W504
scrapy/core/spidermw.py E501 E731 E502 E231 E126 E226
scrapy/core/downloader/__init__.py F401 E501
scrapy/core/downloader/contextfactory.py E501 E128 E126
scrapy/core/downloader/middleware.py E501 E502
scrapy/core/downloader/tls.py E501 E305 E241
scrapy/core/downloader/webclient.py E731 E501 E261 E502 E128 E126 E226
scrapy/core/downloader/handlers/__init__.py E501
scrapy/core/downloader/handlers/ftp.py E501 E305 E128 E127
scrapy/core/downloader/handlers/http.py F401
scrapy/core/downloader/handlers/http10.py E501
scrapy/core/downloader/handlers/http11.py E501
scrapy/core/downloader/handlers/s3.py E501 F401 E502 E128 E126
# scrapy/downloadermiddlewares
scrapy/downloadermiddlewares/ajaxcrawl.py E302 E501 E226
scrapy/downloadermiddlewares/decompression.py E501
scrapy/downloadermiddlewares/defaultheaders.py E501
scrapy/downloadermiddlewares/httpcache.py E501 E126
scrapy/downloadermiddlewares/httpcompression.py E502 E128
scrapy/downloadermiddlewares/httpproxy.py E501
scrapy/downloadermiddlewares/redirect.py E501 W504
scrapy/downloadermiddlewares/retry.py E501 E126
scrapy/downloadermiddlewares/robotstxt.py F401 E501
scrapy/downloadermiddlewares/stats.py E501
# scrapy/extensions
scrapy/extensions/closespider.py E501 E502 E128 E123
scrapy/extensions/corestats.py E302 E501
scrapy/extensions/feedexport.py E128 E501
scrapy/extensions/httpcache.py E128 E501 E303 F401
scrapy/extensions/memdebug.py E501
scrapy/extensions/spiderstate.py E302 E501
scrapy/extensions/telnet.py E501 W504
scrapy/extensions/throttle.py E501
# scrapy/http
scrapy/http/__init__.py F401
scrapy/http/common.py E501
scrapy/http/cookies.py E501
scrapy/http/request/__init__.py E501
scrapy/http/request/form.py E501 E123
scrapy/http/request/json_request.py E501
scrapy/http/response/__init__.py E501 E128 W293 W291
scrapy/http/response/html.py E302
scrapy/http/response/text.py E501 W293 E128 E124
scrapy/http/response/xml.py E302
# scrapy/linkextractors
scrapy/linkextractors/__init__.py E731 E502 E501 E402 F401
scrapy/linkextractors/lxmlhtml.py E501 E731 E226
# scrapy/loader
scrapy/loader/__init__.py E501 E502 E128
scrapy/loader/common.py E302
scrapy/loader/processors.py E501
# scrapy/pipelines
scrapy/pipelines/__init__.py E302
scrapy/pipelines/files.py E116 E501 E266
scrapy/pipelines/images.py E265 E501
scrapy/pipelines/media.py E125 E501 E266
# scrapy/selector
scrapy/selector/__init__.py F403 F401
scrapy/selector/unified.py F401 E501 E111
# scrapy/settings
scrapy/settings/__init__.py E501
scrapy/settings/default_settings.py E501 E261 E114 E116 E226
scrapy/settings/deprecated.py E501
# scrapy/spidermiddlewares
scrapy/spidermiddlewares/httperror.py E501
scrapy/spidermiddlewares/offsite.py E501
scrapy/spidermiddlewares/referer.py F401 E501 E129 W503 W504
scrapy/spidermiddlewares/urllength.py E501
# scrapy/spiders
scrapy/spiders/__init__.py F401 E501 E402
scrapy/spiders/crawl.py E501
scrapy/spiders/feed.py E501 E261
scrapy/spiders/sitemap.py E501
# scrapy/utils
scrapy/utils/benchserver.py E501
scrapy/utils/boto.py F401
scrapy/utils/conf.py E402 E502 E501
scrapy/utils/console.py E302 E261 F401 E306 E305
scrapy/utils/curl.py F401
scrapy/utils/datatypes.py E501 E226
scrapy/utils/decorators.py E501 E302
scrapy/utils/defer.py E501 E302 E128
scrapy/utils/deprecate.py E128 E501 E127 E502
scrapy/utils/display.py E302
scrapy/utils/engine.py F401 E261 E302
scrapy/utils/ftp.py E302
scrapy/utils/gz.py E305 E501 E302 W504
scrapy/utils/http.py F403 F401 E226
scrapy/utils/httpobj.py E302 E501
scrapy/utils/iterators.py E501 E701
scrapy/utils/job.py E302
scrapy/utils/log.py E128 W503
scrapy/utils/markup.py F403 F401 W292
scrapy/utils/misc.py E501 E226
scrapy/utils/multipart.py F403 F401 W292
scrapy/utils/project.py E501
scrapy/utils/python.py E501 E302
scrapy/utils/reactor.py E302 E226
scrapy/utils/reqser.py E501
scrapy/utils/request.py E302 E127 E501
scrapy/utils/response.py E501 E302 E128
scrapy/utils/signal.py E501 E128
scrapy/utils/sitemap.py E501
scrapy/utils/spider.py E271 E302 E501
scrapy/utils/ssl.py E501
scrapy/utils/template.py E302
scrapy/utils/test.py E302 E501
scrapy/utils/url.py E501 F403 F401 E128 F405
# scrapy
scrapy/__init__.py E402 E501
scrapy/_monkeypatches.py W293
scrapy/cmdline.py E502 E501
scrapy/crawler.py E501
scrapy/dupefilters.py E302 E501 E202
scrapy/exceptions.py E302 E501
scrapy/exporters.py E501 E261 E226
scrapy/extension.py E302
scrapy/interfaces.py E302 E501
scrapy/item.py E501 E128
scrapy/link.py E501
scrapy/logformatter.py E501 W293
scrapy/mail.py E402 E128 E501 E502
scrapy/middleware.py E502 E128 E501
scrapy/pqueues.py E501
scrapy/resolver.py E302
scrapy/responsetypes.py E128 E501 E305
scrapy/robotstxt.py E302 E501
scrapy/shell.py E501
scrapy/signalmanager.py E501
scrapy/spiderloader.py E225 F841 E501 E126
scrapy/squeues.py E128
scrapy/statscollectors.py E501
# tests
tests/__init__.py F401 E402 E501
tests/mockserver.py E401 E501 E126 E123 F401
tests/pipelines.py E302 F841 E226
tests/spiders.py E302 E501 E127
tests/test_closespider.py E501 E127
tests/test_command_fetch.py E501 E261
tests/test_command_parse.py F401 E302 E501 E128 E303 E226
tests/test_command_shell.py E501 E128
tests/test_commands.py F401 E128 E501
tests/test_contracts.py E501 E128 W293
tests/test_crawl.py E501 E741 E265
tests/test_crawler.py F841 E306 E501
tests/test_dependencies.py E302 F841 E501 E305
tests/test_downloader_handlers.py E124 E127 E128 E225 E261 E265 F401 E501 E502 E701 E711 E126 E226 E123
tests/test_downloadermiddleware.py E501
tests/test_downloadermiddleware_ajaxcrawlable.py E302 E501
tests/test_downloadermiddleware_cookies.py E731 E741 E501 E128 E303 E265 E126
tests/test_downloadermiddleware_decompression.py E127
tests/test_downloadermiddleware_defaultheaders.py E501
tests/test_downloadermiddleware_downloadtimeout.py E501
tests/test_downloadermiddleware_httpcache.py E713 E501 E302 E305 F401
tests/test_downloadermiddleware_httpcompression.py E501 F401 E251 E126 E123
tests/test_downloadermiddleware_httpproxy.py F401 E501 E128
tests/test_downloadermiddleware_redirect.py E501 E303 E128 E306 E127 E305
tests/test_downloadermiddleware_retry.py E501 E128 W293 E251 E502 E303 E126
tests/test_downloadermiddleware_robotstxt.py E501
tests/test_downloadermiddleware_stats.py E501
tests/test_dupefilters.py E302 E221 E501 E741 W293 W291 E128 E124
tests/test_engine.py E401 E501 E502 E128 E261
tests/test_exporters.py E501 E731 E306 E128 E124
tests/test_extension_telnet.py F401 F841
tests/test_feedexport.py E501 F401 F841 E241
tests/test_http_cookies.py E501
tests/test_http_headers.py E302 E501
tests/test_http_request.py F401 E402 E501 E231 E261 E127 E128 W293 E502 E126 E123
tests/test_http_response.py E501 E301 E502 E128 E265
tests/test_item.py E701 E128 E231 F841 E306
tests/test_link.py E501
tests/test_linkextractors.py E501 E128 E231 E124
tests/test_loader.py E302 E501 E731 E303 E741 E128 E117 E241
tests/test_logformatter.py E128 E501 E231 E122 E302
tests/test_mail.py E302 E128 E501 E305
tests/test_middleware.py E302 E501 E128
tests/test_pipeline_crawl.py E131 E501 E128 E126
tests/test_pipeline_files.py F401 E501 W293 E303 E272 E226
tests/test_pipeline_images.py F401 F841 E501 E303
tests/test_pipeline_media.py E501 E741 E731 E128 E261 E306 E502
tests/test_request_cb_kwargs.py E501
tests/test_responsetypes.py E501 E302 E305
tests/test_robotstxt_interface.py F401 E302 E501 W291
tests/test_scheduler.py E501 E126 E123
tests/test_selector.py F401 E501 E127
tests/test_spider.py E501 F401
tests/test_spidermiddleware.py E501 E226
tests/test_spidermiddleware_httperror.py E128 E501 E127 E121
tests/test_spidermiddleware_offsite.py E302 E501 E128 E111 W293
tests/test_spidermiddleware_output_chain.py F401 E501 E302 W293 E226
tests/test_spidermiddleware_referer.py F401 E501 E302 F841 E125 E201 E261 E124 E241 E121
tests/test_squeues.py E501 E302 E701 E741
tests/test_utils_conf.py E501 E231 E303 E128
tests/test_utils_console.py E302 E231
tests/test_utils_curl.py E501
tests/test_utils_datatypes.py E402 E501 E305
tests/test_utils_defer.py E306 E261 E501 E302 F841 E226
tests/test_utils_deprecate.py F841 E306 E501
tests/test_utils_http.py E302 E501 E502 E128 W504
tests/test_utils_httpobj.py E302
tests/test_utils_iterators.py E501 E128 E129 E302 E303 E241
tests/test_utils_log.py E741 E226
tests/test_utils_python.py E501 E303 E731 E701 E305
tests/test_utils_reqser.py F401 E501 E128
tests/test_utils_request.py E302 E501 E128 E305
tests/test_utils_response.py E501
tests/test_utils_signal.py E741 F841 E302 E731 E226
tests/test_utils_sitemap.py E302 E128 E501 E124
tests/test_utils_spider.py E261 E302 E305
tests/test_utils_template.py E305
tests/test_utils_url.py F401 E501 E127 E302 E305 E211 E125 E226 E241 E126 E123
tests/test_webclient.py E501 E128 E122 E303 E402 E306 E226 E241 E123 E126
tests/mocks/dummydbm.py E302
tests/test_cmdline/__init__.py E502 E501
tests/test_cmdline/extensions.py E302
tests/test_settings/__init__.py F401 E501 E128
tests/test_spiderloader/__init__.py E128 E501 E302
tests/test_spiderloader/test_spiders/spider0.py E302
tests/test_spiderloader/test_spiders/spider1.py E302
tests/test_spiderloader/test_spiders/spider2.py E302
tests/test_spiderloader/test_spiders/spider3.py E302
tests/test_spiderloader/test_spiders/nested/spider4.py E302
tests/test_utils_misc/__init__.py E501 E231
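
The flake8-ignore block above is pytest-flake8's per-file suppression list: each entry is a path followed by the flake8 codes to ignore in that file. As a quick hedged sketch (not part of the commit; assumes the standard pytest.ini layout with a [pytest] section), one can tally which codes are suppressed most often:

from collections import Counter
from configparser import ConfigParser

parser = ConfigParser()
parser.read('pytest.ini')

counts = Counter()
for line in parser.get('pytest', 'flake8-ignore').splitlines():
    line = line.split('#')[0].strip()  # drop the '# section' comment lines
    if not line:
        continue
    path, *codes = line.split()
    counts.update(codes)

print(counts.most_common(5))  # E501 (line too long) dominates by far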

scrapy/commands/check.py

@@ -95,4 +95,3 @@ class Command(ScrapyCommand):
        result.printErrors()
        result.printSummary(start, stop)
        self.exitcode = int(not result.wasSuccessful())

scrapy/commands/startproject.py

@@ -118,4 +118,3 @@ class Command(ScrapyCommand):
        _templates_base_dir = self.settings['TEMPLATES_DIR'] or \
            join(scrapy.__path__[0], 'templates')
        return join(_templates_base_dir, 'project')

scrapy/commands/version.py

@@ -28,4 +28,3 @@ class Command(ScrapyCommand):
                print(patt % (name, version))
        else:
            print("Scrapy %s" % scrapy.__version__)

scrapy/core/downloader/handlers/ftp.py

@@ -112,4 +112,3 @@ class FTPDownloadHandler(object):
                httpcode = self.CODE_MAPPING.get(ftpcode, self.CODE_MAPPING["default"])
                return Response(url=request.url, status=httpcode, body=to_bytes(message))
        raise result.type(result.value)

scrapy/core/downloader/webclient.py

@@ -157,4 +157,3 @@ class ScrapyHTTPClientFactory(HTTPClientFactory):
    def gotHeaders(self, headers):
        self.headers_time = time()
        self.response_headers = headers

scrapy/core/scraper.py

@@ -244,4 +244,3 @@ class Scraper(object):
        return self.signals.send_catch_log_deferred(
            signal=signals.item_scraped, item=output, response=response,
            spider=spider)

scrapy/http/headers.py

@@ -87,5 +87,3 @@ class Headers(CaselessDict):
    def __copy__(self):
        return self.__class__(self)
    copy = __copy__
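
Headers is Scrapy's case-insensitive header mapping, and the __copy__ alias above makes copy() return a new Headers instance. A hedged usage sketch:

from scrapy.http import Headers

h = Headers({'Content-Type': 'text/html'})
h2 = h.copy()
# Keys are matched case-insensitively and values are stored as bytes.
assert h2['content-type'] == b'text/html'
assert isinstance(h2, Headers)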

scrapy/interfaces.py

@@ -15,4 +15,3 @@ class ISpiderLoader(Interface):
    def find_by_request(request):
        """Return the list of spider names that can handle the given request"""

scrapy/link.py

@@ -28,4 +28,3 @@ class Link(object):
    def __repr__(self):
        return 'Link(url=%r, text=%r, fragment=%r, nofollow=%r)' % \
            (self.url, self.text, self.fragment, self.nofollow)

scrapy/spiders/feed.py

@@ -133,4 +133,3 @@ class CSVFeedSpider(Spider):
            raise NotConfigured('You must define parse_row method in order to scrape this CSV feed')
        response = self.adapt_response(response)
        return self.parse_rows(response)
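
As the snippet above shows, CSVFeedSpider refuses to run without a parse_row method. A minimal hedged subclass sketch (the URL and column names are made up for illustration):

from scrapy.spiders import CSVFeedSpider

class ExampleCSVSpider(CSVFeedSpider):
    name = 'csv_example'
    start_urls = ['http://example.com/feed.csv']  # hypothetical feed
    delimiter = ','
    headers = ['id', 'name']

    def parse_row(self, response, row):
        # row is a dict keyed by the `headers` attribute
        yield {'id': row['id'], 'name': row['name']}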

scrapy/spiders/init.py

@@ -29,4 +29,3 @@ class InitSpider(Spider):
        spider
        """
        return self.initialized()

scrapy/statscollectors.py

@@ -80,5 +80,3 @@ class DummyStatsCollector(StatsCollector):
    def min_value(self, key, value, spider=None):
        pass
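
DummyStatsCollector implements the stats API as no-ops, which saves a little overhead when stats are not needed; it is enabled through the STATS_CLASS setting:

# settings.py — switch off stats collection (documented Scrapy setting)
STATS_CLASS = 'scrapy.statscollectors.DummyStatsCollector'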

scrapy/utils/http.py

@@ -34,4 +34,3 @@ def decode_chunked_transfer(chunked_body):
        body += t[:size]
        t = t[size+2:]
    return body
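
Only the tail of decode_chunked_transfer is shown above. For reference, a hedged usage sketch (the helper lives in scrapy.utils.http; the input follows chunked transfer encoding: hex size, CRLF, payload, CRLF, terminated by a zero-size chunk):

from scrapy.utils.http import decode_chunked_transfer

# 0xb == 11 bytes ('hello world'), 0x3 == 3 bytes ('!!!'), '0' terminates
chunked = "b\r\nhello world\r\n3\r\n!!!\r\n0\r\n\r\n"
assert decode_chunked_transfer(chunked) == "hello world!!!"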

scrapy/utils/misc.py

@@ -135,7 +135,7 @@ def create_instance(objcls, settings, crawler, *args, **kwargs):
     """
     if settings is None:
         if crawler is None:
-            raise ValueError("Specifiy at least one of settings and crawler.")
+            raise ValueError("Specify at least one of settings and crawler.")
         settings = crawler.settings
     if crawler and hasattr(objcls, 'from_crawler'):
         return objcls.from_crawler(crawler, *args, **kwargs)
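
Per the snippet above, create_instance prefers from_crawler when a crawler is given and otherwise falls back to from_settings or the plain constructor. A hedged usage sketch (DummyPipeline is a made-up class for illustration):

from scrapy.settings import Settings
from scrapy.utils.misc import create_instance

class DummyPipeline:
    # No from_crawler/from_settings, so create_instance ends up calling
    # the constructor directly with the extra arguments.
    def __init__(self, tag='default'):
        self.tag = tag

instance = create_instance(DummyPipeline, Settings(), crawler=None, tag='x')
assert instance.tag == 'x'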

tests/mockserver.py

@@ -209,8 +209,7 @@ class MockServer():

     def __exit__(self, exc_type, exc_value, traceback):
         self.proc.kill()
-        self.proc.wait()
-        time.sleep(0.2)
+        self.proc.communicate()

     def url(self, path, is_secure=False):
         host = self.http_address.replace('0.0.0.0', '127.0.0.1')
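
The change above replaces wait() plus a fixed sleep with communicate(), which both reaps the child process and drains its pipes, so no arbitrary delay is needed. A minimal sketch of the pattern (a hypothetical reduction; the real MockServer launches an HTTP server subprocess):

import subprocess
import sys

class TinyServer:
    def __enter__(self):
        self.proc = subprocess.Popen(
            [sys.executable, '-c', 'import time; time.sleep(60)'])
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.proc.kill()
        # communicate() waits for termination and closes any pipes.
        self.proc.communicate()

with TinyServer():
    pass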

tests/test_cmdline/extensions.py

@@ -12,4 +12,3 @@ class TestExtension(object):
class DummyExtension(object):
    pass

tests/test_settings/default_settings.py

@@ -2,4 +2,3 @@
TEST_DEFAULT = 'defvalue'
TEST_DICT = {'key': 'val'}

tests/test_spidermiddleware_depth.py

@@ -40,4 +40,3 @@ class TestDepthMiddleware(TestCase):
    def tearDown(self):
        self.stats.close_spider(self.spider, '')

tests/test_spidermiddleware_urllength.py

@@ -18,4 +18,3 @@ class TestUrlLengthMiddleware(TestCase):
        spider = Spider('foo')
        out = list(mw.process_spider_output(res, reqs, spider))
        self.assertEqual(out, [short_url_req])

tests/test_utils_datatypes.py

@@ -231,4 +231,3 @@ class SequenceExcludeTest(unittest.TestCase):
if __name__ == "__main__":
    unittest.main()

tests/test_utils_http.py

@@ -16,5 +16,3 @@ class ChunkedTest(unittest.TestCase):
            "This is the data in the first chunk\r\n" +
            "and this is the second one\r\n" +
            "consequence")

tests/test_utils_spider.py

@@ -34,4 +34,3 @@ class UtilsSpidersTestCase(unittest.TestCase):
if __name__ == "__main__":
    unittest.main()

tox.ini

@@ -62,6 +62,14 @@ basepython = pypy3
 commands =
     py.test {posargs:scrapy tests}

+[testenv:flake8]
+basepython = python3.8
+deps =
+    {[testenv]deps}
+    pytest-flake8
+commands =
+    py.test --flake8 {posargs:scrapy tests}
+
 [docs]
 changedir = docs
 deps =