mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-22 04:53:30 +00:00
Merge pull request #4010 from scrapy/asyncio-base
Base support for asyncio
This commit is contained in:
commit
bb991cd303
@ -16,6 +16,8 @@ matrix:
|
|||||||
python: 3.5
|
python: 3.5
|
||||||
- env: TOXENV=pinned
|
- env: TOXENV=pinned
|
||||||
python: 3.5
|
python: 3.5
|
||||||
|
- env: TOXENV=py35-asyncio
|
||||||
|
python: 3.5
|
||||||
- env: TOXENV=py36
|
- env: TOXENV=py36
|
||||||
python: 3.6
|
python: 3.6
|
||||||
- env: TOXENV=py37
|
- env: TOXENV=py37
|
||||||
@ -24,6 +26,8 @@ matrix:
|
|||||||
python: 3.8
|
python: 3.8
|
||||||
- env: TOXENV=extra-deps
|
- env: TOXENV=extra-deps
|
||||||
python: 3.8
|
python: 3.8
|
||||||
|
- env: TOXENV=py38-asyncio
|
||||||
|
python: 3.8
|
||||||
- env: TOXENV=docs
|
- env: TOXENV=docs
|
||||||
python: 3.7 # Keep in sync with .readthedocs.yml
|
python: 3.7 # Keep in sync with .readthedocs.yml
|
||||||
install:
|
install:
|
||||||
|
15
conftest.py
15
conftest.py
@ -34,3 +34,18 @@ def pytest_collection_modifyitems(session, config, items):
|
|||||||
items[:] = [item for item in items if isinstance(item, Flake8Item)]
|
items[:] = [item for item in items if isinstance(item, Flake8Item)]
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='class')
|
||||||
|
def reactor_pytest(request):
|
||||||
|
if not request.cls:
|
||||||
|
# doctests
|
||||||
|
return
|
||||||
|
request.cls.reactor_pytest = request.config.getoption("--reactor")
|
||||||
|
return request.cls.reactor_pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def only_asyncio(request, reactor_pytest):
|
||||||
|
if request.node.get_closest_marker('only_asyncio') and reactor_pytest != 'asyncio':
|
||||||
|
pytest.skip('This test is only run with --reactor=asyncio')
|
||||||
|
@ -160,6 +160,27 @@ to any particular component. In that case the module of that component will be
|
|||||||
shown, typically an extension, middleware or pipeline. It also means that the
|
shown, typically an extension, middleware or pipeline. It also means that the
|
||||||
component must be enabled in order for the setting to have any effect.
|
component must be enabled in order for the setting to have any effect.
|
||||||
|
|
||||||
|
.. setting:: ASYNCIO_REACTOR
|
||||||
|
|
||||||
|
ASYNCIO_REACTOR
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Default: ``False``
|
||||||
|
|
||||||
|
Whether to install and require the Twisted reactor that uses the asyncio loop.
|
||||||
|
|
||||||
|
When this option is set to ``True``, Scrapy will require
|
||||||
|
:class:`~twisted.internet.asyncioreactor.AsyncioSelectorReactor`. It will
|
||||||
|
install this reactor if no reactor is installed yet, such as when using the
|
||||||
|
``scrapy`` script or :class:`~scrapy.crawler.CrawlerProcess`. If you are using
|
||||||
|
:class:`~scrapy.crawler.CrawlerRunner`, you need to install the correct reactor
|
||||||
|
manually. If a different reactor is installed outside Scrapy, it will raise an
|
||||||
|
exception.
|
||||||
|
|
||||||
|
The default value for this option is currently ``False`` to maintain backward
|
||||||
|
compatibility and avoid possible problems caused by using a different Twisted
|
||||||
|
reactor.
|
||||||
|
|
||||||
.. setting:: AWS_ACCESS_KEY_ID
|
.. setting:: AWS_ACCESS_KEY_ID
|
||||||
|
|
||||||
AWS_ACCESS_KEY_ID
|
AWS_ACCESS_KEY_ID
|
||||||
|
@ -18,6 +18,8 @@ addopts =
|
|||||||
--ignore=docs/topics/telnetconsole.rst
|
--ignore=docs/topics/telnetconsole.rst
|
||||||
--ignore=docs/utils
|
--ignore=docs/utils
|
||||||
twisted = 1
|
twisted = 1
|
||||||
|
markers =
|
||||||
|
only_asyncio: marks tests as only enabled when --reactor=asyncio is passed
|
||||||
flake8-ignore =
|
flake8-ignore =
|
||||||
# Files that are only meant to provide top-level imports are expected not
|
# Files that are only meant to provide top-level imports are expected not
|
||||||
# to use any of their imports:
|
# to use any of their imports:
|
||||||
@ -114,6 +116,7 @@ flake8-ignore =
|
|||||||
scrapy/spiders/feed.py E501
|
scrapy/spiders/feed.py E501
|
||||||
scrapy/spiders/sitemap.py E501
|
scrapy/spiders/sitemap.py E501
|
||||||
# scrapy/utils
|
# scrapy/utils
|
||||||
|
scrapy/utils/asyncio.py E501
|
||||||
scrapy/utils/benchserver.py E501
|
scrapy/utils/benchserver.py E501
|
||||||
scrapy/utils/conf.py E402 E501
|
scrapy/utils/conf.py E402 E501
|
||||||
scrapy/utils/console.py E306 E305
|
scrapy/utils/console.py E306 E305
|
||||||
@ -223,6 +226,7 @@ flake8-ignore =
|
|||||||
tests/test_spidermiddleware_output_chain.py E501 E226
|
tests/test_spidermiddleware_output_chain.py E501 E226
|
||||||
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
|
tests/test_spidermiddleware_referer.py E501 F841 E125 E201 E124 E501 E241 E121
|
||||||
tests/test_squeues.py E501 E701 E741
|
tests/test_squeues.py E501 E701 E741
|
||||||
|
tests/test_utils_asyncio.py E501
|
||||||
tests/test_utils_conf.py E501 E303 E128
|
tests/test_utils_conf.py E501 E303 E128
|
||||||
tests/test_utils_curl.py E501
|
tests/test_utils_curl.py E501
|
||||||
tests/test_utils_datatypes.py E402 E501 E305
|
tests/test_utils_datatypes.py E402 E501 E305
|
||||||
|
@ -6,8 +6,8 @@ See documentation in docs/topics/shell.rst
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
|
|
||||||
from scrapy.commands import ScrapyCommand
|
from scrapy.commands import ScrapyCommand
|
||||||
from scrapy.shell import Shell
|
|
||||||
from scrapy.http import Request
|
from scrapy.http import Request
|
||||||
|
from scrapy.shell import Shell
|
||||||
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
|
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
|
||||||
from scrapy.utils.url import guess_scheme
|
from scrapy.utils.url import guess_scheme
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import pprint
|
|||||||
import signal
|
import signal
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from twisted.internet import reactor, defer
|
from twisted.internet import defer
|
||||||
from zope.interface.verify import verifyClass, DoesNotImplement
|
from zope.interface.verify import verifyClass, DoesNotImplement
|
||||||
|
|
||||||
from scrapy import Spider
|
from scrapy import Spider
|
||||||
@ -14,6 +14,7 @@ from scrapy.extension import ExtensionManager
|
|||||||
from scrapy.settings import overridden_settings, Settings
|
from scrapy.settings import overridden_settings, Settings
|
||||||
from scrapy.signalmanager import SignalManager
|
from scrapy.signalmanager import SignalManager
|
||||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||||
|
from scrapy.utils.asyncio import install_asyncio_reactor, is_asyncio_reactor_installed
|
||||||
from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
|
from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
|
||||||
from scrapy.utils.misc import load_object
|
from scrapy.utils.misc import load_object
|
||||||
from scrapy.utils.log import (
|
from scrapy.utils.log import (
|
||||||
@ -136,6 +137,7 @@ class CrawlerRunner(object):
|
|||||||
self._crawlers = set()
|
self._crawlers = set()
|
||||||
self._active = set()
|
self._active = set()
|
||||||
self.bootstrap_failed = False
|
self.bootstrap_failed = False
|
||||||
|
self._handle_asyncio_reactor()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def spiders(self):
|
def spiders(self):
|
||||||
@ -229,6 +231,11 @@ class CrawlerRunner(object):
|
|||||||
while self._active:
|
while self._active:
|
||||||
yield defer.DeferredList(self._active)
|
yield defer.DeferredList(self._active)
|
||||||
|
|
||||||
|
def _handle_asyncio_reactor(self):
|
||||||
|
if self.settings.getbool('ASYNCIO_REACTOR') and not is_asyncio_reactor_installed():
|
||||||
|
raise Exception("ASYNCIO_REACTOR is on but the Twisted asyncio "
|
||||||
|
"reactor is not installed.")
|
||||||
|
|
||||||
|
|
||||||
class CrawlerProcess(CrawlerRunner):
|
class CrawlerProcess(CrawlerRunner):
|
||||||
"""
|
"""
|
||||||
@ -261,6 +268,7 @@ class CrawlerProcess(CrawlerRunner):
|
|||||||
log_scrapy_info(self.settings)
|
log_scrapy_info(self.settings)
|
||||||
|
|
||||||
def _signal_shutdown(self, signum, _):
|
def _signal_shutdown(self, signum, _):
|
||||||
|
from twisted.internet import reactor
|
||||||
install_shutdown_handlers(self._signal_kill)
|
install_shutdown_handlers(self._signal_kill)
|
||||||
signame = signal_names[signum]
|
signame = signal_names[signum]
|
||||||
logger.info("Received %(signame)s, shutting down gracefully. Send again to force ",
|
logger.info("Received %(signame)s, shutting down gracefully. Send again to force ",
|
||||||
@ -268,6 +276,7 @@ class CrawlerProcess(CrawlerRunner):
|
|||||||
reactor.callFromThread(self._graceful_stop_reactor)
|
reactor.callFromThread(self._graceful_stop_reactor)
|
||||||
|
|
||||||
def _signal_kill(self, signum, _):
|
def _signal_kill(self, signum, _):
|
||||||
|
from twisted.internet import reactor
|
||||||
install_shutdown_handlers(signal.SIG_IGN)
|
install_shutdown_handlers(signal.SIG_IGN)
|
||||||
signame = signal_names[signum]
|
signame = signal_names[signum]
|
||||||
logger.info('Received %(signame)s twice, forcing unclean shutdown',
|
logger.info('Received %(signame)s twice, forcing unclean shutdown',
|
||||||
@ -286,6 +295,7 @@ class CrawlerProcess(CrawlerRunner):
|
|||||||
:param boolean stop_after_crawl: stop or not the reactor when all
|
:param boolean stop_after_crawl: stop or not the reactor when all
|
||||||
crawlers have finished
|
crawlers have finished
|
||||||
"""
|
"""
|
||||||
|
from twisted.internet import reactor
|
||||||
if stop_after_crawl:
|
if stop_after_crawl:
|
||||||
d = self.join()
|
d = self.join()
|
||||||
# Don't start the reactor if the deferreds are already fired
|
# Don't start the reactor if the deferreds are already fired
|
||||||
@ -300,6 +310,7 @@ class CrawlerProcess(CrawlerRunner):
|
|||||||
reactor.run(installSignalHandlers=False) # blocking call
|
reactor.run(installSignalHandlers=False) # blocking call
|
||||||
|
|
||||||
def _get_dns_resolver(self):
|
def _get_dns_resolver(self):
|
||||||
|
from twisted.internet import reactor
|
||||||
if self.settings.getbool('DNSCACHE_ENABLED'):
|
if self.settings.getbool('DNSCACHE_ENABLED'):
|
||||||
cache_size = self.settings.getint('DNSCACHE_SIZE')
|
cache_size = self.settings.getint('DNSCACHE_SIZE')
|
||||||
else:
|
else:
|
||||||
@ -316,11 +327,17 @@ class CrawlerProcess(CrawlerRunner):
|
|||||||
return d
|
return d
|
||||||
|
|
||||||
def _stop_reactor(self, _=None):
|
def _stop_reactor(self, _=None):
|
||||||
|
from twisted.internet import reactor
|
||||||
try:
|
try:
|
||||||
reactor.stop()
|
reactor.stop()
|
||||||
except RuntimeError: # raised if already stopped or in shutdown stage
|
except RuntimeError: # raised if already stopped or in shutdown stage
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _handle_asyncio_reactor(self):
|
||||||
|
if self.settings.getbool('ASYNCIO_REACTOR'):
|
||||||
|
install_asyncio_reactor()
|
||||||
|
super()._handle_asyncio_reactor()
|
||||||
|
|
||||||
|
|
||||||
def _get_spider_loader(settings):
|
def _get_spider_loader(settings):
|
||||||
""" Get SpiderLoader instance from settings """
|
""" Get SpiderLoader instance from settings """
|
||||||
|
@ -19,6 +19,8 @@ from os.path import join, abspath, dirname
|
|||||||
|
|
||||||
AJAXCRAWL_ENABLED = False
|
AJAXCRAWL_ENABLED = False
|
||||||
|
|
||||||
|
ASYNCIO_REACTOR = False
|
||||||
|
|
||||||
AUTOTHROTTLE_ENABLED = False
|
AUTOTHROTTLE_ENABLED = False
|
||||||
AUTOTHROTTLE_DEBUG = False
|
AUTOTHROTTLE_DEBUG = False
|
||||||
AUTOTHROTTLE_MAX_DELAY = 60.0
|
AUTOTHROTTLE_MAX_DELAY = 60.0
|
||||||
|
@ -7,7 +7,7 @@ import os
|
|||||||
import signal
|
import signal
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from twisted.internet import reactor, threads, defer
|
from twisted.internet import threads, defer
|
||||||
from twisted.python import threadable
|
from twisted.python import threadable
|
||||||
from w3lib.url import any_to_uri
|
from w3lib.url import any_to_uri
|
||||||
|
|
||||||
@ -98,6 +98,7 @@ class Shell(object):
|
|||||||
return spider
|
return spider
|
||||||
|
|
||||||
def fetch(self, request_or_url, spider=None, redirect=True, **kwargs):
|
def fetch(self, request_or_url, spider=None, redirect=True, **kwargs):
|
||||||
|
from twisted.internet import reactor
|
||||||
if isinstance(request_or_url, Request):
|
if isinstance(request_or_url, Request):
|
||||||
request = request_or_url
|
request = request_or_url
|
||||||
else:
|
else:
|
||||||
|
17
scrapy/utils/asyncio.py
Normal file
17
scrapy/utils/asyncio.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import asyncio
|
||||||
|
from contextlib import suppress
|
||||||
|
|
||||||
|
from twisted.internet import asyncioreactor
|
||||||
|
from twisted.internet.error import ReactorAlreadyInstalledError
|
||||||
|
|
||||||
|
|
||||||
|
def install_asyncio_reactor():
|
||||||
|
""" Tries to install AsyncioSelectorReactor
|
||||||
|
"""
|
||||||
|
with suppress(ReactorAlreadyInstalledError):
|
||||||
|
asyncioreactor.install(asyncio.get_event_loop())
|
||||||
|
|
||||||
|
|
||||||
|
def is_asyncio_reactor_installed():
|
||||||
|
from twisted.internet import reactor
|
||||||
|
return isinstance(reactor, asyncioreactor.AsyncioSelectorReactor)
|
@ -1,8 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Helper functions for dealing with Twisted deferreds
|
Helper functions for dealing with Twisted deferreds
|
||||||
"""
|
"""
|
||||||
|
from twisted.internet import defer, task
|
||||||
from twisted.internet import defer, reactor, task
|
|
||||||
from twisted.python import failure
|
from twisted.python import failure
|
||||||
|
|
||||||
from scrapy.exceptions import IgnoreRequest
|
from scrapy.exceptions import IgnoreRequest
|
||||||
@ -15,6 +14,7 @@ def defer_fail(_failure):
|
|||||||
It delays by 100ms so reactor has a chance to go through readers and writers
|
It delays by 100ms so reactor has a chance to go through readers and writers
|
||||||
before attending pending delayed calls, so do not set delay to zero.
|
before attending pending delayed calls, so do not set delay to zero.
|
||||||
"""
|
"""
|
||||||
|
from twisted.internet import reactor
|
||||||
d = defer.Deferred()
|
d = defer.Deferred()
|
||||||
reactor.callLater(0.1, d.errback, _failure)
|
reactor.callLater(0.1, d.errback, _failure)
|
||||||
return d
|
return d
|
||||||
@ -27,6 +27,7 @@ def defer_succeed(result):
|
|||||||
It delays by 100ms so reactor has a chance to go trough readers and writers
|
It delays by 100ms so reactor has a chance to go trough readers and writers
|
||||||
before attending pending delayed calls, so do not set delay to zero.
|
before attending pending delayed calls, so do not set delay to zero.
|
||||||
"""
|
"""
|
||||||
|
from twisted.internet import reactor
|
||||||
d = defer.Deferred()
|
d = defer.Deferred()
|
||||||
reactor.callLater(0.1, d.callback, result)
|
reactor.callLater(0.1, d.callback, result)
|
||||||
return d
|
return d
|
||||||
|
@ -11,6 +11,7 @@ from twisted.python import log as twisted_log
|
|||||||
import scrapy
|
import scrapy
|
||||||
from scrapy.settings import Settings
|
from scrapy.settings import Settings
|
||||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||||
|
from scrapy.utils.asyncio import is_asyncio_reactor_installed
|
||||||
from scrapy.utils.versions import scrapy_components_versions
|
from scrapy.utils.versions import scrapy_components_versions
|
||||||
|
|
||||||
|
|
||||||
@ -148,6 +149,8 @@ def log_scrapy_info(settings):
|
|||||||
{'versions': ", ".join("%s %s" % (name, version)
|
{'versions': ", ".join("%s %s" % (name, version)
|
||||||
for name, version in scrapy_components_versions()
|
for name, version in scrapy_components_versions()
|
||||||
if name != "Scrapy")})
|
if name != "Scrapy")})
|
||||||
|
if is_asyncio_reactor_installed():
|
||||||
|
logger.debug("Asyncio reactor is installed")
|
||||||
|
|
||||||
|
|
||||||
class StreamLogger(object):
|
class StreamLogger(object):
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
import signal
|
import signal
|
||||||
|
|
||||||
from twisted.internet import reactor
|
|
||||||
|
|
||||||
|
|
||||||
signal_names = {}
|
signal_names = {}
|
||||||
for signame in dir(signal):
|
for signame in dir(signal):
|
||||||
@ -17,6 +15,7 @@ def install_shutdown_handlers(function, override_sigint=True):
|
|||||||
SIGINT handler won't be install if there is already a handler in place
|
SIGINT handler won't be install if there is already a handler in place
|
||||||
(e.g. Pdb)
|
(e.g. Pdb)
|
||||||
"""
|
"""
|
||||||
|
from twisted.internet import reactor
|
||||||
reactor._handleSignals()
|
reactor._handleSignals()
|
||||||
signal.signal(signal.SIGTERM, function)
|
signal.signal(signal.SIGTERM, function)
|
||||||
if signal.getsignal(signal.SIGINT) == signal.default_int_handler or \
|
if signal.getsignal(signal.SIGINT) == signal.default_int_handler or \
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
from twisted.internet import reactor, error
|
from twisted.internet import error
|
||||||
|
|
||||||
|
|
||||||
def listen_tcp(portrange, host, factory):
|
def listen_tcp(portrange, host, factory):
|
||||||
"""Like reactor.listenTCP but tries different ports in a range."""
|
"""Like reactor.listenTCP but tries different ports in a range."""
|
||||||
|
from twisted.internet import reactor
|
||||||
assert len(portrange) <= 2, "invalid portrange: %s" % portrange
|
assert len(portrange) <= 2, "invalid portrange: %s" % portrange
|
||||||
if not portrange:
|
if not portrange:
|
||||||
return reactor.listenTCP(0, factory, interface=host)
|
return reactor.listenTCP(0, factory, interface=host)
|
||||||
@ -30,6 +31,7 @@ class CallLaterOnce(object):
|
|||||||
self._call = None
|
self._call = None
|
||||||
|
|
||||||
def schedule(self, delay=0):
|
def schedule(self, delay=0):
|
||||||
|
from twisted.internet import reactor
|
||||||
if self._call is None:
|
if self._call is None:
|
||||||
self._call = reactor.callLater(delay, self)
|
self._call = reactor.callLater(delay, self)
|
||||||
|
|
||||||
|
17
tests/CrawlerProcess/asyncio_enabled_no_reactor.py
Normal file
17
tests/CrawlerProcess/asyncio_enabled_no_reactor.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import scrapy
|
||||||
|
from scrapy.crawler import CrawlerProcess
|
||||||
|
|
||||||
|
|
||||||
|
class NoRequestsSpider(scrapy.Spider):
|
||||||
|
name = 'no_request'
|
||||||
|
|
||||||
|
def start_requests(self):
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
process = CrawlerProcess(settings={
|
||||||
|
'ASYNCIO_REACTOR': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
process.crawl(NoRequestsSpider)
|
||||||
|
process.start()
|
22
tests/CrawlerProcess/asyncio_enabled_reactor.py
Normal file
22
tests/CrawlerProcess/asyncio_enabled_reactor.py
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
import asyncio
|
||||||
|
|
||||||
|
from twisted.internet import asyncioreactor
|
||||||
|
asyncioreactor.install(asyncio.get_event_loop())
|
||||||
|
|
||||||
|
import scrapy
|
||||||
|
from scrapy.crawler import CrawlerProcess
|
||||||
|
|
||||||
|
|
||||||
|
class NoRequestsSpider(scrapy.Spider):
|
||||||
|
name = 'no_request'
|
||||||
|
|
||||||
|
def start_requests(self):
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
process = CrawlerProcess(settings={
|
||||||
|
'ASYNCIO_REACTOR': True,
|
||||||
|
})
|
||||||
|
|
||||||
|
process.crawl(NoRequestsSpider)
|
||||||
|
process.start()
|
@ -4,7 +4,7 @@ mitmproxy; python_version >= '3.6'
|
|||||||
mitmproxy<4.0.0; python_version < '3.6'
|
mitmproxy<4.0.0; python_version < '3.6'
|
||||||
pytest
|
pytest
|
||||||
pytest-cov
|
pytest-cov
|
||||||
pytest-twisted
|
pytest-twisted >= 1.11
|
||||||
pytest-xdist
|
pytest-xdist
|
||||||
sybil
|
sybil
|
||||||
testfixtures
|
testfixtures
|
||||||
|
@ -295,6 +295,14 @@ class BadSpider(scrapy.Spider):
|
|||||||
self.assertIn("start_requests", log)
|
self.assertIn("start_requests", log)
|
||||||
self.assertIn("badspider.py", log)
|
self.assertIn("badspider.py", log)
|
||||||
|
|
||||||
|
def test_asyncio_enabled_true(self):
|
||||||
|
log = self.get_log(self.debug_log_spider, args=['-s', 'ASYNCIO_REACTOR=True'])
|
||||||
|
self.assertIn("DEBUG: Asyncio reactor is installed", log)
|
||||||
|
|
||||||
|
def test_asyncio_enabled_false(self):
|
||||||
|
log = self.get_log(self.debug_log_spider, args=['-s', 'ASYNCIO_REACTOR=False'])
|
||||||
|
self.assertNotIn("DEBUG: Asyncio reactor is installed", log)
|
||||||
|
|
||||||
|
|
||||||
class BenchCommandTest(CommandTest):
|
class BenchCommandTest(CommandTest):
|
||||||
|
|
||||||
|
@ -4,9 +4,10 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
|
from pytest import raises, mark
|
||||||
|
from testfixtures import LogCapture
|
||||||
from twisted.internet import defer
|
from twisted.internet import defer
|
||||||
from twisted.trial import unittest
|
from twisted.trial import unittest
|
||||||
from pytest import raises
|
|
||||||
|
|
||||||
import scrapy
|
import scrapy
|
||||||
from scrapy.crawler import Crawler, CrawlerRunner, CrawlerProcess
|
from scrapy.crawler import Crawler, CrawlerRunner, CrawlerProcess
|
||||||
@ -207,6 +208,7 @@ class NoRequestsSpider(scrapy.Spider):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
@mark.usefixtures('reactor_pytest')
|
||||||
class CrawlerRunnerHasSpider(unittest.TestCase):
|
class CrawlerRunnerHasSpider(unittest.TestCase):
|
||||||
|
|
||||||
@defer.inlineCallbacks
|
@defer.inlineCallbacks
|
||||||
@ -250,6 +252,33 @@ class CrawlerRunnerHasSpider(unittest.TestCase):
|
|||||||
|
|
||||||
self.assertEqual(runner.bootstrap_failed, True)
|
self.assertEqual(runner.bootstrap_failed, True)
|
||||||
|
|
||||||
|
def test_crawler_runner_asyncio_enabled_true(self):
|
||||||
|
if self.reactor_pytest == 'asyncio':
|
||||||
|
runner = CrawlerRunner(settings={'ASYNCIO_REACTOR': True})
|
||||||
|
else:
|
||||||
|
msg = "ASYNCIO_REACTOR is on but the Twisted asyncio reactor is not installed"
|
||||||
|
with self.assertRaisesRegex(Exception, msg):
|
||||||
|
runner = CrawlerRunner(settings={'ASYNCIO_REACTOR': True})
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def test_crawler_process_asyncio_enabled_true(self):
|
||||||
|
with LogCapture(level=logging.DEBUG) as log:
|
||||||
|
if self.reactor_pytest == 'asyncio':
|
||||||
|
runner = CrawlerProcess(settings={'ASYNCIO_REACTOR': True})
|
||||||
|
yield runner.crawl(NoRequestsSpider)
|
||||||
|
self.assertIn("Asyncio reactor is installed", str(log))
|
||||||
|
else:
|
||||||
|
msg = "ASYNCIO_REACTOR is on but the Twisted asyncio reactor is not installed"
|
||||||
|
with self.assertRaisesRegex(Exception, msg):
|
||||||
|
runner = CrawlerProcess(settings={'ASYNCIO_REACTOR': True})
|
||||||
|
|
||||||
|
@defer.inlineCallbacks
|
||||||
|
def test_crawler_process_asyncio_enabled_false(self):
|
||||||
|
runner = CrawlerProcess(settings={'ASYNCIO_REACTOR': False})
|
||||||
|
with LogCapture(level=logging.DEBUG) as log:
|
||||||
|
yield runner.crawl(NoRequestsSpider)
|
||||||
|
self.assertNotIn("Asyncio reactor is installed", str(log))
|
||||||
|
|
||||||
|
|
||||||
class CrawlerProcessSubprocess(unittest.TestCase):
|
class CrawlerProcessSubprocess(unittest.TestCase):
|
||||||
script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'CrawlerProcess')
|
script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'CrawlerProcess')
|
||||||
@ -265,3 +294,14 @@ class CrawlerProcessSubprocess(unittest.TestCase):
|
|||||||
def test_simple(self):
|
def test_simple(self):
|
||||||
log = self.run_script('simple.py')
|
log = self.run_script('simple.py')
|
||||||
self.assertIn('Spider closed (finished)', log)
|
self.assertIn('Spider closed (finished)', log)
|
||||||
|
self.assertNotIn("DEBUG: Asyncio reactor is installed", log)
|
||||||
|
|
||||||
|
def test_asyncio_enabled_no_reactor(self):
|
||||||
|
log = self.run_script('asyncio_enabled_no_reactor.py')
|
||||||
|
self.assertIn('Spider closed (finished)', log)
|
||||||
|
self.assertIn("DEBUG: Asyncio reactor is installed", log)
|
||||||
|
|
||||||
|
def test_asyncio_enabled_reactor(self):
|
||||||
|
log = self.run_script('asyncio_enabled_reactor.py')
|
||||||
|
self.assertIn('Spider closed (finished)', log)
|
||||||
|
self.assertIn("DEBUG: Asyncio reactor is installed", log)
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
|
from twisted.internet.defer import Deferred
|
||||||
from twisted.trial.unittest import TestCase
|
from twisted.trial.unittest import TestCase
|
||||||
from twisted.python.failure import Failure
|
from twisted.python.failure import Failure
|
||||||
|
|
||||||
@ -177,3 +178,31 @@ class ProcessExceptionInvalidOutput(ManagerTestCase):
|
|||||||
dfd.addBoth(results.append)
|
dfd.addBoth(results.append)
|
||||||
self.assertIsInstance(results[0], Failure)
|
self.assertIsInstance(results[0], Failure)
|
||||||
self.assertIsInstance(results[0].value, _InvalidOutput)
|
self.assertIsInstance(results[0].value, _InvalidOutput)
|
||||||
|
|
||||||
|
|
||||||
|
class MiddlewareUsingDeferreds(ManagerTestCase):
|
||||||
|
"""Middlewares using Deferreds should work"""
|
||||||
|
|
||||||
|
def test_deferred(self):
|
||||||
|
resp = Response('http://example.com/index.html')
|
||||||
|
|
||||||
|
class DeferredMiddleware:
|
||||||
|
def cb(self, result):
|
||||||
|
return result
|
||||||
|
|
||||||
|
def process_request(self, request, spider):
|
||||||
|
d = Deferred()
|
||||||
|
d.addCallback(self.cb)
|
||||||
|
d.callback(resp)
|
||||||
|
return d
|
||||||
|
|
||||||
|
self.mwman._add_middleware(DeferredMiddleware())
|
||||||
|
req = Request('http://example.com/index.html')
|
||||||
|
download_func = mock.MagicMock()
|
||||||
|
dfd = self.mwman.download(download_func, req, self.spider)
|
||||||
|
results = []
|
||||||
|
dfd.addBoth(results.append)
|
||||||
|
self._wait(dfd)
|
||||||
|
|
||||||
|
self.assertIs(results[0], resp)
|
||||||
|
self.assertFalse(download_func.called)
|
||||||
|
17
tests/test_utils_asyncio.py
Normal file
17
tests/test_utils_asyncio.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
from unittest import TestCase
|
||||||
|
|
||||||
|
from pytest import mark
|
||||||
|
|
||||||
|
from scrapy.utils.asyncio import is_asyncio_reactor_installed, install_asyncio_reactor
|
||||||
|
|
||||||
|
|
||||||
|
@mark.usefixtures('reactor_pytest')
|
||||||
|
class AsyncioTest(TestCase):
|
||||||
|
|
||||||
|
def test_is_asyncio_reactor_installed(self):
|
||||||
|
# the result should depend only on the pytest --reactor argument
|
||||||
|
self.assertEqual(is_asyncio_reactor_installed(), self.reactor_pytest == 'asyncio')
|
||||||
|
|
||||||
|
def test_install_asyncio_reactor(self):
|
||||||
|
# this should do nothing
|
||||||
|
install_asyncio_reactor()
|
14
tox.ini
14
tox.ini
@ -96,3 +96,17 @@ changedir = {[docs]changedir}
|
|||||||
deps = {[docs]deps}
|
deps = {[docs]deps}
|
||||||
commands =
|
commands =
|
||||||
sphinx-build -W -b linkcheck . {envtmpdir}/linkcheck
|
sphinx-build -W -b linkcheck . {envtmpdir}/linkcheck
|
||||||
|
|
||||||
|
[asyncio]
|
||||||
|
commands =
|
||||||
|
{[testenv]commands} --reactor=asyncio
|
||||||
|
|
||||||
|
[testenv:py35-asyncio]
|
||||||
|
basepython = python3.5
|
||||||
|
deps = {[testenv]deps}
|
||||||
|
commands = {[asyncio]commands}
|
||||||
|
|
||||||
|
[testenv:py38-asyncio]
|
||||||
|
basepython = python3.8
|
||||||
|
deps = {[testenv]deps}
|
||||||
|
commands = {[asyncio]commands}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user