Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-24 00:44:08 +00:00).

Merge pull request #2632 from redapple/spider-loader-warn-or-fail

[MRG] Add SPIDER_LOADER_WARN_ONLY to toggle between spiderloader failure or warning
This commit is contained in:
Mikhail Korobov 2017-03-09 23:01:27 +05:00 committed by GitHub
commit 9c69e90056
8 changed files with 48 additions and 10 deletions

View File

@ -1180,6 +1180,29 @@ Default: ``'scrapy.spiderloader.SpiderLoader'``
The class that will be used for loading spiders, which must implement the
:ref:`topics-api-spiderloader`.
.. setting:: SPIDER_LOADER_WARN_ONLY
SPIDER_LOADER_WARN_ONLY
-----------------------
.. versionadded:: 1.4
Default: ``False``
By default, when scrapy tries to import spider classes from :setting:`SPIDER_MODULES`,
it will fail loudly if there is any ``ImportError`` exception.
But you can choose to silence this exception and turn it into a simple
warning by setting ``SPIDER_LOADER_WARN_ONLY = True``.
.. note::
Some :ref:`scrapy commands <topics-commands>` already run with this setting set to ``True``
already (i.e. they will only issue a warning and will not fail)
since they do not actually need to load spider classes to work:
:command:`scrapy runspider <runspider>`,
:command:`scrapy settings <settings>`,
:command:`scrapy startproject <startproject>`,
:command:`scrapy version <version>`.
.. setting:: SPIDER_MIDDLEWARES
SPIDER_MIDDLEWARES

View File

@ -28,6 +28,7 @@ def _import_file(filepath):
class Command(ScrapyCommand):
requires_project = False
default_settings = {'SPIDER_LOADER_WARN_ONLY': True}
def syntax(self):
"""Return the command's usage syntax shown in help (takes a spider file path)."""
return "[options] <spider_file>"

View File

@ -7,7 +7,8 @@ from scrapy.settings import BaseSettings
class Command(ScrapyCommand):
requires_project = False
default_settings = {'LOG_ENABLED': False}
default_settings = {'LOG_ENABLED': False,
'SPIDER_LOADER_WARN_ONLY': True}
def syntax(self):
"""Return the command's usage syntax string shown in help output."""
return "[options]"

View File

@ -26,7 +26,8 @@ IGNORE = ignore_patterns('*.pyc', '.svn')
class Command(ScrapyCommand):
requires_project = False
default_settings = {'LOG_ENABLED': False}
default_settings = {'LOG_ENABLED': False,
'SPIDER_LOADER_WARN_ONLY': True}
def syntax(self):
"""Return the command's usage syntax: a project name plus an optional target directory."""
return "<project_name> [project_dir]"
@ -118,4 +119,4 @@ class Command(ScrapyCommand):
_templates_base_dir = self.settings['TEMPLATES_DIR'] or \
join(scrapy.__path__[0], 'templates')
return join(_templates_base_dir, 'project')

View File

@ -11,7 +11,8 @@ from scrapy.commands import ScrapyCommand
class Command(ScrapyCommand):
default_settings = {'LOG_ENABLED': False}
default_settings = {'LOG_ENABLED': False,
'SPIDER_LOADER_WARN_ONLY': True}
def syntax(self):
"""Return the command's usage syntax string shown in help output."""
return "[-v]"

View File

@ -250,6 +250,7 @@ SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
SCHEDULER_PRIORITY_QUEUE = 'queuelib.PriorityQueue'
SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
# When True, SpiderLoader emits a RuntimeWarning instead of raising on
# ImportError while importing SPIDER_MODULES (read via settings.getbool in
# SpiderLoader.__init__).
SPIDER_LOADER_WARN_ONLY = False
SPIDER_MIDDLEWARES = {}

View File

@ -19,6 +19,7 @@ class SpiderLoader(object):
"""
def __init__(self, settings):
    """Configure the loader from *settings* and eagerly import every spider.

    Reads ``SPIDER_MODULES`` (list of module paths to scan) and
    ``SPIDER_LOADER_WARN_ONLY`` (warn instead of raising on import errors).
    """
    # Internal registries filled by _load_all_spiders(); _found is
    # bookkeeping presumably used for duplicate-name detection — see
    # _check_name_duplicates (TODO confirm, body not visible here).
    self._spiders = {}
    self._found = defaultdict(list)
    self.spider_modules = settings.getlist('SPIDER_MODULES')
    self.warn_only = settings.getbool('SPIDER_LOADER_WARN_ONLY')
    self._load_all_spiders()
@ -46,10 +47,13 @@ class SpiderLoader(object):
for module in walk_modules(name):
self._load_spiders(module)
except ImportError as e:
msg = ("\n{tb}Could not load spiders from module '{modname}'. "
"Check SPIDER_MODULES setting".format(
modname=name, tb=traceback.format_exc()))
warnings.warn(msg, RuntimeWarning)
if self.warn_only:
msg = ("\n{tb}Could not load spiders from module '{modname}'. "
"See above traceback for details.".format(
modname=name, tb=traceback.format_exc()))
warnings.warn(msg, RuntimeWarning)
else:
raise
self._check_name_duplicates()
@classmethod

View File

@ -91,18 +91,24 @@ class SpiderLoaderTest(unittest.TestCase):
self.assertTrue(issubclass(crawler.spidercls, scrapy.Spider))
self.assertEqual(crawler.spidercls.name, 'spider1')
def test_bad_spider_modules_exception(self):
    """A bogus module in SPIDER_MODULES raises ImportError by default."""
    bad_module = 'tests.test_spiderloader.test_spiders.doesnotexist'
    settings = Settings({'SPIDER_MODULES': [bad_module]})
    with self.assertRaises(ImportError):
        SpiderLoader.from_settings(settings)
def test_bad_spider_modules_warning(self):
    """With SPIDER_LOADER_WARN_ONLY=True a bogus module only warns.

    The loader must emit a warning mentioning the failing module and end
    up with an empty spider list instead of raising ImportError.
    """
    with warnings.catch_warnings(record=True) as w:
        module = 'tests.test_spiderloader.test_spiders.doesnotexist'
        # Fixed: a stale duplicate assignment (diff residue) previously
        # built Settings without SPIDER_LOADER_WARN_ONLY and was then
        # immediately overwritten; only the warn-only Settings is needed.
        settings = Settings({'SPIDER_MODULES': [module],
                             'SPIDER_LOADER_WARN_ONLY': True})
        spider_loader = SpiderLoader.from_settings(settings)
        self.assertIn("Could not load spiders from module", str(w[0].message))
        spiders = spider_loader.list()
        self.assertEqual(spiders, [])
class DuplicateSpiderNameLoaderTest(unittest.TestCase):
def setUp(self):