mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 00:44:08 +00:00
Merge pull request #2632 from redapple/spider-loader-warn-or-fail
[MRG] Add SPIDER_LOADER_WARN_ONLY to toggle between spiderloader failure or warning
This commit is contained in:
commit
9c69e90056
@ -1180,6 +1180,29 @@ Default: ``'scrapy.spiderloader.SpiderLoader'``
|
||||
The class that will be used for loading spiders, which must implement the
|
||||
:ref:`topics-api-spiderloader`.
|
||||
|
||||
.. setting:: SPIDER_LOADER_WARN_ONLY
|
||||
|
||||
SPIDER_LOADER_WARN_ONLY
|
||||
-----------------------
|
||||
|
||||
.. versionadded:: 1.4
|
||||
|
||||
Default: ``False``
|
||||
|
||||
By default, when Scrapy tries to import spider classes from :setting:`SPIDER_MODULES`,
|
||||
it will fail loudly if there is any ``ImportError`` exception.
|
||||
But you can choose to silence this exception and turn it into a simple
|
||||
warning by setting ``SPIDER_LOADER_WARN_ONLY = True``.
|
||||
|
||||
.. note::
|
||||
Some :ref:`scrapy commands <topics-commands>` run with this setting set to ``True``
|
||||
already (i.e. they will only issue a warning and will not fail)
|
||||
since they do not actually need to load spider classes to work:
|
||||
:command:`scrapy runspider <runspider>`,
|
||||
:command:`scrapy settings <settings>`,
|
||||
:command:`scrapy startproject <startproject>`,
|
||||
:command:`scrapy version <version>`.
|
||||
|
||||
.. setting:: SPIDER_MIDDLEWARES
|
||||
|
||||
SPIDER_MIDDLEWARES
|
||||
|
@ -28,6 +28,7 @@ def _import_file(filepath):
|
||||
class Command(ScrapyCommand):
|
||||
|
||||
requires_project = False
|
||||
default_settings = {'SPIDER_LOADER_WARN_ONLY': True}
|
||||
|
||||
def syntax(self):
|
||||
return "[options] <spider_file>"
|
||||
|
@ -7,7 +7,8 @@ from scrapy.settings import BaseSettings
|
||||
class Command(ScrapyCommand):
|
||||
|
||||
requires_project = False
|
||||
default_settings = {'LOG_ENABLED': False}
|
||||
default_settings = {'LOG_ENABLED': False,
|
||||
'SPIDER_LOADER_WARN_ONLY': True}
|
||||
|
||||
def syntax(self):
|
||||
return "[options]"
|
||||
|
@ -26,7 +26,8 @@ IGNORE = ignore_patterns('*.pyc', '.svn')
|
||||
class Command(ScrapyCommand):
|
||||
|
||||
requires_project = False
|
||||
default_settings = {'LOG_ENABLED': False}
|
||||
default_settings = {'LOG_ENABLED': False,
|
||||
'SPIDER_LOADER_WARN_ONLY': True}
|
||||
|
||||
def syntax(self):
|
||||
return "<project_name> [project_dir]"
|
||||
@ -118,4 +119,4 @@ class Command(ScrapyCommand):
|
||||
_templates_base_dir = self.settings['TEMPLATES_DIR'] or \
|
||||
join(scrapy.__path__[0], 'templates')
|
||||
return join(_templates_base_dir, 'project')
|
||||
|
||||
|
||||
|
@ -11,7 +11,8 @@ from scrapy.commands import ScrapyCommand
|
||||
|
||||
class Command(ScrapyCommand):
|
||||
|
||||
default_settings = {'LOG_ENABLED': False}
|
||||
default_settings = {'LOG_ENABLED': False,
|
||||
'SPIDER_LOADER_WARN_ONLY': True}
|
||||
|
||||
def syntax(self):
|
||||
return "[-v]"
|
||||
|
@ -250,6 +250,7 @@ SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
|
||||
SCHEDULER_PRIORITY_QUEUE = 'queuelib.PriorityQueue'
|
||||
|
||||
SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'
|
||||
SPIDER_LOADER_WARN_ONLY = False
|
||||
|
||||
SPIDER_MIDDLEWARES = {}
|
||||
|
||||
|
@ -19,6 +19,7 @@ class SpiderLoader(object):
|
||||
"""
|
||||
def __init__(self, settings):
|
||||
self.spider_modules = settings.getlist('SPIDER_MODULES')
|
||||
self.warn_only = settings.getbool('SPIDER_LOADER_WARN_ONLY')
|
||||
self._spiders = {}
|
||||
self._found = defaultdict(list)
|
||||
self._load_all_spiders()
|
||||
@ -46,10 +47,13 @@ class SpiderLoader(object):
|
||||
for module in walk_modules(name):
|
||||
self._load_spiders(module)
|
||||
except ImportError as e:
|
||||
msg = ("\n{tb}Could not load spiders from module '{modname}'. "
|
||||
"Check SPIDER_MODULES setting".format(
|
||||
modname=name, tb=traceback.format_exc()))
|
||||
warnings.warn(msg, RuntimeWarning)
|
||||
if self.warn_only:
|
||||
msg = ("\n{tb}Could not load spiders from module '{modname}'. "
|
||||
"See above traceback for details.".format(
|
||||
modname=name, tb=traceback.format_exc()))
|
||||
warnings.warn(msg, RuntimeWarning)
|
||||
else:
|
||||
raise
|
||||
self._check_name_duplicates()
|
||||
|
||||
@classmethod
|
||||
|
@ -91,18 +91,24 @@ class SpiderLoaderTest(unittest.TestCase):
|
||||
self.assertTrue(issubclass(crawler.spidercls, scrapy.Spider))
|
||||
self.assertEqual(crawler.spidercls.name, 'spider1')
|
||||
|
||||
def test_bad_spider_modules_exception(self):
|
||||
|
||||
module = 'tests.test_spiderloader.test_spiders.doesnotexist'
|
||||
settings = Settings({'SPIDER_MODULES': [module]})
|
||||
self.assertRaises(ImportError, SpiderLoader.from_settings, settings)
|
||||
|
||||
def test_bad_spider_modules_warning(self):
|
||||
|
||||
with warnings.catch_warnings(record=True) as w:
|
||||
module = 'tests.test_spiderloader.test_spiders.doesnotexist'
|
||||
settings = Settings({'SPIDER_MODULES': [module]})
|
||||
settings = Settings({'SPIDER_MODULES': [module],
|
||||
'SPIDER_LOADER_WARN_ONLY': True})
|
||||
spider_loader = SpiderLoader.from_settings(settings)
|
||||
self.assertIn("Could not load spiders from module", str(w[0].message))
|
||||
|
||||
spiders = spider_loader.list()
|
||||
self.assertEqual(spiders, [])
|
||||
|
||||
|
||||
class DuplicateSpiderNameLoaderTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
Loading…
x
Reference in New Issue
Block a user