diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index ccdd02c4e..569b71518 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -1180,6 +1180,29 @@ Default: ``'scrapy.spiderloader.SpiderLoader'`` The class that will be used for loading spiders, which must implement the :ref:`topics-api-spiderloader`. +.. setting:: SPIDER_LOADER_WARN_ONLY + +SPIDER_LOADER_WARN_ONLY +----------------------- + +.. versionadded:: 1.4 + +Default: ``False`` + +By default, when scrapy tries to import spider classes from :setting:`SPIDER_MODULES`, +it will fail loudly if there is any ``ImportError`` exception. +But you can choose to silence this exception and turn it into a simple +warning by setting ``SPIDER_LOADER_WARN_ONLY = True``. + +.. note:: + Some :ref:`scrapy commands <topics-commands>` run with this setting to ``True`` + already (i.e. they will only issue a warning and will not fail) + since they do not actually need to load spider classes to work: + :command:`scrapy runspider <runspider>`, + :command:`scrapy settings <settings>`, + :command:`scrapy startproject <startproject>`, + :command:`scrapy version <version>`. + ..
setting:: SPIDER_MIDDLEWARES SPIDER_MIDDLEWARES diff --git a/scrapy/commands/runspider.py b/scrapy/commands/runspider.py index 1da09e4da..a98033dd1 100644 --- a/scrapy/commands/runspider.py +++ b/scrapy/commands/runspider.py @@ -28,6 +28,7 @@ def _import_file(filepath): class Command(ScrapyCommand): requires_project = False + default_settings = {'SPIDER_LOADER_WARN_ONLY': True} def syntax(self): return "[options] " diff --git a/scrapy/commands/settings.py b/scrapy/commands/settings.py index bce4e6086..bee52f06a 100644 --- a/scrapy/commands/settings.py +++ b/scrapy/commands/settings.py @@ -7,7 +7,8 @@ from scrapy.settings import BaseSettings class Command(ScrapyCommand): requires_project = False - default_settings = {'LOG_ENABLED': False} + default_settings = {'LOG_ENABLED': False, + 'SPIDER_LOADER_WARN_ONLY': True} def syntax(self): return "[options]" diff --git a/scrapy/commands/startproject.py b/scrapy/commands/startproject.py index 594106632..c17aaf442 100644 --- a/scrapy/commands/startproject.py +++ b/scrapy/commands/startproject.py @@ -26,7 +26,8 @@ IGNORE = ignore_patterns('*.pyc', '.svn') class Command(ScrapyCommand): requires_project = False - default_settings = {'LOG_ENABLED': False} + default_settings = {'LOG_ENABLED': False, + 'SPIDER_LOADER_WARN_ONLY': True} def syntax(self): return " [project_dir]" @@ -118,4 +119,4 @@ class Command(ScrapyCommand): _templates_base_dir = self.settings['TEMPLATES_DIR'] or \ join(scrapy.__path__[0], 'templates') return join(_templates_base_dir, 'project') - + diff --git a/scrapy/commands/version.py b/scrapy/commands/version.py index a9954edb0..e22f98f5a 100644 --- a/scrapy/commands/version.py +++ b/scrapy/commands/version.py @@ -11,7 +11,8 @@ from scrapy.commands import ScrapyCommand class Command(ScrapyCommand): - default_settings = {'LOG_ENABLED': False} + default_settings = {'LOG_ENABLED': False, + 'SPIDER_LOADER_WARN_ONLY': True} def syntax(self): return "[-v]" diff --git a/scrapy/settings/default_settings.py 
b/scrapy/settings/default_settings.py index d73c595d2..854cefc9c 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -250,6 +250,7 @@ SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue' SCHEDULER_PRIORITY_QUEUE = 'queuelib.PriorityQueue' SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader' +SPIDER_LOADER_WARN_ONLY = False SPIDER_MIDDLEWARES = {} diff --git a/scrapy/spiderloader.py b/scrapy/spiderloader.py index 486a4637e..7478faa78 100644 --- a/scrapy/spiderloader.py +++ b/scrapy/spiderloader.py @@ -19,6 +19,7 @@ class SpiderLoader(object): """ def __init__(self, settings): self.spider_modules = settings.getlist('SPIDER_MODULES') + self.warn_only = settings.getbool('SPIDER_LOADER_WARN_ONLY') self._spiders = {} self._found = defaultdict(list) self._load_all_spiders() @@ -46,10 +47,13 @@ class SpiderLoader(object): for module in walk_modules(name): self._load_spiders(module) except ImportError as e: - msg = ("\n{tb}Could not load spiders from module '{modname}'. " - "Check SPIDER_MODULES setting".format( - modname=name, tb=traceback.format_exc())) - warnings.warn(msg, RuntimeWarning) + if self.warn_only: + msg = ("\n{tb}Could not load spiders from module '{modname}'. 
" + "See above traceback for details.".format( + modname=name, tb=traceback.format_exc())) + warnings.warn(msg, RuntimeWarning) + else: + raise self._check_name_duplicates() @classmethod diff --git a/tests/test_spiderloader/__init__.py b/tests/test_spiderloader/__init__.py index 673a2d302..1cd59b99a 100644 --- a/tests/test_spiderloader/__init__.py +++ b/tests/test_spiderloader/__init__.py @@ -91,18 +91,24 @@ class SpiderLoaderTest(unittest.TestCase): self.assertTrue(issubclass(crawler.spidercls, scrapy.Spider)) self.assertEqual(crawler.spidercls.name, 'spider1') + def test_bad_spider_modules_exception(self): + + module = 'tests.test_spiderloader.test_spiders.doesnotexist' + settings = Settings({'SPIDER_MODULES': [module]}) + self.assertRaises(ImportError, SpiderLoader.from_settings, settings) + def test_bad_spider_modules_warning(self): with warnings.catch_warnings(record=True) as w: module = 'tests.test_spiderloader.test_spiders.doesnotexist' - settings = Settings({'SPIDER_MODULES': [module]}) + settings = Settings({'SPIDER_MODULES': [module], + 'SPIDER_LOADER_WARN_ONLY': True}) spider_loader = SpiderLoader.from_settings(settings) self.assertIn("Could not load spiders from module", str(w[0].message)) spiders = spider_loader.list() self.assertEqual(spiders, []) - class DuplicateSpiderNameLoaderTest(unittest.TestCase): def setUp(self):