From 048044c1f8536b1e4d04712bba179053855294a8 Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Wed, 5 Jan 2011 15:59:43 -0200
Subject: [PATCH] A couple of changes to fix #303:

* improved detection of inside-project environments
* make the list command faster (by only instantiating the spider manager)
* print a warning when extensions (middlewares, etc.) are disabled, with the
  message from the NotConfigured exception
* assert that scrapy configuration hasn't been loaded in scrapyd.runner
* simplified IgnoreRequest exception, to avoid loading settings when
  importing scrapy.exceptions
* added test to make sure certain modules don't cause the scrapy.conf module
  to be loaded, to ensure the scrapyd runner bootstrapping performs properly
---
 scrapy/cmdline.py                        |    3 ++-
 scrapy/commands/list.py                  |    8 +++++++-
 scrapy/core/engine.py                    |    4 +---
 scrapy/exceptions.py                     |    9 ---------
 scrapy/middleware.py                     |    3 ++-
 scrapy/utils/project.py                  |   21 +++++++++++++++------
 scrapyd/runner.py                        |    4 ++++
 scrapyd/tests/test_dont_load_settings.py |   22 ++++++++++++++++++++++
 scrapyd/tests/test_utils.py              |    2 +-
 9 files changed, 54 insertions(+), 22 deletions(-)
 create mode 100644 scrapyd/tests/test_dont_load_settings.py

diff --git a/scrapy/cmdline.py b/scrapy/cmdline.py
index 6884aad88..e381bf537 100644
--- a/scrapy/cmdline.py
+++ b/scrapy/cmdline.py
@@ -13,6 +13,7 @@ from scrapy.conf import settings
 from scrapy.command import ScrapyCommand
 from scrapy.exceptions import UsageError
 from scrapy.utils.misc import walk_modules
+from scrapy.utils.project import inside_project
 
 def _iter_command_classes(module_name):
     # TODO: add `name` attribute to commands and and merge this function with
@@ -106,7 +107,7 @@ def execute(argv=None):
         argv = sys.argv
     crawler = CrawlerProcess(settings)
     crawler.install()
-    inproject = bool(settings.settings_module)
+    inproject = inside_project()
     _check_deprecated_scrapy_ctl(argv, inproject) # TODO: remove for Scrapy 0.11
     cmds = _get_commands_dict(inproject)
     cmdname = _pop_command_name(argv)
diff --git a/scrapy/commands/list.py b/scrapy/commands/list.py
index 29b57edfd..319265658 100644
--- a/scrapy/commands/list.py
+++ b/scrapy/commands/list.py
@@ -1,4 +1,8 @@
+import os
+
 from scrapy.command import ScrapyCommand
+from scrapy.utils.misc import load_object
+from scrapy.conf import settings
 
 class Command(ScrapyCommand):
 
@@ -9,4 +13,6 @@ class Command(ScrapyCommand):
         return "List available spiders"
 
     def run(self, args, opts):
-        print "\n".join(self.crawler.spiders.list())
+        spman_cls = load_object(settings['SPIDER_MANAGER_CLASS'])
+        spiders = spman_cls.from_settings(settings)
+        print os.linesep.join(spiders.list())
diff --git a/scrapy/core/engine.py b/scrapy/core/engine.py
index 25d5dcc36..abd98f2d5 100644
--- a/scrapy/core/engine.py
+++ b/scrapy/core/engine.py
@@ -166,13 +166,11 @@ class ExecutionEngine(object):
             exc = _failure.value
             if isinstance(exc, IgnoreRequest):
                 errmsg = _failure.getErrorMessage()
-                level = exc.level
             else:
                 errmsg = str(_failure)
-                level = log.ERROR
             if errmsg:
                 log.msg("Error downloading <%s>: %s" % (request.url, errmsg), \
-                    level=level, spider=spider)
+                    level=log.ERROR, spider=spider)
             return Failure(IgnoreRequest(str(exc)))
 
         def _on_complete(_):
diff --git a/scrapy/exceptions.py b/scrapy/exceptions.py
index 3294dc4da..e52dafc6d 100644
--- a/scrapy/exceptions.py
+++ b/scrapy/exceptions.py
@@ -5,8 +5,6 @@ These exceptions are documented in docs/topics/exceptions.rst. Please don't add
 new exceptions here without documenting them there.
""" -from scrapy import log - # Internal class NotConfigured(Exception): @@ -18,13 +16,6 @@ class NotConfigured(Exception): class IgnoreRequest(Exception): """Indicates a decision was made not to process a request""" - def __init__(self, msg='', level=log.ERROR): - self.msg = msg - self.level = level - - def __str__(self): - return self.msg - class DontCloseSpider(Exception): """Request the spider not to be closed yet""" pass diff --git a/scrapy/middleware.py b/scrapy/middleware.py index 818dcdb87..c83b0d44f 100644 --- a/scrapy/middleware.py +++ b/scrapy/middleware.py @@ -34,7 +34,8 @@ class MiddlewareManager(object): middlewares.append(mw) except NotConfigured, e: if e.args: - log.msg(e) + clsname = clspath.split('.')[-1] + log.msg("Disabled %s: %s" % (clsname, e.args[0]), log.WARNING) enabled = [x.__class__.__name__ for x in middlewares] log.msg("Enabled %ss: %s" % (cls.component_name, ", ".join(enabled)), \ level=log.DEBUG) diff --git a/scrapy/utils/project.py b/scrapy/utils/project.py index 15af61a97..7fe778868 100644 --- a/scrapy/utils/project.py +++ b/scrapy/utils/project.py @@ -4,6 +4,7 @@ import warnings from scrapy.utils.conf import closest_scrapy_cfg, get_config from scrapy.utils.python import is_writable +from scrapy.exceptions import NotConfigured DATADIR_CFG_SECTION = 'datadir' @@ -20,12 +21,16 @@ def inside_project(): def project_data_dir(project='default'): """Return the current project data dir, creating it if it doesn't exist""" - assert inside_project(), "Not inside project" - scrapy_cfg = closest_scrapy_cfg() - d = abspath(join(dirname(scrapy_cfg), '.scrapy')) + if not inside_project(): + raise NotConfigured("Not inside a project") cfg = get_config() if cfg.has_option(DATADIR_CFG_SECTION, project): d = cfg.get(DATADIR_CFG_SECTION, project) + else: + scrapy_cfg = closest_scrapy_cfg() + if not scrapy_cfg: + raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir") + d = abspath(join(dirname(scrapy_cfg), '.scrapy')) if not exists(d): makedirs(d) return d @@ -47,8 +52,12 @@ def sqlite_db(path, nonwritable_fallback=True): if not inside_project() or path == ':memory:': db = ':memory:' else: - db = data_path(path) - if not is_writable(db) and nonwritable_fallback: - warnings.warn("%r is not writable - using in-memory SQLite instead" % db) + try: + db = data_path(path) + except NotConfigured: db = ':memory:' + else: + if not is_writable(db) and nonwritable_fallback: + warnings.warn("%r is not writable - using in-memory SQLite instead" % db) + db = ':memory:' return db diff --git a/scrapyd/runner.py b/scrapyd/runner.py index 8f47a339b..c0acd0fea 100644 --- a/scrapyd/runner.py +++ b/scrapyd/runner.py @@ -1,3 +1,6 @@ +from __future__ import with_statement + +import sys import os import shutil import tempfile @@ -22,6 +25,7 @@ def project_environment(project): else: eggpath = None try: + assert 'scrapy.conf' not in sys.modules, "Scrapy settings already loaded" yield finally: if eggpath: diff --git a/scrapyd/tests/test_dont_load_settings.py b/scrapyd/tests/test_dont_load_settings.py new file mode 100644 index 000000000..cefa987db --- /dev/null +++ b/scrapyd/tests/test_dont_load_settings.py @@ -0,0 +1,22 @@ +import sys +import unittest + +class SettingsSafeModulesTest(unittest.TestCase): + + # these modules must not load scrapy.conf + SETTINGS_SAFE_MODULES = [ + 'scrapy.utils.project', + 'scrapy.utils.conf', + 'scrapyd.interfaces', + 'scrapyd.eggutils', + ] + + def test_modules_that_shouldnt_load_settings(self): + sys.modules.pop('scrapy.conf', None) + for m in 
+            __import__(m)
+            assert 'scrapy.conf' not in sys.modules, \
+                "Module %r must not cause the scrapy.conf module to be loaded" % m
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/scrapyd/tests/test_utils.py b/scrapyd/tests/test_utils.py
index e5b53beb0..624b82473 100644
--- a/scrapyd/tests/test_utils.py
+++ b/scrapyd/tests/test_utils.py
@@ -25,7 +25,7 @@ class UtilsTest(unittest.TestCase):
 class GetSpiderListTest(unittest.TestCase):
 
     def test_get_spider_list(self):
-        path = self.mktemp()
+        path = os.path.abspath(self.mktemp())
         j = os.path.join
         eggs_dir = j(path, 'eggs')
         os.makedirs(eggs_dir)
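
Note (not part of the patch): the scrapy/middleware.py hunk above builds on the NotConfigured convention, where a component raises NotConfigured from its constructor or from_settings() and the manager logs it as disabled instead of aborting. The sketch below illustrates that convention end to end. The MyStatsExtension class, the MYSTATS_API_KEY setting and the build_components() helper are hypothetical names used only for illustration; only the NotConfigured exception itself comes from scrapy.exceptions.

# Illustrative sketch only -- the component, setting name and helper below are
# hypothetical; NotConfigured is the real exception from scrapy.exceptions.
from scrapy.exceptions import NotConfigured

class MyStatsExtension(object):
    """Hypothetical extension that requires a setting to be enabled."""

    def __init__(self, api_key):
        self.api_key = api_key

    @classmethod
    def from_settings(cls, settings):
        api_key = settings.get('MYSTATS_API_KEY')
        if not api_key:
            # The message ends up in e.args[0], which the patched
            # MiddlewareManager prints as "Disabled <name>: <message>".
            raise NotConfigured("MYSTATS_API_KEY setting is missing")
        return cls(api_key)

def build_components(components, settings):
    """Manager-side loop: keep what instantiates, warn about the rest."""
    enabled = []
    for name, cls in components:
        try:
            enabled.append(cls.from_settings(settings))
        except NotConfigured as e:
            reason = e.args[0] if e.args else ''
            print("Disabled %s: %s" % (name, reason))
    return enabled

if __name__ == '__main__':
    # With empty settings the extension reports itself as disabled.
    active = build_components([('MyStatsExtension', MyStatsExtension)], {})
    print("Enabled extensions: %s" % [c.__class__.__name__ for c in active])

Raising NotConfigured with a non-empty message is what makes the new warning useful: the patched manager only logs when e.args is non-empty, so raising it with no arguments keeps the old silent behaviour.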