
removed scrapy-admin.py command, and left only scrapy-ctl as the only scrapy command

This commit is contained in:
Pablo Hoffman 2009-08-24 15:43:36 -03:00
parent 39540b188a
commit 8a074c9cb5
27 changed files with 165 additions and 158 deletions
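In practice, this replaces the old two-script workflow with a single entry point. A before/after sketch, using the dmoz project name from the tutorial (illustrative only):

    scrapy-admin.py startproject dmoz        # before this commit
    python scrapy-ctl.py startproject dmoz   # after this commit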

View File

@ -1,75 +0,0 @@
#!/usr/bin/env python
"""Scrapy admin script is used to create new scrapy projects and similar
tasks"""
import os
import string
from optparse import OptionParser
import re
import scrapy
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree
usage = """
scrapy-admin.py [options] [command]
Available commands:
startproject <project_name>
Starts a new project with name 'project_name'
"""
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of templatefile's path that are rendered *after copying* to
# project directory.
TEMPLATES = (
'scrapy-ctl.py',
'${project_name}/settings.py.tmpl',
'${project_name}/items.py.tmpl',
'${project_name}/pipelines.py.tmpl',
)
IGNORE = ignore_patterns('*.pyc', '.svn')
def main():
    parser = OptionParser(usage=usage)
    opts, args = parser.parse_args()

    if not args:
        parser.print_help()
        return

    cmd = args[0]
    if cmd == "startproject":
        if len(args) >= 2:
            project_name = args[1]
            if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
                # Provide a smart error message, depending on the error.
                if not re.search(r'^[_a-zA-Z]', project_name):
                    message = 'make sure the project_name begins with a letter or underscore'
                else:
                    message = 'use only numbers, letters and underscores'
                print "scrapy-admin.py: %r is not a valid project name. Please %s." % (project_name, message)
            else:
                project_root_path = project_name

                roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
                copytree(roottpl, project_name, ignore=IGNORE)

                moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
                copytree(moduletpl, '%s/%s' % (project_name, project_name),
                    ignore=IGNORE)

                for path in TEMPLATES:
                    tplfile = os.path.join(project_root_path,
                        string.Template(path).substitute(project_name=project_name))
                    render_templatefile(tplfile, project_name=project_name,
                        ProjectName=string_camelcase(project_name))
        else:
            print "scrapy-admin.py: missing project name"
    else:
        print "scrapy-admin.py: unknown command: %s" % cmd


if __name__ == '__main__':
    main()

bin/scrapy-ctl.py Executable file
View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
from scrapy.command.cmdline import execute
execute()

View File

@ -4,35 +4,14 @@
Management scripts
==================
Scrapy is controlled by two command-line scripts:
1. :ref:`topics-scripts-scrapy-admin`: used to create Scrapy projects.
2. :ref:`topics-scripts-scrapy-ctl`: located in every project's root dir, used
to manage each project.
.. _topics-scripts-scrapy-admin:
scrapy-admin.py
===============
Usage: ``scrapy-admin.py <subcommand>``
This script should be in your system path.
Available subcommands
---------------------
startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``
Starts a new project with name ``project_name``
Scrapy is controlled by the ``scrapy-ctl.py`` command.
.. _topics-scripts-scrapy-ctl:
scrapy-ctl.py
=============
Usage: ``scrapy-admin.py <subcommand>``
Usage: ``scrapy-ctl.py <command>``
This script is located in every project's root folder.
@ -154,3 +133,9 @@ start
~~~~~
Start the Scrapy manager but don't run any spider (idle mode)
startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``
Starts a new project with name ``project_name``
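For reference, the generic invocation forms (as printed by the script's own usage text; see the ``cmdline.py`` changes below) are::

    scrapy-ctl.py <command> [options] [args]
    scrapy-ctl.py <command> -h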

View File

@ -210,20 +210,20 @@ Panel`_)::
set PYTHONPATH=C:\path\to\scrapy-trunk
Make the scrapy-admin.py script available
-----------------------------------------
Make the scrapy-ctl.py script available
---------------------------------------
On Unix-like systems, create a symbolic link to the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` in a directory on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` in a directory on your system path,
such as ``/usr/local/bin``. For example::
ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-admin.py /usr/local/bin
ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-ctl.py /usr/local/bin
This simply lets you type scrapy-admin.py from within any directory, rather
This simply lets you type ``scrapy-ctl.py`` from within any directory, rather
than having to qualify the command with the full path to the file.
On Windows systems, the same result can be achieved by copying the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` to somewhere on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` to somewhere on your system path,
for example ``C:\Python25\Scripts``, which is customary for Python scripts.
.. _Control Panel: http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/sysdm_advancd_environmnt_addchange_variable.mspx

View File

@ -36,7 +36,7 @@ Creating a project
Before you start scraping, you will have to set up a new Scrapy project. Enter a
directory where you'd like to store your code and then run::
scrapy-admin.py startproject dmoz
python scrapy-ctl.py startproject dmoz
This will create a ``dmoz`` directory with the following contents::

View File

@ -704,7 +704,7 @@ PROJECT_NAME
Default: ``Not Defined``
The name of the current project. It matches the project module name as created
by :ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command,
by :ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl-startproject>` command,
and is only defined by the project settings file.
.. setting:: REDIRECT_MAX_TIMES
@ -953,7 +953,7 @@ TEMPLATES_DIR
Default: ``templates`` dir inside scrapy module
The directory where to look for templates when creating new projects with the
:ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command.
:ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl>` command.
.. setting:: URLLENGTH_LIMIT

View File

@ -9,7 +9,7 @@ import scrapy
from scrapy import log
from scrapy.spider import spiders
from scrapy.xlib import lsprofcalltree
from scrapy.conf import settings, SETTINGS_MODULE
from scrapy.conf import settings
# This dict holds information about the executed command for later use
command_executed = {}
@ -45,12 +45,12 @@ def get_command_name(argv):
if not arg.startswith('-'):
return arg
def usage(prog):
def usage():
s = "Usage\n"
s += "=====\n"
s += "%s <command> [options] [args]\n" % prog
s += "scrapy-ctl.py <command> [options] [args]\n"
s += " Run a command\n\n"
s += "%s <command> -h\n" % prog
s += "scrapy-ctl.py <command> -h\n"
s += " Print command help and options\n\n"
s += "Available commands\n"
s += "===================\n"
@ -72,14 +72,10 @@ def update_default_settings(module, cmdname):
if not k.startswith("_"):
settings.defaults[k] = v
def execute():
if not settings.settings_module:
print "Scrapy %s\n" % scrapy.__version__
print "Error: Cannot find %r module in python path" % SETTINGS_MODULE
sys.exit(1)
execute_with_args(sys.argv)
def execute(argv=None):
if argv is None:
argv = sys.argv
def execute_with_args(argv):
cmds = get_commands_dict()
cmdname = get_command_name(argv)
@ -88,7 +84,7 @@ def execute_with_args(argv):
if not cmdname:
print "Scrapy %s\n" % scrapy.__version__
print usage(argv[0])
print usage()
sys.exit(2)
parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
@ -119,6 +115,11 @@ def execute_with_args(argv):
command_executed['opts'] = opts.__dict__.copy()
cmd.process_options(args, opts)
if cmd.requires_project and not settings.settings_module:
print "Error running: scrapy-ctl.py %s\n" % cmdname
print "Cannot find project settings module in python path: %s" % \
settings.settings_module_path
sys.exit(1)
spiders.load()
log.start()
ret = run_command(cmd, args, opts)
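Since ``execute()`` now accepts an optional ``argv`` argument (defaulting to ``sys.argv``), it can also be driven programmatically. A minimal sketch, not part of this commit; the command and URL are made up:

    from scrapy.command.cmdline import execute

    # Equivalent to running "scrapy-ctl.py fetch http://example.com/" from the shell;
    # the first element plays the role of the program name in sys.argv.
    execute(['scrapy-ctl.py', 'fetch', 'http://example.com/'])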

View File

@ -8,6 +8,9 @@ from scrapy import log
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <domain|url> ..."

View File

@ -4,6 +4,9 @@ from scrapy.command import ScrapyCommand
from scrapy.fetcher import fetch
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[options] <url>"

View File

@ -24,6 +24,8 @@ def sanitize_module_name(module_name):
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <spider_module_name> <spider_domain_name>"

View File

@ -1,6 +1,9 @@
from scrapy.command import ScrapyCommand, cmdline
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<command>"

View File

@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.spider import spiders
class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "List available spiders"

View File

@ -7,6 +7,9 @@ from scrapy.utils import display
from scrapy import log
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <url>"

View File

@ -21,6 +21,9 @@ from scrapy.http import Request
from scrapy.fetcher import get_or_create_spider
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[url]"

View File

@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.core.manager import scrapymanager
class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "Start the Scrapy manager but don't run any spider (idle mode)"

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
import os
import string
import re
import scrapy
from scrapy.command import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of template file paths that are rendered *after copying* to
# the new project directory.
TEMPLATES = (
('scrapy-ctl.py',),
('${project_name}', 'settings.py.tmpl'),
('${project_name}', 'items.py.tmpl'),
('${project_name}', 'pipelines.py.tmpl'),
)
IGNORE = ignore_patterns('*.pyc', '.svn')
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<project_name>"

    def short_desc(self):
        return "Create new project with an initial project template"

    def run(self, args, opts):
        if len(args) != 1:
            return False

        project_name = args[0]
        if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
            # Provide a smart error message, depending on the error.
            if not re.search(r'^[a-zA-Z]', project_name):
                message = 'Project names must begin with a letter'
            else:
                message = 'Project names must contain only letters, numbers and underscores'
            print "Invalid project name: %s\n\n%s" % (project_name, message)
        else:
            project_root_path = project_name

            roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
            copytree(roottpl, project_name, ignore=IGNORE)

            moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
            copytree(moduletpl, '%s/%s' % (project_name, project_name),
                ignore=IGNORE)

            for paths in TEMPLATES:
                path = os.path.join(*paths)
                tplfile = os.path.join(project_root_path,
                    string.Template(path).substitute(project_name=project_name))
                render_templatefile(tplfile, project_name=project_name,
                    ProjectName=string_camelcase(project_name))
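Each ``TEMPLATES`` entry is now a path tuple that is joined with ``os.path.join`` before the ``${project_name}`` placeholder gets substituted. A quick sketch of that expansion (the ``dmoz`` name is hypothetical; output shown for POSIX paths):

    import os, string

    path = os.path.join(*('${project_name}', 'settings.py.tmpl'))
    print string.Template(path).substitute(project_name='dmoz')   # prints: dmoz/settings.py.tmpl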

View File

@ -11,6 +11,9 @@ from optparse import OptionGroup
from scrapy.conf import settings
class ScrapyCommand(object):

    requires_project = False

    def syntax(self):
        """
        Command syntax (preferably one-line). Do not include command name.
@ -61,12 +64,17 @@ class ScrapyCommand(object):
help="write lsprof profiling stats to FILE")
group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
help="write process ID to FILE")
group.add_option("--set", dest="settings", action="append", \
group.add_option("--set", dest="set", action="append", \
metavar="SETTING=VALUE", default=[], \
help="set/override setting (may be repeated)")
group.add_option("--settings", dest="settings", metavar="MODULE",
help="python path to the Scrapy project settings")
parser.add_option_group(group)
def process_options(self, args, opts):
if opts.settings:
settings.set_settings_module(opts.settings)
if opts.logfile:
settings.overrides['LOG_ENABLED'] = True
settings.overrides['LOG_FILE'] = opts.logfile
@ -90,7 +98,7 @@ class ScrapyCommand(object):
with open(opts.pidfile, "w") as f:
f.write(str(os.getpid()))
for setting in opts.settings:
for setting in opts.set:
if '=' in setting:
name, val = setting.split('=', 1)
settings.overrides[name] = val
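With the ``--set`` option's destination renamed to ``set`` and the new ``--settings`` option wired to ``settings.set_settings_module()``, both overrides and the settings module can be chosen per invocation. An illustrative command line (the project module and values are made up):

    python scrapy-ctl.py crawl example.com --settings dmoz.settings \
        --set LOG_FILE=scrapy.log --set USER_AGENT=mybot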

View File

@ -7,7 +7,6 @@ See documentation in docs/topics/settings.rst
import os
import cPickle as pickle
SETTINGS_MODULE = os.environ.get('SCRAPYSETTINGS_MODULE', 'scrapy_settings')
SETTINGS_DISABLED = os.environ.get('SCRAPY_SETTINGS_DISABLED', False)
class Settings(object):
@ -19,17 +18,11 @@ class Settings(object):
global_defaults = None
def __init__(self):
self.set_settings_module()
pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
self.overrides = pickle.loads(pickled_settings) if pickled_settings else {}
self.settings_module = self._import(SETTINGS_MODULE)
self.defaults = {}
self.global_defaults = self._import('scrapy.conf.default_settings')
def _import(self, modulepath):
try:
return __import__(modulepath, {}, {}, [''])
except ImportError:
pass
self.global_defaults = __import__('scrapy.conf.default_settings', {}, {}, [''])
def __getitem__(self, opt_name):
if not SETTINGS_DISABLED:
@ -43,6 +36,16 @@ class Settings(object):
return self.defaults[opt_name]
return getattr(self.global_defaults, opt_name, None)
def set_settings_module(self, settings_module_path=None):
if settings_module_path is None:
settings_module_path = os.environ.get('SCRAPYSETTINGS_MODULE', \
'scrapy_settings')
self.settings_module_path = settings_module_path
try:
self.settings_module = __import__(settings_module_path, {}, {}, [''])
except ImportError:
self.settings_module = None
def get(self, name, default=None):
return self[name] if self[name] is not None else default
@ -68,4 +71,7 @@ class Settings(object):
else:
return str(value).split(',')
def __str__(self):
return "<Settings %r>" % self.settings_module_path
settings = Settings()
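The new ``set_settings_module()`` method is what the ``--settings`` command-line option calls into, and it can be used directly as well. A hypothetical sketch (the module name is made up):

    from scrapy.conf import settings

    settings.set_settings_module('dmoz.settings')   # import the project settings module, if importable
    print settings.settings_module_path             # prints: dmoz.settings
    print settings                                  # prints: <Settings 'dmoz.settings'>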

View File

@ -0,0 +1 @@
LOG_ENABLED = False

View File

@ -19,8 +19,8 @@ class SpiderManager(object):
def __init__(self):
self.loaded = False
self.default_domain = None
self.spider_modules = settings.getlist('SPIDER_MODULES')
self.force_domain = None
self.spider_modules = None
def fromdomain(self, domain_name):
return self.asdict().get(domain_name)
@ -56,7 +56,10 @@ class SpiderManager(object):
if not self.loaded:
self.load()
def load(self):
def load(self, spider_modules=None):
if spider_modules is None:
spider_modules = settings.getlist('SPIDER_MODULES')
self.spider_modules = spider_modules
self._invaliddict = {}
self._spiders = {}
@ -75,7 +78,7 @@ class SpiderManager(object):
# we can't use the log module here because it may not be available yet
print "WARNING: Could not load spider %s: %s" % (spider, e)
def reload(self, skip_domains=None):
def reload(self, spider_modules=None, skip_domains=None):
"""Reload spiders by trying to discover any spiders added under the
spiders module/packages, removes any spiders removed.
@ -91,7 +94,7 @@ class SpiderManager(object):
if not domain in skip_domains:
reload(sys.modules[spider.__module__])
reloaded += 1
self.load() # second call to update spider instances
self.load(spider_modules=spider_modules) # second call to update spider instances
log.msg("Reloaded %d/%d scrapy spiders" % (reloaded, len(pdict)), level=log.DEBUG)
def _getspiders(self, interface, package):

View File

@ -2,17 +2,15 @@ from __future__ import with_statement
from unittest import TestCase
from scrapy.spider import spiders
from scrapy.http import Response, Request
from scrapy.spider import BaseSpider
from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware
class CookiesMiddlewareTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = CookiesMiddleware()
def tearDown(self):

View File

@ -3,7 +3,7 @@ from __future__ import with_statement
from unittest import TestCase
from os.path import join, abspath, dirname
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Response, Request
from scrapy.contrib.downloadermiddleware.httpcompression import HttpCompressionMiddleware
from scrapy.tests import tests_datadir
@ -20,9 +20,7 @@ FORMAT = {
class HttpCompressionTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = HttpCompressionMiddleware()
def _getresponse(self, coding):

View File

@ -1,16 +1,14 @@
import unittest
from scrapy.contrib.downloadermiddleware.redirect import RedirectMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.core.exceptions import IgnoreRequest
from scrapy.http import Request, Response, Headers
class RedirectMiddlewareTest(unittest.TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = RedirectMiddleware()
def test_priority_adjust(self):

View File

@ -5,14 +5,12 @@ from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookup
ConnectionLost
from scrapy.contrib.downloadermiddleware.retry import RetryMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request, Response
class RetryTest(unittest.TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = RetryMiddleware()
self.mw.max_retry_times = 2

View File

@ -1,6 +1,6 @@
from unittest import TestCase
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from scrapy.conf import settings
@ -9,9 +9,7 @@ from scrapy.conf import settings
class UserAgentMiddlewareTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = UserAgentMiddleware()
def tearDown(self):

View File

@ -45,8 +45,7 @@ class CrawlingSession(object):
self.portno = self.port.getHost().port
from scrapy.spider import spiders
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
spiders.load(['scrapy.tests.test_spiders'])
self.spider = spiders.fromdomain(self.domain)
if self.spider:

View File

@ -101,5 +101,5 @@ setup(
packages = packages,
cmdclass = cmdclasses,
data_files = data_files,
scripts = ['bin/scrapy-admin.py'],
scripts = ['bin/scrapy-ctl.py'],
)