
removed scrapy-admin.py command, and left only scrapy-ctl as the only scrapy command

This commit is contained in:
Pablo Hoffman 2009-08-24 15:43:36 -03:00
parent 39540b188a
commit 8a074c9cb5
27 changed files with 165 additions and 158 deletions
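In practice, this replaces the old two-script workflow with a single entry point. A before/after sketch, using the dmoz project name from the tutorial (illustrative only):

    scrapy-admin.py startproject dmoz        # before this commit
    python scrapy-ctl.py startproject dmoz   # after this commit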

View File

@ -1,75 +0,0 @@
#!/usr/bin/env python
"""Scrapy admin script is used to create new scrapy projects and similar
tasks"""
import os
import string
from optparse import OptionParser
import re
import scrapy
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree
usage = """
scrapy-admin.py [options] [command]
Available commands:
startproject <project_name>
Starts a new project with name 'project_name'
"""
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of templatefile's path that are rendered *after copying* to
# project directory.
TEMPLATES = (
'scrapy-ctl.py',
'${project_name}/settings.py.tmpl',
'${project_name}/items.py.tmpl',
'${project_name}/pipelines.py.tmpl',
)
IGNORE = ignore_patterns('*.pyc', '.svn')
def main():
    parser = OptionParser(usage=usage)
    opts, args = parser.parse_args()

    if not args:
        parser.print_help()
        return

    cmd = args[0]
    if cmd == "startproject":
        if len(args) >= 2:
            project_name = args[1]
            if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
                # Provide a smart error message, depending on the error.
                if not re.search(r'^[_a-zA-Z]', project_name):
                    message = 'make sure the project_name begins with a letter or underscore'
                else:
                    message = 'use only numbers, letters and underscores'
                print "scrapy-admin.py: %r is not a valid project name. Please %s." % (project_name, message)
            else:
                project_root_path = project_name

                roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
                copytree(roottpl, project_name, ignore=IGNORE)

                moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
                copytree(moduletpl, '%s/%s' % (project_name, project_name),
                    ignore=IGNORE)

                for path in TEMPLATES:
                    tplfile = os.path.join(project_root_path,
                        string.Template(path).substitute(project_name=project_name))
                    render_templatefile(tplfile, project_name=project_name,
                        ProjectName=string_camelcase(project_name))
        else:
            print "scrapy-admin.py: missing project name"
    else:
        print "scrapy-admin.py: unknown command: %s" % cmd


if __name__ == '__main__':
    main()

bin/scrapy-ctl.py Executable file
View File

@ -0,0 +1,4 @@
#!/usr/bin/env python
from scrapy.command.cmdline import execute
execute()

View File

@ -4,35 +4,14 @@
Management scripts
==================
Scrapy is controlled by two command-line scripts:
1. :ref:`topics-scripts-scrapy-admin`: used to create Scrapy projects.
2. :ref:`topics-scripts-scrapy-ctl`: located in every project's root dir, used
to manage each project.
.. _topics-scripts-scrapy-admin:
scrapy-admin.py
===============
Usage: ``scrapy-admin.py <subcommand>``
This script should be in your system path.
Available subcommands
---------------------
startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``
Starts a new project with name ``project_name``
Scrapy is controlled by the ``scrapy-ctl.py`` command.
.. _topics-scripts-scrapy-ctl:
scrapy-ctl.py
=============
Usage: ``scrapy-admin.py <subcommand>``
Usage: ``scrapy-ctl.py <command>``
This script is located in every project's root folder.
@ -154,3 +133,9 @@ start
~~~~~
Start the Scrapy manager but don't run any spider (idle mode)
startproject
~~~~~~~~~~~~
Usage: ``startproject <project_name>``
Starts a new project with name ``project_name``
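For reference, the generic invocation forms (as printed by the script's own usage text; see the ``cmdline.py`` changes below) are::

    scrapy-ctl.py <command> [options] [args]
    scrapy-ctl.py <command> -h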

View File

@ -210,20 +210,20 @@ Panel`_)::
set PYTHONPATH=C:\path\to\scrapy-trunk
Make the scrapy-admin.py script available
-----------------------------------------
Make the scrapy-ctl.py script available
---------------------------------------
On Unix-like systems, create a symbolic link to the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` in a directory on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` in a directory on your system path,
such as ``/usr/local/bin``. For example::
ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-admin.py /usr/local/bin
ln -s `pwd`/scrapy-trunk/scrapy/bin/scrapy-ctl.py /usr/local/bin
This simply lets you type scrapy-admin.py from within any directory, rather
This simply lets you type ``scrapy-ctl.py`` from within any directory, rather
than having to qualify the command with the full path to the file.
On Windows systems, the same result can be achieved by copying the file
``scrapy-trunk/scrapy/bin/scrapy-admin.py`` to somewhere on your system path,
``scrapy-trunk/scrapy/bin/scrapy-ctl.py`` to somewhere on your system path,
for example ``C:\Python25\Scripts``, which is customary for Python scripts.
.. _Control Panel: http://www.microsoft.com/resources/documentation/windows/xp/all/proddocs/en-us/sysdm_advancd_environmnt_addchange_variable.mspx

View File

@ -36,7 +36,7 @@ Creating a project
Before you start scraping, you will have to set up a new Scrapy project. Enter a
directory where you'd like to store your code and then run::
scrapy-admin.py startproject dmoz
python scrapy-ctl.py startproject dmoz
This will create a ``dmoz`` directory with the following contents::

View File

@ -704,7 +704,7 @@ PROJECT_NAME
Default: ``Not Defined``
The name of the current project. It matches the project module name as created
by :ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command,
by :ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl-startproject>` command,
and is only defined by the project settings file.
.. setting:: REDIRECT_MAX_TIMES
@ -953,7 +953,7 @@ TEMPLATES_DIR
Default: ``templates`` dir inside scrapy module
The directory where to look for templates when creating new projects with the
:ref:`scrapy-admin.py startproject <topics-scripts-scrapy-admin>` command.
:ref:`scrapy-ctl.py startproject <topics-scripts-scrapy-ctl>` command.
.. setting:: URLLENGTH_LIMIT

View File

@ -9,7 +9,7 @@ import scrapy
from scrapy import log
from scrapy.spider import spiders
from scrapy.xlib import lsprofcalltree
from scrapy.conf import settings, SETTINGS_MODULE
from scrapy.conf import settings
# This dict holds information about the executed command for later use
command_executed = {}
@ -45,12 +45,12 @@ def get_command_name(argv):
if not arg.startswith('-'):
return arg
def usage(prog):
def usage():
s = "Usage\n"
s += "=====\n"
s += "%s <command> [options] [args]\n" % prog
s += "scrapy-ctl.py <command> [options] [args]\n"
s += " Run a command\n\n"
s += "%s <command> -h\n" % prog
s += "scrapy-ctl.py <command> -h\n"
s += " Print command help and options\n\n"
s += "Available commands\n"
s += "===================\n"
@ -72,14 +72,10 @@ def update_default_settings(module, cmdname):
if not k.startswith("_"):
settings.defaults[k] = v
def execute():
if not settings.settings_module:
print "Scrapy %s\n" % scrapy.__version__
print "Error: Cannot find %r module in python path" % SETTINGS_MODULE
sys.exit(1)
execute_with_args(sys.argv)
def execute(argv=None):
if argv is None:
argv = sys.argv
def execute_with_args(argv):
cmds = get_commands_dict()
cmdname = get_command_name(argv)
@ -88,7 +84,7 @@ def execute_with_args(argv):
if not cmdname:
print "Scrapy %s\n" % scrapy.__version__
print usage(argv[0])
print usage()
sys.exit(2)
parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
@ -119,6 +115,11 @@ def execute_with_args(argv):
command_executed['opts'] = opts.__dict__.copy()
cmd.process_options(args, opts)
if cmd.requires_project and not settings.settings_module:
print "Error running: scrapy-ctl.py %s\n" % cmdname
print "Cannot find project settings module in python path: %s" % \
settings.settings_module_path
sys.exit(1)
spiders.load()
log.start()
ret = run_command(cmd, args, opts)
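Since ``execute()`` now accepts an optional ``argv`` argument (defaulting to ``sys.argv``), it can also be driven programmatically. A minimal sketch, not part of this commit; the command and URL are made up:

    from scrapy.command.cmdline import execute

    # Equivalent to running "scrapy-ctl.py fetch http://example.com/" from the shell;
    # the first element plays the role of the program name in sys.argv.
    execute(['scrapy-ctl.py', 'fetch', 'http://example.com/'])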

View File

@ -8,6 +8,9 @@ from scrapy import log
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <domain|url> ..."

View File

@ -4,6 +4,9 @@ from scrapy.command import ScrapyCommand
from scrapy.fetcher import fetch
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[options] <url>"

View File

@ -24,6 +24,8 @@ def sanitize_module_name(module_name):
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <spider_module_name> <spider_domain_name>"

View File

@ -1,6 +1,9 @@
from scrapy.command import ScrapyCommand, cmdline
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<command>"

View File

@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.spider import spiders
class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "List available spiders"

View File

@ -7,6 +7,9 @@ from scrapy.utils import display
from scrapy import log
class Command(ScrapyCommand):

    requires_project = True

    def syntax(self):
        return "[options] <url>"

View File

@ -21,6 +21,9 @@ from scrapy.http import Request
from scrapy.fetcher import get_or_create_spider
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "[url]"

View File

@ -2,6 +2,9 @@ from scrapy.command import ScrapyCommand
from scrapy.core.manager import scrapymanager
class Command(ScrapyCommand):

    requires_project = True

    def short_desc(self):
        return "Start the Scrapy manager but don't run any spider (idle mode)"

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
import os
import string
import re
import scrapy
from scrapy.command import ScrapyCommand
from scrapy.utils.template import render_templatefile, string_camelcase
from scrapy.utils.python import ignore_patterns, copytree
PROJECT_TEMPLATES_PATH = os.path.join(scrapy.__path__[0], 'templates/project')
# This is the list of template file paths that are rendered *after copying* to
# the new project directory.
TEMPLATES = (
('scrapy-ctl.py',),
('${project_name}', 'settings.py.tmpl'),
('${project_name}', 'items.py.tmpl'),
('${project_name}', 'pipelines.py.tmpl'),
)
IGNORE = ignore_patterns('*.pyc', '.svn')
class Command(ScrapyCommand):

    requires_project = False

    def syntax(self):
        return "<project_name>"

    def short_desc(self):
        return "Create new project with an initial project template"

    def run(self, args, opts):
        if len(args) != 1:
            return False

        project_name = args[0]
        if not re.search(r'^[_a-zA-Z]\w*$', project_name): # If it's not a valid directory name.
            # Provide a smart error message, depending on the error.
            if not re.search(r'^[a-zA-Z]', project_name):
                message = 'Project names must begin with a letter'
            else:
                message = 'Project names must contain only letters, numbers and underscores'
            print "Invalid project name: %s\n\n%s" % (project_name, message)
        else:
            project_root_path = project_name

            roottpl = os.path.join(PROJECT_TEMPLATES_PATH, 'root')
            copytree(roottpl, project_name, ignore=IGNORE)

            moduletpl = os.path.join(PROJECT_TEMPLATES_PATH, 'module')
            copytree(moduletpl, '%s/%s' % (project_name, project_name),
                ignore=IGNORE)

            for paths in TEMPLATES:
                path = os.path.join(*paths)
                tplfile = os.path.join(project_root_path,
                    string.Template(path).substitute(project_name=project_name))
                render_templatefile(tplfile, project_name=project_name,
                    ProjectName=string_camelcase(project_name))
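Each ``TEMPLATES`` entry is now a path tuple that is joined with ``os.path.join`` before the ``${project_name}`` placeholder gets substituted. A quick sketch of that expansion (the ``dmoz`` name is hypothetical; output shown for POSIX paths):

    import os, string

    path = os.path.join(*('${project_name}', 'settings.py.tmpl'))
    print string.Template(path).substitute(project_name='dmoz')   # prints: dmoz/settings.py.tmpl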

View File

@ -11,6 +11,9 @@ from optparse import OptionGroup
from scrapy.conf import settings
class ScrapyCommand(object):

    requires_project = False

    def syntax(self):
        """
        Command syntax (preferably one-line). Do not include command name.
@ -61,12 +64,17 @@ class ScrapyCommand(object):
help="write lsprof profiling stats to FILE")
group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
help="write process ID to FILE")
group.add_option("--set", dest="settings", action="append", \
group.add_option("--set", dest="set", action="append", \
metavar="SETTING=VALUE", default=[], \
help="set/override setting (may be repeated)")
group.add_option("--settings", dest="settings", metavar="MODULE",
help="python path to the Scrapy project settings")
parser.add_option_group(group)
def process_options(self, args, opts):
if opts.settings:
settings.set_settings_module(opts.settings)
if opts.logfile:
settings.overrides['LOG_ENABLED'] = True
settings.overrides['LOG_FILE'] = opts.logfile
@ -90,7 +98,7 @@ class ScrapyCommand(object):
with open(opts.pidfile, "w") as f:
f.write(str(os.getpid()))
for setting in opts.settings:
for setting in opts.set:
if '=' in setting:
name, val = setting.split('=', 1)
settings.overrides[name] = val
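With the ``--set`` option's destination renamed to ``set`` and the new ``--settings`` option wired to ``settings.set_settings_module()``, both overrides and the settings module can be chosen per invocation. An illustrative command line (the project module and values are made up):

    python scrapy-ctl.py crawl example.com --settings dmoz.settings \
        --set LOG_FILE=scrapy.log --set USER_AGENT=mybot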

View File

@ -7,7 +7,6 @@ See documentation in docs/topics/settings.rst
import os
import cPickle as pickle
SETTINGS_MODULE = os.environ.get('SCRAPYSETTINGS_MODULE', 'scrapy_settings')
SETTINGS_DISABLED = os.environ.get('SCRAPY_SETTINGS_DISABLED', False)
class Settings(object):
@ -19,17 +18,11 @@ class Settings(object):
global_defaults = None
def __init__(self):
self.set_settings_module()
pickled_settings = os.environ.get("SCRAPY_PICKLED_SETTINGS_TO_OVERRIDE")
self.overrides = pickle.loads(pickled_settings) if pickled_settings else {}
self.settings_module = self._import(SETTINGS_MODULE)
self.defaults = {}
self.global_defaults = self._import('scrapy.conf.default_settings')
def _import(self, modulepath):
try:
return __import__(modulepath, {}, {}, [''])
except ImportError:
pass
self.global_defaults = __import__('scrapy.conf.default_settings', {}, {}, [''])
def __getitem__(self, opt_name):
if not SETTINGS_DISABLED:
@ -43,6 +36,16 @@ class Settings(object):
return self.defaults[opt_name]
return getattr(self.global_defaults, opt_name, None)
def set_settings_module(self, settings_module_path=None):
if settings_module_path is None:
settings_module_path = os.environ.get('SCRAPYSETTINGS_MODULE', \
'scrapy_settings')
self.settings_module_path = settings_module_path
try:
self.settings_module = __import__(settings_module_path, {}, {}, [''])
except ImportError:
self.settings_module = None
def get(self, name, default=None):
return self[name] if self[name] is not None else default
@ -68,4 +71,7 @@ class Settings(object):
else:
return str(value).split(',')
def __str__(self):
return "<Settings %r>" % self.settings_module_path
settings = Settings()
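The new ``set_settings_module()`` method is what the ``--settings`` command-line option calls into, and it can be used directly as well. A hypothetical sketch (the module name is made up):

    from scrapy.conf import settings

    settings.set_settings_module('dmoz.settings')   # import the project settings module, if importable
    print settings.settings_module_path             # prints: dmoz.settings
    print settings                                  # prints: <Settings 'dmoz.settings'>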

View File

@ -0,0 +1 @@
LOG_ENABLED = False

View File

@ -19,8 +19,8 @@ class SpiderManager(object):
def __init__(self):
self.loaded = False
self.default_domain = None
self.spider_modules = settings.getlist('SPIDER_MODULES')
self.force_domain = None
self.spider_modules = None
def fromdomain(self, domain_name):
return self.asdict().get(domain_name)
@ -56,7 +56,10 @@ class SpiderManager(object):
if not self.loaded:
self.load()
def load(self):
def load(self, spider_modules=None):
if spider_modules is None:
spider_modules = settings.getlist('SPIDER_MODULES')
self.spider_modules = spider_modules
self._invaliddict = {}
self._spiders = {}
@ -75,7 +78,7 @@ class SpiderManager(object):
# we can't use the log module here because it may not be available yet
print "WARNING: Could not load spider %s: %s" % (spider, e)
def reload(self, skip_domains=None):
def reload(self, spider_modules=None, skip_domains=None):
"""Reload spiders by trying to discover any spiders added under the
spiders module/packages, removes any spiders removed.
@ -91,7 +94,7 @@ class SpiderManager(object):
if not domain in skip_domains:
reload(sys.modules[spider.__module__])
reloaded += 1
self.load() # second call to update spider instances
self.load(spider_modules=spider_modules) # second call to update spider instances
log.msg("Reloaded %d/%d scrapy spiders" % (reloaded, len(pdict)), level=log.DEBUG)
def _getspiders(self, interface, package):

View File

@ -2,17 +2,15 @@ from __future__ import with_statement
from unittest import TestCase
from scrapy.spider import spiders
from scrapy.http import Response, Request
from scrapy.spider import BaseSpider
from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware
class CookiesMiddlewareTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = CookiesMiddleware()
def tearDown(self):

View File

@ -3,7 +3,7 @@ from __future__ import with_statement
from unittest import TestCase
from os.path import join, abspath, dirname
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Response, Request
from scrapy.contrib.downloadermiddleware.httpcompression import HttpCompressionMiddleware
from scrapy.tests import tests_datadir
@ -20,9 +20,7 @@ FORMAT = {
class HttpCompressionTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = HttpCompressionMiddleware()
def _getresponse(self, coding):

View File

@ -1,16 +1,14 @@
import unittest
from scrapy.contrib.downloadermiddleware.redirect import RedirectMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.core.exceptions import IgnoreRequest
from scrapy.http import Request, Response, Headers
class RedirectMiddlewareTest(unittest.TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = RedirectMiddleware()
def test_priority_adjust(self):

View File

@ -5,14 +5,12 @@ from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookup
ConnectionLost
from scrapy.contrib.downloadermiddleware.retry import RetryMiddleware
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request, Response
class RetryTest(unittest.TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = RetryMiddleware()
self.mw.max_retry_times = 2

View File

@ -1,6 +1,6 @@
from unittest import TestCase
from scrapy.spider import spiders
from scrapy.spider import BaseSpider
from scrapy.http import Request
from scrapy.contrib.downloadermiddleware.useragent import UserAgentMiddleware
from scrapy.conf import settings
@ -9,9 +9,7 @@ from scrapy.conf import settings
class UserAgentMiddlewareTest(TestCase):
def setUp(self):
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
self.spider = spiders.fromdomain('scrapytest.org')
self.spider = BaseSpider()
self.mw = UserAgentMiddleware()
def tearDown(self):

View File

@ -45,8 +45,7 @@ class CrawlingSession(object):
self.portno = self.port.getHost().port
from scrapy.spider import spiders
spiders.spider_modules = ['scrapy.tests.test_spiders']
spiders.reload()
spiders.load(['scrapy.tests.test_spiders'])
self.spider = spiders.fromdomain(self.domain)
if self.spider:

View File

@ -101,5 +101,5 @@ setup(
packages = packages,
cmdclass = cmdclasses,
data_files = data_files,
scripts = ['bin/scrapy-admin.py'],
scripts = ['bin/scrapy-ctl.py'],
)