
Some additional improvements to scrapy.command.cmdline logic:

- calling scrapymanager.configure() for all commands
- finally added some unittests to check cmdline behaviour!
Pablo Hoffman 2009-08-31 18:53:55 -03:00
parent 6c58d06f0f
commit 3c6deaeffc
7 changed files with 99 additions and 32 deletions
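
In effect, every command now boots through the execution manager instead of wiring up spiders and logging inline. Before/after excerpt of the tail of execute(), assembled verbatim from the cmdline.py hunks below:

    # before: each start-up step done by hand in execute()
    spiders.load()
    log.start()
    ret = run_command(cmd, args, opts)

    # after: a single configure() call, shared by all commands
    from scrapy.core.manager import scrapymanager
    scrapymanager.configure()
    ret = _run_command(cmd, args, opts)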

scrapy/command/cmdline.py

@@ -15,24 +15,24 @@ from scrapy.command.models import ScrapyCommand
 
 # This dict holds information about the executed command for later use
 command_executed = {}
 
-def save_command_executed(cmdname, cmd, args, opts):
+def _save_command_executed(cmdname, cmd, args, opts):
     """Save command executed info for later reference"""
     command_executed['name'] = cmdname
     command_executed['class'] = cmd
     command_executed['args'] = args[:]
     command_executed['opts'] = opts.__dict__.copy()
 
-def find_commands(dir):
+def _find_commands(dir):
     try:
         return [f[:-3] for f in os.listdir(dir) if not f.startswith('_') and \
             f.endswith('.py')]
     except OSError:
         return []
 
-def get_commands_from_module(module):
+def _get_commands_from_module(module):
     d = {}
     mod = __import__(module, {}, {}, [''])
-    for cmdname in find_commands(mod.__path__[0]):
+    for cmdname in _find_commands(mod.__path__[0]):
         modname = '%s.%s' % (module, cmdname)
         command = getattr(__import__(modname, {}, {}, [cmdname]), 'Command', None)
         if callable(command):
@@ -41,19 +41,19 @@ def get_commands_from_module(module):
             print 'WARNING: Module %r does not define a Command class' % modname
     return d
 
-def get_commands_dict():
-    cmds = get_commands_from_module('scrapy.command.commands')
+def _get_commands_dict():
+    cmds = _get_commands_from_module('scrapy.command.commands')
     cmds_module = settings['COMMANDS_MODULE']
     if cmds_module:
-        cmds.update(get_commands_from_module(cmds_module))
+        cmds.update(_get_commands_from_module(cmds_module))
     return cmds
 
-def get_command_name(argv):
+def _get_command_name(argv):
     for arg in argv[1:]:
         if not arg.startswith('-'):
             return arg
 
-def print_usage(inside_project):
+def _print_usage(inside_project):
     if inside_project:
         print "Scrapy %s - project: %s\n" % (scrapy.__version__, \
             settings['BOT_NAME'])
@@ -67,14 +67,14 @@ def print_usage(inside_project):
     print " scrapy-ctl.py <command> -h\n"
     print "Available commands"
     print "==================\n"
-    cmds = get_commands_dict()
+    cmds = _get_commands_dict()
     for cmdname, cmdclass in sorted(cmds.iteritems()):
         if inside_project or not cmdclass.requires_project:
             print "%s %s" % (cmdname, cmdclass.syntax())
             print "  %s" % cmdclass.short_desc()
             print
 
-def update_default_settings(module, cmdname):
+def _update_default_settings(module, cmdname):
     if not module:
         return
     try:
@@ -90,11 +90,11 @@ def execute(argv=None):
     if argv is None:
         argv = sys.argv
 
-    cmds = get_commands_dict()
-    cmdname = get_command_name(argv)
-    update_default_settings('scrapy.conf.commands', cmdname)
-    update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
+    cmds = _get_commands_dict()
+    cmdname = _get_command_name(argv)
+    _update_default_settings('scrapy.conf.commands', cmdname)
+    _update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
 
     parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
         conflict_handler='resolve', add_help_option=False)
@@ -119,7 +119,7 @@ def execute(argv=None):
         cmd.add_options(parser)
         opts, args = parser.parse_args(args=argv)
         cmd.process_options(args, opts)
-        print_usage(settings.settings_module)
+        _print_usage(settings.settings_module)
         sys.exit(2)
     else:
         print "Unknown command: %s\n" % cmdname
@@ -127,14 +127,14 @@ def execute(argv=None):
         sys.exit(2)
 
     del args[0]  # remove command name from args
-    save_command_executed(cmdname, cmd, args, opts)
-    spiders.load()
-    log.start()
-    ret = run_command(cmd, args, opts)
+    _save_command_executed(cmdname, cmd, args, opts)
+    from scrapy.core.manager import scrapymanager
+    scrapymanager.configure()
+    ret = _run_command(cmd, args, opts)
     if ret is False:
         parser.print_help()
 
-def run_command(cmd, args, opts):
+def _run_command(cmd, args, opts):
     if opts.profile or opts.lsprof:
         if opts.profile:
             log.msg("writing cProfile stats to %r" % opts.profile)

scrapy/command/models.py

@@ -67,8 +67,7 @@ class ScrapyCommand(object):
             help="write lsprof profiling stats to FILE")
         group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
             help="write process ID to FILE")
-        group.add_option("--set", dest="set", action="append", \
-            metavar="SETTING=VALUE", default=[], \
+        group.add_option("--set", dest="set", action="append", default=[], \
             help="set/override setting (may be repeated)")
         group.add_option("--settings", dest="settings", metavar="MODULE",
             help="python path to the Scrapy project settings")
@@ -78,6 +77,15 @@ class ScrapyCommand(object):
         if opts.settings:
             settings.set_settings_module(opts.settings)
 
+        for setting in opts.set:
+            if '=' in setting:
+                name, val = setting.split('=', 1)
+                settings.overrides[name] = val
+            else:
+                sys.stderr.write("%s: invalid argument --set %s - proper format " \
+                    "is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
+                sys.exit(2)
+
         if opts.version:
             print scrapy.__version__
             sys.exit()
@@ -105,14 +113,6 @@ class ScrapyCommand(object):
             with open(opts.pidfile, "w") as f:
                 f.write(str(os.getpid()))
 
-        for setting in opts.set:
-            if '=' in setting:
-                name, val = setting.split('=', 1)
-                settings.overrides[name] = val
-            else:
-                sys.stderr.write("%s: invalid argument --set %s - proper format is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
-                sys.exit(2)
-
     def run(self, args, opts):
         """
         Entry point for running commands
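
The --set parsing moves from the post-start phase up into process_options(), so overrides land in settings.overrides before extensions are loaded (the tests below depend on exactly this ordering). A standalone sketch of the same parsing rule, with a plain dict standing in for settings.overrides and made-up example values:

    import sys

    def parse_set_options(pairs, overrides):
        # Mirrors the --set handling above: split on the first '=',
        # reject arguments that have none.
        for setting in pairs:
            if '=' in setting:
                name, val = setting.split('=', 1)
                overrides[name] = val
            else:
                sys.stderr.write("%s: invalid argument --set %s - proper "
                    "format is --set SETTING=VALUE\n" % (sys.argv[0], setting))
                sys.exit(2)

    overrides = {}
    parse_set_options(['TEST1=override', 'LOG_LEVEL=DEBUG'], overrides)
    print overrides  # {'TEST1': 'override', 'LOG_LEVEL': 'DEBUG'}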

scrapy/core/manager.py

@@ -40,6 +40,7 @@ class ExecutionManager(object):
                 level=log.DEBUG)
         scrapyengine.configure()
+        self.configured = True
 
     def crawl(self, *args):
         """Schedule the given args for crawling. args is a list of urls or domains"""
@@ -53,7 +54,8 @@ class ExecutionManager(object):
 
     def runonce(self, *args):
         """Run the engine until it finishes scraping all domains and then exit"""
-        self.configure()
+        if not self.configured:
+            self.configure()
         self.crawl(*args)
         scrapyengine.start()
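
Since cmdline.py now calls scrapymanager.configure() for every command, the configured flag is what keeps runonce() from repeating the setup. A toy illustration of the guard pattern (hypothetical Manager class, not the real ExecutionManager):

    class Manager(object):
        def __init__(self):
            self.configured = False

        def configure(self):
            # engine/log/extension setup would go here; must run once
            self.configured = True

        def runonce(self):
            if not self.configured:
                self.configure()

    m = Manager()
    m.configure()  # done by cmdline.execute() for every command
    m.runonce()    # the guard skips a redundant second configure()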

scrapy/tests/test_cmdline/__init__.py

@@ -0,0 +1,45 @@
+import sys
+import os
+from subprocess import Popen, PIPE
+import unittest
+
+class CmdlineTest(unittest.TestCase):
+
+    def setUp(self):
+        self.env = os.environ.copy()
+        self.env.pop('SCRAPY_SETTINGS_DISABLED', None)
+        self.env['SCRAPY_SETTINGS_MODULE'] = 'scrapy.tests.test_cmdline.settings'
+
+    def _execute(self, *new_args, **kwargs):
+        args = (sys.executable, '-m', 'scrapy.command.cmdline') + new_args
+        proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
+        comm = proc.communicate()
+        return comm[0].strip()
+
+    def test_default_settings(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
+            'default')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
+            'default + loaded + started')
+
+    def test_override_settings_using_settings_arg(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', \
+            '--settings', 'scrapy.tests.test_cmdline.settings2'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', \
+            '--settings', 'scrapy.tests.test_cmdline.settings2'), \
+            'override + loaded + started')
+
+    def test_override_settings_using_set_arg(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', '--set', 'TEST1=override'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--set', 'TEST1=override'), \
+            'override + loaded + started')
+
+    def test_override_settings_using_envvar(self):
+        self.env['SCRAPY_TEST1'] = 'override'
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
+            'override + loaded + started')
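
The expected strings encode the loading order these tests pin down: TEST1 starts at the value given in the settings module, the extensions module appends ' + loaded' when it is imported, and TestExtension.__init__ appends ' + started' when the extension is instantiated; --init reads the value before extensions run. Worked trace for the default case:

    TEST1 = 'default'                     # settings module read
    TEST1 = 'default + loaded'            # extensions module imported
    TEST1 = 'default + loaded + started'  # TestExtension() instantiated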

scrapy/tests/test_cmdline/extensions.py

@@ -0,0 +1,10 @@
+"""A test extension used to check the settings loading order"""
+
+from scrapy.conf import settings
+
+settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'loaded')
+
+class TestExtension(object):
+
+    def __init__(self):
+        settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'started')

scrapy/tests/test_cmdline/settings.py

@@ -0,0 +1,5 @@
+EXTENSIONS = [
+    'scrapy.tests.test_cmdline.extensions.TestExtension'
+]
+
+TEST1 = 'default'

scrapy/tests/test_cmdline/settings2.py

@@ -0,0 +1,5 @@
+EXTENSIONS = [
+    'scrapy.tests.test_cmdline.extensions.TestExtension'
+]
+
+TEST1 = 'override'