mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 07:24:09 +00:00
Some additional improvements to scrapy.command.cmdline logic:
- call scrapymanager.configure() for all commands
- add unit tests to check cmdline behaviour
This commit is contained in:
parent
6c58d06f0f
commit
3c6deaeffc
@ -15,24 +15,24 @@ from scrapy.command.models import ScrapyCommand
|
||||
# This dict holds information about the executed command for later use
|
||||
command_executed = {}
|
||||
|
||||
def save_command_executed(cmdname, cmd, args, opts):
|
||||
def _save_command_executed(cmdname, cmd, args, opts):
|
||||
"""Save command executed info for later reference"""
|
||||
command_executed['name'] = cmdname
|
||||
command_executed['class'] = cmd
|
||||
command_executed['args'] = args[:]
|
||||
command_executed['opts'] = opts.__dict__.copy()
|
||||
|
||||
def find_commands(dir):
|
||||
def _find_commands(dir):
|
||||
try:
|
||||
return [f[:-3] for f in os.listdir(dir) if not f.startswith('_') and \
|
||||
f.endswith('.py')]
|
||||
except OSError:
|
||||
return []
|
||||
|
||||
def get_commands_from_module(module):
|
||||
def _get_commands_from_module(module):
|
||||
d = {}
|
||||
mod = __import__(module, {}, {}, [''])
|
||||
for cmdname in find_commands(mod.__path__[0]):
|
||||
for cmdname in _find_commands(mod.__path__[0]):
|
||||
modname = '%s.%s' % (module, cmdname)
|
||||
command = getattr(__import__(modname, {}, {}, [cmdname]), 'Command', None)
|
||||
if callable(command):
|
||||
@ -41,19 +41,19 @@ def get_commands_from_module(module):
|
||||
print 'WARNING: Module %r does not define a Command class' % modname
|
||||
return d
|
||||
|
||||
def _get_commands_dict():
    """Build the name -> Command class mapping.

    Starts with the built-in scrapy commands, then overlays the project's
    custom commands module (COMMANDS_MODULE setting), so project commands
    take precedence over built-in ones with the same name.
    """
    modules = ['scrapy.command.commands']
    custom_module = settings['COMMANDS_MODULE']
    if custom_module:
        modules.append(custom_module)
    cmds = {}
    for modname in modules:
        cmds.update(_get_commands_from_module(modname))
    return cmds
|
||||
|
||||
def get_command_name(argv):
|
||||
def _get_command_name(argv):
|
||||
for arg in argv[1:]:
|
||||
if not arg.startswith('-'):
|
||||
return arg
|
||||
|
||||
def print_usage(inside_project):
|
||||
def _print_usage(inside_project):
|
||||
if inside_project:
|
||||
print "Scrapy %s - project: %s\n" % (scrapy.__version__, \
|
||||
settings['BOT_NAME'])
|
||||
@ -67,14 +67,14 @@ def print_usage(inside_project):
|
||||
print " scrapy-ctl.py <command> -h\n"
|
||||
print "Available commands"
|
||||
print "==================\n"
|
||||
cmds = get_commands_dict()
|
||||
cmds = _get_commands_dict()
|
||||
for cmdname, cmdclass in sorted(cmds.iteritems()):
|
||||
if inside_project or not cmdclass.requires_project:
|
||||
print "%s %s" % (cmdname, cmdclass.syntax())
|
||||
print " %s" % cmdclass.short_desc()
|
||||
print
|
||||
|
||||
def update_default_settings(module, cmdname):
|
||||
def _update_default_settings(module, cmdname):
|
||||
if not module:
|
||||
return
|
||||
try:
|
||||
@ -90,11 +90,11 @@ def execute(argv=None):
|
||||
if argv is None:
|
||||
argv = sys.argv
|
||||
|
||||
cmds = get_commands_dict()
|
||||
cmds = _get_commands_dict()
|
||||
|
||||
cmdname = get_command_name(argv)
|
||||
update_default_settings('scrapy.conf.commands', cmdname)
|
||||
update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
|
||||
cmdname = _get_command_name(argv)
|
||||
_update_default_settings('scrapy.conf.commands', cmdname)
|
||||
_update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
|
||||
|
||||
parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
|
||||
conflict_handler='resolve', add_help_option=False)
|
||||
@ -119,7 +119,7 @@ def execute(argv=None):
|
||||
cmd.add_options(parser)
|
||||
opts, args = parser.parse_args(args=argv)
|
||||
cmd.process_options(args, opts)
|
||||
print_usage(settings.settings_module)
|
||||
_print_usage(settings.settings_module)
|
||||
sys.exit(2)
|
||||
else:
|
||||
print "Unknown command: %s\n" % cmdname
|
||||
@ -127,14 +127,14 @@ def execute(argv=None):
|
||||
sys.exit(2)
|
||||
|
||||
del args[0] # remove command name from args
|
||||
save_command_executed(cmdname, cmd, args, opts)
|
||||
spiders.load()
|
||||
log.start()
|
||||
ret = run_command(cmd, args, opts)
|
||||
_save_command_executed(cmdname, cmd, args, opts)
|
||||
from scrapy.core.manager import scrapymanager
|
||||
scrapymanager.configure()
|
||||
ret = _run_command(cmd, args, opts)
|
||||
if ret is False:
|
||||
parser.print_help()
|
||||
|
||||
def run_command(cmd, args, opts):
|
||||
def _run_command(cmd, args, opts):
|
||||
if opts.profile or opts.lsprof:
|
||||
if opts.profile:
|
||||
log.msg("writing cProfile stats to %r" % opts.profile)
|
||||
|
@ -67,8 +67,7 @@ class ScrapyCommand(object):
|
||||
help="write lsprof profiling stats to FILE")
|
||||
group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
|
||||
help="write process ID to FILE")
|
||||
group.add_option("--set", dest="set", action="append", \
|
||||
metavar="SETTING=VALUE", default=[], \
|
||||
group.add_option("--set", dest="set", action="append", default=[], \
|
||||
help="set/override setting (may be repeated)")
|
||||
group.add_option("--settings", dest="settings", metavar="MODULE",
|
||||
help="python path to the Scrapy project settings")
|
||||
@ -78,6 +77,15 @@ class ScrapyCommand(object):
|
||||
if opts.settings:
|
||||
settings.set_settings_module(opts.settings)
|
||||
|
||||
for setting in opts.set:
|
||||
if '=' in setting:
|
||||
name, val = setting.split('=', 1)
|
||||
settings.overrides[name] = val
|
||||
else:
|
||||
sys.stderr.write("%s: invalid argument --set %s - proper format " \
|
||||
"is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
|
||||
sys.exit(2)
|
||||
|
||||
if opts.version:
|
||||
print scrapy.__version__
|
||||
sys.exit()
|
||||
@ -105,14 +113,6 @@ class ScrapyCommand(object):
|
||||
with open(opts.pidfile, "w") as f:
|
||||
f.write(str(os.getpid()))
|
||||
|
||||
for setting in opts.set:
|
||||
if '=' in setting:
|
||||
name, val = setting.split('=', 1)
|
||||
settings.overrides[name] = val
|
||||
else:
|
||||
sys.stderr.write("%s: invalid argument --set %s - proper format is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
|
||||
sys.exit(2)
|
||||
|
||||
def run(self, args, opts):
|
||||
"""
|
||||
Entry point for running commands
|
||||
|
@ -40,6 +40,7 @@ class ExecutionManager(object):
|
||||
level=log.DEBUG)
|
||||
|
||||
scrapyengine.configure()
|
||||
self.configured = True
|
||||
|
||||
def crawl(self, *args):
|
||||
"""Schedule the given args for crawling. args is a list of urls or domains"""
|
||||
@ -53,7 +54,8 @@ class ExecutionManager(object):
|
||||
|
||||
def runonce(self, *args):
|
||||
"""Run the engine until it finishes scraping all domains and then exit"""
|
||||
self.configure()
|
||||
if not self.configured:
|
||||
self.configure()
|
||||
self.crawl(*args)
|
||||
scrapyengine.start()
|
||||
|
||||
|
45
scrapy/tests/test_cmdline/__init__.py
Normal file
45
scrapy/tests/test_cmdline/__init__.py
Normal file
@ -0,0 +1,45 @@
|
||||
import sys
|
||||
import os
|
||||
from subprocess import Popen, PIPE
|
||||
import unittest
|
||||
|
||||
class CmdlineTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.env = os.environ.copy()
|
||||
self.env.pop('SCRAPY_SETTINGS_DISABLED', None)
|
||||
self.env['SCRAPY_SETTINGS_MODULE'] = 'scrapy.tests.test_cmdline.settings'
|
||||
|
||||
def _execute(self, *new_args, **kwargs):
|
||||
args = (sys.executable, '-m', 'scrapy.command.cmdline') + new_args
|
||||
proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
|
||||
comm = proc.communicate()
|
||||
return comm[0].strip()
|
||||
|
||||
def test_default_settings(self):
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
|
||||
'default')
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
|
||||
'default + loaded + started')
|
||||
|
||||
def test_override_settings_using_settings_arg(self):
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', \
|
||||
'--settings', 'scrapy.tests.test_cmdline.settings2'), \
|
||||
'override')
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', \
|
||||
'--settings', 'scrapy.tests.test_cmdline.settings2'), \
|
||||
'override + loaded + started')
|
||||
|
||||
def test_override_settings_using_set_arg(self):
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', '--set', 'TEST1=override'), \
|
||||
'override')
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', '--set', 'TEST1=override'), \
|
||||
'override + loaded + started')
|
||||
|
||||
def test_override_settings_using_envvar(self):
|
||||
self.env['SCRAPY_TEST1'] = 'override'
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
|
||||
'override')
|
||||
self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
|
||||
'override + loaded + started')
|
||||
|
10
scrapy/tests/test_cmdline/extensions.py
Normal file
10
scrapy/tests/test_cmdline/extensions.py
Normal file
@ -0,0 +1,10 @@
|
||||
"""A test extension used to check the settings loading order"""
|
||||
|
||||
from scrapy.conf import settings
|
||||
|
||||
settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'loaded')
|
||||
|
||||
class TestExtension(object):
|
||||
|
||||
def __init__(self):
|
||||
settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'started')
|
5
scrapy/tests/test_cmdline/settings.py
Normal file
5
scrapy/tests/test_cmdline/settings.py
Normal file
@ -0,0 +1,5 @@
|
||||
# Default settings module for the cmdline tests: enables the extension
# that records the settings loading order in TEST1.
EXTENSIONS = [
    'scrapy.tests.test_cmdline.extensions.TestExtension',
]

TEST1 = 'default'
|
5
scrapy/tests/test_cmdline/settings2.py
Normal file
5
scrapy/tests/test_cmdline/settings2.py
Normal file
@ -0,0 +1,5 @@
|
||||
# Alternate settings module for the cmdline tests (selected via --settings)
# to verify that a different module overrides the default TEST1 value.
EXTENSIONS = [
    'scrapy.tests.test_cmdline.extensions.TestExtension',
]

TEST1 = 'override'
|
Loading…
x
Reference in New Issue
Block a user