From 3c6deaeffcc2f485d287d2aa02a6c643c073df9f Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Mon, 31 Aug 2009 18:53:55 -0300
Subject: [PATCH] Some additional improvements to scrapy.command.cmdline logic:

- calling scrapymanager.configure() for all commands
- finally added some unittests to check cmdline behaviour!
---
 scrapy/command/cmdline.py               | 42 +++++++++++------------
 scrapy/command/models.py                | 20 +++++------
 scrapy/core/manager.py                  |  4 ++-
 scrapy/tests/test_cmdline/__init__.py   | 45 +++++++++++++++++++++++++
 scrapy/tests/test_cmdline/extensions.py | 10 ++++++
 scrapy/tests/test_cmdline/settings.py   |  5 +++
 scrapy/tests/test_cmdline/settings2.py  |  5 +++
 7 files changed, 99 insertions(+), 32 deletions(-)
 create mode 100644 scrapy/tests/test_cmdline/__init__.py
 create mode 100644 scrapy/tests/test_cmdline/extensions.py
 create mode 100644 scrapy/tests/test_cmdline/settings.py
 create mode 100644 scrapy/tests/test_cmdline/settings2.py

diff --git a/scrapy/command/cmdline.py b/scrapy/command/cmdline.py
index 6c4dbd69a..c75829371 100644
--- a/scrapy/command/cmdline.py
+++ b/scrapy/command/cmdline.py
@@ -15,24 +15,24 @@ from scrapy.command.models import ScrapyCommand
 # This dict holds information about the executed command for later use
 command_executed = {}
 
-def save_command_executed(cmdname, cmd, args, opts):
+def _save_command_executed(cmdname, cmd, args, opts):
     """Save command executed info for later reference"""
     command_executed['name'] = cmdname
     command_executed['class'] = cmd
     command_executed['args'] = args[:]
     command_executed['opts'] = opts.__dict__.copy()
 
-def find_commands(dir):
+def _find_commands(dir):
     try:
         return [f[:-3] for f in os.listdir(dir) if not f.startswith('_') and \
             f.endswith('.py')]
     except OSError:
         return []
 
-def get_commands_from_module(module):
+def _get_commands_from_module(module):
     d = {}
     mod = __import__(module, {}, {}, [''])
-    for cmdname in find_commands(mod.__path__[0]):
+    for cmdname in _find_commands(mod.__path__[0]):
         modname = '%s.%s' % (module, cmdname)
         command = getattr(__import__(modname, {}, {}, [cmdname]), 'Command', None)
         if callable(command):
@@ -41,19 +41,19 @@
             print 'WARNING: Module %r does not define a Command class' % modname
     return d
 
-def get_commands_dict():
-    cmds = get_commands_from_module('scrapy.command.commands')
+def _get_commands_dict():
+    cmds = _get_commands_from_module('scrapy.command.commands')
     cmds_module = settings['COMMANDS_MODULE']
     if cmds_module:
-        cmds.update(get_commands_from_module(cmds_module))
+        cmds.update(_get_commands_from_module(cmds_module))
     return cmds
 
-def get_command_name(argv):
+def _get_command_name(argv):
     for arg in argv[1:]:
         if not arg.startswith('-'):
             return arg
 
-def print_usage(inside_project):
+def _print_usage(inside_project):
     if inside_project:
         print "Scrapy %s - project: %s\n" % (scrapy.__version__, \
             settings['BOT_NAME'])
@@ -67,14 +67,14 @@
     print "  scrapy-ctl.py -h\n"
     print "Available commands"
     print "==================\n"
-    cmds = get_commands_dict()
+    cmds = _get_commands_dict()
     for cmdname, cmdclass in sorted(cmds.iteritems()):
         if inside_project or not cmdclass.requires_project:
             print "%s %s" % (cmdname, cmdclass.syntax())
             print "  %s" % cmdclass.short_desc()
     print
 
-def update_default_settings(module, cmdname):
+def _update_default_settings(module, cmdname):
     if not module:
         return
     try:
@@ -90,11 +90,11 @@ def execute(argv=None):
     if argv is None:
         argv = sys.argv
 
-    cmds = get_commands_dict()
+    cmds = _get_commands_dict()
 
-    cmdname = get_command_name(argv)
-    update_default_settings('scrapy.conf.commands', cmdname)
-    update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
+    cmdname = _get_command_name(argv)
+    _update_default_settings('scrapy.conf.commands', cmdname)
+    _update_default_settings(settings['COMMANDS_SETTINGS_MODULE'], cmdname)
 
     parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
         conflict_handler='resolve', add_help_option=False)
@@ -119,7 +119,7 @@
         cmd.add_options(parser)
         opts, args = parser.parse_args(args=argv)
         cmd.process_options(args, opts)
-        print_usage(settings.settings_module)
+        _print_usage(settings.settings_module)
         sys.exit(2)
     else:
         print "Unknown command: %s\n" % cmdname
@@ -127,14 +127,14 @@
         sys.exit(2)
 
     del args[0] # remove command name from args
-    save_command_executed(cmdname, cmd, args, opts)
-    spiders.load()
-    log.start()
-    ret = run_command(cmd, args, opts)
+    _save_command_executed(cmdname, cmd, args, opts)
+    from scrapy.core.manager import scrapymanager
+    scrapymanager.configure()
+    ret = _run_command(cmd, args, opts)
     if ret is False:
         parser.print_help()
 
-def run_command(cmd, args, opts):
+def _run_command(cmd, args, opts):
     if opts.profile or opts.lsprof:
         if opts.profile:
             log.msg("writing cProfile stats to %r" % opts.profile)
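Context for the COMMANDS_MODULE lookup in _get_commands_dict() above: a project can register extra commands by pointing that setting at a package of command modules; each module's file name becomes the command name (per _find_commands()) and the module must expose a callable Command attribute. A minimal sketch under those rules, using a hypothetical myproject.mycommands package; only the ScrapyCommand hooks visible in this patch (requires_project, syntax(), short_desc(), run()) are relied on:

    # myproject/settings.py (hypothetical project settings module)
    COMMANDS_MODULE = 'myproject.mycommands'

    # myproject/mycommands/hello.py (hypothetical; the file name 'hello'
    # becomes the command name, since _find_commands() strips '.py')
    from scrapy.command.models import ScrapyCommand

    class Command(ScrapyCommand):

        requires_project = False

        def syntax(self):
            return "[options]"

        def short_desc(self):
            return "Example command, not part of this patch"

        def run(self, args, opts):
            print "hello"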
diff --git a/scrapy/command/models.py b/scrapy/command/models.py
index fbe7d68a0..99d43c06b 100644
--- a/scrapy/command/models.py
+++ b/scrapy/command/models.py
@@ -67,8 +67,7 @@ class ScrapyCommand(object):
             help="write lsprof profiling stats to FILE")
         group.add_option("--pidfile", dest="pidfile", metavar="FILE", \
             help="write process ID to FILE")
-        group.add_option("--set", dest="set", action="append", \
-            metavar="SETTING=VALUE", default=[], \
+        group.add_option("--set", dest="set", action="append", default=[], \
            help="set/override setting (may be repeated)")
         group.add_option("--settings", dest="settings", metavar="MODULE",
             help="python path to the Scrapy project settings")
@@ -78,6 +77,15 @@ class ScrapyCommand(object):
         if opts.settings:
             settings.set_settings_module(opts.settings)
 
+        for setting in opts.set:
+            if '=' in setting:
+                name, val = setting.split('=', 1)
+                settings.overrides[name] = val
+            else:
+                sys.stderr.write("%s: invalid argument --set %s - proper format " \
+                    "is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
+                sys.exit(2)
+
         if opts.version:
             print scrapy.__version__
             sys.exit()
@@ -105,14 +113,6 @@ class ScrapyCommand(object):
             with open(opts.pidfile, "w") as f:
                 f.write(str(os.getpid()))
 
-        for setting in opts.set:
-            if '=' in setting:
-                name, val = setting.split('=', 1)
-                settings.overrides[name] = val
-            else:
-                sys.stderr.write("%s: invalid argument --set %s - proper format is --set SETTING=VALUE'\n" % (sys.argv[0], setting))
-                sys.exit(2)
-
     def run(self, args, opts):
         """
         Entry point for running commands
diff --git a/scrapy/core/manager.py b/scrapy/core/manager.py
index c9bf23941..be7de69da 100644
--- a/scrapy/core/manager.py
+++ b/scrapy/core/manager.py
@@ -40,6 +40,7 @@ class ExecutionManager(object):
                 level=log.DEBUG)
 
         scrapyengine.configure()
+        self.configured = True
 
     def crawl(self, *args):
         """Schedule the given args for crawling. args is a list of urls or domains"""
@@ -53,7 +54,8 @@
 
     def runonce(self, *args):
         """Run the engine until it finishes scraping all domains and then exit"""
-        self.configure()
+        if not self.configured:
+            self.configure()
         self.crawl(*args)
         scrapyengine.start()
 
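Two things worth noting in the hunks above. First, process_options() now applies --set overrides immediately after --settings, before anything else (the --version branch, the pidfile handling, or the extensions later loaded by scrapymanager.configure()) reads the settings; previously the overrides were applied last. Second, configure() now records self.configured so that runonce() can skip reconfiguring when cmdline.py has already configured the manager. The --set parsing itself is ordinary optparse 'append' handling; here is a self-contained sketch of the same idea with illustrative names (this is not the ScrapyCommand code path itself):

    import optparse
    import sys

    def parse_overrides(argv):
        parser = optparse.OptionParser()
        parser.add_option("--set", dest="set", action="append", default=[],
            help="set/override setting (may be repeated)")
        opts, args = parser.parse_args(argv)
        overrides = {}
        for setting in opts.set:
            if '=' in setting:
                # split on the first '=' only, so values may contain '='
                name, val = setting.split('=', 1)
                overrides[name] = val
            else:
                sys.stderr.write("invalid argument --set %s - proper format "
                    "is --set SETTING=VALUE\n" % setting)
                sys.exit(2)
        return overrides, args

    # parse_overrides(['--set', 'TEST1=override', '--set', 'LOG_LEVEL=DEBUG'])
    # returns ({'TEST1': 'override', 'LOG_LEVEL': 'DEBUG'}, [])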
diff --git a/scrapy/tests/test_cmdline/__init__.py b/scrapy/tests/test_cmdline/__init__.py
new file mode 100644
index 000000000..40c51dc5e
--- /dev/null
+++ b/scrapy/tests/test_cmdline/__init__.py
@@ -0,0 +1,45 @@
+import sys
+import os
+from subprocess import Popen, PIPE
+import unittest
+
+class CmdlineTest(unittest.TestCase):
+
+    def setUp(self):
+        self.env = os.environ.copy()
+        self.env.pop('SCRAPY_SETTINGS_DISABLED', None)
+        self.env['SCRAPY_SETTINGS_MODULE'] = 'scrapy.tests.test_cmdline.settings'
+
+    def _execute(self, *new_args, **kwargs):
+        args = (sys.executable, '-m', 'scrapy.command.cmdline') + new_args
+        proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
+        comm = proc.communicate()
+        return comm[0].strip()
+
+    def test_default_settings(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
+            'default')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
+            'default + loaded + started')
+
+    def test_override_settings_using_settings_arg(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', \
+            '--settings', 'scrapy.tests.test_cmdline.settings2'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', \
+            '--settings', 'scrapy.tests.test_cmdline.settings2'), \
+            'override + loaded + started')
+
+    def test_override_settings_using_set_arg(self):
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init', '--set', 'TEST1=override'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--set', 'TEST1=override'), \
+            'override + loaded + started')
+
+    def test_override_settings_using_envvar(self):
+        self.env['SCRAPY_TEST1'] = 'override'
+        self.assertEqual(self._execute('settings', '--get', 'TEST1', '--init'), \
+            'override')
+        self.assertEqual(self._execute('settings', '--get', 'TEST1'), \
+            'override + loaded + started')
+
diff --git a/scrapy/tests/test_cmdline/extensions.py b/scrapy/tests/test_cmdline/extensions.py
new file mode 100644
index 000000000..25176c7e2
--- /dev/null
+++ b/scrapy/tests/test_cmdline/extensions.py
@@ -0,0 +1,10 @@
+"""A test extension used to check the settings loading order"""
+
+from scrapy.conf import settings
+
+settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'loaded')
+
+class TestExtension(object):
+
+    def __init__(self):
+        settings.overrides['TEST1'] = "%s + %s" % (settings['TEST1'], 'started')
diff --git a/scrapy/tests/test_cmdline/settings.py b/scrapy/tests/test_cmdline/settings.py
new file mode 100644
index 000000000..382beafd8
--- /dev/null
+++ b/scrapy/tests/test_cmdline/settings.py
@@ -0,0 +1,5 @@
+EXTENSIONS = [
+    'scrapy.tests.test_cmdline.extensions.TestExtension'
+]
+
+TEST1 = 'default'
diff --git a/scrapy/tests/test_cmdline/settings2.py b/scrapy/tests/test_cmdline/settings2.py
new file mode 100644
index 000000000..4b5ba9e16
--- /dev/null
+++ b/scrapy/tests/test_cmdline/settings2.py
@@ -0,0 +1,5 @@
+EXTENSIONS = [
+    'scrapy.tests.test_cmdline.extensions.TestExtension'
+]
+
+TEST1 = 'override'
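A note on the test harness: each assertion shells out to 'python -m scrapy.command.cmdline' in a child process, so every run starts with a fresh interpreter and a fresh settings singleton, and the 'loaded'/'started' markers appended by extensions.py cannot leak between test cases. The same pattern reduced to a standalone helper (a sketch only; the helper name and keyword-based environment handling are illustrative, not part of the patch):

    import os
    import sys
    from subprocess import Popen, PIPE

    def run_module(module, *args, **extra_env):
        """Run 'python -m <module> <args...>' and return its stripped stdout."""
        env = os.environ.copy()
        env.update(extra_env)  # e.g. SCRAPY_SETTINGS_MODULE='...'
        proc = Popen((sys.executable, '-m', module) + args,
                     stdout=PIPE, stderr=PIPE, env=env)
        out, err = proc.communicate()
        return out.strip()

    # run_module('scrapy.command.cmdline', 'settings', '--get', 'TEST1',
    #            SCRAPY_SETTINGS_MODULE='scrapy.tests.test_cmdline.settings')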