1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 01:43:43 +00:00
scrapy/scrapy/cmdline.py

136 lines
4.2 KiB
Python

import sys
import optparse
import cProfile
import inspect
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.xlib import lsprofcalltree
from scrapy.conf import settings
from scrapy.command import ScrapyCommand
from scrapy.exceptions import UsageError
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import inside_project
def _iter_command_classes(module_name):
    """Yield every ScrapyCommand subclass defined directly inside the
    modules found under *module_name* (walked recursively).

    Classes merely imported into a module are skipped: a class is only
    yielded by the module whose name matches its ``__module__``.
    """
    # TODO: add `name` attribute to commands and merge this function with
    # scrapy.utils.spider.iter_spider_classes
    for module in walk_modules(module_name):
        for obj in vars(module).values():
            defined_here = (inspect.isclass(obj)
                            and issubclass(obj, ScrapyCommand)
                            and obj.__module__ == module.__name__)
            if defined_here:
                yield obj
def _get_commands_from_module(module, inproject):
    """Return a dict mapping command name -> command instance for every
    command class found under *module*.

    Commands that require a project (``requires_project``) are excluded
    unless *inproject* is true. The command name is the last dotted
    component of the module that defines the class.
    """
    commands = {}
    for cmd_cls in _iter_command_classes(module):
        # Skip project-only commands when running outside a project.
        if not inproject and cmd_cls.requires_project:
            continue
        name = cmd_cls.__module__.split('.')[-1]
        commands[name] = cmd_cls()
    return commands
def _get_commands_dict(inproject):
    """Build the full command-name -> command-instance mapping.

    Starts from the built-in ``scrapy.commands`` package, then lets the
    user's COMMANDS_MODULE setting override or extend it.
    """
    commands = _get_commands_from_module('scrapy.commands', inproject)
    custom_module = settings['COMMANDS_MODULE']
    if custom_module:
        # User commands take precedence over built-ins with the same name.
        commands.update(_get_commands_from_module(custom_module, inproject))
    return commands
def _pop_command_name(argv):
i = 0
for arg in argv[1:]:
if not arg.startswith('-'):
del argv[i]
return arg
i += 1
def _print_header(inproject):
    # Print the one-line banner shown before command listings: the Scrapy
    # version plus either the active project's BOT_NAME or a "no active
    # project" notice.
    if inproject:
        print "Scrapy %s - project: %s\n" % (scrapy.__version__, \
            settings['BOT_NAME'])
    else:
        print "Scrapy %s - no active project\n" % scrapy.__version__
def _print_commands(inproject):
    # Print the banner, usage line and the list of available commands,
    # sorted by name. Outside a project, also hint that more commands
    # become available inside a project directory.
    _print_header(inproject)
    print "Usage:"
    print " scrapy <command> [options] [args]\n"
    print "Available commands:"
    cmds = _get_commands_dict(inproject)
    for cmdname, cmdclass in sorted(cmds.iteritems()):
        # %-13s left-pads so the short descriptions line up in a column.
        print " %-13s %s" % (cmdname, cmdclass.short_desc())
    if not inproject:
        print
        print " [ more ] More commands available when run from project directory"
    print
    print 'Use "scrapy <command> -h" to see more info about a command'
def _print_unknown_command(cmdname, inproject):
    # Print the banner followed by an "unknown command" notice; used just
    # before exiting with status 2 in execute().
    _print_header(inproject)
    print "Unknown command: %s\n" % cmdname
    print 'Use "scrapy" to see available commands'
def _run_print_help(parser, func, *a, **kw):
    # Call func(*a, **kw), translating a UsageError into command-line
    # feedback: report the message via the parser and/or print the full
    # help text, then exit with status 2.
    try:
        func(*a, **kw)
    except UsageError, e:
        if str(e):
            # NOTE: optparse's parser.error() itself exits with status 2,
            # so when a message is present the lines below never run.
            parser.error(str(e))
        if e.print_help:
            parser.print_help()
        sys.exit(2)
def execute(argv=None):
    # Main entry point of the Scrapy command-line tool.
    #
    # Resolves the command named on the command line, parses its options,
    # and runs it, exiting the process with the command's exit code.
    # Mutates argv in place (the command name is removed by
    # _pop_command_name). Exits with 0 when no command is given, 2 for an
    # unknown command.
    if argv is None:
        argv = sys.argv
    # The crawler must be installed before commands run, since commands
    # receive it via set_crawler() below.
    crawler = CrawlerProcess(settings)
    crawler.install()
    inproject = inside_project()
    cmds = _get_commands_dict(inproject)
    cmdname = _pop_command_name(argv)
    parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(), \
        conflict_handler='resolve')
    if not cmdname:
        _print_commands(inproject)
        sys.exit(0)
    elif cmdname not in cmds:
        _print_unknown_command(cmdname, inproject)
        sys.exit(2)
    cmd = cmds[cmdname]
    parser.usage = "scrapy %s %s" % (cmdname, cmd.syntax())
    parser.description = cmd.long_desc()
    # Apply the command's default settings before parsing options, so
    # option processing can see (and override) them.
    settings.defaults.update(cmd.default_settings)
    cmd.settings = settings
    cmd.add_options(parser)
    opts, args = parser.parse_args(args=argv[1:])
    # Both steps below may raise UsageError; _run_print_help converts it
    # into help output plus exit(2).
    _run_print_help(parser, cmd.process_options, args, opts)
    cmd.set_crawler(crawler)
    _run_print_help(parser, _run_command, cmd, args, opts)
    sys.exit(cmd.exitcode)
def _run_command(cmd, args, opts):
if opts.profile or opts.lsprof:
_run_command_profiled(cmd, args, opts)
else:
cmd.run(args, opts)
def _run_command_profiled(cmd, args, opts):
if opts.profile:
sys.stderr.write("scrapy: writing cProfile stats to %r\n" % opts.profile)
if opts.lsprof:
sys.stderr.write("scrapy: writing lsprof stats to %r\n" % opts.lsprof)
loc = locals()
p = cProfile.Profile()
p.runctx('cmd.run(args, opts)', globals(), loc)
if opts.profile:
p.dump_stats(opts.profile)
k = lsprofcalltree.KCacheGrind(p)
if opts.lsprof:
with open(opts.lsprof, 'w') as f:
k.output(f)
# Allow running this module directly (e.g. "python -m scrapy.cmdline").
if __name__ == '__main__':
    execute()