Spider Arguments support for parse command and test
commit df19693ed2
parent 21c8b89422
--- a/scrapy/commands/parse.py
+++ b/scrapy/commands/parse.py
@@ -3,6 +3,7 @@ from scrapy.command import ScrapyCommand
 from scrapy.http import Request
 from scrapy.item import BaseItem
 from scrapy.utils import display
+from scrapy.utils.conf import arglist_to_dict
 from scrapy.utils.spider import iterate_spider_output, create_spider_for_request
 from scrapy.exceptions import UsageError
 from scrapy import log
@@ -14,7 +15,7 @@ class Command(ScrapyCommand):
     spider = None
     items = {}
     requests = {}
-
+
     first_response = None
 
     def syntax(self):
@@ -27,6 +28,8 @@ class Command(ScrapyCommand):
         ScrapyCommand.add_options(self, parser)
         parser.add_option("--spider", dest="spider", default=None, \
             help="use this spider without looking for one")
+        parser.add_option("-a", dest="spargs", action="append", default=[], metavar="NAME=VALUE", \
+            help="set spider argument (may be repeated)")
         parser.add_option("--nolinks", dest="nolinks", action="store_true", \
             help="don't show links to follow (extracted requests)")
         parser.add_option("--noitems", dest="noitems", action="store_true", \
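
With the option wired up, spider arguments can be passed straight through the parse command. A hypothetical invocation (spider name, arguments, and URL are illustrative only):

```
scrapy parse --spider=myspider -a category=books -a per_page=50 -c parse http://example.com/books
```

Because the option uses `action="append"`, each `-a NAME=VALUE` adds one entry to the `opts.spargs` list; that list is folded into a dict later by the new `process_options` method below.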
@@ -56,7 +59,7 @@ class Command(ScrapyCommand):
     def add_requests(self, lvl, new_reqs):
         old_reqs = self.requests.get(lvl, [])
         self.requests[lvl] = old_reqs + new_reqs
-
+
     def print_items(self, lvl=None, colour=True):
         if lvl is None:
             items = [item for lst in self.items.values() for item in lst]
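
For context, the hunk above shows how the command accumulates extracted requests per depth level. A minimal standalone sketch of that grouping (the real code keeps the dict on the Command instance):

```python
# Standalone sketch of the per-level request grouping shown above.
requests = {}

def add_requests(lvl, new_reqs):
    # append new requests to whatever was already collected at this depth
    old_reqs = requests.get(lvl, [])
    requests[lvl] = old_reqs + new_reqs

add_requests(1, ['request_a'])
add_requests(1, ['request_b'])
assert requests == {1: ['request_a', 'request_b']}
```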
@@ -120,12 +123,12 @@ class Command(ScrapyCommand):
     def set_spider(self, url, opts):
         if opts.spider:
             try:
-                self.spider = self.crawler.spiders.create(opts.spider)
+                self.spider = self.crawler.spiders.create(opts.spider, **opts.spargs)
             except KeyError:
                 log.msg(format='Unable to find spider: %(spider)s',
                         level=log.ERROR, spider=opts.spider)
         else:
-            self.spider = create_spider_for_request(self.crawler.spiders, Request(url))
+            self.spider = create_spider_for_request(self.crawler.spiders, Request(url), **opts.spargs)
             if not self.spider:
                 log.msg(format='Unable to find spider for: %(url)s',
                         level=log.ERROR, url=url)
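
Passing `**opts.spargs` into spider creation works because BaseSpider's constructor stores unrecognized keyword arguments as instance attributes. A simplified sketch of that behaviour (a stand-in, not Scrapy's exact code):

```python
class SpiderSketch(object):
    """Simplified stand-in for how scrapy.spider.BaseSpider handles kwargs."""
    name = None

    def __init__(self, name=None, **kwargs):
        if name is not None:
            self.name = name
        # each -a NAME=VALUE becomes a plain instance attribute
        self.__dict__.update(kwargs)

spider = SpiderSketch(name='myspider', test_arg='1')
assert spider.test_arg == '1'  # readable as self.test_arg inside callbacks
```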
@@ -183,6 +186,13 @@ class Command(ScrapyCommand):
         request.callback = callback
         return request
 
+    def process_options(self, args, opts):
+        ScrapyCommand.process_options(self, args, opts)
+        try:
+            opts.spargs = arglist_to_dict(opts.spargs)
+        except ValueError:
+            raise UsageError("Invalid -a value, use -a NAME=VALUE", print_help=False)
+
     def run(self, args, opts):
         # parse arguments
         if not len(args) == 1 or not is_url(args[0]):
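The helper imported at the top, `arglist_to_dict`, is presumably close to the sketch below: each `NAME=VALUE` item is split on the first `=` only. An item with no `=` makes `dict()` raise ValueError, which is exactly the error `process_options` converts into a UsageError:

```python
def arglist_to_dict(arglist):
    """Sketch of scrapy.utils.conf.arglist_to_dict: turn
    ['arg1=val1', 'arg2=val2', ...] into {'arg1': 'val1', 'arg2': 'val2'}."""
    # split on the first '=' only, so values may themselves contain '='
    return dict(x.split('=', 1) for x in arglist)

assert arglist_to_dict(['a=1', 'b=x=y']) == {'a': '1', 'b': 'x=y'}
# arglist_to_dict(['bogus'])  # ValueError -> "Invalid -a value, use -a NAME=VALUE"
```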
--- a/scrapy/tests/test_commands.py
+++ b/scrapy/tests/test_commands.py
@@ -175,3 +175,26 @@ from scrapy.spider import BaseSpider
         log = p.stderr.read()
         self.assert_("Unable to load" in log)
 
+
+class ParseCommandTest(CommandTest):
+
+    def test_spider_arguments(self):
+        spider_name = 'parse_spider'
+        fname = abspath(join(self.proj_mod_path, 'spiders', 'myspider.py'))
+        with open(fname, 'w') as f:
+            f.write("""
+from scrapy import log
+from scrapy.spider import BaseSpider
+
+class MySpider(BaseSpider):
+    name = '{0}'
+
+    def parse(self, response):
+        if self.test_arg:
+            self.log('It Works!')
+        return []
+""".format(spider_name))
+
+        p = self.proc('parse', '--spider', spider_name, '-a', 'test_arg=1', '-c', 'parse', 'http://scrapinghub.com')
+        log = p.stderr.read()
+        self.assert_("[parse_spider] DEBUG: It Works!" in log, log)
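Note that command-line spider arguments arrive as strings: the spider sees `test_arg == '1'`, which is truthy, so `if self.test_arg:` fires and emits the DEBUG line the assertion checks for. Spiders that need typed values must convert them explicitly.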