1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 04:03:49 +00:00

Fixed yet another bug in the parse command: it didn't extract links from all of the rules (only from the matching ones)

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40467
This commit is contained in:
elpolilla 2008-12-03 14:24:41 +00:00
parent 242bd38b3f
commit e030f38427

View File

@ -60,7 +60,7 @@ class Command(ScrapyCommand):
def run(self, args, opts):
if opts.matches:
url = args[0]
method = None
method = None
else:
if len(args) < 2:
print "A URL and method is required"
@ -68,8 +68,8 @@ class Command(ScrapyCommand):
else:
url, method = args[:2]
items = set()
links = set()
items = []
links = []
for response in fetch([url]):
spider = spiders.fromurl(response.url)
if not spider:
@ -78,19 +78,19 @@ class Command(ScrapyCommand):
if method:
ret_items, ret_links = self.run_method(spider, response, method, args, opts)
items = items.union(ret_items)
links = links.union(ret_links)
items.extend(ret_items)
links.extend(ret_links)
else:
if hasattr(spider, 'rules'):
for rule in spider.rules:
extracted_links = rule.link_extractor.extract_urls(response)
for link in extracted_links:
links.add(Request(url=link.url, link_text=link.text))
already_parsed = False
if rule.link_extractor.matches(response.url):
for rule in spider.rules:
links.extend(Request(url=link.url, link_text=link.text) for link in rule.link_extractor.extract_urls(response))
if not already_parsed and rule.link_extractor.matches(response.url):
already_parsed = True
ret_items, ret_links = self.run_method(spider, response, rule.callback, args, opts)
items = items.union(ret_items)
links = links.union(ret_links)
items.extend(ret_items)
links.extend(ret_links)
else:
log.msg('No rules found for spider "%s", please specify a parsing method' % spider.domain_name)
continue