Reverted change in r473. Now an error message is shown when no rules match the provided URL.
Modified print_results to make it show the callback from which items/links were extracted.

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40476
parent 2fe8afdade
commit b52f7726f1
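In outline, the change replaces the single aggregated print_results call at the end of run() with one print_results call per executed callback, tagged with that callback's name. A minimal, hypothetical sketch of the new flow (Python 2 syntax to match the file; the stub functions below stand in for the command's real helpers):

    def run_callback(cb):
        # Stub: the real command invokes the spider method named cb on the response.
        return (['item via %s' % cb], ['link via %s' % cb])

    def print_results(items, links, cb_name):
        # New behaviour: the section headers carry the callback name.
        print "# Scraped Items - callback: %s" % cb_name, "-"*60
        print items
        print "# Links - callback: %s" % cb_name, "-"*68
        print links

    callbacks = ['parse_category', 'parse_item']   # hypothetical callback names
    for callback in callbacks:
        items, links = run_callback(callback)
        print_results(items, links, callback)      # per callback, no accumulation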
@@ -46,18 +46,18 @@ class Command(ScrapyCommand):
         return (), ()
 
-    def print_results(self, items, links, opts):
+    def print_results(self, items, links, cb_name, opts):
         display.nocolour = opts.nocolour
         if not opts.noitems:
             for item in items:
                 for key in item.__dict__.keys():
                     if key.startswith('_'):
                         item.__dict__.pop(key, None)
-            print "# Scraped Items", "-"*60
+            print "# Scraped Items - callback: %s" % cb_name, "-"*60
             display.pprint(list(items))
 
         if not opts.nolinks:
-            print "# Links", "-"*68
+            print "# Links - callback: %s" % cb_name, "-"*68
             display.pprint(list(links))
 
     def run(self, args, opts):
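For illustration, a small hypothetical snippet (again Python 2, like the file) showing both the private-attribute stripping and the new per-callback headers:

    class Item(object):
        pass

    item = Item()
    item.name = 'foo'
    item._cached = 'stripped before display'

    # Under Python 2, __dict__.keys() returns a fresh list, so popping while
    # iterating over it is safe.
    for key in item.__dict__.keys():
        if key.startswith('_'):
            item.__dict__.pop(key, None)
    print item.__dict__                        # -> {'name': 'foo'}

    cb_name = 'parse'                          # hypothetical callback name
    print "# Scraped Items - callback: %s" % cb_name, "-"*60
    print "# Links - callback: %s" % cb_name, "-"*68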
@@ -65,7 +65,6 @@ class Command(ScrapyCommand):
             print "An URL is required"
             return
 
-        ret_items, ret_links = [], []
         for response in fetch(args):
             spider = spiders.fromurl(response.url)
             if not spider:
@@ -75,27 +74,22 @@ class Command(ScrapyCommand):
             if self.callbacks:
                 for callback in self.callbacks:
                     items, links = self.run_callback(spider, response, callback, args, opts)
-                    ret_items.extend(items)
-                    ret_links.extend(links)
-                continue
+                    self.print_results(items, links, callback, opts)
 
             elif opts.rules:
-                rules = getattr(spider, 'rules')
+                rules = getattr(spider, 'rules', None)
                 if rules:
                     items, links = [], []
                     for rule in rules:
                         if rule.callback and rule.link_extractor.matches(response.url):
                             items, links = self.run_callback(spider, response, rule.callback, args, opts)
+                            self.print_results(items, links, rule.callback, opts)
                             break
                 else:
-                    log.msg('No rules found for spider "%s", calling default method "parse"' % spider.domain_name)
-                    items, links = self.run_callback(spider, response, 'parse', args, opts)
+                    log.msg('No rules found for spider "%s", please specify a callback for parsing' % spider.domain_name)
+                    continue
 
             else:
                 items, links = self.run_callback(spider, response, 'parse', args, opts)
-                ret_items.extend(items)
-                ret_links.extend(links)
+                self.print_results(items, links, 'parse', opts)
 
-        self.print_results(ret_items, ret_links, opts)
 
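The rules branch dispatches to the callback of the first rule whose link extractor matches the response URL; when nothing applies, the command now logs the message above and moves on instead of falling back to parse. A minimal sketch of that first-match dispatch, with stand-in classes rather than Scrapy's real Rule/link extractor (the for-else here is just the sketch's way of reporting a miss):

    import re

    class FakeLinkExtractor(object):           # stand-in, not Scrapy's class
        def __init__(self, pattern):
            self.pattern = re.compile(pattern)
        def matches(self, url):
            return bool(self.pattern.search(url))

    class FakeRule(object):                    # stand-in for a spider rule
        def __init__(self, pattern, callback=None):
            self.link_extractor = FakeLinkExtractor(pattern)
            self.callback = callback

    rules = [FakeRule(r'/category/'),                        # follow-only, no callback
             FakeRule(r'/product/', callback='parse_item')]

    url = 'http://example.com/product/123'
    for rule in rules:
        if rule.callback and rule.link_extractor.matches(url):
            print 'dispatching to callback: %s' % rule.callback
            break
    else:
        print 'no rule matched %s, skipping' % url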