Mirror of https://github.com/scrapy/scrapy.git, synced 2025-02-27 10:23:40 +00:00

. Reverted change in r473. Now an error message is shown when no rules match the provided url

. Modified print_results to make it show the callback from which items/links were extracted

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40476
elpolilla 2008-12-08 11:15:38 +00:00
parent 2fe8afdade
commit b52f7726f1
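
The "error message when no rules match" behavior appears to hinge on Python's for/else: reading the else: after the rules loop in the diff below as belonging to the for, its suite runs only when the loop finishes without hitting break, and the continue then skips to the next response. A minimal, self-contained sketch of the construct (Python 2 to match the codebase of the time; the names and URL are illustrative, not from the patch):

# for/else demo: the else suite runs only if the loop never hit `break`.
rules = ['rule-a', 'rule-b']            # stand-ins for a spider's crawling rules

def matches(rule, url):
    return False                        # pretend no rule matches this URL

for rule in rules:
    if matches(rule, 'http://example.com'):
        print 'matched:', rule
        break
else:
    print 'no rules matched, please specify a callback'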


@@ -46,18 +46,18 @@ class Command(ScrapyCommand):
         return (), ()
 
-    def print_results(self, items, links, opts):
+    def print_results(self, items, links, cb_name, opts):
         display.nocolour = opts.nocolour
         if not opts.noitems:
             for item in items:
                 for key in item.__dict__.keys():
                     if key.startswith('_'):
                         item.__dict__.pop(key, None)
-            print "# Scraped Items", "-"*60
+            print "# Scraped Items - callback: %s" % cb_name, "-"*60
             display.pprint(list(items))
         if not opts.nolinks:
-            print "# Links", "-"*68
+            print "# Links - callback: %s" % cb_name, "-"*68
             display.pprint(list(links))
 
     def run(self, args, opts):
@@ -65,7 +65,6 @@ class Command(ScrapyCommand):
             print "An URL is required"
             return
 
-        ret_items, ret_links = [], []
         for response in fetch(args):
             spider = spiders.fromurl(response.url)
             if not spider:
@@ -75,27 +74,22 @@ class Command(ScrapyCommand):
             if self.callbacks:
                 for callback in self.callbacks:
                     items, links = self.run_callback(spider, response, callback, args, opts)
-                    ret_items.extend(items)
-                    ret_links.extend(links)
-                continue
+                    self.print_results(items, links, callback, opts)
             elif opts.rules:
-                rules = getattr(spider, 'rules')
+                rules = getattr(spider, 'rules', None)
                 if rules:
                     items, links = [], []
                     for rule in rules:
                         if rule.callback and rule.link_extractor.matches(response.url):
                             items, links = self.run_callback(spider, response, rule.callback, args, opts)
+                            self.print_results(items, links, rule.callback, opts)
                             break
                     else:
-                        log.msg('No rules found for spider "%s", calling default method "parse"' % spider.domain_name)
-                        items, links = self.run_callback(spider, response, 'parse', args, opts)
+                        log.msg('No rules found for spider "%s", please specify a callback for parsing' % spider.domain_name)
+                        continue
             else:
                 items, links = self.run_callback(spider, response, 'parse', args, opts)
-
-            ret_items.extend(items)
-            ret_links.extend(links)
-
-        self.print_results(ret_items, ret_links, opts)
+                self.print_results(items, links, 'parse', opts)
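
Stitched together, the hunks above leave run() looking roughly as follows. This is a reconstruction from the diff, not the verbatim file: code outside the hunks (imports, the class header, the body of the `if not spider:` branch) is assumed, and the pairing of the inner else: with the rules loop is inferred from the commit message's "no rules match" behavior.

# Reconstructed post-patch run() (sketch; scaffolding outside the hunks is assumed)
def run(self, args, opts):
    if not args:
        print "An URL is required"
        return

    for response in fetch(args):
        spider = spiders.fromurl(response.url)
        if not spider:
            continue  # assumed: skip URLs no spider claims (body not shown in the hunks)

        if self.callbacks:
            # explicit callbacks: print one result block per callback
            for callback in self.callbacks:
                items, links = self.run_callback(spider, response, callback, args, opts)
                self.print_results(items, links, callback, opts)
        elif opts.rules:
            rules = getattr(spider, 'rules', None)
            if rules:
                items, links = [], []
                for rule in rules:
                    if rule.callback and rule.link_extractor.matches(response.url):
                        items, links = self.run_callback(spider, response, rule.callback, args, opts)
                        self.print_results(items, links, rule.callback, opts)
                        break
                else:
                    # for/else: no rule matched this URL, so report it instead of
                    # silently falling back to 'parse' as the pre-revert code did
                    log.msg('No rules found for spider "%s", please specify a callback for parsing' % spider.domain_name)
                    continue
        else:
            items, links = self.run_callback(spider, response, 'parse', args, opts)
            self.print_results(items, links, 'parse', opts)

Because results are now printed per callback inside the loop, the ret_items/ret_links accumulators and the single trailing print_results call could be deleted.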