1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 18:23:57 +00:00

Fixed bug in Scrapy shell which hung if requests failed to download (#205), added dont_filter=True to requests generated when calling the shell with a url argument, and changed formatting of messages

This commit is contained in:
Pablo Hoffman 2010-08-19 21:11:39 -03:00
parent 30e2404d8f
commit 6dd76ab54b
2 changed files with 21 additions and 23 deletions

View File

@@ -10,7 +10,6 @@ from scrapy.shell import Shell
class Command(ScrapyCommand): class Command(ScrapyCommand):
requires_project = False requires_project = False
default_settings = {'LOG_LEVEL': 'WARNING'}
def syntax(self): def syntax(self):
return "[url|file]" return "[url|file]"

View File

@@ -10,7 +10,9 @@ import urlparse
import signal import signal
from twisted.internet import reactor, threads from twisted.internet import reactor, threads
from twisted.python.failure import Failure
from scrapy import log
from scrapy.spider import BaseSpider, spiders from scrapy.spider import BaseSpider, spiders
from scrapy.selector import XmlXPathSelector, HtmlXPathSelector from scrapy.selector import XmlXPathSelector, HtmlXPathSelector
from scrapy.utils.misc import load_object from scrapy.utils.misc import load_object
@@ -50,21 +52,24 @@ class Shell(object):
url = request.url url = request.url
else: else:
url = parse_url(request_or_url) url = parse_url(request_or_url)
request = Request(url) request = Request(url, dont_filter=True)
spider = spiders.create_for_request(request, BaseSpider('default'), \ spider = spiders.create_for_request(request, BaseSpider('default'), \
log_multiple=True) log_multiple=True)
print "Fetching %s..." % request print "Fetching %s..." % request
scrapymanager.engine.open_spider(spider) scrapymanager.engine.open_spider(spider)
response = threads.blockingCallFromThread(reactor, scrapymanager.engine.schedule, \ response = None
request, spider) try:
if response: response = threads.blockingCallFromThread(reactor, \
self.populate_vars(url, response, request, spider) scrapymanager.engine.schedule, request, spider)
if print_help: except:
self.print_help() log.err(Failure(), "Error fetching response", spider=spider)
else: self.populate_vars(url, response, request, spider)
print "Done - use shelp() to see available objects" if print_help:
self.print_help()
else:
print "Done - use shelp() to see available objects"
def populate_vars(self, url=None, response=None, request=None, spider=None): def populate_vars(self, url=None, response=None, request=None, spider=None):
item = self.item_class() item = self.item_class()
@@ -86,20 +91,17 @@ class Shell(object):
self.update_vars(self.vars) self.update_vars(self.vars)
def print_help(self): def print_help(self):
print "Available objects"
print "================="
print print
print "Available objects:"
for k, v in self.vars.iteritems(): for k, v in self.vars.iteritems():
if relevant_var(k): if relevant_var(k):
print " %-10s: %s" % (k, v) print " %-10s %s" % (k, v)
print print
print "Available shortcuts" print "Convenient shortcuts:"
print "===================" print " shelp() Print this help"
print
print " shelp() : Prints this help."
if not self.nofetch: if not self.nofetch:
print " fetch(req_or_url) : Fetch a new request or URL and update objects" print " fetch(req_or_url) Fetch a new request or URL and update shell objects"
print " view(response) : View response in a browser" print " view(response) View response in a browser"
print print
def start(self, url): def start(self, url):
@@ -112,10 +114,7 @@ class Shell(object):
def inspect_response(self, response): def inspect_response(self, response):
print print
print "Scrapy Shell" print "Scrapy Shell - inspecting response: %s" % response
print "============"
print
print "Inspecting: %s" % response
print "Use shelp() to see available objects" print "Use shelp() to see available objects"
print print
request = response.request request = response.request