1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 18:23:57 +00:00

Fixed bug in Scrapy shell which hung if requests failed to download (#205), added dont_filter=True to requests generated when calling the shell with a url argument, and changed formatting of messages

This commit is contained in:
Pablo Hoffman 2010-08-19 21:11:39 -03:00
parent 30e2404d8f
commit 6dd76ab54b
2 changed files with 21 additions and 23 deletions

View File

@@ -10,7 +10,6 @@ from scrapy.shell import Shell
class Command(ScrapyCommand): class Command(ScrapyCommand):
requires_project = False requires_project = False
default_settings = {'LOG_LEVEL': 'WARNING'}
def syntax(self): def syntax(self):
return "[url|file]" return "[url|file]"

View File

@@ -10,7 +10,9 @@ import urlparse
import signal import signal
from twisted.internet import reactor, threads from twisted.internet import reactor, threads
from twisted.python.failure import Failure
from scrapy import log
from scrapy.spider import BaseSpider, spiders from scrapy.spider import BaseSpider, spiders
from scrapy.selector import XmlXPathSelector, HtmlXPathSelector from scrapy.selector import XmlXPathSelector, HtmlXPathSelector
from scrapy.utils.misc import load_object from scrapy.utils.misc import load_object
@@ -50,21 +52,24 @@ class Shell(object):
url = request.url url = request.url
else: else:
url = parse_url(request_or_url) url = parse_url(request_or_url)
request = Request(url) request = Request(url, dont_filter=True)
spider = spiders.create_for_request(request, BaseSpider('default'), \ spider = spiders.create_for_request(request, BaseSpider('default'), \
log_multiple=True) log_multiple=True)
print "Fetching %s..." % request print "Fetching %s..." % request
scrapymanager.engine.open_spider(spider) scrapymanager.engine.open_spider(spider)
response = threads.blockingCallFromThread(reactor, scrapymanager.engine.schedule, \ response = None
request, spider) try:
if response: response = threads.blockingCallFromThread(reactor, \
self.populate_vars(url, response, request, spider) scrapymanager.engine.schedule, request, spider)
if print_help: except:
self.print_help() log.err(Failure(), "Error fetching response", spider=spider)
else: self.populate_vars(url, response, request, spider)
print "Done - use shelp() to see available objects" if print_help:
self.print_help()
else:
print "Done - use shelp() to see available objects"
def populate_vars(self, url=None, response=None, request=None, spider=None): def populate_vars(self, url=None, response=None, request=None, spider=None):
item = self.item_class() item = self.item_class()
@@ -86,20 +91,17 @@ class Shell(object):
self.update_vars(self.vars) self.update_vars(self.vars)
def print_help(self): def print_help(self):
print "Available objects"
print "================="
print print
print "Available objects:"
for k, v in self.vars.iteritems(): for k, v in self.vars.iteritems():
if relevant_var(k): if relevant_var(k):
print " %-10s: %s" % (k, v) print " %-10s %s" % (k, v)
print print
print "Available shortcuts" print "Convenient shortcuts:"
print "===================" print " shelp() Print this help"
print
print " shelp() : Prints this help."
if not self.nofetch: if not self.nofetch:
print " fetch(req_or_url) : Fetch a new request or URL and update objects" print " fetch(req_or_url) Fetch a new request or URL and update shell objects"
print " view(response) : View response in a browser" print " view(response) View response in a browser"
print print
def start(self, url): def start(self, url):
@@ -112,10 +114,7 @@ class Shell(object):
def inspect_response(self, response): def inspect_response(self, response):
print print
print "Scrapy Shell" print "Scrapy Shell - inspecting response: %s" % response
print "============"
print
print "Inspecting: %s" % response
print "Use shelp() to see available objects" print "Use shelp() to see available objects"
print print
request = response.request request = response.request