Merge branch 'shell' of git://github.com/stav/scrapy into stav-shell

commit 19d0942c74
Pablo Hoffman, 2013-03-04 02:14:01 -02:00

scrapy/shell.py

@@ -3,7 +3,6 @@ Scrapy Shell
 See documentation in docs/topics/shell.rst
 """
 import signal

 from twisted.internet import reactor, threads
@@ -20,10 +19,12 @@ from scrapy.utils.response import open_in_browser
 from scrapy.utils.console import start_python_console
 from scrapy.settings import Settings
 from scrapy.http import Request, Response, HtmlResponse, XmlResponse
+from scrapy.exceptions import IgnoreRequest


 class Shell(object):

-    relevant_classes = (BaseSpider, Request, Response, BaseItem, \
+    relevant_classes = (BaseSpider, Request, Response, BaseItem,
         XPathSelector, Settings)

     def __init__(self, crawler, update_vars=None, code=None):
@@ -63,7 +64,7 @@ class Shell(object):
         if self.spider:
             return self.spider
         if spider is None:
-            spider = create_spider_for_request(self.crawler.spiders, request, \
+            spider = create_spider_for_request(self.crawler.spiders, request,
                 BaseSpider('default'), log_multiple=True)
         spider.set_crawler(self.crawler)
         self.crawler.engine.open_spider(spider, close_if_idle=False)
@@ -79,8 +80,11 @@ class Shell(object):
             request = Request(url, dont_filter=True)
         request.meta['handle_httpstatus_all'] = True
         response = None
-        response, spider = threads.blockingCallFromThread(reactor, \
-            self._schedule, request, spider)
+        try:
+            response, spider = threads.blockingCallFromThread(
+                reactor, self._schedule, request, spider)
+        except IgnoreRequest:
+            pass
         self.populate_vars(response, request, spider)

     def populate_vars(self, response=None, request=None, spider=None):
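
The practical effect of the last hunk: if scheduling the request raises IgnoreRequest (for example, a downloader middleware drops it), the exception no longer propagates out of the shell; response simply stays None and the interactive session keeps running. Below is a minimal, self-contained sketch of that pattern, assuming a stand-in schedule() callable; only IgnoreRequest is taken from the diff, the other names are illustrative.

from scrapy.exceptions import IgnoreRequest

def fetch_or_none(schedule, request, spider):
    """Return (response, spider); response stays None if the request was ignored."""
    response = None
    try:
        # schedule() stands in for the blocking call the shell makes via
        # threads.blockingCallFromThread(reactor, self._schedule, request, spider)
        response, spider = schedule(request, spider)
    except IgnoreRequest:
        # The request was dropped (e.g. by a downloader middleware); carry on
        # with response left as None instead of aborting the shell.
        pass
    return response, spider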