
added CloseSpider exception, to manually close spiders

This commit is contained in:
Pablo Hoffman 2011-07-12 14:24:10 -03:00
parent 4bb409923c
commit 4fde1ef94d
4 changed files with 33 additions and 1 deletion

docs/faq.rst

@@ -240,3 +240,8 @@ How can I see the cookies being sent and received from Scrapy?
 Enable the :setting:`COOKIES_DEBUG` setting.
+
+How can I manually stop a running spider?
+-----------------------------------------
+
+Raise the :exc:`~scrapy.exceptions.CloseSpider` exception from a callback. For
+more info see: :exc:`~scrapy.exceptions.CloseSpider`.
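
For completeness, this is what such a callback can look like in a full spider. A minimal sketch, not part of this commit, assuming the scrapy.Spider base class and a made-up ExampleSpider:

    import scrapy
    from scrapy.exceptions import CloseSpider

    class ExampleSpider(scrapy.Spider):
        """Hypothetical spider, only to illustrate the FAQ answer above."""
        name = 'example'
        start_urls = ['http://example.com/']

        def parse(self, response):
            # Stop the whole crawl (not just this request) when the site
            # starts refusing to serve us; the string becomes the close reason.
            if b'Bandwidth exceeded' in response.body:
                raise CloseSpider('bandwidth_exceeded')
            yield {'url': response.url}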

docs/topics/exceptions.rst

@@ -22,6 +22,23 @@ DropItem
 The exception that must be raised by item pipeline stages to stop processing an
 Item. For more information see :ref:`topics-item-pipeline`.
+
+CloseSpider
+-----------
+
+.. exception:: CloseSpider(reason='cancelled')
+
+    This exception can be raised from a spider callback to request the spider to be
+    closed/stopped. Supported arguments:
+
+    :param reason: the reason for closing
+    :type reason: str
+
+For example::
+
+    def parse_page(self, response):
+        if 'Bandwidth exceeded' in response.body:
+            raise CloseSpider('bandwidth_exceeded')
 IgnoreRequest
 -------------
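
The reason passed to CloseSpider also surfaces after the callback returns: the engine closes the spider with it, so it can be observed from the spider_closed signal. A hedged sketch using the crawler extension API; CloseReasonLogger is a made-up name, not part of this commit:

    from scrapy import signals

    class CloseReasonLogger:
        """Hypothetical extension: logs the reason a spider was closed with."""

        @classmethod
        def from_crawler(cls, crawler):
            ext = cls()
            crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
            return ext

        def spider_closed(self, spider, reason):
            # reason is e.g. 'bandwidth_exceeded' when CloseSpider was raised,
            # or 'finished' when the crawl ended normally.
            spider.logger.info("spider closed: %s", reason)

An extension like this still has to be enabled through the EXTENSIONS setting.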

scrapy/core/scraper.py

@@ -10,7 +10,7 @@ from scrapy.utils.defer import defer_result, defer_succeed, parallel, iter_errback
 from scrapy.utils.spider import iterate_spider_output
 from scrapy.utils.misc import load_object
 from scrapy.utils.signal import send_catch_log, send_catch_log_deferred
-from scrapy.exceptions import IgnoreRequest, DropItem
+from scrapy.exceptions import CloseSpider, IgnoreRequest, DropItem
 from scrapy import signals
 from scrapy.http import Request, Response
 from scrapy.item import BaseItem
@@ -143,6 +143,10 @@ class Scraper(object):
         return dfd.addCallback(iterate_spider_output)
 
     def handle_spider_error(self, _failure, request, response, spider, propagated_failure=None):
+        exc = _failure.value
+        if isinstance(exc, CloseSpider):
+            self.engine.close_spider(spider, exc.reason or 'cancelled')
+            return
         referer = request.headers.get('Referer', None)
         msg = "Spider error processing <%s> (referer: <%s>)" % \
             (request.url, referer)
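
The four added lines above are the whole mechanism: the Failure wrapping whatever the callback raised is unwrapped, and a CloseSpider is routed to the engine instead of being logged as a spider error. A rough, self-contained sketch of that dispatch; FakeEngine and the free-standing handle_spider_error are stand-ins, not Scrapy code:

    from twisted.python.failure import Failure
    from scrapy.exceptions import CloseSpider

    class FakeEngine:
        """Stand-in for the execution engine the real Scraper delegates to."""
        def close_spider(self, spider, reason):
            print("closing %s: %s" % (spider, reason))

    def handle_spider_error(failure, spider, engine):
        exc = failure.value                  # the exception the callback raised
        if isinstance(exc, CloseSpider):
            # A request to stop, not an error to log.
            engine.close_spider(spider, exc.reason or 'cancelled')
            return
        print("spider error: %r" % exc)      # the normal error path (logged in Scrapy)

    handle_spider_error(Failure(CloseSpider('bandwidth_exceeded')), 'example', FakeEngine())
    # -> closing example: bandwidth_exceeded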

scrapy/exceptions.py

@@ -20,6 +20,12 @@ class DontCloseSpider(Exception):
     """Request the spider not to be closed yet"""
     pass
 
+class CloseSpider(Exception):
+    """Raise this from callbacks to request the spider to be closed"""
+
+    def __init__(self, reason='cancelled'):
+        self.reason = reason
+
 # Items
 
 class DropItem(Exception):
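
The new exception is deliberately small: it only carries a reason string, defaulting to 'cancelled'. A quick check of that behaviour, grounded directly in the class above:

    from scrapy.exceptions import CloseSpider

    assert CloseSpider().reason == 'cancelled'            # default reason
    assert CloseSpider('bandwidth_exceeded').reason == 'bandwidth_exceeded'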