1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 12:04:00 +00:00

added CloseSpider exception, to manually close spiders

This commit is contained in:
Pablo Hoffman 2011-07-12 14:24:10 -03:00
parent 4bb409923c
commit 4fde1ef94d
4 changed files with 33 additions and 1 deletions

View File

@ -240,3 +240,8 @@ How can I see the cookies being sent and received from Scrapy?
Enable the :setting:`COOKIES_DEBUG` setting.
How can I manually stop a running spider?
-----------------------------------------
Raise the :exc:`~scrapy.exceptions.CloseSpider` exception from a callback. For
more information see the :exc:`~scrapy.exceptions.CloseSpider` documentation.

View File

@ -22,6 +22,23 @@ DropItem
The exception that must be raised by item pipeline stages to stop processing an
Item. For more information see :ref:`topics-item-pipeline`.
CloseSpider
-----------
.. exception:: CloseSpider(reason='cancelled')
This exception can be raised from a spider callback to request the spider to be
closed/stopped. Supported arguments:
:param reason: the reason for closing the spider
:type reason: str
For example::
def parse_page(self, response):
if 'Bandwidth exceeded' in response.body:
raise CloseSpider('bandwidth_exceeded')
IgnoreRequest
-------------

View File

@ -10,7 +10,7 @@ from scrapy.utils.defer import defer_result, defer_succeed, parallel, iter_errba
from scrapy.utils.spider import iterate_spider_output
from scrapy.utils.misc import load_object
from scrapy.utils.signal import send_catch_log, send_catch_log_deferred
from scrapy.exceptions import IgnoreRequest, DropItem
from scrapy.exceptions import CloseSpider, IgnoreRequest, DropItem
from scrapy import signals
from scrapy.http import Request, Response
from scrapy.item import BaseItem
@ -143,6 +143,10 @@ class Scraper(object):
return dfd.addCallback(iterate_spider_output)
def handle_spider_error(self, _failure, request, response, spider, propagated_failure=None):
exc = _failure.value
if isinstance(exc, CloseSpider):
self.engine.close_spider(spider, exc.reason or 'cancelled')
return
referer = request.headers.get('Referer', None)
msg = "Spider error processing <%s> (referer: <%s>)" % \
(request.url, referer)

View File

@ -20,6 +20,12 @@ class DontCloseSpider(Exception):
"""Request the spider not to be closed yet"""
pass
class CloseSpider(Exception):
    """Raised from a spider callback to request that the spider be closed.

    The engine catches this exception and shuts the spider down, reporting
    the given reason.

    :param reason: short string describing why the spider should be closed
    """

    def __init__(self, reason='cancelled'):
        # Keep the reason so the engine can pass it along when closing.
        self.reason = reason
# Items
class DropItem(Exception):