Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-23 12:04:00 +00:00)

added CloseSpider exception, to manually close spiders

This commit is contained in:
parent: 4bb409923c
commit: 4fde1ef94d
@@ -240,3 +240,8 @@ How can I see the cookies being sent and received from Scrapy?
 
 Enable the :setting:`COOKIES_DEBUG` setting.
 
+How can I manually stop a running spider?
+-----------------------------------------
+
+Raise the :exc:`~scrapy.exceptions.CloseSpider` exception from a callback. For
+more info see: :exc:`~scrapy.exceptions.CloseSpider`.
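A minimal sketch of what this FAQ entry describes, assuming the BaseSpider API of this era; the spider name and URL are illustrative, not part of the commit::

    from scrapy.spider import BaseSpider
    from scrapy.exceptions import CloseSpider

    class ExampleSpider(BaseSpider):
        # hypothetical spider, for illustration only
        name = 'example'
        start_urls = ['http://www.example.com/']

        def parse(self, response):
            # raising CloseSpider from a callback asks the engine to
            # stop this spider, recording the given reason
            if 'Bandwidth exceeded' in response.body:
                raise CloseSpider('bandwidth_exceeded')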
@@ -22,6 +22,23 @@ DropItem
 
 The exception that must be raised by item pipeline stages to stop processing an
 Item. For more information see :ref:`topics-item-pipeline`.
 
+CloseSpider
+-----------
+
+.. exception:: CloseSpider(reason='cancelled')
+
+This exception can be raised from a spider callback to request the spider to be
+closed/stopped. Supported arguments:
+
+:param reason: the reason for closing
+:type reason: str
+
+For example::
+
+    def parse_page(self, response):
+        if 'Bandwidth exceeded' in response.body:
+            raise CloseSpider('bandwidth_exceeded')
+
 IgnoreRequest
 -------------
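The reason passed to :exc:`CloseSpider` is forwarded to wherever the :signal:`spider_closed` signal is handled. A sketch of observing it, assuming the pydispatch-based signal API used at this time; the handler name is illustrative::

    from scrapy.xlib.pydispatch import dispatcher
    from scrapy import signals

    def spider_closed_handler(spider, reason):
        # reason is 'bandwidth_exceeded' when a callback raised
        # CloseSpider('bandwidth_exceeded'); 'cancelled' is the default
        print "spider %s closed: %s" % (spider.name, reason)

    dispatcher.connect(spider_closed_handler, signal=signals.spider_closed)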
@@ -10,7 +10,7 @@ from scrapy.utils.defer import defer_result, defer_succeed, parallel, iter_errback
 from scrapy.utils.spider import iterate_spider_output
 from scrapy.utils.misc import load_object
 from scrapy.utils.signal import send_catch_log, send_catch_log_deferred
-from scrapy.exceptions import IgnoreRequest, DropItem
+from scrapy.exceptions import CloseSpider, IgnoreRequest, DropItem
 from scrapy import signals
 from scrapy.http import Request, Response
 from scrapy.item import BaseItem
@@ -143,6 +143,10 @@ class Scraper(object):
         return dfd.addCallback(iterate_spider_output)
 
     def handle_spider_error(self, _failure, request, response, spider, propagated_failure=None):
+        exc = _failure.value
+        if isinstance(exc, CloseSpider):
+            self.engine.close_spider(spider, exc.reason or 'cancelled')
+            return
         referer = request.headers.get('Referer', None)
         msg = "Spider error processing <%s> (referer: <%s>)" % \
             (request.url, referer)
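The new branch relies on Twisted delivering callback errors wrapped in a Failure, whose ``value`` attribute holds the original exception. A standalone sketch of that relationship (not part of the commit)::

    from twisted.python.failure import Failure
    from scrapy.exceptions import CloseSpider

    try:
        raise CloseSpider('bandwidth_exceeded')
    except CloseSpider:
        failure = Failure()  # Failure() captures the exception being handled

    exc = failure.value      # the original CloseSpider instance
    assert isinstance(exc, CloseSpider)
    assert exc.reason == 'bandwidth_exceeded'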
@@ -20,6 +20,12 @@ class DontCloseSpider(Exception):
     """Request the spider not to be closed yet"""
     pass
 
+class CloseSpider(Exception):
+    """Raise this from callbacks to request the spider to be closed"""
+
+    def __init__(self, reason='cancelled'):
+        self.reason = reason
+
 # Items
 
 class DropItem(Exception):
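Since ``__init__`` defaults ``reason`` to ``'cancelled'``, a bare ``raise CloseSpider()`` closes the spider with that reason; a quick sketch::

    from scrapy.exceptions import CloseSpider

    assert CloseSpider().reason == 'cancelled'        # the default
    assert CloseSpider('finished_early').reason == 'finished_early'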