commit 4fde1ef94d
parent 4bb409923c

    added CloseSpider exception, to manually close spiders
@@ -240,3 +240,8 @@ How can I see the cookies being sent and received from Scrapy?
 
 Enable the :setting:`COOKIES_DEBUG` setting.
 
+How can I manually stop a running spider?
+-----------------------------------------
+
+Raise the :exc:`~scrapy.exceptions.CloseSpider` exception from a callback. For
+more info see: :exc:`~scrapy.exceptions.CloseSpider`.
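A hedged sketch of the FAQ answer as a complete spider, assuming a current
Scrapy release (``scrapy.Spider`` and ``response.follow`` postdate this
commit); the spider name, URL, and trigger string are illustrative::

    import scrapy
    from scrapy.exceptions import CloseSpider

    class StoppableSpider(scrapy.Spider):
        """Illustrative spider that shuts itself down from a callback."""
        name = 'stoppable'  # hypothetical name
        start_urls = ['http://example.com/']  # placeholder URL

        def parse(self, response):
            # On some terminal condition, ask the engine to close the
            # spider; the string becomes the recorded close reason.
            if b'Bandwidth exceeded' in response.body:
                raise CloseSpider('bandwidth_exceeded')
            # Otherwise keep crawling.
            for href in response.css('a::attr(href)').getall():
                yield response.follow(href, callback=self.parse)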
@@ -22,6 +22,23 @@ DropItem
 The exception that must be raised by item pipeline stages to stop processing an
 Item. For more information see :ref:`topics-item-pipeline`.
 
+CloseSpider
+-----------
+
+.. exception:: CloseSpider(reason='cancelled')
+
+This exception can be raised from a spider callback to request the spider to be
+closed/stopped. Supported arguments:
+
+:param reason: the reason for closing
+:type reason: str
+
+For example::
+
+    def parse_page(self, response):
+        if 'Bandwidth exceeded' in response.body:
+            raise CloseSpider('bandwidth_exceeded')
+
 IgnoreRequest
 -------------
 
@@ -10,7 +10,7 @@ from scrapy.utils.defer import defer_result, defer_succeed, parallel, iter_errback
 from scrapy.utils.spider import iterate_spider_output
 from scrapy.utils.misc import load_object
 from scrapy.utils.signal import send_catch_log, send_catch_log_deferred
-from scrapy.exceptions import IgnoreRequest, DropItem
+from scrapy.exceptions import CloseSpider, IgnoreRequest, DropItem
 from scrapy import signals
 from scrapy.http import Request, Response
 from scrapy.item import BaseItem
@@ -143,6 +143,10 @@ class Scraper(object):
         return dfd.addCallback(iterate_spider_output)
 
     def handle_spider_error(self, _failure, request, response, spider, propagated_failure=None):
+        exc = _failure.value
+        if isinstance(exc, CloseSpider):
+            self.engine.close_spider(spider, exc.reason or 'cancelled')
+            return
         referer = request.headers.get('Referer', None)
         msg = "Spider error processing <%s> (referer: <%s>)" % \
             (request.url, referer)
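The hunk above special-cases ``CloseSpider`` inside the spider-error handler.
A minimal sketch of that dispatch pattern in isolation (the ``classify``
helper is hypothetical, not part of the commit)::

    from twisted.python.failure import Failure
    from scrapy.exceptions import CloseSpider

    def classify(failure):
        # Failure.value holds the wrapped exception instance, which is
        # exactly what handle_spider_error inspects above.
        exc = failure.value
        if isinstance(exc, CloseSpider):
            return exc.reason or 'cancelled'
        return 'error'

    try:
        raise CloseSpider('shutdown_requested')
    except CloseSpider:
        # Failure() with no arguments captures the exception currently
        # being handled, as Twisted errbacks do.
        print(classify(Failure()))  # -> shutdown_requested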
@@ -20,6 +20,12 @@ class DontCloseSpider(Exception):
     """Request the spider not to be closed yet"""
     pass
 
+class CloseSpider(Exception):
+    """Raise this from callbacks to request the spider to be closed"""
+
+    def __init__(self, reason='cancelled'):
+        self.reason = reason
+
 # Items
 
 class DropItem(Exception):
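End to end, the ``reason`` passed to ``CloseSpider`` is what
``engine.close_spider`` records and what the ``spider_closed`` signal
reports. A hedged sketch of observing it, assuming the current
``from_crawler``/signals API (spider name and URL are illustrative)::

    import scrapy
    from scrapy import signals
    from scrapy.exceptions import CloseSpider

    class ReasonDemoSpider(scrapy.Spider):
        name = 'reason-demo'  # hypothetical name
        start_urls = ['http://example.com/']  # placeholder URL

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            # Listen for the spider_closed signal, which carries the reason.
            crawler.signals.connect(spider.on_closed,
                                    signal=signals.spider_closed)
            return spider

        def on_closed(self, spider, reason):
            # Receives 'done_early', the reason raised in parse() below.
            spider.logger.info('spider closed: %s', reason)

        def parse(self, response):
            raise CloseSpider('done_early')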