mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 17:43:46 +00:00

Renamed exception: DontCloseDomain to DontCloseSpider (closes #120)

Pablo Hoffman 2009-11-21 15:06:03 -02:00
parent 0d75a3a636
commit a49aef2beb
3 changed files with 10 additions and 9 deletions
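
For context, a minimal sketch (not part of this commit) of how an extension would use the renamed exception: connect a handler to the spider_idle signal and raise DontCloseSpider to keep the spider alive. The import paths and dispatcher-based signal API below follow the Scrapy version of this commit; the class name KeepSpiderAlive is hypothetical.

    from pydispatch import dispatcher

    from scrapy.core import signals
    from scrapy.core.exceptions import DontCloseSpider

    class KeepSpiderAlive(object):
        """Hypothetical extension: veto spider closing from spider_idle."""

        def __init__(self):
            # spider_idle is dispatched by the engine whenever a spider
            # runs out of pages to download or schedule
            dispatcher.connect(self.spider_idle, signal=signals.spider_idle)

        def spider_idle(self, spider):
            # Raising the renamed exception tells the engine not to close
            # the spider yet; the engine will check again later
            raise DontCloseSpider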

View File

@@ -10,7 +10,7 @@ from collections import defaultdict
 from scrapy.core import signals
 from scrapy.core.engine import scrapyengine
-from scrapy.core.exceptions import NotConfigured, DontCloseDomain
+from scrapy.core.exceptions import NotConfigured, DontCloseSpider
 from scrapy.conf import settings
@@ -33,7 +33,7 @@ class DelayedCloseDomain(object):
         lastseen = self.opened_at[spider]
         if time() < lastseen + self.delay:
-            raise DontCloseDomain
+            raise DontCloseSpider

     def spider_closed(self, spider):
         self.opened_at.pop(spider, None)

View File

@@ -16,7 +16,7 @@ from scrapy.conf import settings
 from scrapy.core import signals
 from scrapy.core.downloader import Downloader
 from scrapy.core.scraper import Scraper
-from scrapy.core.exceptions import IgnoreRequest, DontCloseDomain
+from scrapy.core.exceptions import IgnoreRequest, DontCloseSpider
 from scrapy.http import Response, Request
 from scrapy.spider import spiders
 from scrapy.utils.misc import load_object
@@ -251,7 +251,7 @@ class ExecutionEngine(object):
     def _spider_idle(self, spider):
         """Called when a spider gets idle. This function is called when there
         are no remaining pages to download or schedule. It can be called
-        multiple times. If some extension raises a DontCloseDomain exception
+        multiple times. If some extension raises a DontCloseSpider exception
         (in the spider_idle signal handler) the spider is not closed until the
         next loop and this function is guaranteed to be called (at least) once
         again for this spider.
@@ -259,11 +259,12 @@ class ExecutionEngine(object):
         try:
             dispatcher.send(signal=signals.spider_idle, sender=self.__class__, \
                 spider=spider)
-        except DontCloseDomain:
+        except DontCloseSpider:
             reactor.callLater(5, self.next_request, spider)
             return
-        except:
-            log.err("Exception catched on spider_idle signal dispatch")
+        except Exception, e:
+            log.msg("Exception caught on 'spider_idle' signal dispatch: %r" % e, \
+                level=log.ERROR)
         if self.spider_is_idle(spider):
             self.close_spider(spider, reason='finished')
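
The docstring above guarantees that a handler which raises DontCloseSpider sees at least one more spider_idle dispatch, because the engine schedules next_request and re-enters the idle check. A minimal sketch of an extension relying on that guarantee (the class name GracePeriod and the max_idle_checks attribute are hypothetical; imports follow this Scrapy version):

    from pydispatch import dispatcher

    from scrapy.core import signals
    from scrapy.core.exceptions import DontCloseSpider

    class GracePeriod(object):
        """Hypothetical extension: defer closing for a few idle checks."""

        max_idle_checks = 3

        def __init__(self):
            self.idle_checks = {}
            dispatcher.connect(self.spider_idle, signal=signals.spider_idle)

        def spider_idle(self, spider):
            # Each raise buys another idle check roughly 5 seconds later
            # (see the reactor.callLater(5, ...) call in the engine above)
            count = self.idle_checks.get(spider, 0) + 1
            self.idle_checks[spider] = count
            if count <= self.max_idle_checks:
                raise DontCloseSpider
            # Falling through lets the engine close the spider normally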

View File

@@ -25,8 +25,8 @@ class IgnoreRequest(Exception):
     def __str__(self):
         return self.msg

-class DontCloseDomain(Exception):
-    """Request the domain not to be closed yet"""
+class DontCloseSpider(Exception):
+    """Request the spider not to be closed yet"""
     pass

 # Items
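
A note for readers on later Scrapy releases: the exception was subsequently moved out of scrapy.core.exceptions, so current code imports it as:

    # Modern import path for the exception renamed in this commit
    from scrapy.exceptions import DontCloseSpider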