From a84e5f806c7f909b2b497eea90c22f807baeff0a Mon Sep 17 00:00:00 2001
From: nramirezuy
Date: Tue, 8 Oct 2013 16:01:27 -0200
Subject: [PATCH] engine multispider support removed

---
 scrapy/core/engine.py  | 40 ++++++++++++++++++++++------------------
 scrapy/utils/engine.py |  9 ++++-----
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/scrapy/core/engine.py b/scrapy/core/engine.py
index aa66b1013..fbb655651 100644
--- a/scrapy/core/engine.py
+++ b/scrapy/core/engine.py
@@ -55,7 +55,8 @@ class ExecutionEngine(object):
         self.settings = crawler.settings
         self.signals = crawler.signals
         self.logformatter = crawler.logformatter
-        self.slots = {}
+        self.slot = None
+        self.spider = None
         self.running = False
         self.paused = False
         self.scheduler_cls = load_object(self.settings['SCHEDULER'])
@@ -93,9 +94,8 @@ class ExecutionEngine(object):
         self.paused = False
 
     def _next_request(self, spider):
-        try:
-            slot = self.slots[spider]
-        except KeyError:
+        slot = self.slot
+        if not slot:
             return
 
         if self.paused:
@@ -121,14 +121,14 @@ class ExecutionEngine(object):
             self._spider_idle(spider)
 
     def _needs_backout(self, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         return not self.running \
             or slot.closing \
             or self.downloader.needs_backout() \
             or self.scraper.slot.needs_backout()
 
     def _next_request_from_scheduler(self, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         request = slot.scheduler.next_request()
         if not request:
             return
@@ -154,32 +154,32 @@ class ExecutionEngine(object):
 
     def spider_is_idle(self, spider):
         scraper_idle = self.scraper.slot.is_idle()
-        pending = self.slots[spider].scheduler.has_pending_requests()
+        pending = self.slot.scheduler.has_pending_requests()
         downloading = bool(self.downloader.active)
         idle = scraper_idle and not (pending or downloading)
         return idle
 
     @property
     def open_spiders(self):
-        return self.slots.keys()
+        return [self.spider] if self.spider else []
 
     def has_capacity(self):
         """Does the engine have capacity to handle more spiders"""
-        return len(self.slots) < self._concurrent_spiders
+        return not bool(self.slot)
 
     def crawl(self, request, spider):
         assert spider in self.open_spiders, \
             "Spider %r not opened when crawling: %s" % (spider.name, request)
         self.schedule(request, spider)
-        self.slots[spider].nextcall.schedule()
+        self.slot.nextcall.schedule()
 
     def schedule(self, request, spider):
         self.signals.send_catch_log(signal=signals.request_scheduled,
                request=request, spider=spider)
-        return self.slots[spider].scheduler.enqueue_request(request)
+        return self.slot.scheduler.enqueue_request(request)
 
     def download(self, request, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         slot.add_request(request)
         d = self._download(request, spider)
         d.addBoth(self._downloaded, slot, request, spider)
@@ -191,7 +191,7 @@ class ExecutionEngine(object):
             if isinstance(response, Request) else response
 
     def _download(self, request, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         slot.add_request(request)
         def _on_success(response):
             assert isinstance(response, (Response, Request))
@@ -214,14 +214,15 @@ class ExecutionEngine(object):
 
     @defer.inlineCallbacks
     def open_spider(self, spider, start_requests=(), close_if_idle=True):
-        assert self.has_capacity(), "No free spider slots when opening %r" % \
+        assert self.has_capacity(), "No free spider slot when opening %r" % \
            spider.name
         log.msg("Spider opened", spider=spider)
         nextcall = CallLaterOnce(self._next_request, spider)
         scheduler = self.scheduler_cls.from_crawler(self.crawler)
         start_requests = yield self.scraper.spidermw.process_start_requests(start_requests, spider)
         slot = Slot(start_requests, close_if_idle, nextcall, scheduler)
-        self.slots[spider] = slot
+        self.slot = slot
+        self.spider = spider
         yield scheduler.open(spider)
         yield self.scraper.open_spider(spider)
         self.crawler.stats.open_spider(spider)
@@ -240,7 +241,7 @@ class ExecutionEngine(object):
             spider=spider, dont_log=DontCloseSpider)
         if any(isinstance(x, Failure) and isinstance(x.value, DontCloseSpider) \
                 for _, x in res):
-            self.slots[spider].nextcall.schedule(5)
+            self.slot.nextcall.schedule(5)
             return
 
         if self.spider_is_idle(spider):
@@ -249,7 +250,7 @@
 
     def close_spider(self, spider, reason='cancelled'):
         """Close (cancel) spider and clear all its outstanding requests"""
-        slot = self.slots[spider]
+        slot = self.slot
         if slot.closing:
             return slot.closing
         log.msg(format="Closing spider (%(reason)s)", reason=reason, spider=spider)
@@ -276,7 +277,10 @@
         dfd.addBoth(lambda _: log.msg(format="Spider closed (%(reason)s)", reason=reason,
             spider=spider))
 
-        dfd.addBoth(lambda _: self.slots.pop(spider))
+        dfd.addBoth(lambda _: setattr(self, 'slot', None))
+        dfd.addErrback(log.err, spider=spider)
+
+        dfd.addBoth(lambda _: setattr(self, 'spider', None))
         dfd.addErrback(log.err, spider=spider)
 
         dfd.addBoth(lambda _: self._spider_closed_callback(spider))
diff --git a/scrapy/utils/engine.py b/scrapy/utils/engine.py
index 7a5232ba6..a346447e5 100644
--- a/scrapy/utils/engine.py
+++ b/scrapy/utils/engine.py
@@ -7,16 +7,15 @@ def get_engine_status(engine):
     global_tests = [
         "time()-engine.start_time",
         "engine.has_capacity()",
-        "len(engine.downloader.slots)",
         "len(engine.downloader.active)",
         "engine.scraper.is_idle()",
     ]
     spider_tests = [
         "engine.spider_is_idle(spider)",
-        "engine.slots[spider].closing",
-        "len(engine.slots[spider].inprogress)",
-        "len(engine.slots[spider].scheduler.dqs or [])",
-        "len(engine.slots[spider].scheduler.mqs)",
+        "engine.slot.closing",
+        "len(engine.slot.inprogress)",
+        "len(engine.slot.scheduler.dqs or [])",
+        "len(engine.slot.scheduler.mqs)",
         "len(engine.scraper.slot.queue)",
         "len(engine.scraper.slot.active)",
         "engine.scraper.slot.active_size",
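
The gist of the patch above: the engine drops its per-spider `slots` dict in
favor of a single `slot`/`spider` pair, so `has_capacity()` now simply means
"no spider is open yet" and `open_spiders` shrinks to a zero- or one-element
list. What follows is a minimal standalone sketch of that single-slot pattern;
`SingleSlotEngine` and its simplified `Slot` are illustrative stand-ins, not
Scrapy's actual ExecutionEngine or Slot classes:

class Slot(object):
    # Simplified stand-in for scrapy.core.engine.Slot.
    def __init__(self, spider):
        self.spider = spider
        self.closing = False

class SingleSlotEngine(object):
    # Illustrative sketch of the single-slot bookkeeping only.
    def __init__(self):
        self.slot = None      # was: self.slots = {} (one entry per spider)
        self.spider = None

    def has_capacity(self):
        # Capacity is now the boolean "no spider open yet", not a count
        # checked against a concurrent-spiders limit.
        return not bool(self.slot)

    @property
    def open_spiders(self):
        # Still returns a list, so callers that iterate keep working.
        return [self.spider] if self.spider else []

    def open_spider(self, spider):
        assert self.has_capacity(), \
            "No free spider slot when opening %r" % spider
        self.slot = Slot(spider)
        self.spider = spider

    def close_spider(self, spider):
        self.slot = None
        self.spider = None

engine = SingleSlotEngine()
engine.open_spider('example_spider')
assert engine.open_spiders == ['example_spider']
assert not engine.has_capacity()    # a second open_spider() would fail
engine.close_spider('example_spider')
assert engine.has_capacity()

One detail worth noting in the close path: `slot` and `spider` are reset via
`setattr(self, 'slot', None)` inside `lambda` callbacks because Python lambdas
cannot contain assignment statements, and each `addBoth` is paired with an
`addErrback(log.err, spider=spider)` so a failure in one cleanup step is
logged rather than propagating through the rest of the Deferred chain.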