mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 19:24:09 +00:00
Merge pull request #418 from nramirezuy/engine-multispider
engine multispider support removed
This commit is contained in:
commit
1a2db089ff
@ -55,7 +55,8 @@ class ExecutionEngine(object):
|
||||
self.settings = crawler.settings
|
||||
self.signals = crawler.signals
|
||||
self.logformatter = crawler.logformatter
|
||||
self.slots = {}
|
||||
self.slot = None
|
||||
self.spider = None
|
||||
self.running = False
|
||||
self.paused = False
|
||||
self.scheduler_cls = load_object(self.settings['SCHEDULER'])
|
||||
@ -93,9 +94,8 @@ class ExecutionEngine(object):
|
||||
self.paused = False
|
||||
|
||||
def _next_request(self, spider):
|
||||
try:
|
||||
slot = self.slots[spider]
|
||||
except KeyError:
|
||||
slot = self.slot
|
||||
if not slot:
|
||||
return
|
||||
|
||||
if self.paused:
|
||||
@ -121,14 +121,14 @@ class ExecutionEngine(object):
|
||||
self._spider_idle(spider)
|
||||
|
||||
def _needs_backout(self, spider):
|
||||
slot = self.slots[spider]
|
||||
slot = self.slot
|
||||
return not self.running \
|
||||
or slot.closing \
|
||||
or self.downloader.needs_backout() \
|
||||
or self.scraper.slot.needs_backout()
|
||||
|
||||
def _next_request_from_scheduler(self, spider):
|
||||
slot = self.slots[spider]
|
||||
slot = self.slot
|
||||
request = slot.scheduler.next_request()
|
||||
if not request:
|
||||
return
|
||||
@ -154,32 +154,32 @@ class ExecutionEngine(object):
|
||||
|
||||
def spider_is_idle(self, spider):
|
||||
scraper_idle = self.scraper.slot.is_idle()
|
||||
pending = self.slots[spider].scheduler.has_pending_requests()
|
||||
pending = self.slot.scheduler.has_pending_requests()
|
||||
downloading = bool(self.downloader.active)
|
||||
idle = scraper_idle and not (pending or downloading)
|
||||
return idle
|
||||
|
||||
@property
|
||||
def open_spiders(self):
|
||||
return self.slots.keys()
|
||||
return [self.spider] if self.spider else []
|
||||
|
||||
def has_capacity(self):
|
||||
"""Does the engine have capacity to handle more spiders"""
|
||||
return len(self.slots) < self._concurrent_spiders
|
||||
return not bool(self.slot)
|
||||
|
||||
def crawl(self, request, spider):
|
||||
assert spider in self.open_spiders, \
|
||||
"Spider %r not opened when crawling: %s" % (spider.name, request)
|
||||
self.schedule(request, spider)
|
||||
self.slots[spider].nextcall.schedule()
|
||||
self.slot.nextcall.schedule()
|
||||
|
||||
def schedule(self, request, spider):
|
||||
self.signals.send_catch_log(signal=signals.request_scheduled,
|
||||
request=request, spider=spider)
|
||||
return self.slots[spider].scheduler.enqueue_request(request)
|
||||
return self.slot.scheduler.enqueue_request(request)
|
||||
|
||||
def download(self, request, spider):
|
||||
slot = self.slots[spider]
|
||||
slot = self.slot
|
||||
slot.add_request(request)
|
||||
d = self._download(request, spider)
|
||||
d.addBoth(self._downloaded, slot, request, spider)
|
||||
@ -191,7 +191,7 @@ class ExecutionEngine(object):
|
||||
if isinstance(response, Request) else response
|
||||
|
||||
def _download(self, request, spider):
|
||||
slot = self.slots[spider]
|
||||
slot = self.slot
|
||||
slot.add_request(request)
|
||||
def _on_success(response):
|
||||
assert isinstance(response, (Response, Request))
|
||||
@ -214,14 +214,15 @@ class ExecutionEngine(object):
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def open_spider(self, spider, start_requests=(), close_if_idle=True):
|
||||
assert self.has_capacity(), "No free spider slots when opening %r" % \
|
||||
assert self.has_capacity(), "No free spider slot when opening %r" % \
|
||||
spider.name
|
||||
log.msg("Spider opened", spider=spider)
|
||||
nextcall = CallLaterOnce(self._next_request, spider)
|
||||
scheduler = self.scheduler_cls.from_crawler(self.crawler)
|
||||
start_requests = yield self.scraper.spidermw.process_start_requests(start_requests, spider)
|
||||
slot = Slot(start_requests, close_if_idle, nextcall, scheduler)
|
||||
self.slots[spider] = slot
|
||||
self.slot = slot
|
||||
self.spider = spider
|
||||
yield scheduler.open(spider)
|
||||
yield self.scraper.open_spider(spider)
|
||||
self.crawler.stats.open_spider(spider)
|
||||
@ -240,7 +241,7 @@ class ExecutionEngine(object):
|
||||
spider=spider, dont_log=DontCloseSpider)
|
||||
if any(isinstance(x, Failure) and isinstance(x.value, DontCloseSpider) \
|
||||
for _, x in res):
|
||||
self.slots[spider].nextcall.schedule(5)
|
||||
self.slot.nextcall.schedule(5)
|
||||
return
|
||||
|
||||
if self.spider_is_idle(spider):
|
||||
@ -249,7 +250,7 @@ class ExecutionEngine(object):
|
||||
def close_spider(self, spider, reason='cancelled'):
|
||||
"""Close (cancel) spider and clear all its outstanding requests"""
|
||||
|
||||
slot = self.slots[spider]
|
||||
slot = self.slot
|
||||
if slot.closing:
|
||||
return slot.closing
|
||||
log.msg(format="Closing spider (%(reason)s)", reason=reason, spider=spider)
|
||||
@ -276,7 +277,10 @@ class ExecutionEngine(object):
|
||||
|
||||
dfd.addBoth(lambda _: log.msg(format="Spider closed (%(reason)s)", reason=reason, spider=spider))
|
||||
|
||||
dfd.addBoth(lambda _: self.slots.pop(spider))
|
||||
dfd.addBoth(lambda _: setattr(self, 'slot', None))
|
||||
dfd.addErrback(log.err, spider=spider)
|
||||
|
||||
dfd.addBoth(lambda _: setattr(self, 'spider', None))
|
||||
dfd.addErrback(log.err, spider=spider)
|
||||
|
||||
dfd.addBoth(lambda _: self._spider_closed_callback(spider))
|
||||
|
@ -7,16 +7,15 @@ def get_engine_status(engine):
|
||||
global_tests = [
|
||||
"time()-engine.start_time",
|
||||
"engine.has_capacity()",
|
||||
"len(engine.downloader.slots)",
|
||||
"len(engine.downloader.active)",
|
||||
"engine.scraper.is_idle()",
|
||||
]
|
||||
spider_tests = [
|
||||
"engine.spider_is_idle(spider)",
|
||||
"engine.slots[spider].closing",
|
||||
"len(engine.slots[spider].inprogress)",
|
||||
"len(engine.slots[spider].scheduler.dqs or [])",
|
||||
"len(engine.slots[spider].scheduler.mqs)",
|
||||
"engine.slot.closing",
|
||||
"len(engine.slot.inprogress)",
|
||||
"len(engine.slot.scheduler.dqs or [])",
|
||||
"len(engine.slot.scheduler.mqs)",
|
||||
"len(engine.scraper.slot.queue)",
|
||||
"len(engine.scraper.slot.active)",
|
||||
"engine.scraper.slot.active_size",
|
||||
|
Loading…
x
Reference in New Issue
Block a user