Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-26 22:44:05 +00:00)
Merge pull request #418 from nramirezuy/engine-multispider

engine multispider support removed

This commit is contained in: commit 1a2db089ff
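In short: the engine previously kept one slot per spider in a slots dict, bounded by a _concurrent_spiders limit, but a crawler only ever runs a single spider, so the bookkeeping collapses to one slot plus one spider attribute. The substitution that recurs throughout the diff below is roughly:

    # Before: per-spider lookup, which could raise KeyError
    slot = self.slots[spider]
    # After: a single slot per engine, checked against None instead
    slot = self.slot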
--- a/scrapy/core/engine.py
+++ b/scrapy/core/engine.py
@@ -55,7 +55,8 @@ class ExecutionEngine(object):
         self.settings = crawler.settings
         self.signals = crawler.signals
         self.logformatter = crawler.logformatter
-        self.slots = {}
+        self.slot = None
+        self.spider = None
         self.running = False
         self.paused = False
         self.scheduler_cls = load_object(self.settings['SCHEDULER'])
@@ -93,9 +94,8 @@ class ExecutionEngine(object):
         self.paused = False

     def _next_request(self, spider):
-        try:
-            slot = self.slots[spider]
-        except KeyError:
+        slot = self.slot
+        if not slot:
             return

         if self.paused:
@@ -121,14 +121,14 @@ class ExecutionEngine(object):
             self._spider_idle(spider)

     def _needs_backout(self, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         return not self.running \
             or slot.closing \
             or self.downloader.needs_backout() \
             or self.scraper.slot.needs_backout()

     def _next_request_from_scheduler(self, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         request = slot.scheduler.next_request()
         if not request:
             return
@@ -154,32 +154,32 @@ class ExecutionEngine(object):

     def spider_is_idle(self, spider):
         scraper_idle = self.scraper.slot.is_idle()
-        pending = self.slots[spider].scheduler.has_pending_requests()
+        pending = self.slot.scheduler.has_pending_requests()
         downloading = bool(self.downloader.active)
         idle = scraper_idle and not (pending or downloading)
         return idle

     @property
     def open_spiders(self):
-        return self.slots.keys()
+        return [self.spider] if self.spider else []

     def has_capacity(self):
         """Does the engine have capacity to handle more spiders"""
-        return len(self.slots) < self._concurrent_spiders
+        return not bool(self.slot)

     def crawl(self, request, spider):
         assert spider in self.open_spiders, \
             "Spider %r not opened when crawling: %s" % (spider.name, request)
         self.schedule(request, spider)
-        self.slots[spider].nextcall.schedule()
+        self.slot.nextcall.schedule()

     def schedule(self, request, spider):
         self.signals.send_catch_log(signal=signals.request_scheduled,
                 request=request, spider=spider)
-        return self.slots[spider].scheduler.enqueue_request(request)
+        return self.slot.scheduler.enqueue_request(request)

     def download(self, request, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         slot.add_request(request)
         d = self._download(request, spider)
         d.addBoth(self._downloaded, slot, request, spider)
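Below is a minimal sketch of the invariant these accessors now encode; MiniEngine is a hypothetical stand-in, not Scrapy's real ExecutionEngine. Capacity exists only while no slot is open, and open_spiders holds at most one entry.

    # Hypothetical mini-model of the single-slot bookkeeping; the names
    # mirror the diff, but this is not the real ExecutionEngine.
    class MiniEngine(object):
        def __init__(self):
            self.slot = None    # was: self.slots = {}
            self.spider = None  # new: the single open spider

        @property
        def open_spiders(self):
            # At most one spider can be open at a time.
            return [self.spider] if self.spider else []

        def has_capacity(self):
            # "Capacity" now just means: no slot is occupied.
            return not bool(self.slot)

    engine = MiniEngine()
    assert engine.has_capacity() and engine.open_spiders == []
    engine.slot, engine.spider = object(), 'example'
    assert not engine.has_capacity() and engine.open_spiders == ['example']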
@@ -191,7 +191,7 @@ class ExecutionEngine(object):
             if isinstance(response, Request) else response

     def _download(self, request, spider):
-        slot = self.slots[spider]
+        slot = self.slot
         slot.add_request(request)
         def _on_success(response):
             assert isinstance(response, (Response, Request))
@@ -214,14 +214,15 @@ class ExecutionEngine(object):

     @defer.inlineCallbacks
     def open_spider(self, spider, start_requests=(), close_if_idle=True):
-        assert self.has_capacity(), "No free spider slots when opening %r" % \
+        assert self.has_capacity(), "No free spider slot when opening %r" % \
             spider.name
         log.msg("Spider opened", spider=spider)
         nextcall = CallLaterOnce(self._next_request, spider)
         scheduler = self.scheduler_cls.from_crawler(self.crawler)
         start_requests = yield self.scraper.spidermw.process_start_requests(start_requests, spider)
         slot = Slot(start_requests, close_if_idle, nextcall, scheduler)
-        self.slots[spider] = slot
+        self.slot = slot
+        self.spider = spider
         yield scheduler.open(spider)
         yield self.scraper.open_spider(spider)
         self.crawler.stats.open_spider(spider)
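With the tightened assertion above, opening a second spider while one is live should now fail immediately. A self-contained sketch of that behavior (hypothetical stub and helper, not the real engine method):

    class _Stub(object):
        slot = None
        spider = None

    def open_spider(engine, spider):
        # Mirrors the diff's assert: capacity means "no slot occupied".
        assert engine.slot is None, "No free spider slot when opening %r" % spider
        engine.slot, engine.spider = object(), spider

    engine = _Stub()
    open_spider(engine, 'first')
    try:
        open_spider(engine, 'second')
    except AssertionError as exc:
        print(exc)  # No free spider slot when opening 'second'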
@@ -240,7 +241,7 @@ class ExecutionEngine(object):
             spider=spider, dont_log=DontCloseSpider)
         if any(isinstance(x, Failure) and isinstance(x.value, DontCloseSpider) \
                 for _, x in res):
-            self.slots[spider].nextcall.schedule(5)
+            self.slot.nextcall.schedule(5)
             return

         if self.spider_is_idle(spider):
@@ -249,7 +250,7 @@ class ExecutionEngine(object):
     def close_spider(self, spider, reason='cancelled'):
         """Close (cancel) spider and clear all its outstanding requests"""

-        slot = self.slots[spider]
+        slot = self.slot
         if slot.closing:
             return slot.closing
         log.msg(format="Closing spider (%(reason)s)", reason=reason, spider=spider)
@@ -276,7 +277,10 @@ class ExecutionEngine(object):

         dfd.addBoth(lambda _: log.msg(format="Spider closed (%(reason)s)", reason=reason, spider=spider))

-        dfd.addBoth(lambda _: self.slots.pop(spider))
+        dfd.addBoth(lambda _: setattr(self, 'slot', None))
+        dfd.addErrback(log.err, spider=spider)
+
+        dfd.addBoth(lambda _: setattr(self, 'spider', None))
         dfd.addErrback(log.err, spider=spider)

         dfd.addBoth(lambda _: self._spider_closed_callback(spider))
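The teardown chain now clears the single slot/spider pair with setattr callbacks on the closing Deferred. A standalone sketch of that pattern using twisted.internet.defer directly (the engine object here is a stub; Twisted must be installed):

    from twisted.internet import defer

    class _Engine(object):
        slot, spider = 'slot', 'spider'

    engine = _Engine()
    dfd = defer.succeed(None)
    # addBoth runs whether the previous step succeeded or failed; setattr
    # clears the reference and passes None down the chain, and the errback
    # keeps one failing step from silently swallowing the rest.
    dfd.addBoth(lambda _: setattr(engine, 'slot', None))
    dfd.addErrback(lambda failure: failure.printTraceback())
    dfd.addBoth(lambda _: setattr(engine, 'spider', None))
    assert engine.slot is None and engine.spider is None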
--- a/scrapy/utils/engine.py
+++ b/scrapy/utils/engine.py
@@ -7,16 +7,15 @@ def get_engine_status(engine):
     global_tests = [
         "time()-engine.start_time",
         "engine.has_capacity()",
-        "len(engine.downloader.slots)",
         "len(engine.downloader.active)",
         "engine.scraper.is_idle()",
     ]
     spider_tests = [
         "engine.spider_is_idle(spider)",
-        "engine.slots[spider].closing",
-        "len(engine.slots[spider].inprogress)",
-        "len(engine.slots[spider].scheduler.dqs or [])",
-        "len(engine.slots[spider].scheduler.mqs)",
+        "engine.slot.closing",
+        "len(engine.slot.inprogress)",
+        "len(engine.slot.scheduler.dqs or [])",
+        "len(engine.slot.scheduler.mqs)",
         "len(engine.scraper.slot.queue)",
         "len(engine.scraper.slot.active)",
         "engine.scraper.slot.active_size",
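The strings in global_tests and spider_tests are expressions that get_engine_status evaluates against a live engine to build its debug report. A simplified sketch of that evaluation loop (the real helper's naming, formatting, and error handling differ):

    from time import time  # "time()-engine.start_time" expects time() in scope

    def eval_status(engine, tests):
        # Evaluate each expression string with the engine in scope,
        # recording the exception type when an expression fails.
        results = []
        for test in tests:
            try:
                results.append((test, eval(test)))
            except Exception as e:
                results.append((test, "%s (exception)" % type(e).__name__))
        return results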