From 33d145e2f5a1f470398d96184b4da158e3ef7240 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Sat, 6 Jun 2015 02:49:39 +0500
Subject: [PATCH 1/2] CrawlerProcess cleanup

* remove unneeded lambda;
* extract _get_dns_resolver method and format code to pep8.
---
 scrapy/crawler.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/scrapy/crawler.py b/scrapy/crawler.py
index 204395c77..9ab233573 100644
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -242,16 +242,25 @@ class CrawlerProcess(CrawlerRunner):
             # Don't start the reactor if the deferreds are already fired
             if d.called:
                 return
-            d.addBoth(lambda _: self._stop_reactor())
+            d.addBoth(self._stop_reactor)
 
-        cache_size = self.settings.getint('DNSCACHE_SIZE') if self.settings.getbool('DNSCACHE_ENABLED') else 0
-        reactor.installResolver(CachingThreadedResolver(reactor, cache_size,
-                                                        self.settings.getfloat('DNS_TIMEOUT')))
+        reactor.installResolver(self._get_dns_resolver())
         tp = reactor.getThreadPool()
         tp.adjustPoolsize(maxthreads=self.settings.getint('REACTOR_THREADPOOL_MAXSIZE'))
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call
 
+    def _get_dns_resolver(self):
+        if self.settings.getbool('DNSCACHE_ENABLED'):
+            cache_size = self.settings.getint('DNSCACHE_SIZE')
+        else:
+            cache_size = 0
+        return CachingThreadedResolver(
+            reactor=reactor,
+            cache_size=cache_size,
+            timeout=self.settings.getfloat('DNS_TIMEOUT')
+        )
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()

From 64399d18d81bda9fdb1771627949d36f9f412981 Mon Sep 17 00:00:00 2001
From: Mikhail Korobov
Date: Sat, 6 Jun 2015 02:53:36 +0500
Subject: [PATCH 2/2] Stop reactor on Ctrl-C regardless of 'stop_after_crawl'. Fixes GH-1279.

---
 scrapy/crawler.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scrapy/crawler.py b/scrapy/crawler.py
index 9ab233573..21b3c748f 100644
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -216,7 +216,7 @@ class CrawlerProcess(CrawlerRunner):
         signame = signal_names[signum]
         logger.info("Received %(signame)s, shutting down gracefully. Send again to force ",
                     {'signame': signame})
-        reactor.callFromThread(self.stop)
+        reactor.callFromThread(self._graceful_stop_reactor)
 
     def _signal_kill(self, signum, _):
         install_shutdown_handlers(signal.SIG_IGN)
@@ -261,6 +261,11 @@ class CrawlerProcess(CrawlerRunner):
             timeout=self.settings.getfloat('DNS_TIMEOUT')
         )
 
+    def _graceful_stop_reactor(self):
+        d = self.stop()
+        d.addBoth(self._stop_reactor)
+        return d
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()