Support multiple simultaneous LogObservers listening to different crawlers
parent 89df18bd2b
commit 900a487682
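What the change enables, in short: several log observers can now run at once — one top level observer for messages logged through scrapy.log.msg() with no spider attached, plus one observer per crawler that only records its own spiders' messages. A rough usage sketch under that reading (crawler1, crawler2 and spider1 are placeholders, not objects introduced by this commit):

    from io import BytesIO
    from scrapy import log

    # Twisted fans every log event out to all started observers; each
    # crawler-bound observer keeps only events from its own spiders.
    obs1 = log.ScrapyFileLogObserver(BytesIO(), log.INFO, 'utf-8', crawler1)
    obs2 = log.ScrapyFileLogObserver(BytesIO(), log.INFO, 'utf-8', crawler2)
    obs1.start()
    obs2.start()

    log.msg("Hello", spider=spider1)  # recorded by obs1 only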
@@ -10,7 +10,11 @@ logging`_ but this may change in the future.
 
 .. _Twisted logging: http://twistedmatrix.com/projects/core/documentation/howto/logging.html
 
-The logging service must be explicitly started through the :func:`scrapy.log.start` function.
+The logging service must be explicitly started through the
+:func:`scrapy.log.start` function to catch the top level Scrapy's log messages.
+On top of that, each crawler has its own independent log observer
+(automatically attached when it's created) that intercepts its spider's log
+messages.
 
 .. _topics-logging-levels:
 
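A minimal sketch of the split this paragraph describes, assuming a spider already attached to a running crawler:

    from scrapy import log

    log.start()  # top level observer, catches scrapy.log.msg() messages
    log.msg("This is a top level message")

    # Spider messages are intercepted by the spider's own crawler observer
    # (attached automatically when the crawler is created):
    # spider.log("This is a spider message")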
@@ -55,8 +59,11 @@ scrapy.log module
 
 .. function:: start(logfile=None, loglevel=None, logstdout=None)
 
-    Start the logging facility. This must be called before actually logging any
-    messages. Otherwise, messages logged before this call will get lost.
+    Start the top level Scrapy logger. This must be called before actually
+    logging any top level messages (those logged using this module's
+    :func:`~scrapy.log.msg` function instead of the :meth:`Spider.log
+    <scrapy.spider.Spider.log>` method). Otherwise, messages logged before this
+    call will get lost.
 
     :param logfile: the file path to use for logging output. If omitted, the
         :setting:`LOG_FILE` setting will be used. If both are ``None``, the log
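For illustration, a hedged call spelling out the documented parameters (the values are arbitrary):

    from scrapy import log

    # The three arguments mirror the LOG_FILE, LOG_LEVEL and LOG_STDOUT
    # settings; arguments left as None fall back to those settings.
    log.start(logfile='scrapy.log', loglevel=log.DEBUG, logstdout=False)
    log.msg("A top level message, written to scrapy.log")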
@@ -128,6 +128,7 @@ class CrawlerProcess(CrawlerRunner):
         signame = signal_names[signum]
         log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
                 level=log.INFO, signame=signame)
+        self._stop_logging()
         reactor.callFromThread(self._stop_reactor)
 
     def start(self, stop_after_crawl=True):
@@ -135,6 +136,7 @@ class CrawlerProcess(CrawlerRunner):
         self._start_reactor(stop_after_crawl)
 
     def _start_logging(self):
+        self.log_observer = log.start_from_settings(self.settings)
         log.scrapy_info(self.settings)
 
     def _start_reactor(self, stop_after_crawl=True):
@@ -149,6 +151,9 @@ class CrawlerProcess(CrawlerRunner):
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call
 
+    def _stop_logging(self):
+        self.log_observer.stop()
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()
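These CrawlerProcess hunks pair the observer's start and stop: _start_logging() keeps a handle on the top level observer returned by log.start_from_settings(), and _stop_logging() closes it, including on the forced-shutdown path. A condensed sketch of that pairing (paraphrased from the diff, not a drop-in copy of the class):

    from scrapy import log

    class LoggingLifecycle(object):
        # Start/stop pairing as wired into CrawlerProcess above.

        def __init__(self, settings):
            self.settings = settings
            self.log_observer = None

        def start_logging(self):
            # keep the observer so it can be stopped symmetrically later
            self.log_observer = log.start_from_settings(self.settings)
            log.scrapy_info(self.settings)

        def stop_logging(self):
            if self.log_observer:
                self.log_observer.stop()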
@@ -35,15 +35,16 @@ class ScrapyFileLogObserver(log.FileLogObserver):
     def __init__(self, f, level=INFO, encoding='utf-8', crawler=None):
         self.level = level
         self.encoding = encoding
+        self.crawler = crawler
         if crawler:
-            self.crawler = crawler
             self.emit = self._emit_with_crawler
         else:
             self.emit = self._emit
         log.FileLogObserver.__init__(self, f)
 
     def _emit(self, eventDict):
-        ev = _adapt_eventdict(eventDict, self.level, self.encoding)
+        ev = _adapt_eventdict(eventDict, self.level, self.encoding,
+                              self.crawler)
         if ev is not None:
             log.FileLogObserver.emit(self, ev)
         return ev
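The constructor now stores the crawler unconditionally and selects the emit path once, at construction time. A small check of that dispatch (the bare observer is exercised directly; the crawler-bound variant assumes a crawler object from elsewhere):

    from io import BytesIO
    from scrapy import log

    plain = log.ScrapyFileLogObserver(BytesIO())   # no crawler
    assert plain.crawler is None
    assert plain.emit == plain._emit               # plain path, no stats

    # With a crawler (hypothetical object), the stats-counting path is used:
    # bound = log.ScrapyFileLogObserver(BytesIO(), log.INFO, 'utf-8', crawler)
    # assert bound.emit == bound._emit_with_crawler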
@@ -55,7 +56,8 @@ class ScrapyFileLogObserver(log.FileLogObserver):
         sname = 'log_count/%s' % level_names.get(level, level)
         self.crawler.stats.inc_value(sname)
 
-def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=True):
+def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8',
+                     crawler=None, prepend_level=True):
     """Adapt Twisted log eventDict making it suitable for logging with a Scrapy
     log observer. It may return None to indicate that the event should be
     ignored by a Scrapy log observer.
@@ -78,6 +80,12 @@ def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=
     spider = ev.get('spider')
     if spider:
         ev['system'] = unicode_to_str(spider.name, encoding)
+    if crawler and (not spider or spider.crawler is not crawler):
+        # ignore events not triggered by own spiders in crawlers' observers
+        return
+    if not crawler and spider:
+        # ignore spiders' events in observers without crawler
+        return
 
     lvlname = level_names.get(level, 'NOLEVEL')
     message = ev.get('message')
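The two new guards in _adapt_eventdict carry the whole routing policy. Restated as a standalone predicate for clarity (a sketch, not code from the commit; crawler is the observer's bound crawler, spider the one attached to the log event):

    def should_ignore(crawler, spider):
        # crawler-bound observer: accept only events from its own spiders
        if crawler and (not spider or spider.crawler is not crawler):
            return True
        # top level observer: skip spider events; the spider's crawler
        # observer records those instead
        if not crawler and spider:
            return True
        return False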
@@ -140,18 +148,14 @@ def start_from_settings(settings, crawler=None):
                                settings['LOG_ENCODING'], crawler)
 
 def scrapy_info(settings):
-    log_observer = start_from_settings(settings)
-    if log_observer:
-        msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
-            settings['BOT_NAME']))
+    msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
+        settings['BOT_NAME']))
 
-        msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
-            level=INFO)
+    msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
+        level=INFO)
 
-        d = dict(overridden_settings(settings))
-        msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
-
-        log_observer.stop()
+    d = dict(overridden_settings(settings))
+    msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
 
 def start_from_crawler(crawler):
     return start_from_settings(crawler.settings, crawler)
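scrapy_info() no longer spins up and immediately stops a throwaway observer of its own; it simply emits the startup banner through whatever observers are already running. Together with the CrawlerProcess change above, a hedged sketch of the resulting startup sequence (get_project_settings is an assumed helper from scrapy.utils.project):

    from scrapy import log
    from scrapy.utils.project import get_project_settings  # assumed helper

    settings = get_project_settings()
    observer = log.start_from_settings(settings)  # top level observer
    log.scrapy_info(settings)                     # banner goes through it
    # ... run crawlers; each attaches its own observer ...
    if observer:
        observer.stop()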
@@ -6,6 +6,7 @@ from twisted.trial import unittest
 from scrapy import log
 from scrapy.spider import Spider
 from scrapy.settings import default_settings
+from scrapy.utils.test import get_crawler
 
 class LogTest(unittest.TestCase):
 
@@ -40,10 +41,10 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello")
         self.assertEqual(self.logged(), "[scrapy] INFO: Hello")
 
-    def test_msg_spider(self):
+    def test_msg_ignore_spider(self):
         spider = Spider("myspider")
         log.msg("Hello", spider=spider)
-        self.assertEqual(self.logged(), "[myspider] INFO: Hello")
+        self.failIf(self.logged())
 
     def test_msg_level1(self):
         log.msg("Hello", level=log.WARNING)
@@ -57,11 +58,6 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello", level=9999)
         self.assertEqual(self.logged(), "[scrapy] NOLEVEL: Hello")
 
-    def test_msg_level_spider(self):
-        spider = Spider("myspider")
-        log.msg("Hello", spider=spider, level=log.WARNING)
-        self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
-
     def test_msg_encoding(self):
         log.msg(u"Price: \xa3100")
         self.assertEqual(self.logged(), "[scrapy] INFO: Price: \xc2\xa3100")
@@ -133,5 +129,41 @@ class Latin1ScrapyFileLogObserverTest(ScrapyFileLogObserverTest):
     #     self.assertEqual(self.first_log_line(), "[scrapy] ERROR: \xa3")
 
 
+class CrawlerScrapyFileLogObserverTest(unittest.TestCase):
+
+    def setUp(self):
+        self.f = BytesIO()
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler.spider = self.crawler._create_spider('test')
+        self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
+                                                      'utf-8', self.crawler)
+        self.log_observer.start()
+
+    def tearDown(self):
+        self.flushLoggedErrors()
+        self.log_observer.stop()
+
+    def logged(self):
+        return self.f.getvalue().strip()[25:]
+
+    def test_msg_basic(self):
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.logged(), "[test] INFO: Hello")
+
+    def test_msg_ignore_scrapy_channel(self):
+        log.msg("Hello")
+        self.failIf(self.logged())
+
+    def test_msg_ignore_another_crawler(self):
+        crawler = get_crawler(Spider)
+        log.msg("Hello", spider=crawler._create_spider('test'))
+        self.failIf(self.logged())
+
+    def test_msg_stats_log(self):
+        assert self.crawler.stats.get_value('log_count/INFO', 0) == 0
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.crawler.stats.get_value('log_count/INFO'), 1)
+
+
 if __name__ == "__main__":
     unittest.main()