
Support multiple simultaneous LogObservers listening to different crawlers

Julia Medina 2014-08-05 21:01:57 -03:00
parent 89df18bd2b
commit 900a487682
4 changed files with 71 additions and 23 deletions

docs/topics/logging.rst

@@ -10,7 +10,11 @@ logging`_ but this may change in the future.
 
 .. _Twisted logging: http://twistedmatrix.com/projects/core/documentation/howto/logging.html
 
-The logging service must be explicitly started through the :func:`scrapy.log.start` function.
+The logging service must be explicitly started through the
+:func:`scrapy.log.start` function to catch the top level Scrapy's log messages.
+On top of that, each crawler has its own independent log observer
+(automatically attached when it's created) that intercepts its spider's log
+messages.
 
 .. _topics-logging-levels:

@@ -55,8 +59,11 @@ scrapy.log module
 
 .. function:: start(logfile=None, loglevel=None, logstdout=None)
 
-   Start the logging facility. This must be called before actually logging any
-   messages. Otherwise, messages logged before this call will get lost.
+   Start the top level Scrapy logger. This must be called before actually
+   logging any top level messages (those logged using this module's
+   :func:`~scrapy.log.msg` function instead of the :meth:`Spider.log
+   <scrapy.spider.Spider.log>` method). Otherwise, messages logged before this
+   call will get lost.
 
    :param logfile: the file path to use for logging output. If omitted, the
       :setting:`LOG_FILE` setting will be used. If both are ``None``, the log
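A minimal sketch of the two logging paths the revised docs describe (an
editor's illustration, not part of the commit; assumes a 0.24-era Scrapy
install, and the spider name "myspider" is made up):

    from scrapy import log
    from scrapy.spider import Spider

    log.start()                   # attach the top level observer
    log.msg("engine message")     # emitted by the top level observer

    spider = Spider("myspider")
    log.msg("spider message", spider=spider)  # skipped by the top level
                                              # observer; only the spider's
                                              # crawler observer emits it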

scrapy/crawler.py

@@ -128,6 +128,7 @@ class CrawlerProcess(CrawlerRunner):
         signame = signal_names[signum]
         log.msg(format='Received %(signame)s twice, forcing unclean shutdown',
                 level=log.INFO, signame=signame)
+        self._stop_logging()
         reactor.callFromThread(self._stop_reactor)
 
     def start(self, stop_after_crawl=True):

@@ -135,6 +136,7 @@ class CrawlerProcess(CrawlerRunner):
         self._start_reactor(stop_after_crawl)
 
     def _start_logging(self):
+        self.log_observer = log.start_from_settings(self.settings)
         log.scrapy_info(self.settings)
 
     def _start_reactor(self, stop_after_crawl=True):

@@ -149,6 +151,9 @@ class CrawlerProcess(CrawlerRunner):
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call
 
+    def _stop_logging(self):
+        self.log_observer.stop()
+
     def _stop_reactor(self, _=None):
         try:
             reactor.stop()
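As a reading aid (editor's sketch, not code from the patch): _start_logging
now keeps the observer returned by log.start_from_settings so that
_stop_logging can detach it on shutdown. Under the same 0.24-era API, the
pairing amounts to:

    from scrapy import log
    from scrapy.settings import Settings

    settings = Settings()
    observer = log.start_from_settings(settings)  # None if LOG_ENABLED is off
    try:
        log.scrapy_info(settings)  # startup banner, see scrapy/log.py below
    finally:
        if observer:
            observer.stop()        # what _stop_logging() does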

scrapy/log.py

@@ -35,15 +35,16 @@ class ScrapyFileLogObserver(log.FileLogObserver):
     def __init__(self, f, level=INFO, encoding='utf-8', crawler=None):
         self.level = level
         self.encoding = encoding
+        self.crawler = crawler
         if crawler:
-            self.crawler = crawler
             self.emit = self._emit_with_crawler
         else:
             self.emit = self._emit
         log.FileLogObserver.__init__(self, f)
 
     def _emit(self, eventDict):
-        ev = _adapt_eventdict(eventDict, self.level, self.encoding)
+        ev = _adapt_eventdict(eventDict, self.level, self.encoding,
+                              self.crawler)
         if ev is not None:
             log.FileLogObserver.emit(self, ev)
         return ev

@@ -55,7 +56,8 @@ class ScrapyFileLogObserver(log.FileLogObserver):
         sname = 'log_count/%s' % level_names.get(level, level)
         self.crawler.stats.inc_value(sname)
 
-def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=True):
+def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8',
+                     crawler=None, prepend_level=True):
     """Adapt Twisted log eventDict making it suitable for logging with a Scrapy
     log observer. It may return None to indicate that the event should be
     ignored by a Scrapy log observer.

@@ -78,6 +80,12 @@ def _adapt_eventdict(eventDict, log_level=INFO, encoding='utf-8', prepend_level=
     spider = ev.get('spider')
     if spider:
         ev['system'] = unicode_to_str(spider.name, encoding)
+    if crawler and (not spider or spider.crawler is not crawler):
+        # ignore events not triggered by own spiders in crawlers' observers
+        return
+    if not crawler and spider:
+        # ignore spiders' events in observers without crawler
+        return
 
     lvlname = level_names.get(level, 'NOLEVEL')
     message = ev.get('message')
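The two new guards define the routing rule of this commit. A standalone
restatement (editor's code, not from the patch) that makes the decision
explicit:

    def should_emit(spider, crawler):
        # `crawler` is the observer's bound crawler (None for the top level
        # observer); `spider` is the one tagged on the event (None for plain
        # log.msg calls).
        if crawler and (not spider or spider.crawler is not crawler):
            return False  # crawler observers only pass their own spiders
        if not crawler and spider:
            return False  # the top level observer drops spider events
        return True

So each event is emitted by exactly one kind of observer: spider events by
their own crawler's observer, plain log.msg events by the top level one.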
@@ -140,18 +148,14 @@ def start_from_settings(settings, crawler=None):
                      settings['LOG_ENCODING'], crawler)
 
 def scrapy_info(settings):
-    log_observer = start_from_settings(settings)
-    if log_observer:
-        msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
-            settings['BOT_NAME']))
+    msg("Scrapy %s started (bot: %s)" % (scrapy.__version__,
+        settings['BOT_NAME']))
 
-        msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
-            level=INFO)
+    msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
+        level=INFO)
 
-        d = dict(overridden_settings(settings))
-        msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
-
-        log_observer.stop()
+    d = dict(overridden_settings(settings))
+    msg(format="Overridden settings: %(settings)r", settings=d, level=INFO)
 
 def start_from_crawler(crawler):
     return start_from_settings(crawler.settings, crawler)
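With start_from_crawler, every crawler attaches an observer bound to its own
settings and stats. A usage sketch (editor's illustration; builds the crawler
with scrapy.utils.test.get_crawler exactly as the new tests do):

    from scrapy import log
    from scrapy.spider import Spider
    from scrapy.utils.test import get_crawler

    crawler = get_crawler(Spider)
    crawler.spider = crawler._create_spider('test')

    observer = log.start_from_crawler(crawler)  # start_from_settings(
                                                #   crawler.settings, crawler)
    log.msg("Hello", spider=crawler.spider)     # emitted only by this observer
    if observer:
        observer.stop()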

tests/test_log.py

@@ -6,6 +6,7 @@ from twisted.trial import unittest
 from scrapy import log
 from scrapy.spider import Spider
 from scrapy.settings import default_settings
+from scrapy.utils.test import get_crawler
 
 
 class LogTest(unittest.TestCase):

@@ -40,10 +41,10 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello")
         self.assertEqual(self.logged(), "[scrapy] INFO: Hello")
 
-    def test_msg_spider(self):
+    def test_msg_ignore_spider(self):
         spider = Spider("myspider")
         log.msg("Hello", spider=spider)
-        self.assertEqual(self.logged(), "[myspider] INFO: Hello")
+        self.failIf(self.logged())
 
     def test_msg_level1(self):
         log.msg("Hello", level=log.WARNING)

@@ -57,11 +58,6 @@ class ScrapyFileLogObserverTest(unittest.TestCase):
         log.msg("Hello", level=9999)
         self.assertEqual(self.logged(), "[scrapy] NOLEVEL: Hello")
 
-    def test_msg_level_spider(self):
-        spider = Spider("myspider")
-        log.msg("Hello", spider=spider, level=log.WARNING)
-        self.assertEqual(self.logged(), "[myspider] WARNING: Hello")
-
     def test_msg_encoding(self):
         log.msg(u"Price: \xa3100")
         self.assertEqual(self.logged(), "[scrapy] INFO: Price: \xc2\xa3100")

@@ -133,5 +129,41 @@ class Latin1ScrapyFileLogObserverTest(ScrapyFileLogObserverTest):
     #     self.assertEqual(self.first_log_line(), "[scrapy] ERROR: \xa3")
 
 
+class CrawlerScrapyFileLogObserverTest(unittest.TestCase):
+
+    def setUp(self):
+        self.f = BytesIO()
+        self.crawler = get_crawler(Spider)
+        self.spider = self.crawler.spider = self.crawler._create_spider('test')
+        self.log_observer = log.ScrapyFileLogObserver(self.f, log.INFO,
+                                                      'utf-8', self.crawler)
+        self.log_observer.start()
+
+    def tearDown(self):
+        self.flushLoggedErrors()
+        self.log_observer.stop()
+
+    def logged(self):
+        return self.f.getvalue().strip()[25:]
+
+    def test_msg_basic(self):
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.logged(), "[test] INFO: Hello")
+
+    def test_msg_ignore_scrapy_channel(self):
+        log.msg("Hello")
+        self.failIf(self.logged())
+
+    def test_msg_ignore_another_crawler(self):
+        crawler = get_crawler(Spider)
+        log.msg("Hello", spider=crawler._create_spider('test'))
+        self.failIf(self.logged())
+
+    def test_msg_stats_log(self):
+        assert self.crawler.stats.get_value('log_count/INFO', 0) == 0
+        log.msg("Hello", spider=self.spider)
+        self.assertEqual(self.crawler.stats.get_value('log_count/INFO'), 1)
+
 if __name__ == "__main__":
     unittest.main()