scrapy/tests/test_crawler.py
(mirror of https://github.com/scrapy/scrapy.git)

Commit ef04cfd237 by Konstantin Lopuhin, 2017-03-03 18:01:11 +03:00
Respect log settings in custom_settings (fixes GH-1612)

A new root log handler is installed when a crawler is created,
provided one was already installed before.
This makes it possible to respect custom settings related to logging,
such as LOG_LEVEL and LOG_FILE.
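
A minimal sketch of the behavior this fix enables (the spider name, log
file name, and settings values below are illustrative, not part of the
test file):

    import scrapy
    from scrapy.crawler import CrawlerProcess

    class QuietSpider(scrapy.Spider):
        # Hypothetical spider: with this fix, its per-spider log
        # settings are honored when the crawler is created.
        name = 'quiet'
        custom_settings = {
            'LOG_LEVEL': 'WARNING',
            'LOG_FILE': 'quiet.log',
        }

    process = CrawlerProcess()
    # Creating the crawler reinstalls the root log handler, so only
    # WARNING-and-above records end up in quiet.log.
    process.crawl(QuietSpider)
    process.start()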

import logging
import os
import tempfile
import warnings
import unittest

import scrapy
from scrapy.crawler import Crawler, CrawlerRunner, CrawlerProcess
from scrapy.settings import Settings, default_settings
from scrapy.spiderloader import SpiderLoader
from scrapy.utils.log import configure_logging, get_scrapy_root_handler
from scrapy.utils.spider import DefaultSpider
from scrapy.utils.misc import load_object
from scrapy.extensions.throttle import AutoThrottle


class BaseCrawlerTest(unittest.TestCase):

    def assertOptionIsDefault(self, settings, key):
        self.assertIsInstance(settings, Settings)
        self.assertEqual(settings[key], getattr(default_settings, key))


class CrawlerTestCase(BaseCrawlerTest):

    def setUp(self):
        self.crawler = Crawler(DefaultSpider, Settings())

    def test_deprecated_attribute_spiders(self):
        with warnings.catch_warnings(record=True) as w:
            spiders = self.crawler.spiders
            self.assertEqual(len(w), 1)
            self.assertIn("Crawler.spiders", str(w[0].message))
            sl_cls = load_object(self.crawler.settings['SPIDER_LOADER_CLASS'])
            self.assertIsInstance(spiders, sl_cls)

            self.crawler.spiders
            self.assertEqual(len(w), 1, "Warn deprecated access only once")

    def test_populate_spidercls_settings(self):
        spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
        project_settings = {'TEST1': 'project', 'TEST3': 'project'}

        class CustomSettingsSpider(DefaultSpider):
            custom_settings = spider_settings

        settings = Settings()
        settings.setdict(project_settings, priority='project')
        crawler = Crawler(CustomSettingsSpider, settings)

        self.assertEqual(crawler.settings.get('TEST1'), 'spider')
        self.assertEqual(crawler.settings.get('TEST2'), 'spider')
        self.assertEqual(crawler.settings.get('TEST3'), 'project')

        self.assertFalse(settings.frozen)
        self.assertTrue(crawler.settings.frozen)

    def test_crawler_accepts_dict(self):
        crawler = Crawler(DefaultSpider, {'foo': 'bar'})
        self.assertEqual(crawler.settings['foo'], 'bar')
        self.assertOptionIsDefault(crawler.settings, 'RETRY_ENABLED')

    def test_crawler_accepts_None(self):
        crawler = Crawler(DefaultSpider)
        self.assertOptionIsDefault(crawler.settings, 'RETRY_ENABLED')


class SpiderSettingsTestCase(unittest.TestCase):

    def test_spider_custom_settings(self):
        class MySpider(scrapy.Spider):
            name = 'spider'
            custom_settings = {
                'AUTOTHROTTLE_ENABLED': True
            }

        crawler = Crawler(MySpider, {})
        enabled_exts = [e.__class__ for e in crawler.extensions.middlewares]
        self.assertIn(AutoThrottle, enabled_exts)


class CrawlerLoggingTestCase(unittest.TestCase):

    def test_no_root_handler_installed(self):
        # Start from a clean state: remove any scrapy root handler left
        # over from other tests.
        handler = get_scrapy_root_handler()
        if handler is not None:
            logging.root.removeHandler(handler)

        class MySpider(scrapy.Spider):
            name = 'spider'

        crawler = Crawler(MySpider, {})
        # Creating a crawler must not install a root handler if none
        # was installed before.
        assert get_scrapy_root_handler() is None

    def test_spider_custom_settings_log_level(self):
        with tempfile.NamedTemporaryFile() as log_file:
            class MySpider(scrapy.Spider):
                name = 'spider'
                custom_settings = {
                    'LOG_LEVEL': 'INFO',
                    'LOG_FILE': log_file.name,
                }

            configure_logging()
            self.assertEqual(get_scrapy_root_handler().level, logging.DEBUG)
            crawler = Crawler(MySpider, {})
            # Creating the crawler reinstalled the root handler with the
            # spider's custom log settings.
            self.assertEqual(get_scrapy_root_handler().level, logging.INFO)
            info_count = crawler.stats.get_value('log_count/INFO')
            logging.debug('debug message')
            logging.info('info message')
            logging.warning('warning message')
            logging.error('error message')
            logged = log_file.read().decode('utf8')

        self.assertNotIn('debug message', logged)
        self.assertIn('info message', logged)
        self.assertIn('warning message', logged)
        self.assertIn('error message', logged)
        self.assertEqual(crawler.stats.get_value('log_count/ERROR'), 1)
        self.assertEqual(crawler.stats.get_value('log_count/WARNING'), 1)
        self.assertEqual(
            crawler.stats.get_value('log_count/INFO') - info_count, 1)
        self.assertEqual(crawler.stats.get_value('log_count/DEBUG', 0), 0)


class SpiderLoaderWithWrongInterface(object):

    def unneeded_method(self):
        pass


class CustomSpiderLoader(SpiderLoader):
    pass


class CrawlerRunnerTestCase(BaseCrawlerTest):

    def test_spider_manager_verify_interface(self):
        settings = Settings({
            'SPIDER_LOADER_CLASS': 'tests.test_crawler.SpiderLoaderWithWrongInterface'
        })
        with warnings.catch_warnings(record=True) as w, \
                self.assertRaises(AttributeError):
            CrawlerRunner(settings)
        # Check the warnings after the with block: once the expected
        # AttributeError is raised, execution leaves the block, and the
        # recorded warnings remain available on w.
        self.assertEqual(len(w), 1)
        self.assertIn("SPIDER_LOADER_CLASS", str(w[0].message))
        self.assertIn("scrapy.interfaces.ISpiderLoader", str(w[0].message))

    def test_crawler_runner_accepts_dict(self):
        runner = CrawlerRunner({'foo': 'bar'})
        self.assertEqual(runner.settings['foo'], 'bar')
        self.assertOptionIsDefault(runner.settings, 'RETRY_ENABLED')

    def test_crawler_runner_accepts_None(self):
        runner = CrawlerRunner()
        self.assertOptionIsDefault(runner.settings, 'RETRY_ENABLED')

    def test_deprecated_attribute_spiders(self):
        with warnings.catch_warnings(record=True) as w:
            runner = CrawlerRunner(Settings())
            spiders = runner.spiders
            self.assertEqual(len(w), 1)
            self.assertIn("CrawlerRunner.spiders", str(w[0].message))
            self.assertIn("CrawlerRunner.spider_loader", str(w[0].message))
            sl_cls = load_object(runner.settings['SPIDER_LOADER_CLASS'])
            self.assertIsInstance(spiders, sl_cls)

    def test_spidermanager_deprecation(self):
        with warnings.catch_warnings(record=True) as w:
            runner = CrawlerRunner({
                'SPIDER_MANAGER_CLASS': 'tests.test_crawler.CustomSpiderLoader'
            })
            self.assertIsInstance(runner.spider_loader, CustomSpiderLoader)
            self.assertEqual(len(w), 1)
            self.assertIn('Please use SPIDER_LOADER_CLASS', str(w[0].message))


class CrawlerProcessTest(BaseCrawlerTest):

    def test_crawler_process_accepts_dict(self):
        runner = CrawlerProcess({'foo': 'bar'})
        self.assertEqual(runner.settings['foo'], 'bar')
        self.assertOptionIsDefault(runner.settings, 'RETRY_ENABLED')

    def test_crawler_process_accepts_None(self):
        runner = CrawlerProcess()
        self.assertOptionIsDefault(runner.settings, 'RETRY_ENABLED')