mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 06:23:41 +00:00
Merge branch 'Curita-per-spider-settings'
This commit is contained in:
commit
5daa14770b
@ -172,6 +172,7 @@ Settings API
|
||||
'default': 0,
|
||||
'command': 10,
|
||||
'project': 20,
|
||||
'spider': 30,
|
||||
'cmdline': 40,
|
||||
}
|
||||
|
||||
|
@ -36,9 +36,10 @@ different precedence. Here is the list of them in decreasing order of
|
||||
precedence:
|
||||
|
||||
1. Command line options (most precedence)
|
||||
2. Project settings module
|
||||
3. Default settings per-command
|
||||
4. Default global settings (least precedence)
|
||||
2. Settings per-spider
|
||||
3. Project settings module
|
||||
4. Default settings per-command
|
||||
5. Default global settings (least precedence)
|
||||
|
||||
The population of these settings sources is taken care of internally, but a
|
||||
manual handling is possible using API calls. See the
|
||||
@ -59,14 +60,21 @@ Example::
|
||||
|
||||
scrapy crawl myspider -s LOG_FILE=scrapy.log
|
||||
|
||||
2. Project settings module
|
||||
2. Settings per-spider
|
||||
----------------------
|
||||
|
||||
Spiders (see the :ref:`topics-spiders` chapter for reference) can define their
|
||||
own settings that will take precedence and override the project ones. They can
|
||||
do so by setting their :attr:`scrapy.spider.Spider.custom_settings` attribute.
|
||||
|
||||
3. Project settings module
|
||||
--------------------------
|
||||
|
||||
The project settings module is the standard configuration file for your Scrapy
|
||||
project. It's where most of your custom settings will be populated. For
|
||||
example: ``myproject.settings``.
|
||||
|
||||
3. Default settings per-command
|
||||
4. Default settings per-command
|
||||
-------------------------------
|
||||
|
||||
Each :doc:`Scrapy tool </topics/commands>` command can have its own default
|
||||
@ -74,7 +82,7 @@ settings, which override the global default settings. Those custom command
|
||||
settings are specified in the ``default_settings`` attribute of the command
|
||||
class.
|
||||
|
||||
4. Default global settings
|
||||
5. Default global settings
|
||||
--------------------------
|
||||
|
||||
The global defaults are located in the ``scrapy.settings.default_settings``
|
||||
|
@ -133,6 +133,15 @@ Spider
|
||||
listed here. The subsequent URLs will be generated successively from data
|
||||
contained in the start URLs.
|
||||
|
||||
.. attribute:: custom_settings
|
||||
|
||||
A dictionary of settings that will override the project-wide
|
||||
configuration when running this spider. It must be defined as a class
|
||||
attribute since the settings are updated before instantiation.
|
||||
|
||||
For a list of available built-in settings see:
|
||||
:ref:`topics-settings-ref`.
|
||||
|
||||
.. attribute:: crawler
|
||||
|
||||
This attribute is set by the :meth:`from_crawler` class method after
|
||||
|
@ -96,7 +96,12 @@ class CrawlerRunner(object):
|
||||
def _create_crawler(self, spidercls):
|
||||
if isinstance(spidercls, six.string_types):
|
||||
spidercls = self.spiders.load(spidercls)
|
||||
crawler = Crawler(spidercls, self.settings.frozencopy())
|
||||
|
||||
crawler_settings = self.settings.copy()
|
||||
spidercls.update_settings(crawler_settings)
|
||||
crawler_settings.freeze()
|
||||
|
||||
crawler = Crawler(spidercls, crawler_settings)
|
||||
return crawler
|
||||
|
||||
def stop(self):
|
||||
|
@ -15,6 +15,7 @@ SETTINGS_PRIORITIES = {
|
||||
'default': 0,
|
||||
'command': 10,
|
||||
'project': 20,
|
||||
'spider': 30,
|
||||
'cmdline': 40,
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@ class Spider(object_ref):
|
||||
"""
|
||||
|
||||
name = None
|
||||
custom_settings = None
|
||||
|
||||
def __init__(self, name=None, **kwargs):
|
||||
if name is not None:
|
||||
@ -66,6 +67,10 @@ class Spider(object_ref):
|
||||
def parse(self, response):
|
||||
raise NotImplementedError
|
||||
|
||||
@classmethod
|
||||
def update_settings(cls, settings):
|
||||
settings.setdict(cls.custom_settings or {}, priority='spider')
|
||||
|
||||
@classmethod
|
||||
def handles_request(cls, request):
|
||||
return url_is_from_spider(request.url, cls)
|
||||
|
@ -1,7 +1,9 @@
|
||||
import warnings
|
||||
import unittest
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from twisted.internet import defer
|
||||
|
||||
from scrapy.crawler import Crawler, CrawlerRunner
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.utils.spider import DefaultSpider
|
||||
from scrapy.utils.misc import load_object
|
||||
@ -22,3 +24,26 @@ class CrawlerTestCase(unittest.TestCase):
|
||||
|
||||
self.crawler.spiders
|
||||
self.assertEqual(len(w), 1, "Warn deprecated access only once")
|
||||
|
||||
|
||||
class CrawlerRunnerTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.crawler_runner = CrawlerRunner(Settings())
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_populate_spidercls_settings(self):
|
||||
spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
|
||||
project_settings = {'TEST1': 'project', 'TEST3': 'project'}
|
||||
|
||||
class CustomSettingsSpider(DefaultSpider):
|
||||
custom_settings = spider_settings
|
||||
|
||||
self.crawler_runner.settings.setdict(project_settings,
|
||||
priority='project')
|
||||
|
||||
yield self.crawler_runner.crawl(CustomSettingsSpider)
|
||||
crawler = self.crawler_runner.crawlers.pop()
|
||||
self.assertEqual(crawler.settings.get('TEST1'), 'spider')
|
||||
self.assertEqual(crawler.settings.get('TEST2'), 'spider')
|
||||
self.assertEqual(crawler.settings.get('TEST3'), 'project')
|
||||
|
@ -10,6 +10,7 @@ except ImportError:
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.spider import Spider, BaseSpider
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.http import Request, Response, TextResponse, XmlResponse, HtmlResponse
|
||||
from scrapy.contrib.spiders.init import InitSpider
|
||||
from scrapy.contrib.spiders import CrawlSpider, Rule, XMLFeedSpider, \
|
||||
@ -92,6 +93,16 @@ class SpiderTest(unittest.TestCase):
|
||||
spider=spider, reason=None)
|
||||
self.assertTrue(spider.closed_called)
|
||||
|
||||
def test_update_settings(self):
|
||||
spider_settings = {'TEST1': 'spider', 'TEST2': 'spider'}
|
||||
project_settings = {'TEST1': 'project', 'TEST3': 'project'}
|
||||
self.spider_class.custom_settings = spider_settings
|
||||
settings = Settings(project_settings, priority='project')
|
||||
|
||||
self.spider_class.update_settings(settings)
|
||||
self.assertEqual(settings.get('TEST1'), 'spider')
|
||||
self.assertEqual(settings.get('TEST2'), 'spider')
|
||||
self.assertEqual(settings.get('TEST3'), 'project')
|
||||
|
||||
class InitSpiderTest(SpiderTest):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user