mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-13 10:33:49 +00:00
Tests: use classes instead of paths in settings (#4817)
This commit is contained in:
parent
4f27c5f82b
commit
797a6690c0
@ -43,7 +43,7 @@ class RFPDupeFilterTest(unittest.TestCase):
|
||||
|
||||
def test_df_from_crawler_scheduler(self):
|
||||
settings = {'DUPEFILTER_DEBUG': True,
|
||||
'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
|
||||
'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
|
||||
crawler = get_crawler(settings_dict=settings)
|
||||
scheduler = Scheduler.from_crawler(crawler)
|
||||
self.assertTrue(scheduler.df.debug)
|
||||
@ -51,14 +51,14 @@ class RFPDupeFilterTest(unittest.TestCase):
|
||||
|
||||
def test_df_from_settings_scheduler(self):
|
||||
settings = {'DUPEFILTER_DEBUG': True,
|
||||
'DUPEFILTER_CLASS': __name__ + '.FromSettingsRFPDupeFilter'}
|
||||
'DUPEFILTER_CLASS': FromSettingsRFPDupeFilter}
|
||||
crawler = get_crawler(settings_dict=settings)
|
||||
scheduler = Scheduler.from_crawler(crawler)
|
||||
self.assertTrue(scheduler.df.debug)
|
||||
self.assertEqual(scheduler.df.method, 'from_settings')
|
||||
|
||||
def test_df_direct_scheduler(self):
|
||||
settings = {'DUPEFILTER_CLASS': __name__ + '.DirectDupeFilter'}
|
||||
settings = {'DUPEFILTER_CLASS': DirectDupeFilter}
|
||||
crawler = get_crawler(settings_dict=settings)
|
||||
scheduler = Scheduler.from_crawler(crawler)
|
||||
self.assertEqual(scheduler.df.method, 'n/a')
|
||||
@ -162,7 +162,7 @@ class RFPDupeFilterTest(unittest.TestCase):
|
||||
def test_log(self):
|
||||
with LogCapture() as log:
|
||||
settings = {'DUPEFILTER_DEBUG': False,
|
||||
'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
|
||||
'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
|
||||
crawler = get_crawler(SimpleSpider, settings_dict=settings)
|
||||
scheduler = Scheduler.from_crawler(crawler)
|
||||
spider = SimpleSpider.from_crawler(crawler)
|
||||
@ -191,7 +191,7 @@ class RFPDupeFilterTest(unittest.TestCase):
|
||||
def test_log_debug(self):
|
||||
with LogCapture() as log:
|
||||
settings = {'DUPEFILTER_DEBUG': True,
|
||||
'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
|
||||
'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
|
||||
crawler = get_crawler(SimpleSpider, settings_dict=settings)
|
||||
scheduler = Scheduler.from_crawler(crawler)
|
||||
spider = SimpleSpider.from_crawler(crawler)
|
||||
|
@ -193,7 +193,7 @@ class ShowOrSkipMessagesTestCase(TwistedTestCase):
|
||||
self.base_settings = {
|
||||
'LOG_LEVEL': 'DEBUG',
|
||||
'ITEM_PIPELINES': {
|
||||
__name__ + '.DropSomeItemsPipeline': 300,
|
||||
DropSomeItemsPipeline: 300,
|
||||
},
|
||||
}
|
||||
|
||||
@ -212,7 +212,7 @@ class ShowOrSkipMessagesTestCase(TwistedTestCase):
|
||||
@defer.inlineCallbacks
|
||||
def test_skip_messages(self):
|
||||
settings = self.base_settings.copy()
|
||||
settings['LOG_FORMATTER'] = __name__ + '.SkipMessagesLogFormatter'
|
||||
settings['LOG_FORMATTER'] = SkipMessagesLogFormatter
|
||||
crawler = CrawlerRunner(settings).create_crawler(ItemSpider)
|
||||
with LogCapture() as lc:
|
||||
yield crawler.crawl(mockserver=self.mockserver)
|
||||
|
@ -68,7 +68,7 @@ class PipelineTestCase(unittest.TestCase):
|
||||
|
||||
def _create_crawler(self, pipeline_class):
|
||||
settings = {
|
||||
'ITEM_PIPELINES': {__name__ + '.' + pipeline_class.__name__: 1},
|
||||
'ITEM_PIPELINES': {pipeline_class: 1},
|
||||
}
|
||||
crawler = get_crawler(ItemSpider, settings)
|
||||
crawler.signals.connect(self._on_item_scraped, signals.item_scraped)
|
||||
|
@ -92,7 +92,7 @@ class CrawlTestCase(TestCase):
|
||||
url = self.mockserver.url("/status?n=200")
|
||||
runner = CrawlerRunner(settings={
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
__name__ + ".RaiseExceptionRequestMiddleware": 590,
|
||||
RaiseExceptionRequestMiddleware: 590,
|
||||
},
|
||||
})
|
||||
crawler = runner.create_crawler(SingleRequestSpider)
|
||||
@ -119,7 +119,7 @@ class CrawlTestCase(TestCase):
|
||||
url = self.mockserver.url("/status?n=200")
|
||||
runner = CrawlerRunner(settings={
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
__name__ + ".ProcessResponseMiddleware": 595,
|
||||
ProcessResponseMiddleware: 595,
|
||||
}
|
||||
})
|
||||
crawler = runner.create_crawler(SingleRequestSpider)
|
||||
@ -149,8 +149,8 @@ class CrawlTestCase(TestCase):
|
||||
url = self.mockserver.url("/status?n=200")
|
||||
runner = CrawlerRunner(settings={
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
__name__ + ".RaiseExceptionRequestMiddleware": 590,
|
||||
__name__ + ".CatchExceptionOverrideRequestMiddleware": 595,
|
||||
RaiseExceptionRequestMiddleware: 590,
|
||||
CatchExceptionOverrideRequestMiddleware: 595,
|
||||
},
|
||||
})
|
||||
crawler = runner.create_crawler(SingleRequestSpider)
|
||||
@ -170,8 +170,8 @@ class CrawlTestCase(TestCase):
|
||||
url = self.mockserver.url("/status?n=200")
|
||||
runner = CrawlerRunner(settings={
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
__name__ + ".RaiseExceptionRequestMiddleware": 590,
|
||||
__name__ + ".CatchExceptionDoNotOverrideRequestMiddleware": 595,
|
||||
RaiseExceptionRequestMiddleware: 590,
|
||||
CatchExceptionDoNotOverrideRequestMiddleware: 595,
|
||||
},
|
||||
})
|
||||
crawler = runner.create_crawler(SingleRequestSpider)
|
||||
@ -188,7 +188,7 @@ class CrawlTestCase(TestCase):
|
||||
"""
|
||||
runner = CrawlerRunner(settings={
|
||||
"DOWNLOADER_MIDDLEWARES": {
|
||||
__name__ + ".AlternativeCallbacksMiddleware": 595,
|
||||
AlternativeCallbacksMiddleware: 595,
|
||||
}
|
||||
})
|
||||
crawler = runner.create_crawler(AlternativeCallbacksSpider)
|
||||
|
@ -50,10 +50,10 @@ class KeywordArgumentsSpider(MockServerSpider):
|
||||
name = 'kwargs'
|
||||
custom_settings = {
|
||||
'DOWNLOADER_MIDDLEWARES': {
|
||||
__name__ + '.InjectArgumentsDownloaderMiddleware': 750,
|
||||
InjectArgumentsDownloaderMiddleware: 750,
|
||||
},
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.InjectArgumentsSpiderMiddleware': 750,
|
||||
InjectArgumentsSpiderMiddleware: 750,
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -16,11 +16,20 @@ class LogExceptionMiddleware:
|
||||
|
||||
# ================================================================================
|
||||
# (0) recover from an exception on a spider callback
|
||||
class RecoveryMiddleware:
|
||||
def process_spider_exception(self, response, exception, spider):
|
||||
spider.logger.info('Middleware: %s exception caught', exception.__class__.__name__)
|
||||
return [
|
||||
{'from': 'process_spider_exception'},
|
||||
Request(response.url, meta={'dont_fail': True}, dont_filter=True),
|
||||
]
|
||||
|
||||
|
||||
class RecoverySpider(Spider):
|
||||
name = 'RecoverySpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.RecoveryMiddleware': 10,
|
||||
RecoveryMiddleware: 10,
|
||||
},
|
||||
}
|
||||
|
||||
@ -34,15 +43,6 @@ class RecoverySpider(Spider):
|
||||
raise TabError()
|
||||
|
||||
|
||||
class RecoveryMiddleware:
|
||||
def process_spider_exception(self, response, exception, spider):
|
||||
spider.logger.info('Middleware: %s exception caught', exception.__class__.__name__)
|
||||
return [
|
||||
{'from': 'process_spider_exception'},
|
||||
Request(response.url, meta={'dont_fail': True}, dont_filter=True),
|
||||
]
|
||||
|
||||
|
||||
# ================================================================================
|
||||
# (1) exceptions from a spider middleware's process_spider_input method
|
||||
class FailProcessSpiderInputMiddleware:
|
||||
@ -56,9 +56,8 @@ class ProcessSpiderInputSpiderWithoutErrback(Spider):
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
# spider
|
||||
__name__ + '.LogExceptionMiddleware': 10,
|
||||
__name__ + '.FailProcessSpiderInputMiddleware': 8,
|
||||
__name__ + '.LogExceptionMiddleware': 6,
|
||||
FailProcessSpiderInputMiddleware: 8,
|
||||
LogExceptionMiddleware: 6,
|
||||
# engine
|
||||
}
|
||||
}
|
||||
@ -87,7 +86,7 @@ class GeneratorCallbackSpider(Spider):
|
||||
name = 'GeneratorCallbackSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.LogExceptionMiddleware': 10,
|
||||
LogExceptionMiddleware: 10,
|
||||
},
|
||||
}
|
||||
|
||||
@ -106,7 +105,7 @@ class GeneratorCallbackSpiderMiddlewareRightAfterSpider(GeneratorCallbackSpider)
|
||||
name = 'GeneratorCallbackSpiderMiddlewareRightAfterSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.LogExceptionMiddleware': 100000,
|
||||
LogExceptionMiddleware: 100000,
|
||||
},
|
||||
}
|
||||
|
||||
@ -117,7 +116,7 @@ class NotGeneratorCallbackSpider(Spider):
|
||||
name = 'NotGeneratorCallbackSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.LogExceptionMiddleware': 10,
|
||||
LogExceptionMiddleware: 10,
|
||||
},
|
||||
}
|
||||
|
||||
@ -134,32 +133,13 @@ class NotGeneratorCallbackSpiderMiddlewareRightAfterSpider(NotGeneratorCallbackS
|
||||
name = 'NotGeneratorCallbackSpiderMiddlewareRightAfterSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.LogExceptionMiddleware': 100000,
|
||||
LogExceptionMiddleware: 100000,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# ================================================================================
|
||||
# (4) exceptions from a middleware process_spider_output method (generator)
|
||||
class GeneratorOutputChainSpider(Spider):
|
||||
name = 'GeneratorOutputChainSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.GeneratorFailMiddleware': 10,
|
||||
__name__ + '.GeneratorDoNothingAfterFailureMiddleware': 8,
|
||||
__name__ + '.GeneratorRecoverMiddleware': 5,
|
||||
__name__ + '.GeneratorDoNothingAfterRecoveryMiddleware': 3,
|
||||
},
|
||||
}
|
||||
|
||||
def start_requests(self):
|
||||
yield Request(self.mockserver.url('/status?n=200'))
|
||||
|
||||
def parse(self, response):
|
||||
yield {'processed': ['parse-first-item']}
|
||||
yield {'processed': ['parse-second-item']}
|
||||
|
||||
|
||||
class _GeneratorDoNothingMiddleware:
|
||||
def process_spider_output(self, response, result, spider):
|
||||
for r in result:
|
||||
@ -205,26 +185,28 @@ class GeneratorDoNothingAfterRecoveryMiddleware(_GeneratorDoNothingMiddleware):
|
||||
pass
|
||||
|
||||
|
||||
# ================================================================================
|
||||
# (5) exceptions from a middleware process_spider_output method (not generator)
|
||||
class NotGeneratorOutputChainSpider(Spider):
|
||||
name = 'NotGeneratorOutputChainSpider'
|
||||
class GeneratorOutputChainSpider(Spider):
|
||||
name = 'GeneratorOutputChainSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
__name__ + '.NotGeneratorFailMiddleware': 10,
|
||||
__name__ + '.NotGeneratorDoNothingAfterFailureMiddleware': 8,
|
||||
__name__ + '.NotGeneratorRecoverMiddleware': 5,
|
||||
__name__ + '.NotGeneratorDoNothingAfterRecoveryMiddleware': 3,
|
||||
GeneratorFailMiddleware: 10,
|
||||
GeneratorDoNothingAfterFailureMiddleware: 8,
|
||||
GeneratorRecoverMiddleware: 5,
|
||||
GeneratorDoNothingAfterRecoveryMiddleware: 3,
|
||||
},
|
||||
}
|
||||
|
||||
def start_requests(self):
|
||||
return [Request(self.mockserver.url('/status?n=200'))]
|
||||
yield Request(self.mockserver.url('/status?n=200'))
|
||||
|
||||
def parse(self, response):
|
||||
return [{'processed': ['parse-first-item']}, {'processed': ['parse-second-item']}]
|
||||
yield {'processed': ['parse-first-item']}
|
||||
yield {'processed': ['parse-second-item']}
|
||||
|
||||
|
||||
# ================================================================================
|
||||
# (5) exceptions from a middleware process_spider_output method (not generator)
|
||||
|
||||
class _NotGeneratorDoNothingMiddleware:
|
||||
def process_spider_output(self, response, result, spider):
|
||||
out = []
|
||||
@ -276,6 +258,24 @@ class NotGeneratorDoNothingAfterRecoveryMiddleware(_NotGeneratorDoNothingMiddlew
|
||||
pass
|
||||
|
||||
|
||||
class NotGeneratorOutputChainSpider(Spider):
|
||||
name = 'NotGeneratorOutputChainSpider'
|
||||
custom_settings = {
|
||||
'SPIDER_MIDDLEWARES': {
|
||||
NotGeneratorFailMiddleware: 10,
|
||||
NotGeneratorDoNothingAfterFailureMiddleware: 8,
|
||||
NotGeneratorRecoverMiddleware: 5,
|
||||
NotGeneratorDoNothingAfterRecoveryMiddleware: 3,
|
||||
},
|
||||
}
|
||||
|
||||
def start_requests(self):
|
||||
return [Request(self.mockserver.url('/status?n=200'))]
|
||||
|
||||
def parse(self, response):
|
||||
return [{'processed': ['parse-first-item']}, {'processed': ['parse-second-item']}]
|
||||
|
||||
|
||||
# ================================================================================
|
||||
class TestSpiderMiddleware(TestCase):
|
||||
@classmethod
|
||||
|
Loading…
x
Reference in New Issue
Block a user