mirror of https://github.com/scrapy/scrapy.git synced 2025-03-13 10:33:49 +00:00

Tests: use classes instead of paths in settings ()

Eugenio Lacuesta 2020-10-01 23:11:11 -03:00 committed by GitHub
parent 4f27c5f82b
commit 797a6690c0
6 changed files with 63 additions and 63 deletions
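
Context for the change: Scrapy resolves component settings through scrapy.utils.misc.load_object(), which (as of the Scrapy 2.4 era, when this commit landed) returns a callable object such as a class unchanged instead of requiring a dotted import path. A minimal sketch of the two equivalent styles, using a hypothetical MyDupeFilter subclass:

    from scrapy.dupefilters import RFPDupeFilter
    from scrapy.utils.misc import load_object

    class MyDupeFilter(RFPDupeFilter):
        pass

    # Old style: build a dotted path and have Scrapy import it back.
    assert load_object(__name__ + '.MyDupeFilter') is MyDupeFilter

    # New style: pass the class itself; load_object() returns it as-is.
    assert load_object(MyDupeFilter) is MyDupeFilter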

@@ -43,7 +43,7 @@ class RFPDupeFilterTest(unittest.TestCase):
     def test_df_from_crawler_scheduler(self):
         settings = {'DUPEFILTER_DEBUG': True,
-                    'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
+                    'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
         crawler = get_crawler(settings_dict=settings)
         scheduler = Scheduler.from_crawler(crawler)
         self.assertTrue(scheduler.df.debug)
@@ -51,14 +51,14 @@ class RFPDupeFilterTest(unittest.TestCase):
     def test_df_from_settings_scheduler(self):
         settings = {'DUPEFILTER_DEBUG': True,
-                    'DUPEFILTER_CLASS': __name__ + '.FromSettingsRFPDupeFilter'}
+                    'DUPEFILTER_CLASS': FromSettingsRFPDupeFilter}
         crawler = get_crawler(settings_dict=settings)
         scheduler = Scheduler.from_crawler(crawler)
         self.assertTrue(scheduler.df.debug)
         self.assertEqual(scheduler.df.method, 'from_settings')

     def test_df_direct_scheduler(self):
-        settings = {'DUPEFILTER_CLASS': __name__ + '.DirectDupeFilter'}
+        settings = {'DUPEFILTER_CLASS': DirectDupeFilter}
         crawler = get_crawler(settings_dict=settings)
         scheduler = Scheduler.from_crawler(crawler)
         self.assertEqual(scheduler.df.method, 'n/a')
@@ -162,7 +162,7 @@ class RFPDupeFilterTest(unittest.TestCase):
     def test_log(self):
         with LogCapture() as log:
             settings = {'DUPEFILTER_DEBUG': False,
-                        'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
+                        'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
             crawler = get_crawler(SimpleSpider, settings_dict=settings)
             scheduler = Scheduler.from_crawler(crawler)
             spider = SimpleSpider.from_crawler(crawler)
@@ -191,7 +191,7 @@ class RFPDupeFilterTest(unittest.TestCase):
     def test_log_debug(self):
         with LogCapture() as log:
             settings = {'DUPEFILTER_DEBUG': True,
-                        'DUPEFILTER_CLASS': __name__ + '.FromCrawlerRFPDupeFilter'}
+                        'DUPEFILTER_CLASS': FromCrawlerRFPDupeFilter}
             crawler = get_crawler(SimpleSpider, settings_dict=settings)
             scheduler = Scheduler.from_crawler(crawler)
             spider = SimpleSpider.from_crawler(crawler)

@@ -193,7 +193,7 @@ class ShowOrSkipMessagesTestCase(TwistedTestCase):
         self.base_settings = {
             'LOG_LEVEL': 'DEBUG',
             'ITEM_PIPELINES': {
-                __name__ + '.DropSomeItemsPipeline': 300,
+                DropSomeItemsPipeline: 300,
             },
         }
@@ -212,7 +212,7 @@ class ShowOrSkipMessagesTestCase(TwistedTestCase):
     @defer.inlineCallbacks
     def test_skip_messages(self):
         settings = self.base_settings.copy()
-        settings['LOG_FORMATTER'] = __name__ + '.SkipMessagesLogFormatter'
+        settings['LOG_FORMATTER'] = SkipMessagesLogFormatter
         crawler = CrawlerRunner(settings).create_crawler(ItemSpider)
         with LogCapture() as lc:
             yield crawler.crawl(mockserver=self.mockserver)

@@ -68,7 +68,7 @@ class PipelineTestCase(unittest.TestCase):
     def _create_crawler(self, pipeline_class):
         settings = {
-            'ITEM_PIPELINES': {__name__ + '.' + pipeline_class.__name__: 1},
+            'ITEM_PIPELINES': {pipeline_class: 1},
         }
         crawler = get_crawler(ItemSpider, settings)
         crawler.signals.connect(self._on_item_scraped, signals.item_scraped)

@@ -92,7 +92,7 @@ class CrawlTestCase(TestCase):
         url = self.mockserver.url("/status?n=200")
         runner = CrawlerRunner(settings={
             "DOWNLOADER_MIDDLEWARES": {
-                __name__ + ".RaiseExceptionRequestMiddleware": 590,
+                RaiseExceptionRequestMiddleware: 590,
             },
         })
         crawler = runner.create_crawler(SingleRequestSpider)
@@ -119,7 +119,7 @@ class CrawlTestCase(TestCase):
         url = self.mockserver.url("/status?n=200")
         runner = CrawlerRunner(settings={
             "DOWNLOADER_MIDDLEWARES": {
-                __name__ + ".ProcessResponseMiddleware": 595,
+                ProcessResponseMiddleware: 595,
             }
         })
         crawler = runner.create_crawler(SingleRequestSpider)
@@ -149,8 +149,8 @@ class CrawlTestCase(TestCase):
         url = self.mockserver.url("/status?n=200")
         runner = CrawlerRunner(settings={
             "DOWNLOADER_MIDDLEWARES": {
-                __name__ + ".RaiseExceptionRequestMiddleware": 590,
-                __name__ + ".CatchExceptionOverrideRequestMiddleware": 595,
+                RaiseExceptionRequestMiddleware: 590,
+                CatchExceptionOverrideRequestMiddleware: 595,
             },
         })
         crawler = runner.create_crawler(SingleRequestSpider)
@@ -170,8 +170,8 @@ class CrawlTestCase(TestCase):
         url = self.mockserver.url("/status?n=200")
         runner = CrawlerRunner(settings={
             "DOWNLOADER_MIDDLEWARES": {
-                __name__ + ".RaiseExceptionRequestMiddleware": 590,
-                __name__ + ".CatchExceptionDoNotOverrideRequestMiddleware": 595,
+                RaiseExceptionRequestMiddleware: 590,
+                CatchExceptionDoNotOverrideRequestMiddleware: 595,
             },
         })
         crawler = runner.create_crawler(SingleRequestSpider)
@@ -188,7 +188,7 @@ class CrawlTestCase(TestCase):
         """
         runner = CrawlerRunner(settings={
             "DOWNLOADER_MIDDLEWARES": {
-                __name__ + ".AlternativeCallbacksMiddleware": 595,
+                AlternativeCallbacksMiddleware: 595,
             }
         })
         crawler = runner.create_crawler(AlternativeCallbacksSpider)

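The same pass-through applies to component priority dicts such as DOWNLOADER_MIDDLEWARES and SPIDER_MIDDLEWARES: each key is resolved with load_object() when the middleware chain is built, so a class object works as a key just like a dotted-path string. A hedged sketch of the pattern the hunks above apply (NoopDownloaderMiddleware is a hypothetical stand-in, assuming Scrapy 2.4+):

    from scrapy.crawler import CrawlerRunner

    class NoopDownloaderMiddleware:
        def process_request(self, request, spider):
            return None  # let the request continue unchanged

    runner = CrawlerRunner(settings={
        'DOWNLOADER_MIDDLEWARES': {
            # class object instead of __name__ + '.NoopDownloaderMiddleware'
            NoopDownloaderMiddleware: 590,
        },
    })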
@@ -50,10 +50,10 @@ class KeywordArgumentsSpider(MockServerSpider):
     name = 'kwargs'
     custom_settings = {
         'DOWNLOADER_MIDDLEWARES': {
-            __name__ + '.InjectArgumentsDownloaderMiddleware': 750,
+            InjectArgumentsDownloaderMiddleware: 750,
         },
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.InjectArgumentsSpiderMiddleware': 750,
+            InjectArgumentsSpiderMiddleware: 750,
         },
     }

@@ -16,11 +16,20 @@ class LogExceptionMiddleware:
 # ================================================================================
 # (0) recover from an exception on a spider callback
+class RecoveryMiddleware:
+    def process_spider_exception(self, response, exception, spider):
+        spider.logger.info('Middleware: %s exception caught', exception.__class__.__name__)
+        return [
+            {'from': 'process_spider_exception'},
+            Request(response.url, meta={'dont_fail': True}, dont_filter=True),
+        ]
+
+
 class RecoverySpider(Spider):
     name = 'RecoverySpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.RecoveryMiddleware': 10,
+            RecoveryMiddleware: 10,
         },
     }
@@ -34,15 +43,6 @@ class RecoverySpider(Spider):
         raise TabError()

-
-class RecoveryMiddleware:
-    def process_spider_exception(self, response, exception, spider):
-        spider.logger.info('Middleware: %s exception caught', exception.__class__.__name__)
-        return [
-            {'from': 'process_spider_exception'},
-            Request(response.url, meta={'dont_fail': True}, dont_filter=True),
-        ]
-
 # ================================================================================
 # (1) exceptions from a spider middleware's process_spider_input method
 class FailProcessSpiderInputMiddleware:
@@ -56,9 +56,8 @@ class ProcessSpiderInputSpiderWithoutErrback(Spider):
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
             # spider
-            __name__ + '.LogExceptionMiddleware': 10,
-            __name__ + '.FailProcessSpiderInputMiddleware': 8,
-            __name__ + '.LogExceptionMiddleware': 6,
+            FailProcessSpiderInputMiddleware: 8,
+            LogExceptionMiddleware: 6,
             # engine
         }
     }
@@ -87,7 +86,7 @@ class GeneratorCallbackSpider(Spider):
     name = 'GeneratorCallbackSpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.LogExceptionMiddleware': 10,
+            LogExceptionMiddleware: 10,
         },
     }
@@ -106,7 +105,7 @@ class GeneratorCallbackSpiderMiddlewareRightAfterSpider(GeneratorCallbackSpider)
     name = 'GeneratorCallbackSpiderMiddlewareRightAfterSpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.LogExceptionMiddleware': 100000,
+            LogExceptionMiddleware: 100000,
         },
     }
@@ -117,7 +116,7 @@ class NotGeneratorCallbackSpider(Spider):
     name = 'NotGeneratorCallbackSpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.LogExceptionMiddleware': 10,
+            LogExceptionMiddleware: 10,
         },
     }
@@ -134,32 +133,13 @@ class NotGeneratorCallbackSpiderMiddlewareRightAfterSpider(NotGeneratorCallbackS
     name = 'NotGeneratorCallbackSpiderMiddlewareRightAfterSpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.LogExceptionMiddleware': 100000,
+            LogExceptionMiddleware: 100000,
         },
     }

 # ================================================================================
 # (4) exceptions from a middleware process_spider_output method (generator)
-class GeneratorOutputChainSpider(Spider):
-    name = 'GeneratorOutputChainSpider'
-    custom_settings = {
-        'SPIDER_MIDDLEWARES': {
-            __name__ + '.GeneratorFailMiddleware': 10,
-            __name__ + '.GeneratorDoNothingAfterFailureMiddleware': 8,
-            __name__ + '.GeneratorRecoverMiddleware': 5,
-            __name__ + '.GeneratorDoNothingAfterRecoveryMiddleware': 3,
-        },
-    }
-
-    def start_requests(self):
-        yield Request(self.mockserver.url('/status?n=200'))
-
-    def parse(self, response):
-        yield {'processed': ['parse-first-item']}
-        yield {'processed': ['parse-second-item']}
-
 class _GeneratorDoNothingMiddleware:
     def process_spider_output(self, response, result, spider):
         for r in result:
@@ -205,26 +185,28 @@ class GeneratorDoNothingAfterRecoveryMiddleware(_GeneratorDoNothingMiddleware):
         pass

-# ================================================================================
-# (5) exceptions from a middleware process_spider_output method (not generator)
-class NotGeneratorOutputChainSpider(Spider):
-    name = 'NotGeneratorOutputChainSpider'
+class GeneratorOutputChainSpider(Spider):
+    name = 'GeneratorOutputChainSpider'
     custom_settings = {
         'SPIDER_MIDDLEWARES': {
-            __name__ + '.NotGeneratorFailMiddleware': 10,
-            __name__ + '.NotGeneratorDoNothingAfterFailureMiddleware': 8,
-            __name__ + '.NotGeneratorRecoverMiddleware': 5,
-            __name__ + '.NotGeneratorDoNothingAfterRecoveryMiddleware': 3,
+            GeneratorFailMiddleware: 10,
+            GeneratorDoNothingAfterFailureMiddleware: 8,
+            GeneratorRecoverMiddleware: 5,
+            GeneratorDoNothingAfterRecoveryMiddleware: 3,
         },
     }

     def start_requests(self):
-        return [Request(self.mockserver.url('/status?n=200'))]
+        yield Request(self.mockserver.url('/status?n=200'))

     def parse(self, response):
-        return [{'processed': ['parse-first-item']}, {'processed': ['parse-second-item']}]
+        yield {'processed': ['parse-first-item']}
+        yield {'processed': ['parse-second-item']}

+# ================================================================================
+# (5) exceptions from a middleware process_spider_output method (not generator)
 class _NotGeneratorDoNothingMiddleware:
     def process_spider_output(self, response, result, spider):
         out = []
@@ -276,6 +258,24 @@ class NotGeneratorDoNothingAfterRecoveryMiddleware(_NotGeneratorDoNothingMiddlew
         pass

+class NotGeneratorOutputChainSpider(Spider):
+    name = 'NotGeneratorOutputChainSpider'
+    custom_settings = {
+        'SPIDER_MIDDLEWARES': {
+            NotGeneratorFailMiddleware: 10,
+            NotGeneratorDoNothingAfterFailureMiddleware: 8,
+            NotGeneratorRecoverMiddleware: 5,
+            NotGeneratorDoNothingAfterRecoveryMiddleware: 3,
+        },
+    }
+
+    def start_requests(self):
+        return [Request(self.mockserver.url('/status?n=200'))]
+
+    def parse(self, response):
+        return [{'processed': ['parse-first-item']}, {'processed': ['parse-second-item']}]
+
+
 # ================================================================================
 class TestSpiderMiddleware(TestCase):
     @classmethod