From 576663e5a778646aa4ef870641fa677ea94d21f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Thu, 6 Feb 2020 10:43:20 +0100 Subject: [PATCH] Make METAREFRESH_IGNORE_TAGS an empty list by default --- docs/topics/downloader-middleware.rst | 2 +- scrapy/settings/default_settings.py | 2 +- tests/test_downloadermiddleware_redirect.py | 16 +++++++++------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/docs/topics/downloader-middleware.rst b/docs/topics/downloader-middleware.rst index 8a760e53b..3ec6e0c17 100644 --- a/docs/topics/downloader-middleware.rst +++ b/docs/topics/downloader-middleware.rst @@ -868,7 +868,7 @@ Whether the Meta Refresh middleware will be enabled. METAREFRESH_IGNORE_TAGS ^^^^^^^^^^^^^^^^^^^^^^^ -Default: ``['script', 'noscript']`` +Default: ``[]`` Meta tags within these tags are ignored. diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index c10dc1a1c..1a7d35b13 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -225,7 +225,7 @@ MEMUSAGE_NOTIFY_MAIL = [] MEMUSAGE_WARNING_MB = 0 METAREFRESH_ENABLED = True -METAREFRESH_IGNORE_TAGS = ['script', 'noscript'] +METAREFRESH_IGNORE_TAGS = [] METAREFRESH_MAXDELAY = 100 NEWSPIDER_MODULE = '' diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py index e7faf14a7..e0f145d0e 100644 --- a/tests/test_downloadermiddleware_redirect.py +++ b/tests/test_downloadermiddleware_redirect.py @@ -300,19 +300,21 @@ class MetaRefreshMiddlewareTest(unittest.TestCase): body = ('''''') rsp = HtmlResponse(req.url, body=body.encode()) - response = self.mw.process_response(req, rsp, self.spider) - assert isinstance(response, Response) + req2 = self.mw.process_response(req, rsp, self.spider) + assert isinstance(req2, Request) + self.assertEqual(req2.url, 'http://example.org/newpage') - def test_ignore_tags_empty_list(self): - crawler = get_crawler(Spider, {'METAREFRESH_IGNORE_TAGS': []}) + def test_ignore_tags_1_x_list(self): + """Test that Scrapy 1.x behavior remains possible""" + settings = {'METAREFRESH_IGNORE_TAGS': ['script', 'noscript']} + crawler = get_crawler(Spider, settings) mw = MetaRefreshMiddleware.from_crawler(crawler) req = Request(url='http://example.org') body = ('''''') rsp = HtmlResponse(req.url, body=body.encode()) - req2 = mw.process_response(req, rsp, self.spider) - assert isinstance(req2, Request) - self.assertEqual(req2.url, 'http://example.org/newpage') + response = mw.process_response(req, rsp, self.spider) + assert isinstance(response, Response) if __name__ == "__main__": unittest.main()