1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-14 14:05:01 +00:00

Make METAREFRESH_IGNORE_TAGS an empty list by default

This commit is contained in:
Adrián Chaves 2020-02-06 10:43:20 +01:00
parent 22f7934fcc
commit 576663e5a7
3 changed files with 11 additions and 9 deletions

View File

@@ -868,7 +868,7 @@ Whether the Meta Refresh middleware will be enabled.
METAREFRESH_IGNORE_TAGS
^^^^^^^^^^^^^^^^^^^^^^^
-Default: ``['script', 'noscript']``
+Default: ``[]``
Meta tags within these tags are ignored.

View File

@@ -225,7 +225,7 @@ MEMUSAGE_NOTIFY_MAIL = []
MEMUSAGE_WARNING_MB = 0
METAREFRESH_ENABLED = True
-METAREFRESH_IGNORE_TAGS = ['script', 'noscript']
+METAREFRESH_IGNORE_TAGS = []
METAREFRESH_MAXDELAY = 100
NEWSPIDER_MODULE = ''

View File

@@ -300,19 +300,21 @@ class MetaRefreshMiddlewareTest(unittest.TestCase):
body = ('''<noscript><meta http-equiv="refresh" '''
'''content="0;URL='http://example.org/newpage'"></noscript>''')
rsp = HtmlResponse(req.url, body=body.encode())
-response = self.mw.process_response(req, rsp, self.spider)
-assert isinstance(response, Response)
+req2 = self.mw.process_response(req, rsp, self.spider)
+assert isinstance(req2, Request)
+self.assertEqual(req2.url, 'http://example.org/newpage')
-def test_ignore_tags_empty_list(self):
-crawler = get_crawler(Spider, {'METAREFRESH_IGNORE_TAGS': []})
+def test_ignore_tags_1_x_list(self):
+"""Test that Scrapy 1.x behavior remains possible"""
+settings = {'METAREFRESH_IGNORE_TAGS': ['script', 'noscript']}
+crawler = get_crawler(Spider, settings)
mw = MetaRefreshMiddleware.from_crawler(crawler)
req = Request(url='http://example.org')
body = ('''<noscript><meta http-equiv="refresh" '''
'''content="0;URL='http://example.org/newpage'"></noscript>''')
rsp = HtmlResponse(req.url, body=body.encode())
-req2 = mw.process_response(req, rsp, self.spider)
-assert isinstance(req2, Request)
-self.assertEqual(req2.url, 'http://example.org/newpage')
+response = mw.process_response(req, rsp, self.spider)
+assert isinstance(response, Response)
if __name__ == "__main__":
unittest.main()