mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-14 15:48:38 +00:00
Test and address ReDoS attack vectors for open_in_browser
This commit is contained in:
parent
40b3efbbee
commit
1533b69032
@ -103,9 +103,9 @@ def open_in_browser(
|
||||
body = response.body
|
||||
if isinstance(response, HtmlResponse):
|
||||
if b"<base" not in body:
|
||||
repl = rf'\1<base href="{response.url}">'
|
||||
body = re.sub(b"<!--.{,1024}?-->", b"", body, flags=re.DOTALL)
|
||||
body = re.sub(rb"(<head(?:>|\s.{,1024}?>))", to_bytes(repl), body)
|
||||
repl = rf'\0<base href="{response.url}">'
|
||||
body = re.sub(b"(?s)<!--.*?(?:-->|$)", b"", body)
|
||||
body = re.sub(rb"<head(?:[^<>]*?>)", to_bytes(repl), body, count=1)
|
||||
ext = ".html"
|
||||
elif isinstance(response, TextResponse):
|
||||
ext = ".txt"
|
||||
|
@ -1,10 +1,12 @@
|
||||
import unittest
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from time import process_time
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||
from scrapy.http import HtmlResponse, Response, TextResponse
|
||||
from scrapy.settings.default_settings import DOWNLOAD_MAXSIZE
|
||||
from scrapy.utils.python import to_bytes
|
||||
from scrapy.utils.response import (
|
||||
get_base_url,
|
||||
@ -198,3 +200,37 @@ class ResponseUtilsTest(unittest.TestCase):
|
||||
assert open_in_browser(
|
||||
r5, _openfunc=check_base_url
|
||||
), "Inject unique base url with conditional comment"
|
||||
|
||||
def test_open_in_browser_redos_comment(self):
    """Check that open_in_browser handles the comment ReDoS input in bounded CPU time.

    The body is built from the exploit input reported by
    https://makenowjust-labs.github.io/recheck/playground/
    for /<!--.*?-->/ (old pattern to remove comments).
    """
    MAX_CPU_TIME = 30

    # The repeated unit is 7 bytes long, so the payload size is derived
    # from DOWNLOAD_MAXSIZE.
    repetitions = int(DOWNLOAD_MAXSIZE / 7) - 10
    payload = b"-><!--\x00" * repetitions + b"->\n<!---->"
    response = HtmlResponse("https://example.com", body=payload)

    # process_time() measures CPU time of the current process, not wall
    # clock time.
    started = process_time()
    # The lambda is passed as the second positional argument; presumably it
    # stands in for the browser opener (other tests pass `_openfunc`).
    open_in_browser(response, lambda url: True)
    elapsed = process_time() - started

    self.assertLess(elapsed, MAX_CPU_TIME)
|
||||
|
||||
def test_open_in_browser_redos_head(self):
    """Check that open_in_browser handles the head ReDoS input in bounded CPU time.

    The body is built from the exploit input reported by
    https://makenowjust-labs.github.io/recheck/playground/
    for /(<head(?:>|\s.*?>))/ (old pattern to find the head element).
    """
    MAX_CPU_TIME = 15

    # The repeated unit is 6 bytes long, so the payload size is derived
    # from DOWNLOAD_MAXSIZE.
    repetitions = int(DOWNLOAD_MAXSIZE / 6)
    payload = b"<head\t" * repetitions
    response = HtmlResponse("https://example.com", body=payload)

    # process_time() measures CPU time of the current process, not wall
    # clock time.
    started = process_time()
    # The lambda is passed as the second positional argument; presumably it
    # stands in for the browser opener (other tests pass `_openfunc`).
    open_in_browser(response, lambda url: True)
    elapsed = process_time() - started

    self.assertLess(elapsed, MAX_CPU_TIME)
|
||||
|
Loading…
x
Reference in New Issue
Block a user