Deprecate and disable escape_ajax().

2025-02-06 10:24:24 +00:00 · 2025-02-02 23:23:51 +05:00 · 2025-02-02 23:23:51 +05:00 · 4842bcbf1d
commit 4842bcbf1d
parent 393ff96e45
4 changed files with 9 additions and 17 deletions
--- a/scrapy/downloadermiddlewares/ajaxcrawl.py
+++ b/scrapy/downloadermiddlewares/ajaxcrawl.py
@@ -9,6 +9,7 @@ from w3lib import html

 from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
 from scrapy.http import HtmlResponse, Response
+from scrapy.utils.url import escape_ajax

 if TYPE_CHECKING:
    # typing.Self requires Python 3.11
@@ -64,8 +65,7 @@ class AjaxCrawlMiddleware:
        if not self._has_ajax_crawlable_variant(response):
            return response

-        # scrapy already handles #! links properly
-        ajax_crawl_request = request.replace(url=request.url + "#!")
+        ajax_crawl_request = request.replace(url=escape_ajax(request.url + "#!"))
        logger.debug(
            "Downloading AJAX crawlable %(ajax_crawl_request)s instead of %(request)s",
            {"ajax_crawl_request": ajax_crawl_request, "request": request},
--- a/scrapy/http/request/__init__.py
+++ b/scrapy/http/request/__init__.py
@@ -27,7 +27,6 @@ from scrapy.http.headers import Headers
 from scrapy.utils.curl import curl_to_request_kwargs
 from scrapy.utils.python import to_bytes
 from scrapy.utils.trackref import object_ref
-from scrapy.utils.url import escape_ajax

 if TYPE_CHECKING:
    from collections.abc import Callable, Iterable, Mapping
@@ -170,8 +169,7 @@ class Request(object_ref):
        if not isinstance(url, str):
            raise TypeError(f"Request url must be str, got {type(url).__name__}")

-        s = safe_url_string(url, self.encoding)
-        self._url = escape_ajax(s)
+        self._url = safe_url_string(url, self.encoding)

        if (
            "://" not in self._url
--- a/scrapy/utils/url.py
+++ b/scrapy/utils/url.py
@@ -10,6 +10,7 @@ import warnings
 from importlib import import_module
 from typing import TYPE_CHECKING, Union
 from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
+from warnings import warn

 from w3lib.url import __all__ as _public_w3lib_objects
 from w3lib.url import add_or_replace_parameter as _add_or_replace_parameter
@@ -83,6 +84,11 @@ def escape_ajax(url: str) -> str:
    >>> escape_ajax("www.example.com/ajax.html")
    'www.example.com/ajax.html'
    """
+    warn(
+        "escape_ajax() is deprecated and will be removed in a future Scrapy version.",
+        ScrapyDeprecationWarning,
+        stacklevel=2,
+    )
    defrag, frag = urldefrag(url)
    if not frag.startswith("!"):
        return url
--- a/tests/test_http_request.py
+++ b/tests/test_http_request.py
@@ -187,18 +187,6 @@ class RequestTest(unittest.TestCase):
        assert isinstance(r4.body, bytes)
        self.assertEqual(r4.body, b"Price: \xa3100")

-    def test_ajax_url(self):
-        # ascii url
-        r = self.request_class(url="http://www.example.com/ajax.html#!key=value")
-        self.assertEqual(
-            r.url, "http://www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue"
-        )
-        # unicode url
-        r = self.request_class(url="http://www.example.com/ajax.html#!key=value")
-        self.assertEqual(
-            r.url, "http://www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue"
-        )
-
    def test_copy(self):
        """Test Request copy"""