1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-06 10:24:24 +00:00

Deprecate and disable escape_ajax().

This commit is contained in:
Andrey Rakhmatullin 2025-02-02 23:23:51 +05:00
parent 393ff96e45
commit 4842bcbf1d
4 changed files with 9 additions and 17 deletions

View File

@ -9,6 +9,7 @@ from w3lib import html
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
from scrapy.http import HtmlResponse, Response
from scrapy.utils.url import escape_ajax
if TYPE_CHECKING:
# typing.Self requires Python 3.11
@ -64,8 +65,7 @@ class AjaxCrawlMiddleware:
if not self._has_ajax_crawlable_variant(response):
return response
# scrapy already handles #! links properly
ajax_crawl_request = request.replace(url=request.url + "#!")
ajax_crawl_request = request.replace(url=escape_ajax(request.url + "#!"))
logger.debug(
"Downloading AJAX crawlable %(ajax_crawl_request)s instead of %(request)s",
{"ajax_crawl_request": ajax_crawl_request, "request": request},

View File

@ -27,7 +27,6 @@ from scrapy.http.headers import Headers
from scrapy.utils.curl import curl_to_request_kwargs
from scrapy.utils.python import to_bytes
from scrapy.utils.trackref import object_ref
from scrapy.utils.url import escape_ajax
if TYPE_CHECKING:
from collections.abc import Callable, Iterable, Mapping
@ -170,8 +169,7 @@ class Request(object_ref):
if not isinstance(url, str):
raise TypeError(f"Request url must be str, got {type(url).__name__}")
s = safe_url_string(url, self.encoding)
self._url = escape_ajax(s)
self._url = safe_url_string(url, self.encoding)
if (
"://" not in self._url

View File

@ -10,6 +10,7 @@ import warnings
from importlib import import_module
from typing import TYPE_CHECKING, Union
from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
from warnings import warn
from w3lib.url import __all__ as _public_w3lib_objects
from w3lib.url import add_or_replace_parameter as _add_or_replace_parameter
@ -83,6 +84,11 @@ def escape_ajax(url: str) -> str:
>>> escape_ajax("www.example.com/ajax.html")
'www.example.com/ajax.html'
"""
warn(
"escape_ajax() is deprecated and will be removed in a future Scrapy version.",
ScrapyDeprecationWarning,
stacklevel=2,
)
defrag, frag = urldefrag(url)
if not frag.startswith("!"):
return url

View File

@ -187,18 +187,6 @@ class RequestTest(unittest.TestCase):
assert isinstance(r4.body, bytes)
self.assertEqual(r4.body, b"Price: \xa3100")
def test_ajax_url(self):
# A request built from an AJAX-crawlable "#!" URL is expected to expose the
# equivalent "?_escaped_fragment_=" URL (with "=" percent-encoded) via r.url.
# ascii url
r = self.request_class(url="http://www.example.com/ajax.html#!key=value")
self.assertEqual(
r.url, "http://www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue"
)
# unicode url
# NOTE(review): this literal is identical to the ASCII case above, so the
# second check repeats the first rather than covering a non-ASCII URL.
r = self.request_class(url="http://www.example.com/ajax.html#!key=value")
self.assertEqual(
r.url, "http://www.example.com/ajax.html?_escaped_fragment_=key%3Dvalue"
)
def test_copy(self):
"""Test Request copy"""