# Mirror of https://github.com/scrapy/scrapy.git
# Synced 2025-02-06 11:00:46 +00:00
# 633 lines, 23 KiB, Python
import unittest
|
|
import warnings
|
|
|
|
import pytest
|
|
|
|
from scrapy.linkextractors import IGNORED_EXTENSIONS
|
|
from scrapy.spiders import Spider
|
|
from scrapy.utils.misc import arg_to_iter
|
|
from scrapy.utils.url import ( # type: ignore[attr-defined]
|
|
_is_filesystem_path,
|
|
_public_w3lib_objects,
|
|
add_http_if_no_scheme,
|
|
guess_scheme,
|
|
strip_url,
|
|
url_has_any_extension,
|
|
url_is_from_any_domain,
|
|
url_is_from_spider,
|
|
)
|
|
|
|
# Dotted names of modules whose doctests belong with this test module.
# NOTE(review): presumably read by the project's test tooling to collect those
# doctests; nothing in this file consumes it, so confirm against the runner.
__doctests__ = ["scrapy.utils.url"]
|
|
|
|
|
|
class UrlUtilsTest(unittest.TestCase):
    """Expectations for the domain, spider and extension URL predicates."""

    def _assert_verdict(self, verdict, expected):
        """Assert *verdict* is truthy when *expected* is true, falsy otherwise."""
        if expected:
            self.assertTrue(verdict)
        else:
            self.assertFalse(verdict)

    def _assert_spider_urls(self, spider, accepted, rejected=()):
        """Check url_is_from_spider for *spider*: accepted URLs first, then rejected."""
        for url in accepted:
            self.assertTrue(url_is_from_spider(url, spider))
        for url in rejected:
            self.assertFalse(url_is_from_spider(url, spider))

    def test_url_is_from_any_domain(self):
        script_url = (
            "javascript:%20document.orderform_2581_1190810811.mode.value=%27add%27;%20"
            "javascript:%20document.orderform_2581_1190810811.submit%28%29"
        )
        # (url, candidate domains, expected verdict)
        cases = [
            # A "www." subdomain matches the registered domain, but a mere
            # suffix of the host name ("art.co.uk") must not match.
            (
                "http://www.wheele-bin-art.co.uk/get/product/123",
                ["wheele-bin-art.co.uk"],
                True,
            ),
            ("http://www.wheele-bin-art.co.uk/get/product/123", ["art.co.uk"], False),
            # Same expectations without the subdomain.
            (
                "http://wheele-bin-art.co.uk/get/product/123",
                ["wheele-bin-art.co.uk"],
                True,
            ),
            ("http://wheele-bin-art.co.uk/get/product/123", ["art.co.uk"], False),
            # Letter case is expected not to matter on either side.
            (
                "http://www.Wheele-Bin-Art.co.uk/get/product/123",
                ["wheele-bin-art.CO.UK"],
                True,
            ),
            (
                "http://www.Wheele-Bin-Art.co.uk/get/product/123",
                ["WHEELE-BIN-ART.CO.UK"],
                True,
            ),
            # With an explicit port, only the host:port form is expected to match.
            ("http://192.169.0.15:8080/mypage.html", ["192.169.0.15:8080"], True),
            ("http://192.169.0.15:8080/mypage.html", ["192.169.0.15"], False),
            # javascript: pseudo-URLs never belong to a domain, even when the
            # text happens to end with one.
            (script_url, ["testdomain.com"], False),
            (script_url + ".testdomain.com", ["testdomain.com"], False),
        ]
        for url, domains, expected in cases:
            self._assert_verdict(url_is_from_any_domain(url, domains), expected)

    def test_url_is_from_spider(self):
        self._assert_spider_urls(
            Spider(name="example.com"),
            accepted=[
                "http://www.example.com/some/page.html",
                "http://sub.example.com/some/page.html",
            ],
            rejected=[
                "http://www.example.org/some/page.html",
                "http://www.example.net/some/page.html",
            ],
        )

    def test_url_is_from_spider_class_attributes(self):
        class MySpider(Spider):
            name = "example.com"

        # The spider class itself (not an instance) is passed on purpose.
        self._assert_spider_urls(
            MySpider,
            accepted=[
                "http://www.example.com/some/page.html",
                "http://sub.example.com/some/page.html",
            ],
            rejected=[
                "http://www.example.org/some/page.html",
                "http://www.example.net/some/page.html",
            ],
        )

    def test_url_is_from_spider_with_allowed_domains(self):
        # allowed_domains is exercised as a list, a set and a tuple.
        scenarios = [
            (
                ["example.org", "example.net"],
                [
                    "http://www.example.com/some/page.html",
                    "http://sub.example.com/some/page.html",
                    "http://example.com/some/page.html",
                    "http://www.example.org/some/page.html",
                    "http://www.example.net/some/page.html",
                ],
                ["http://www.example.us/some/page.html"],
            ),
            (
                {"example.com", "example.net"},
                ["http://www.example.com/some/page.html"],
                [],
            ),
            (
                ("example.com", "example.net"),
                ["http://www.example.com/some/page.html"],
                [],
            ),
        ]
        for domains, accepted, rejected in scenarios:
            spider = Spider(name="example.com", allowed_domains=domains)
            self._assert_spider_urls(spider, accepted, rejected)

    def test_url_is_from_spider_with_allowed_domains_class_attributes(self):
        class MySpider(Spider):
            name = "example.com"
            allowed_domains = ("example.org", "example.net")

        self._assert_spider_urls(
            MySpider,
            accepted=[
                "http://www.example.com/some/page.html",
                "http://sub.example.com/some/page.html",
                "http://example.com/some/page.html",
                "http://www.example.org/some/page.html",
                "http://www.example.net/some/page.html",
            ],
            rejected=["http://www.example.us/some/page.html"],
        )

    def test_url_has_any_extension(self):
        deny_extensions = {"." + e for e in arg_to_iter(IGNORED_EXTENSIONS)}
        # (url, expected verdict)
        cases = [
            ("http://www.example.com/archive.tar.gz", True),
            ("http://www.example.com/page.doc", True),
            ("http://www.example.com/page.pdf", True),
            ("http://www.example.com/page.htm", False),
            ("http://www.example.com/", False),
            # Only the final extension is expected to count.
            ("http://www.example.com/page.doc.html", False),
        ]
        for url, expected in cases:
            self._assert_verdict(url_has_any_extension(url, deny_extensions), expected)
|
|
|
|
|
|
class AddHttpIfNoScheme(unittest.TestCase):
    """Expectations for add_http_if_no_scheme on scheme-less, relative and full URLs."""

    def _assert_result(self, url, expected):
        """Assert that add_http_if_no_scheme(url) equals *expected*."""
        self.assertEqual(add_http_if_no_scheme(url), expected)

    # --- URLs without any scheme: "http://" is expected to be prepended ---

    def test_add_scheme(self):
        self._assert_result("www.example.com", "http://www.example.com")

    def test_without_subdomain(self):
        self._assert_result("example.com", "http://example.com")

    def test_path(self):
        self._assert_result(
            "www.example.com/some/page.html",
            "http://www.example.com/some/page.html",
        )

    def test_port(self):
        self._assert_result("www.example.com:80", "http://www.example.com:80")

    def test_fragment(self):
        self._assert_result(
            "www.example.com/some/page#frag",
            "http://www.example.com/some/page#frag",
        )

    def test_query(self):
        self._assert_result(
            "www.example.com/do?a=1&b=2&c=3",
            "http://www.example.com/do?a=1&b=2&c=3",
        )

    def test_username_password(self):
        self._assert_result(
            "username:password@www.example.com",
            "http://username:password@www.example.com",
        )

    def test_complete_url(self):
        self._assert_result(
            "username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
            "http://username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
        )

    # --- URLs that already start with http:// are expected to be unchanged ---

    def test_preserve_http(self):
        self._assert_result("http://www.example.com", "http://www.example.com")

    def test_preserve_http_without_subdomain(self):
        self._assert_result("http://example.com", "http://example.com")

    def test_preserve_http_path(self):
        self._assert_result(
            "http://www.example.com/some/page.html",
            "http://www.example.com/some/page.html",
        )

    def test_preserve_http_port(self):
        self._assert_result(
            "http://www.example.com:80",
            "http://www.example.com:80",
        )

    def test_preserve_http_fragment(self):
        self._assert_result(
            "http://www.example.com/some/page#frag",
            "http://www.example.com/some/page#frag",
        )

    def test_preserve_http_query(self):
        self._assert_result(
            "http://www.example.com/do?a=1&b=2&c=3",
            "http://www.example.com/do?a=1&b=2&c=3",
        )

    def test_preserve_http_username_password(self):
        self._assert_result(
            "http://username:password@www.example.com",
            "http://username:password@www.example.com",
        )

    def test_preserve_http_complete_url(self):
        self._assert_result(
            "http://username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
            "http://username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
        )

    # --- Protocol-relative URLs ("//host"): "http:" is expected in front ---

    def test_protocol_relative(self):
        self._assert_result("//www.example.com", "http://www.example.com")

    def test_protocol_relative_without_subdomain(self):
        self._assert_result("//example.com", "http://example.com")

    def test_protocol_relative_path(self):
        self._assert_result(
            "//www.example.com/some/page.html",
            "http://www.example.com/some/page.html",
        )

    def test_protocol_relative_port(self):
        self._assert_result("//www.example.com:80", "http://www.example.com:80")

    def test_protocol_relative_fragment(self):
        self._assert_result(
            "//www.example.com/some/page#frag",
            "http://www.example.com/some/page#frag",
        )

    def test_protocol_relative_query(self):
        self._assert_result(
            "//www.example.com/do?a=1&b=2&c=3",
            "http://www.example.com/do?a=1&b=2&c=3",
        )

    def test_protocol_relative_username_password(self):
        self._assert_result(
            "//username:password@www.example.com",
            "http://username:password@www.example.com",
        )

    def test_protocol_relative_complete_url(self):
        self._assert_result(
            "//username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
            "http://username:password@www.example.com:80/some/page/do?a=1&b=2&c=3#frag",
        )

    # --- Other explicit schemes are expected to be left alone ---

    def test_preserve_https(self):
        self._assert_result("https://www.example.com", "https://www.example.com")

    def test_preserve_ftp(self):
        self._assert_result("ftp://www.example.com", "ftp://www.example.com")
|
|
|
|
|
|
class GuessSchemeTest(unittest.TestCase):
    """Empty container; its test methods are attached with setattr() further down."""
|
|
|
|
|
|
def create_guess_scheme_t(args):
    """Build a test method checking the scheme that guess_scheme() picks.

    ``args[0]`` is the raw input handed to ``guess_scheme`` and ``args[1]`` is
    the prefix the resulting URL must start with. Both are read only when the
    returned method runs.
    """

    def do_expected(self):
        guessed = guess_scheme(args[0])
        has_expected_prefix = guessed.startswith(args[1])
        assert has_expected_prefix, (
            f"Wrong scheme guessed: for `{args[0]}` got `{guessed}`, expected `{args[1]}...`"
        )

    return do_expected
|
|
|
|
|
|
def create_skipped_scheme_t(args):
    """Build a test method that is always skipped.

    ``args[2]`` is the skip reason; it is looked up when the returned method
    runs and passed to ``unittest.SkipTest``.
    """

    def do_expected(self):
        reason = args[2]
        raise unittest.SkipTest(reason)

    return do_expected
|
|
|
|
|
|
# Attach one numbered test method to GuessSchemeTest per
# (input, expected URL prefix) pair; numbering starts at 001.
for k, args in enumerate(
    (
        # Inputs expected to yield a file:// URL.
        ("/index", "file://"),
        ("/index.html", "file://"),
        ("./index.html", "file://"),
        ("../index.html", "file://"),
        ("../../index.html", "file://"),
        ("./data/index.html", "file://"),
        (".hidden/data/index.html", "file://"),
        ("/home/user/www/index.html", "file://"),
        ("//home/user/www/index.html", "file://"),
        ("file:///home/user/www/index.html", "file://"),
        # Inputs expected to yield an http:// URL.
        ("index.html", "http://"),
        ("example.com", "http://"),
        ("www.example.com", "http://"),
        ("www.example.com/index.html", "http://"),
        ("http://example.com", "http://"),
        ("http://example.com/index.html", "http://"),
        ("localhost", "http://"),
        ("localhost/index.html", "http://"),
        # some corner cases (default to http://)
        ("/", "http://"),
        (".../test", "http://"),
    ),
    1,
):
    t_method = create_guess_scheme_t(args)
    t_method.__name__ = f"test_uri_{k:03}"
    setattr(GuessSchemeTest, t_method.__name__, t_method)
|
|
|
|
# TODO: the following tests do not pass with current implementation
# Each entry is (input, expected URL prefix, skip reason); the generated
# methods only raise SkipTest with that reason.
for k, skip_args in enumerate(
    (
        (
            r"C:\absolute\path\to\a\file.html",
            "file://",
            "Windows filepath are not supported for scrapy shell",
        ),
    ),
    1,
):
    t_method = create_skipped_scheme_t(skip_args)
    t_method.__name__ = f"test_uri_skipped_{k:03}"
    setattr(GuessSchemeTest, t_method.__name__, t_method)
|
|
|
|
|
|
class StripUrl(unittest.TestCase):
    """Expectations for strip_url under its different keyword options."""

    def _assert_stripped(self, pairs, **options):
        """Assert strip_url(source, **options) == expected for every pair, in order."""
        for source, expected in pairs:
            self.assertEqual(strip_url(source, **options), expected)

    def test_noop(self):
        unchanged = "http://www.example.com/index.html"
        self.assertEqual(strip_url(unchanged), "http://www.example.com/index.html")

    def test_noop_query_string(self):
        unchanged = "http://www.example.com/index.html?somekey=somevalue"
        self.assertEqual(
            strip_url(unchanged),
            "http://www.example.com/index.html?somekey=somevalue",
        )

    def test_fragments(self):
        # With strip_fragment=False the "#section" part must survive.
        kept = strip_url(
            "http://www.example.com/index.html?somekey=somevalue#section",
            strip_fragment=False,
        )
        self.assertEqual(
            kept,
            "http://www.example.com/index.html?somekey=somevalue#section",
        )

    def test_path(self):
        # (input, origin_only flag, expected output)
        expectations = (
            ("http://www.example.com/", False, "http://www.example.com/"),
            ("http://www.example.com", False, "http://www.example.com"),
            ("http://www.example.com", True, "http://www.example.com/"),
        )
        for source, origin, expected in expectations:
            self.assertEqual(strip_url(source, origin_only=origin), expected)

    def test_credentials(self):
        self._assert_stripped(
            (
                (
                    "http://username@www.example.com/index.html?somekey=somevalue#section",
                    "http://www.example.com/index.html?somekey=somevalue",
                ),
                (
                    "https://username:@www.example.com/index.html?somekey=somevalue#section",
                    "https://www.example.com/index.html?somekey=somevalue",
                ),
                (
                    "ftp://username:password@www.example.com/index.html?somekey=somevalue#section",
                    "ftp://www.example.com/index.html?somekey=somevalue",
                ),
            ),
            strip_credentials=True,
        )

    def test_credentials_encoded_delims(self):
        self._assert_stripped(
            (
                # user: "username@"
                # password: none
                (
                    "http://username%40@www.example.com/index.html?somekey=somevalue#section",
                    "http://www.example.com/index.html?somekey=somevalue",
                ),
                # user: "username:pass"
                # password: ""
                (
                    "https://username%3Apass:@www.example.com/index.html?somekey=somevalue#section",
                    "https://www.example.com/index.html?somekey=somevalue",
                ),
                # user: "me"
                # password: "user@domain.com"
                (
                    "ftp://me:user%40domain.com@www.example.com/index.html?somekey=somevalue#section",
                    "ftp://www.example.com/index.html?somekey=somevalue",
                ),
            ),
            strip_credentials=True,
        )

    def test_default_ports_creds_off(self):
        # Called with no options at all: the expected outputs show credentials,
        # fragments and scheme-default ports removed.
        self._assert_stripped(
            (
                (
                    "http://username:password@www.example.com:80/index.html?somekey=somevalue#section",
                    "http://www.example.com/index.html?somekey=somevalue",
                ),
                (
                    "http://username:password@www.example.com:8080/index.html#section",
                    "http://www.example.com:8080/index.html",
                ),
                (
                    "http://username:password@www.example.com:443/index.html?somekey=somevalue&someotherkey=sov#section",
                    "http://www.example.com:443/index.html?somekey=somevalue&someotherkey=sov",
                ),
                (
                    "https://username:password@www.example.com:443/index.html",
                    "https://www.example.com/index.html",
                ),
                (
                    "https://username:password@www.example.com:442/index.html",
                    "https://www.example.com:442/index.html",
                ),
                (
                    "https://username:password@www.example.com:80/index.html",
                    "https://www.example.com:80/index.html",
                ),
                (
                    "ftp://username:password@www.example.com:21/file.txt",
                    "ftp://www.example.com/file.txt",
                ),
                (
                    "ftp://username:password@www.example.com:221/file.txt",
                    "ftp://www.example.com:221/file.txt",
                ),
            )
        )

    def test_default_ports(self):
        self._assert_stripped(
            (
                (
                    "http://username:password@www.example.com:80/index.html",
                    "http://username:password@www.example.com/index.html",
                ),
                (
                    "http://username:password@www.example.com:8080/index.html",
                    "http://username:password@www.example.com:8080/index.html",
                ),
                (
                    "http://username:password@www.example.com:443/index.html",
                    "http://username:password@www.example.com:443/index.html",
                ),
                (
                    "https://username:password@www.example.com:443/index.html",
                    "https://username:password@www.example.com/index.html",
                ),
                (
                    "https://username:password@www.example.com:442/index.html",
                    "https://username:password@www.example.com:442/index.html",
                ),
                (
                    "https://username:password@www.example.com:80/index.html",
                    "https://username:password@www.example.com:80/index.html",
                ),
                (
                    "ftp://username:password@www.example.com:21/file.txt",
                    "ftp://username:password@www.example.com/file.txt",
                ),
                (
                    "ftp://username:password@www.example.com:221/file.txt",
                    "ftp://username:password@www.example.com:221/file.txt",
                ),
            ),
            strip_default_port=True,
            strip_credentials=False,
        )

    def test_default_ports_keep(self):
        self._assert_stripped(
            (
                (
                    "http://username:password@www.example.com:80/index.html?somekey=somevalue&someotherkey=sov#section",
                    "http://username:password@www.example.com:80/index.html?somekey=somevalue&someotherkey=sov",
                ),
                (
                    "http://username:password@www.example.com:8080/index.html?somekey=somevalue&someotherkey=sov#section",
                    "http://username:password@www.example.com:8080/index.html?somekey=somevalue&someotherkey=sov",
                ),
                (
                    "http://username:password@www.example.com:443/index.html",
                    "http://username:password@www.example.com:443/index.html",
                ),
                (
                    "https://username:password@www.example.com:443/index.html",
                    "https://username:password@www.example.com:443/index.html",
                ),
                (
                    "https://username:password@www.example.com:442/index.html",
                    "https://username:password@www.example.com:442/index.html",
                ),
                (
                    "https://username:password@www.example.com:80/index.html",
                    "https://username:password@www.example.com:80/index.html",
                ),
                (
                    "ftp://username:password@www.example.com:21/file.txt",
                    "ftp://username:password@www.example.com:21/file.txt",
                ),
                (
                    "ftp://username:password@www.example.com:221/file.txt",
                    "ftp://username:password@www.example.com:221/file.txt",
                ),
            ),
            strip_default_port=False,
            strip_credentials=False,
        )

    def test_origin_only(self):
        self._assert_stripped(
            (
                (
                    "http://username:password@www.example.com/index.html",
                    "http://www.example.com/",
                ),
                (
                    "http://username:password@www.example.com:80/foo/bar?query=value#somefrag",
                    "http://www.example.com/",
                ),
                (
                    "http://username:password@www.example.com:8008/foo/bar?query=value#somefrag",
                    "http://www.example.com:8008/",
                ),
                (
                    "https://username:password@www.example.com:443/index.html",
                    "https://www.example.com/",
                ),
            ),
            origin_only=True,
        )
|
|
|
|
|
|
class IsPathTestCase(unittest.TestCase):
    """Expectations for _is_filesystem_path on path-like and URL inputs."""

    def test_path(self):
        # https://en.wikipedia.org/wiki/Path_(computing)#Representations_of_paths_by_operating_system_and_shell
        # Unix-like OS, Microsoft Windows / cmd.exe
        filesystem_paths = (
            "/home/user/docs/Letter.txt",
            "./inthisdir",
            "../../greatgrandparent",
            "~/.rcinfo",
            r"C:\user\docs\Letter.txt",
            "/user/docs/Letter.txt",
            r"C:\Letter.txt",
            r"\\Server01\user\docs\Letter.txt",
            r"\\?\UNC\Server01\user\docs\Letter.txt",
            r"\\?\C:\user\docs\Letter.txt",
            r"C:\user\docs\somefile.ext:alternate_stream_name",
        )
        not_filesystem_paths = (r"https://example.com",)

        # The input doubles as the failure message so a failing case names itself.
        for candidate in filesystem_paths:
            self.assertEqual(_is_filesystem_path(candidate), True, candidate)
        for candidate in not_filesystem_paths:
            self.assertEqual(_is_filesystem_path(candidate), False, candidate)
|
|
|
|
|
|
@pytest.mark.parametrize(
    "obj_name",
    [
        "_unquotepath",
        "_safe_chars",
        "parse_url",
        *_public_w3lib_objects,
    ],
)
def test_deprecated_imports_from_w3lib(obj_name):
    """Check that reading ``scrapy.utils.url.<obj_name>`` emits the deprecation message.

    ``obj_name`` is each of the three names listed explicitly plus every entry
    of ``_public_w3lib_objects``. The expected message calls ``_safe_chars`` an
    "attribute" and every other name a "function".
    """
    from importlib import import_module

    obj_type = "attribute" if obj_name == "_safe_chars" else "function"
    message = f"The scrapy.utils.url.{obj_name} {obj_type} is deprecated, use w3lib.url.{obj_name} instead."

    with warnings.catch_warnings(record=True) as warns:
        # Record every warning regardless of the filters active when the test
        # starts; otherwise a default/"once" filter or an earlier access from
        # the same location could suppress it and leave ``warns`` empty.
        warnings.simplefilter("always")
        getattr(import_module("scrapy.utils.url"), obj_name)

    # Fail with a readable message instead of an IndexError on ``warns[0]``.
    assert warns, f"no warning was recorded when accessing scrapy.utils.url.{obj_name}"
    assert message in warns[0].message.args
|