Mirror of https://github.com/scrapy/scrapy.git
Enable RUF Ruff rules.

commit 838ff99f37 (parent ee239d2451)
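The diff below has two parts: the pyproject.toml changes that add "RUF" to Ruff's extend-select list (and ignore a few RUF codes), and the mechanical code rewrites the newly enabled rules require. The commit does not name individual rules, but as a hedged reading the rewrites match: iterable unpacking instead of sequence concatenation (RUF005), next(iter(...)) instead of indexing a throwaway list (RUF015), sorted __all__ and __slots__ (RUF022, RUF023), explicit parentheses in mixed and/or conditions, and removal of # noqa comments that no longer suppress anything (RUF100). A minimal sketch of the two most frequent patterns, with hypothetical names:

    # RUF005-style rewrite: build sequences by unpacking instead of concatenating.
    base_attributes = ("url", "callback")
    attributes = base_attributes + ("encoding",)   # before
    attributes = (*base_attributes, "encoding")    # after

    # RUF015-style rewrite: take the first element without materializing a list.
    crawlers = {"example-crawler"}
    first = list(crawlers)[0]       # before
    first = next(iter(crawlers))    # after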
@@ -246,6 +246,8 @@ extend-select = [
     "RET",
     # flake8-raise
     "RSE",
+    # Ruff-specific rules
+    "RUF",
     # flake8-bandit
     "S",
     # flake8-slots
@@ -324,6 +326,14 @@ ignore = [
     "PLR2004",
     # `for` loop variable overwritten by assignment target
     "PLW2901",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
     # Use of `assert` detected; needed for mypy
     "S101",
     # FTP-related functions are being called; https://github.com/scrapy/scrapy/issues/4180
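The newly ignored codes opt the project out of four RUF checks rather than fixing them: RUF001-RUF003 flag ambiguous Unicode characters in strings, docstrings and comments, and RUF012 wants mutable class attributes annotated with typing.ClassVar. A minimal sketch of what RUF012 would otherwise ask for, using a hypothetical class:

    from typing import ClassVar

    class SpiderLike:
        # Left unannotated, RUF012 flags this attribute: the mutable dict is
        # shared by all instances. The suggested fix is the ClassVar annotation.
        custom_settings: ClassVar[dict[str, str]] = {}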
@@ -13,14 +13,14 @@ from scrapy.selector import Selector
 from scrapy.spiders import Spider

 __all__ = [
+    "Field",
+    "FormRequest",
+    "Item",
+    "Request",
+    "Selector",
+    "Spider",
     "__version__",
     "version_info",
-    "Spider",
-    "Request",
-    "FormRequest",
-    "Selector",
-    "Item",
-    "Field",
 ]
@@ -39,9 +39,8 @@ class Command(BaseRunSpiderCommand):
         else:
             self.crawler_process.start()

-            if (
-                self.crawler_process.bootstrap_failed
-                or hasattr(self.crawler_process, "has_exception")
+            if self.crawler_process.bootstrap_failed or (
+                hasattr(self.crawler_process, "has_exception")
                 and self.crawler_process.has_exception
             ):
                 self.exitcode = 1
@@ -269,7 +269,7 @@ class Command(BaseRunSpiderCommand):
         assert self.crawler_process
         assert self.spidercls
         self.crawler_process.crawl(self.spidercls, **opts.spargs)
-        self.pcrawler = list(self.crawler_process.crawlers)[0]
+        self.pcrawler = next(iter(self.crawler_process.crawlers))
         self.crawler_process.start()

         if not self.first_response:
@@ -20,7 +20,7 @@ def _import_file(filepath: str | PathLike[str]) -> ModuleType:
     if abspath.suffix not in (".py", ".pyw"):
         raise ValueError(f"Not a Python source file: {abspath}")
     dirname = str(abspath.parent)
-    sys.path = [dirname] + sys.path
+    sys.path = [dirname, *sys.path]
     try:
         module = import_module(abspath.stem)
     finally:
@@ -101,12 +101,14 @@ class BaseRedirectMiddleware:
         if ttl and redirects <= self.max_redirect_times:
             redirected.meta["redirect_times"] = redirects
             redirected.meta["redirect_ttl"] = ttl - 1
-            redirected.meta["redirect_urls"] = request.meta.get("redirect_urls", []) + [
-                request.url
+            redirected.meta["redirect_urls"] = [
+                *request.meta.get("redirect_urls", []),
+                request.url,
             ]
+            redirected.meta["redirect_reasons"] = [
+                *request.meta.get("redirect_reasons", []),
+                reason,
+            ]
-            redirected.meta["redirect_reasons"] = request.meta.get(
-                "redirect_reasons", []
-            ) + [reason]
             redirected.dont_filter = request.dont_filter
             redirected.priority = request.priority + self.priority_adjust
             logger.debug(
@@ -25,13 +25,13 @@ if TYPE_CHECKING:

 __all__ = [
     "BaseItemExporter",
-    "PprintItemExporter",
-    "PickleItemExporter",
     "CsvItemExporter",
-    "XmlItemExporter",
-    "JsonLinesItemExporter",
     "JsonItemExporter",
+    "JsonLinesItemExporter",
     "MarshalItemExporter",
+    "PickleItemExporter",
+    "PprintItemExporter",
+    "XmlItemExporter",
 ]
@@ -77,4 +77,4 @@ class Debugger:

     def _enter_debugger(self, signum: int, frame: FrameType | None) -> None:
         assert frame
-        Pdb().set_trace(frame.f_back)  # noqa: T100
+        Pdb().set_trace(frame.f_back)
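The dropped # noqa: T100 above, like the # noqa: F841 removals in the trackref tests at the end of the diff, is consistent with RUF100, which reports suppression comments whose codes do not actually raise anything on that line; this attribution is a hedged reading, not stated in the commit. A minimal illustration:

    # E402 only applies to imports that are *not* at the top of the file, so this
    # directive suppresses nothing; RUF100 reports it and `ruff check --fix`
    # removes the stale comment.
    import sys  # noqa: E402

    print(sys.version)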
@@ -20,7 +20,7 @@ if TYPE_CHECKING:


 class JsonRequest(Request):
-    attributes: tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
+    attributes: tuple[str, ...] = (*Request.attributes, "dumps_kwargs")

     def __init__(
         self, *args: Any, dumps_kwargs: dict[str, Any] | None = None, **kwargs: Any
@@ -43,7 +43,7 @@ class TextResponse(Response):
     _DEFAULT_ENCODING = "ascii"
     _cached_decoded_json = _NONE

-    attributes: tuple[str, ...] = Response.attributes + ("encoding",)
+    attributes: tuple[str, ...] = (*Response.attributes, "encoding")

     def __init__(self, *args: Any, **kwargs: Any):
         self._encoding: str | None = kwargs.pop("encoding", None)
@@ -24,7 +24,7 @@ class Link:
     of the anchor tag.
     """

-    __slots__ = ["url", "text", "fragment", "nofollow"]
+    __slots__ = ["fragment", "nofollow", "text", "url"]

     def __init__(
         self, url: str, text: str = "", fragment: str = "", nofollow: bool = False
@@ -76,8 +76,8 @@ class LogFormatter:
         self, request: Request, response: Response, spider: Spider
     ) -> LogFormatterResult:
         """Logs a message when the crawler finds a webpage."""
-        request_flags = f" {str(request.flags)}" if request.flags else ""
-        response_flags = f" {str(response.flags)}" if response.flags else ""
+        request_flags = f" {request.flags!s}" if request.flags else ""
+        response_flags = f" {response.flags!s}" if response.flags else ""
         return {
             "level": logging.DEBUG,
             "msg": CRAWLEDMSG,
@@ -127,8 +127,7 @@ class MediaPipeline(ABC):
         if (
             not base_class_name
             or class_name == base_class_name
-            or settings
-            and not settings.get(formatted_key)
+            or (settings and not settings.get(formatted_key))
         ):
             return key
         return formatted_key
@@ -195,8 +195,7 @@ class StrictOriginPolicy(ReferrerPolicy):
         if (
             self.tls_protected(response_url)
             and self.potentially_trustworthy(request_url)
-            or not self.tls_protected(response_url)
-        ):
+        ) or not self.tls_protected(response_url):
             return self.origin_referrer(response_url)
         return None
@@ -249,8 +248,7 @@ class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
         if (
             self.tls_protected(response_url)
             and self.potentially_trustworthy(request_url)
-            or not self.tls_protected(response_url)
-        ):
+        ) or not self.tls_protected(response_url):
             return self.origin_referrer(response_url)
         return None
@@ -282,7 +280,7 @@ class DefaultReferrerPolicy(NoReferrerWhenDowngradePolicy):
     using ``file://`` or ``s3://`` scheme.
     """

-    NOREFERRER_SCHEMES: tuple[str, ...] = LOCAL_SCHEMES + ("file", "s3")
+    NOREFERRER_SCHEMES: tuple[str, ...] = (*LOCAL_SCHEMES, "file", "s3")
     name: str = POLICY_SCRAPY_DEFAULT
@@ -252,7 +252,9 @@ def is_generator_with_return_value(callable: Callable[..., Any]) -> bool:

     def returns_none(return_node: ast.Return) -> bool:
         value = return_node.value
-        return value is None or isinstance(value, ast.Constant) and value.value is None
+        return value is None or (
+            isinstance(value, ast.Constant) and value.value is None
+        )

     if inspect.isgeneratorfunction(callable):
         func = callable
@@ -100,7 +100,7 @@ def install_reactor(reactor_path: str, event_loop_path: str | None = None) -> No
         asyncioreactor.install(eventloop=event_loop)
     else:
         *module, _ = reactor_path.split(".")
-        installer_path = module + ["install"]
+        installer_path = [*module, "install"]
         installer = load_object(".".join(installer_path))
         with suppress(error.ReactorAlreadyInstalledError):
             installer()
@@ -229,7 +229,8 @@ def request_to_curl(request: Request) -> str:
         cookies = f"--cookie '{cookie}'"
     elif isinstance(request.cookies, list):
         cookie = "; ".join(
-            f"{list(c.keys())[0]}={list(c.values())[0]}" for c in request.cookies
+            f"{next(iter(c.keys()))}={next(iter(c.values()))}"
+            for c in request.cookies
         )
         cookies = f"--cookie '{cookie}'"
@@ -31,7 +31,7 @@ class ProcessTest:
         if settings is not None:
             env["SCRAPY_SETTINGS_MODULE"] = settings
         assert self.command
-        cmd = self.prefix + [self.command] + list(args)
+        cmd = [*self.prefix, self.command, *args]
         pp = TestProcessProtocol()
         pp.deferred.addCallback(self._process_finished, cmd, check_code)
         reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
@@ -51,7 +51,7 @@ def url_is_from_any_domain(url: UrlT, domains: Iterable[str]) -> bool:
 def url_is_from_spider(url: UrlT, spider: type[Spider]) -> bool:
     """Return True if the url belongs to the given spider"""
     return url_is_from_any_domain(
-        url, [spider.name] + list(getattr(spider, "allowed_domains", []))
+        url, [spider.name, *getattr(spider, "allowed_domains", [])]
     )
@@ -11,7 +11,7 @@ from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.settings.default_settings import LOG_VERSIONS
 from scrapy.utils.ssl import get_openssl_version

-_DEFAULT_SOFTWARE = ["Scrapy"] + LOG_VERSIONS
+_DEFAULT_SOFTWARE = ["Scrapy", *LOG_VERSIONS]


 def _version(item):
@@ -21,7 +21,7 @@ class CmdlineTest(unittest.TestCase):

     def _execute(self, *new_args, **kwargs):
         encoding = sys.stdout.encoding or "utf-8"
-        args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+        args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
         proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
         comm = proc.communicate()[0].strip()
         return comm.decode(encoding)
@@ -87,13 +87,13 @@ class ProjectTest(unittest.TestCase):

     def call(self, *new_args, **kwargs):
         with TemporaryFile() as out:
-            args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+            args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
             return subprocess.call(
                 args, stdout=out, stderr=out, cwd=self.cwd, env=self.env, **kwargs
             )

     def proc(self, *new_args, **popen_kwargs):
-        args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+        args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
         p = subprocess.Popen(
             args,
             cwd=popen_kwargs.pop("cwd", self.cwd),
@@ -647,7 +647,7 @@ class ScriptRunnerMixin:

     def get_script_args(self, script_name: str, *script_args: str) -> list[str]:
         script_path = self.script_dir / script_name
-        return [sys.executable, str(script_path)] + list(script_args)
+        return [sys.executable, str(script_path), *script_args]

     def run_script(self, script_name: str, *script_args: str) -> str:
         args = self.get_script_args(script_name, *script_args)
@@ -114,7 +114,7 @@ class RetryTest(unittest.TestCase):
     def test_exception_to_retry_added(self):
         exc = ValueError
         settings_dict = {
-            "RETRY_EXCEPTIONS": list(RETRY_EXCEPTIONS) + [exc],
+            "RETRY_EXCEPTIONS": [*RETRY_EXCEPTIONS, exc],
         }
         crawler = get_crawler(Spider, settings_dict=settings_dict)
         mw = RetryMiddleware.from_crawler(crawler)
@@ -31,7 +31,7 @@ class DownloaderSlotsSettingsTestSpider(MetaSpider):
     def start_requests(self):
         self.times = {None: []}

-        slots = list(self.custom_settings.get("DOWNLOAD_SLOTS", {}).keys()) + [None]
+        slots = [*self.custom_settings.get("DOWNLOAD_SLOTS", {}), None]

         for slot in slots:
             url = self.mockserver.url(f"/?downloader_slot={slot}")
@@ -116,7 +116,7 @@ class BaseItemExporterTest(unittest.TestCase):
         )

         ie = self._get_exporter(fields_to_export=["name"], encoding="latin-1")
-        _, name = list(ie._get_serialized_fields(self.i))[0]
+        _, name = next(iter(ie._get_serialized_fields(self.i)))
         assert isinstance(name, str)
         self.assertEqual(name, "John\xa3")
@@ -960,7 +960,7 @@ class XmlResponseTest(TextResponseTest):


 class CustomResponse(TextResponse):
-    attributes = TextResponse.attributes + ("foo", "bar")
+    attributes = (*TextResponse.attributes, "foo", "bar")

     def __init__(self, *args, **kwargs) -> None:
         self.foo = kwargs.pop("foo", None)
@@ -61,11 +61,11 @@ Foo 1 oldest: 0s ago\n\n""",
         )

     def test_get_oldest(self):
-        o1 = Foo()  # noqa: F841
+        o1 = Foo()

         o1_time = time()

-        o2 = Bar()  # noqa: F841
+        o2 = Bar()

         o3_time = time()
         if o3_time <= o1_time:
@@ -80,9 +80,9 @@ Foo 1 oldest: 0s ago\n\n""",
         self.assertIsNone(trackref.get_oldest("XXX"))

     def test_iter_all(self):
-        o1 = Foo()  # noqa: F841
+        o1 = Foo()
         o2 = Bar()  # noqa: F841
-        o3 = Foo()  # noqa: F841
+        o3 = Foo()
         self.assertEqual(
             set(trackref.iter_all("Foo")),
             {o1, o3},