mirror of https://github.com/scrapy/scrapy.git synced 2025-02-06 11:00:46 +00:00

Enable RUF Ruff rules.

Andrey Rakhmatullin 2025-01-01 21:31:04 +05:00
parent ee239d2451
commit 838ff99f37
28 changed files with 62 additions and 51 deletions

@ -246,6 +246,8 @@ extend-select = [
"RET",
# flake8-raise
"RSE",
# Ruff-specific rules
"RUF",
# flake8-bandit
"S",
# flake8-slots
@ -324,6 +326,14 @@ ignore = [
"PLR2004",
# `for` loop variable overwritten by assignment target
"PLW2901",
# String contains ambiguous {}.
"RUF001",
# Docstring contains ambiguous {}.
"RUF002",
# Comment contains ambiguous {}.
"RUF003",
# Mutable class attributes should be annotated with `typing.ClassVar`
"RUF012",
# Use of `assert` detected; needed for mypy
"S101",
# FTP-related functions are being called; https://github.com/scrapy/scrapy/issues/4180
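
The four new ignore entries silence Ruff-specific rules that would otherwise touch much of the codebase: RUF001-RUF003 warn about ambiguous Unicode characters in strings, docstrings and comments, and RUF012 wants mutable class-level defaults annotated with typing.ClassVar. A minimal sketch of the RUF012 pattern, with invented class and attribute names:

    # Illustrative only -- not Scrapy code. RUF012 flags mutable class-level
    # defaults that are not annotated with typing.ClassVar.
    from typing import ClassVar


    class ExampleSpider:
        # RUF012 would flag this: a mutable default shared by every instance.
        allowed_domains = ["example.com"]

        # Annotating it as ClassVar satisfies the rule and documents the intent.
        custom_settings: ClassVar[dict[str, int]] = {"DOWNLOAD_DELAY": 1}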

@ -13,14 +13,14 @@ from scrapy.selector import Selector
from scrapy.spiders import Spider
__all__ = [
"Field",
"FormRequest",
"Item",
"Request",
"Selector",
"Spider",
"__version__",
"version_info",
"Spider",
"Request",
"FormRequest",
"Selector",
"Item",
"Field",
]
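
This reordering comes from RUF022, which keeps __all__ sorted; a sibling rule does the same for the Link class's __slots__ further down in this commit. For these particular names the result matches a plain ASCII sort, which is easy to check, though the rule's ordering scheme has more nuance in general:

    # Illustrative only: plain sorting reproduces the new ordering of __all__.
    names = ["__version__", "version_info", "Spider", "Request",
             "FormRequest", "Selector", "Item", "Field"]
    assert sorted(names) == ["Field", "FormRequest", "Item", "Request",
                             "Selector", "Spider", "__version__", "version_info"]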

@ -39,9 +39,8 @@ class Command(BaseRunSpiderCommand):
else:
self.crawler_process.start()
if (
self.crawler_process.bootstrap_failed
or hasattr(self.crawler_process, "has_exception")
if self.crawler_process.bootstrap_failed or (
hasattr(self.crawler_process, "has_exception")
and self.crawler_process.has_exception
):
self.exitcode = 1
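
The rewrite does not change behaviour: `and` binds tighter than `or`, so the added parentheses only make the existing grouping explicit (one of the RUF readability checks for chained boolean operators). A standalone illustration:

    # Illustrative only: `and` binds tighter than `or`, so both spellings agree.
    for a in (True, False):
        for b in (True, False):
            for c in (True, False):
                assert (a or b and c) == (a or (b and c))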

@ -269,7 +269,7 @@ class Command(BaseRunSpiderCommand):
assert self.crawler_process
assert self.spidercls
self.crawler_process.crawl(self.spidercls, **opts.spargs)
self.pcrawler = list(self.crawler_process.crawlers)[0]
self.pcrawler = next(iter(self.crawler_process.crawlers))
self.crawler_process.start()
if not self.first_response:
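
RUF015 prefers `next(iter(...))` over `list(...)[0]`: both return the first element, but the former does not copy the whole collection first. A small sketch with an invented iterable:

    # Illustrative only: take the first element without building a list.
    crawlers = (name for name in ("spider-a", "spider-b", "spider-c"))

    assert next(iter(crawlers)) == "spider-a"

    # A default avoids StopIteration when the iterable may be empty.
    assert next(iter([]), None) is None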

@ -20,7 +20,7 @@ def _import_file(filepath: str | PathLike[str]) -> ModuleType:
if abspath.suffix not in (".py", ".pyw"):
raise ValueError(f"Not a Python source file: {abspath}")
dirname = str(abspath.parent)
sys.path = [dirname] + sys.path
sys.path = [dirname, *sys.path]
try:
module = import_module(abspath.stem)
finally:
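
Most of the remaining edits in this commit are RUF005 fixes: building a new list or tuple by concatenation is replaced with unpacking inside a literal, which avoids a throwaway intermediate sequence and the `list()` conversions needed when the operand types differ. A toy example with made-up values:

    # Illustrative only: concatenation and unpacking build the same objects.
    dirname = "/tmp/project"
    search_path = ["/usr/lib/python3", "/usr/local/lib/python3"]

    assert [dirname] + search_path == [dirname, *search_path]

    # The same applies to tuples, e.g. extending a class-level attributes tuple.
    base_attributes = ("url", "callback")
    assert (*base_attributes, "encoding") == ("url", "callback", "encoding")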

@ -101,12 +101,14 @@ class BaseRedirectMiddleware:
if ttl and redirects <= self.max_redirect_times:
redirected.meta["redirect_times"] = redirects
redirected.meta["redirect_ttl"] = ttl - 1
redirected.meta["redirect_urls"] = request.meta.get("redirect_urls", []) + [
request.url
redirected.meta["redirect_urls"] = [
*request.meta.get("redirect_urls", []),
request.url,
]
redirected.meta["redirect_reasons"] = [
*request.meta.get("redirect_reasons", []),
reason,
]
redirected.meta["redirect_reasons"] = request.meta.get(
"redirect_reasons", []
) + [reason]
redirected.dont_filter = request.dont_filter
redirected.priority = request.priority + self.priority_adjust
logger.debug(

@ -25,13 +25,13 @@ if TYPE_CHECKING:
__all__ = [
"BaseItemExporter",
"PprintItemExporter",
"PickleItemExporter",
"CsvItemExporter",
"XmlItemExporter",
"JsonLinesItemExporter",
"JsonItemExporter",
"JsonLinesItemExporter",
"MarshalItemExporter",
"PickleItemExporter",
"PprintItemExporter",
"XmlItemExporter",
]

@ -77,4 +77,4 @@ class Debugger:
def _enter_debugger(self, signum: int, frame: FrameType | None) -> None:
assert frame
Pdb().set_trace(frame.f_back) # noqa: T100
Pdb().set_trace(frame.f_back)

@ -20,7 +20,7 @@ if TYPE_CHECKING:
class JsonRequest(Request):
attributes: tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
attributes: tuple[str, ...] = (*Request.attributes, "dumps_kwargs")
def __init__(
self, *args: Any, dumps_kwargs: dict[str, Any] | None = None, **kwargs: Any

@ -43,7 +43,7 @@ class TextResponse(Response):
_DEFAULT_ENCODING = "ascii"
_cached_decoded_json = _NONE
attributes: tuple[str, ...] = Response.attributes + ("encoding",)
attributes: tuple[str, ...] = (*Response.attributes, "encoding")
def __init__(self, *args: Any, **kwargs: Any):
self._encoding: str | None = kwargs.pop("encoding", None)

@ -24,7 +24,7 @@ class Link:
of the anchor tag.
"""
__slots__ = ["url", "text", "fragment", "nofollow"]
__slots__ = ["fragment", "nofollow", "text", "url"]
def __init__(
self, url: str, text: str = "", fragment: str = "", nofollow: bool = False

@ -76,8 +76,8 @@ class LogFormatter:
self, request: Request, response: Response, spider: Spider
) -> LogFormatterResult:
"""Logs a message when the crawler finds a webpage."""
request_flags = f" {str(request.flags)}" if request.flags else ""
response_flags = f" {str(response.flags)}" if response.flags else ""
request_flags = f" {request.flags!s}" if request.flags else ""
response_flags = f" {response.flags!s}" if response.flags else ""
return {
"level": logging.DEBUG,
"msg": CRAWLEDMSG,

@ -127,8 +127,7 @@ class MediaPipeline(ABC):
if (
not base_class_name
or class_name == base_class_name
or settings
and not settings.get(formatted_key)
or (settings and not settings.get(formatted_key))
):
return key
return formatted_key

@ -195,8 +195,7 @@ class StrictOriginPolicy(ReferrerPolicy):
if (
self.tls_protected(response_url)
and self.potentially_trustworthy(request_url)
or not self.tls_protected(response_url)
):
) or not self.tls_protected(response_url):
return self.origin_referrer(response_url)
return None
@ -249,8 +248,7 @@ class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
if (
self.tls_protected(response_url)
and self.potentially_trustworthy(request_url)
or not self.tls_protected(response_url)
):
) or not self.tls_protected(response_url):
return self.origin_referrer(response_url)
return None
@ -282,7 +280,7 @@ class DefaultReferrerPolicy(NoReferrerWhenDowngradePolicy):
using ``file://`` or ``s3://`` scheme.
"""
NOREFERRER_SCHEMES: tuple[str, ...] = LOCAL_SCHEMES + ("file", "s3")
NOREFERRER_SCHEMES: tuple[str, ...] = (*LOCAL_SCHEMES, "file", "s3")
name: str = POLICY_SCRAPY_DEFAULT

@ -252,7 +252,9 @@ def is_generator_with_return_value(callable: Callable[..., Any]) -> bool:
def returns_none(return_node: ast.Return) -> bool:
value = return_node.value
return value is None or isinstance(value, ast.Constant) and value.value is None
return value is None or (
isinstance(value, ast.Constant) and value.value is None
)
if inspect.isgeneratorfunction(callable):
func = callable

@ -100,7 +100,7 @@ def install_reactor(reactor_path: str, event_loop_path: str | None = None) -> No
asyncioreactor.install(eventloop=event_loop)
else:
*module, _ = reactor_path.split(".")
installer_path = module + ["install"]
installer_path = [*module, "install"]
installer = load_object(".".join(installer_path))
with suppress(error.ReactorAlreadyInstalledError):
installer()

@ -229,7 +229,8 @@ def request_to_curl(request: Request) -> str:
cookies = f"--cookie '{cookie}'"
elif isinstance(request.cookies, list):
cookie = "; ".join(
f"{list(c.keys())[0]}={list(c.values())[0]}" for c in request.cookies
f"{next(iter(c.keys()))}={next(iter(c.values()))}"
for c in request.cookies
)
cookies = f"--cookie '{cookie}'"

@ -31,7 +31,7 @@ class ProcessTest:
if settings is not None:
env["SCRAPY_SETTINGS_MODULE"] = settings
assert self.command
cmd = self.prefix + [self.command] + list(args)
cmd = [*self.prefix, self.command, *args]
pp = TestProcessProtocol()
pp.deferred.addCallback(self._process_finished, cmd, check_code)
reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)

@ -51,7 +51,7 @@ def url_is_from_any_domain(url: UrlT, domains: Iterable[str]) -> bool:
def url_is_from_spider(url: UrlT, spider: type[Spider]) -> bool:
"""Return True if the url belongs to the given spider"""
return url_is_from_any_domain(
url, [spider.name] + list(getattr(spider, "allowed_domains", []))
url, [spider.name, *getattr(spider, "allowed_domains", [])]
)

@ -11,7 +11,7 @@ from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.settings.default_settings import LOG_VERSIONS
from scrapy.utils.ssl import get_openssl_version
_DEFAULT_SOFTWARE = ["Scrapy"] + LOG_VERSIONS
_DEFAULT_SOFTWARE = ["Scrapy", *LOG_VERSIONS]
def _version(item):

@ -21,7 +21,7 @@ class CmdlineTest(unittest.TestCase):
def _execute(self, *new_args, **kwargs):
encoding = sys.stdout.encoding or "utf-8"
args = (sys.executable, "-m", "scrapy.cmdline") + new_args
args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
comm = proc.communicate()[0].strip()
return comm.decode(encoding)

@ -87,13 +87,13 @@ class ProjectTest(unittest.TestCase):
def call(self, *new_args, **kwargs):
with TemporaryFile() as out:
args = (sys.executable, "-m", "scrapy.cmdline") + new_args
args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
return subprocess.call(
args, stdout=out, stderr=out, cwd=self.cwd, env=self.env, **kwargs
)
def proc(self, *new_args, **popen_kwargs):
args = (sys.executable, "-m", "scrapy.cmdline") + new_args
args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
p = subprocess.Popen(
args,
cwd=popen_kwargs.pop("cwd", self.cwd),

@ -647,7 +647,7 @@ class ScriptRunnerMixin:
def get_script_args(self, script_name: str, *script_args: str) -> list[str]:
script_path = self.script_dir / script_name
return [sys.executable, str(script_path)] + list(script_args)
return [sys.executable, str(script_path), *script_args]
def run_script(self, script_name: str, *script_args: str) -> str:
args = self.get_script_args(script_name, *script_args)

@ -114,7 +114,7 @@ class RetryTest(unittest.TestCase):
def test_exception_to_retry_added(self):
exc = ValueError
settings_dict = {
"RETRY_EXCEPTIONS": list(RETRY_EXCEPTIONS) + [exc],
"RETRY_EXCEPTIONS": [*RETRY_EXCEPTIONS, exc],
}
crawler = get_crawler(Spider, settings_dict=settings_dict)
mw = RetryMiddleware.from_crawler(crawler)

@ -31,7 +31,7 @@ class DownloaderSlotsSettingsTestSpider(MetaSpider):
def start_requests(self):
self.times = {None: []}
slots = list(self.custom_settings.get("DOWNLOAD_SLOTS", {}).keys()) + [None]
slots = [*self.custom_settings.get("DOWNLOAD_SLOTS", {}), None]
for slot in slots:
url = self.mockserver.url(f"/?downloader_slot={slot}")
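
A RUF005 fix with a small twist: iterating a dict yields its keys, so unpacking the dict directly replaces both the explicit `.keys()` call and the concatenation. Sketch with an invented settings dict:

    # Illustrative only: unpacking a dict in a list literal yields its keys.
    download_slots = {"example.com": {"concurrency": 1}, "other.org": {}}

    assert [*download_slots, None] == list(download_slots.keys()) + [None]
    assert [*download_slots, None] == ["example.com", "other.org", None]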

@ -116,7 +116,7 @@ class BaseItemExporterTest(unittest.TestCase):
)
ie = self._get_exporter(fields_to_export=["name"], encoding="latin-1")
_, name = list(ie._get_serialized_fields(self.i))[0]
_, name = next(iter(ie._get_serialized_fields(self.i)))
assert isinstance(name, str)
self.assertEqual(name, "John\xa3")

@ -960,7 +960,7 @@ class XmlResponseTest(TextResponseTest):
class CustomResponse(TextResponse):
attributes = TextResponse.attributes + ("foo", "bar")
attributes = (*TextResponse.attributes, "foo", "bar")
def __init__(self, *args, **kwargs) -> None:
self.foo = kwargs.pop("foo", None)

@ -61,11 +61,11 @@ Foo 1 oldest: 0s ago\n\n""",
)
def test_get_oldest(self):
o1 = Foo() # noqa: F841
o1 = Foo()
o1_time = time()
o2 = Bar() # noqa: F841
o2 = Bar()
o3_time = time()
if o3_time <= o1_time:
@ -80,9 +80,9 @@ Foo 1 oldest: 0s ago\n\n""",
self.assertIsNone(trackref.get_oldest("XXX"))
def test_iter_all(self):
o1 = Foo() # noqa: F841
o1 = Foo()
o2 = Bar() # noqa: F841
o3 = Foo() # noqa: F841
o3 = Foo()
self.assertEqual(
set(trackref.iter_all("Foo")),
{o1, o3},