Mirror of https://github.com/scrapy/scrapy.git
Enable RUF Ruff rules.

commit 838ff99f37 (parent ee239d2451)
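The diff below has two parts: the pyproject.toml changes that add "RUF" to Ruff's extend-select list (and ignore a few RUF codes), and the mechanical code rewrites the newly enabled rules require. The commit does not name individual rules, but as a hedged reading the rewrites match: iterable unpacking instead of sequence concatenation (RUF005), next(iter(...)) instead of indexing a throwaway list (RUF015), sorted __all__ and __slots__ (RUF022, RUF023), explicit parentheses in mixed and/or conditions, and removal of # noqa comments that no longer suppress anything (RUF100). A minimal sketch of the two most frequent patterns, with hypothetical names:

    # RUF005-style rewrite: build sequences by unpacking instead of concatenating.
    base_attributes = ("url", "callback")
    attributes = base_attributes + ("encoding",)   # before
    attributes = (*base_attributes, "encoding")    # after

    # RUF015-style rewrite: take the first element without materializing a list.
    crawlers = {"example-crawler"}
    first = list(crawlers)[0]       # before
    first = next(iter(crawlers))    # after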
@@ -246,6 +246,8 @@ extend-select = [
     "RET",
     # flake8-raise
     "RSE",
+    # Ruff-specific rules
+    "RUF",
     # flake8-bandit
     "S",
     # flake8-slots
@@ -324,6 +326,14 @@ ignore = [
     "PLR2004",
     # `for` loop variable overwritten by assignment target
     "PLW2901",
+    # String contains ambiguous {}.
+    "RUF001",
+    # Docstring contains ambiguous {}.
+    "RUF002",
+    # Comment contains ambiguous {}.
+    "RUF003",
+    # Mutable class attributes should be annotated with `typing.ClassVar`
+    "RUF012",
     # Use of `assert` detected; needed for mypy
     "S101",
     # FTP-related functions are being called; https://github.com/scrapy/scrapy/issues/4180
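The newly ignored codes opt the project out of four RUF checks rather than fixing them: RUF001-RUF003 flag ambiguous Unicode characters in strings, docstrings and comments, and RUF012 wants mutable class attributes annotated with typing.ClassVar. A minimal sketch of what RUF012 would otherwise ask for, using a hypothetical class:

    from typing import ClassVar

    class SpiderLike:
        # Left unannotated, RUF012 flags this attribute: the mutable dict is
        # shared by all instances. The suggested fix is the ClassVar annotation.
        custom_settings: ClassVar[dict[str, str]] = {}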
@@ -13,14 +13,14 @@ from scrapy.selector import Selector
 from scrapy.spiders import Spider

 __all__ = [
+    "Field",
+    "FormRequest",
+    "Item",
+    "Request",
+    "Selector",
+    "Spider",
     "__version__",
     "version_info",
-    "Spider",
-    "Request",
-    "FormRequest",
-    "Selector",
-    "Item",
-    "Field",
 ]
@@ -39,9 +39,8 @@ class Command(BaseRunSpiderCommand):
         else:
             self.crawler_process.start()

-            if (
-                self.crawler_process.bootstrap_failed
-                or hasattr(self.crawler_process, "has_exception")
+            if self.crawler_process.bootstrap_failed or (
+                hasattr(self.crawler_process, "has_exception")
                 and self.crawler_process.has_exception
             ):
                 self.exitcode = 1
@@ -269,7 +269,7 @@ class Command(BaseRunSpiderCommand):
         assert self.crawler_process
         assert self.spidercls
         self.crawler_process.crawl(self.spidercls, **opts.spargs)
-        self.pcrawler = list(self.crawler_process.crawlers)[0]
+        self.pcrawler = next(iter(self.crawler_process.crawlers))
         self.crawler_process.start()

         if not self.first_response:
@@ -20,7 +20,7 @@ def _import_file(filepath: str | PathLike[str]) -> ModuleType:
     if abspath.suffix not in (".py", ".pyw"):
         raise ValueError(f"Not a Python source file: {abspath}")
     dirname = str(abspath.parent)
-    sys.path = [dirname] + sys.path
+    sys.path = [dirname, *sys.path]
     try:
         module = import_module(abspath.stem)
     finally:
@@ -101,12 +101,14 @@ class BaseRedirectMiddleware:
         if ttl and redirects <= self.max_redirect_times:
             redirected.meta["redirect_times"] = redirects
             redirected.meta["redirect_ttl"] = ttl - 1
-            redirected.meta["redirect_urls"] = request.meta.get("redirect_urls", []) + [
-                request.url
+            redirected.meta["redirect_urls"] = [
+                *request.meta.get("redirect_urls", []),
+                request.url,
             ]
+            redirected.meta["redirect_reasons"] = [
+                *request.meta.get("redirect_reasons", []),
+                reason,
+            ]
-            redirected.meta["redirect_reasons"] = request.meta.get(
-                "redirect_reasons", []
-            ) + [reason]
             redirected.dont_filter = request.dont_filter
             redirected.priority = request.priority + self.priority_adjust
             logger.debug(
@@ -25,13 +25,13 @@ if TYPE_CHECKING:

 __all__ = [
     "BaseItemExporter",
-    "PprintItemExporter",
-    "PickleItemExporter",
     "CsvItemExporter",
-    "XmlItemExporter",
-    "JsonLinesItemExporter",
     "JsonItemExporter",
+    "JsonLinesItemExporter",
     "MarshalItemExporter",
+    "PickleItemExporter",
+    "PprintItemExporter",
+    "XmlItemExporter",
 ]
@@ -77,4 +77,4 @@ class Debugger:

     def _enter_debugger(self, signum: int, frame: FrameType | None) -> None:
         assert frame
-        Pdb().set_trace(frame.f_back)  # noqa: T100
+        Pdb().set_trace(frame.f_back)
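The dropped # noqa: T100 above, like the # noqa: F841 removals in the trackref tests at the end of the diff, is consistent with RUF100, which reports suppression comments whose codes do not actually raise anything on that line; this attribution is a hedged reading, not stated in the commit. A minimal illustration:

    # E402 only applies to imports that are *not* at the top of the file, so this
    # directive suppresses nothing; RUF100 reports it and `ruff check --fix`
    # removes the stale comment.
    import sys  # noqa: E402

    print(sys.version)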
@@ -20,7 +20,7 @@ if TYPE_CHECKING:


 class JsonRequest(Request):
-    attributes: tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
+    attributes: tuple[str, ...] = (*Request.attributes, "dumps_kwargs")

     def __init__(
         self, *args: Any, dumps_kwargs: dict[str, Any] | None = None, **kwargs: Any
@@ -43,7 +43,7 @@ class TextResponse(Response):
     _DEFAULT_ENCODING = "ascii"
     _cached_decoded_json = _NONE

-    attributes: tuple[str, ...] = Response.attributes + ("encoding",)
+    attributes: tuple[str, ...] = (*Response.attributes, "encoding")

     def __init__(self, *args: Any, **kwargs: Any):
         self._encoding: str | None = kwargs.pop("encoding", None)
@@ -24,7 +24,7 @@ class Link:
     of the anchor tag.
     """

-    __slots__ = ["url", "text", "fragment", "nofollow"]
+    __slots__ = ["fragment", "nofollow", "text", "url"]

     def __init__(
         self, url: str, text: str = "", fragment: str = "", nofollow: bool = False
@@ -76,8 +76,8 @@ class LogFormatter:
         self, request: Request, response: Response, spider: Spider
     ) -> LogFormatterResult:
         """Logs a message when the crawler finds a webpage."""
-        request_flags = f" {str(request.flags)}" if request.flags else ""
-        response_flags = f" {str(response.flags)}" if response.flags else ""
+        request_flags = f" {request.flags!s}" if request.flags else ""
+        response_flags = f" {response.flags!s}" if response.flags else ""
         return {
             "level": logging.DEBUG,
             "msg": CRAWLEDMSG,
@@ -127,8 +127,7 @@ class MediaPipeline(ABC):
         if (
             not base_class_name
             or class_name == base_class_name
-            or settings
-            and not settings.get(formatted_key)
+            or (settings and not settings.get(formatted_key))
         ):
             return key
         return formatted_key
@@ -195,8 +195,7 @@ class StrictOriginPolicy(ReferrerPolicy):
         if (
             self.tls_protected(response_url)
             and self.potentially_trustworthy(request_url)
-            or not self.tls_protected(response_url)
-        ):
+        ) or not self.tls_protected(response_url):
             return self.origin_referrer(response_url)
         return None
@@ -249,8 +248,7 @@ class StrictOriginWhenCrossOriginPolicy(ReferrerPolicy):
         if (
             self.tls_protected(response_url)
             and self.potentially_trustworthy(request_url)
-            or not self.tls_protected(response_url)
-        ):
+        ) or not self.tls_protected(response_url):
             return self.origin_referrer(response_url)
         return None
@@ -282,7 +280,7 @@ class DefaultReferrerPolicy(NoReferrerWhenDowngradePolicy):
     using ``file://`` or ``s3://`` scheme.
     """

-    NOREFERRER_SCHEMES: tuple[str, ...] = LOCAL_SCHEMES + ("file", "s3")
+    NOREFERRER_SCHEMES: tuple[str, ...] = (*LOCAL_SCHEMES, "file", "s3")
     name: str = POLICY_SCRAPY_DEFAULT
@@ -252,7 +252,9 @@ def is_generator_with_return_value(callable: Callable[..., Any]) -> bool:

     def returns_none(return_node: ast.Return) -> bool:
         value = return_node.value
-        return value is None or isinstance(value, ast.Constant) and value.value is None
+        return value is None or (
+            isinstance(value, ast.Constant) and value.value is None
+        )

     if inspect.isgeneratorfunction(callable):
         func = callable
@@ -100,7 +100,7 @@ def install_reactor(reactor_path: str, event_loop_path: str | None = None) -> No
         asyncioreactor.install(eventloop=event_loop)
     else:
         *module, _ = reactor_path.split(".")
-        installer_path = module + ["install"]
+        installer_path = [*module, "install"]
         installer = load_object(".".join(installer_path))
         with suppress(error.ReactorAlreadyInstalledError):
             installer()
@@ -229,7 +229,8 @@ def request_to_curl(request: Request) -> str:
         cookies = f"--cookie '{cookie}'"
     elif isinstance(request.cookies, list):
         cookie = "; ".join(
-            f"{list(c.keys())[0]}={list(c.values())[0]}" for c in request.cookies
+            f"{next(iter(c.keys()))}={next(iter(c.values()))}"
+            for c in request.cookies
         )
         cookies = f"--cookie '{cookie}'"
@@ -31,7 +31,7 @@ class ProcessTest:
         if settings is not None:
             env["SCRAPY_SETTINGS_MODULE"] = settings
         assert self.command
-        cmd = self.prefix + [self.command] + list(args)
+        cmd = [*self.prefix, self.command, *args]
         pp = TestProcessProtocol()
         pp.deferred.addCallback(self._process_finished, cmd, check_code)
         reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
@@ -51,7 +51,7 @@ def url_is_from_any_domain(url: UrlT, domains: Iterable[str]) -> bool:
 def url_is_from_spider(url: UrlT, spider: type[Spider]) -> bool:
     """Return True if the url belongs to the given spider"""
     return url_is_from_any_domain(
-        url, [spider.name] + list(getattr(spider, "allowed_domains", []))
+        url, [spider.name, *getattr(spider, "allowed_domains", [])]
     )
@@ -11,7 +11,7 @@ from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.settings.default_settings import LOG_VERSIONS
 from scrapy.utils.ssl import get_openssl_version

-_DEFAULT_SOFTWARE = ["Scrapy"] + LOG_VERSIONS
+_DEFAULT_SOFTWARE = ["Scrapy", *LOG_VERSIONS]


 def _version(item):
@@ -21,7 +21,7 @@ class CmdlineTest(unittest.TestCase):

     def _execute(self, *new_args, **kwargs):
         encoding = sys.stdout.encoding or "utf-8"
-        args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+        args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
         proc = Popen(args, stdout=PIPE, stderr=PIPE, env=self.env, **kwargs)
         comm = proc.communicate()[0].strip()
         return comm.decode(encoding)
@@ -87,13 +87,13 @@ class ProjectTest(unittest.TestCase):

     def call(self, *new_args, **kwargs):
         with TemporaryFile() as out:
-            args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+            args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
             return subprocess.call(
                 args, stdout=out, stderr=out, cwd=self.cwd, env=self.env, **kwargs
             )

     def proc(self, *new_args, **popen_kwargs):
-        args = (sys.executable, "-m", "scrapy.cmdline") + new_args
+        args = (sys.executable, "-m", "scrapy.cmdline", *new_args)
         p = subprocess.Popen(
             args,
             cwd=popen_kwargs.pop("cwd", self.cwd),
@@ -647,7 +647,7 @@ class ScriptRunnerMixin:

     def get_script_args(self, script_name: str, *script_args: str) -> list[str]:
         script_path = self.script_dir / script_name
-        return [sys.executable, str(script_path)] + list(script_args)
+        return [sys.executable, str(script_path), *script_args]

     def run_script(self, script_name: str, *script_args: str) -> str:
         args = self.get_script_args(script_name, *script_args)
@@ -114,7 +114,7 @@ class RetryTest(unittest.TestCase):
     def test_exception_to_retry_added(self):
         exc = ValueError
         settings_dict = {
-            "RETRY_EXCEPTIONS": list(RETRY_EXCEPTIONS) + [exc],
+            "RETRY_EXCEPTIONS": [*RETRY_EXCEPTIONS, exc],
         }
         crawler = get_crawler(Spider, settings_dict=settings_dict)
         mw = RetryMiddleware.from_crawler(crawler)
@@ -31,7 +31,7 @@ class DownloaderSlotsSettingsTestSpider(MetaSpider):
     def start_requests(self):
         self.times = {None: []}

-        slots = list(self.custom_settings.get("DOWNLOAD_SLOTS", {}).keys()) + [None]
+        slots = [*self.custom_settings.get("DOWNLOAD_SLOTS", {}), None]

         for slot in slots:
             url = self.mockserver.url(f"/?downloader_slot={slot}")
@@ -116,7 +116,7 @@ class BaseItemExporterTest(unittest.TestCase):
         )

         ie = self._get_exporter(fields_to_export=["name"], encoding="latin-1")
-        _, name = list(ie._get_serialized_fields(self.i))[0]
+        _, name = next(iter(ie._get_serialized_fields(self.i)))
         assert isinstance(name, str)
         self.assertEqual(name, "John\xa3")
@@ -960,7 +960,7 @@ class XmlResponseTest(TextResponseTest):


 class CustomResponse(TextResponse):
-    attributes = TextResponse.attributes + ("foo", "bar")
+    attributes = (*TextResponse.attributes, "foo", "bar")

     def __init__(self, *args, **kwargs) -> None:
         self.foo = kwargs.pop("foo", None)
@@ -61,11 +61,11 @@ Foo 1 oldest: 0s ago\n\n""",
         )

     def test_get_oldest(self):
-        o1 = Foo()  # noqa: F841
+        o1 = Foo()

         o1_time = time()

-        o2 = Bar()  # noqa: F841
+        o2 = Bar()

         o3_time = time()
         if o3_time <= o1_time:
@@ -80,9 +80,9 @@ Foo 1 oldest: 0s ago\n\n""",
         self.assertIsNone(trackref.get_oldest("XXX"))

     def test_iter_all(self):
-        o1 = Foo()  # noqa: F841
+        o1 = Foo()
         o2 = Bar()  # noqa: F841
-        o3 = Foo()  # noqa: F841
+        o3 = Foo()
         self.assertEqual(
             set(trackref.iter_all("Foo")),
             {o1, o3},