From cec0aeca58730b592bec50299414d4bf30fc9ec0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 27 Jan 2025 14:07:09 +0400 Subject: [PATCH] Bump ruff, switch from black to ruff-format (#6631) --- .pre-commit-config.yaml | 7 +- scrapy/commands/genspider.py | 6 +- scrapy/core/downloader/handlers/__init__.py | 13 ++-- scrapy/core/http2/stream.py | 8 +-- scrapy/downloadermiddlewares/cookies.py | 3 +- scrapy/downloadermiddlewares/offsite.py | 2 +- scrapy/downloadermiddlewares/robotstxt.py | 4 +- scrapy/downloadermiddlewares/stats.py | 2 +- scrapy/extensions/telnet.py | 4 +- scrapy/http/headers.py | 3 +- scrapy/http/request/form.py | 1 - scrapy/http/response/__init__.py | 3 +- scrapy/linkextractors/lxmlhtml.py | 1 - scrapy/pipelines/files.py | 4 +- scrapy/selector/unified.py | 3 +- scrapy/settings/__init__.py | 2 +- scrapy/settings/default_settings.py | 2 +- scrapy/spidermiddlewares/offsite.py | 2 +- scrapy/squeues.py | 16 +++-- scrapy/utils/curl.py | 5 +- scrapy/utils/defer.py | 2 +- scrapy/utils/iterators.py | 2 +- scrapy/utils/log.py | 1 - scrapy/utils/python.py | 7 +- tests/spiders.py | 6 +- tests/test_contracts.py | 3 +- tests/test_downloadermiddleware_httpproxy.py | 6 +- tests/test_downloadermiddleware_redirect.py | 24 +++---- tests/test_downloaderslotssettings.py | 6 +- tests/test_engine.py | 12 ++-- tests/test_engine_stop_download_headers.py | 6 +- tests/test_exporters.py | 4 +- tests/test_feedexport.py | 76 +++++++++----------- tests/test_http_request.py | 12 ++-- tests/test_http_response.py | 6 +- tests/test_item.py | 4 +- tests/test_pipeline_files.py | 2 +- tests/test_pipeline_media.py | 4 -- tests/test_robotstxt_interface.py | 7 +- tests/test_selector.py | 6 +- tests/test_utils_response.py | 30 ++++---- tests/test_utils_url.py | 6 +- 42 files changed, 151 insertions(+), 172 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c76c613d9..18402b908 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,10 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.4 + rev: v0.9.3 hooks: - id: ruff args: [ --fix ] -- repo: https://github.com/psf/black.git - rev: 24.10.0 - hooks: - - id: black + - id: ruff-format - repo: https://github.com/adamchainz/blacken-docs rev: 1.19.1 hooks: diff --git a/scrapy/commands/genspider.py b/scrapy/commands/genspider.py index 2a1dea997..6d4aec3d8 100644 --- a/scrapy/commands/genspider.py +++ b/scrapy/commands/genspider.py @@ -188,9 +188,9 @@ class Command(ScrapyCommand): return True return False - assert ( - self.crawler_process is not None - ), "crawler_process must be set before calling run" + assert self.crawler_process is not None, ( + "crawler_process must be set before calling run" + ) try: spidercls = self.crawler_process.spider_loader.load(name) diff --git a/scrapy/core/downloader/handlers/__init__.py b/scrapy/core/downloader/handlers/__init__.py index 7f3da67eb..902f200b8 100644 --- a/scrapy/core/downloader/handlers/__init__.py +++ b/scrapy/core/downloader/handlers/__init__.py @@ -34,13 +34,12 @@ class DownloadHandlerProtocol(Protocol): class DownloadHandlers: def __init__(self, crawler: Crawler): self._crawler: Crawler = crawler - self._schemes: dict[str, str | Callable[..., Any]] = ( - {} - ) # stores acceptable schemes on instancing - self._handlers: dict[str, DownloadHandlerProtocol] = ( - {} - ) # stores instanced handlers for schemes - self._notconfigured: dict[str, str] = {} # remembers failed handlers + # stores acceptable schemes on instancing + 
self._schemes: dict[str, str | Callable[..., Any]] = {} + # stores instanced handlers for schemes + self._handlers: dict[str, DownloadHandlerProtocol] = {} + # remembers failed handlers + self._notconfigured: dict[str, str] = {} handlers: dict[str, str | Callable[..., Any]] = without_none_values( cast( "dict[str, str | Callable[..., Any]]", diff --git a/scrapy/core/http2/stream.py b/scrapy/core/http2/stream.py index a4dc89c18..afca99dcf 100644 --- a/scrapy/core/http2/stream.py +++ b/scrapy/core/http2/stream.py @@ -193,7 +193,7 @@ class Stream: url.netloc == str(self._protocol.metadata["uri"].host, "utf-8") or url.netloc == str(self._protocol.metadata["uri"].netloc, "utf-8") or url.netloc - == f'{self._protocol.metadata["ip_address"]}:{self._protocol.metadata["uri"].port}' + == f"{self._protocol.metadata['ip_address']}:{self._protocol.metadata['uri'].port}" ) def _get_request_headers(self) -> list[tuple[str, str]]: @@ -339,7 +339,7 @@ class Stream: if self._log_warnsize: self.metadata["reached_warnsize"] = True warning_msg = ( - f'Received more ({self._response["flow_controlled_size"]}) bytes than download ' + f"Received more ({self._response['flow_controlled_size']}) bytes than download " f"warn size ({self._download_warnsize}) in request {self._request}" ) logger.warning(warning_msg) @@ -445,7 +445,7 @@ class Stream: ResponseFailed( [ Failure( - f'Remote peer {self._protocol.metadata["ip_address"]} sent RST_STREAM', + f"Remote peer {self._protocol.metadata['ip_address']} sent RST_STREAM", ProtocolError, ) ] @@ -465,7 +465,7 @@ class Stream: InvalidHostname( self._request, str(self._protocol.metadata["uri"].host, "utf-8"), - f'{self._protocol.metadata["ip_address"]}:{self._protocol.metadata["uri"].port}', + f"{self._protocol.metadata['ip_address']}:{self._protocol.metadata['uri'].port}", ) ) diff --git a/scrapy/downloadermiddlewares/cookies.py b/scrapy/downloadermiddlewares/cookies.py index 43348f632..9156b8c3a 100644 --- a/scrapy/downloadermiddlewares/cookies.py +++ b/scrapy/downloadermiddlewares/cookies.py @@ -54,8 +54,7 @@ class CookiesMiddleware: ) -> None: for cookie in cookies: cookie_domain = cookie.domain - if cookie_domain.startswith("."): - cookie_domain = cookie_domain[1:] + cookie_domain = cookie_domain.removeprefix(".") hostname = urlparse_cached(request).hostname assert hostname is not None diff --git a/scrapy/downloadermiddlewares/offsite.py b/scrapy/downloadermiddlewares/offsite.py index a2cff65e7..787c46a60 100644 --- a/scrapy/downloadermiddlewares/offsite.py +++ b/scrapy/downloadermiddlewares/offsite.py @@ -89,5 +89,5 @@ class OffsiteMiddleware: warnings.warn(message) else: domains.append(re.escape(domain)) - regex = rf'^(.*\.)?({"|".join(domains)})$' + regex = rf"^(.*\.)?({'|'.join(domains)})$" return re.compile(regex) diff --git a/scrapy/downloadermiddlewares/robotstxt.py b/scrapy/downloadermiddlewares/robotstxt.py index 9411cff14..aba455bdd 100644 --- a/scrapy/downloadermiddlewares/robotstxt.py +++ b/scrapy/downloadermiddlewares/robotstxt.py @@ -63,7 +63,9 @@ class RobotsTxtMiddleware: if request.url.startswith("data:") or request.url.startswith("file:"): return None d: Deferred[RobotParser | None] = maybeDeferred( - self.robot_parser, request, spider # type: ignore[call-overload] + self.robot_parser, + request, + spider, # type: ignore[call-overload] ) d2: Deferred[None] = d.addCallback(self.process_request_2, request, spider) return d2 diff --git a/scrapy/downloadermiddlewares/stats.py b/scrapy/downloadermiddlewares/stats.py index fb0f30620..cb5887a6f 100644 --- 
a/scrapy/downloadermiddlewares/stats.py +++ b/scrapy/downloadermiddlewares/stats.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: def get_header_size( - headers: dict[str, list[str | bytes] | tuple[str | bytes, ...]] + headers: dict[str, list[str | bytes] | tuple[str | bytes, ...]], ) -> int: size = 0 for key, value in headers.items(): diff --git a/scrapy/extensions/telnet.py b/scrapy/extensions/telnet.py index 189b1953b..ac832e025 100644 --- a/scrapy/extensions/telnet.py +++ b/scrapy/extensions/telnet.py @@ -84,9 +84,7 @@ class TelnetConsole(protocol.ServerFactory): """An implementation of IPortal""" @defers - def login( - self_, credentials, mind, *interfaces - ): # pylint: disable=no-self-argument + def login(self_, credentials, mind, *interfaces): # pylint: disable=no-self-argument if not ( credentials.username == self.username.encode("utf8") and credentials.checkPassword(self.password.encode("utf8")) diff --git a/scrapy/http/headers.py b/scrapy/http/headers.py index 29ba9533b..60b04753b 100644 --- a/scrapy/http/headers.py +++ b/scrapy/http/headers.py @@ -105,7 +105,8 @@ class Headers(CaselessDict): def values(self) -> list[bytes | None]: # type: ignore[override] return [ - self[k] for k in self.keys() # pylint: disable=consider-using-dict-items + self[k] + for k in self.keys() # pylint: disable=consider-using-dict-items ] def to_string(self) -> bytes: diff --git a/scrapy/http/request/form.py b/scrapy/http/request/form.py index de3b24de0..7681419c4 100644 --- a/scrapy/http/request/form.py +++ b/scrapy/http/request/form.py @@ -24,7 +24,6 @@ from scrapy.http.request import Request from scrapy.utils.python import is_listlike, to_bytes if TYPE_CHECKING: - # typing.Self requires Python 3.11 from typing_extensions import Self diff --git a/scrapy/http/response/__init__.py b/scrapy/http/response/__init__.py index b84110b29..de2188ceb 100644 --- a/scrapy/http/response/__init__.py +++ b/scrapy/http/response/__init__.py @@ -94,8 +94,7 @@ class Response(object_ref): return self.request.meta # type: ignore[union-attr] except AttributeError: raise AttributeError( - "Response.meta not available, this response " - "is not tied to any request" + "Response.meta not available, this response is not tied to any request" ) @property diff --git a/scrapy/linkextractors/lxmlhtml.py b/scrapy/linkextractors/lxmlhtml.py index 4fd932b88..814e31fec 100644 --- a/scrapy/linkextractors/lxmlhtml.py +++ b/scrapy/linkextractors/lxmlhtml.py @@ -25,7 +25,6 @@ from scrapy.utils.response import get_base_url from scrapy.utils.url import url_has_any_extension, url_is_from_any_domain if TYPE_CHECKING: - from lxml.html import HtmlElement from scrapy import Selector diff --git a/scrapy/pipelines/files.py b/scrapy/pipelines/files.py index a10117590..888be81c3 100644 --- a/scrapy/pipelines/files.py +++ b/scrapy/pipelines/files.py @@ -202,7 +202,9 @@ class S3FilesStore: return cast( "Deferred[dict[str, Any]]", deferToThread( - self.s3_client.head_object, Bucket=self.bucket, Key=key_name # type: ignore[attr-defined] + self.s3_client.head_object, # type: ignore[attr-defined] + Bucket=self.bucket, + Key=key_name, ), ) diff --git a/scrapy/selector/unified.py b/scrapy/selector/unified.py index db9014b41..f8365a87b 100644 --- a/scrapy/selector/unified.py +++ b/scrapy/selector/unified.py @@ -81,8 +81,7 @@ class Selector(_ParselSelector, object_ref): ): if response is not None and text is not None: raise ValueError( - f"{self.__class__.__name__}.__init__() received " - "both response and text" + f"{self.__class__.__name__}.__init__() received both 
response and text" ) st = _st(response, type) diff --git a/scrapy/settings/__init__.py b/scrapy/settings/__init__.py index 3ebdb351a..f31f824a8 100644 --- a/scrapy/settings/__init__.py +++ b/scrapy/settings/__init__.py @@ -539,7 +539,7 @@ def iter_default_settings() -> Iterable[tuple[str, Any]]: def overridden_settings( - settings: Mapping[_SettingsKeyT, Any] + settings: Mapping[_SettingsKeyT, Any], ) -> Iterable[tuple[str, Any]]: """Return an iterable of the settings that have been overridden""" for name, defvalue in iter_default_settings(): diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index 7ef365f68..c473b369c 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -333,7 +333,7 @@ TEMPLATES_DIR = str((Path(__file__).parent / ".." / "templates").resolve()) URLLENGTH_LIMIT = 2083 -USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)' +USER_AGENT = f"Scrapy/{import_module('scrapy').__version__} (+https://scrapy.org)" TELNETCONSOLE_ENABLED = 1 TELNETCONSOLE_PORT = [6023, 6073] diff --git a/scrapy/spidermiddlewares/offsite.py b/scrapy/spidermiddlewares/offsite.py index 95e753830..646beb911 100644 --- a/scrapy/spidermiddlewares/offsite.py +++ b/scrapy/spidermiddlewares/offsite.py @@ -110,7 +110,7 @@ class OffsiteMiddleware: warnings.warn(message, PortWarning) else: domains.append(re.escape(domain)) - regex = rf'^(.*\.)?({"|".join(domains)})$' + regex = rf"^(.*\.)?({'|'.join(domains)})$" return re.compile(regex) def spider_opened(self, spider: Spider) -> None: diff --git a/scrapy/squeues.py b/scrapy/squeues.py index 80bb37e93..7007cd4b8 100644 --- a/scrapy/squeues.py +++ b/scrapy/squeues.py @@ -147,16 +147,24 @@ def _pickle_serialize(obj: Any) -> bytes: # queue.*Queue aren't subclasses of queue.BaseQueue _PickleFifoSerializationDiskQueue = _serializable_queue( - _with_mkdir(queue.FifoDiskQueue), _pickle_serialize, pickle.loads # type: ignore[arg-type] + _with_mkdir(queue.FifoDiskQueue), # type: ignore[arg-type] + _pickle_serialize, + pickle.loads, ) _PickleLifoSerializationDiskQueue = _serializable_queue( - _with_mkdir(queue.LifoDiskQueue), _pickle_serialize, pickle.loads # type: ignore[arg-type] + _with_mkdir(queue.LifoDiskQueue), # type: ignore[arg-type] + _pickle_serialize, + pickle.loads, ) _MarshalFifoSerializationDiskQueue = _serializable_queue( - _with_mkdir(queue.FifoDiskQueue), marshal.dumps, marshal.loads # type: ignore[arg-type] + _with_mkdir(queue.FifoDiskQueue), # type: ignore[arg-type] + marshal.dumps, + marshal.loads, ) _MarshalLifoSerializationDiskQueue = _serializable_queue( - _with_mkdir(queue.LifoDiskQueue), marshal.dumps, marshal.loads # type: ignore[arg-type] + _with_mkdir(queue.LifoDiskQueue), # type: ignore[arg-type] + marshal.dumps, + marshal.loads, ) # public queue classes diff --git a/scrapy/utils/curl.py b/scrapy/utils/curl.py index bfdd4dc8a..a563dc79a 100644 --- a/scrapy/utils/curl.py +++ b/scrapy/utils/curl.py @@ -22,8 +22,7 @@ class DataAction(argparse.Action): option_string: str | None = None, ) -> None: value = str(values) - if value.startswith("$"): - value = value[1:] + value = value.removeprefix("$") setattr(namespace, self.dest, value) @@ -96,7 +95,7 @@ def curl_to_request_kwargs( parsed_args, argv = curl_parser.parse_known_args(curl_args[1:]) if argv: - msg = f'Unrecognized options: {", ".join(argv)}' + msg = f"Unrecognized options: {', '.join(argv)}" if ignore_unknown_options: warnings.warn(msg) else: diff --git a/scrapy/utils/defer.py 
b/scrapy/utils/defer.py index 000ab5c65..8f52836c4 100644 --- a/scrapy/utils/defer.py +++ b/scrapy/utils/defer.py @@ -377,7 +377,7 @@ def deferred_from_coro(o: _T) -> Deferred | _T: def deferred_f_from_coro_f( - coro_f: Callable[_P, Coroutine[Any, Any, _T]] + coro_f: Callable[_P, Coroutine[Any, Any, _T]], ) -> Callable[_P, Deferred[_T]]: """Converts a coroutine function into a function that returns a Deferred. diff --git a/scrapy/utils/iterators.py b/scrapy/utils/iterators.py index e8ed7b60a..c646fc218 100644 --- a/scrapy/utils/iterators.py +++ b/scrapy/utils/iterators.py @@ -71,7 +71,7 @@ def xmliter(obj: Response | str | bytes, nodename: str) -> Iterator[Selector]: nodetext = ( document_header + match.group().replace( - nodename, f'{nodename} {" ".join(namespaces.values())}', 1 + nodename, f"{nodename} {' '.join(namespaces.values())}", 1 ) + header_end ) diff --git a/scrapy/utils/log.py b/scrapy/utils/log.py index d51231b82..b865cf48d 100644 --- a/scrapy/utils/log.py +++ b/scrapy/utils/log.py @@ -16,7 +16,6 @@ from scrapy.settings import Settings, _SettingsKeyT from scrapy.utils.versions import get_versions if TYPE_CHECKING: - from scrapy.crawler import Crawler from scrapy.logformatter import LogFormatterResult diff --git a/scrapy/utils/python.py b/scrapy/utils/python.py index e954b625c..fcf582082 100644 --- a/scrapy/utils/python.py +++ b/scrapy/utils/python.py @@ -119,8 +119,7 @@ def to_unicode( return text if not isinstance(text, (bytes, str)): raise TypeError( - "to_unicode must receive a bytes or str " - f"object, got {type(text).__name__}" + f"to_unicode must receive a bytes or str object, got {type(text).__name__}" ) if encoding is None: encoding = "utf-8" @@ -183,7 +182,7 @@ _SelfT = TypeVar("_SelfT") def memoizemethod_noargs( - method: Callable[Concatenate[_SelfT, _P], _T] + method: Callable[Concatenate[_SelfT, _P], _T], ) -> Callable[Concatenate[_SelfT, _P], _T]: """Decorator to cache the result of a method (without arguments) using a weak reference to its object @@ -313,7 +312,7 @@ def without_none_values(iterable: Iterable[_KT]) -> Iterable[_KT]: ... def without_none_values( - iterable: Mapping[_KT, _VT] | Iterable[_KT] + iterable: Mapping[_KT, _VT] | Iterable[_KT], ) -> dict[_KT, _VT] | Iterable[_KT]: """Return a copy of ``iterable`` with all ``None`` entries removed. 
diff --git a/tests/spiders.py b/tests/spiders.py index 3c44d7da5..da923de6e 100644 --- a/tests/spiders.py +++ b/tests/spiders.py @@ -338,9 +338,9 @@ class BrokenStartRequestsSpider(FollowAllSpider): if self.fail_yielding: 2 / 0 - assert ( - self.seedsseen - ), "All start requests consumed before any download happened" + assert self.seedsseen, ( + "All start requests consumed before any download happened" + ) def parse(self, response): self.seedsseen.append(response.meta.get("seed")) diff --git a/tests/test_contracts.py b/tests/test_contracts.py index f7581707b..fb16140be 100644 --- a/tests/test_contracts.py +++ b/tests/test_contracts.py @@ -529,7 +529,7 @@ class ContractsManagerTest(unittest.TestCase): return TestItem() with MockServer() as mockserver: - contract_doc = f'@url {mockserver.url("/status?n=200")}' + contract_doc = f"@url {mockserver.url('/status?n=200')}" TestSameUrlSpider.parse_first.__doc__ = contract_doc TestSameUrlSpider.parse_second.__doc__ = contract_doc @@ -567,7 +567,6 @@ class CustomFailContractPostProcess(Contract): class CustomContractPrePostProcess(unittest.TestCase): - def setUp(self): self.results = TextTestResult(stream=None, descriptions=False, verbosity=0) diff --git a/tests/test_downloadermiddleware_httpproxy.py b/tests/test_downloadermiddleware_httpproxy.py index 0ea1ef5eb..97c276b48 100644 --- a/tests/test_downloadermiddleware_httpproxy.py +++ b/tests/test_downloadermiddleware_httpproxy.py @@ -94,7 +94,7 @@ class TestHttpProxyMiddleware(TestCase): def test_proxy_auth_encoding(self): # utf-8 encoding - os.environ["http_proxy"] = "https://m\u00E1n:pass@proxy:3128" + os.environ["http_proxy"] = "https://m\u00e1n:pass@proxy:3128" mw = HttpProxyMiddleware(auth_encoding="utf-8") req = Request("http://scrapytest.org") assert mw.process_request(req, spider) is None @@ -103,7 +103,7 @@ class TestHttpProxyMiddleware(TestCase): # proxy from request.meta req = Request( - "http://scrapytest.org", meta={"proxy": "https://\u00FCser:pass@proxy:3128"} + "http://scrapytest.org", meta={"proxy": "https://\u00fcser:pass@proxy:3128"} ) assert mw.process_request(req, spider) is None self.assertEqual(req.meta["proxy"], "https://proxy:3128") @@ -120,7 +120,7 @@ class TestHttpProxyMiddleware(TestCase): # proxy from request.meta, latin-1 encoding req = Request( - "http://scrapytest.org", meta={"proxy": "https://\u00FCser:pass@proxy:3128"} + "http://scrapytest.org", meta={"proxy": "https://\u00fcser:pass@proxy:3128"} ) assert mw.process_request(req, spider) is None self.assertEqual(req.meta["proxy"], "https://proxy:3128") diff --git a/tests/test_downloadermiddleware_redirect.py b/tests/test_downloadermiddleware_redirect.py index 7b19ab781..eb3cdfc11 100644 --- a/tests/test_downloadermiddleware_redirect.py +++ b/tests/test_downloadermiddleware_redirect.py @@ -55,12 +55,12 @@ class Base: assert isinstance(req2, Request) self.assertEqual(req2.url, url2) self.assertEqual(req2.method, "GET") - assert ( - "Content-Type" not in req2.headers - ), "Content-Type header must not be present in redirected request" - assert ( - "Content-Length" not in req2.headers - ), "Content-Length header must not be present in redirected request" + assert "Content-Type" not in req2.headers, ( + "Content-Type header must not be present in redirected request" + ) + assert "Content-Length" not in req2.headers, ( + "Content-Length header must not be present in redirected request" + ) assert not req2.body, f"Redirected body must be empty, not '{req2.body}'" def test_max_redirect_times(self): @@ -1243,12 +1243,12 @@ 
class MetaRefreshMiddlewareTest(Base.Test): assert isinstance(req2, Request) self.assertEqual(req2.url, "http://example.org/newpage") self.assertEqual(req2.method, "GET") - assert ( - "Content-Type" not in req2.headers - ), "Content-Type header must not be present in redirected request" - assert ( - "Content-Length" not in req2.headers - ), "Content-Length header must not be present in redirected request" + assert "Content-Type" not in req2.headers, ( + "Content-Type header must not be present in redirected request" + ) + assert "Content-Length" not in req2.headers, ( + "Content-Length header must not be present in redirected request" + ) assert not req2.body, f"Redirected body must be empty, not '{req2.body}'" def test_ignore_tags_default(self): diff --git a/tests/test_downloaderslotssettings.py b/tests/test_downloaderslotssettings.py index 879bc8697..0bb143f69 100644 --- a/tests/test_downloaderslotssettings.py +++ b/tests/test_downloaderslotssettings.py @@ -93,6 +93,6 @@ def test_params(): _, actual = downloader._get_slot(request, spider=None) expected = Slot(**params) for param in params: - assert getattr(expected, param) == getattr( - actual, param - ), f"Slot.{param}: {getattr(expected, param)!r} != {getattr(actual, param)!r}" + assert getattr(expected, param) == getattr(actual, param), ( + f"Slot.{param}: {getattr(expected, param)!r} != {getattr(actual, param)!r}" + ) diff --git a/tests/test_engine.py b/tests/test_engine.py index 8d645eada..95955f7be 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -294,9 +294,9 @@ class EngineTest(unittest.TestCase): ] urls_visited = {rp[0].url for rp in run.respplug} urls_expected = {run.geturl(p) for p in must_be_visited} - assert ( - urls_expected <= urls_visited - ), f"URLs not visited: {list(urls_expected - urls_visited)}" + assert urls_expected <= urls_visited, ( + f"URLs not visited: {list(urls_expected - urls_visited)}" + ) def _assert_scheduled_requests(self, run: CrawlerRun, count=None): self.assertEqual(count, len(run.reqplug)) @@ -496,9 +496,9 @@ def test_request_scheduled_signal(caplog): drop_request = Request("https://drop.example") caplog.set_level(DEBUG) engine._schedule_request(drop_request, spider) - assert scheduler.enqueued == [ - keep_request - ], f"{scheduler.enqueued!r} != [{keep_request!r}]" + assert scheduler.enqueued == [keep_request], ( + f"{scheduler.enqueued!r} != [{keep_request!r}]" + ) crawler.signals.disconnect(signal_handler, request_scheduled) diff --git a/tests/test_engine_stop_download_headers.py b/tests/test_engine_stop_download_headers.py index 0bad5ba55..db35bd81e 100644 --- a/tests/test_engine_stop_download_headers.py +++ b/tests/test_engine_stop_download_headers.py @@ -67,6 +67,6 @@ class HeadersReceivedEngineTest(EngineTest): must_be_visited = ["/", "/redirect", "/redirected"] urls_visited = {rp[0].url for rp in run.respplug} urls_expected = {run.geturl(p) for p in must_be_visited} - assert ( - urls_expected <= urls_visited - ), f"URLs not visited: {list(urls_expected - urls_visited)}" + assert urls_expected <= urls_visited, ( + f"URLs not visited: {list(urls_expected - urls_visited)}" + ) diff --git a/tests/test_exporters.py b/tests/test_exporters.py index 1fbacfdfc..c2cab9b2a 100644 --- a/tests/test_exporters.py +++ b/tests/test_exporters.py @@ -390,14 +390,14 @@ class CsvItemExporterTest(BaseItemExporterTest): def test_errors_default(self): with self.assertRaises(UnicodeEncodeError): self.assertExportResult( - item={"text": "W\u0275\u200Brd"}, + item={"text": "W\u0275\u200brd"}, 
expected=None, encoding="windows-1251", ) def test_errors_xmlcharrefreplace(self): self.assertExportResult( - item={"text": "W\u0275\u200Brd"}, + item={"text": "W\u0275\u200brd"}, include_headers_line=False, expected="Wɵ​rd\r\n", encoding="windows-1251", diff --git a/tests/test_feedexport.py b/tests/test_feedexport.py index 7edffa1f6..4f91795e4 100644 --- a/tests/test_feedexport.py +++ b/tests/test_feedexport.py @@ -1190,8 +1190,7 @@ class FeedExportTest(FeedExportTestBase): "csv": b"baz,egg,foo\r\n,spam1,bar1\r\n", "json": b'[\n{"hello": "world2", "foo": "bar2"}\n]', "jsonlines": ( - b'{"foo": "bar1", "egg": "spam1"}\n' - b'{"hello": "world2", "foo": "bar2"}\n' + b'{"foo": "bar1", "egg": "spam1"}\n{"hello": "world2", "foo": "bar2"}\n' ), "xml": ( b'\n\n' @@ -2289,9 +2288,9 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "jl" - / self._file_mark: {"format": "jl"}, + self._random_temp_filename() / "jl" / self._file_mark: { + "format": "jl" + }, }, } ) @@ -2311,9 +2310,9 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "csv" - / self._file_mark: {"format": "csv"}, + self._random_temp_filename() / "csv" / self._file_mark: { + "format": "csv" + }, }, } ) @@ -2331,9 +2330,9 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "xml" - / self._file_mark: {"format": "xml"}, + self._random_temp_filename() / "xml" / self._file_mark: { + "format": "xml" + }, }, } ) @@ -2352,12 +2351,12 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "xml" - / self._file_mark: {"format": "xml"}, - self._random_temp_filename() - / "json" - / self._file_mark: {"format": "json"}, + self._random_temp_filename() / "xml" / self._file_mark: { + "format": "xml" + }, + self._random_temp_filename() / "json" / self._file_mark: { + "format": "json" + }, }, } ) @@ -2384,9 +2383,9 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "pickle" - / self._file_mark: {"format": "pickle"}, + self._random_temp_filename() / "pickle" / self._file_mark: { + "format": "pickle" + }, }, } ) @@ -2406,9 +2405,9 @@ class BatchDeliveriesTest(FeedExportTestBase): settings.update( { "FEEDS": { - self._random_temp_filename() - / "marshal" - / self._file_mark: {"format": "marshal"}, + self._random_temp_filename() / "marshal" / self._file_mark: { + "format": "marshal" + }, }, } ) @@ -2455,9 +2454,9 @@ class BatchDeliveriesTest(FeedExportTestBase): for fmt in ("json", "jsonlines", "xml", "csv"): settings = { "FEEDS": { - self._random_temp_filename() - / fmt - / self._file_mark: {"format": fmt}, + self._random_temp_filename() / fmt / self._file_mark: { + "format": fmt + }, }, "FEED_EXPORT_BATCH_ITEM_COUNT": 1, "FEED_STORE_EMPTY": False, @@ -2478,9 +2477,9 @@ class BatchDeliveriesTest(FeedExportTestBase): for fmt, expctd in formats: settings = { "FEEDS": { - self._random_temp_filename() - / fmt - / self._file_mark: {"format": fmt}, + self._random_temp_filename() / fmt / self._file_mark: { + "format": fmt + }, }, "FEED_STORE_EMPTY": True, "FEED_EXPORT_INDENT": None, @@ -2520,25 +2519,19 @@ class BatchDeliveriesTest(FeedExportTestBase): settings = { "FEEDS": { - self._random_temp_filename() - / "json" - / self._file_mark: { + self._random_temp_filename() / "json" / self._file_mark: { "format": "json", "indent": 0, "fields": ["bar"], 
"encoding": "utf-8", }, - self._random_temp_filename() - / "xml" - / self._file_mark: { + self._random_temp_filename() / "xml" / self._file_mark: { "format": "xml", "indent": 2, "fields": ["foo"], "encoding": "latin-1", }, - self._random_temp_filename() - / "csv" - / self._file_mark: { + self._random_temp_filename() / "csv" / self._file_mark: { "format": "csv", "indent": None, "fields": ["foo", "bar"], @@ -2563,9 +2556,7 @@ class BatchDeliveriesTest(FeedExportTestBase): } settings = { "FEEDS": { - self._random_temp_filename() - / "json" - / self._file_mark: { + self._random_temp_filename() / "json" / self._file_mark: { "format": "json", "indent": None, "encoding": "utf-8", @@ -2591,8 +2582,7 @@ class BatchDeliveriesTest(FeedExportTestBase): ] settings = { "FEEDS": { - self._random_temp_filename() - / "%(batch_id)d": { + self._random_temp_filename() / "%(batch_id)d": { "format": "json", }, }, diff --git a/tests/test_http_request.py b/tests/test_http_request.py index 34d3b25d5..9915aaca4 100644 --- a/tests/test_http_request.py +++ b/tests/test_http_request.py @@ -226,9 +226,9 @@ class RequestTest(unittest.TestCase): self.assertEqual(r1.flags, r2.flags) # make sure cb_kwargs dict is shallow copied - assert ( - r1.cb_kwargs is not r2.cb_kwargs - ), "cb_kwargs must be a shallow copy, not identical" + assert r1.cb_kwargs is not r2.cb_kwargs, ( + "cb_kwargs must be a shallow copy, not identical" + ) self.assertEqual(r1.cb_kwargs, r2.cb_kwargs) # make sure meta dict is shallow copied @@ -236,9 +236,9 @@ class RequestTest(unittest.TestCase): self.assertEqual(r1.meta, r2.meta) # make sure headers attribute is shallow copied - assert ( - r1.headers is not r2.headers - ), "headers must be a shallow copy, not identical" + assert r1.headers is not r2.headers, ( + "headers must be a shallow copy, not identical" + ) self.assertEqual(r1.headers, r2.headers) self.assertEqual(r1.encoding, r2.encoding) self.assertEqual(r1.dont_filter, r2.dont_filter) diff --git a/tests/test_http_response.py b/tests/test_http_response.py index 0730cff3a..b157e9802 100644 --- a/tests/test_http_response.py +++ b/tests/test_http_response.py @@ -99,9 +99,9 @@ class BaseResponseTest(unittest.TestCase): self.assertEqual(r1.flags, r2.flags) # make sure headers attribute is shallow copied - assert ( - r1.headers is not r2.headers - ), "headers must be a shallow copy, not identical" + assert r1.headers is not r2.headers, ( + "headers must be a shallow copy, not identical" + ) self.assertEqual(r1.headers, r2.headers) def test_copy_meta(self): diff --git a/tests/test_item.py b/tests/test_item.py index 480412841..0399c8f8d 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -289,9 +289,7 @@ class ItemMetaTest(unittest.TestCase): class ItemMetaClassCellRegression(unittest.TestCase): def test_item_meta_classcell_regression(self): class MyItem(Item, metaclass=ItemMeta): - def __init__( - self, *args, **kwargs - ): # pylint: disable=useless-parent-delegation + def __init__(self, *args, **kwargs): # pylint: disable=useless-parent-delegation # This call to super() trigger the __classcell__ propagation # requirement. 
When not done properly raises an error: # TypeError: __class__ set to diff --git a/tests/test_pipeline_files.py b/tests/test_pipeline_files.py index 4c3fc36b6..4c59fcfb7 100644 --- a/tests/test_pipeline_files.py +++ b/tests/test_pipeline_files.py @@ -215,7 +215,7 @@ class FilesPipelineTestCase(unittest.TestCase): class CustomFilesPipeline(FilesPipeline): def file_path(self, request, response=None, info=None, item=None): - return f'full/{item.get("path")}' + return f"full/{item.get('path')}" file_path = CustomFilesPipeline.from_crawler( get_crawler(None, {"FILES_STORE": self.tempdir}) diff --git a/tests/test_pipeline_media.py b/tests/test_pipeline_media.py index dd8f1084a..c6fdd3767 100644 --- a/tests/test_pipeline_media.py +++ b/tests/test_pipeline_media.py @@ -35,7 +35,6 @@ def _mocked_download_func(request, info): class UserDefinedPipeline(MediaPipeline): - def media_to_download(self, request, info, *, item=None): pass @@ -376,7 +375,6 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase): class MediaPipelineAllowRedirectSettingsTestCase(unittest.TestCase): - def _assert_request_no3xx(self, pipeline_class, settings): pipe = pipeline_class(crawler=get_crawler(None, settings)) request = Request("http://url") @@ -403,11 +401,9 @@ class MediaPipelineAllowRedirectSettingsTestCase(unittest.TestCase): self.assertNotIn(status, request.meta["handle_httpstatus_list"]) def test_subclass_standard_setting(self): - self._assert_request_no3xx(UserDefinedPipeline, {"MEDIA_ALLOW_REDIRECTS": True}) def test_subclass_specific_setting(self): - self._assert_request_no3xx( UserDefinedPipeline, {"USERDEFINEDPIPELINE_MEDIA_ALLOW_REDIRECTS": True} ) diff --git a/tests/test_robotstxt_interface.py b/tests/test_robotstxt_interface.py index e127cc2e3..0d00ff660 100644 --- a/tests/test_robotstxt_interface.py +++ b/tests/test_robotstxt_interface.py @@ -27,10 +27,7 @@ class BaseRobotParserTest: def test_allowed(self): robotstxt_robotstxt_body = ( - b"User-agent: * \n" - b"Disallow: /disallowed \n" - b"Allow: /allowed \n" - b"Crawl-delay: 10" + b"User-agent: * \nDisallow: /disallowed \nAllow: /allowed \nCrawl-delay: 10" ) rp = self.parser_cls.from_crawler( crawler=None, robotstxt_body=robotstxt_robotstxt_body @@ -140,7 +137,7 @@ class DecodeRobotsTxtTest(unittest.TestCase): self.assertEqual(decoded_content, "User-agent: *\nDisallow: /\n") def test_decode_non_utf8(self): - robotstxt_body = b"User-agent: *\n\xFFDisallow: /\n" + robotstxt_body = b"User-agent: *\n\xffDisallow: /\n" decoded_content = decode_robotstxt(robotstxt_body, spider=None) self.assertEqual(decoded_content, "User-agent: *\nDisallow: /\n") diff --git a/tests/test_selector.py b/tests/test_selector.py index 857c7d626..4eda0460f 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -107,9 +107,9 @@ class SelectorTestCase(unittest.TestCase): """Check that classes are using slots and are weak-referenceable""" x = Selector(text="") weakref.ref(x) - assert not hasattr( - x, "__dict__" - ), f"{x.__class__.__name__} does not use __slots__" + assert not hasattr(x, "__dict__"), ( + f"{x.__class__.__name__} does not use __slots__" + ) def test_selector_bad_args(self): with self.assertRaisesRegex(ValueError, "received both response and text"): diff --git a/tests/test_utils_response.py b/tests/test_utils_response.py index 7ad86127b..db6866571 100644 --- a/tests/test_utils_response.py +++ b/tests/test_utils_response.py @@ -158,18 +158,18 @@ class ResponseUtilsTest(unittest.TestCase): ) assert open_in_browser(r1, _openfunc=check_base_url), "Inject base 
url" - assert open_in_browser( - r2, _openfunc=check_base_url - ), "Inject base url with argumented head" - assert open_in_browser( - r3, _openfunc=check_base_url - ), "Inject unique base url with misleading tag" - assert open_in_browser( - r4, _openfunc=check_base_url - ), "Inject unique base url with misleading comment" - assert open_in_browser( - r5, _openfunc=check_base_url - ), "Inject unique base url with conditional comment" + assert open_in_browser(r2, _openfunc=check_base_url), ( + "Inject base url with argumented head" + ) + assert open_in_browser(r3, _openfunc=check_base_url), ( + "Inject unique base url with misleading tag" + ) + assert open_in_browser(r4, _openfunc=check_base_url), ( + "Inject unique base url with misleading comment" + ) + assert open_in_browser(r5, _openfunc=check_base_url), ( + "Inject unique base url with conditional comment" + ) def test_open_in_browser_redos_comment(self): MAX_CPU_TIME = 0.02 @@ -240,6 +240,6 @@ class ResponseUtilsTest(unittest.TestCase): ), ) def test_remove_html_comments(input_body, output_body): - assert ( - _remove_html_comments(input_body) == output_body - ), f"{_remove_html_comments(input_body)=} == {output_body=}" + assert _remove_html_comments(input_body) == output_body, ( + f"{_remove_html_comments(input_body)=} == {output_body=}" + ) diff --git a/tests/test_utils_url.py b/tests/test_utils_url.py index 314082742..4b9a98d79 100644 --- a/tests/test_utils_url.py +++ b/tests/test_utils_url.py @@ -321,9 +321,9 @@ class GuessSchemeTest(unittest.TestCase): def create_guess_scheme_t(args): def do_expected(self): url = guess_scheme(args[0]) - assert url.startswith( - args[1] - ), f"Wrong scheme guessed: for `{args[0]}` got `{url}`, expected `{args[1]}...`" + assert url.startswith(args[1]), ( + f"Wrong scheme guessed: for `{args[0]}` got `{url}`, expected `{args[1]}...`" + ) return do_expected