mirror of https://github.com/scrapy/scrapy.git synced 2025-02-06 10:24:24 +00:00

Add flake8-return rules to ruff.

Andrey Rakhmatullin 2024-12-12 20:22:03 +05:00
parent 802c67072c
commit 897e124a27
19 changed files with 25 additions and 44 deletions
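
For orientation: the flake8-return (RET) family enforces return-statement hygiene. RET501 flags an explicit "return None" in a function that can only return None, RET502 flags a bare "return" in a function that returns a value elsewhere, RET503 requires an explicit final return in such a function, and RET504 flags assigning to a variable only to return it on the next line. The sketch below is a hypothetical illustration of these patterns; none of it is Scrapy code.

from __future__ import annotations


def build_url(host: str) -> str:
    url = f"http://{host}/"  # RET504: assignment used only by the return below
    return url               # fix: return the expression directly


def find_even(numbers: list[int]) -> int | None:
    for n in numbers:
        if n % 2 == 0:
            return n
    # RET503 fires if this explicit final return is omitted, because other
    # paths return a value (hence the "# noqa: RET503" added to listen_tcp below).
    return None


def log_only(message: str) -> None:
    print(message)
    return None  # RET501: None is the only possible return value, so drop it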

View File

@@ -244,6 +244,8 @@ extend-select = [
"PGH",
# flake8-quotes
"Q",
# flake8-return
"RET",
# flake8-bandit
"S",
# flake8-slots
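
With "RET" added to extend-select, running ruff check from the repository root reports these violations, and ruff check --fix can apply fixes for some of them automatically; the per-file changes below are the resulting cleanups.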

View File

@@ -225,8 +225,9 @@ class Command(BaseRunSpiderCommand):
cb_kwargs: dict[str, Any] | None = None,
) -> Deferred[Any]:
cb_kwargs = cb_kwargs or {}
d = maybeDeferred(self.iterate_spider_output, callback(response, **cb_kwargs))
return d
return maybeDeferred(
self.iterate_spider_output, callback(response, **cb_kwargs)
)
def get_callback_from_rules(
self, spider: Spider, response: Response

View File

@@ -41,7 +41,7 @@ class OffsiteMiddleware:
def process_request(self, request: Request, spider: Spider) -> None:
if request.dont_filter or self.should_follow(request, spider):
return None
return
domain = urlparse_cached(request).hostname
if domain and domain not in self.domains_seen:
self.domains_seen.add(domain)

View File

@@ -586,7 +586,7 @@ class FeedExporter:
:param uri_template: template of uri which contains %(batch_time)s or %(batch_id)d to create new uri
"""
storage = self._get_storage(uri, feed_options)
slot = FeedSlot(
return FeedSlot(
storage=storage,
uri=uri,
format=feed_options["format"],
@@ -600,7 +600,6 @@
settings=self.settings,
crawler=self.crawler,
)
return slot
def item_scraped(self, item: Any, spider: Spider) -> None:
slots = []

View File

@@ -282,8 +282,7 @@ class DbmCacheStorage:
headers = Headers(data["headers"])
body = data["body"]
respcls = responsetypes.from_args(headers=headers, url=url, body=body)
response = respcls(url=url, headers=headers, status=status, body=body)
return response
return respcls(url=url, headers=headers, status=status, body=body)
def store_response(
self, spider: Spider, request: Request, response: Response
@@ -349,8 +348,7 @@ class FilesystemCacheStorage:
status = metadata["status"]
headers = Headers(headers_raw_to_dict(rawheaders))
respcls = responsetypes.from_args(headers=headers, url=url, body=body)
response = respcls(url=url, headers=headers, status=status, body=body)
return response
return respcls(url=url, headers=headers, status=status, body=body)
def store_response(
self, spider: Spider, request: Request, response: Response

View File

@@ -157,8 +157,7 @@ class PostProcessingManager(IOBase):
return True
def _load_plugins(self, plugins: list[Any]) -> list[Any]:
plugins = [load_object(plugin) for plugin in plugins]
return plugins
return [load_object(plugin) for plugin in plugins]
def _get_head_plugin(self) -> Any:
prev = self.file

View File

@@ -253,8 +253,7 @@ class LxmlLinkExtractor:
if self.canonicalize:
for link in links:
link.url = canonicalize_url(link.url)
links = self.link_extractor._process_links(links)
return links
return self.link_extractor._process_links(links)
def _extract_links(self, *args: Any, **kwargs: Any) -> list[Link]:
return self.link_extractor._extract_links(*args, **kwargs)

View File

@@ -79,8 +79,7 @@ class PythonRobotParser(RobotParser):
@classmethod
def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
spider = None if not crawler else crawler.spider
o = cls(robotstxt_body, spider)
return o
return cls(robotstxt_body, spider)
def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
user_agent = to_unicode(user_agent)
@@ -100,8 +99,7 @@ class RerpRobotParser(RobotParser):
@classmethod
def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
spider = None if not crawler else crawler.spider
o = cls(robotstxt_body, spider)
return o
return cls(robotstxt_body, spider)
def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
user_agent = to_unicode(user_agent)
@@ -120,8 +118,7 @@ class ProtegoRobotParser(RobotParser):
@classmethod
def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
spider = None if not crawler else crawler.spider
o = cls(robotstxt_body, spider)
return o
return cls(robotstxt_body, spider)
def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
user_agent = to_unicode(user_agent)

View File

@@ -36,7 +36,7 @@ def listen_tcp(portrange: list[int], host: str, factory: ServerFactory) -> Port:
return reactor.listenTCP(0, factory, interface=host)
if len(portrange) == 1:
return reactor.listenTCP(portrange[0], factory, interface=host)
for x in range(portrange[0], portrange[1] + 1):
for x in range(portrange[0], portrange[1] + 1): # noqa: RET503
try:
return reactor.listenTCP(x, factory, interface=host)
except error.CannotListenError:

View File

@@ -175,7 +175,7 @@ class AsyncDefAsyncioReqsReturnSpider(SimpleSpider):
status = await get_from_asyncio_queue(response.status)
self.logger.info(f"Got response {status}, req_id {req_id}")
if req_id > 0:
return
return None
reqs = []
for i in range(1, 3):
req = Request(self.start_urls[0], dont_filter=True, meta={"req_id": i})

View File

@@ -250,8 +250,7 @@ class MiddlewareUsingCoro(ManagerTestCase):
class CoroMiddleware:
async def process_request(self, request, spider):
await asyncio.sleep(0.1)
result = await get_from_asyncio_queue(resp)
return result
return await get_from_asyncio_queue(resp)
self.mwman._add_middleware(CoroMiddleware())
req = Request("http://example.com/index.html")

View File

@@ -116,7 +116,7 @@ Disallow: /some/randome/page.html
def test_robotstxt_garbage(self):
# garbage response should be discarded, equal 'allow all'
middleware = RobotsTxtMiddleware(self._get_garbage_crawler())
deferred = DeferredList(
return DeferredList(
[
self.assertNotIgnored(Request("http://site.local"), middleware),
self.assertNotIgnored(Request("http://site.local/allowed"), middleware),
@@ -127,7 +127,6 @@ Disallow: /some/randome/page.html
],
fireOnOneErrback=True,
)
return deferred
def _get_emptybody_crawler(self):
crawler = self.crawler

View File

@@ -134,8 +134,7 @@ class FTPFeedStorageTest(unittest.TestCase):
name = "test_spider"
crawler = get_crawler(settings_dict=settings)
spider = TestSpider.from_crawler(crawler)
return spider
return TestSpider.from_crawler(crawler)
def _store(self, uri, content, feed_options=None, settings=None):
crawler = get_crawler(settings_dict=settings or {})
@@ -210,8 +209,7 @@ class BlockingFeedStorageTest(unittest.TestCase):
name = "test_spider"
crawler = get_crawler(settings_dict=settings)
spider = TestSpider.from_crawler(crawler)
return spider
return TestSpider.from_crawler(crawler)
def test_default_temp_dir(self):
b = BlockingFeedStorage()

View File

@@ -342,13 +342,11 @@ class BaseResponseTest(unittest.TestCase):
def _links_response(self):
body = get_testdata("link_extractor", "linkextractor.html")
resp = self.response_class("http://example.com/index", body=body)
return resp
return self.response_class("http://example.com/index", body=body)
def _links_response_no_href(self):
body = get_testdata("link_extractor", "linkextractor_no_href.html")
resp = self.response_class("http://example.com/index", body=body)
return resp
return self.response_class("http://example.com/index", body=body)
class TextResponseTest(BaseResponseTest):

View File

@@ -48,8 +48,7 @@ sys.exit(mitmdump())
)
line = self.proc.stdout.readline().decode("utf-8")
host_port = re.search(r"listening at (?:http://)?([^:]+:\d+)", line).group(1)
address = f"http://{self.auth_user}:{self.auth_pass}@{host_port}"
return address
return f"http://{self.auth_user}:{self.auth_pass}@{host_port}"
def stop(self):
self.proc.kill()

View File

@@ -16,7 +16,6 @@ class InjectArgumentsDownloaderMiddleware:
def process_request(self, request, spider):
if request.callback.__name__ == "parse_downloader_mw":
request.cb_kwargs["from_process_request"] = True
return None
def process_response(self, request, response, spider):
if request.callback.__name__ == "parse_downloader_mw":
@@ -39,7 +38,6 @@ class InjectArgumentsSpiderMiddleware:
request = response.request
if request.callback.__name__ == "parse_spider_mw":
request.cb_kwargs["from_process_spider_input"] = True
return None
def process_spider_output(self, response, result, spider):
for element in result:

View File

@@ -18,8 +18,7 @@ class SignalCatcherSpider(Spider):
@classmethod
def from_crawler(cls, crawler, *args, **kwargs):
spider = cls(crawler, *args, **kwargs)
return spider
return cls(crawler, *args, **kwargs)
def on_request_left(self, request, spider):
self.caught_times += 1

View File

@@ -37,8 +37,7 @@ class SpiderMiddlewareTestCase(TestCase):
results = []
dfd.addBoth(results.append)
self._wait(dfd)
ret = results[0]
return ret
return results[0]
class ProcessSpiderInputInvalidOutput(SpiderMiddlewareTestCase):

View File

@@ -12,7 +12,6 @@ class LogExceptionMiddleware:
spider.logger.info(
"Middleware: %s exception caught", exception.__class__.__name__
)
return None
# ================================================================================
@@ -170,7 +169,6 @@ class _GeneratorDoNothingMiddleware:
def process_spider_exception(self, response, exception, spider):
method = f"{self.__class__.__name__}.process_spider_exception"
spider.logger.info("%s: %s caught", method, exception.__class__.__name__)
return None
class GeneratorFailMiddleware:
@@ -240,7 +238,6 @@ class _NotGeneratorDoNothingMiddleware:
def process_spider_exception(self, response, exception, spider):
method = f"{self.__class__.__name__}.process_spider_exception"
spider.logger.info("%s: %s caught", method, exception.__class__.__name__)
return None
class NotGeneratorFailMiddleware: