Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-06 10:24:24 +00:00)
Add flake8-return rules to ruff.
commit 897e124a27
parent 802c67072c
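
Note: the flake8-return (RET) rules enabled below flag a handful of return-statement patterns. As a rough illustration only (hypothetical functions, not code from this commit), the patterns rewritten in this diff look like this before and after:

# Hypothetical sketch of the flake8-return patterns this commit touches.

# RET504: assignment immediately followed by returning that variable.
def total_before(values: list[int]) -> int:
    result = sum(values)   # flagged: the intermediate name adds nothing
    return result

def total_after(values: list[int]) -> int:
    return sum(values)

# RET501: explicit `return None` in a function that never returns a value.
def skip_before(flag: bool) -> None:
    if flag:
        return None        # flagged: a bare `return` is enough here
    print("processing")

def skip_after(flag: bool) -> None:
    if flag:
        return
    print("processing")

# RET502: bare `return` in a function whose other paths return a value.
def pick_before(flag: bool):
    if not flag:
        return             # flagged: should be `return None` for consistency
    return 42

def pick_after(flag: bool):
    if not flag:
        return None
    return 42

# RET503 (missing explicit return on some path) is suppressed in one place
# in this diff with `# noqa: RET503`, where the fall-through is intentional.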
@@ -244,6 +244,8 @@ extend-select = [
     "PGH",
     # flake8-quotes
     "Q",
+    # flake8-return
+    "RET",
     # flake8-bandit
     "S",
     # flake8-slots
@@ -225,8 +225,9 @@ class Command(BaseRunSpiderCommand):
         cb_kwargs: dict[str, Any] | None = None,
     ) -> Deferred[Any]:
         cb_kwargs = cb_kwargs or {}
-        d = maybeDeferred(self.iterate_spider_output, callback(response, **cb_kwargs))
-        return d
+        return maybeDeferred(
+            self.iterate_spider_output, callback(response, **cb_kwargs)
+        )

     def get_callback_from_rules(
         self, spider: Spider, response: Response
@@ -41,7 +41,7 @@ class OffsiteMiddleware:

     def process_request(self, request: Request, spider: Spider) -> None:
         if request.dont_filter or self.should_follow(request, spider):
-            return None
+            return
         domain = urlparse_cached(request).hostname
         if domain and domain not in self.domains_seen:
             self.domains_seen.add(domain)
@@ -586,7 +586,7 @@ class FeedExporter:
         :param uri_template: template of uri which contains %(batch_time)s or %(batch_id)d to create new uri
         """
         storage = self._get_storage(uri, feed_options)
-        slot = FeedSlot(
+        return FeedSlot(
             storage=storage,
             uri=uri,
             format=feed_options["format"],
@@ -600,7 +600,6 @@ class FeedExporter:
             settings=self.settings,
             crawler=self.crawler,
         )
-        return slot

     def item_scraped(self, item: Any, spider: Spider) -> None:
         slots = []
@@ -282,8 +282,7 @@ class DbmCacheStorage:
         headers = Headers(data["headers"])
         body = data["body"]
         respcls = responsetypes.from_args(headers=headers, url=url, body=body)
-        response = respcls(url=url, headers=headers, status=status, body=body)
-        return response
+        return respcls(url=url, headers=headers, status=status, body=body)

     def store_response(
         self, spider: Spider, request: Request, response: Response
@@ -349,8 +348,7 @@ class FilesystemCacheStorage:
         status = metadata["status"]
         headers = Headers(headers_raw_to_dict(rawheaders))
         respcls = responsetypes.from_args(headers=headers, url=url, body=body)
-        response = respcls(url=url, headers=headers, status=status, body=body)
-        return response
+        return respcls(url=url, headers=headers, status=status, body=body)

     def store_response(
         self, spider: Spider, request: Request, response: Response
@@ -157,8 +157,7 @@ class PostProcessingManager(IOBase):
         return True

     def _load_plugins(self, plugins: list[Any]) -> list[Any]:
-        plugins = [load_object(plugin) for plugin in plugins]
-        return plugins
+        return [load_object(plugin) for plugin in plugins]

     def _get_head_plugin(self) -> Any:
         prev = self.file
@@ -253,8 +253,7 @@ class LxmlLinkExtractor:
         if self.canonicalize:
             for link in links:
                 link.url = canonicalize_url(link.url)
-        links = self.link_extractor._process_links(links)
-        return links
+        return self.link_extractor._process_links(links)

     def _extract_links(self, *args: Any, **kwargs: Any) -> list[Link]:
         return self.link_extractor._extract_links(*args, **kwargs)
@@ -79,8 +79,7 @@ class PythonRobotParser(RobotParser):
     @classmethod
     def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
         spider = None if not crawler else crawler.spider
-        o = cls(robotstxt_body, spider)
-        return o
+        return cls(robotstxt_body, spider)

     def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
         user_agent = to_unicode(user_agent)
@@ -100,8 +99,7 @@ class RerpRobotParser(RobotParser):
     @classmethod
     def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
         spider = None if not crawler else crawler.spider
-        o = cls(robotstxt_body, spider)
-        return o
+        return cls(robotstxt_body, spider)

     def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
         user_agent = to_unicode(user_agent)
@@ -120,8 +118,7 @@ class ProtegoRobotParser(RobotParser):
     @classmethod
     def from_crawler(cls, crawler: Crawler, robotstxt_body: bytes) -> Self:
         spider = None if not crawler else crawler.spider
-        o = cls(robotstxt_body, spider)
-        return o
+        return cls(robotstxt_body, spider)

     def allowed(self, url: str | bytes, user_agent: str | bytes) -> bool:
         user_agent = to_unicode(user_agent)
@@ -36,7 +36,7 @@ def listen_tcp(portrange: list[int], host: str, factory: ServerFactory) -> Port:
         return reactor.listenTCP(0, factory, interface=host)
     if len(portrange) == 1:
         return reactor.listenTCP(portrange[0], factory, interface=host)
-    for x in range(portrange[0], portrange[1] + 1):
+    for x in range(portrange[0], portrange[1] + 1):  # noqa: RET503
         try:
             return reactor.listenTCP(x, factory, interface=host)
         except error.CannotListenError:
@@ -175,7 +175,7 @@ class AsyncDefAsyncioReqsReturnSpider(SimpleSpider):
         status = await get_from_asyncio_queue(response.status)
         self.logger.info(f"Got response {status}, req_id {req_id}")
         if req_id > 0:
-            return
+            return None
         reqs = []
         for i in range(1, 3):
             req = Request(self.start_urls[0], dont_filter=True, meta={"req_id": i})
@@ -250,8 +250,7 @@ class MiddlewareUsingCoro(ManagerTestCase):
         class CoroMiddleware:
             async def process_request(self, request, spider):
                 await asyncio.sleep(0.1)
-                result = await get_from_asyncio_queue(resp)
-                return result
+                return await get_from_asyncio_queue(resp)

         self.mwman._add_middleware(CoroMiddleware())
         req = Request("http://example.com/index.html")
@@ -116,7 +116,7 @@ Disallow: /some/randome/page.html
     def test_robotstxt_garbage(self):
         # garbage response should be discarded, equal 'allow all'
         middleware = RobotsTxtMiddleware(self._get_garbage_crawler())
-        deferred = DeferredList(
+        return DeferredList(
             [
                 self.assertNotIgnored(Request("http://site.local"), middleware),
                 self.assertNotIgnored(Request("http://site.local/allowed"), middleware),
@@ -127,7 +127,6 @@ Disallow: /some/randome/page.html
             ],
             fireOnOneErrback=True,
         )
-        return deferred

     def _get_emptybody_crawler(self):
         crawler = self.crawler
@@ -134,8 +134,7 @@ class FTPFeedStorageTest(unittest.TestCase):
             name = "test_spider"

         crawler = get_crawler(settings_dict=settings)
-        spider = TestSpider.from_crawler(crawler)
-        return spider
+        return TestSpider.from_crawler(crawler)

     def _store(self, uri, content, feed_options=None, settings=None):
         crawler = get_crawler(settings_dict=settings or {})
@@ -210,8 +209,7 @@ class BlockingFeedStorageTest(unittest.TestCase):
             name = "test_spider"

         crawler = get_crawler(settings_dict=settings)
-        spider = TestSpider.from_crawler(crawler)
-        return spider
+        return TestSpider.from_crawler(crawler)

     def test_default_temp_dir(self):
         b = BlockingFeedStorage()
@@ -342,13 +342,11 @@ class BaseResponseTest(unittest.TestCase):

     def _links_response(self):
         body = get_testdata("link_extractor", "linkextractor.html")
-        resp = self.response_class("http://example.com/index", body=body)
-        return resp
+        return self.response_class("http://example.com/index", body=body)

     def _links_response_no_href(self):
         body = get_testdata("link_extractor", "linkextractor_no_href.html")
-        resp = self.response_class("http://example.com/index", body=body)
-        return resp
+        return self.response_class("http://example.com/index", body=body)


 class TextResponseTest(BaseResponseTest):
@@ -48,8 +48,7 @@ sys.exit(mitmdump())
         )
         line = self.proc.stdout.readline().decode("utf-8")
         host_port = re.search(r"listening at (?:http://)?([^:]+:\d+)", line).group(1)
-        address = f"http://{self.auth_user}:{self.auth_pass}@{host_port}"
-        return address
+        return f"http://{self.auth_user}:{self.auth_pass}@{host_port}"

     def stop(self):
         self.proc.kill()
@@ -16,7 +16,6 @@ class InjectArgumentsDownloaderMiddleware:
     def process_request(self, request, spider):
         if request.callback.__name__ == "parse_downloader_mw":
             request.cb_kwargs["from_process_request"] = True
-        return None

     def process_response(self, request, response, spider):
         if request.callback.__name__ == "parse_downloader_mw":
@@ -39,7 +38,6 @@ class InjectArgumentsSpiderMiddleware:
         request = response.request
         if request.callback.__name__ == "parse_spider_mw":
             request.cb_kwargs["from_process_spider_input"] = True
-        return None

     def process_spider_output(self, response, result, spider):
         for element in result:
@@ -18,8 +18,7 @@ class SignalCatcherSpider(Spider):

     @classmethod
     def from_crawler(cls, crawler, *args, **kwargs):
-        spider = cls(crawler, *args, **kwargs)
-        return spider
+        return cls(crawler, *args, **kwargs)

     def on_request_left(self, request, spider):
         self.caught_times += 1
@@ -37,8 +37,7 @@ class SpiderMiddlewareTestCase(TestCase):
         results = []
         dfd.addBoth(results.append)
         self._wait(dfd)
-        ret = results[0]
-        return ret
+        return results[0]


 class ProcessSpiderInputInvalidOutput(SpiderMiddlewareTestCase):
@@ -12,7 +12,6 @@ class LogExceptionMiddleware:
         spider.logger.info(
             "Middleware: %s exception caught", exception.__class__.__name__
         )
-        return None


 # ================================================================================
@@ -170,7 +169,6 @@ class _GeneratorDoNothingMiddleware:
     def process_spider_exception(self, response, exception, spider):
         method = f"{self.__class__.__name__}.process_spider_exception"
         spider.logger.info("%s: %s caught", method, exception.__class__.__name__)
-        return None


 class GeneratorFailMiddleware:
@@ -240,7 +238,6 @@ class _NotGeneratorDoNothingMiddleware:
     def process_spider_exception(self, response, exception, spider):
         method = f"{self.__class__.__name__}.process_spider_exception"
         spider.logger.info("%s: %s caught", method, exception.__class__.__name__)
-        return None


 class NotGeneratorFailMiddleware: