
Address some previously ignored pylint messages. (#6531)

Andrey Rakhmatullin 2024-11-11 15:49:52 +05:00 committed by GitHub
parent bcef96570b
commit b042ad255d
40 changed files with 91 additions and 99 deletions
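Editor's note: most hunks below follow two patterns that pylint now enforces for this repo: no-else-return / no-else-raise (an "else" after a branch that returns or raises is redundant) and inconsistent-return-statements (R1710: if any path returns a value, every path should return explicitly); the rest add targeted "# pylint: disable=..." comments or call super().__init__(). A minimal before/after sketch of the two patterns, illustrative only and not code from this commit:

def redundant_else(n: int) -> str:
    if n < 0:
        raise ValueError("negative")
    else:  # flagged as no-else-raise: the else is redundant after raise
        return str(n)


def fixed_else(n: int) -> str:
    if n < 0:
        raise ValueError("negative")
    return str(n)


def fixed_returns(items: list[str], key: str) -> str | None:
    for item in items:
        if item.startswith(key):
            return item
    return None  # explicit, so all return paths are consistent (R1710)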

View File

@@ -57,7 +57,7 @@ def pytest_addoption(parser):
 def reactor_pytest(request):
     if not request.cls:
         # doctests
-        return
+        return None
     request.cls.reactor_pytest = request.config.getoption("--reactor")
     return request.cls.reactor_pytest

View File

@@ -18,14 +18,12 @@ disable=abstract-method,
         disallowed-name,
         duplicate-code,  # https://github.com/PyCQA/pylint/issues/214
         eval-used,
-        expression-not-assigned,
         fixme,
         function-redefined,
         global-statement,
         implicit-str-concat,
         import-error,
         import-outside-toplevel,
-        inconsistent-return-statements,
         inherit-non-class,
         invalid-name,
         invalid-overridden-method,
@@ -37,25 +35,20 @@ disable=abstract-method,
         logging-not-lazy,
         lost-exception,
         missing-docstring,
-        no-else-raise,
-        no-else-return,
         no-member,
         no-method-argument,
         no-name-in-module,
         no-self-argument,
         no-value-for-parameter,  # https://github.com/pylint-dev/pylint/issues/3268
         not-callable,
-        pointless-exception-statement,
         pointless-statement,
         pointless-string-statement,
         protected-access,
         raise-missing-from,
-        redefined-argument-from-local,
         redefined-builtin,
         redefined-outer-name,
         reimported,
         signature-differs,
-        super-init-not-called,
         too-few-public-methods,
         too-many-ancestors,
         too-many-arguments,
@@ -73,9 +66,7 @@ disable=abstract-method,
         unreachable,
         unused-argument,
         unused-import,
-        unused-private-member,
         unused-variable,
-        unused-wildcard-import,
         used-before-assignment,
         useless-return,
         wildcard-import,
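Editor's note: checks dropped from this global disable list are either fixed outright in the hunks below or suppressed per line where the flagged code is intentional. A per-line suppression scopes the exemption to a single statement; an illustrative sketch (the real comments appear in later hunks):

def callback(response):  # pylint: disable=inconsistent-return-statements
    # Intentionally falls through without returning a value on some paths.
    if response is not None:
        return response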

View File

@@ -23,7 +23,7 @@ class Command(BaseRunSpiderCommand):
     def run(self, args: list[str], opts: argparse.Namespace) -> None:
         if len(args) < 1:
             raise UsageError()
-        elif len(args) > 1:
+        if len(args) > 1:
             raise UsageError(
                 "running 'scrapy crawl' with more than one spider is not supported"
             )

View File

@@ -35,7 +35,8 @@ class Command(ScrapyCommand):
         try:
             spidercls = self.crawler_process.spider_loader.load(args[0])
         except KeyError:
-            return self._err(f"Spider not found: {args[0]}")
+            self._err(f"Spider not found: {args[0]}")
+            return
         sfile = sys.modules[spidercls.__module__].__file__
         assert sfile

View File

@@ -399,8 +399,7 @@ class Command(BaseRunSpiderCommand):
         # parse arguments
         if not len(args) == 1 or not is_url(args[0]):
             raise UsageError()
-        else:
-            url = args[0]
+        url = args[0]

         # prepare spidercls
         self.set_spidercls(url, opts)

View File

@@ -38,7 +38,9 @@ class Contract:
         assert cb is not None

         @wraps(cb)
-        def wrapper(response: Response, **cb_kwargs: Any) -> list[Any]:
+        def wrapper(  # pylint: disable=inconsistent-return-statements
+            response: Response, **cb_kwargs: Any
+        ) -> list[Any]:
             try:
                 results.startTest(self.testcase_pre)
                 self.pre_process(response)
@@ -67,7 +69,9 @@ class Contract:
         assert cb is not None

         @wraps(cb)
-        def wrapper(response: Response, **cb_kwargs: Any) -> list[Any]:
+        def wrapper(  # pylint: disable=inconsistent-return-statements
+            response: Response, **cb_kwargs: Any
+        ) -> list[Any]:
             cb_result = cb(response, **cb_kwargs)
             if isinstance(cb_result, (AsyncGenerator, CoroutineType)):
                 raise TypeError("Contracts don't support async callbacks")

View File

@@ -92,9 +92,8 @@ class DownloadHandlers:
             )
             self._notconfigured[scheme] = str(ex)
             return None
-        else:
-            self._handlers[scheme] = dh
-            return dh
+        self._handlers[scheme] = dh
+        return dh

     def download_request(self, request: Request, spider: Spider) -> Deferred[Response]:
         scheme = urlparse_cached(request).scheme

View File

@@ -70,7 +70,10 @@ class ReceivedDataProtocol(Protocol):
         return self.__filename

     def close(self) -> None:
-        self.body.close() if self.filename else self.body.seek(0)
+        if self.filename:
+            self.body.close()
+        else:
+            self.body.seek(0)


 _CODE_RE = re.compile(r"\d+")
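Editor's note: the removed one-liner used a conditional expression purely for its side effects, which pylint reports as expression-not-assigned (W0106) because the expression's value is discarded. A minimal reproduction, illustrative only:

import io

body, filename = io.BytesIO(b"data"), None

body.close() if filename else body.seek(0)  # expression-not-assigned (W0106)

# The statement form keeps the same behavior and satisfies the check:
if filename:
    body.close()
else:
    body.seek(0)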

View File

@@ -73,7 +73,7 @@ class DownloaderMiddlewareManager(MiddlewareManager):
     ) -> Generator[Deferred[Any], Any, Response | Request]:
         if response is None:
             raise TypeError("Received None in process_response")
-        elif isinstance(response, Request):
+        if isinstance(response, Request):
             return response

         for method in self.methods["process_response"]:

View File

@@ -172,7 +172,7 @@ class ExecutionEngine:
         assert self.spider is not None  # typing

         if self.paused:
-            return None
+            return

         while (
             not self._needs_backout()
@@ -418,7 +418,7 @@ class ExecutionEngine:
             if isinstance(x, Failure) and isinstance(x.value, ex)
         }
         if DontCloseSpider in detected_ex:
-            return None
+            return
         if self.spider_is_idle():
             ex = detected_ex.get(CloseSpider, CloseSpider(reason="finished"))
             assert isinstance(ex, CloseSpider)  # typing

View File

@@ -312,8 +312,7 @@ class Scheduler(BaseScheduler):
             assert self.stats is not None
             self.stats.inc_value("scheduler/unserializable", spider=self.spider)
             return False
-        else:
-            return True
+        return True

     def _mqpush(self, request: Request) -> None:
         self.mqs.push(request)

View File

@@ -174,14 +174,13 @@ class SpiderMiddlewareManager(MiddlewareManager):
                     # _process_spider_exception too, which complicates the architecture
                     msg = f"Async iterable returned from {method.__qualname__} cannot be downgraded"
                     raise _InvalidOutput(msg)
-            elif result is None:
+            if result is None:
                 continue
-            else:
-                msg = (
-                    f"{method.__qualname__} must return None "
-                    f"or an iterable, got {type(result)}"
-                )
-                raise _InvalidOutput(msg)
+            msg = (
+                f"{method.__qualname__} must return None "
+                f"or an iterable, got {type(result)}"
+            )
+            raise _InvalidOutput(msg)
         return _failure

     # This method cannot be made async def, as _process_spider_exception relies on the Deferred result

View File

@@ -105,7 +105,7 @@ class ItemFilter:
 class IFeedStorage(Interface):
     """Interface that all Feed Storages must implement"""

-    def __init__(uri, *, feed_options=None):
+    def __init__(uri, *, feed_options=None):  # pylint: disable=super-init-not-called
         """Initialize the storage with the parameters given in the URI and the
         feed-specific options (see :setting:`FEEDS`)"""

View File

@@ -152,8 +152,7 @@ def _get_form(
         form = forms[formnumber]
     except IndexError:
         raise IndexError(f"Form number {formnumber} not found in {response}")
-    else:
-        return cast(FormElement, form)
+    return cast(FormElement, form)


 def _get_inputs(
@@ -264,5 +263,4 @@ def _get_clickable(
             f"Multiple elements found ({el!r}) matching the "
             f"criteria in clickdata: {clickdata!r}"
         )
-    else:
-        raise ValueError(f"No clickable element matching clickdata: {clickdata!r}")
+    raise ValueError(f"No clickable element matching clickdata: {clickdata!r}")

View File

@@ -117,8 +117,8 @@ class MailSender:
         if charset:
             msg.set_charset(charset)
         msg.attach(MIMEText(body, "plain", charset or "us-ascii"))
-        for attach_name, mimetype, f in attachs:
-            part = MIMEBase(*mimetype.split("/"))
+        for attach_name, attach_mimetype, f in attachs:
+            part = MIMEBase(*attach_mimetype.split("/"))
             part.set_payload(f.read())
             Encoders.encode_base64(part)
             part.add_header(
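Editor's note: the rename sidesteps redefined-argument-from-local (R1704), which fires when a loop variable reuses the name of a function argument — here, presumably, a mimetype parameter on the enclosing send() method. A minimal reproduction with hypothetical names:

def send(body: str, mimetype: str, attachs: list) -> None:
    for name, mimetype, payload in attachs:  # R1704: shadows the argument
        print(name, mimetype, payload)


def send_fixed(body: str, mimetype: str, attachs: list) -> None:
    for name, attach_mimetype, payload in attachs:  # no shadowing
        print(name, attach_mimetype, payload)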

View File

@@ -265,8 +265,7 @@ class S3FilesStore:
                 kwarg = mapping[key]
             except KeyError:
                 raise TypeError(f'Header "{key}" is not supported by botocore')
-            else:
-                extra[kwarg] = value
+            extra[kwarg] = value
         return extra

View File

@@ -141,9 +141,8 @@ class CachingHostnameResolver:
                 addressTypes,
                 transportSemantics,
             )
-        else:
-            resolutionReceiver.resolutionBegan(HostResolution(hostName))
-            for addr in addresses:
-                resolutionReceiver.addressResolved(addr)
-            resolutionReceiver.resolutionComplete()
-            return resolutionReceiver
+        resolutionReceiver.resolutionBegan(HostResolution(hostName))
+        for addr in addresses:
+            resolutionReceiver.addressResolved(addr)
+        resolutionReceiver.resolutionComplete()
+        return resolutionReceiver

View File

@@ -501,11 +501,9 @@ class BaseSettings(MutableMapping[_SettingsKeyT, Any]):
         except KeyError:
             if default is self.__default:
                 raise
             return default
-        else:
-            self.__delitem__(name)
-            return value
+        self.__delitem__(name)
+        return value


 class Settings(BaseSettings):

View File

@@ -325,9 +325,8 @@ def _load_policy_class(
         msg = f"Could not load referrer policy {policy!r}"
         if not warning_only:
             raise RuntimeError(msg)
-        else:
-            warnings.warn(msg, RuntimeWarning)
-            return None
+        warnings.warn(msg, RuntimeWarning)
+        return None


 class RefererMiddleware:

View File

@@ -44,8 +44,7 @@ def build_component_list(
                         "convert to the same "
                         "object, please update your settings"
                     )
-            else:
-                compbs.set(convert(k), v, priority=prio)
+            compbs.set(convert(k), v, priority=prio)
         return compbs
     _check_components(compdict)
     return {convert(k): v for k, v in compdict.items()}

View File

@@ -101,7 +101,7 @@ DEFAULT_PYTHON_SHELLS: KnownShellsT = {

 def get_shell_embed_func(
     shells: Iterable[str] | None = None, known_shells: KnownShellsT | None = None
-) -> Any:
+) -> EmbedFuncT | None:
     """Return the first acceptable shell-embed function
     from a given list of shell names.
@@ -117,6 +117,7 @@ def get_shell_embed_func(
             return known_shells[shell]()
         except ImportError:
             continue
+    return None


 def start_python_console(

View File

@@ -109,8 +109,7 @@ def mustbe_deferred(
         return defer_fail(failure.Failure(e))
     except Exception:
         return defer_fail(failure.Failure())
-    else:
-        return defer_result(result)
+    return defer_result(result)


 def parallel(

View File

@@ -36,11 +36,10 @@ def _colorize(text: str, colorize: bool = True) -> str:
         from pygments import highlight
     except ImportError:
         return text
-    else:
-        from pygments.formatters import TerminalFormatter
-        from pygments.lexers import PythonLexer
+    from pygments.formatters import TerminalFormatter
+    from pygments.lexers import PythonLexer

-        return highlight(text, PythonLexer(), TerminalFormatter())
+    return highlight(text, PythonLexer(), TerminalFormatter())


 def pformat(obj: Any, *args: Any, **kwargs: Any) -> str:

View File

@@ -51,6 +51,7 @@ class TopLevelFormatter(logging.Filter):
     """

     def __init__(self, loggers: list[str] | None = None):
+        super().__init__()
         self.loggers: list[str] = loggers or []

     def filter(self, record: logging.LogRecord) -> bool:
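Editor's note: TopLevelFormatter subclasses logging.Filter, so an __init__ that never invokes the parent initializer triggered super-init-not-called (W0231); the same message motivates the **kwargs additions in the test spiders further down. A minimal reproduction, illustrative only:

import logging

class Flagged(logging.Filter):
    def __init__(self, loggers=None):  # W0231: never calls super().__init__()
        self.loggers = loggers or []

class Fixed(logging.Filter):
    def __init__(self, loggers=None):
        super().__init__()  # lets logging.Filter set up its own state
        self.loggers = loggers or []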

View File

@@ -323,9 +323,8 @@ def without_none_values(
     """
     if isinstance(iterable, Mapping):
         return {k: v for k, v in iterable.items() if v is not None}
-    else:
-        # the iterable __init__ must take another iterable
-        return type(iterable)(v for v in iterable if v is not None)  # type: ignore[call-arg]
+    # the iterable __init__ must take another iterable
+    return type(iterable)(v for v in iterable if v is not None)  # type: ignore[call-arg]


 def global_object_name(obj: Any) -> str:

View File

@@ -26,7 +26,7 @@ if TYPE_CHECKING:
 _T = TypeVar("_T")


-def listen_tcp(portrange: list[int], host: str, factory: ServerFactory) -> Port:  # type: ignore[return]
+def listen_tcp(portrange: list[int], host: str, factory: ServerFactory) -> Port:  # type: ignore[return] # pylint: disable=inconsistent-return-statements
     """Like reactor.listenTCP but tries different ports in a range."""
     from twisted.internet import reactor

View File

@@ -66,9 +66,8 @@ def _remove_html_comments(body: bytes) -> bytes:
             end = body.find(b"-->", start + 1)
             if end == -1:
                 return body[:start]
-            else:
-                body = body[:start] + body[end + 3 :]
-                start = body.find(b"<!--")
+            body = body[:start] + body[end + 3 :]
+            start = body.find(b"<!--")
     return body

View File

@@ -66,6 +66,7 @@ def get_oldest(class_name: str) -> Any:
             if not wdict:
                 break
             return min(wdict.items(), key=itemgetter(1))[0]
+    return None


 def iter_all(class_name: str) -> Iterable[Any]:

View File

@@ -14,7 +14,7 @@ from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse

 # scrapy.utils.url was moved to w3lib.url and import * ensures this
 # move doesn't break old code
-from w3lib.url import *
+from w3lib.url import *  # pylint: disable=unused-wildcard-import
 from w3lib.url import _safe_chars, _unquotepath  # noqa: F401

 from scrapy.utils.python import to_unicode

View File

@@ -377,11 +377,13 @@ class SingleRequestSpider(MetaSpider):
             return self.callback_func(response)
         if "next" in response.meta:
             return response.meta["next"]
+        return None

     def on_error(self, failure):
         self.meta["failure"] = failure
         if callable(self.errback_func):
             return self.errback_func(failure)
+        return None


 class DuplicateStartRequestsSpider(MockServerSpider):

View File

@@ -143,7 +143,8 @@ class CrawlerTestCase(BaseCrawlerTest):
             def from_crawler(cls, crawler):
                 return cls(crawler=crawler)

-            def __init__(self, crawler):
+            def __init__(self, crawler, **kwargs: Any):
+                super().__init__(**kwargs)
                 self.crawler = crawler

             def start_requests(self):
@@ -223,7 +224,8 @@ class CrawlerTestCase(BaseCrawlerTest):
             def from_crawler(cls, crawler):
                 return cls(crawler=crawler)

-            def __init__(self, crawler):
+            def __init__(self, crawler, **kwargs: Any):
+                super().__init__(**kwargs)
                 self.crawler = crawler

             def start_requests(self):
@@ -301,7 +303,8 @@ class CrawlerTestCase(BaseCrawlerTest):
             def from_crawler(cls, crawler):
                 return cls(crawler=crawler)

-            def __init__(self, crawler):
+            def __init__(self, crawler, **kwargs: Any):
+                super().__init__(**kwargs)
                 self.crawler = crawler

             def start_requests(self):
@@ -379,7 +382,8 @@ class CrawlerTestCase(BaseCrawlerTest):
             def from_crawler(cls, crawler):
                 return cls(crawler=crawler)

-            def __init__(self, crawler):
+            def __init__(self, crawler, **kwargs: Any):
+                super().__init__(**kwargs)
                 self.crawler = crawler

             def start_requests(self):

View File

@@ -25,7 +25,7 @@ def _cookie_to_set_cookie_value(cookie):
     for key in ("name", "value", "path", "domain"):
         if cookie.get(key) is None:
             if key in ("name", "value"):
-                return
+                return None
             continue
         if isinstance(cookie[key], (bool, float, int, str)):
             decoded[key] = str(cookie[key])

View File

@@ -436,8 +436,7 @@ class Base:
         def process_value(value):
             m = re.search(r"javascript:goToPage\('(.*?)'", value)
-            if m:
-                return m.group(1)
+            return m.group(1) if m else None

         lx = self.extractor_cls(process_value=process_value)
         self.assertEqual(

View File

@@ -69,8 +69,7 @@ class BasicItemLoaderTest(unittest.TestCase):
     def test_load_item_ignore_none_field_values(self):
         def validate_sku(value):
             # Let's assume a SKU is only digits.
-            if value.isdigit():
-                return value
+            return value if value.isdigit() else None

         class MyLoader(ItemLoader):
             name_out = Compose(lambda vs: vs[0])  # take first which allows empty values

View File

@@ -198,8 +198,7 @@ class DropSomeItemsPipeline:
         if self.drop:
             self.drop = False
             raise DropItem("Ignoring item")
-        else:
-            self.drop = True
+        self.drop = True


 class ShowOrSkipMessagesTestCase(TwistedTestCase):

View File

@@ -627,20 +627,19 @@ class TestGCSFilesStore(unittest.TestCase):
             import google.cloud.storage  # noqa
         except ModuleNotFoundError:
             raise unittest.SkipTest("google-cloud-storage is not installed")
-        else:
-            with mock.patch("google.cloud.storage") as _:
-                with mock.patch("scrapy.pipelines.files.time") as _:
-                    uri = "gs://my_bucket/my_prefix/"
-                    store = GCSFilesStore(uri)
-                    store.bucket = mock.Mock()
-                    path = "full/my_data.txt"
-                    yield store.persist_file(
-                        path, mock.Mock(), info=None, meta=None, headers=None
-                    )
-                    yield store.stat_file(path, info=None)
-                    expected_blob_path = store.prefix + path
-                    store.bucket.blob.assert_called_with(expected_blob_path)
-                    store.bucket.get_blob.assert_called_with(expected_blob_path)
+        with mock.patch("google.cloud.storage") as _:
+            with mock.patch("scrapy.pipelines.files.time") as _:
+                uri = "gs://my_bucket/my_prefix/"
+                store = GCSFilesStore(uri)
+                store.bucket = mock.Mock()
+                path = "full/my_data.txt"
+                yield store.persist_file(
+                    path, mock.Mock(), info=None, meta=None, headers=None
+                )
+                yield store.stat_file(path, info=None)
+                expected_blob_path = store.prefix + path
+                store.bucket.blob.assert_called_with(expected_blob_path)
+                store.bucket.get_blob.assert_called_with(expected_blob_path)


 class TestFTPFileStore(unittest.TestCase):

View File

@@ -159,7 +159,7 @@ class RequestSerializationTest(unittest.TestCase):

 class TestSpiderMixin:
-    def __mixin_callback(self, response):
+    def __mixin_callback(self, response):  # pylint: disable=unused-private-member
         pass

@@ -191,7 +191,8 @@ class TestSpider(Spider, TestSpiderMixin):
     __parse_item_reference = private_parse_item
     __handle_error_reference = private_handle_error

-    def __init__(self):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
         self.delegated_callback = TestSpiderDelegation().delegated_callback

     def parse_item(self, response):
@@ -200,5 +201,5 @@ class TestSpider(Spider, TestSpiderMixin):
     def handle_error(self, failure):
         pass

-    def __parse_item_private(self, response):
+    def __parse_item_private(self, response):  # pylint: disable=unused-private-member
         pass
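Editor's note: the disables above are needed because double-underscore attributes are name-mangled: pylint sees no direct call to __mixin_callback or __parse_item_private, which these serialization tests presumably reach indirectly, so it reports unused-private-member (W0238). A short sketch of the mangling that hides the use, illustrative only:

class Demo:
    def __hidden(self):  # stored as _Demo__hidden, looks unused to pylint
        return "called"

    alias = __hidden  # inside the class body this resolves via name mangling


print(Demo().alias())  # prints "called"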

View File

@@ -686,6 +686,7 @@ class CustomPythonOrgPolicy(ReferrerPolicy):
             return b"https://python.org/"
         if scheme == "http":
             return b"http://python.org/"
+        return None


 class TestSettingsCustomPolicy(TestRefererMiddleware):

View File

@@ -158,6 +158,7 @@ class CaseInsensitiveDictMixin:
     def _normvalue(self, value):
         if value is not None:
             return value + 1
+        return None

     normvalue = _normvalue  # deprecated CaselessDict class

View File

@@ -182,6 +182,7 @@ class AsyncCooperatorTest(unittest.TestCase):
                 return dfd
             # simulate trivial sync processing
             results.append(o)
+            return None

     @staticmethod
     def get_async_iterable(length):