
Fix and remove most of the entries from the mypy ignore list (#6137)

Andrey Rakhmatullin 2023-11-07 12:34:35 +04:00 committed by GitHub
parent c31e09d709
commit b4acf5c827
19 changed files with 67 additions and 122 deletions

scrapy/contracts/__init__.py

@@ -3,7 +3,7 @@ import sys
 from functools import wraps
 from inspect import getmembers
 from types import CoroutineType
-from typing import AsyncGenerator, Dict
+from typing import AsyncGenerator, Dict, Optional, Type
 from unittest import TestCase

 from scrapy.http import Request
@@ -14,7 +14,7 @@ from scrapy.utils.spider import iterate_spider_output
 class Contract:
     """Abstract class for contracts"""

-    request_cls = None
+    request_cls: Optional[Type[Request]] = None

     def __init__(self, method, *args):
         self.testcase_pre = _create_testcase(method, f"@{self.name} pre-hook")

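The pattern here recurs throughout the commit: a class attribute that defaults to None must be declared Optional, and since the attribute holds a class rather than an instance, its element type is Type[Request]. A minimal sketch of what the annotation buys (the subclasses are hypothetical, not from this commit):

from typing import Optional, Type

from scrapy import FormRequest, Request


class Contract:
    request_cls: Optional[Type[Request]] = None


class FormContract(Contract):
    request_cls = FormRequest  # OK: FormRequest is a subclass of Request


class BadContract(Contract):
    request_cls = str  # mypy error: Type[str] is not Optional[Type[Request]]
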
scrapy/loader/__init__.py

@@ -78,7 +78,7 @@ class ItemLoader(itemloaders.ItemLoader):
     read-only.
     """

-    default_item_class = Item
+    default_item_class: type = Item
     default_selector_class = Selector

     def __init__(self, item=None, selector=None, response=None, parent=None, **context):

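Note that `default_item_class` is annotated as plain `type` rather than `Type[Item]`: item loaders also accept dicts, attrs classes and dataclasses as item types, so the narrower annotation would reject legitimate subclasses. A minimal sketch of a hypothetical subclass this keeps legal:

import dataclasses

from scrapy.loader import ItemLoader


@dataclasses.dataclass
class Product:
    name: str = ""


class ProductLoader(ItemLoader):
    # Type[Item] would reject this; plain `type` accepts any class object
    default_item_class = Product
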
scrapy/pipelines/images.py

@@ -8,6 +8,7 @@ import hashlib
 import warnings
 from contextlib import suppress
 from io import BytesIO
+from typing import Dict, Tuple

 from itemadapter import ItemAdapter
@@ -48,7 +49,7 @@ class ImagesPipeline(FilesPipeline):
     MIN_WIDTH = 0
     MIN_HEIGHT = 0
     EXPIRES = 90
-    THUMBS = {}
+    THUMBS: Dict[str, Tuple[int, int]] = {}
     DEFAULT_IMAGES_URLS_FIELD = "image_urls"
     DEFAULT_IMAGES_RESULT_FIELD = "images"

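An empty literal gives mypy nothing to infer element types from, so a bare `THUMBS = {}` produces a "Need type annotation" error once the module is checked. The same fix recurs below for `default_headers = {}`, `checks = []` and `bypass_status_codes = set()`. With the annotation in place, subclass overrides are checked against the declared shape; a minimal sketch (the subclass is hypothetical):

from typing import Dict, Tuple

from scrapy.pipelines.images import ImagesPipeline


class MyImagesPipeline(ImagesPipeline):
    THUMBS = {
        "small": (50, 50),  # checked against the inherited Dict[str, Tuple[int, int]]
        "big": (270, 270),
    }
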
tests/utils/testproc.py

@@ -11,7 +11,7 @@ from twisted.python.failure import Failure
 class ProcessTest:
-    command = None
+    command: Optional[str] = None
     prefix = [sys.executable, "-m", "scrapy.cmdline"]
     cwd = os.getcwd()  # trial chdirs to temp dir

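This one-line fix is plausibly what let the test_command_fetch/parse/shell/version entries drop out of the ignore list below without those files being touched: each of them subclasses ProcessTest and assigns a string to `command`, which mypy rejected while the base attribute was inferred as plain None. A minimal sketch:

from typing import Optional


class ProcessTest:
    command: Optional[str] = None


class FetchTest(ProcessTest):
    command = "fetch"  # an incompatible-override error before the annotation
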
setup.cfg

@@ -8,82 +8,17 @@ universal=1
 ignore_missing_imports = true

 # Interface classes are hard to support
 [mypy-twisted.internet.interfaces]
 follow_imports = skip

+[mypy-scrapy.interfaces]
+ignore_errors = True
+
 [mypy-twisted.internet.reactor]
 follow_imports = skip

 # FIXME: remove the following sections once the issues are solved
-[mypy-scrapy.interfaces]
-ignore_errors = True
-
-[mypy-scrapy.pipelines.images]
-ignore_errors = True
-
 [mypy-scrapy.settings.default_settings]
 ignore_errors = True

-[mypy-tests.mocks.dummydbm]
-ignore_errors = True
-
-[mypy-tests.test_command_fetch]
-ignore_errors = True
-
-[mypy-tests.test_command_parse]
-ignore_errors = True
-
-[mypy-tests.test_command_shell]
-ignore_errors = True
-
-[mypy-tests.test_command_version]
-ignore_errors = True
-
-[mypy-tests.test_contracts]
-ignore_errors = True
-
-[mypy-tests.test_downloader_handlers]
-ignore_errors = True
-
-[mypy-tests.test_exporters]
-ignore_errors = True
-
-[mypy-tests.test_http_request]
-ignore_errors = True
-
-[mypy-tests.test_linkextractors]
-ignore_errors = True
-
-[mypy-tests.test_loader]
-ignore_errors = True
-
-[mypy-tests.test_loader_deprecated]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_crawl]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_files]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_images]
-ignore_errors = True
-
-[mypy-tests.test_request_cb_kwargs]
-ignore_errors = True
-
-[mypy-tests.test_scheduler]
-ignore_errors = True
-
-[mypy-tests.test_spidermiddleware_httperror]
-ignore_errors = True
-
-[mypy-tests.test_spidermiddleware_referer]
-ignore_errors = True
-
-[mypy-tests.test_utils_serialize]
-ignore_errors = True
-
-[mypy-tests.test_utils_url]
-ignore_errors = True

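For readers less familiar with mypy's per-module sections: `ignore_errors = True` still analyzes a module (so its types stay usable from elsewhere) but suppresses the errors reported inside it, whereas `follow_imports = skip` never analyzes the module at all and importers see all of its contents as Any. A minimal sketch with hypothetical module names:

[mypy]
ignore_missing_imports = true

# analyzed, but its own errors are silenced
[mypy-mypackage.legacy]
ignore_errors = True

# never analyzed; attributes imported from it become Any
[mypy-mypackage.vendored]
follow_imports = skip

Deleting one of the FIXME sections and rerunning mypy surfaces the previously suppressed errors for that module, which is exactly the loop this commit runs for each file below.
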
tests/mocks/dummydbm.py

@@ -1,5 +1,6 @@
 """DBM-like dummy module"""
 import collections
+from typing import Any, DefaultDict


 class DummyDB(dict):
@@ -12,7 +13,7 @@ class DummyDB(dict):
     error = KeyError


-_DATABASES = collections.defaultdict(DummyDB)
+_DATABASES: DefaultDict[Any, DummyDB] = collections.defaultdict(DummyDB)


 def open(file, flag="r", mode=0o666):

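`defaultdict(DummyDB)` pins the value type through its factory, but nothing in the call tells mypy what the keys are, so the module-level name needs an explicit annotation. A self-contained sketch of the same fix:

import collections
from typing import Any, DefaultDict


class DummyDB(dict):
    """Stands in for the real class, for this sketch only."""


_DATABASES: DefaultDict[Any, DummyDB] = collections.defaultdict(DummyDB)
print(type(_DATABASES["some-file.db"]))  # a fresh DummyDB per new key
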
tests/test_exporters.py

@@ -7,6 +7,7 @@ import tempfile
 import unittest
 from datetime import datetime
 from io import BytesIO
+from typing import Any

 import lxml.etree
 from itemadapter import ItemAdapter
@@ -53,8 +54,8 @@ class CustomFieldDataclass:

 class BaseItemExporterTest(unittest.TestCase):
-    item_class = TestItem
-    custom_field_item_class = CustomFieldItem
+    item_class: type = TestItem
+    custom_field_item_class: type = CustomFieldItem

     def setUp(self):
         self.i = self.item_class(name="John\xa3", age="22")
@@ -517,7 +518,7 @@ class XmlItemExporterDataclassTest(XmlItemExporterTest):

 class JsonLinesItemExporterTest(BaseItemExporterTest):
-    _expected_nested = {
+    _expected_nested: Any = {
         "name": "Jesus",
         "age": {"name": "Maria", "age": {"name": "Joseph", "age": "22"}},
     }
@@ -665,7 +666,7 @@ class JsonItemExporterDataclassTest(JsonItemExporterTest):

 class CustomExporterItemTest(unittest.TestCase):
-    item_class = TestItem
+    item_class: type = TestItem

     def setUp(self):
         if self.item_class is None:

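`item_class: type` repeats the broad-annotation trick from the loader change, while `_expected_nested: Any` handles a different problem: subclasses override the fixture with data of a different nesting shape, and spelling out a precise union in test code would add noise without catching real bugs. A hypothetical sketch:

from typing import Any


class JsonLinesCase:
    _expected_nested: Any = {"name": "a", "age": {"name": "b"}}


class XmlCase(JsonLinesCase):
    _expected_nested = {"name": "a", "age": [{"name": "b"}]}  # new shape, no error
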
tests/test_http_request.py

@@ -3,6 +3,7 @@ import re
 import unittest
 import warnings
 import xmlrpc.client
+from typing import Any, Dict, List
 from unittest import mock
 from urllib.parse import parse_qs, unquote_to_bytes, urlparse
@@ -21,8 +22,8 @@ from scrapy.utils.python import to_bytes, to_unicode

 class RequestTest(unittest.TestCase):
     request_class = Request
     default_method = "GET"
-    default_headers = {}
-    default_meta = {}
+    default_headers: Dict[bytes, List[bytes]] = {}
+    default_meta: Dict[str, Any] = {}

     def test_init(self):
         # Request requires url in the __init__ method

tests/test_linkextractors.py

@@ -1,6 +1,7 @@
 import pickle
 import re
 import unittest
+from typing import Optional

 from packaging.version import Version
 from pytest import mark
@@ -15,7 +16,7 @@ from tests import get_testdata

 # a hack to skip base class tests in pytest
 class Base:
     class LinkExtractorTestCase(unittest.TestCase):
-        extractor_cls = None
+        extractor_cls: Optional[type] = None

         def setUp(self):
             body = get_testdata("link_extractor", "linkextractor.html")

tests/test_loader.py

@@ -1,5 +1,6 @@
 import dataclasses
 import unittest
+from typing import Optional

 import attr
 from itemadapter import ItemAdapter
@@ -87,7 +88,7 @@ class BasicItemLoaderTest(unittest.TestCase):

 class InitializationTestMixin:
-    item_class = None
+    item_class: Optional[type] = None

     def test_keep_single_value(self):
         """Loaded item should contain values from the initial item"""

tests/test_pipeline_crawl.py

@@ -1,5 +1,6 @@
 import shutil
 from pathlib import Path
+from typing import Optional, Set

 from testfixtures import LogCapture
 from twisted.internet import defer
@@ -54,7 +55,7 @@ class FileDownloadCrawlTestCase(TestCase):
     store_setting_key = "FILES_STORE"
     media_key = "files"
     media_urls_key = "file_urls"
-    expected_checksums = {
+    expected_checksums: Optional[Set[str]] = {
         "5547178b89448faf0015a13f904c936e",
         "c2281c83670e31d8aaab7cb642b824db",
         "ed3f6538dc15d4d9179dae57319edc5f",
@@ -193,6 +194,7 @@ class FileDownloadCrawlTestCase(TestCase):
     )

+skip_pillow: Optional[str]
 try:
     from PIL import Image  # noqa: imported just to check for the import error
 except ImportError:

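`skip_pillow: Optional[str]` on its own line is a bare declaration: no value is assigned yet, but every later assignment is checked against Optional[str], and the try/except branches unify instead of mypy committing to whichever assignment it meets first. A minimal sketch of the full pattern as these test modules use it (the message text is illustrative):

from typing import Optional

skip_pillow: Optional[str]
try:
    from PIL import Image  # noqa: F401  (only probing for ImportError)
except ImportError:
    skip_pillow = "Missing Python Imaging Library (PIL)"
else:
    skip_pillow = None
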
tests/test_pipeline_files.py

@@ -7,6 +7,7 @@ from io import BytesIO
 from pathlib import Path
 from shutil import rmtree
 from tempfile import mkdtemp
+from typing import Dict, List
 from unittest import mock
 from urllib.parse import urlparse
@@ -308,11 +309,11 @@ class FilesPipelineTestCaseFieldsDataClass(
 class FilesPipelineTestAttrsItem:
     name = attr.ib(default="")
     # default fields
-    file_urls = attr.ib(default=lambda: [])
-    files = attr.ib(default=lambda: [])
+    file_urls: List[str] = attr.ib(default=lambda: [])
+    files: List[Dict[str, str]] = attr.ib(default=lambda: [])
     # overridden fields
-    custom_file_urls = attr.ib(default=lambda: [])
-    custom_files = attr.ib(default=lambda: [])
+    custom_file_urls: List[str] = attr.ib(default=lambda: [])
+    custom_files: List[Dict[str, str]] = attr.ib(default=lambda: [])


 class FilesPipelineTestCaseFieldsAttrsItem(
@@ -690,7 +691,3 @@ def _prepare_request_object(item_url, flags=None):
         item_url,
         meta={"response": Response(item_url, status=200, body=b"data", flags=flags)},
     )
-
-
-if __name__ == "__main__":
-    unittest.main()

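Annotating `attr.ib()` assignments gives both attrs and mypy's attrs plugin the field types; without them every field is Any and the generated __init__ goes unchecked. A hypothetical sketch, using attr.Factory for the mutable defaults:

from typing import Dict, List

import attr


@attr.s
class FileItem:
    name: str = attr.ib(default="")
    file_urls: List[str] = attr.ib(default=attr.Factory(list))
    files: List[Dict[str, str]] = attr.ib(default=attr.Factory(list))


item = FileItem(name="report", file_urls=["https://example.com/a.pdf"])

The trailing hunk also drops a stray `if __name__ == "__main__"` block, a cleanup unrelated to typing.
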
tests/test_pipeline_images.py

@@ -5,6 +5,7 @@ import random
 import warnings
 from shutil import rmtree
 from tempfile import mkdtemp
+from typing import Dict, List, Optional
 from unittest.mock import patch

 import attr
@@ -18,6 +19,7 @@ from scrapy.pipelines.images import ImageException, ImagesPipeline, NoimagesDrop
 from scrapy.settings import Settings
 from scrapy.utils.python import to_bytes

+skip_pillow: Optional[str]
 try:
     from PIL import Image
 except ImportError:
@@ -26,7 +28,7 @@ except ImportError:
     )
 else:
     encoders = {"jpeg_encoder", "jpeg_decoder"}
-    if not encoders.issubset(set(Image.core.__dict__)):
+    if not encoders.issubset(set(Image.core.__dict__)):  # type: ignore[attr-defined]
         skip_pillow = "Missing JPEG encoders"
     else:
         skip_pillow = None
@@ -404,11 +406,11 @@ class ImagesPipelineTestCaseFieldsDataClass(
 class ImagesPipelineTestAttrsItem:
     name = attr.ib(default="")
     # default fields
-    image_urls = attr.ib(default=lambda: [])
-    images = attr.ib(default=lambda: [])
+    image_urls: List[str] = attr.ib(default=lambda: [])
+    images: List[Dict[str, str]] = attr.ib(default=lambda: [])
     # overridden fields
-    custom_image_urls = attr.ib(default=lambda: [])
-    custom_images = attr.ib(default=lambda: [])
+    custom_image_urls: List[str] = attr.ib(default=lambda: [])
+    custom_images: List[Dict[str, str]] = attr.ib(default=lambda: [])


 class ImagesPipelineTestCaseFieldsAttrsItem(
@@ -646,7 +648,3 @@ def _create_image(format, *a, **kw):
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf), buf
-
-
-if __name__ == "__main__":
-    unittest.main()

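Where a third-party stub is genuinely incomplete, the commit reaches for a scoped ignore rather than a module-wide one: `# type: ignore[attr-defined]` silences only that error code on that line (Pillow's `Image.core` is a C extension mypy cannot see into), and any other error on the same line still surfaces. A minimal sketch (requires Pillow installed):

from PIL import Image

encoders = {"jpeg_encoder", "jpeg_decoder"}
# only attr-defined is suppressed; other errors here would still be reported
have_jpeg = encoders.issubset(set(Image.core.__dict__))  # type: ignore[attr-defined]

Running mypy with --warn-unused-ignores flags such comments if the stubs later improve.
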
tests/test_request_cb_kwargs.py

@@ -1,3 +1,5 @@
+from typing import List
+
 from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.trial.unittest import TestCase
@@ -62,7 +64,7 @@ class KeywordArgumentsSpider(MockServerSpider):
         },
     }
-    checks = []
+    checks: List[bool] = []

     def start_requests(self):
         data = {"key": "value", "number": 123, "callback": "some_callback"}

tests/test_scheduler.py

@@ -2,6 +2,7 @@ import collections
 import shutil
 import tempfile
 import unittest
+from typing import Optional

 from twisted.internet import defer
 from twisted.trial.unittest import TestCase
@@ -59,7 +60,7 @@ class MockCrawler(Crawler):

 class SchedulerHandler:
-    priority_queue_cls = None
+    priority_queue_cls: Optional[str] = None
     jobdir = None

     def create_scheduler(self):
@@ -253,7 +254,7 @@ def _is_scheduling_fair(enqueued_slots, dequeued_slots):

 class DownloaderAwareSchedulerTestMixin:
-    priority_queue_cls = "scrapy.pqueues.DownloaderAwarePriorityQueue"
+    priority_queue_cls: Optional[str] = "scrapy.pqueues.DownloaderAwarePriorityQueue"
     reopen = False

     def test_logic(self):

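Note that the mixin's `priority_queue_cls` is annotated Optional[str] even though its value is a concrete string; presumably this keeps the declared type identical to SchedulerHandler's, so test classes inheriting from both bases see one consistent attribute type along the whole MRO. A minimal sketch:

from typing import Optional


class SchedulerHandler:
    priority_queue_cls: Optional[str] = None


class DownloaderAwareSchedulerTestMixin:
    priority_queue_cls: Optional[str] = "scrapy.pqueues.DownloaderAwarePriorityQueue"


class TestBoth(DownloaderAwareSchedulerTestMixin, SchedulerHandler):
    pass  # the attribute has the same declared type in both bases
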
tests/test_spidermiddleware_httperror.py

@@ -1,4 +1,5 @@
 import logging
+from typing import Set
 from unittest import TestCase

 from testfixtures import LogCapture
@@ -16,7 +17,7 @@ from tests.spiders import MockServerSpider

 class _HttpErrorSpider(MockServerSpider):
     name = "httperror"
-    bypass_status_codes = set()
+    bypass_status_codes: Set[int] = set()

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

tests/test_spidermiddleware_referer.py

@@ -1,4 +1,5 @@
 import warnings
+from typing import Any, Dict, List, Optional, Tuple
 from unittest import TestCase
 from urllib.parse import urlparse
@@ -31,10 +32,10 @@ from scrapy.spiders import Spider
 class TestRefererMiddleware(TestCase):
-    req_meta = {}
-    resp_headers = {}
-    settings = {}
-    scenarii = [
+    req_meta: Dict[str, Any] = {}
+    resp_headers: Dict[str, str] = {}
+    settings: Dict[str, Any] = {}
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("http://scrapytest.org", "http://scrapytest.org/", b"http://scrapytest.org"),
     ]
@@ -64,7 +65,7 @@ class MixinDefault:
     with some additional filtering of s3://
     """

-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("https://example.com/", "https://scrapy.org/", b"https://example.com/"),
         ("http://example.com/", "http://scrapy.org/", b"http://example.com/"),
         ("http://example.com/", "https://scrapy.org/", b"http://example.com/"),
@@ -85,7 +86,7 @@ class MixinDefault:
 class MixinNoReferrer:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("https://example.com/page.html", "https://example.com/", None),
         ("http://www.example.com/", "https://scrapy.org/", None),
         ("http://www.example.com/", "http://scrapy.org/", None),
@@ -95,7 +96,7 @@ class MixinNoReferrer:
 class MixinNoReferrerWhenDowngrade:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS to TLS: send non-empty referrer
         (
             "https://example.com/page.html",
@@ -177,7 +178,7 @@ class MixinNoReferrerWhenDowngrade:
 class MixinSameOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -246,7 +247,7 @@ class MixinSameOrigin:
 class MixinOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
         (
             "https://example.com/page.html",
@@ -270,7 +271,7 @@ class MixinOrigin:
 class MixinStrictOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
         (
             "https://example.com/page.html",
@@ -298,7 +299,7 @@ class MixinStrictOrigin:
 class MixinOriginWhenCrossOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -405,7 +406,7 @@ class MixinOriginWhenCrossOrigin:
 class MixinStrictOriginWhenCrossOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -517,7 +518,7 @@ class MixinStrictOriginWhenCrossOrigin:
 class MixinUnsafeUrl:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS to TLS: send referrer
         (
             "https://example.com/sekrit.html",
@@ -920,7 +921,9 @@ class TestPolicyHeaderPrecedence004(
 class TestReferrerOnRedirect(TestRefererMiddleware):
     settings = {"REFERRER_POLICY": "scrapy.spidermiddlewares.referer.UnsafeUrlPolicy"}
-    scenarii = [
+    scenarii: List[
+        Tuple[str, str, Tuple[Tuple[int, str], ...], Optional[bytes], Optional[bytes]]
+    ] = [  # type: ignore[assignment]
         (
             "http://scrapytest.org/1",  # parent
             "http://scrapytest.org/2",  # target

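The last hunk is the one place this file keeps an inline ignore: the redirect tests widen `scenarii` from the base class's 3-tuples to 5-tuples carrying the redirection chain, a genuine Liskov violation that is cheaper to acknowledge with a scoped `# type: ignore[assignment]` than to re-architect the test hierarchy. A stripped-down sketch of the shape mismatch:

from typing import List, Optional, Tuple


class Base:
    scenarii: List[Tuple[str, str, Optional[bytes]]] = []


class RedirectCase(Base):
    scenarii: List[
        Tuple[str, str, Tuple[Tuple[int, str], ...], Optional[bytes], Optional[bytes]]
    ] = []  # type: ignore[assignment]
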
tests/test_utils_serialize.py

@@ -58,7 +58,7 @@ class JsonEncoderTestCase(unittest.TestCase):
         self.assertIn(r.url, rs)
         self.assertIn(str(r.status), rs)

-    def test_encode_dataclass_item(self):
+    def test_encode_dataclass_item(self) -> None:
         @dataclasses.dataclass
         class TestDataClass:
             name: str

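A bare `-> None` looks cosmetic, but by default mypy does not check the bodies of functions with no annotations at all; adding any annotation, even just the return type, turns checking on for that function. A minimal sketch:

def unchecked():
    x: int = "oops"  # not reported: mypy skips bodies of untyped functions


def checked() -> None:
    x: int = "oops"  # error: incompatible types in assignment
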
tests/test_utils_url.py

@@ -364,7 +364,7 @@ for k, args in enumerate(
     setattr(GuessSchemeTest, t_method.__name__, t_method)

 # TODO: the following tests do not pass with current implementation
-for k, args in enumerate(
+for k, skip_args in enumerate(
     [
         (
             r"C:\absolute\path\to\a\file.html",
@@ -374,7 +374,7 @@ for k, args in enumerate(
     ],
     start=1,
 ):
-    t_method = create_skipped_scheme_t(args)
+    t_method = create_skipped_scheme_t(skip_args)
     t_method.__name__ = f"test_uri_skipped_{k:03}"
     setattr(GuessSchemeTest, t_method.__name__, t_method)
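The rename fixes a redefinition error rather than adding an annotation: both module-level loops bound `args`, but to tuples of different shapes, and mypy requires a re-assigned variable to keep a compatible type. Giving the second loop its own name, matched inside the body, sidesteps that without any ignore. A minimal sketch of the underlying error:

for k, args in enumerate([("http://example.com/file.html", "http", True)]):
    pass

for k, skip_args in enumerate([(r"C:\file.html", "file")], start=1):
    # reusing `args` here would be an incompatible-assignment error,
    # since the tuples in this list have a different shape
    pass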