
Fix and remove most of the entries from the mypy ignore list (#6137)

Andrey Rakhmatullin 2023-11-07 12:34:35 +04:00 committed by GitHub
parent c31e09d709
commit b4acf5c827
19 changed files with 67 additions and 122 deletions

scrapy/contracts/__init__.py

@@ -3,7 +3,7 @@ import sys
 from functools import wraps
 from inspect import getmembers
 from types import CoroutineType
-from typing import AsyncGenerator, Dict
+from typing import AsyncGenerator, Dict, Optional, Type
 from unittest import TestCase

 from scrapy.http import Request
@@ -14,7 +14,7 @@ from scrapy.utils.spider import iterate_spider_output
 class Contract:
     """Abstract class for contracts"""

-    request_cls = None
+    request_cls: Optional[Type[Request]] = None

     def __init__(self, method, *args):
         self.testcase_pre = _create_testcase(method, f"@{self.name} pre-hook")

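The pattern here recurs throughout the commit: a class attribute that defaults to None must be declared Optional, and since the attribute holds a class rather than an instance, its element type is Type[Request]. A minimal sketch of what the annotation buys (the subclasses are hypothetical, not from this commit):

from typing import Optional, Type

from scrapy import FormRequest, Request


class Contract:
    request_cls: Optional[Type[Request]] = None


class FormContract(Contract):
    request_cls = FormRequest  # OK: FormRequest is a subclass of Request


class BadContract(Contract):
    request_cls = str  # mypy error: Type[str] is not Optional[Type[Request]]
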
scrapy/loader/__init__.py

@@ -78,7 +78,7 @@ class ItemLoader(itemloaders.ItemLoader):
     read-only.
     """

-    default_item_class = Item
+    default_item_class: type = Item
     default_selector_class = Selector

     def __init__(self, item=None, selector=None, response=None, parent=None, **context):

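Note that `default_item_class` is annotated as plain `type` rather than `Type[Item]`: item loaders also accept dicts, attrs classes and dataclasses as item types, so the narrower annotation would reject legitimate subclasses. A minimal sketch of a hypothetical subclass this keeps legal:

import dataclasses

from scrapy.loader import ItemLoader


@dataclasses.dataclass
class Product:
    name: str = ""


class ProductLoader(ItemLoader):
    # Type[Item] would reject this; plain `type` accepts any class object
    default_item_class = Product
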
scrapy/pipelines/images.py

@@ -8,6 +8,7 @@ import hashlib
 import warnings
 from contextlib import suppress
 from io import BytesIO
+from typing import Dict, Tuple

 from itemadapter import ItemAdapter
@@ -48,7 +49,7 @@ class ImagesPipeline(FilesPipeline):
     MIN_WIDTH = 0
     MIN_HEIGHT = 0
     EXPIRES = 90
-    THUMBS = {}
+    THUMBS: Dict[str, Tuple[int, int]] = {}
     DEFAULT_IMAGES_URLS_FIELD = "image_urls"
     DEFAULT_IMAGES_RESULT_FIELD = "images"

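An empty literal gives mypy nothing to infer element types from, so a bare `THUMBS = {}` produces a "Need type annotation" error once the module is checked. The same fix recurs below for `default_headers = {}`, `checks = []` and `bypass_status_codes = set()`. With the annotation in place, subclass overrides are checked against the declared shape; a minimal sketch (the subclass is hypothetical):

from typing import Dict, Tuple

from scrapy.pipelines.images import ImagesPipeline


class MyImagesPipeline(ImagesPipeline):
    THUMBS = {
        "small": (50, 50),  # checked against the inherited Dict[str, Tuple[int, int]]
        "big": (270, 270),
    }
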
tests/utils/testproc.py

@@ -11,7 +11,7 @@ from twisted.python.failure import Failure
 class ProcessTest:
-    command = None
+    command: Optional[str] = None
     prefix = [sys.executable, "-m", "scrapy.cmdline"]
     cwd = os.getcwd()  # trial chdirs to temp dir

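This one-line fix is plausibly what let the test_command_fetch/parse/shell/version entries drop out of the ignore list below without those files being touched: each of them subclasses ProcessTest and assigns a string to `command`, which mypy rejected while the base attribute was inferred as plain None. A minimal sketch:

from typing import Optional


class ProcessTest:
    command: Optional[str] = None


class FetchTest(ProcessTest):
    command = "fetch"  # an incompatible-override error before the annotation
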
setup.cfg

@@ -8,82 +8,17 @@ universal=1
 ignore_missing_imports = true

 # Interface classes are hard to support
 [mypy-twisted.internet.interfaces]
 follow_imports = skip

+[mypy-scrapy.interfaces]
+ignore_errors = True
+
 [mypy-twisted.internet.reactor]
 follow_imports = skip

 # FIXME: remove the following sections once the issues are solved
-[mypy-scrapy.interfaces]
-ignore_errors = True
-
-[mypy-scrapy.pipelines.images]
-ignore_errors = True
-
 [mypy-scrapy.settings.default_settings]
 ignore_errors = True

-[mypy-tests.mocks.dummydbm]
-ignore_errors = True
-
-[mypy-tests.test_command_fetch]
-ignore_errors = True
-
-[mypy-tests.test_command_parse]
-ignore_errors = True
-
-[mypy-tests.test_command_shell]
-ignore_errors = True
-
-[mypy-tests.test_command_version]
-ignore_errors = True
-
-[mypy-tests.test_contracts]
-ignore_errors = True
-
-[mypy-tests.test_downloader_handlers]
-ignore_errors = True
-
-[mypy-tests.test_exporters]
-ignore_errors = True
-
-[mypy-tests.test_http_request]
-ignore_errors = True
-
-[mypy-tests.test_linkextractors]
-ignore_errors = True
-
-[mypy-tests.test_loader]
-ignore_errors = True
-
-[mypy-tests.test_loader_deprecated]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_crawl]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_files]
-ignore_errors = True
-
-[mypy-tests.test_pipeline_images]
-ignore_errors = True
-
-[mypy-tests.test_request_cb_kwargs]
-ignore_errors = True
-
-[mypy-tests.test_scheduler]
-ignore_errors = True
-
-[mypy-tests.test_spidermiddleware_httperror]
-ignore_errors = True
-
-[mypy-tests.test_spidermiddleware_referer]
-ignore_errors = True
-
-[mypy-tests.test_utils_serialize]
-ignore_errors = True
-
-[mypy-tests.test_utils_url]
-ignore_errors = True

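For readers less familiar with mypy's per-module sections: `ignore_errors = True` still analyzes a module (so its types stay usable from elsewhere) but suppresses the errors reported inside it, whereas `follow_imports = skip` never analyzes the module at all and importers see all of its contents as Any. A minimal sketch with hypothetical module names:

[mypy]
ignore_missing_imports = true

# analyzed, but its own errors are silenced
[mypy-mypackage.legacy]
ignore_errors = True

# never analyzed; attributes imported from it become Any
[mypy-mypackage.vendored]
follow_imports = skip

Deleting one of the FIXME sections and rerunning mypy surfaces the previously suppressed errors for that module, which is exactly the loop this commit runs for each file below.
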
tests/mocks/dummydbm.py

@@ -1,5 +1,6 @@
 """DBM-like dummy module"""
 import collections
+from typing import Any, DefaultDict


 class DummyDB(dict):
@@ -12,7 +13,7 @@ class DummyDB(dict):
     error = KeyError


-_DATABASES = collections.defaultdict(DummyDB)
+_DATABASES: DefaultDict[Any, DummyDB] = collections.defaultdict(DummyDB)


 def open(file, flag="r", mode=0o666):

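`defaultdict(DummyDB)` pins the value type through its factory, but nothing in the call tells mypy what the keys are, so the module-level name needs an explicit annotation. A self-contained sketch of the same fix:

import collections
from typing import Any, DefaultDict


class DummyDB(dict):
    """Stands in for the real class, for this sketch only."""


_DATABASES: DefaultDict[Any, DummyDB] = collections.defaultdict(DummyDB)
print(type(_DATABASES["some-file.db"]))  # a fresh DummyDB per new key
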
tests/test_exporters.py

@@ -7,6 +7,7 @@ import tempfile
 import unittest
 from datetime import datetime
 from io import BytesIO
+from typing import Any

 import lxml.etree
 from itemadapter import ItemAdapter
@@ -53,8 +54,8 @@ class CustomFieldDataclass:

 class BaseItemExporterTest(unittest.TestCase):
-    item_class = TestItem
-    custom_field_item_class = CustomFieldItem
+    item_class: type = TestItem
+    custom_field_item_class: type = CustomFieldItem

     def setUp(self):
         self.i = self.item_class(name="John\xa3", age="22")
@@ -517,7 +518,7 @@ class XmlItemExporterDataclassTest(XmlItemExporterTest):

 class JsonLinesItemExporterTest(BaseItemExporterTest):
-    _expected_nested = {
+    _expected_nested: Any = {
         "name": "Jesus",
         "age": {"name": "Maria", "age": {"name": "Joseph", "age": "22"}},
     }
@@ -665,7 +666,7 @@ class JsonItemExporterDataclassTest(JsonItemExporterTest):

 class CustomExporterItemTest(unittest.TestCase):
-    item_class = TestItem
+    item_class: type = TestItem

     def setUp(self):
         if self.item_class is None:

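`item_class: type` repeats the broad-annotation trick from the loader change, while `_expected_nested: Any` handles a different problem: subclasses override the fixture with data of a different nesting shape, and spelling out a precise union in test code would add noise without catching real bugs. A hypothetical sketch:

from typing import Any


class JsonLinesCase:
    _expected_nested: Any = {"name": "a", "age": {"name": "b"}}


class XmlCase(JsonLinesCase):
    _expected_nested = {"name": "a", "age": [{"name": "b"}]}  # new shape, no error
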
tests/test_http_request.py

@@ -3,6 +3,7 @@ import re
 import unittest
 import warnings
 import xmlrpc.client
+from typing import Any, Dict, List
 from unittest import mock
 from urllib.parse import parse_qs, unquote_to_bytes, urlparse
@@ -21,8 +22,8 @@ from scrapy.utils.python import to_bytes, to_unicode

 class RequestTest(unittest.TestCase):
     request_class = Request
     default_method = "GET"
-    default_headers = {}
-    default_meta = {}
+    default_headers: Dict[bytes, List[bytes]] = {}
+    default_meta: Dict[str, Any] = {}

     def test_init(self):
         # Request requires url in the __init__ method

tests/test_linkextractors.py

@@ -1,6 +1,7 @@
 import pickle
 import re
 import unittest
+from typing import Optional

 from packaging.version import Version
 from pytest import mark
@@ -15,7 +16,7 @@ from tests import get_testdata

 # a hack to skip base class tests in pytest
 class Base:
     class LinkExtractorTestCase(unittest.TestCase):
-        extractor_cls = None
+        extractor_cls: Optional[type] = None

         def setUp(self):
             body = get_testdata("link_extractor", "linkextractor.html")

tests/test_loader.py

@@ -1,5 +1,6 @@
 import dataclasses
 import unittest
+from typing import Optional

 import attr
 from itemadapter import ItemAdapter
@@ -87,7 +88,7 @@ class BasicItemLoaderTest(unittest.TestCase):

 class InitializationTestMixin:
-    item_class = None
+    item_class: Optional[type] = None

     def test_keep_single_value(self):
         """Loaded item should contain values from the initial item"""

tests/test_pipeline_crawl.py

@@ -1,5 +1,6 @@
 import shutil
 from pathlib import Path
+from typing import Optional, Set

 from testfixtures import LogCapture
 from twisted.internet import defer
@@ -54,7 +55,7 @@ class FileDownloadCrawlTestCase(TestCase):
     store_setting_key = "FILES_STORE"
     media_key = "files"
     media_urls_key = "file_urls"
-    expected_checksums = {
+    expected_checksums: Optional[Set[str]] = {
         "5547178b89448faf0015a13f904c936e",
         "c2281c83670e31d8aaab7cb642b824db",
         "ed3f6538dc15d4d9179dae57319edc5f",
@@ -193,6 +194,7 @@ class FileDownloadCrawlTestCase(TestCase):
     )

+skip_pillow: Optional[str]
 try:
     from PIL import Image  # noqa: imported just to check for the import error
 except ImportError:

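`skip_pillow: Optional[str]` on its own line is a bare declaration: no value is assigned yet, but every later assignment is checked against Optional[str], and the try/except branches unify instead of mypy committing to whichever assignment it meets first. A minimal sketch of the full pattern as these test modules use it (the message text is illustrative):

from typing import Optional

skip_pillow: Optional[str]
try:
    from PIL import Image  # noqa: F401  (only probing for ImportError)
except ImportError:
    skip_pillow = "Missing Python Imaging Library (PIL)"
else:
    skip_pillow = None
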
tests/test_pipeline_files.py

@@ -7,6 +7,7 @@ from io import BytesIO
 from pathlib import Path
 from shutil import rmtree
 from tempfile import mkdtemp
+from typing import Dict, List
 from unittest import mock
 from urllib.parse import urlparse
@@ -308,11 +309,11 @@ class FilesPipelineTestCaseFieldsDataClass(
 class FilesPipelineTestAttrsItem:
     name = attr.ib(default="")
     # default fields
-    file_urls = attr.ib(default=lambda: [])
-    files = attr.ib(default=lambda: [])
+    file_urls: List[str] = attr.ib(default=lambda: [])
+    files: List[Dict[str, str]] = attr.ib(default=lambda: [])
     # overridden fields
-    custom_file_urls = attr.ib(default=lambda: [])
-    custom_files = attr.ib(default=lambda: [])
+    custom_file_urls: List[str] = attr.ib(default=lambda: [])
+    custom_files: List[Dict[str, str]] = attr.ib(default=lambda: [])


 class FilesPipelineTestCaseFieldsAttrsItem(
@@ -690,7 +691,3 @@ def _prepare_request_object(item_url, flags=None):
         item_url,
         meta={"response": Response(item_url, status=200, body=b"data", flags=flags)},
     )
-
-
-if __name__ == "__main__":
-    unittest.main()

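Annotating `attr.ib()` assignments gives both attrs and mypy's attrs plugin the field types; without them every field is Any and the generated __init__ goes unchecked. A hypothetical sketch, using attr.Factory for the mutable defaults:

from typing import Dict, List

import attr


@attr.s
class FileItem:
    name: str = attr.ib(default="")
    file_urls: List[str] = attr.ib(default=attr.Factory(list))
    files: List[Dict[str, str]] = attr.ib(default=attr.Factory(list))


item = FileItem(name="report", file_urls=["https://example.com/a.pdf"])

The trailing hunk also drops a stray `if __name__ == "__main__"` block, a cleanup unrelated to typing.
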
tests/test_pipeline_images.py

@@ -5,6 +5,7 @@ import random
 import warnings
 from shutil import rmtree
 from tempfile import mkdtemp
+from typing import Dict, List, Optional
 from unittest.mock import patch

 import attr
@@ -18,6 +19,7 @@ from scrapy.pipelines.images import ImageException, ImagesPipeline, NoimagesDrop
 from scrapy.settings import Settings
 from scrapy.utils.python import to_bytes

+skip_pillow: Optional[str]
 try:
     from PIL import Image
 except ImportError:
@@ -26,7 +28,7 @@ except ImportError:
     )
 else:
     encoders = {"jpeg_encoder", "jpeg_decoder"}
-    if not encoders.issubset(set(Image.core.__dict__)):
+    if not encoders.issubset(set(Image.core.__dict__)):  # type: ignore[attr-defined]
         skip_pillow = "Missing JPEG encoders"
     else:
         skip_pillow = None
@@ -404,11 +406,11 @@ class ImagesPipelineTestCaseFieldsDataClass(
 class ImagesPipelineTestAttrsItem:
     name = attr.ib(default="")
     # default fields
-    image_urls = attr.ib(default=lambda: [])
-    images = attr.ib(default=lambda: [])
+    image_urls: List[str] = attr.ib(default=lambda: [])
+    images: List[Dict[str, str]] = attr.ib(default=lambda: [])
     # overridden fields
-    custom_image_urls = attr.ib(default=lambda: [])
-    custom_images = attr.ib(default=lambda: [])
+    custom_image_urls: List[str] = attr.ib(default=lambda: [])
+    custom_images: List[Dict[str, str]] = attr.ib(default=lambda: [])


 class ImagesPipelineTestCaseFieldsAttrsItem(
@@ -646,7 +648,3 @@ def _create_image(format, *a, **kw):
     Image.new(*a, **kw).save(buf, format)
     buf.seek(0)
     return Image.open(buf), buf
-
-
-if __name__ == "__main__":
-    unittest.main()

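Where a third-party stub is genuinely incomplete, the commit reaches for a scoped ignore rather than a module-wide one: `# type: ignore[attr-defined]` silences only that error code on that line (Pillow's `Image.core` is a C extension mypy cannot see into), and any other error on the same line still surfaces. A minimal sketch (requires Pillow installed):

from PIL import Image

encoders = {"jpeg_encoder", "jpeg_decoder"}
# only attr-defined is suppressed; other errors here would still be reported
have_jpeg = encoders.issubset(set(Image.core.__dict__))  # type: ignore[attr-defined]

Running mypy with --warn-unused-ignores flags such comments if the stubs later improve.
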
tests/test_request_cb_kwargs.py

@@ -1,3 +1,5 @@
+from typing import List
+
 from testfixtures import LogCapture
 from twisted.internet import defer
 from twisted.trial.unittest import TestCase
@@ -62,7 +64,7 @@ class KeywordArgumentsSpider(MockServerSpider):
         },
     }
-    checks = []
+    checks: List[bool] = []

     def start_requests(self):
         data = {"key": "value", "number": 123, "callback": "some_callback"}

tests/test_scheduler.py

@@ -2,6 +2,7 @@ import collections
 import shutil
 import tempfile
 import unittest
+from typing import Optional

 from twisted.internet import defer
 from twisted.trial.unittest import TestCase
@@ -59,7 +60,7 @@ class MockCrawler(Crawler):

 class SchedulerHandler:
-    priority_queue_cls = None
+    priority_queue_cls: Optional[str] = None
     jobdir = None

     def create_scheduler(self):
@@ -253,7 +254,7 @@ def _is_scheduling_fair(enqueued_slots, dequeued_slots):

 class DownloaderAwareSchedulerTestMixin:
-    priority_queue_cls = "scrapy.pqueues.DownloaderAwarePriorityQueue"
+    priority_queue_cls: Optional[str] = "scrapy.pqueues.DownloaderAwarePriorityQueue"
     reopen = False

     def test_logic(self):

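Note that the mixin's `priority_queue_cls` is annotated Optional[str] even though its value is a concrete string; presumably this keeps the declared type identical to SchedulerHandler's, so test classes inheriting from both bases see one consistent attribute type along the whole MRO. A minimal sketch:

from typing import Optional


class SchedulerHandler:
    priority_queue_cls: Optional[str] = None


class DownloaderAwareSchedulerTestMixin:
    priority_queue_cls: Optional[str] = "scrapy.pqueues.DownloaderAwarePriorityQueue"


class TestBoth(DownloaderAwareSchedulerTestMixin, SchedulerHandler):
    pass  # the attribute has the same declared type in both bases
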
tests/test_spidermiddleware_httperror.py

@@ -1,4 +1,5 @@
 import logging
+from typing import Set
 from unittest import TestCase

 from testfixtures import LogCapture
@@ -16,7 +17,7 @@ from tests.spiders import MockServerSpider

 class _HttpErrorSpider(MockServerSpider):
     name = "httperror"
-    bypass_status_codes = set()
+    bypass_status_codes: Set[int] = set()

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

tests/test_spidermiddleware_referer.py

@@ -1,4 +1,5 @@
 import warnings
+from typing import Any, Dict, List, Optional, Tuple
 from unittest import TestCase
 from urllib.parse import urlparse
@@ -31,10 +32,10 @@ from scrapy.spiders import Spider
 class TestRefererMiddleware(TestCase):
-    req_meta = {}
-    resp_headers = {}
-    settings = {}
-    scenarii = [
+    req_meta: Dict[str, Any] = {}
+    resp_headers: Dict[str, str] = {}
+    settings: Dict[str, Any] = {}
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("http://scrapytest.org", "http://scrapytest.org/", b"http://scrapytest.org"),
     ]
@@ -64,7 +65,7 @@ class MixinDefault:
     with some additional filtering of s3://
     """

-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("https://example.com/", "https://scrapy.org/", b"https://example.com/"),
         ("http://example.com/", "http://scrapy.org/", b"http://example.com/"),
         ("http://example.com/", "https://scrapy.org/", b"http://example.com/"),
@@ -85,7 +86,7 @@ class MixinDefault:
 class MixinNoReferrer:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         ("https://example.com/page.html", "https://example.com/", None),
         ("http://www.example.com/", "https://scrapy.org/", None),
         ("http://www.example.com/", "http://scrapy.org/", None),
@@ -95,7 +96,7 @@ class MixinNoReferrer:
 class MixinNoReferrerWhenDowngrade:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS to TLS: send non-empty referrer
         (
             "https://example.com/page.html",
@@ -177,7 +178,7 @@ class MixinNoReferrerWhenDowngrade:
 class MixinSameOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -246,7 +247,7 @@ class MixinSameOrigin:
 class MixinOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS or non-TLS to TLS or non-TLS: referrer origin is sent (yes, even for downgrades)
         (
             "https://example.com/page.html",
@@ -270,7 +271,7 @@ class MixinOrigin:
 class MixinStrictOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS or non-TLS to TLS or non-TLS: referrer origin is sent but not for downgrades
         (
             "https://example.com/page.html",
@@ -298,7 +299,7 @@ class MixinStrictOrigin:
 class MixinOriginWhenCrossOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -405,7 +406,7 @@ class MixinOriginWhenCrossOrigin:
 class MixinStrictOriginWhenCrossOrigin:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # Same origin (protocol, host, port): send referrer
         (
             "https://example.com/page.html",
@@ -517,7 +518,7 @@ class MixinStrictOriginWhenCrossOrigin:
 class MixinUnsafeUrl:
-    scenarii = [
+    scenarii: List[Tuple[str, str, Optional[bytes]]] = [
         # TLS to TLS: send referrer
         (
             "https://example.com/sekrit.html",
@@ -920,7 +921,9 @@ class TestPolicyHeaderPrecedence004(
 class TestReferrerOnRedirect(TestRefererMiddleware):
     settings = {"REFERRER_POLICY": "scrapy.spidermiddlewares.referer.UnsafeUrlPolicy"}
-    scenarii = [
+    scenarii: List[
+        Tuple[str, str, Tuple[Tuple[int, str], ...], Optional[bytes], Optional[bytes]]
+    ] = [  # type: ignore[assignment]
         (
             "http://scrapytest.org/1",  # parent
             "http://scrapytest.org/2",  # target

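The last hunk is the one place this file keeps an inline ignore: the redirect tests widen `scenarii` from the base class's 3-tuples to 5-tuples carrying the redirection chain, a genuine Liskov violation that is cheaper to acknowledge with a scoped `# type: ignore[assignment]` than to re-architect the test hierarchy. A stripped-down sketch of the shape mismatch:

from typing import List, Optional, Tuple


class Base:
    scenarii: List[Tuple[str, str, Optional[bytes]]] = []


class RedirectCase(Base):
    scenarii: List[
        Tuple[str, str, Tuple[Tuple[int, str], ...], Optional[bytes], Optional[bytes]]
    ] = []  # type: ignore[assignment]
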
tests/test_utils_serialize.py

@@ -58,7 +58,7 @@ class JsonEncoderTestCase(unittest.TestCase):
         self.assertIn(r.url, rs)
         self.assertIn(str(r.status), rs)

-    def test_encode_dataclass_item(self):
+    def test_encode_dataclass_item(self) -> None:
         @dataclasses.dataclass
         class TestDataClass:
             name: str

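A bare `-> None` looks cosmetic, but by default mypy does not check the bodies of functions with no annotations at all; adding any annotation, even just the return type, turns checking on for that function. A minimal sketch:

def unchecked():
    x: int = "oops"  # not reported: mypy skips bodies of untyped functions


def checked() -> None:
    x: int = "oops"  # error: incompatible types in assignment
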
tests/test_utils_url.py

@@ -364,7 +364,7 @@ for k, args in enumerate(
     setattr(GuessSchemeTest, t_method.__name__, t_method)

 # TODO: the following tests do not pass with current implementation
-for k, args in enumerate(
+for k, skip_args in enumerate(
     [
         (
             r"C:\absolute\path\to\a\file.html",
@@ -374,7 +374,7 @@ for k, args in enumerate(
     ],
     start=1,
 ):
-    t_method = create_skipped_scheme_t(args)
+    t_method = create_skipped_scheme_t(skip_args)
     t_method.__name__ = f"test_uri_skipped_{k:03}"
     setattr(GuessSchemeTest, t_method.__name__, t_method)
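The rename fixes a redefinition error rather than adding an annotation: both module-level loops bound `args`, but to tuples of different shapes, and mypy requires a re-assigned variable to keep a compatible type. Giving the second loop its own name, matched inside the body, sidesteps that without any ignore. A minimal sketch of the underlying error:

for k, args in enumerate([("http://example.com/file.html", "http", True)]):
    pass

for k, skip_args in enumerate([(r"C:\file.html", "file")], start=1):
    # reusing `args` here would be an incompatible-assignment error,
    # since the tuples in this list have a different shape
    pass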