
Add flake8-type-checking. (#6413)

Andrey Rakhmatullin 2024-06-25 13:20:59 +05:00 committed by GitHub
parent 326e323e11
commit d08f559600
104 changed files with 562 additions and 300 deletions
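
The change is mechanical and repeats across the files below: an import that is only needed for type hints moves into an "if TYPE_CHECKING:" block, which type checkers evaluate but the interpreter skips, and "from __future__ import annotations" makes every annotation a lazy string, so the imported names no longer have to exist at runtime. A minimal sketch of the resulting pattern, modeled on the AddonManager hunk below (abridged, not the full file):

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only type checkers execute this import; importing this module at
    # runtime no longer pays for it.
    from scrapy.crawler import Crawler


class AddonManager:
    def __init__(self, crawler: Crawler) -> None:
        # With lazy annotations, the quoted spelling "Crawler" used
        # before this commit is no longer needed.
        self.crawler: Crawler = crawler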

View File

@@ -1,6 +1,7 @@
[flake8]
max-line-length = 119
extend-select = TC, TC1
ignore =
# black disagrees with flake8 about these
E203, E501, E701, E704, W503
@@ -58,6 +59,9 @@ ignore =
D402
# First word of the first line should be properly capitalized
D403
# Annotation in typing.cast() should be a string literal
TC006
exclude =
docs/conf.py
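
In flake8-type-checking's numbering, the bare TC prefix selects the default TC0xx checks (move typing-only imports into type-checking blocks), while TC1 opts in to TC100/TC101, which require "from __future__ import annotations" and unquote annotations that no longer need to be string literals; that is why nearly every hunk below adds the __future__ import. TC006, left ignored here, would require the target type of typing.cast() to be a string literal. A sketch of the two cast spellings (illustrative, not taken from the diff):

from typing import List, cast

raw: object = [1, 2, 3]
# The spelling TC006 asks for: the type is a string and is never
# evaluated at runtime.
numbers = cast("List[int]", raw)
# The spelling the ignore keeps allowing: List[int] is evaluated when
# this line runs.
numbers = cast(List[int], raw)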

View File

@@ -14,6 +14,7 @@ repos:
- flake8-debugger
- flake8-docstrings
- flake8-string-format
- flake8-type-checking
- repo: https://github.com/psf/black.git
rev: 24.2.0
hooks:

View File

@@ -1,13 +1,16 @@
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, List
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.conf import build_component_list
from scrapy.utils.misc import build_from_crawler, load_object
if TYPE_CHECKING:
from scrapy.crawler import Crawler
from scrapy.settings import Settings
logger = logging.getLogger(__name__)
@@ -15,8 +18,8 @@ logger = logging.getLogger(__name__)
class AddonManager:
"""This class facilitates loading and storing :ref:`topics-addons`."""
def __init__(self, crawler: "Crawler") -> None:
self.crawler: "Crawler" = crawler
def __init__(self, crawler: Crawler) -> None:
self.crawler: Crawler = crawler
self.addons: List[Any] = []
def load_settings(self, settings: Settings) -> None:

View File

@@ -12,7 +12,6 @@ import scrapy
from scrapy.commands import BaseRunSpiderCommand, ScrapyCommand, ScrapyHelpFormatter
from scrapy.crawler import CrawlerProcess
from scrapy.exceptions import UsageError
from scrapy.settings import BaseSettings, Settings
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import get_project_settings, inside_project
from scrapy.utils.python import garbage_collect
@@ -21,6 +20,8 @@ if TYPE_CHECKING:
# typing.ParamSpec requires Python 3.10
from typing_extensions import ParamSpec
from scrapy.settings import BaseSettings, Settings
_P = ParamSpec("_P")
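
As the hunk above shows, names that only type checkers need can live entirely inside the TYPE_CHECKING block, including a ParamSpec created there: ParamSpec reached typing only in Python 3.10, so it is pulled from typing_extensions, and only at type-checking time. A self-contained sketch of the same pattern (the decorator itself is invented for illustration):

from __future__ import annotations

from typing import TYPE_CHECKING, Callable, TypeVar

if TYPE_CHECKING:
    # typing.ParamSpec requires Python 3.10
    from typing_extensions import ParamSpec

    _P = ParamSpec("_P")

_T = TypeVar("_T")


def logged(func: Callable[_P, _T]) -> Callable[_P, _T]:
    # The annotations are plain strings at runtime thanks to the
    # __future__ import, so _P need not exist when the module loads.
    def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _T:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    return wrapper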

View File

@@ -2,18 +2,22 @@
Base class for Scrapy commands
"""
from __future__ import annotations
import argparse
import builtins
import os
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional
from twisted.python import failure
from scrapy.crawler import Crawler, CrawlerProcess
from scrapy.exceptions import UsageError
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli
if TYPE_CHECKING:
from scrapy.crawler import Crawler, CrawlerProcess
class ScrapyCommand:
requires_project: bool = False

View File

@@ -1,16 +1,20 @@
from __future__ import annotations
import argparse
import subprocess # nosec
import sys
import time
from typing import Any, Iterable, List
from typing import TYPE_CHECKING, Any, Iterable, List
from urllib.parse import urlencode
import scrapy
from scrapy import Request
from scrapy.commands import ScrapyCommand
from scrapy.http import Response, TextResponse
from scrapy.linkextractors import LinkExtractor
if TYPE_CHECKING:
from scrapy import Request
class Command(ScrapyCommand):
default_settings = {

View File

@@ -1,11 +1,15 @@
import argparse
from typing import List, cast
from __future__ import annotations
from typing import TYPE_CHECKING, List, cast
from twisted.python.failure import Failure
from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError
if TYPE_CHECKING:
import argparse
class Command(BaseRunSpiderCommand):
requires_project = True

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import sys
from argparse import ArgumentParser, Namespace
from typing import Dict, List, Type
from typing import TYPE_CHECKING, Dict, List, Type
from w3lib.url import is_url
@@ -11,6 +12,9 @@ from scrapy.http import Request, Response
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.spider import DefaultSpider, spidercls_for_request
if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace
class Command(ScrapyCommand):
requires_project = False

View File

@@ -1,8 +1,12 @@
import argparse
from typing import List
from __future__ import annotations
from typing import TYPE_CHECKING, List
from scrapy.commands import ScrapyCommand
if TYPE_CHECKING:
import argparse
class Command(ScrapyCommand):
requires_project = True

View File

@@ -6,6 +6,7 @@ import inspect
import json
import logging
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Callable,
@@ -22,13 +23,11 @@ from typing import (
from itemadapter import ItemAdapter, is_item
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.python.failure import Failure
from w3lib.url import is_url
from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError
from scrapy.http import Request, Response
from scrapy.spiders import Spider
from scrapy.utils import display
from scrapy.utils.asyncgen import collect_asyncgen
from scrapy.utils.defer import aiter_errback, deferred_from_coro
@@ -36,6 +35,12 @@ from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.spider import spidercls_for_request
if TYPE_CHECKING:
from twisted.python.failure import Failure
from scrapy.spiders import Spider
logger = logging.getLogger(__name__)
_T = TypeVar("_T")

View File

@@ -1,17 +1,21 @@
from __future__ import annotations
import argparse
import sys
from importlib import import_module
from os import PathLike
from pathlib import Path
from types import ModuleType
from typing import List, Union
from typing import TYPE_CHECKING, List, Union
from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError
from scrapy.utils.spider import iter_spider_classes
if TYPE_CHECKING:
from os import PathLike
from types import ModuleType
def _import_file(filepath: Union[str, PathLike]) -> ModuleType:
def _import_file(filepath: Union[str, PathLike[str]]) -> ModuleType:
abspath = Path(filepath).resolve()
if abspath.suffix not in (".py", ".pyw"):
raise ValueError(f"Not a Python source file: {abspath}")
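
Besides moving the imports, this hunk parameterizes PathLike: os.PathLike is generic over the str or bytes that __fspath__() returns, so PathLike[str] admits only str-returning path objects. A sketch of what the narrowed signature accepts (the function is illustrative, not from the commit):

from __future__ import annotations

from os import PathLike
from pathlib import Path
from typing import Union


def read_source(filepath: Union[str, PathLike[str]]) -> str:
    # Path() accepts plain strings and any object whose __fspath__()
    # returns str, which is exactly what PathLike[str] expresses.
    return Path(filepath).read_text()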

View File

@@ -4,9 +4,10 @@ Scrapy Shell
See documentation in docs/topics/shell.rst
"""
from argparse import ArgumentParser, Namespace
from __future__ import annotations
from threading import Thread
from typing import Any, Dict, List, Type
from typing import TYPE_CHECKING, Any, Dict, List, Type
from scrapy import Spider
from scrapy.commands import ScrapyCommand
@@ -15,6 +16,9 @@ from scrapy.shell import Shell
from scrapy.utils.spider import DefaultSpider, spidercls_for_request
from scrapy.utils.url import guess_scheme
if TYPE_CHECKING:
from argparse import ArgumentParser, Namespace
class Command(ScrapyCommand):
requires_project = False

View File

@@ -1,9 +1,12 @@
from __future__ import annotations
import re
import sys
from functools import wraps
from inspect import getmembers
from types import CoroutineType
from typing import (
TYPE_CHECKING,
Any,
AsyncGenerator,
Callable,
@@ -16,13 +19,15 @@
)
from unittest import TestCase, TestResult
from twisted.python.failure import Failure
from scrapy import Spider
from scrapy.http import Request, Response
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output
if TYPE_CHECKING:
from twisted.python.failure import Failure
from scrapy import Spider
class Contract:
"""Abstract class for contracts"""

View File

@@ -25,15 +25,16 @@ from scrapy import Request, Spider, signals
from scrapy.core.downloader.handlers import DownloadHandlers
from scrapy.core.downloader.middleware import DownloaderMiddlewareManager
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http import Response
from scrapy.resolver import dnscache
from scrapy.settings import BaseSettings
from scrapy.signalmanager import SignalManager
from scrapy.utils.defer import mustbe_deferred
from scrapy.utils.httpobj import urlparse_cached
if TYPE_CHECKING:
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings
_T = TypeVar("_T")

View File

@@ -21,8 +21,6 @@ from scrapy.core.downloader.tls import (
ScrapyClientTLSOptions,
openssl_methods,
)
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
from scrapy.utils.misc import build_from_crawler, load_object
if TYPE_CHECKING:
@@ -31,6 +29,9 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
@implementer(IPolicyForHTTPS)
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):

View File

@@ -17,17 +17,19 @@
)
from twisted.internet import defer
from twisted.internet.defer import Deferred
from scrapy import Request, Spider, signals
from scrapy.exceptions import NotConfigured, NotSupported
from scrapy.http import Response
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.misc import build_from_crawler, load_object
from scrapy.utils.python import without_none_values
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
from scrapy.crawler import Crawler
from scrapy.http import Response
logger = logging.getLogger(__name__)

View File

@@ -1,12 +1,16 @@
from typing import Any, Dict
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Dict
from w3lib.url import parse_data_uri
from scrapy import Request, Spider
from scrapy.http import Response, TextResponse
from scrapy.responsetypes import responsetypes
from scrapy.utils.decorators import defers
if TYPE_CHECKING:
from scrapy import Request, Spider
class DataURIDownloadHandler:
lazy = False

View File

@@ -1,12 +1,17 @@
from __future__ import annotations
from pathlib import Path
from typing import TYPE_CHECKING
from w3lib.url import file_uri_to_path
from scrapy import Request, Spider
from scrapy.http import Response
from scrapy.responsetypes import responsetypes
from scrapy.utils.decorators import defers
if TYPE_CHECKING:
from scrapy import Request, Spider
from scrapy.http import Response
class FileDownloadHandler:
lazy = False

View File

@@ -35,23 +35,25 @@ from io import BytesIO
from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Optional
from urllib.parse import unquote
from twisted.internet.defer import Deferred
from twisted.internet.protocol import ClientCreator, Protocol
from twisted.protocols.ftp import CommandFailed, FTPClient
from twisted.python.failure import Failure
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.responsetypes import responsetypes
from scrapy.settings import BaseSettings
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_bytes
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
class ReceivedDataProtocol(Protocol):
def __init__(self, filename: Optional[str] = None):

View File

@@ -5,21 +5,21 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Type
from twisted.internet.defer import Deferred
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings
from scrapy.utils.misc import build_from_crawler, load_object
from scrapy.utils.python import to_unicode
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from scrapy.core.downloader.webclient import ScrapyHTTPClientFactory
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings
class HTTP10DownloadHandler:

View File

@@ -12,11 +12,9 @@ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, TypedDict, TypeVar
from urllib.parse import urldefrag, urlunparse
from twisted.internet import ssl
from twisted.internet.base import ReactorBase
from twisted.internet.defer import CancelledError, Deferred, succeed
from twisted.internet.endpoints import TCP4ClientEndpoint
from twisted.internet.error import TimeoutError
from twisted.internet.interfaces import IConsumer
from twisted.internet.protocol import Factory, Protocol, connectionDone
from twisted.python.failure import Failure
from twisted.web.client import URI, Agent, HTTPConnectionPool
@@ -30,17 +28,22 @@ from zope.interface import implementer
from scrapy import Request, Spider, signals
from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
from scrapy.core.downloader.webclient import _parse
from scrapy.crawler import Crawler
from scrapy.exceptions import StopDownload
from scrapy.http import Headers, Response
from scrapy.responsetypes import responsetypes
from scrapy.settings import BaseSettings
from scrapy.utils.python import to_bytes, to_unicode
if TYPE_CHECKING:
from twisted.internet.base import ReactorBase
from twisted.internet.interfaces import IConsumer
# typing.NotRequired and typing.Self require Python 3.11
from typing_extensions import NotRequired, Self
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)
_T = TypeVar("_T")

View File

@@ -4,25 +4,27 @@ from time import time
from typing import TYPE_CHECKING, Optional
from urllib.parse import urldefrag
from twisted.internet.base import DelayedCall
from twisted.internet.defer import Deferred
from twisted.internet.error import TimeoutError
from twisted.web.client import URI
from twisted.web.iweb import IPolicyForHTTPS
from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
from scrapy.core.downloader.webclient import _parse
from scrapy.core.http2.agent import H2Agent, H2ConnectionPool, ScrapyProxyH2Agent
from scrapy.crawler import Crawler
from scrapy.http import Request, Response
from scrapy.settings import Settings
from scrapy.spiders import Spider
from scrapy.utils.python import to_bytes
if TYPE_CHECKING:
from twisted.internet.base import DelayedCall
from twisted.internet.defer import Deferred
from twisted.web.iweb import IPolicyForHTTPS
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Request, Response
from scrapy.settings import Settings
from scrapy.spiders import Spider
class H2DownloadHandler:
def __init__(self, settings: Settings, crawler: Crawler):

View File

@@ -2,22 +2,23 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional, Type
from twisted.internet.defer import Deferred
from scrapy import Request, Spider
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
from scrapy.settings import BaseSettings
from scrapy.utils.boto import is_botocore_available
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.misc import build_from_crawler
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings
class S3DownloadHandler:
def __init__(

View File

@@ -6,19 +6,22 @@ See documentation in docs/topics/downloader-middleware.rst
from __future__ import annotations
from typing import Any, Callable, Generator, List, Union, cast
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Union, cast
from twisted.internet.defer import Deferred, inlineCallbacks
from twisted.python.failure import Failure
from scrapy import Spider
from scrapy.exceptions import _InvalidOutput
from scrapy.http import Request, Response
from scrapy.middleware import MiddlewareManager
from scrapy.settings import BaseSettings
from scrapy.utils.conf import build_component_list
from scrapy.utils.defer import deferred_from_coro, mustbe_deferred
if TYPE_CHECKING:
from twisted.python.failure import Failure
from scrapy import Spider
from scrapy.settings import BaseSettings
class DownloaderMiddlewareManager(MiddlewareManager):
component_name = "downloader middleware"

View File

@@ -1,18 +1,22 @@
from __future__ import annotations
import re
from time import time
from typing import Optional, Tuple
from typing import TYPE_CHECKING, Optional, Tuple
from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
from twisted.internet import defer
from twisted.internet.protocol import ClientFactory
from twisted.web.http import HTTPClient
from scrapy import Request
from scrapy.http import Headers, Response
from scrapy.responsetypes import responsetypes
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_bytes, to_unicode
if TYPE_CHECKING:
from scrapy import Request
def _parsed_url_args(parsed: ParseResult) -> Tuple[bytes, bytes, bytes, int, bytes]:
# Assume parsed is urlparse-d from Request.url,

View File

@@ -34,9 +34,8 @@ from scrapy.core.scraper import Scraper
from scrapy.exceptions import CloseSpider, DontCloseSpider, IgnoreRequest
from scrapy.http import Request, Response
from scrapy.logformatter import LogFormatter
from scrapy.settings import BaseSettings, Settings
from scrapy.settings import Settings
from scrapy.signalmanager import SignalManager
from scrapy.spiders import Spider
from scrapy.utils.log import failure_to_exc_info, logformatter_adapter
from scrapy.utils.misc import build_from_crawler, load_object
from scrapy.utils.python import global_object_name
@@ -46,6 +45,9 @@ if TYPE_CHECKING:
from scrapy.core.scheduler import BaseScheduler
from scrapy.core.scraper import _HandleOutputDeferred
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
from scrapy.spiders import Spider
logger = logging.getLogger(__name__)

View File

@@ -1,10 +1,10 @@
from __future__ import annotations
from collections import deque
from typing import Deque, Dict, List, Optional, Tuple
from typing import TYPE_CHECKING, Deque, Dict, List, Optional, Tuple
from twisted.internet import defer
from twisted.internet.base import ReactorBase
from twisted.internet.defer import Deferred
from twisted.internet.endpoints import HostnameEndpoint
from twisted.python.failure import Failure
from twisted.web.client import (
URI,
@@ -16,9 +16,15 @@ from twisted.web.error import SchemeNotSupported
from scrapy.core.downloader.contextfactory import AcceptableProtocolsContextFactory
from scrapy.core.http2.protocol import H2ClientFactory, H2ClientProtocol
from scrapy.http.request import Request
from scrapy.settings import Settings
from scrapy.spiders import Spider
if TYPE_CHECKING:
from twisted.internet.base import ReactorBase
from twisted.internet.endpoints import HostnameEndpoint
from scrapy.http.request import Request
from scrapy.settings import Settings
from scrapy.spiders import Spider
ConnectionKeyT = Tuple[bytes, bytes, int]

View File

@@ -1,9 +1,10 @@
from __future__ import annotations
import ipaddress
import itertools
import logging
from collections import deque
from ipaddress import IPv4Address, IPv6Address
from typing import Any, Deque, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Union
from h2.config import H2Configuration
from h2.connection import H2Connection
@@ -20,7 +21,6 @@ from h2.events import (
WindowUpdated,
)
from h2.exceptions import FrameTooLargeError, H2Error
from twisted.internet.defer import Deferred
from twisted.internet.error import TimeoutError
from twisted.internet.interfaces import (
IAddress,
@@ -30,14 +30,21 @@ from twisted.internet.interfaces import (
from twisted.internet.protocol import Factory, Protocol, connectionDone
from twisted.internet.ssl import Certificate
from twisted.protocols.policies import TimeoutMixin
from twisted.python.failure import Failure
from twisted.web.client import URI
from zope.interface import implementer
from scrapy.core.http2.stream import Stream, StreamCloseReason
from scrapy.http import Request
from scrapy.settings import Settings
from scrapy.spiders import Spider
if TYPE_CHECKING:
from ipaddress import IPv4Address, IPv6Address
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.web.client import URI
from scrapy.settings import Settings
from scrapy.spiders import Spider
logger = logging.getLogger(__name__)

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import logging
from enum import Enum
from io import BytesIO
@@ -5,19 +7,20 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
from h2.errors import ErrorCodes
from h2.exceptions import H2Error, ProtocolError, StreamClosedError
from hpack import HeaderTuple
from twisted.internet.defer import CancelledError, Deferred
from twisted.internet.error import ConnectionClosed
from twisted.python.failure import Failure
from twisted.web.client import ResponseFailed
from scrapy.http import Request
from scrapy.http.headers import Headers
from scrapy.responsetypes import responsetypes
from scrapy.utils.httpobj import urlparse_cached
if TYPE_CHECKING:
from hpack import HeaderTuple
from scrapy.core.http2.protocol import H2ClientProtocol
from scrapy.http import Request
logger = logging.getLogger(__name__)
@@ -87,7 +90,7 @@ class Stream:
self,
stream_id: int,
request: Request,
protocol: "H2ClientProtocol",
protocol: H2ClientProtocol,
download_maxsize: int = 0,
download_warnsize: int = 0,
) -> None:
@@ -99,7 +102,7 @@
"""
self.stream_id: int = stream_id
self._request: Request = request
self._protocol: "H2ClientProtocol" = protocol
self._protocol: H2ClientProtocol = protocol
self._download_maxsize = self._request.meta.get(
"download_maxsize", download_maxsize

View File

@@ -6,14 +6,10 @@ from abc import abstractmethod
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Optional, Type, cast
from twisted.internet.defer import Deferred
# working around https://github.com/sphinx-doc/sphinx/issues/10400
from twisted.internet.defer import Deferred # noqa: TC002
from scrapy.crawler import Crawler
from scrapy.dupefilters import BaseDupeFilter
from scrapy.http.request import Request
from scrapy.pqueues import ScrapyPriorityQueue
from scrapy.spiders import Spider
from scrapy.statscollectors import StatsCollector
from scrapy.spiders import Spider # noqa: TC001
from scrapy.utils.job import job_dir
from scrapy.utils.misc import build_from_crawler, load_object
@@ -24,6 +20,12 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.dupefilters import BaseDupeFilter
from scrapy.http.request import Request
from scrapy.pqueues import ScrapyPriorityQueue
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)
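
The noqa comments above handle the opposite case: Deferred and Spider are annotation-only here, so the plugin would flag them (TC001 covers first-party imports such as Spider, TC002 third-party ones such as Deferred), yet they must stay importable at runtime because Sphinx resolves the annotations while building the docs, per the linked sphinx-doc issue. A minimal sketch of the suppression (the function is invented for illustration):

from __future__ import annotations

# Annotation-only, but kept as a runtime import for documentation
# tooling; the noqa silences flake8-type-checking's third-party check.
from twisted.internet.defer import Deferred  # noqa: TC002


def schedule(value: int) -> Deferred[None]:
    # Illustrative signature only; what matters is that Deferred stays
    # resolvable when the annotation is introspected at runtime.
    raise NotImplementedError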

View File

@@ -10,6 +10,7 @@ import logging
from inspect import isasyncgenfunction, iscoroutine
from itertools import islice
from typing import (
TYPE_CHECKING,
Any,
AsyncIterable,
Callable,
@@ -30,7 +31,6 @@ from scrapy import Request, Spider
from scrapy.exceptions import _InvalidOutput
from scrapy.http import Response
from scrapy.middleware import MiddlewareManager
from scrapy.settings import BaseSettings
from scrapy.utils.asyncgen import as_async_generator, collect_asyncgen
from scrapy.utils.conf import build_component_list
from scrapy.utils.defer import (
@@ -41,6 +41,10 @@
)
from scrapy.utils.python import MutableAsyncChain, MutableChain
if TYPE_CHECKING:
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)

View File

@@ -6,16 +6,18 @@ from typing import TYPE_CHECKING, Union
from w3lib import html
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import HtmlResponse, Response
from scrapy.settings import BaseSettings
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)

View File

@@ -2,24 +2,26 @@ from __future__ import annotations
import logging
from collections import defaultdict
from http.cookiejar import Cookie
from typing import TYPE_CHECKING, Any, DefaultDict, Iterable, Optional, Sequence, Union
from tldextract import TLDExtract
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
from scrapy.http.cookies import CookieJar
from scrapy.http.request import VerboseCookie
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_unicode
if TYPE_CHECKING:
from http.cookiejar import Cookie
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http.request import VerboseCookie
logger = logging.getLogger(__name__)

View File

@@ -8,15 +8,16 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Iterable, Tuple, Union
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.utils.python import without_none_values
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
class DefaultHeadersMiddleware:
def __init__(self, headers: Iterable[Tuple[str, str]]):

View File

@@ -9,13 +9,14 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Union
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.http import Response
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
class DownloadTimeoutMiddleware:
def __init__(self, timeout: float = 180):

View File

@@ -11,14 +11,15 @@ from typing import TYPE_CHECKING, Union
from w3lib.http import basic_auth_header
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.utils.url import url_is_from_any_domain
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
class HttpAuthMiddleware:
"""Set Basic HTTP Authorization header

View File

@@ -16,19 +16,20 @@ from twisted.internet.error import (
from twisted.web.client import ResponseFailed
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http.request import Request
from scrapy.http.response import Response
from scrapy.settings import Settings
from scrapy.spiders import Spider
from scrapy.statscollectors import StatsCollector
from scrapy.utils.misc import load_object
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http.request import Request
from scrapy.http.response import Response
from scrapy.settings import Settings
from scrapy.spiders import Spider
from scrapy.statscollectors import StatsCollector
class HttpCacheMiddleware:
DOWNLOAD_EXCEPTIONS = (

View File

@@ -6,11 +6,9 @@ from logging import getLogger
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Response, TextResponse
from scrapy.responsetypes import responsetypes
from scrapy.statscollectors import StatsCollector
from scrapy.utils._compression import (
_DecompressionMaxSizeExceeded,
_inflate,
@@ -24,6 +22,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = getLogger(__name__)
ACCEPTED_ENCODINGS: List[bytes] = [b"gzip", b"deflate"]

View File

@@ -9,10 +9,7 @@ from urllib.request import ( # type: ignore[attr-defined]
proxy_bypass,
)
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_bytes
@@ -20,6 +17,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
class HttpProxyMiddleware:
def __init__(self, auth_encoding: Optional[str] = "latin-1"):

View File

@@ -6,15 +6,17 @@ import warnings
from typing import TYPE_CHECKING, Set
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest
from scrapy.statscollectors import StatsCollector
from scrapy.utils.httpobj import urlparse_cached
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)

View File

@@ -6,11 +6,8 @@ from urllib.parse import urljoin
from w3lib.url import safe_url_string
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import HtmlResponse, Response
from scrapy.settings import BaseSettings
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.response import get_meta_refresh
@@ -18,6 +15,11 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)

View File

@@ -16,12 +16,8 @@ import warnings
from logging import Logger, getLogger
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, Union
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
from scrapy.http import Response
from scrapy.http.request import Request
from scrapy.settings import BaseSettings, Settings
from scrapy.spiders import Spider
from scrapy.utils.misc import load_object
from scrapy.utils.python import global_object_name
from scrapy.utils.response import response_status_message
@@ -30,6 +26,12 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.http.request import Request
from scrapy.spiders import Spider
retry_logger = getLogger(__name__)

View File

@@ -10,22 +10,24 @@ import logging
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.python.failure import Failure
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Request, Response
from scrapy.http.request import NO_CALLBACK
from scrapy.robotstxt import RobotParser
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import load_object
if TYPE_CHECKING:
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.robotstxt import RobotParser
logger = logging.getLogger(__name__)

View File

@@ -4,11 +4,7 @@ from typing import TYPE_CHECKING, Dict, List, Tuple, Union
from twisted.web import http
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
from scrapy.statscollectors import StatsCollector
from scrapy.utils.python import global_object_name, to_bytes
from scrapy.utils.request import request_httprepr
@@ -16,6 +12,11 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.statscollectors import StatsCollector
def get_header_size(
headers: Dict[str, Union[List[Union[str, bytes]], Tuple[Union[str, bytes], ...]]]

View File

@@ -5,13 +5,14 @@ from __future__ import annotations
from typing import TYPE_CHECKING, Union
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.http import Response
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
class UserAgentMiddleware:
"""This middleware allows spiders to override the user_agent"""

View File

@@ -4,11 +4,6 @@ import logging
from pathlib import Path
from typing import TYPE_CHECKING, Optional, Set
from twisted.internet.defer import Deferred
from scrapy.http.request import Request
from scrapy.settings import BaseSettings
from scrapy.spiders import Spider
from scrapy.utils.job import job_dir
from scrapy.utils.request import (
RequestFingerprinter,
@@ -17,10 +12,15 @@
)
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http.request import Request
from scrapy.settings import BaseSettings
from scrapy.spiders import Spider
class BaseDupeFilter:

View File

@@ -4,12 +4,16 @@ The Extension Manager
See documentation in docs/topics/extensions.rst
"""
from typing import Any, List
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List
from scrapy.middleware import MiddlewareManager
from scrapy.settings import Settings
from scrapy.utils.conf import build_component_list
if TYPE_CHECKING:
from scrapy.settings import Settings
class ExtensionManager(MiddlewareManager):
component_name = "extension"

View File

@@ -10,17 +10,19 @@ import logging
from collections import defaultdict
from typing import TYPE_CHECKING, Any, DefaultDict, Dict
from twisted.python.failure import Failure
from scrapy import Request, Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
if TYPE_CHECKING:
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.http import Response
logger = logging.getLogger(__name__)

View File

@@ -8,13 +8,14 @@ from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Optional
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
class CoreStats:
def __init__(self, stats: StatsCollector):

View File

@@ -12,17 +12,20 @@ import sys
import threading
import traceback
from pdb import Pdb
from types import FrameType
from typing import TYPE_CHECKING, Optional
from scrapy.crawler import Crawler
from scrapy.utils.engine import format_engine_status
from scrapy.utils.trackref import format_live_refs
if TYPE_CHECKING:
from types import FrameType
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
logger = logging.getLogger(__name__)

View File

@@ -31,18 +31,15 @@ from typing import (
)
from urllib.parse import unquote, urlparse
from twisted.internet import threads
from twisted.internet.defer import Deferred, DeferredList, maybeDeferred
from twisted.python.failure import Failure
from twisted.internet.threads import deferToThread
from w3lib.url import file_uri_to_path
from zope.interface import Interface, implementer
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
from scrapy.exporters import BaseItemExporter
from scrapy.extensions.postprocessing import PostProcessingManager
from scrapy.settings import BaseSettings, Settings
from scrapy.settings import Settings
from scrapy.utils.boto import is_botocore_available
from scrapy.utils.conf import feed_complete_default_values_from_settings
from scrapy.utils.defer import maybe_deferred_to_future
@@ -54,11 +51,14 @@ from scrapy.utils.python import without_none_values
if TYPE_CHECKING:
from _typeshed import OpenBinaryMode
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
logger = logging.getLogger(__name__)
from scrapy.crawler import Crawler
from scrapy.exporters import BaseItemExporter
from scrapy.settings import BaseSettings
try:
import boto3 # noqa: F401
@@ -67,6 +67,9 @@ try:
except ImportError:
IS_BOTO3_AVAILABLE = False
logger = logging.getLogger(__name__)
UriParamsCallableT = Callable[[Dict[str, Any], Spider], Optional[Dict[str, Any]]]
_StorageT = TypeVar("_StorageT", bound="FeedStorageProtocol")
@@ -160,7 +163,7 @@ class BlockingFeedStorage:
return NamedTemporaryFile(prefix="feed-", dir=path)
def store(self, file: IO[bytes]) -> Optional[Deferred]:
return threads.deferToThread(self._store_in_thread, file)
return deferToThread(self._store_in_thread, file)
def _store_in_thread(self, file: IO[bytes]) -> None:
raise NotImplementedError

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import gzip
import logging
import os
@@ -13,10 +15,7 @@ from weakref import WeakKeyDictionary
from w3lib.http import headers_dict_to_raw, headers_raw_to_dict
from scrapy.http import Headers, Response
from scrapy.http.request import Request
from scrapy.responsetypes import responsetypes
from scrapy.settings import BaseSettings
from scrapy.spiders import Spider
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.project import data_path
from scrapy.utils.python import to_bytes, to_unicode
@@ -26,6 +25,10 @@ if TYPE_CHECKING:
# typing.Concatenate requires Python 3.10
from typing_extensions import Concatenate
from scrapy.http.request import Request
from scrapy.settings import BaseSettings
from scrapy.spiders import Spider
logger = logging.getLogger(__name__)

View File

@@ -6,14 +6,16 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
from twisted.internet import task
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.statscollectors import StatsCollector
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)

View File

@@ -10,15 +10,16 @@ import gc
from typing import TYPE_CHECKING
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.statscollectors import StatsCollector
from scrapy.utils.trackref import live_refs
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
class MemoryDebugger:
def __init__(self, stats: StatsCollector):

View File

@@ -16,7 +16,6 @@ from typing import TYPE_CHECKING, List
from twisted.internet import task
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.mail import MailSender
from scrapy.utils.engine import get_engine_status
@@ -25,6 +24,9 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
logger = logging.getLogger(__name__)

View File

@@ -8,15 +8,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from twisted.internet import task
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.statscollectors import StatsCollector
from scrapy.utils.serialize import ScrapyJSONEncoder
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)

View File

@@ -5,7 +5,6 @@ from pathlib import Path
from typing import TYPE_CHECKING, Optional
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.utils.job import job_dir
@@ -13,6 +12,8 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
class SpiderState:
"""Store and load spider state during a scraping job"""

View File

@@ -8,18 +8,19 @@ from __future__ import annotations
from typing import TYPE_CHECKING, List, Optional
from twisted.internet.defer import Deferred
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.mail import MailSender
from scrapy.statscollectors import StatsCollector
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
class StatsMailer:
def __init__(self, stats: StatsCollector, recipients: List[str], mail: MailSender):

View File

@@ -26,7 +26,6 @@ except (ImportError, SyntaxError):
TWISTED_CONCH_AVAILABLE = False
from scrapy import signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.utils.decorators import defers
from scrapy.utils.engine import print_engine_status
@@ -36,6 +35,10 @@ from scrapy.utils.trackref import print_live_refs
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
logger = logging.getLogger(__name__)
# signal to update telnet variables

View File

@@ -4,15 +4,17 @@ import logging
from typing import TYPE_CHECKING, Optional, Tuple
from scrapy import Request, Spider, signals
from scrapy.core.downloader import Slot
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Response
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.core.downloader import Slot
from scrapy.crawler import Crawler
from scrapy.http import Response
logger = logging.getLogger(__name__)

View File

@@ -17,8 +17,6 @@ from typing import (
cast,
)
from scrapy import Request
from scrapy.http import Response
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.python import to_unicode
@@ -26,6 +24,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request
from scrapy.http import Response
# Defined in the http.cookiejar module, but undocumented:
# https://github.com/python/cpython/blob/v3.9.0/Lib/http/cookiejar.py#L527
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)

View File

@@ -28,13 +28,14 @@ from lxml.html import TextareaElement # nosec
from w3lib.html import strip_html5_whitespace
from scrapy.http.request import Request
from scrapy.http.response.text import TextResponse
from scrapy.utils.python import is_listlike, to_bytes
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.http.response.text import TextResponse
FormdataVType = Union[str, Iterable[str]]
FormdataKVType = Tuple[str, FormdataVType]

View File

@@ -7,7 +7,6 @@ See documentation in docs/topics/request-response.rst
from __future__ import annotations
from ipaddress import IPv4Address, IPv6Address
from typing import (
TYPE_CHECKING,
Any,
@@ -26,8 +25,6 @@
)
from urllib.parse import urljoin
from twisted.internet.ssl import Certificate
from scrapy.exceptions import NotSupported
from scrapy.http.headers import Headers
from scrapy.http.request import CookiesT, Request
@@ -35,6 +32,10 @@ from scrapy.link import Link
from scrapy.utils.trackref import object_ref
if TYPE_CHECKING:
from ipaddress import IPv4Address, IPv6Address
from twisted.internet.ssl import Certificate
# typing.Self requires Python 3.11
from typing_extensions import Self
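
The "typing.Self requires Python 3.11" comment recurs through this commit: Self is imported from typing_extensions on older interpreters, and only under TYPE_CHECKING because it is never needed at runtime. A sketch of the pattern it supports, in the spirit of Scrapy's from_crawler factories (the class is illustrative):

from __future__ import annotations

from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # typing.Self requires Python 3.11
    from typing_extensions import Self


class Component:
    @classmethod
    def from_crawler(cls, crawler: Any) -> Self:
        # Self makes the factory return whichever subclass it is called
        # on, without hand-writing a bound TypeVar.
        return cls()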

View File

@@ -35,15 +35,16 @@ from w3lib.encoding import (
)
from w3lib.html import strip_html5_whitespace
from scrapy.http.request import CookiesT, Request
from scrapy.http.response import Response
from scrapy.link import Link
from scrapy.utils.python import memoizemethod_noargs, to_unicode
from scrapy.utils.response import get_base_url
if TYPE_CHECKING:
from scrapy.http.request import CookiesT, Request
from scrapy.selector import Selector, SelectorList
_NONE = object()

View File

@@ -2,10 +2,13 @@
Link extractor based on lxml.html
"""
from __future__ import annotations
import logging
import operator
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterable,
@@ -20,13 +23,10 @@
from urllib.parse import urljoin, urlparse
from lxml import etree # nosec
from lxml.html import HtmlElement # nosec
from parsel.csstranslator import HTMLTranslator
from w3lib.html import strip_html5_whitespace
from w3lib.url import canonicalize_url, safe_url_string
from scrapy import Selector
from scrapy.http import TextResponse
from scrapy.link import Link
from scrapy.linkextractors import IGNORED_EXTENSIONS, _is_valid_url, _matches, re
from scrapy.utils.misc import arg_to_iter, rel_has_nofollow
@@ -34,6 +34,13 @@ from scrapy.utils.python import unique as unique_list
from scrapy.utils.response import get_base_url
from scrapy.utils.url import url_has_any_extension, url_is_from_any_domain
if TYPE_CHECKING:
from lxml.html import HtmlElement # nosec
from scrapy import Selector
from scrapy.http import TextResponse
logger = logging.getLogger(__name__)
# from lxml/src/lxml/html/__init__.py

View File

@@ -4,14 +4,18 @@ Item Loader
See documentation in docs/topics/loaders.rst
"""
from typing import Any, Optional
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional
import itemloaders
from scrapy.http import TextResponse
from scrapy.item import Item
from scrapy.selector import Selector
if TYPE_CHECKING:
from scrapy.http import TextResponse
class ItemLoader(itemloaders.ItemLoader):
"""
@@ -91,7 +95,7 @@ class ItemLoader(itemloaders.ItemLoader):
selector: Optional[Selector] = None,
response: Optional[TextResponse] = None,
parent: Optional[itemloaders.ItemLoader] = None,
**context: Any
**context: Any,
):
if selector is None and response is not None:
try:

View File

@@ -6,8 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, TypedDict, Union
from twisted.python.failure import Failure
from scrapy import Request, Spider
from scrapy.http import Response
# working around https://github.com/sphinx-doc/sphinx/issues/10400
from scrapy import Request, Spider # noqa: TC001
from scrapy.http import Response # noqa: TC001
from scrapy.utils.request import referer_str
if TYPE_CHECKING:

View File

@@ -30,20 +30,22 @@ from typing import (
from twisted import version as twisted_version
from twisted.internet import ssl
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.python.versions import Version
from scrapy.settings import BaseSettings
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.python import to_bytes
if TYPE_CHECKING:
# imports twisted.internet.reactor
from twisted.mail.smtp import ESMTPSenderFactory
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)

View File

@@ -17,19 +17,19 @@ from typing import (
cast,
)
from twisted.internet.defer import Deferred
from scrapy import Spider
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.defer import process_chain, process_parallel
from scrapy.utils.misc import build_from_crawler, build_from_settings, load_object
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import Settings
logger = logging.getLogger(__name__)

View File

@@ -6,16 +6,18 @@ See documentation in docs/item-pipeline.rst
from __future__ import annotations
from typing import Any, List
from typing import TYPE_CHECKING, Any, List
from twisted.internet.defer import Deferred
from scrapy import Spider
from scrapy.middleware import MiddlewareManager
from scrapy.settings import Settings
from scrapy.utils.conf import build_component_list
from scrapy.utils.defer import deferred_f_from_coro_f
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
from scrapy import Spider
from scrapy.settings import Settings
class ItemPipelineManager(MiddlewareManager):
component_name = "item pipeline"

View File

@@ -16,7 +16,6 @@ from collections import defaultdict
from contextlib import suppress
from ftplib import FTP
from io import BytesIO
from os import PathLike
from pathlib import Path
from typing import (
IO,
@@ -38,11 +37,9 @@ from typing import (
from urllib.parse import urlparse
from itemadapter import ItemAdapter
from twisted.internet import defer, threads
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.internet.threads import deferToThread
from scrapy import Spider
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Request, Response
from scrapy.http.request import NO_CALLBACK
@@ -56,9 +53,15 @@ from scrapy.utils.python import to_bytes
from scrapy.utils.request import referer_str
if TYPE_CHECKING:
from os import PathLike
from twisted.python.failure import Failure
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
logger = logging.getLogger(__name__)
@@ -210,7 +213,7 @@ class S3FilesStore:
key_name = f"{self.prefix}{path}"
return cast(
"Deferred[Dict[str, Any]]",
threads.deferToThread(
deferToThread(
self.s3_client.head_object, Bucket=self.bucket, Key=key_name # type: ignore[attr-defined]
),
)
@@ -229,7 +232,7 @@ class S3FilesStore:
extra = self._headers_to_botocore_kwargs(self.HEADERS)
if headers:
extra.update(self._headers_to_botocore_kwargs(headers))
return threads.deferToThread(
return deferToThread(
self.s3_client.put_object, # type: ignore[attr-defined]
Bucket=self.bucket,
Key=key_name,
@@ -326,9 +329,7 @@ class GCSFilesStore:
blob_path = self._get_blob_path(path)
return cast(
Deferred[StatInfo],
threads.deferToThread(self.bucket.get_blob, blob_path).addCallback(
_onsuccess
),
deferToThread(self.bucket.get_blob, blob_path).addCallback(_onsuccess),
)
def _get_content_type(self, headers: Optional[Dict[str, str]]) -> str:
@@ -351,7 +352,7 @@ class GCSFilesStore:
blob = self.bucket.blob(blob_path)
blob.cache_control = self.CACHE_CONTROL
blob.metadata = {k: str(v) for k, v in (meta or {}).items()}
return threads.deferToThread(
return deferToThread(
blob.upload_from_string,
data=buf.getvalue(),
content_type=self._get_content_type(headers),
@@ -388,7 +389,7 @@ class FTPFilesStore:
headers: Optional[Dict[str, str]] = None,
) -> Deferred[Any]:
path = f"{self.basedir}/{path}"
return threads.deferToThread(
return deferToThread(
ftp_store_file,
path=path,
file=buf,
@@ -418,7 +419,7 @@
except Exception:
return {}
return cast("Deferred[StatInfo]", threads.deferToThread(_stat_file, path))
return cast("Deferred[StatInfo]", deferToThread(_stat_file, path))
class FilesPipeline(MediaPipeline):
@@ -553,8 +554,8 @@ class FilesPipeline(MediaPipeline):
}
path = self.file_path(request, info=info, item=item)
# defer.maybeDeferred() overloads don't seem to support a Union[_T, Deferred[_T]] return type
dfd: Deferred[StatInfo] = defer.maybeDeferred(self.store.stat_file, path, info) # type: ignore[arg-type]
# maybeDeferred() overloads don't seem to support a Union[_T, Deferred[_T]] return type
dfd: Deferred[StatInfo] = maybeDeferred(self.store.stat_file, path, info) # type: ignore[arg-type]
dfd2: Deferred[Optional[FileInfo]] = dfd.addCallback(_onsuccess)
dfd2.addErrback(lambda _: None)
dfd2.addErrback(

View File

@@ -11,7 +11,6 @@ import hashlib
import warnings
from contextlib import suppress
from io import BytesIO
from os import PathLike
from typing import (
TYPE_CHECKING,
Any,
@@ -28,7 +27,6 @@ from typing import (
from itemadapter import ItemAdapter
from scrapy import Spider
from scrapy.exceptions import DropItem, NotConfigured, ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.http.request import NO_CALLBACK
@@ -40,15 +38,20 @@ from scrapy.pipelines.files import (
S3FilesStore,
_md5sum,
)
from scrapy.pipelines.media import FileInfoOrError, MediaPipeline
from scrapy.settings import Settings
from scrapy.utils.python import get_func_args, to_bytes
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from os import PathLike
from PIL import Image
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.pipelines.media import FileInfoOrError, MediaPipeline
class NoimagesDrop(DropItem):
"""Product with no images exception"""

View File

@@ -25,21 +25,23 @@ from typing import (
from twisted.internet.defer import Deferred, DeferredList
from twisted.python.failure import Failure
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.http.request import NO_CALLBACK, Request
from scrapy.settings import Settings
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.defer import defer_result, mustbe_deferred
from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.request import RequestFingerprinter
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.utils.request import RequestFingerprinter
_T = TypeVar("_T")

View File

@@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Type
from twisted.internet import defer
from twisted.internet.base import ReactorBase, ThreadedResolver
from twisted.internet.defer import Deferred
from twisted.internet.interfaces import (
IAddress,
IHostnameResolver,
@@ -17,6 +16,8 @@ from zope.interface.declarations import implementer, provider
from scrapy.utils.datatypes import LocalCache
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
# typing.Self requires Python 3.11
from typing_extensions import Self

View File

@@ -6,7 +6,6 @@ from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Optional, Union
from warnings import warn
from scrapy import Spider
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.python import to_unicode
@@ -14,8 +13,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
logger = logging.getLogger(__name__)

View File

@@ -4,7 +4,6 @@ import copy
import json
from importlib import import_module
from pprint import pformat
from types import ModuleType
from typing import (
TYPE_CHECKING,
Any,
@@ -27,6 +26,8 @@ from scrapy.settings import default_settings
_SettingsKeyT = Union[bool, float, int, str, None]
if TYPE_CHECKING:
from types import ModuleType
# https://github.com/python/typing/issues/445#issuecomment-1131458824
from _typeshed import SupportsItems

View File

@@ -1,10 +1,14 @@
from typing import Any, List, Tuple
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Tuple
from pydispatch import dispatcher
from twisted.internet.defer import Deferred
from scrapy.utils import signal as _signal
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
class SignalManager:
def __init__(self, sender: Any = dispatcher.Anonymous):


@ -3,21 +3,23 @@ from __future__ import annotations
import traceback
import warnings
from collections import defaultdict
from types import ModuleType
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Tuple, Type
from zope.interface import implementer
from scrapy import Request, Spider
from scrapy.interfaces import ISpiderLoader
from scrapy.settings import BaseSettings
from scrapy.utils.misc import walk_modules
from scrapy.utils.spider import iter_spider_classes
if TYPE_CHECKING:
from types import ModuleType
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request, Spider
from scrapy.settings import BaseSettings
@implementer(ISpiderLoader)
class SpiderLoader:


@ -9,15 +9,17 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Request, Response
from scrapy.statscollectors import StatsCollector
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)


@ -9,16 +9,18 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, Iterable, List, Optional
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest
from scrapy.http import Response
from scrapy.settings import BaseSettings
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)


@ -12,10 +12,8 @@ import warnings
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable, Set
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.statscollectors import StatsCollector
from scrapy.utils.httpobj import urlparse_cached
warnings.warn(
@ -28,6 +26,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector
logger = logging.getLogger(__name__)


@ -23,10 +23,8 @@ from urllib.parse import urlparse
from w3lib.url import safe_url_string
from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Request, Response
from scrapy.settings import BaseSettings
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_unicode
from scrapy.utils.url import strip_url
@ -35,6 +33,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
LOCAL_SCHEMES: Tuple[str, ...] = (
"about",
"blob",


@ -9,15 +9,17 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable
from scrapy import Spider
from scrapy.exceptions import NotConfigured
from scrapy.http import Request, Response
from scrapy.settings import BaseSettings
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Spider
from scrapy.settings import BaseSettings
logger = logging.getLogger(__name__)


@ -1,10 +1,14 @@
from typing import Any, Iterable, Optional, cast
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Iterable, Optional, cast
from scrapy import Request
from scrapy.http import Response
from scrapy.spiders import Spider
from scrapy.utils.spider import iterate_spider_output
if TYPE_CHECKING:
from scrapy.http import Response
class InitSpider(Spider):
"""Base Spider with initialization facilities"""


@ -6,20 +6,22 @@ from __future__ import annotations
import marshal
import pickle # nosec
from os import PathLike
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Optional, Type, Union
from queuelib import queue
from scrapy import Request
from scrapy.crawler import Crawler
from scrapy.utils.request import request_from_dict
if TYPE_CHECKING:
from os import PathLike
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy import Request
from scrapy.crawler import Crawler
def _with_mkdir(queue_class: Type[queue.BaseQueue]) -> Type[queue.BaseQueue]:
class DirectoriesCreated(queue_class): # type: ignore[valid-type,misc]


@ -2,15 +2,17 @@
Scrapy extension for collecting scraping stats
"""
from __future__ import annotations
import logging
import pprint
from typing import TYPE_CHECKING, Any, Dict, Optional
from scrapy import Spider
if TYPE_CHECKING:
from scrapy import Spider
from scrapy.crawler import Crawler
logger = logging.getLogger(__name__)
@ -18,7 +20,7 @@ StatsT = Dict[str, Any]
class StatsCollector:
def __init__(self, crawler: "Crawler"):
def __init__(self, crawler: Crawler):
self._dump: bool = crawler.settings.getbool("STATS_DUMP")
self._stats: StatsT = {}
@ -67,7 +69,7 @@ class StatsCollector:
class MemoryStatsCollector(StatsCollector):
def __init__(self, crawler: "Crawler"):
def __init__(self, crawler: Crawler):
super().__init__(crawler)
self.spider_stats: Dict[str, StatsT] = {}


@ -4,8 +4,8 @@ import warnings
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable, TypeVar
from twisted.internet import defer, threads
from twisted.internet.defer import Deferred
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.internet.threads import deferToThread
from scrapy.exceptions import ScrapyDeprecationWarning
@ -48,7 +48,7 @@ def defers(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:
@wraps(func)
def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
return defer.maybeDeferred(func, *a, **kw)
return maybeDeferred(func, *a, **kw)
return wrapped
@ -60,6 +60,6 @@ def inthread(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:
@wraps(func)
def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
return threads.deferToThread(func, *a, **kw)
return deferToThread(func, *a, **kw)
return wrapped


@ -34,12 +34,13 @@ from twisted.internet import defer
from twisted.internet.defer import Deferred, DeferredList, ensureDeferred
from twisted.internet.task import Cooperator
from twisted.python import failure
from twisted.python.failure import Failure
from scrapy.exceptions import IgnoreRequest, ScrapyDeprecationWarning
from scrapy.utils.reactor import _get_asyncio_event_loop, is_asyncio_reactor_installed
if TYPE_CHECKING:
from twisted.python.failure import Failure
# typing.Concatenate and typing.ParamSpec require Python 3.10
from typing_extensions import Concatenate, ParamSpec


@ -4,9 +4,10 @@ from __future__ import annotations
# used in global tests code
from time import time # noqa: F401
from typing import Any, List, Tuple
from typing import TYPE_CHECKING, Any, List, Tuple
from scrapy.core.engine import ExecutionEngine
if TYPE_CHECKING:
from scrapy.core.engine import ExecutionEngine
def get_engine_status(engine: ExecutionEngine) -> List[Tuple[str, Any]]:


@ -1,11 +1,15 @@
from __future__ import annotations
import struct
from gzip import GzipFile
from io import BytesIO
from scrapy.http import Response
from typing import TYPE_CHECKING
from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded
if TYPE_CHECKING:
from scrapy.http import Response
def gunzip(data: bytes, *, max_size: int = 0) -> bytes:
"""Gunzip the given data and return as much data as possible.


@ -1,12 +1,16 @@
"""Helper functions for scrapy.http objects (Request, Response)"""
from typing import Union
from __future__ import annotations
from typing import TYPE_CHECKING, Union
from urllib.parse import ParseResult, urlparse
from weakref import WeakKeyDictionary
from scrapy.http import Request, Response
if TYPE_CHECKING:
from scrapy.http import Request, Response
_urlparse_cache: "WeakKeyDictionary[Union[Request, Response], ParseResult]" = (
_urlparse_cache: WeakKeyDictionary[Union[Request, Response], ParseResult] = (
WeakKeyDictionary()
)


@ -1,7 +1,10 @@
from pathlib import Path
from typing import Optional
from __future__ import annotations
from scrapy.settings import BaseSettings
from pathlib import Path
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from scrapy.settings import BaseSettings
def job_dir(settings: BaseSettings) -> Optional[str]:


@ -21,12 +21,13 @@ from twisted.python import log as twisted_log
from twisted.python.failure import Failure
import scrapy
from scrapy.logformatter import LogFormatterResult
from scrapy.settings import Settings, _SettingsKeyT
from scrapy.utils.versions import scrapy_components_versions
if TYPE_CHECKING:
from scrapy.crawler import Crawler
from scrapy.logformatter import LogFormatterResult
logger = logging.getLogger(__name__)


@ -13,7 +13,6 @@ from contextlib import contextmanager
from functools import partial
from importlib import import_module
from pkgutil import iter_modules
from types import ModuleType
from typing import (
IO,
TYPE_CHECKING,
@ -35,10 +34,13 @@ from scrapy.item import Item
from scrapy.utils.datatypes import LocalWeakReferencedCache
if TYPE_CHECKING:
from types import ModuleType
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings
_ITERABLE_SINGLE_VALUES = dict, Item, str, bytes
T = TypeVar("T")


@ -1,7 +1,8 @@
from __future__ import annotations
import os
import warnings
from importlib import import_module
from os import PathLike
from pathlib import Path
from typing import Union
@ -46,7 +47,7 @@ def project_data_dir(project: str = "default") -> str:
return str(d)
def data_path(path: Union[str, PathLike], createdir: bool = False) -> str:
def data_path(path: Union[str, os.PathLike[str]], createdir: bool = False) -> str:
"""
Return the given path joined with the .scrapy data directory.
If given an absolute path, return it unmodified.


@ -2,7 +2,6 @@ from __future__ import annotations
import asyncio
import sys
from asyncio import AbstractEventLoop, AbstractEventLoopPolicy
from contextlib import suppress
from typing import (
TYPE_CHECKING,
@ -20,13 +19,16 @@ from warnings import catch_warnings, filterwarnings, warn
from twisted.internet import asyncioreactor, error
from twisted.internet.base import DelayedCall
from twisted.internet.protocol import ServerFactory
from twisted.internet.tcp import Port
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.misc import load_object
if TYPE_CHECKING:
from asyncio import AbstractEventLoop, AbstractEventLoopPolicy
from twisted.internet.protocol import ServerFactory
from twisted.internet.tcp import Port
# typing.ParamSpec requires Python 3.10
from typing_extensions import ParamSpec


@ -2,7 +2,6 @@ from __future__ import annotations
import inspect
import logging
from types import CoroutineType, ModuleType
from typing import (
TYPE_CHECKING,
Any,
@ -16,16 +15,19 @@ from typing import (
overload,
)
from twisted.internet.defer import Deferred
from scrapy import Request
from scrapy.spiders import Spider
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import arg_to_iter
if TYPE_CHECKING:
from types import CoroutineType, ModuleType
from twisted.internet.defer import Deferred
from scrapy import Request
from scrapy.spiderloader import SpiderLoader
logger = logging.getLogger(__name__)
_T = TypeVar("_T")


@ -1,12 +1,16 @@
from typing import Any, Optional
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional
import OpenSSL._util as pyOpenSSLutil
import OpenSSL.SSL
import OpenSSL.version
from OpenSSL.crypto import X509Name
from scrapy.utils.python import to_unicode
if TYPE_CHECKING:
from OpenSSL.crypto import X509Name
def ffi_buf_to_string(buf: Any) -> str:
return to_unicode(pyOpenSSLutil.ffi.string(buf))


@ -1,10 +1,14 @@
"""Helper functions for working with templates"""
from __future__ import annotations
import re
import string
from os import PathLike
from pathlib import Path
from typing import Any, Union
from typing import TYPE_CHECKING, Any, Union
if TYPE_CHECKING:
from os import PathLike
def render_templatefile(path: Union[str, PathLike], **kwargs: Any) -> None:


@ -2,21 +2,36 @@
This module contains some assorted functions used in tests
"""
from __future__ import annotations
import asyncio
import os
from importlib import import_module
from pathlib import Path
from posixpath import split
from typing import Any, Awaitable, Dict, List, Optional, Tuple, Type, TypeVar
from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Dict,
List,
Optional,
Tuple,
Type,
TypeVar,
)
from unittest import TestCase, mock
from twisted.internet.defer import Deferred
from twisted.trial.unittest import SkipTest
from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.utils.boto import is_botocore_available
if TYPE_CHECKING:
from twisted.internet.defer import Deferred
_T = TypeVar("_T")

Some files were not shown because too many files have changed in this diff.
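
Taken together, the hunks above all apply the same flake8-type-checking refactor: imports used only in type annotations move under an "if TYPE_CHECKING:" guard, and "from __future__ import annotations" turns every annotation into a string at runtime, which is also why previously quoted hints such as "Crawler" become bare names. Below is a minimal sketch of the pattern; ExampleExtension is a hypothetical class for illustration only, while the scrapy.crawler import is the real one used throughout this diff.

from __future__ import annotations  # all annotations become strings, needing no runtime imports

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by type checkers only; never executed at runtime, so it adds
    # no import cost and cannot create a circular import.
    from scrapy.crawler import Crawler


class ExampleExtension:  # hypothetical, for illustration only
    def __init__(self, crawler: Crawler) -> None:
        # With the __future__ import, this attribute annotation is not
        # evaluated either, so the bare Crawler name is safe here.
        self.crawler: Crawler = crawler

Under plain Python such a module never imports scrapy.crawler, while mypy and the flake8-type-checking plugin, which do evaluate the TYPE_CHECKING branch, still resolve the name.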