Mirror of https://github.com/scrapy/scrapy.git (synced 2025-02-06 16:51:46 +00:00)

Add flake8-type-checking. (#6413)

This commit is contained in:
parent 326e323e11
commit d08f559600

.flake8 (4 changed lines)
@@ -1,6 +1,7 @@
[flake8]

max-line-length = 119
extend-select = TC, TC1
ignore =
    # black disagrees with flake8 about these
    E203, E501, E701, E704, W503
@@ -58,6 +59,9 @@ ignore =
    D402
    # First word of the first line should be properly capitalized
    D403

    # Annotation in typing.cast() should be a string literal
    TC006
exclude =
    docs/conf.py
@@ -14,6 +14,7 @@ repos:
- flake8-debugger
- flake8-docstrings
- flake8-string-format
- flake8-type-checking
- repo: https://github.com/psf/black.git
  rev: 24.2.0
  hooks:
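The two configuration changes above enable the flake8-type-checking plugin (its TC and TC1 rule families) both for the flake8 run and for the pre-commit hook. Those rules flag imports that are only needed for type annotations and ask for them to be moved into an `if TYPE_CHECKING:` block, which is the transformation the source diffs below apply. A minimal sketch of the before/after pattern, using a hypothetical module name rather than anything from Scrapy:

```python
from __future__ import annotations  # PEP 563: annotations are stored as strings

from typing import TYPE_CHECKING

# Before this cleanup, the import below would sit at module level and
# flake8-type-checking would report it (TC001/TC002), because it is only used in a hint.
if TYPE_CHECKING:
    # Executed only by type checkers (mypy, pyright, etc.), never at runtime.
    from heavy_package.models import HeavyModel  # hypothetical import


def handle(model: HeavyModel) -> None:  # the annotation is never evaluated at runtime
    print(model)
```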
@@ -1,13 +1,16 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any, List

from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.conf import build_component_list
from scrapy.utils.misc import build_from_crawler, load_object

if TYPE_CHECKING:
    from scrapy.crawler import Crawler
    from scrapy.settings import Settings


logger = logging.getLogger(__name__)

@@ -15,8 +18,8 @@ logger = logging.getLogger(__name__)
class AddonManager:
    """This class facilitates loading and storing :ref:`topics-addons`."""

    def __init__(self, crawler: "Crawler") -> None:
        self.crawler: "Crawler" = crawler
    def __init__(self, crawler: Crawler) -> None:
        self.crawler: Crawler = crawler
        self.addons: List[Any] = []

    def load_settings(self, settings: Settings) -> None:
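The scrapy/addons.py hunks above pair `from __future__ import annotations` with the new `if TYPE_CHECKING:` block: once annotations are postponed (PEP 563), the previously quoted `"Crawler"` hints can be written as plain `Crawler` even though the class is no longer imported at runtime. A small sketch of the same idea, with a hypothetical `Crawler` stand-in:

```python
from __future__ import annotations  # makes every annotation lazy, so no quotes are needed

from typing import TYPE_CHECKING, Any, List

if TYPE_CHECKING:
    from myproject.crawler import Crawler  # hypothetical; visible to type checkers only


class AddonManagerSketch:
    def __init__(self, crawler: Crawler) -> None:  # unquoted, despite the guarded import
        self.crawler: Crawler = crawler
        self.addons: List[Any] = []
```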
@@ -12,7 +12,6 @@ import scrapy
from scrapy.commands import BaseRunSpiderCommand, ScrapyCommand, ScrapyHelpFormatter
from scrapy.crawler import CrawlerProcess
from scrapy.exceptions import UsageError
from scrapy.settings import BaseSettings, Settings
from scrapy.utils.misc import walk_modules
from scrapy.utils.project import get_project_settings, inside_project
from scrapy.utils.python import garbage_collect
@@ -21,6 +20,8 @@ if TYPE_CHECKING:
    # typing.ParamSpec requires Python 3.10
    from typing_extensions import ParamSpec

    from scrapy.settings import BaseSettings, Settings

    _P = ParamSpec("_P")
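In the scrapy/cmdline.py hunk, `ParamSpec` stays behind `if TYPE_CHECKING:` because `typing.ParamSpec` only exists on Python 3.10+, so the `typing_extensions` backport is needed by type checkers alone, not at runtime. A sketch of that pattern under the same assumption; the decorator itself is illustrative, not Scrapy code:

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, TypeVar

if TYPE_CHECKING:
    # typing.ParamSpec requires Python 3.10; the backport is imported for type checking only.
    from typing_extensions import ParamSpec

    _P = ParamSpec("_P")

_T = TypeVar("_T")


def traced(func: Callable[_P, _T]) -> Callable[_P, _T]:
    """Return a wrapper that logs the call; the signature is preserved for type checkers."""

    def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> _T:
        print(f"calling {func.__name__}")
        return func(*args, **kwargs)

    return wrapper
```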
@@ -2,18 +2,22 @@
Base class for Scrapy commands
"""

from __future__ import annotations

import argparse
import builtins
import os
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional

from twisted.python import failure

from scrapy.crawler import Crawler, CrawlerProcess
from scrapy.exceptions import UsageError
from scrapy.utils.conf import arglist_to_dict, feed_process_params_from_cli

if TYPE_CHECKING:
    from scrapy.crawler import Crawler, CrawlerProcess


class ScrapyCommand:
    requires_project: bool = False
@@ -1,16 +1,20 @@
from __future__ import annotations

import argparse
import subprocess  # nosec
import sys
import time
from typing import Any, Iterable, List
from typing import TYPE_CHECKING, Any, Iterable, List
from urllib.parse import urlencode

import scrapy
from scrapy import Request
from scrapy.commands import ScrapyCommand
from scrapy.http import Response, TextResponse
from scrapy.linkextractors import LinkExtractor

if TYPE_CHECKING:
    from scrapy import Request


class Command(ScrapyCommand):
    default_settings = {
@@ -1,11 +1,15 @@
import argparse
from typing import List, cast
from __future__ import annotations

from typing import TYPE_CHECKING, List, cast

from twisted.python.failure import Failure

from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError

if TYPE_CHECKING:
    import argparse


class Command(BaseRunSpiderCommand):
    requires_project = True
@@ -1,6 +1,7 @@
from __future__ import annotations

import sys
from argparse import ArgumentParser, Namespace
from typing import Dict, List, Type
from typing import TYPE_CHECKING, Dict, List, Type

from w3lib.url import is_url

@@ -11,6 +12,9 @@ from scrapy.http import Request, Response
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.spider import DefaultSpider, spidercls_for_request

if TYPE_CHECKING:
    from argparse import ArgumentParser, Namespace


class Command(ScrapyCommand):
    requires_project = False
@@ -1,8 +1,12 @@
import argparse
from typing import List
from __future__ import annotations

from typing import TYPE_CHECKING, List

from scrapy.commands import ScrapyCommand

if TYPE_CHECKING:
    import argparse


class Command(ScrapyCommand):
    requires_project = True
@@ -6,6 +6,7 @@ import inspect
import json
import logging
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncGenerator,
    Callable,
@@ -22,13 +23,11 @@ from typing import (

from itemadapter import ItemAdapter, is_item
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.python.failure import Failure
from w3lib.url import is_url

from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError
from scrapy.http import Request, Response
from scrapy.spiders import Spider
from scrapy.utils import display
from scrapy.utils.asyncgen import collect_asyncgen
from scrapy.utils.defer import aiter_errback, deferred_from_coro
@@ -36,6 +35,12 @@ from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.spider import spidercls_for_request

if TYPE_CHECKING:
    from twisted.python.failure import Failure

    from scrapy.spiders import Spider


logger = logging.getLogger(__name__)

_T = TypeVar("_T")
@@ -1,17 +1,21 @@
from __future__ import annotations

import argparse
import sys
from importlib import import_module
from os import PathLike
from pathlib import Path
from types import ModuleType
from typing import List, Union
from typing import TYPE_CHECKING, List, Union

from scrapy.commands import BaseRunSpiderCommand
from scrapy.exceptions import UsageError
from scrapy.utils.spider import iter_spider_classes

if TYPE_CHECKING:
    from os import PathLike
    from types import ModuleType


def _import_file(filepath: Union[str, PathLike]) -> ModuleType:

def _import_file(filepath: Union[str, PathLike[str]]) -> ModuleType:
    abspath = Path(filepath).resolve()
    if abspath.suffix not in (".py", ".pyw"):
        raise ValueError(f"Not a Python source file: {abspath}")
@@ -4,9 +4,10 @@ Scrapy Shell
See documentation in docs/topics/shell.rst
"""

from argparse import ArgumentParser, Namespace
from __future__ import annotations

from threading import Thread
from typing import Any, Dict, List, Type
from typing import TYPE_CHECKING, Any, Dict, List, Type

from scrapy import Spider
from scrapy.commands import ScrapyCommand
@@ -15,6 +16,9 @@ from scrapy.shell import Shell
from scrapy.utils.spider import DefaultSpider, spidercls_for_request
from scrapy.utils.url import guess_scheme

if TYPE_CHECKING:
    from argparse import ArgumentParser, Namespace


class Command(ScrapyCommand):
    requires_project = False
@@ -1,9 +1,12 @@
from __future__ import annotations

import re
import sys
from functools import wraps
from inspect import getmembers
from types import CoroutineType
from typing import (
    TYPE_CHECKING,
    Any,
    AsyncGenerator,
    Callable,
@@ -16,13 +19,15 @@ from typing import (
)
from unittest import TestCase, TestResult

from twisted.python.failure import Failure

from scrapy import Spider
from scrapy.http import Request, Response
from scrapy.utils.python import get_spec
from scrapy.utils.spider import iterate_spider_output

if TYPE_CHECKING:
    from twisted.python.failure import Failure

    from scrapy import Spider


class Contract:
    """Abstract class for contracts"""
@ -25,15 +25,16 @@ from scrapy import Request, Spider, signals
|
||||
from scrapy.core.downloader.handlers import DownloadHandlers
|
||||
from scrapy.core.downloader.middleware import DownloaderMiddlewareManager
|
||||
from scrapy.exceptions import ScrapyDeprecationWarning
|
||||
from scrapy.http import Response
|
||||
from scrapy.resolver import dnscache
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.signalmanager import SignalManager
|
||||
from scrapy.utils.defer import mustbe_deferred
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
||||
|
@ -21,8 +21,6 @@ from scrapy.core.downloader.tls import (
|
||||
ScrapyClientTLSOptions,
|
||||
openssl_methods,
|
||||
)
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.misc import build_from_crawler, load_object
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -31,6 +29,9 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
@implementer(IPolicyForHTTPS)
|
||||
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
|
||||
|
@ -17,17 +17,19 @@ from typing import (
|
||||
)
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.exceptions import NotConfigured, NotSupported
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.misc import build_from_crawler, load_object
|
||||
from scrapy.utils.python import without_none_values
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -1,12 +1,16 @@
|
||||
from typing import Any, Dict
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict
|
||||
|
||||
from w3lib.url import parse_data_uri
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.http import Response, TextResponse
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.utils.decorators import defers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy import Request, Spider
|
||||
|
||||
|
||||
class DataURIDownloadHandler:
|
||||
lazy = False
|
||||
|
@ -1,12 +1,17 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from w3lib.url import file_uri_to_path
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.http import Response
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.utils.decorators import defers
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class FileDownloadHandler:
|
||||
lazy = False
|
||||
|
@ -35,23 +35,25 @@ from io import BytesIO
|
||||
from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Optional
|
||||
from urllib.parse import unquote
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.internet.protocol import ClientCreator, Protocol
|
||||
from twisted.protocols.ftp import CommandFailed, FTPClient
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.python import to_bytes
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
class ReceivedDataProtocol(Protocol):
|
||||
def __init__(self, filename: Optional[str] = None):
|
||||
|
@ -5,21 +5,21 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Type
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.misc import build_from_crawler, load_object
|
||||
from scrapy.utils.python import to_unicode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
|
||||
from scrapy.core.downloader.webclient import ScrapyHTTPClientFactory
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
class HTTP10DownloadHandler:
|
||||
|
@ -12,11 +12,9 @@ from typing import TYPE_CHECKING, Any, List, Optional, Tuple, TypedDict, TypeVar
|
||||
from urllib.parse import urldefrag, urlunparse
|
||||
|
||||
from twisted.internet import ssl
|
||||
from twisted.internet.base import ReactorBase
|
||||
from twisted.internet.defer import CancelledError, Deferred, succeed
|
||||
from twisted.internet.endpoints import TCP4ClientEndpoint
|
||||
from twisted.internet.error import TimeoutError
|
||||
from twisted.internet.interfaces import IConsumer
|
||||
from twisted.internet.protocol import Factory, Protocol, connectionDone
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import URI, Agent, HTTPConnectionPool
|
||||
@ -30,17 +28,22 @@ from zope.interface import implementer
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
|
||||
from scrapy.core.downloader.webclient import _parse
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import StopDownload
|
||||
from scrapy.http import Headers, Response
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.python import to_bytes, to_unicode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.base import ReactorBase
|
||||
from twisted.internet.interfaces import IConsumer
|
||||
|
||||
# typing.NotRequired and typing.Self require Python 3.11
|
||||
from typing_extensions import NotRequired, Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_T = TypeVar("_T")
|
||||
|
@ -4,25 +4,27 @@ from time import time
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
from urllib.parse import urldefrag
|
||||
|
||||
from twisted.internet.base import DelayedCall
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.internet.error import TimeoutError
|
||||
from twisted.web.client import URI
|
||||
from twisted.web.iweb import IPolicyForHTTPS
|
||||
|
||||
from scrapy.core.downloader.contextfactory import load_context_factory_from_settings
|
||||
from scrapy.core.downloader.webclient import _parse
|
||||
from scrapy.core.http2.agent import H2Agent, H2ConnectionPool, ScrapyProxyH2Agent
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.utils.python import to_bytes
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.base import DelayedCall
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.web.iweb import IPolicyForHTTPS
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
class H2DownloadHandler:
|
||||
def __init__(self, settings: Settings, crawler: Crawler):
|
||||
|
@ -2,22 +2,23 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Optional, Type
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.boto import is_botocore_available
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.misc import build_from_crawler
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
class S3DownloadHandler:
|
||||
def __init__(
|
||||
|
@ -6,19 +6,22 @@ See documentation in docs/topics/downloader-middleware.rst
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Callable, Generator, List, Union, cast
|
||||
from typing import TYPE_CHECKING, Any, Callable, Generator, List, Union, cast
|
||||
|
||||
from twisted.internet.defer import Deferred, inlineCallbacks
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy import Spider
|
||||
from scrapy.exceptions import _InvalidOutput
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.middleware import MiddlewareManager
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.conf import build_component_list
|
||||
from scrapy.utils.defer import deferred_from_coro, mustbe_deferred
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy import Spider
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
class DownloaderMiddlewareManager(MiddlewareManager):
|
||||
component_name = "downloader middleware"
|
||||
|
@ -1,18 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from time import time
|
||||
from typing import Optional, Tuple
|
||||
from typing import TYPE_CHECKING, Optional, Tuple
|
||||
from urllib.parse import ParseResult, urldefrag, urlparse, urlunparse
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.protocol import ClientFactory
|
||||
from twisted.web.http import HTTPClient
|
||||
|
||||
from scrapy import Request
|
||||
from scrapy.http import Headers, Response
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.python import to_bytes, to_unicode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy import Request
|
||||
|
||||
|
||||
def _parsed_url_args(parsed: ParseResult) -> Tuple[bytes, bytes, bytes, int, bytes]:
|
||||
# Assume parsed is urlparse-d from Request.url,
|
||||
|
@ -34,9 +34,8 @@ from scrapy.core.scraper import Scraper
|
||||
from scrapy.exceptions import CloseSpider, DontCloseSpider, IgnoreRequest
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.logformatter import LogFormatter
|
||||
from scrapy.settings import BaseSettings, Settings
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.signalmanager import SignalManager
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.utils.log import failure_to_exc_info, logformatter_adapter
|
||||
from scrapy.utils.misc import build_from_crawler, load_object
|
||||
from scrapy.utils.python import global_object_name
|
||||
@ -46,6 +45,9 @@ if TYPE_CHECKING:
|
||||
from scrapy.core.scheduler import BaseScheduler
|
||||
from scrapy.core.scraper import _HandleOutputDeferred
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import deque
|
||||
from typing import Deque, Dict, List, Optional, Tuple
|
||||
from typing import TYPE_CHECKING, Deque, Dict, List, Optional, Tuple
|
||||
|
||||
from twisted.internet import defer
|
||||
from twisted.internet.base import ReactorBase
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.internet.endpoints import HostnameEndpoint
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import (
|
||||
URI,
|
||||
@ -16,9 +16,15 @@ from twisted.web.error import SchemeNotSupported
|
||||
|
||||
from scrapy.core.downloader.contextfactory import AcceptableProtocolsContextFactory
|
||||
from scrapy.core.http2.protocol import H2ClientFactory, H2ClientProtocol
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.base import ReactorBase
|
||||
from twisted.internet.endpoints import HostnameEndpoint
|
||||
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
ConnectionKeyT = Tuple[bytes, bytes, int]
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import itertools
|
||||
import logging
|
||||
from collections import deque
|
||||
from ipaddress import IPv4Address, IPv6Address
|
||||
from typing import Any, Deque, Dict, List, Optional, Union
|
||||
from typing import TYPE_CHECKING, Any, Deque, Dict, List, Optional, Union
|
||||
|
||||
from h2.config import H2Configuration
|
||||
from h2.connection import H2Connection
|
||||
@ -20,7 +21,6 @@ from h2.events import (
|
||||
WindowUpdated,
|
||||
)
|
||||
from h2.exceptions import FrameTooLargeError, H2Error
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.internet.error import TimeoutError
|
||||
from twisted.internet.interfaces import (
|
||||
IAddress,
|
||||
@ -30,14 +30,21 @@ from twisted.internet.interfaces import (
|
||||
from twisted.internet.protocol import Factory, Protocol, connectionDone
|
||||
from twisted.internet.ssl import Certificate
|
||||
from twisted.protocols.policies import TimeoutMixin
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import URI
|
||||
from zope.interface import implementer
|
||||
|
||||
from scrapy.core.http2.stream import Stream, StreamCloseReason
|
||||
from scrapy.http import Request
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ipaddress import IPv4Address, IPv6Address
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import URI
|
||||
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
@ -5,19 +7,20 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
||||
|
||||
from h2.errors import ErrorCodes
|
||||
from h2.exceptions import H2Error, ProtocolError, StreamClosedError
|
||||
from hpack import HeaderTuple
|
||||
from twisted.internet.defer import CancelledError, Deferred
|
||||
from twisted.internet.error import ConnectionClosed
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.web.client import ResponseFailed
|
||||
|
||||
from scrapy.http import Request
|
||||
from scrapy.http.headers import Headers
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from hpack import HeaderTuple
|
||||
|
||||
from scrapy.core.http2.protocol import H2ClientProtocol
|
||||
from scrapy.http import Request
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -87,7 +90,7 @@ class Stream:
|
||||
self,
|
||||
stream_id: int,
|
||||
request: Request,
|
||||
protocol: "H2ClientProtocol",
|
||||
protocol: H2ClientProtocol,
|
||||
download_maxsize: int = 0,
|
||||
download_warnsize: int = 0,
|
||||
) -> None:
|
||||
@ -99,7 +102,7 @@ class Stream:
|
||||
"""
|
||||
self.stream_id: int = stream_id
|
||||
self._request: Request = request
|
||||
self._protocol: "H2ClientProtocol" = protocol
|
||||
self._protocol: H2ClientProtocol = protocol
|
||||
|
||||
self._download_maxsize = self._request.meta.get(
|
||||
"download_maxsize", download_maxsize
|
||||
|
@ -6,14 +6,10 @@ from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, List, Optional, Type, cast
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
# working around https://github.com/sphinx-doc/sphinx/issues/10400
|
||||
from twisted.internet.defer import Deferred # noqa: TC002
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.dupefilters import BaseDupeFilter
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.pqueues import ScrapyPriorityQueue
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.spiders import Spider # noqa: TC001
|
||||
from scrapy.utils.job import job_dir
|
||||
from scrapy.utils.misc import build_from_crawler, load_object
|
||||
|
||||
@ -24,6 +20,12 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.dupefilters import BaseDupeFilter
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.pqueues import ScrapyPriorityQueue
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -10,6 +10,7 @@ import logging
|
||||
from inspect import isasyncgenfunction, iscoroutine
|
||||
from itertools import islice
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
AsyncIterable,
|
||||
Callable,
|
||||
@ -30,7 +31,6 @@ from scrapy import Request, Spider
|
||||
from scrapy.exceptions import _InvalidOutput
|
||||
from scrapy.http import Response
|
||||
from scrapy.middleware import MiddlewareManager
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.asyncgen import as_async_generator, collect_asyncgen
|
||||
from scrapy.utils.conf import build_component_list
|
||||
from scrapy.utils.defer import (
|
||||
@ -41,6 +41,10 @@ from scrapy.utils.defer import (
|
||||
)
|
||||
from scrapy.utils.python import MutableAsyncChain, MutableChain
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -6,16 +6,18 @@ from typing import TYPE_CHECKING, Union
|
||||
|
||||
from w3lib import html
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import HtmlResponse, Response
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -2,24 +2,26 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from http.cookiejar import Cookie
|
||||
from typing import TYPE_CHECKING, Any, DefaultDict, Iterable, Optional, Sequence, Union
|
||||
|
||||
from tldextract import TLDExtract
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
from scrapy.http.cookies import CookieJar
|
||||
from scrapy.http.request import VerboseCookie
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.python import to_unicode
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from http.cookiejar import Cookie
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http.request import VerboseCookie
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -8,15 +8,16 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Iterable, Tuple, Union
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.python import without_none_values
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class DefaultHeadersMiddleware:
|
||||
def __init__(self, headers: Iterable[Tuple[str, str]]):
|
||||
|
@ -9,13 +9,14 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class DownloadTimeoutMiddleware:
|
||||
def __init__(self, timeout: float = 180):
|
||||
|
@ -11,14 +11,15 @@ from typing import TYPE_CHECKING, Union
|
||||
from w3lib.http import basic_auth_header
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.url import url_is_from_any_domain
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class HttpAuthMiddleware:
|
||||
"""Set Basic HTTP Authorization header
|
||||
|
@ -16,19 +16,20 @@ from twisted.internet.error import (
|
||||
from twisted.web.client import ResponseFailed
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import IgnoreRequest, NotConfigured
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.http.response import Response
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils.misc import load_object
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.http.response import Response
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
class HttpCacheMiddleware:
|
||||
DOWNLOAD_EXCEPTIONS = (
|
||||
|
@ -6,11 +6,9 @@ from logging import getLogger
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import IgnoreRequest, NotConfigured
|
||||
from scrapy.http import Response, TextResponse
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils._compression import (
|
||||
_DecompressionMaxSizeExceeded,
|
||||
_inflate,
|
||||
@ -24,6 +22,10 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
ACCEPTED_ENCODINGS: List[bytes] = [b"gzip", b"deflate"]
|
||||
|
@ -9,10 +9,7 @@ from urllib.request import ( # type: ignore[attr-defined]
|
||||
proxy_bypass,
|
||||
)
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.python import to_bytes
|
||||
|
||||
@ -20,6 +17,10 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class HttpProxyMiddleware:
|
||||
def __init__(self, auth_encoding: Optional[str] = "latin-1"):
|
||||
|
@ -6,15 +6,17 @@ import warnings
|
||||
from typing import TYPE_CHECKING, Set
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import IgnoreRequest
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -6,11 +6,8 @@ from urllib.parse import urljoin
|
||||
|
||||
from w3lib.url import safe_url_string
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import IgnoreRequest, NotConfigured
|
||||
from scrapy.http import HtmlResponse, Response
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.response import get_meta_refresh
|
||||
|
||||
@ -18,6 +15,11 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -16,12 +16,8 @@ import warnings
|
||||
from logging import Logger, getLogger
|
||||
from typing import TYPE_CHECKING, Any, Optional, Tuple, Type, Union
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
|
||||
from scrapy.http import Response
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import BaseSettings, Settings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.utils.misc import load_object
|
||||
from scrapy.utils.python import global_object_name
|
||||
from scrapy.utils.response import response_status_message
|
||||
@ -30,6 +26,12 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
retry_logger = getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -10,22 +10,24 @@ import logging
|
||||
from typing import TYPE_CHECKING, Any, Dict, Optional, Union
|
||||
|
||||
from twisted.internet.defer import Deferred, maybeDeferred
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy import Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import IgnoreRequest, NotConfigured
|
||||
from scrapy.http import Request, Response
|
||||
from scrapy.http.request import NO_CALLBACK
|
||||
from scrapy.robotstxt import RobotParser
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.log import failure_to_exc_info
|
||||
from scrapy.utils.misc import load_object
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.robotstxt import RobotParser
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -4,11 +4,7 @@ from typing import TYPE_CHECKING, Dict, List, Tuple, Union
|
||||
|
||||
from twisted.web import http
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils.python import global_object_name, to_bytes
|
||||
from scrapy.utils.request import request_httprepr
|
||||
|
||||
@ -16,6 +12,11 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request, Spider
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
def get_header_size(
|
||||
headers: Dict[str, Union[List[Union[str, bytes]], Tuple[Union[str, bytes], ...]]]
|
||||
|
@ -5,13 +5,14 @@ from __future__ import annotations
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
class UserAgentMiddleware:
|
||||
"""This middleware allows spiders to override the user_agent"""
|
||||
|
@ -4,11 +4,6 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Optional, Set
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.utils.job import job_dir
|
||||
from scrapy.utils.request import (
|
||||
RequestFingerprinter,
|
||||
@ -17,10 +12,15 @@ from scrapy.utils.request import (
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
class BaseDupeFilter:
|
||||
|
@ -4,12 +4,16 @@ The Extension Manager
|
||||
See documentation in docs/topics/extensions.rst
|
||||
"""
|
||||
|
||||
from typing import Any, List
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any, List
|
||||
|
||||
from scrapy.middleware import MiddlewareManager
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.utils.conf import build_component_list
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy.settings import Settings
|
||||
|
||||
|
||||
class ExtensionManager(MiddlewareManager):
|
||||
component_name = "extension"
|
||||
|
@ -10,17 +10,19 @@ import logging
|
||||
from collections import defaultdict
|
||||
from typing import TYPE_CHECKING, Any, DefaultDict, Dict
|
||||
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -8,13 +8,14 @@ from datetime import datetime, timezone
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
class CoreStats:
|
||||
def __init__(self, stats: StatsCollector):
|
||||
|
@ -12,17 +12,20 @@ import sys
|
||||
import threading
|
||||
import traceback
|
||||
from pdb import Pdb
|
||||
from types import FrameType
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.utils.engine import format_engine_status
|
||||
from scrapy.utils.trackref import format_live_refs
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from types import FrameType
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -31,18 +31,15 @@ from typing import (
|
||||
)
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
from twisted.internet import threads
|
||||
from twisted.internet.defer import Deferred, DeferredList, maybeDeferred
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.internet.threads import deferToThread
|
||||
from w3lib.url import file_uri_to_path
|
||||
from zope.interface import Interface, implementer
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
|
||||
from scrapy.exporters import BaseItemExporter
|
||||
from scrapy.extensions.postprocessing import PostProcessingManager
|
||||
from scrapy.settings import BaseSettings, Settings
|
||||
from scrapy.settings import Settings
|
||||
from scrapy.utils.boto import is_botocore_available
|
||||
from scrapy.utils.conf import feed_complete_default_values_from_settings
|
||||
from scrapy.utils.defer import maybe_deferred_to_future
|
||||
@ -54,11 +51,14 @@ from scrapy.utils.python import without_none_values
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from _typeshed import OpenBinaryMode
|
||||
from twisted.python.failure import Failure
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exporters import BaseItemExporter
|
||||
from scrapy.settings import BaseSettings
|
||||
|
||||
try:
|
||||
import boto3 # noqa: F401
|
||||
@ -67,6 +67,9 @@ try:
|
||||
except ImportError:
|
||||
IS_BOTO3_AVAILABLE = False
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
UriParamsCallableT = Callable[[Dict[str, Any], Spider], Optional[Dict[str, Any]]]
|
||||
|
||||
_StorageT = TypeVar("_StorageT", bound="FeedStorageProtocol")
|
||||
@ -160,7 +163,7 @@ class BlockingFeedStorage:
|
||||
return NamedTemporaryFile(prefix="feed-", dir=path)
|
||||
|
||||
def store(self, file: IO[bytes]) -> Optional[Deferred]:
|
||||
return threads.deferToThread(self._store_in_thread, file)
|
||||
return deferToThread(self._store_in_thread, file)
|
||||
|
||||
def _store_in_thread(self, file: IO[bytes]) -> None:
|
||||
raise NotImplementedError
|
||||
|
@ -1,3 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import logging
|
||||
import os
|
||||
@ -13,10 +15,7 @@ from weakref import WeakKeyDictionary
|
||||
from w3lib.http import headers_dict_to_raw, headers_raw_to_dict
|
||||
|
||||
from scrapy.http import Headers, Response
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.project import data_path
|
||||
from scrapy.utils.python import to_bytes, to_unicode
|
||||
@ -26,6 +25,10 @@ if TYPE_CHECKING:
|
||||
# typing.Concatenate requires Python 3.10
|
||||
from typing_extensions import Concatenate
|
||||
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.settings import BaseSettings
|
||||
from scrapy.spiders import Spider
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -6,14 +6,16 @@ from typing import TYPE_CHECKING, Optional, Tuple, Union
|
||||
from twisted.internet import task
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -10,15 +10,16 @@ import gc
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils.trackref import live_refs
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
class MemoryDebugger:
|
||||
def __init__(self, stats: StatsCollector):
|
||||
|
@ -16,7 +16,6 @@ from typing import TYPE_CHECKING, List
|
||||
from twisted.internet import task
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.mail import MailSender
|
||||
from scrapy.utils.engine import get_engine_status
|
||||
@ -25,6 +24,9 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -8,15 +8,17 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||
from twisted.internet import task
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
from scrapy.utils.serialize import ScrapyJSONEncoder
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -5,7 +5,6 @@ from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.utils.job import job_dir
|
||||
|
||||
@ -13,6 +12,8 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
|
||||
|
||||
class SpiderState:
|
||||
"""Store and load spider state during a scraping job"""
|
||||
|
@ -8,18 +8,19 @@ from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, List, Optional
|
||||
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from scrapy import Spider, signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.mail import MailSender
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.statscollectors import StatsCollector
|
||||
|
||||
|
||||
class StatsMailer:
|
||||
def __init__(self, stats: StatsCollector, recipients: List[str], mail: MailSender):
|
||||
|
@ -26,7 +26,6 @@ except (ImportError, SyntaxError):
|
||||
TWISTED_CONCH_AVAILABLE = False
|
||||
|
||||
from scrapy import signals
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.utils.decorators import defers
|
||||
from scrapy.utils.engine import print_engine_status
|
||||
@ -36,6 +35,10 @@ from scrapy.utils.trackref import print_live_refs
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.crawler import Crawler
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# signal to update telnet variables
|
||||
|
@ -4,15 +4,17 @@ import logging
|
||||
from typing import TYPE_CHECKING, Optional, Tuple
|
||||
|
||||
from scrapy import Request, Spider, signals
|
||||
from scrapy.core.downloader import Slot
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.exceptions import NotConfigured
|
||||
from scrapy.http import Response
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.core.downloader import Slot
|
||||
from scrapy.crawler import Crawler
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -17,8 +17,6 @@ from typing import (
|
||||
cast,
|
||||
)
|
||||
|
||||
from scrapy import Request
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.httpobj import urlparse_cached
|
||||
from scrapy.utils.python import to_unicode
|
||||
|
||||
@ -26,6 +24,10 @@ if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy import Request
|
||||
from scrapy.http import Response
|
||||
|
||||
|
||||
# Defined in the http.cookiejar module, but undocumented:
|
||||
# https://github.com/python/cpython/blob/v3.9.0/Lib/http/cookiejar.py#L527
|
||||
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
|
||||
|
@ -28,13 +28,14 @@ from lxml.html import TextareaElement # nosec
|
||||
from w3lib.html import strip_html5_whitespace
|
||||
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.http.response.text import TextResponse
|
||||
from scrapy.utils.python import is_listlike, to_bytes
|
||||
|
||||
if TYPE_CHECKING:
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
from scrapy.http.response.text import TextResponse
|
||||
|
||||
|
||||
FormdataVType = Union[str, Iterable[str]]
|
||||
FormdataKVType = Tuple[str, FormdataVType]
|
||||
|
@ -7,7 +7,6 @@ See documentation in docs/topics/request-response.rst
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from ipaddress import IPv4Address, IPv6Address
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
@ -26,8 +25,6 @@ from typing import (
|
||||
)
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from twisted.internet.ssl import Certificate
|
||||
|
||||
from scrapy.exceptions import NotSupported
|
||||
from scrapy.http.headers import Headers
|
||||
from scrapy.http.request import CookiesT, Request
|
||||
@ -35,6 +32,10 @@ from scrapy.link import Link
|
||||
from scrapy.utils.trackref import object_ref
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ipaddress import IPv4Address, IPv6Address
|
||||
|
||||
from twisted.internet.ssl import Certificate
|
||||
|
||||
# typing.Self requires Python 3.11
|
||||
from typing_extensions import Self
|
||||
|
||||
|
@ -35,15 +35,16 @@ from w3lib.encoding import (
|
||||
)
|
||||
from w3lib.html import strip_html5_whitespace
|
||||
|
||||
from scrapy.http.request import CookiesT, Request
|
||||
from scrapy.http.response import Response
|
||||
from scrapy.link import Link
|
||||
from scrapy.utils.python import memoizemethod_noargs, to_unicode
|
||||
from scrapy.utils.response import get_base_url
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from scrapy.http.request import CookiesT, Request
|
||||
from scrapy.selector import Selector, SelectorList
|
||||
|
||||
|
||||
_NONE = object()
|
||||
|
||||
|
||||
|
@ -2,10 +2,13 @@
Link extractor based on lxml.html
"""

from __future__ import annotations

import logging
import operator
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterable,
@ -20,13 +23,10 @@ from typing import (
from urllib.parse import urljoin, urlparse

from lxml import etree # nosec
from lxml.html import HtmlElement # nosec
from parsel.csstranslator import HTMLTranslator
from w3lib.html import strip_html5_whitespace
from w3lib.url import canonicalize_url, safe_url_string

from scrapy import Selector
from scrapy.http import TextResponse
from scrapy.link import Link
from scrapy.linkextractors import IGNORED_EXTENSIONS, _is_valid_url, _matches, re
from scrapy.utils.misc import arg_to_iter, rel_has_nofollow
@ -34,6 +34,13 @@ from scrapy.utils.python import unique as unique_list
from scrapy.utils.response import get_base_url
from scrapy.utils.url import url_has_any_extension, url_is_from_any_domain

if TYPE_CHECKING:
from lxml.html import HtmlElement # nosec

from scrapy import Selector
from scrapy.http import TextResponse


logger = logging.getLogger(__name__)

# from lxml/src/lxml/html/__init__.py

@ -4,14 +4,18 @@ Item Loader
See documentation in docs/topics/loaders.rst
"""

from typing import Any, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional

import itemloaders

from scrapy.http import TextResponse
from scrapy.item import Item
from scrapy.selector import Selector

if TYPE_CHECKING:
from scrapy.http import TextResponse


class ItemLoader(itemloaders.ItemLoader):
"""
@ -91,7 +95,7 @@ class ItemLoader(itemloaders.ItemLoader):
selector: Optional[Selector] = None,
response: Optional[TextResponse] = None,
parent: Optional[itemloaders.ItemLoader] = None,
**context: Any
**context: Any,
):
if selector is None and response is not None:
try:

@ -6,8 +6,9 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, TypedDict, Union

from twisted.python.failure import Failure

from scrapy import Request, Spider
from scrapy.http import Response
# working around https://github.com/sphinx-doc/sphinx/issues/10400
from scrapy import Request, Spider # noqa: TC001
from scrapy.http import Response # noqa: TC001
from scrapy.utils.request import referer_str

if TYPE_CHECKING:

@ -30,20 +30,22 @@ from typing import (
from twisted import version as twisted_version
from twisted.internet import ssl
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.python.versions import Version

from scrapy.settings import BaseSettings
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.python import to_bytes

if TYPE_CHECKING:
# imports twisted.internet.reactor
from twisted.mail.smtp import ESMTPSenderFactory
from twisted.python.failure import Failure

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.settings import BaseSettings


logger = logging.getLogger(__name__)


@ -17,19 +17,19 @@ from typing import (
cast,
)

from twisted.internet.defer import Deferred

from scrapy import Spider
from scrapy.exceptions import NotConfigured
from scrapy.settings import Settings
from scrapy.utils.defer import process_chain, process_parallel
from scrapy.utils.misc import build_from_crawler, build_from_settings, load_object

if TYPE_CHECKING:
from twisted.internet.defer import Deferred

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import Settings


logger = logging.getLogger(__name__)

@ -6,16 +6,18 @@ See documentation in docs/item-pipeline.rst

from __future__ import annotations

from typing import Any, List
from typing import TYPE_CHECKING, Any, List

from twisted.internet.defer import Deferred

from scrapy import Spider
from scrapy.middleware import MiddlewareManager
from scrapy.settings import Settings
from scrapy.utils.conf import build_component_list
from scrapy.utils.defer import deferred_f_from_coro_f

if TYPE_CHECKING:
from twisted.internet.defer import Deferred

from scrapy import Spider
from scrapy.settings import Settings


class ItemPipelineManager(MiddlewareManager):
component_name = "item pipeline"

@ -16,7 +16,6 @@ from collections import defaultdict
from contextlib import suppress
from ftplib import FTP
from io import BytesIO
from os import PathLike
from pathlib import Path
from typing import (
IO,
@ -38,11 +37,9 @@ from typing import (
from urllib.parse import urlparse

from itemadapter import ItemAdapter
from twisted.internet import defer, threads
from twisted.internet.defer import Deferred
from twisted.python.failure import Failure
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.internet.threads import deferToThread

from scrapy import Spider
from scrapy.exceptions import IgnoreRequest, NotConfigured
from scrapy.http import Request, Response
from scrapy.http.request import NO_CALLBACK
@ -56,9 +53,15 @@ from scrapy.utils.python import to_bytes
from scrapy.utils.request import referer_str

if TYPE_CHECKING:
from os import PathLike

from twisted.python.failure import Failure

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider


logger = logging.getLogger(__name__)

@ -210,7 +213,7 @@ class S3FilesStore:
key_name = f"{self.prefix}{path}"
return cast(
"Deferred[Dict[str, Any]]",
threads.deferToThread(
deferToThread(
self.s3_client.head_object, Bucket=self.bucket, Key=key_name # type: ignore[attr-defined]
),
)
@ -229,7 +232,7 @@ class S3FilesStore:
extra = self._headers_to_botocore_kwargs(self.HEADERS)
if headers:
extra.update(self._headers_to_botocore_kwargs(headers))
return threads.deferToThread(
return deferToThread(
self.s3_client.put_object, # type: ignore[attr-defined]
Bucket=self.bucket,
Key=key_name,
@ -326,9 +329,7 @@ class GCSFilesStore:
blob_path = self._get_blob_path(path)
return cast(
Deferred[StatInfo],
threads.deferToThread(self.bucket.get_blob, blob_path).addCallback(
_onsuccess
),
deferToThread(self.bucket.get_blob, blob_path).addCallback(_onsuccess),
)

def _get_content_type(self, headers: Optional[Dict[str, str]]) -> str:
@ -351,7 +352,7 @@ class GCSFilesStore:
blob = self.bucket.blob(blob_path)
blob.cache_control = self.CACHE_CONTROL
blob.metadata = {k: str(v) for k, v in (meta or {}).items()}
return threads.deferToThread(
return deferToThread(
blob.upload_from_string,
data=buf.getvalue(),
content_type=self._get_content_type(headers),
@ -388,7 +389,7 @@ class FTPFilesStore:
headers: Optional[Dict[str, str]] = None,
) -> Deferred[Any]:
path = f"{self.basedir}/{path}"
return threads.deferToThread(
return deferToThread(
ftp_store_file,
path=path,
file=buf,
@ -418,7 +419,7 @@ class FTPFilesStore:
except Exception:
return {}

return cast("Deferred[StatInfo]", threads.deferToThread(_stat_file, path))
return cast("Deferred[StatInfo]", deferToThread(_stat_file, path))


class FilesPipeline(MediaPipeline):
@ -553,8 +554,8 @@ class FilesPipeline(MediaPipeline):
}

path = self.file_path(request, info=info, item=item)
# defer.maybeDeferred() overloads don't seem to support a Union[_T, Deferred[_T]] return type
dfd: Deferred[StatInfo] = defer.maybeDeferred(self.store.stat_file, path, info) # type: ignore[arg-type]
# maybeDeferred() overloads don't seem to support a Union[_T, Deferred[_T]] return type
dfd: Deferred[StatInfo] = maybeDeferred(self.store.stat_file, path, info) # type: ignore[arg-type]
dfd2: Deferred[Optional[FileInfo]] = dfd.addCallback(_onsuccess)
dfd2.addErrback(lambda _: None)
dfd2.addErrback(

@ -11,7 +11,6 @@ import hashlib
import warnings
from contextlib import suppress
from io import BytesIO
from os import PathLike
from typing import (
TYPE_CHECKING,
Any,
@ -28,7 +27,6 @@ from typing import (

from itemadapter import ItemAdapter

from scrapy import Spider
from scrapy.exceptions import DropItem, NotConfigured, ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.http.request import NO_CALLBACK
@ -40,15 +38,20 @@ from scrapy.pipelines.files import (
S3FilesStore,
_md5sum,
)
from scrapy.pipelines.media import FileInfoOrError, MediaPipeline
from scrapy.settings import Settings
from scrapy.utils.python import get_func_args, to_bytes

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from os import PathLike

from PIL import Image

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.pipelines.media import FileInfoOrError, MediaPipeline


class NoimagesDrop(DropItem):
"""Product with no images exception"""

@ -25,21 +25,23 @@ from typing import (
from twisted.internet.defer import Deferred, DeferredList
from twisted.python.failure import Failure

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.http.request import NO_CALLBACK, Request
from scrapy.settings import Settings
from scrapy.utils.datatypes import SequenceExclude
from scrapy.utils.defer import defer_result, mustbe_deferred
from scrapy.utils.log import failure_to_exc_info
from scrapy.utils.misc import arg_to_iter
from scrapy.utils.request import RequestFingerprinter

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.utils.request import RequestFingerprinter


_T = TypeVar("_T")


@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Type

from twisted.internet import defer
from twisted.internet.base import ReactorBase, ThreadedResolver
from twisted.internet.defer import Deferred
from twisted.internet.interfaces import (
IAddress,
IHostnameResolver,
@ -17,6 +16,8 @@ from zope.interface.declarations import implementer, provider
from scrapy.utils.datatypes import LocalCache

if TYPE_CHECKING:
from twisted.internet.defer import Deferred

# typing.Self requires Python 3.11
from typing_extensions import Self

@ -6,7 +6,6 @@ from abc import ABCMeta, abstractmethod
from typing import TYPE_CHECKING, Optional, Union
from warnings import warn

from scrapy import Spider
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.python import to_unicode

@ -14,8 +13,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.crawler import Crawler


logger = logging.getLogger(__name__)


@ -4,7 +4,6 @@ import copy
import json
from importlib import import_module
from pprint import pformat
from types import ModuleType
from typing import (
TYPE_CHECKING,
Any,
@ -27,6 +26,8 @@ from scrapy.settings import default_settings
_SettingsKeyT = Union[bool, float, int, str, None]

if TYPE_CHECKING:
from types import ModuleType

# https://github.com/python/typing/issues/445#issuecomment-1131458824
from _typeshed import SupportsItems

@ -1,10 +1,14 @@
from typing import Any, List, Tuple
from __future__ import annotations

from typing import TYPE_CHECKING, Any, List, Tuple

from pydispatch import dispatcher
from twisted.internet.defer import Deferred

from scrapy.utils import signal as _signal

if TYPE_CHECKING:
from twisted.internet.defer import Deferred


class SignalManager:
def __init__(self, sender: Any = dispatcher.Anonymous):

@ -3,21 +3,23 @@ from __future__ import annotations
import traceback
import warnings
from collections import defaultdict
from types import ModuleType
from typing import TYPE_CHECKING, DefaultDict, Dict, List, Tuple, Type

from zope.interface import implementer

from scrapy import Request, Spider
from scrapy.interfaces import ISpiderLoader
from scrapy.settings import BaseSettings
from scrapy.utils.misc import walk_modules
from scrapy.utils.spider import iter_spider_classes

if TYPE_CHECKING:
from types import ModuleType

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Request, Spider
from scrapy.settings import BaseSettings


@implementer(ISpiderLoader)
class SpiderLoader:

@ -9,15 +9,17 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Request, Response
from scrapy.statscollectors import StatsCollector

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector


logger = logging.getLogger(__name__)


@ -9,16 +9,18 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, Iterable, List, Optional

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.exceptions import IgnoreRequest
from scrapy.http import Response
from scrapy.settings import BaseSettings

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.http import Response
from scrapy.settings import BaseSettings


logger = logging.getLogger(__name__)

@ -12,10 +12,8 @@ import warnings
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable, Set

from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.http import Request, Response
from scrapy.statscollectors import StatsCollector
from scrapy.utils.httpobj import urlparse_cached

warnings.warn(
@ -28,6 +26,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.crawler import Crawler
from scrapy.statscollectors import StatsCollector


logger = logging.getLogger(__name__)


@ -23,10 +23,8 @@ from urllib.parse import urlparse
from w3lib.url import safe_url_string

from scrapy import Spider, signals
from scrapy.crawler import Crawler
from scrapy.exceptions import NotConfigured
from scrapy.http import Request, Response
from scrapy.settings import BaseSettings
from scrapy.utils.misc import load_object
from scrapy.utils.python import to_unicode
from scrapy.utils.url import strip_url
@ -35,6 +33,10 @@ if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings


LOCAL_SCHEMES: Tuple[str, ...] = (
"about",
"blob",

@ -9,15 +9,17 @@ from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any, AsyncIterable, Iterable

from scrapy import Spider
from scrapy.exceptions import NotConfigured
from scrapy.http import Request, Response
from scrapy.settings import BaseSettings

if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Spider
from scrapy.settings import BaseSettings


logger = logging.getLogger(__name__)


@ -1,10 +1,14 @@
from typing import Any, Iterable, Optional, cast
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Iterable, Optional, cast

from scrapy import Request
from scrapy.http import Response
from scrapy.spiders import Spider
from scrapy.utils.spider import iterate_spider_output

if TYPE_CHECKING:
from scrapy.http import Response


class InitSpider(Spider):
"""Base Spider with initialization facilities"""

@ -6,20 +6,22 @@ from __future__ import annotations

import marshal
import pickle # nosec
from os import PathLike
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Optional, Type, Union

from queuelib import queue

from scrapy import Request
from scrapy.crawler import Crawler
from scrapy.utils.request import request_from_dict

if TYPE_CHECKING:
from os import PathLike

# typing.Self requires Python 3.11
from typing_extensions import Self

from scrapy import Request
from scrapy.crawler import Crawler


def _with_mkdir(queue_class: Type[queue.BaseQueue]) -> Type[queue.BaseQueue]:
class DirectoriesCreated(queue_class): # type: ignore[valid-type,misc]

@ -2,15 +2,17 @@
Scrapy extension for collecting scraping stats
"""

from __future__ import annotations

import logging
import pprint
from typing import TYPE_CHECKING, Any, Dict, Optional

from scrapy import Spider

if TYPE_CHECKING:
from scrapy import Spider
from scrapy.crawler import Crawler


logger = logging.getLogger(__name__)


@ -18,7 +20,7 @@ StatsT = Dict[str, Any]


class StatsCollector:
def __init__(self, crawler: "Crawler"):
def __init__(self, crawler: Crawler):
self._dump: bool = crawler.settings.getbool("STATS_DUMP")
self._stats: StatsT = {}

@ -67,7 +69,7 @@ class StatsCollector:


class MemoryStatsCollector(StatsCollector):
def __init__(self, crawler: "Crawler"):
def __init__(self, crawler: Crawler):
super().__init__(crawler)
self.spider_stats: Dict[str, StatsT] = {}

@ -4,8 +4,8 @@ import warnings
from functools import wraps
from typing import TYPE_CHECKING, Any, Callable, TypeVar

from twisted.internet import defer, threads
from twisted.internet.defer import Deferred
from twisted.internet.defer import Deferred, maybeDeferred
from twisted.internet.threads import deferToThread

from scrapy.exceptions import ScrapyDeprecationWarning

@ -48,7 +48,7 @@ def defers(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:

@wraps(func)
def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
return defer.maybeDeferred(func, *a, **kw)
return maybeDeferred(func, *a, **kw)

return wrapped

@ -60,6 +60,6 @@ def inthread(func: Callable[_P, _T]) -> Callable[_P, Deferred[_T]]:

@wraps(func)
def wrapped(*a: _P.args, **kw: _P.kwargs) -> Deferred[_T]:
return threads.deferToThread(func, *a, **kw)
return deferToThread(func, *a, **kw)

return wrapped

@ -34,12 +34,13 @@ from twisted.internet import defer
from twisted.internet.defer import Deferred, DeferredList, ensureDeferred
from twisted.internet.task import Cooperator
from twisted.python import failure
from twisted.python.failure import Failure

from scrapy.exceptions import IgnoreRequest, ScrapyDeprecationWarning
from scrapy.utils.reactor import _get_asyncio_event_loop, is_asyncio_reactor_installed

if TYPE_CHECKING:
from twisted.python.failure import Failure

# typing.Concatenate and typing.ParamSpec require Python 3.10
from typing_extensions import Concatenate, ParamSpec

@ -4,9 +4,10 @@ from __future__ import annotations

# used in global tests code
from time import time # noqa: F401
from typing import Any, List, Tuple
from typing import TYPE_CHECKING, Any, List, Tuple

from scrapy.core.engine import ExecutionEngine
if TYPE_CHECKING:
from scrapy.core.engine import ExecutionEngine


def get_engine_status(engine: ExecutionEngine) -> List[Tuple[str, Any]]:

@ -1,11 +1,15 @@
from __future__ import annotations

import struct
from gzip import GzipFile
from io import BytesIO

from scrapy.http import Response
from typing import TYPE_CHECKING

from ._compression import _CHUNK_SIZE, _DecompressionMaxSizeExceeded

if TYPE_CHECKING:
from scrapy.http import Response


def gunzip(data: bytes, *, max_size: int = 0) -> bytes:
"""Gunzip the given data and return as much data as possible.

@ -1,12 +1,16 @@
"""Helper functions for scrapy.http objects (Request, Response)"""

from typing import Union
from __future__ import annotations

from typing import TYPE_CHECKING, Union
from urllib.parse import ParseResult, urlparse
from weakref import WeakKeyDictionary

from scrapy.http import Request, Response
if TYPE_CHECKING:
from scrapy.http import Request, Response

_urlparse_cache: "WeakKeyDictionary[Union[Request, Response], ParseResult]" = (
_urlparse_cache: WeakKeyDictionary[Union[Request, Response], ParseResult] = (
WeakKeyDictionary()
)

@ -1,7 +1,10 @@
from pathlib import Path
from typing import Optional
from __future__ import annotations

from scrapy.settings import BaseSettings
from pathlib import Path
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
from scrapy.settings import BaseSettings


def job_dir(settings: BaseSettings) -> Optional[str]:

@ -21,12 +21,13 @@ from twisted.python import log as twisted_log
from twisted.python.failure import Failure

import scrapy
from scrapy.logformatter import LogFormatterResult
from scrapy.settings import Settings, _SettingsKeyT
from scrapy.utils.versions import scrapy_components_versions

if TYPE_CHECKING:
from scrapy.crawler import Crawler
from scrapy.logformatter import LogFormatterResult


logger = logging.getLogger(__name__)

@ -13,7 +13,6 @@ from contextlib import contextmanager
from functools import partial
from importlib import import_module
from pkgutil import iter_modules
from types import ModuleType
from typing import (
IO,
TYPE_CHECKING,
@ -35,10 +34,13 @@ from scrapy.item import Item
from scrapy.utils.datatypes import LocalWeakReferencedCache

if TYPE_CHECKING:
from types import ModuleType

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.settings import BaseSettings


_ITERABLE_SINGLE_VALUES = dict, Item, str, bytes
T = TypeVar("T")

@ -1,7 +1,8 @@
from __future__ import annotations

import os
import warnings
from importlib import import_module
from os import PathLike
from pathlib import Path
from typing import Union

@ -46,7 +47,7 @@ def project_data_dir(project: str = "default") -> str:
return str(d)


def data_path(path: Union[str, PathLike], createdir: bool = False) -> str:
def data_path(path: Union[str, os.PathLike[str]], createdir: bool = False) -> str:
"""
Return the given path joined with the .scrapy data directory.
If given an absolute path, return it unmodified.

@ -2,7 +2,6 @@ from __future__ import annotations

import asyncio
import sys
from asyncio import AbstractEventLoop, AbstractEventLoopPolicy
from contextlib import suppress
from typing import (
TYPE_CHECKING,
@ -20,13 +19,16 @@ from warnings import catch_warnings, filterwarnings, warn

from twisted.internet import asyncioreactor, error
from twisted.internet.base import DelayedCall
from twisted.internet.protocol import ServerFactory
from twisted.internet.tcp import Port

from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.misc import load_object

if TYPE_CHECKING:
from asyncio import AbstractEventLoop, AbstractEventLoopPolicy

from twisted.internet.protocol import ServerFactory
from twisted.internet.tcp import Port

# typing.ParamSpec requires Python 3.10
from typing_extensions import ParamSpec

@ -2,7 +2,6 @@ from __future__ import annotations

import inspect
import logging
from types import CoroutineType, ModuleType
from typing import (
TYPE_CHECKING,
Any,
@ -16,16 +15,19 @@ from typing import (
overload,
)

from twisted.internet.defer import Deferred

from scrapy import Request
from scrapy.spiders import Spider
from scrapy.utils.defer import deferred_from_coro
from scrapy.utils.misc import arg_to_iter

if TYPE_CHECKING:
from types import CoroutineType, ModuleType

from twisted.internet.defer import Deferred

from scrapy import Request
from scrapy.spiderloader import SpiderLoader


logger = logging.getLogger(__name__)

_T = TypeVar("_T")

@ -1,12 +1,16 @@
from typing import Any, Optional
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Optional

import OpenSSL._util as pyOpenSSLutil
import OpenSSL.SSL
import OpenSSL.version
from OpenSSL.crypto import X509Name

from scrapy.utils.python import to_unicode

if TYPE_CHECKING:
from OpenSSL.crypto import X509Name


def ffi_buf_to_string(buf: Any) -> str:
return to_unicode(pyOpenSSLutil.ffi.string(buf))

@ -1,10 +1,14 @@
"""Helper functions for working with templates"""

from __future__ import annotations

import re
import string
from os import PathLike
from pathlib import Path
from typing import Any, Union
from typing import TYPE_CHECKING, Any, Union

if TYPE_CHECKING:
from os import PathLike


def render_templatefile(path: Union[str, PathLike], **kwargs: Any) -> None:

@ -2,21 +2,36 @@
This module contains some assorted functions used in tests
"""

from __future__ import annotations

import asyncio
import os
from importlib import import_module
from pathlib import Path
from posixpath import split
from typing import Any, Awaitable, Dict, List, Optional, Tuple, Type, TypeVar
from typing import (
TYPE_CHECKING,
Any,
Awaitable,
Dict,
List,
Optional,
Tuple,
Type,
TypeVar,
)
from unittest import TestCase, mock

from twisted.internet.defer import Deferred
from twisted.trial.unittest import SkipTest

from scrapy import Spider
from scrapy.crawler import Crawler
from scrapy.utils.boto import is_botocore_available

if TYPE_CHECKING:
from twisted.internet.defer import Deferred


_T = TypeVar("_T")

Some files were not shown because too many files have changed in this diff.