
Add from_crawler() to components that only had from_settings().

Andrey Rakhmatullin 2024-11-12 21:12:32 +05:00
parent f796d8780c
commit 499e7e8aa6
10 changed files with 99 additions and 45 deletions
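Every touched component follows the same migration pattern: the existing from_settings() now emits a ScrapyDeprecationWarning and delegates to a private _from_settings() helper, while a new from_crawler() delegates to the same helper with crawler.settings. A minimal sketch of the pattern — the component class and the SOME_COMPONENT_DEBUG setting are hypothetical, standing in for the real ones:

    import warnings

    from scrapy.exceptions import ScrapyDeprecationWarning


    class SomeComponent:
        """Hypothetical component showing the migration pattern."""

        def __init__(self, debug: bool = False):
            self.debug = debug

        @classmethod
        def from_settings(cls, settings):
            # Kept for backward compatibility; now warns and delegates.
            warnings.warn(
                f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
                category=ScrapyDeprecationWarning,
                stacklevel=2,
            )
            return cls._from_settings(settings)

        @classmethod
        def from_crawler(cls, crawler):
            # New preferred entry point.
            return cls._from_settings(crawler.settings)

        @classmethod
        def _from_settings(cls, settings):
            # Shared construction logic used by both entry points.
            return cls(debug=settings.getbool("SOME_COMPONENT_DEBUG"))

Keeping the construction logic in one private helper means the deprecated path and the new path cannot drift apart during the transition period.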


@@ -27,13 +27,13 @@ the standard ``__init__`` method:

     mailer = MailSender()

-Or you can instantiate it passing a Scrapy settings object, which will respect
-the :ref:`settings <topics-email-settings>`:
+Or you can instantiate it passing a :class:`scrapy.Crawler` instance, which
+will respect the :ref:`settings <topics-email-settings>`:

 .. skip: start

 .. code-block:: python

-    mailer = MailSender.from_settings(settings)
+    mailer = MailSender.from_crawler(crawler)

 And here is how to use it to send an e-mail (without attachments):
@@ -81,13 +81,13 @@ rest of the framework.
     :param smtpssl: enforce using a secure SSL connection
     :type smtpssl: bool

-    .. classmethod:: from_settings(settings)
+    .. classmethod:: from_crawler(crawler)

-        Instantiate using a Scrapy settings object, which will respect
-        :ref:`these Scrapy settings <topics-email-settings>`.
+        Instantiate using a :class:`scrapy.Crawler` instance, which will
+        respect :ref:`these Scrapy settings <topics-email-settings>`.

-        :param settings: the e-mail recipients
-        :type settings: :class:`scrapy.settings.Settings` object
+        :param crawler: the crawler
+        :type crawler: :class:`scrapy.Crawler` object

     .. method:: send(to, subject, body, cc=None, attachs=(), mimetype='text/plain', charset=None)
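As a usage sketch of the new documented API — the extension class, signal wiring, and recipient address here are hypothetical, not part of this commit — an extension would now obtain a configured MailSender from the crawler rather than from a bare settings object:

    from scrapy import signals
    from scrapy.mail import MailSender


    class ReportExtension:
        """Hypothetical extension that mails a note when the spider closes."""

        def __init__(self, mailer: MailSender):
            self.mailer = mailer

        @classmethod
        def from_crawler(cls, crawler):
            # Preferred after this commit: pass the crawler, not crawler.settings.
            ext = cls(MailSender.from_crawler(crawler))
            crawler.signals.connect(ext.spider_closed, signal=signals.spider_closed)
            return ext

        def spider_closed(self, spider):
            return self.mailer.send(
                to=["someone@example.com"],
                subject=f"{spider.name} finished",
                body="Crawl complete.",
            )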


@@ -488,7 +488,7 @@ A request fingerprinter is a class that must implement the following method:
    :param request: request to fingerprint
    :type request: scrapy.http.Request

-Additionally, it may also implement the following methods:
+Additionally, it may also implement the following method:

 .. classmethod:: from_crawler(cls, crawler)
    :noindex:
@@ -504,13 +504,6 @@ Additionally, it may also implement the following methods:
    :param crawler: crawler that uses this request fingerprinter
    :type crawler: :class:`~scrapy.crawler.Crawler` object

-.. classmethod:: from_settings(cls, settings)
-
-   If present, and ``from_crawler`` is not defined, this class method is called
-   to create a request fingerprinter instance from a
-   :class:`~scrapy.settings.Settings` object. It must return a new instance of
-   the request fingerprinter.
-
 .. currentmodule:: scrapy.http

 The :meth:`fingerprint` method of the default request fingerprinter,
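For illustration, a custom fingerprinter now only needs from_crawler. A hedged sketch — the class and the FINGERPRINT_KEEP_FRAGMENTS setting are made up for this example; only scrapy.utils.request.fingerprint() is real:

    from scrapy.utils.request import fingerprint


    class SettingsAwareFingerprinter:
        """Hypothetical fingerprinter configured from crawler settings."""

        def __init__(self, keep_fragments: bool = False):
            self.keep_fragments = keep_fragments

        @classmethod
        def from_crawler(cls, crawler):
            # Called (if defined) so the fingerprinter can read settings.
            return cls(
                keep_fragments=crawler.settings.getbool("FINGERPRINT_KEEP_FRAGMENTS")
            )

        def fingerprint(self, request) -> bytes:
            return fingerprint(request, keep_fragments=self.keep_fragments)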


@@ -21,6 +21,7 @@ from scrapy.core.downloader.tls import (
     ScrapyClientTLSOptions,
     openssl_methods,
 )
+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.misc import build_from_crawler, load_object

 if TYPE_CHECKING:
@@ -69,6 +70,31 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
         method: int = SSL.SSLv23_METHOD,
         *args: Any,
         **kwargs: Any,
     ) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings, method, *args, **kwargs)
+
+    @classmethod
+    def from_crawler(
+        cls,
+        crawler: Crawler,
+        method: int = SSL.SSLv23_METHOD,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Self:
+        return cls._from_settings(crawler.settings, method, *args, **kwargs)
+
+    @classmethod
+    def _from_settings(
+        cls,
+        settings: BaseSettings,
+        method: int = SSL.SSLv23_METHOD,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Self:
         tls_verbose_logging: bool = settings.getbool(
             "DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING"


@@ -1,9 +1,11 @@
 from __future__ import annotations

 import logging
+import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING

+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.job import job_dir
 from scrapy.utils.request import (
     RequestFingerprinter,
@@ -26,6 +28,15 @@ if TYPE_CHECKING:
 class BaseDupeFilter:
     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls()
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
         return cls()

     def request_seen(self, request: Request) -> bool:
@@ -72,17 +83,31 @@ class RFPDupeFilter(BaseDupeFilter):
         *,
         fingerprinter: RequestFingerprinterProtocol | None = None,
     ) -> Self:
-        debug = settings.getbool("DUPEFILTER_DEBUG")
-        return cls(job_dir(settings), debug, fingerprinter=fingerprinter)
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings, fingerprinter=fingerprinter)

     @classmethod
     def from_crawler(cls, crawler: Crawler) -> Self:
         assert crawler.request_fingerprinter
-        return cls.from_settings(
+        return cls._from_settings(
             crawler.settings,
             fingerprinter=crawler.request_fingerprinter,
         )

+    @classmethod
+    def _from_settings(
+        cls,
+        settings: BaseSettings,
+        *,
+        fingerprinter: RequestFingerprinterProtocol | None = None,
+    ) -> Self:
+        debug = settings.getbool("DUPEFILTER_DEBUG")
+        return cls(job_dir(settings), debug, fingerprinter=fingerprinter)
+
     def request_seen(self, request: Request) -> bool:
         fp = self.request_fingerprint(request)
         if fp in self.fingerprints:
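Downstream subclasses should likewise hook construction through from_crawler() rather than the deprecated from_settings(). A hypothetical sketch (the subclass and the stats key are illustrative only):

    from scrapy.dupefilters import RFPDupeFilter


    class LoggingDupeFilter(RFPDupeFilter):
        """Hypothetical subclass that counts duplicates in crawl stats."""

        @classmethod
        def from_crawler(cls, crawler):
            # Override from_crawler, not the deprecated from_settings.
            df = super().from_crawler(crawler)
            df.stats = crawler.stats
            return df

        def request_seen(self, request) -> bool:
            seen = super().request_seen(request)
            if seen and getattr(self, "stats", None):
                self.stats.inc_value("dupefilter/seen")
            return seen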


@@ -48,7 +48,7 @@ class MemoryUsage:
         self.check_interval: float = crawler.settings.getfloat(
             "MEMUSAGE_CHECK_INTERVAL_SECONDS"
         )
-        self.mail: MailSender = MailSender.from_settings(crawler.settings)
+        self.mail: MailSender = MailSender.from_crawler(crawler)
         crawler.signals.connect(self.engine_started, signal=signals.engine_started)
         crawler.signals.connect(self.engine_stopped, signal=signals.engine_stopped)


@@ -33,7 +33,7 @@ class StatsMailer:
         recipients: list[str] = crawler.settings.getlist("STATSMAILER_RCPTS")
         if not recipients:
             raise NotConfigured
-        mail: MailSender = MailSender.from_settings(crawler.settings)
+        mail: MailSender = MailSender.from_crawler(crawler)
         assert crawler.stats
         o = cls(crawler.stats, recipients, mail)
         crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)


@@ -7,6 +7,7 @@ See documentation in docs/topics/email.rst
 from __future__ import annotations

 import logging
+import warnings
 from email import encoders as Encoders
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
@@ -19,6 +20,7 @@ from typing import IO, TYPE_CHECKING, Any
 from twisted.internet import ssl
 from twisted.internet.defer import Deferred

+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import to_bytes
@@ -32,6 +34,7 @@ if TYPE_CHECKING:
     # typing.Self requires Python 3.11
     from typing_extensions import Self

+    from scrapy.crawler import Crawler
     from scrapy.settings import BaseSettings
@@ -72,6 +75,19 @@ class MailSender:
     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings)
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls._from_settings(crawler.settings)
+
+    @classmethod
+    def _from_settings(cls, settings: BaseSettings) -> Self:
         return cls(
             smtphost=settings["MAIL_HOST"],
             mailfrom=settings["MAIL_FROM"],


@@ -7,9 +7,10 @@ See documentation in docs/topics/spider-middleware.rst
 from __future__ import annotations

 import logging
+import warnings
 from typing import TYPE_CHECKING, Any

-from scrapy.exceptions import NotConfigured
+from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
 from scrapy.http import Request, Response

 if TYPE_CHECKING:
@@ -19,6 +20,7 @@ if TYPE_CHECKING:
     from typing_extensions import Self

     from scrapy import Spider
+    from scrapy.crawler import Crawler
     from scrapy.settings import BaseSettings
@@ -31,6 +33,19 @@ class UrlLengthMiddleware:
     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings)
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls._from_settings(crawler.settings)
+
+    @classmethod
+    def _from_settings(cls, settings: BaseSettings) -> Self:
         maxlength = settings.getint("URLLENGTH_LIMIT")
         if not maxlength:
             raise NotConfigured
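Callers move the relevant setting into the crawler's settings and construct from the crawler. Mirroring the updated test at the end of this commit, a minimal sketch:

    from scrapy.spiders import Spider
    from scrapy.spidermiddlewares.urllength import UrlLengthMiddleware
    from scrapy.utils.test import get_crawler

    # Build a crawler whose settings carry the limit, then construct the
    # middleware from the crawler rather than from a bare Settings object.
    crawler = get_crawler(Spider, {"URLLENGTH_LIMIT": 25})
    mw = UrlLengthMiddleware.from_crawler(crawler)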


@@ -33,14 +33,6 @@ class FromCrawlerRFPDupeFilter(RFPDupeFilter):
         return df


-class FromSettingsRFPDupeFilter(RFPDupeFilter):
-    @classmethod
-    def from_settings(cls, settings, *, fingerprinter=None):
-        df = super().from_settings(settings, fingerprinter=fingerprinter)
-        df.method = "from_settings"
-        return df
-
-
 class DirectDupeFilter:
     method = "n/a"
@@ -56,16 +48,6 @@ class RFPDupeFilterTest(unittest.TestCase):
         self.assertTrue(scheduler.df.debug)
         self.assertEqual(scheduler.df.method, "from_crawler")

-    def test_df_from_settings_scheduler(self):
-        settings = {
-            "DUPEFILTER_DEBUG": True,
-            "DUPEFILTER_CLASS": FromSettingsRFPDupeFilter,
-        }
-        crawler = get_crawler(settings_dict=settings)
-        scheduler = Scheduler.from_crawler(crawler)
-        self.assertTrue(scheduler.df.debug)
-        self.assertEqual(scheduler.df.method, "from_settings")
-
     def test_df_direct_scheduler(self):
         settings = {
             "DUPEFILTER_CLASS": DirectDupeFilter,


@@ -3,7 +3,6 @@ from unittest import TestCase
 from testfixtures import LogCapture

 from scrapy.http import Request, Response
-from scrapy.settings import Settings
 from scrapy.spidermiddlewares.urllength import UrlLengthMiddleware
 from scrapy.spiders import Spider
 from scrapy.utils.test import get_crawler
@@ -12,12 +11,10 @@ from scrapy.utils.test import get_crawler
 class TestUrlLengthMiddleware(TestCase):
     def setUp(self):
         self.maxlength = 25
-        settings = Settings({"URLLENGTH_LIMIT": self.maxlength})
-        crawler = get_crawler(Spider)
+        crawler = get_crawler(Spider, {"URLLENGTH_LIMIT": self.maxlength})
         self.spider = crawler._create_spider("foo")
         self.stats = crawler.stats
-        self.mw = UrlLengthMiddleware.from_settings(settings)
+        self.mw = UrlLengthMiddleware.from_crawler(crawler)
         self.response = Response("http://scrapytest.org")
         self.short_url_req = Request("http://scrapytest.org/")