Add from_crawler() to components that only had from_settings().
This commit is contained in:
parent f796d8780c
commit 499e7e8aa6
@@ -27,13 +27,13 @@ the standard ``__init__`` method:

     mailer = MailSender()

-Or you can instantiate it passing a Scrapy settings object, which will respect
-the :ref:`settings <topics-email-settings>`:
+Or you can instantiate it passing a :class:`scrapy.Crawler` instance, which
+will respect the :ref:`settings <topics-email-settings>`:

 .. skip: start

 .. code-block:: python

-    mailer = MailSender.from_settings(settings)
+    mailer = MailSender.from_crawler(crawler)

 And here is how to use it to send an e-mail (without attachments):
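Editor's note: for context, here is a minimal sketch (not part of the commit) of how a component that receives the running crawler would use the new constructor; `MyExtension` is a hypothetical name, only `MailSender.from_crawler()` comes from this commit.

```python
from scrapy.mail import MailSender


class MyExtension:  # hypothetical extension class
    def __init__(self, mailer):
        self.mailer = mailer

    @classmethod
    def from_crawler(cls, crawler):
        # MAIL_HOST, MAIL_FROM, etc. are read from crawler.settings.
        return cls(MailSender.from_crawler(crawler))
```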
@@ -81,13 +81,13 @@ rest of the framework.
     :param smtpssl: enforce using a secure SSL connection
     :type smtpssl: bool

-    .. classmethod:: from_settings(settings)
+    .. classmethod:: from_crawler(crawler)

-        Instantiate using a Scrapy settings object, which will respect
-        :ref:`these Scrapy settings <topics-email-settings>`.
+        Instantiate using a :class:`scrapy.Crawler` instance, which will
+        respect :ref:`these Scrapy settings <topics-email-settings>`.

-        :param settings: the e-mail recipients
-        :type settings: :class:`scrapy.settings.Settings` object
+        :param crawler: the crawler
+        :type crawler: :class:`scrapy.Crawler` object

     .. method:: send(to, subject, body, cc=None, attachs=(), mimetype='text/plain', charset=None)
@@ -488,7 +488,7 @@ A request fingerprinter is a class that must implement the following method:
     :param request: request to fingerprint
     :type request: scrapy.http.Request

-Additionally, it may also implement the following methods:
+Additionally, it may also implement the following method:

 .. classmethod:: from_crawler(cls, crawler)
    :noindex:
@@ -504,13 +504,6 @@ Additionally, it may also implement the following methods:
    :param crawler: crawler that uses this request fingerprinter
    :type crawler: :class:`~scrapy.crawler.Crawler` object

-.. classmethod:: from_settings(cls, settings)
-
-   If present, and ``from_crawler`` is not defined, this class method is called
-   to create a request fingerprinter instance from a
-   :class:`~scrapy.settings.Settings` object. It must return a new instance of
-   the request fingerprinter.
-
 .. currentmodule:: scrapy.http

 The :meth:`fingerprint` method of the default request fingerprinter,
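Editor's note: with the `from_settings()` hook removed from the docs, `from_crawler()` is the only optional constructor left for request fingerprinters. A minimal sketch of a custom fingerprinter, assuming `scrapy.utils.request.fingerprint` as the hashing helper (part of Scrapy, not of this commit); the class name is illustrative.

```python
from scrapy.utils.request import fingerprint


class MyRequestFingerprinter:  # hypothetical example class
    @classmethod
    def from_crawler(cls, crawler):
        # The only optional constructor hook that remains documented.
        return cls()

    def fingerprint(self, request):
        return fingerprint(request)
```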
@@ -21,6 +21,7 @@ from scrapy.core.downloader.tls import (
     ScrapyClientTLSOptions,
     openssl_methods,
 )
+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.misc import build_from_crawler, load_object

 if TYPE_CHECKING:
@@ -69,6 +70,31 @@ class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
         method: int = SSL.SSLv23_METHOD,
         *args: Any,
         **kwargs: Any,
     ) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings, method, *args, **kwargs)
+
+    @classmethod
+    def from_crawler(
+        cls,
+        crawler: Crawler,
+        method: int = SSL.SSLv23_METHOD,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Self:
+        return cls._from_settings(crawler.settings, method, *args, **kwargs)
+
+    @classmethod
+    def _from_settings(
+        cls,
+        settings: BaseSettings,
+        method: int = SSL.SSLv23_METHOD,
+        *args: Any,
+        **kwargs: Any,
+    ) -> Self:
         tls_verbose_logging: bool = settings.getbool(
             "DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING"
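Editor's note: the three-method layout above (a warning shim in `from_settings()`, a thin `from_crawler()`, and a private `_from_settings()` that does the work) is the pattern this commit applies throughout. A minimal sketch of how the shim behaves, assuming a Scrapy build that includes this commit and that `get_crawler()` works with no arguments:

```python
import warnings

from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from scrapy.utils.test import get_crawler

crawler = get_crawler()

# New path: no warning is emitted.
ScrapyClientContextFactory.from_crawler(crawler)

# Old path: still works, but emits ScrapyDeprecationWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    ScrapyClientContextFactory.from_settings(crawler.settings)
assert any("from_settings() is deprecated" in str(w.message) for w in caught)
```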
@@ -1,9 +1,11 @@
 from __future__ import annotations

 import logging
+import warnings
 from pathlib import Path
 from typing import TYPE_CHECKING

+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.job import job_dir
 from scrapy.utils.request import (
     RequestFingerprinter,
@@ -26,6 +28,15 @@ if TYPE_CHECKING:
 class BaseDupeFilter:
     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls()
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
         return cls()

     def request_seen(self, request: Request) -> bool:
@@ -72,17 +83,31 @@ class RFPDupeFilter(BaseDupeFilter):
         *,
         fingerprinter: RequestFingerprinterProtocol | None = None,
     ) -> Self:
-        debug = settings.getbool("DUPEFILTER_DEBUG")
-        return cls(job_dir(settings), debug, fingerprinter=fingerprinter)
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings, fingerprinter=fingerprinter)

     @classmethod
     def from_crawler(cls, crawler: Crawler) -> Self:
         assert crawler.request_fingerprinter
-        return cls.from_settings(
+        return cls._from_settings(
             crawler.settings,
             fingerprinter=crawler.request_fingerprinter,
         )
+
+    @classmethod
+    def _from_settings(
+        cls,
+        settings: BaseSettings,
+        *,
+        fingerprinter: RequestFingerprinterProtocol | None = None,
+    ) -> Self:
+        debug = settings.getbool("DUPEFILTER_DEBUG")
+        return cls(job_dir(settings), debug, fingerprinter=fingerprinter)

     def request_seen(self, request: Request) -> bool:
         fp = self.request_fingerprint(request)
         if fp in self.fingerprints:
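Editor's note: a short sketch of the migrated construction path, assuming this commit; `get_crawler` is Scrapy's test helper and is assumed to return a crawler whose request fingerprinter is already set up (the updated tests below rely on the same behavior).

```python
from scrapy.dupefilters import RFPDupeFilter
from scrapy.utils.test import get_crawler

crawler = get_crawler(settings_dict={"DUPEFILTER_DEBUG": True})
# from_crawler() picks up the crawler's request fingerprinter itself,
# so callers no longer pass fingerprinter= explicitly.
df = RFPDupeFilter.from_crawler(crawler)
assert df.debug
```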
@@ -48,7 +48,7 @@ class MemoryUsage:
         self.check_interval: float = crawler.settings.getfloat(
             "MEMUSAGE_CHECK_INTERVAL_SECONDS"
         )
-        self.mail: MailSender = MailSender.from_settings(crawler.settings)
+        self.mail: MailSender = MailSender.from_crawler(crawler)
         crawler.signals.connect(self.engine_started, signal=signals.engine_started)
         crawler.signals.connect(self.engine_stopped, signal=signals.engine_stopped)
@@ -33,7 +33,7 @@ class StatsMailer:
         recipients: list[str] = crawler.settings.getlist("STATSMAILER_RCPTS")
         if not recipients:
             raise NotConfigured
-        mail: MailSender = MailSender.from_settings(crawler.settings)
+        mail: MailSender = MailSender.from_crawler(crawler)
         assert crawler.stats
         o = cls(crawler.stats, recipients, mail)
         crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
@@ -7,6 +7,7 @@ See documentation in docs/topics/email.rst
 from __future__ import annotations

 import logging
+import warnings
 from email import encoders as Encoders
 from email.mime.base import MIMEBase
 from email.mime.multipart import MIMEMultipart
@@ -19,6 +20,7 @@ from typing import IO, TYPE_CHECKING, Any
 from twisted.internet import ssl
 from twisted.internet.defer import Deferred

+from scrapy.exceptions import ScrapyDeprecationWarning
 from scrapy.utils.misc import arg_to_iter
 from scrapy.utils.python import to_bytes

@@ -32,6 +34,7 @@ if TYPE_CHECKING:
     # typing.Self requires Python 3.11
     from typing_extensions import Self

+    from scrapy.crawler import Crawler
     from scrapy.settings import BaseSettings


@@ -72,6 +75,19 @@ class MailSender:

     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings)
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls._from_settings(crawler.settings)
+
+    @classmethod
+    def _from_settings(cls, settings: BaseSettings) -> Self:
         return cls(
             smtphost=settings["MAIL_HOST"],
             mailfrom=settings["MAIL_FROM"],
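Editor's note: for callers the migration is mechanical, as the extension changes above show. A minimal before/after sketch, assuming a `crawler` object is in scope:

```python
# Deprecated (now emits ScrapyDeprecationWarning):
mailer = MailSender.from_settings(crawler.settings)

# Preferred replacement introduced by this commit:
mailer = MailSender.from_crawler(crawler)
```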
@@ -7,9 +7,10 @@ See documentation in docs/topics/spider-middleware.rst
 from __future__ import annotations

 import logging
+import warnings
 from typing import TYPE_CHECKING, Any

-from scrapy.exceptions import NotConfigured
+from scrapy.exceptions import NotConfigured, ScrapyDeprecationWarning
 from scrapy.http import Request, Response

 if TYPE_CHECKING:
@@ -19,6 +20,7 @@ if TYPE_CHECKING:
     from typing_extensions import Self

     from scrapy import Spider
+    from scrapy.crawler import Crawler
     from scrapy.settings import BaseSettings


@@ -31,6 +33,19 @@ class UrlLengthMiddleware:

     @classmethod
     def from_settings(cls, settings: BaseSettings) -> Self:
+        warnings.warn(
+            f"{cls.__name__}.from_settings() is deprecated, use from_crawler() instead.",
+            category=ScrapyDeprecationWarning,
+            stacklevel=2,
+        )
+        return cls._from_settings(settings)
+
+    @classmethod
+    def from_crawler(cls, crawler: Crawler) -> Self:
+        return cls._from_settings(crawler.settings)
+
+    @classmethod
+    def _from_settings(cls, settings: BaseSettings) -> Self:
         maxlength = settings.getint("URLLENGTH_LIMIT")
         if not maxlength:
             raise NotConfigured
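Editor's note: a condensed sketch of constructing the middleware the new way, mirroring the updated test later in this commit:

```python
from scrapy.spidermiddlewares.urllength import UrlLengthMiddleware
from scrapy.utils.test import get_crawler

crawler = get_crawler(settings_dict={"URLLENGTH_LIMIT": 25})
mw = UrlLengthMiddleware.from_crawler(crawler)  # reads URLLENGTH_LIMIT
assert mw.maxlength == 25
```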
@@ -33,14 +33,6 @@ class FromCrawlerRFPDupeFilter(RFPDupeFilter):
         return df


-class FromSettingsRFPDupeFilter(RFPDupeFilter):
-    @classmethod
-    def from_settings(cls, settings, *, fingerprinter=None):
-        df = super().from_settings(settings, fingerprinter=fingerprinter)
-        df.method = "from_settings"
-        return df
-
-
 class DirectDupeFilter:
     method = "n/a"
@@ -56,16 +48,6 @@ class RFPDupeFilterTest(unittest.TestCase):
         self.assertTrue(scheduler.df.debug)
         self.assertEqual(scheduler.df.method, "from_crawler")

-    def test_df_from_settings_scheduler(self):
-        settings = {
-            "DUPEFILTER_DEBUG": True,
-            "DUPEFILTER_CLASS": FromSettingsRFPDupeFilter,
-        }
-        crawler = get_crawler(settings_dict=settings)
-        scheduler = Scheduler.from_crawler(crawler)
-        self.assertTrue(scheduler.df.debug)
-        self.assertEqual(scheduler.df.method, "from_settings")
-
     def test_df_direct_scheduler(self):
         settings = {
             "DUPEFILTER_CLASS": DirectDupeFilter,
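Editor's note: with the `from_settings` test variant deleted, subclasses hook construction through `from_crawler()` only, as the surviving `FromCrawlerRFPDupeFilter` does. A minimal sketch of that pattern; the subclass name is hypothetical:

```python
from scrapy.dupefilters import RFPDupeFilter


class TaggedRFPDupeFilter(RFPDupeFilter):  # hypothetical subclass
    @classmethod
    def from_crawler(cls, crawler):
        df = super().from_crawler(crawler)
        df.method = "from_crawler"  # tag how the instance was built
        return df
```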
@@ -3,7 +3,6 @@ from unittest import TestCase
 from testfixtures import LogCapture

 from scrapy.http import Request, Response
-from scrapy.settings import Settings
 from scrapy.spidermiddlewares.urllength import UrlLengthMiddleware
 from scrapy.spiders import Spider
 from scrapy.utils.test import get_crawler
@@ -12,12 +11,10 @@ from scrapy.utils.test import get_crawler
 class TestUrlLengthMiddleware(TestCase):
     def setUp(self):
         self.maxlength = 25
-        settings = Settings({"URLLENGTH_LIMIT": self.maxlength})
-
-        crawler = get_crawler(Spider)
+        crawler = get_crawler(Spider, {"URLLENGTH_LIMIT": self.maxlength})
         self.spider = crawler._create_spider("foo")
         self.stats = crawler.stats
-        self.mw = UrlLengthMiddleware.from_settings(settings)
+        self.mw = UrlLengthMiddleware.from_crawler(crawler)

         self.response = Response("http://scrapytest.org")
         self.short_url_req = Request("http://scrapytest.org/")