mirror of https://github.com/scrapy/scrapy.git synced 2025-02-21 04:53:19 +00:00

Downloader handlers: crawler=None in __init__

Eugenio Lacuesta 2019-12-23 10:40:16 -03:00
parent 8a567e98bb
commit a6ec89251e
5 changed files with 23 additions and 24 deletions
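
All five handlers converge on the same constructor shape: settings comes first and crawler is an optional second argument, with from_crawler unpacking crawler.settings. A minimal sketch of that convention, based on the diffs below (the class name and setting used here are illustrative, not part of the commit):

class ExampleDownloadHandler:
    """Illustrative handler using the settings-first signature."""
    lazy = False

    def __init__(self, settings, crawler=None):
        # Configuration is read from the settings object; the crawler is
        # kept only as an optional extra reference.
        self._timeout = settings.getfloat('DOWNLOAD_TIMEOUT')
        self._settings = settings
        self._crawler = crawler

    @classmethod
    def from_crawler(cls, crawler):
        # Crawler-based entry point: unpack the settings, keep the crawler.
        return cls(crawler.settings, crawler)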


@@ -63,7 +63,7 @@ class ReceivedDataProtocol(Protocol):
 _CODE_RE = re.compile(r"\d+")


-class FTPDownloadHandler(object):
+class FTPDownloadHandler:
     lazy = False

     CODE_MAPPING = {
@@ -71,14 +71,14 @@ class FTPDownloadHandler(object):
         "default": 503,
     }

-    def __init__(self, crawler):
-        self.default_user = crawler.settings['FTP_USER']
-        self.default_password = crawler.settings['FTP_PASSWORD']
-        self.passive_mode = crawler.settings['FTP_PASSIVE_MODE']
+    def __init__(self, settings, crawler=None):
+        self.default_user = settings['FTP_USER']
+        self.default_password = settings['FTP_PASSWORD']
+        self.passive_mode = settings['FTP_PASSIVE_MODE']

     @classmethod
     def from_crawler(cls, crawler):
-        return cls(crawler)
+        return cls(crawler.settings, crawler)

     def download_request(self, request, spider):
         parsed_url = urlparse_cached(request)


@@ -6,18 +6,18 @@ from scrapy.utils.misc import load_object, create_instance
 from scrapy.utils.python import to_unicode


-class HTTP10DownloadHandler(object):
+class HTTP10DownloadHandler:
     lazy = False

-    def __init__(self, crawler):
+    def __init__(self, settings, crawler=None):
         self.HTTPClientFactory = load_object(crawler.settings['DOWNLOADER_HTTPCLIENTFACTORY'])
         self.ClientContextFactory = load_object(crawler.settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
+        self._settings = settings
         self._crawler = crawler
-        self._settings = crawler.settings

     @classmethod
     def from_crawler(cls, crawler):
-        return cls(crawler)
+        return cls(crawler.settings, crawler)

     def download_request(self, request, spider):
         """Return a deferred for the HTTP download"""


@@ -28,12 +28,10 @@ from scrapy.utils.python import to_bytes, to_unicode
 logger = logging.getLogger(__name__)


-class HTTP11DownloadHandler(object):
+class HTTP11DownloadHandler:
     lazy = False

-    def __init__(self, crawler):
-        settings = crawler.settings
-
+    def __init__(self, settings, crawler=None):
         self._pool = HTTPConnectionPool(reactor, persistent=True)
         self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
         self._pool._factory.noisy = False
@@ -68,7 +66,7 @@ class HTTP11DownloadHandler(object):

     @classmethod
     def from_crawler(cls, crawler):
-        return cls(crawler)
+        return cls(crawler.settings, crawler)

     def download_request(self, request, spider):
         """Return a deferred for the HTTP download"""


@@ -4,6 +4,7 @@ from scrapy.core.downloader.handlers.http import HTTPDownloadHandler
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.boto import is_botocore
 from scrapy.utils.httpobj import urlparse_cached
+from scrapy.utils.misc import create_instance


 def _get_boto_connection():
@@ -30,14 +31,15 @@ def _get_boto_connection():
     return _S3Connection


-class S3DownloadHandler(object):
+class S3DownloadHandler:

-    def __init__(self, crawler, aws_access_key_id=None, aws_secret_access_key=None,
+    def __init__(self, settings, crawler=None,
+                 aws_access_key_id=None, aws_secret_access_key=None,
                  httpdownloadhandler=HTTPDownloadHandler, **kw):
         if not aws_access_key_id:
-            aws_access_key_id = crawler.settings['AWS_ACCESS_KEY_ID']
+            aws_access_key_id = settings['AWS_ACCESS_KEY_ID']
         if not aws_secret_access_key:
-            aws_secret_access_key = crawler.settings['AWS_SECRET_ACCESS_KEY']
+            aws_secret_access_key = settings['AWS_SECRET_ACCESS_KEY']

         # If no credentials could be found anywhere,
         # consider this an anonymous connection request by default;
@@ -66,11 +68,12 @@ class S3DownloadHandler(object):
         except Exception as ex:
             raise NotConfigured(str(ex))

-        self._download_http = httpdownloadhandler(crawler).download_request
+        _http_handler = create_instance(httpdownloadhandler, settings, crawler)
+        self._download_http = _http_handler.download_request

     @classmethod
     def from_crawler(cls, crawler, *args, **kwargs):
-        return cls(crawler, *args, **kwargs)
+        return cls(crawler.settings, crawler, *args, **kwargs)

     def download_request(self, request, spider):
         p = urlparse_cached(request)
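
The S3 handler no longer calls httpdownloadhandler(crawler) directly; it goes through scrapy.utils.misc.create_instance, which picks the appropriate construction hook for whatever handler class is configured. Roughly, and simplified from the real helper, the resolution order looks like this:

def create_instance(objcls, settings, crawler, *args, **kwargs):
    # Simplified sketch of scrapy.utils.misc.create_instance: prefer a
    # crawler-aware factory, then a settings-aware one, then fall back
    # to the plain constructor.
    if crawler is not None and hasattr(objcls, 'from_crawler'):
        return objcls.from_crawler(crawler, *args, **kwargs)
    if settings is not None and hasattr(objcls, 'from_settings'):
        return objcls.from_settings(settings, *args, **kwargs)
    return objcls(*args, **kwargs)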


@@ -751,9 +751,7 @@ class Http11ProxyTestCase(HttpProxyTestCase):
         self.assertIn(domain, timeout.osError)


-class HttpDownloadHandlerMock(object):
-    def __init__(self, settings, crawler):
-        pass
+class HttpDownloadHandlerMock:

     def download_request(self, request, spider):
         return request