From 2840865746a207afe7e98c3139032df0242c2dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Wed, 25 Jan 2012 18:29:54 -0200 Subject: [PATCH 1/2] Allow overriding ClientContextFactory and enable SSL bug workarounds by default. refs #82 --- scrapy/core/downloader/handlers/http.py | 3 +-- scrapy/core/downloader/webclient.py | 17 +++++++++++++++++ scrapy/settings/default_settings.py | 1 + 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/scrapy/core/downloader/handlers/http.py b/scrapy/core/downloader/handlers/http.py index 61391c626..ff0beb321 100644 --- a/scrapy/core/downloader/handlers/http.py +++ b/scrapy/core/downloader/handlers/http.py @@ -8,10 +8,9 @@ from scrapy.conf import settings from scrapy import optional_features ssl_supported = 'ssl' in optional_features -if ssl_supported: - from twisted.internet.ssl import ClientContextFactory HTTPClientFactory = load_object(settings['DOWNLOADER_HTTPCLIENTFACTORY']) +ClientContextFactory = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY']) class HttpDownloadHandler(object): diff --git a/scrapy/core/downloader/webclient.py b/scrapy/core/downloader/webclient.py index 6579475a9..c76593bd6 100644 --- a/scrapy/core/downloader/webclient.py +++ b/scrapy/core/downloader/webclient.py @@ -9,6 +9,7 @@ from twisted.internet import defer from scrapy.http import Headers from scrapy.utils.httpobj import urlparse_cached from scrapy.responsetypes import responsetypes +from scrapy import optional_features def _parsed_url_args(parsed): @@ -135,3 +136,19 @@ class ScrapyHTTPClientFactory(HTTPClientFactory): def gotHeaders(self, headers): self.headers_time = time() self.response_headers = headers + + + +if 'ssl' in optional_features: + from twisted.internet.ssl import ClientContextFactory + from OpenSSL import SSL +else: + ClientContextFactory = object + + +class ScrapyClientContextFactory(ClientContextFactory): + + def getContext(self): + ctx = ClientContextFactory.getContext(self) + ctx.set_options(SSL.OP_ALL) + return ctx diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index 7e4363507..278ce8570 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -64,6 +64,7 @@ DOWNLOAD_TIMEOUT = 180 # 3mins DOWNLOADER_DEBUG = False DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory' +DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.webclient.ScrapyClientContextFactory' DOWNLOADER_MIDDLEWARES = {} From eb8e98461d0c5806bfc0fe939675fc06099855ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gra=C3=B1a?= Date: Wed, 25 Jan 2012 19:15:59 -0200 Subject: [PATCH 2/2] Add some comments and references to github issues. closes #82 --- scrapy/core/downloader/webclient.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scrapy/core/downloader/webclient.py b/scrapy/core/downloader/webclient.py index c76593bd6..985cc4ae5 100644 --- a/scrapy/core/downloader/webclient.py +++ b/scrapy/core/downloader/webclient.py @@ -147,8 +147,13 @@ else: class ScrapyClientContextFactory(ClientContextFactory): + "A SSL context factory which is more permissive against SSL bugs." + # see https://github.com/scrapy/scrapy/issues/82 + # and https://github.com/scrapy/scrapy/issues/26 def getContext(self): ctx = ClientContextFactory.getContext(self) + # Enable all workarounds to SSL bugs as documented by + # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html ctx.set_options(SSL.OP_ALL) return ctx