mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 03:43:58 +00:00
Merge pull request #1678 from lopuhin/py3-http-downloaders
[MRG+1] Py3: port http downloaders
This commit is contained in:
commit b4fb9d3534
scrapy/core/downloader/handlers/http10.py
@@ -2,6 +2,7 @@
 """
 from twisted.internet import reactor
 from scrapy.utils.misc import load_object
+from scrapy.utils.python import to_unicode


 class HTTP10DownloadHandler(object):
@@ -17,8 +18,8 @@ class HTTP10DownloadHandler(object):
         return factory.deferred

     def _connect(self, factory):
-        host, port = factory.host, factory.port
-        if factory.scheme == 'https':
+        host, port = to_unicode(factory.host), factory.port
+        if factory.scheme == b'https':
             return reactor.connectSSL(host, port, factory,
                                       self.ClientContextFactory())
         else:
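This hunk sets the pattern for the whole PR: URL components coming out of Scrapy's webclient are bytes on Python 3, so scheme comparisons switch to bytes literals, while Twisted's reactor.connectSSL wants a text hostname. A minimal sketch of how the two conversion helpers behave (the sample values are illustrative, not from the PR):

    from scrapy.utils.python import to_bytes, to_unicode

    # to_unicode decodes bytes to str (UTF-8 by default) and passes str through
    # untouched; to_bytes is the mirror image. Both are no-ops on converted input.
    assert to_unicode(b'example.com') == 'example.com'
    assert to_unicode('example.com') == 'example.com'
    assert to_bytes('https') == b'https'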
scrapy/core/downloader/handlers/http11.py
@@ -6,7 +6,7 @@ from io import BytesIO
 from time import time
 from six.moves.urllib.parse import urldefrag

-from zope.interface import implements
+from zope.interface import implementer
 from twisted.internet import defer, reactor, protocol
 from twisted.web.http_headers import Headers as TxHeaders
 from twisted.web.iweb import IBodyProducer, UNKNOWN_LENGTH
@@ -19,6 +19,7 @@ from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
 from scrapy.core.downloader.webclient import _parse
 from scrapy.utils.misc import load_object
+from scrapy.utils.python import to_bytes, to_unicode
 from scrapy import twisted_version

 logger = logging.getLogger(__name__)
@@ -77,7 +78,7 @@ class TunnelingTCP4ClientEndpoint(TCP4ClientEndpoint):
     for it.
     """

-    _responseMatcher = re.compile('HTTP/1\.. 200')
+    _responseMatcher = re.compile(b'HTTP/1\.. 200')

     def __init__(self, reactor, host, port, proxyConf, contextFactory,
                  timeout=30, bindAddress=None):
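The b prefix on the pattern matters because the matcher runs against raw socket data handed to processProxyResponse, which is bytes on Python 3; a str pattern applied to bytes raises TypeError instead of merely failing to match. A self-contained illustration (a raw bytes literal is used here to sidestep the unescaped \. warning):

    import re

    matcher = re.compile(br'HTTP/1\.. 200')
    assert matcher.match(b'HTTP/1.1 200 Connection established')
    assert not matcher.match(b'HTTP/1.1 407 Proxy Authentication Required')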
@@ -91,11 +92,15 @@

     def requestTunnel(self, protocol):
         """Asks the proxy to open a tunnel."""
-        tunnelReq = 'CONNECT %s:%s HTTP/1.1\r\n' % (self._tunneledHost,
-                                                    self._tunneledPort)
+        tunnelReq = (
+            b'CONNECT ' +
+            to_bytes(self._tunneledHost, encoding='ascii') + b':' +
+            to_bytes(str(self._tunneledPort)) +
+            b' HTTP/1.1\r\n')
         if self._proxyAuthHeader:
-            tunnelReq += 'Proxy-Authorization: %s\r\n' % self._proxyAuthHeader
-        tunnelReq += '\r\n'
+            tunnelReq += \
+                b'Proxy-Authorization: ' + self._proxyAuthHeader + b'\r\n'
+        tunnelReq += b'\r\n'
         protocol.transport.write(tunnelReq)
         self._protocolDataReceived = protocol.dataReceived
         protocol.dataReceived = self.processProxyResponse
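Concatenation replaces %-formatting here because bytes objects only gained the % operator in Python 3.5 (PEP 461), and Scrapy still had to support earlier 3.x releases. A sketch of the wire bytes this builds, with an illustrative host and port standing in for the endpoint's attributes:

    from scrapy.utils.python import to_bytes

    tunneled_host, tunneled_port = 'example.com', 443  # illustrative values

    tunnel_req = (
        b'CONNECT ' +
        to_bytes(tunneled_host, encoding='ascii') + b':' +
        to_bytes(str(tunneled_port)) +
        b' HTTP/1.1\r\n')
    tunnel_req += b'\r\n'
    assert tunnel_req == b'CONNECT example.com:443 HTTP/1.1\r\n\r\n'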
@@ -180,10 +185,11 @@ class ScrapyAgent(object):
         if proxy:
             _, _, proxyHost, proxyPort, proxyParams = _parse(proxy)
             scheme = _parse(request.url)[0]
-            omitConnectTunnel = proxyParams.find('noconnect') >= 0
-            if scheme == 'https' and not omitConnectTunnel:
+            proxyHost = to_unicode(proxyHost)
+            omitConnectTunnel = b'noconnect' in proxyParams
+            if scheme == b'https' and not omitConnectTunnel:
                 proxyConf = (proxyHost, proxyPort,
-                             request.headers.get('Proxy-Authorization', None))
+                             request.headers.get(b'Proxy-Authorization', None))
                 return self._TunnelingAgent(reactor, proxyConf,
                     contextFactory=self._contextFactory, connectTimeout=timeout,
                     bindAddress=bindaddress, pool=self._pool)
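Two changes land in this hunk: proxyHost is decoded because it is later handed to Twisted APIs that expect text, and the noconnect probe becomes a bytes membership test, since _parse() yields bytes components on Python 3 and mixing str and bytes in .find() raises TypeError there. Roughly:

    # proxyParams arrives as bytes on Python 3; a str needle would raise TypeError.
    proxyParams = b'noconnect'                       # illustrative value
    omitConnectTunnel = b'noconnect' in proxyParams  # `in` also reads better than .find() >= 0
    assert omitConnectTunnel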
@@ -201,14 +207,15 @@ class ScrapyAgent(object):

         # request details
         url = urldefrag(request.url)[0]
-        method = request.method
+        method = to_bytes(request.method)
         headers = TxHeaders(request.headers)
         if isinstance(agent, self._TunnelingAgent):
-            headers.removeHeader('Proxy-Authorization')
+            headers.removeHeader(b'Proxy-Authorization')
         bodyproducer = _RequestBodyProducer(request.body) if request.body else None

         start_time = time()
-        d = agent.request(method, url, headers, bodyproducer)
+        d = agent.request(
+            method, to_bytes(url, encoding='ascii'), headers, bodyproducer)
         # set download latency
         d.addCallback(self._cb_latency, request, start_time)
         # response body is ready to be consumed
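Twisted's Agent.request() expects both the method and the URI as bytes; passing str on Python 3 blows up inside twisted.web's client machinery. A minimal sketch of the call shape (the URL is illustrative, and the deferred only fires once the reactor runs):

    from twisted.internet import reactor
    from twisted.web.client import Agent

    from scrapy.utils.python import to_bytes

    agent = Agent(reactor)
    d = agent.request(
        to_bytes('GET'),
        to_bytes('http://example.com/', encoding='ascii'))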
@@ -232,7 +239,7 @@ class ScrapyAgent(object):
     def _cb_bodyready(self, txresponse, request):
         # deliverBody hangs for responses without body
         if txresponse.length == 0:
-            return txresponse, '', None
+            return txresponse, b'', None

         maxsize = request.meta.get('download_maxsize', self._maxsize)
         warnsize = request.meta.get('download_warnsize', self._warnsize)
@@ -268,8 +275,8 @@ class ScrapyAgent(object):
         return respcls(url=url, status=status, headers=headers, body=body, flags=flags)


+@implementer(IBodyProducer)
 class _RequestBodyProducer(object):
-    implements(IBodyProducer)

     def __init__(self, body):
         self.body = body
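zope.interface's implements() works by mutating the class namespace from inside the class body, a stack-inspection trick that only functions on Python 2; under Python 3 it fails at class-creation time, which is why the port swaps it for the @implementer class decorator. A minimal producer in the same style (StringProducer is a hypothetical name; the body of Scrapy's actual _RequestBodyProducer is truncated in the hunk above):

    from twisted.internet.defer import succeed
    from twisted.web.iweb import IBodyProducer
    from zope.interface import implementer

    @implementer(IBodyProducer)
    class StringProducer(object):
        def __init__(self, body):
            self.body = body
            self.length = len(body)  # IBodyProducer requires a length attribute

        def startProducing(self, consumer):
            consumer.write(self.body)
            return succeed(None)

        def pauseProducing(self):
            pass

        def stopProducing(self):
            pass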
tests/mockserver.py
@@ -134,12 +134,12 @@ class Echo(LeafResource):
 class Partial(LeafResource):

     def render_GET(self, request):
-        request.setHeader("Content-Length", "1024")
+        request.setHeader(b"Content-Length", b"1024")
         self.deferRequest(request, 0, self._delayedRender, request)
         return NOT_DONE_YET

     def _delayedRender(self, request):
-        request.write("partial content\n")
+        request.write(b"partial content\n")
         request.finish()

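Twisted's web server is bytes-native on Python 3: render_* methods must return bytes, request.write() only accepts bytes, and header names and values are bytes as well. A tiny standalone resource exercising the same rules (Hello is a hypothetical example, not part of the mock server):

    from twisted.web.resource import Resource

    class Hello(Resource):
        isLeaf = True

        def render_GET(self, request):
            request.setHeader(b'Content-Type', b'text/plain')
            return b'hello\n'  # returning str here fails on Python 3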
@@ -147,7 +147,7 @@ class Drop(Partial):

     def _delayedRender(self, request):
         abort = getarg(request, "abort", 0, type=int)
-        request.write("this connection will be dropped\n")
+        request.write(b"this connection will be dropped\n")
         tr = request.channel.transport
         try:
             if abort and hasattr(tr, 'abortConnection'):
@@ -162,26 +162,26 @@ class Root(Resource):

     def __init__(self):
         Resource.__init__(self)
-        self.putChild("status", Status())
-        self.putChild("follow", Follow())
-        self.putChild("delay", Delay())
-        self.putChild("partial", Partial())
-        self.putChild("drop", Drop())
-        self.putChild("raw", Raw())
-        self.putChild("echo", Echo())
+        self.putChild(b"status", Status())
+        self.putChild(b"follow", Follow())
+        self.putChild(b"delay", Delay())
+        self.putChild(b"partial", Partial())
+        self.putChild(b"drop", Drop())
+        self.putChild(b"raw", Raw())
+        self.putChild(b"echo", Echo())

-        if six.PY2 and twisted_version > (12, 3, 0):
+        if twisted_version > (12, 3, 0):
             from twisted.web.test.test_webclient import PayloadResource
             from twisted.web.server import GzipEncoderFactory
             from twisted.web.resource import EncodingResourceWrapper
-            self.putChild('payload', PayloadResource())
-            self.putChild("xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))
+            self.putChild(b"payload", PayloadResource())
+            self.putChild(b"xpayload", EncodingResourceWrapper(PayloadResource(), [GzipEncoderFactory()]))

     def getChild(self, name, request):
         return self

     def render(self, request):
-        return 'Scrapy mock HTTP server\n'
+        return b'Scrapy mock HTTP server\n'


 class MockServer():
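putChild() keys the resource's children mapping by the raw path segment, which arrives as bytes on Python 3; a child registered under the str 'status' would never match an incoming b'status' segment, hence the b prefix on every registration. A quick check:

    from twisted.web.resource import Resource

    root = Resource()
    root.putChild(b'status', Resource())
    assert b'status' in root.children  # keyed by bytes, matching request path segments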
@@ -199,14 +199,18 @@ class MockServer():
         time.sleep(0.2)


+def ssl_context_factory():
+    return ssl.DefaultOpenSSLContextFactory(
+        os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
+        os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
+    )
+
+
 if __name__ == "__main__":
     root = Root()
     factory = Site(root)
     httpPort = reactor.listenTCP(8998, factory)
-    contextFactory = ssl.DefaultOpenSSLContextFactory(
-        os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
-        os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
-    )
+    contextFactory = ssl_context_factory()
    httpsPort = reactor.listenSSL(8999, factory, contextFactory)

     def print_listening():
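Extracting ssl_context_factory() lets the test suite reuse the mock server's TLS setup (it is imported in tests/test_downloader_handlers.py below). Note that DefaultOpenSSLContextFactory takes (privateKeyFileName, certificateFileName); both arguments point at the same keys/cert.pem here, so that PEM fixture has to bundle the private key together with the certificate. Usage, roughly:

    from twisted.internet import reactor
    from twisted.web.server import Site

    from tests.mockserver import Root, ssl_context_factory

    # Serve the mock site over TLS on an OS-assigned port.
    port = reactor.listenSSL(0, Site(Root()), ssl_context_factory())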
tests/py3-ignores.txt
@@ -4,7 +4,6 @@ tests/test_command_shell.py
 tests/test_exporters.py
 tests/test_linkextractors_deprecated.py
 tests/test_crawl.py
-tests/test_downloader_handlers.py
 tests/test_downloadermiddleware_httpcache.py
 tests/test_downloadermiddleware_httpcompression.py
 tests/test_downloadermiddleware_httpproxy.py
@@ -25,8 +24,6 @@ scrapy/xlib/tx/client.py
 scrapy/xlib/tx/_newclient.py
 scrapy/xlib/tx/__init__.py
 scrapy/core/downloader/handlers/s3.py
-scrapy/core/downloader/handlers/http11.py
-scrapy/core/downloader/handlers/http.py
 scrapy/core/downloader/handlers/ftp.py
 scrapy/pipelines/images.py
 scrapy/pipelines/files.py
tests/test_downloader_handlers.py
@@ -1,5 +1,4 @@
 import os
-import twisted
 import six

 from twisted.trial import unittest
@@ -10,9 +9,7 @@ from twisted.web import server, static, util, resource
 from twisted.web.test.test_webclient import ForeverTakingResource, \
     NoLengthResource, HostHeaderResource, \
     PayloadResource, BrokenDownloadResource
-from twisted.protocols.ftp import FTPRealm, FTPFactory
 from twisted.cred import portal, checkers, credentials
-from twisted.protocols.ftp import FTPClient, ConnectionLost
 from w3lib.url import path_to_file_uri

 from scrapy import twisted_version
@@ -22,15 +19,15 @@ from scrapy.core.downloader.handlers.http import HTTPDownloadHandler, HttpDownlo
 from scrapy.core.downloader.handlers.http10 import HTTP10DownloadHandler
 from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler
 from scrapy.core.downloader.handlers.s3 import S3DownloadHandler
-from scrapy.core.downloader.handlers.ftp import FTPDownloadHandler

 from scrapy.spiders import Spider
 from scrapy.http import Request
 from scrapy.settings import Settings
 from scrapy.utils.test import get_crawler
+from scrapy.utils.python import to_bytes
 from scrapy.exceptions import NotConfigured

-from tests.mockserver import MockServer
+from tests.mockserver import MockServer, ssl_context_factory
 from tests.spiders import SingleRequestSpider

 class DummyDH(object):
@@ -91,7 +88,7 @@ class FileTestCase(unittest.TestCase):
         def _test(response):
             self.assertEquals(response.url, request.url)
             self.assertEquals(response.status, 200)
-            self.assertEquals(response.body, '0123456789')
+            self.assertEquals(response.body, b'0123456789')

         request = Request(path_to_file_uri(self.tmpname + '^'))
         assert request.url.upper().endswith('%5E')
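After the port, Response.body is bytes regardless of interpreter version, so every body assertion in these tests gains a b prefix. For instance:

    from scrapy.http import Response

    resp = Response('http://example.com/', body=b'0123456789')
    assert resp.body == b'0123456789'  # bytes, never str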
@@ -105,23 +102,29 @@

 class HttpTestCase(unittest.TestCase):

+    scheme = 'http'
     download_handler_cls = HTTPDownloadHandler

     def setUp(self):
         name = self.mktemp()
         os.mkdir(name)
-        FilePath(name).child("file").setContent("0123456789")
+        FilePath(name).child("file").setContent(b"0123456789")
         r = static.File(name)
-        r.putChild("redirect", util.Redirect("/file"))
-        r.putChild("wait", ForeverTakingResource())
-        r.putChild("hang-after-headers", ForeverTakingResource(write=True))
-        r.putChild("nolength", NoLengthResource())
-        r.putChild("host", HostHeaderResource())
-        r.putChild("payload", PayloadResource())
-        r.putChild("broken", BrokenDownloadResource())
+        r.putChild(b"redirect", util.Redirect(b"/file"))
+        r.putChild(b"wait", ForeverTakingResource())
+        r.putChild(b"hang-after-headers", ForeverTakingResource(write=True))
+        r.putChild(b"nolength", NoLengthResource())
+        r.putChild(b"host", HostHeaderResource())
+        r.putChild(b"payload", PayloadResource())
+        r.putChild(b"broken", BrokenDownloadResource())
         self.site = server.Site(r, timeout=None)
         self.wrapper = WrappingFactory(self.site)
-        self.port = reactor.listenTCP(0, self.wrapper, interface='127.0.0.1')
+        self.host = 'localhost'
+        if self.scheme == 'https':
+            self.port = reactor.listenSSL(
+                0, self.wrapper, ssl_context_factory(), interface=self.host)
+        else:
+            self.port = reactor.listenTCP(0, self.wrapper, interface=self.host)
         self.portno = self.port.getHost().port
         self.download_handler = self.download_handler_cls(Settings())
         self.download_request = self.download_handler.download_request
@@ -133,20 +136,20 @@
         yield self.download_handler.close()

     def getURL(self, path):
-        return "http://127.0.0.1:%d/%s" % (self.portno, path)
+        return "%s://%s:%d/%s" % (self.scheme, self.host, self.portno, path)

     def test_download(self):
         request = Request(self.getURL('file'))
         d = self.download_request(request, Spider('foo'))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, "0123456789")
+        d.addCallback(self.assertEquals, b"0123456789")
         return d

     def test_download_head(self):
         request = Request(self.getURL('file'), method='HEAD')
         d = self.download_request(request, Spider('foo'))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, '')
+        d.addCallback(self.assertEquals, b'')
         return d

     def test_redirect_status(self):
@@ -165,6 +168,9 @@ class HttpTestCase(unittest.TestCase):

     @defer.inlineCallbacks
     def test_timeout_download_from_spider(self):
+        if self.scheme == 'https':
+            raise unittest.SkipTest(
+                'test_timeout_download_from_spider skipped under https')
         spider = Spider('foo')
         meta = {'download_timeout': 0.2}
         # client connects but no data is received
@@ -178,7 +184,8 @@

     def test_host_header_not_in_request_headers(self):
         def _test(response):
-            self.assertEquals(response.body, '127.0.0.1:%d' % self.portno)
+            self.assertEquals(
+                response.body, to_bytes('%s:%d' % (self.host, self.portno)))
             self.assertEquals(request.headers, {})

         request = Request(self.getURL('host'))
@@ -186,19 +193,19 @@

     def test_host_header_seted_in_request_headers(self):
         def _test(response):
-            self.assertEquals(response.body, 'example.com')
-            self.assertEquals(request.headers.get('Host'), 'example.com')
+            self.assertEquals(response.body, b'example.com')
+            self.assertEquals(request.headers.get('Host'), b'example.com')

         request = Request(self.getURL('host'), headers={'Host': 'example.com'})
         return self.download_request(request, Spider('foo')).addCallback(_test)

         d = self.download_request(request, Spider('foo'))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, 'example.com')
+        d.addCallback(self.assertEquals, b'example.com')
         return d

     def test_payload(self):
-        body = '1'*100 # PayloadResource requires body length to be 100
+        body = b'1'*100 # PayloadResource requires body length to be 100
         request = Request(self.getURL('payload'), method='POST', body=body)
         d = self.download_request(request, Spider('foo'))
         d.addCallback(lambda r: r.body)
@@ -216,6 +223,10 @@ class Http10TestCase(HttpTestCase):
     download_handler_cls = HTTP10DownloadHandler


+class Https10TestCase(Http10TestCase):
+    scheme = 'https'
+
+
 class Http11TestCase(HttpTestCase):
     """HTTP 1.1 test case"""
     download_handler_cls = HTTP11DownloadHandler
@@ -226,7 +237,7 @@ class Http11TestCase(HttpTestCase):
         request = Request(self.getURL('file'))
         d = self.download_request(request, Spider('foo'))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, "0123456789")
+        d.addCallback(self.assertEquals, b"0123456789")
         return d

     @defer.inlineCallbacks
@@ -237,7 +248,7 @@
         # response body. (regardless of headers)
         d = self.download_request(request, Spider('foo', download_maxsize=10))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, "0123456789")
+        d.addCallback(self.assertEquals, b"0123456789")
         yield d

         d = self.download_request(request, Spider('foo', download_maxsize=9))
@@ -260,10 +271,14 @@
         request = Request(self.getURL('file'))
         d = self.download_request(request, Spider('foo', download_maxsize=100))
         d.addCallback(lambda r: r.body)
-        d.addCallback(self.assertEquals, "0123456789")
+        d.addCallback(self.assertEquals, b"0123456789")
         return d


+class Https11TestCase(Http11TestCase):
+    scheme = 'https'
+
+
 class Http11MockServerTestCase(unittest.TestCase):
     """HTTP 1.1 test case with MockServer"""
     if twisted_version < (11, 1, 0):
@@ -297,27 +312,30 @@ class Http11MockServerTestCase(unittest.TestCase):
     @defer.inlineCallbacks
     def test_download_gzip_response(self):

-        if six.PY2 and twisted_version > (12, 3, 0):
+        if twisted_version > (12, 3, 0):

             crawler = get_crawler(SingleRequestSpider)
-            body = '1'*100 # PayloadResource requires body length to be 100
+            body = b'1'*100 # PayloadResource requires body length to be 100
             request = Request('http://localhost:8998/payload', method='POST', body=body, meta={'download_maxsize': 50})
             yield crawler.crawl(seed=request)
             failure = crawler.spider.meta['failure']
             # download_maxsize < 100, hence the CancelledError
             self.assertIsInstance(failure.value, defer.CancelledError)

-            request.headers.setdefault('Accept-Encoding', 'gzip,deflate')
-            request = request.replace(url='http://localhost:8998/xpayload')
-            yield crawler.crawl(seed=request)
-
-            # download_maxsize = 50 is enough for the gzipped response
-            failure = crawler.spider.meta.get('failure')
-            self.assertTrue(failure == None)
-            reason = crawler.spider.meta['close_reason']
-            self.assertTrue(reason, 'finished')
+            if six.PY2:
+                request.headers.setdefault(b'Accept-Encoding', b'gzip,deflate')
+                request = request.replace(url='http://localhost:8998/xpayload')
+                yield crawler.crawl(seed=request)
+                # download_maxsize = 50 is enough for the gzipped response
+                failure = crawler.spider.meta.get('failure')
+                self.assertTrue(failure == None)
+                reason = crawler.spider.meta['close_reason']
+                self.assertTrue(reason, 'finished')
+            else:
+                # See issue https://twistedmatrix.com/trac/ticket/8175
+                raise unittest.SkipTest("xpayload only enabled for PY2")
         else:
-            raise unittest.SkipTest("xpayload and payload endpoint only enabled for twisted > 12.3.0 and python 2.x")
+            raise unittest.SkipTest("xpayload and payload endpoint only enabled for twisted > 12.3.0")


 class UriResource(resource.Resource):
@@ -354,7 +372,7 @@ class HttpProxyTestCase(unittest.TestCase):
         def _test(response):
             self.assertEquals(response.status, 200)
             self.assertEquals(response.url, request.url)
-            self.assertEquals(response.body, 'http://example.com')
+            self.assertEquals(response.body, b'http://example.com')

         http_proxy = self.getURL('')
         request = Request('http://example.com', meta={'proxy': http_proxy})
@@ -364,7 +382,7 @@
         def _test(response):
             self.assertEquals(response.status, 200)
             self.assertEquals(response.url, request.url)
-            self.assertEquals(response.body, 'https://example.com')
+            self.assertEquals(response.body, b'https://example.com')

         http_proxy = '%s?noconnect' % self.getURL('')
         request = Request('https://example.com', meta={'proxy': http_proxy})
@@ -374,7 +392,7 @@
         def _test(response):
             self.assertEquals(response.status, 200)
             self.assertEquals(response.url, request.url)
-            self.assertEquals(response.body, '/path/to/resource')
+            self.assertEquals(response.body, b'/path/to/resource')

         request = Request(self.getURL('path/to/resource'))
         return self.download_request(request, Spider('foo')).addCallback(_test)
@@ -394,6 +412,17 @@ class Http11ProxyTestCase(HttpProxyTestCase):
     if twisted_version < (11, 1, 0):
         skip = 'HTTP1.1 not supported in twisted < 11.1.0'

+    @defer.inlineCallbacks
+    def test_download_with_proxy_https_timeout(self):
+        """ Test TunnelingTCP4ClientEndpoint """
+        http_proxy = self.getURL('')
+        domain = 'https://no-such-domain.nosuch'
+        request = Request(
+            domain, meta={'proxy': http_proxy, 'download_timeout': 0.2})
+        d = self.download_request(request, Spider('foo'))
+        timeout = yield self.assertFailure(d, error.TimeoutError)
+        self.assertIn(domain, timeout.osError)
+

 class HttpDownloadHandlerMock(object):
     def __init__(self, settings):
@@ -518,8 +547,13 @@ class FTPTestCase(unittest.TestCase):

     if twisted_version < (10, 2, 0):
         skip = "Twisted pre 10.2.0 doesn't allow to set home path other than /home"
+    if six.PY3:
+        skip = "Twisted missing ftp support for PY3"

     def setUp(self):
+        from twisted.protocols.ftp import FTPRealm, FTPFactory
+        from scrapy.core.downloader.handlers.ftp import FTPDownloadHandler
+
         # setup dirs and test file
         self.directory = self.mktemp()
         os.mkdir(self.directory)
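Pushing the twisted.protocols.ftp imports down into the methods keeps this module importable on Python 3, where that package did not yet import cleanly; trial honours the class-level skip attribute before setUp() runs, so the deferred import is never reached there. The pattern, reduced to a skeleton:

    import six
    from twisted.trial import unittest

    class FTPTestCase(unittest.TestCase):
        if six.PY3:
            skip = "Twisted missing ftp support for PY3"  # checked before setUp

        def setUp(self):
            # Deferred import: only runs on Python 2, where it still works.
            from twisted.protocols.ftp import FTPFactory, FTPRealm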
@@ -601,6 +635,8 @@ class FTPTestCase(unittest.TestCase):
         return self._add_test_callbacks(d, _test)

     def test_invalid_credentials(self):
+        from twisted.protocols.ftp import ConnectionLost
+
         request = Request(url="ftp://127.0.0.1:%s/file.txt" % self.portNum,
                           meta={"ftp_user": self.username, "ftp_password": 'invalid'})
         d = self.download_handler.download_request(request, None)
tests/test_http_response.py
@@ -17,6 +17,8 @@ class BaseResponseTest(unittest.TestCase):
         # Response requires url in the consturctor
         self.assertRaises(Exception, self.response_class)
         self.assertTrue(isinstance(self.response_class('http://example.com/'), self.response_class))
+        if not six.PY2:
+            self.assertRaises(TypeError, self.response_class, b"http://example.com")
         # body can be str or None
         self.assertTrue(isinstance(self.response_class('http://example.com/', body=b''), self.response_class))
         self.assertTrue(isinstance(self.response_class('http://example.com/', body=b'body'), self.response_class))
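The new assertion pins down a deliberate decision of the port: response URLs stay text, and a bytes URL is rejected outright on Python 3 rather than silently decoded. In other words:

    import six
    from scrapy.http import Response

    if not six.PY2:
        try:
            Response(b'http://example.com')
        except TypeError:
            pass  # bytes URLs are rejected on Python 3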