scrapy/tests/test_proxy_connect.py
Vostretsov Nikita b364d27247 [MRG+1] Automatic port selection for services in unit tests (#3210)
* ability to pass port as a parameter

* try to find free ports

* use environment variables to pass mock server address

* get mock server address from environment variables

* ability to select ports for proxy at runtime

* use common method for URLs from mock server

* https support

* get mock server address

* get mock address

* replace hand-written mechanism by kernel-based one (see the sketch after this commit log)

* use ephemeral ports in mockserver

* strip EOL from addresses

* use ephemeral port in proxy

* no need to restore environment as it is restored in tearDown

* decode bytes

* use mockserver address as a variable

* ability to pass address as variable

* per test-case mockserver

* use base class

* remove obsolete environment manipulation

* restore usage of proxy for http cases

* common method for breaking proxy auth credentials

* python version-independent url methods
2018-05-23 21:25:50 +03:00
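Several of these commits hinge on one technique: instead of hand-scanning for a free port, bind to port 0 so the kernel assigns a free ephemeral port, then read the assigned address back and publish it to the test process (the commits use environment variables for the mock server address). Below is a minimal, self-contained sketch of that mechanism, separate from the test file that follows; the MOCKSERVER_ADDRESS variable name is illustrative, not something Scrapy's test suite defines.

import os
import socket

# Bind to port 0: the kernel picks any free ephemeral port, avoiding the
# race-prone "try ports until one is free" approach.
server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server_socket.bind(('127.0.0.1', 0))
server_socket.listen(1)

# Read back the address the kernel actually assigned and hand it to other
# processes through an environment variable.
host, port = server_socket.getsockname()
os.environ['MOCKSERVER_ADDRESS'] = '%s:%d' % (host, port)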


import json
import os
import time
from threading import Thread

from six.moves.urllib.parse import urlsplit, urlunsplit
from libmproxy import controller, proxy
from netlib import http_auth
from testfixtures import LogCapture
from twisted.internet import defer
from twisted.trial.unittest import TestCase

from scrapy.utils.test import get_crawler
from scrapy.http import Request
from tests.spiders import SimpleSpider, SingleRequestSpider
from tests.mockserver import MockServer

class HTTPSProxy(controller.Master, Thread):

    def __init__(self):
        password_manager = http_auth.PassManSingleUser('scrapy', 'scrapy')
        authenticator = http_auth.BasicProxyAuth(password_manager, "mitmproxy")
        cert_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'keys', 'mitmproxy-ca.pem')
        # Port 0 asks the kernel for a free ephemeral port, so the proxy
        # never races other services for a hard-coded port.
        server = proxy.ProxyServer(proxy.ProxyConfig(
            authenticator=authenticator,
            cacert=cert_path),
            0)
        self.server = server
        Thread.__init__(self)
        controller.Master.__init__(self, server)

    def http_address(self):
        # Report the address actually bound, including the kernel-assigned port.
        return 'http://scrapy:scrapy@%s:%d' % self.server.socket.getsockname()

def _wrong_credentials(proxy_url):
    # Replace the valid credentials in the proxy URL's netloc with invalid ones.
    bad_auth_proxy = list(urlsplit(proxy_url))
    bad_auth_proxy[1] = bad_auth_proxy[1].replace('scrapy:scrapy@', 'wrong:wronger@')
    return urlunsplit(bad_auth_proxy)

class ProxyConnectTestCase(TestCase):

    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()
        self._oldenv = os.environ.copy()
        self._proxy = HTTPSProxy()
        self._proxy.start()
        # Wait for the proxy thread to start listening.
        time.sleep(1.0)
        os.environ['https_proxy'] = self._proxy.http_address()
        os.environ['http_proxy'] = self._proxy.http_address()

    def tearDown(self):
        self.mockserver.__exit__(None, None, None)
        self._proxy.shutdown()
        os.environ = self._oldenv
    @defer.inlineCallbacks
    def test_https_connect_tunnel(self):
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
        self._assert_got_response_code(200, l)

    @defer.inlineCallbacks
    def test_https_noconnect(self):
        proxy = os.environ['https_proxy']
        os.environ['https_proxy'] = proxy + '?noconnect'
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
        self._assert_got_response_code(200, l)

    @defer.inlineCallbacks
    def test_https_connect_tunnel_error(self):
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            # 99999 is not a valid TCP port, so the CONNECT tunnel
            # cannot be established.
            yield crawler.crawl("https://localhost:99999/status?n=200")
        self._assert_got_tunnel_error(l)

    @defer.inlineCallbacks
    def test_https_tunnel_auth_error(self):
        os.environ['https_proxy'] = _wrong_credentials(os.environ['https_proxy'])
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
        # The proxy returns a 407 error code, but it does not reach the
        # client; the client just sees a TunnelError.
        self._assert_got_tunnel_error(l)

    @defer.inlineCallbacks
    def test_https_tunnel_without_leak_proxy_authorization_header(self):
        request = Request(self.mockserver.url("/echo", is_secure=True))
        crawler = get_crawler(SingleRequestSpider)
        with LogCapture() as l:
            yield crawler.crawl(seed=request)
        self._assert_got_response_code(200, l)
        echo = json.loads(crawler.spider.meta['responses'][0].body)
        self.assertTrue('Proxy-Authorization' not in echo['headers'])

    @defer.inlineCallbacks
    def test_https_noconnect_auth_error(self):
        os.environ['https_proxy'] = _wrong_credentials(os.environ['https_proxy']) + '?noconnect'
        crawler = get_crawler(SimpleSpider)
        with LogCapture() as l:
            yield crawler.crawl(self.mockserver.url("/status?n=200", is_secure=True))
        self._assert_got_response_code(407, l)

    def _assert_got_response_code(self, code, log):
        print(log)
        self.assertEqual(str(log).count('Crawled (%d)' % code), 1)

    def _assert_got_tunnel_error(self, log):
        print(log)
        self.assertIn('TunnelError', str(log))