mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 22:04:16 +00:00
Made encoding conversion more explicit; added a test for a header with UTF-8 encoding, replicating what browsers do.
This commit is contained in:
parent
179c4588ca
commit
81950f773d
@ -3,6 +3,7 @@ from six.moves.urllib.parse import urljoin
|
||||
|
||||
from scrapy.http import HtmlResponse
|
||||
from scrapy.utils.response import get_meta_refresh
|
||||
from scrapy.utils.python import to_native_str
|
||||
from scrapy.exceptions import IgnoreRequest, NotConfigured
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -62,7 +63,8 @@ class RedirectMiddleware(BaseRedirectMiddleware):
|
||||
|
||||
location = None
|
||||
if 'Location' in response.headers:
|
||||
location = response.headers['location'].decode('latin1')
|
||||
# HTTP header is ascii or latin1, redirected url will be percent-encoded utf-8
|
||||
location = to_native_str(response.headers['location'].decode('latin1'))
|
||||
|
||||
if location is not None and response.status in [301, 302, 303, 307]:
|
||||
redirected_url = urljoin(request.url, location)
|
||||
|
@ -154,12 +154,20 @@ class RedirectMiddlewareTest(unittest.TestCase):
|
||||
|
||||
def test_latin1_location(self):
|
||||
req = Request('http://scrapytest.org/first')
|
||||
latin1_path = u'/ação'.encode('latin1')
|
||||
resp = Response('http://scrapytest.org/first', headers={'Location': latin1_path}, status=302)
|
||||
latin1_location = u'/ação'.encode('latin1') # HTTP historically supports latin1
|
||||
resp = Response('http://scrapytest.org/first', headers={'Location': latin1_location}, status=302)
|
||||
req_result = self.mw.process_response(req, resp, self.spider)
|
||||
perc_encoded_utf8_url = 'http://scrapytest.org/a%C3%A7%C3%A3o'
|
||||
self.assertEquals(perc_encoded_utf8_url, req_result.url)
|
||||
|
||||
def test_location_with_wrong_encoding(self):
    """Check the redirect URL built from a UTF-8 encoded ``Location`` header.

    The redirect middleware decodes the ``Location`` header as latin1, so a
    header that was sent as UTF-8 is expected to come out as mojibake that
    is then percent-encoded as UTF-8 in the redirected request URL.
    """
    req = Request('http://scrapytest.org/first')
    utf8_location = u'/ação'  # header with wrong encoding (utf-8)
    resp = Response('http://scrapytest.org/first', headers={'Location': utf8_location}, status=302)
    req_result = self.mw.process_response(req, resp, self.spider)
    # Each UTF-8 byte of the header is read back as one latin1 character
    # (e.g. 'ç' = C3 A7 becomes 'Ã' + '§'), and those characters are then
    # percent-encoded as UTF-8, which doubles the escapes.
    # NOTE(review): assumes the text header value is stored as UTF-8 bytes by
    # the Response headers container -- confirm against scrapy.http.Headers.
    perc_encoded_utf8_url = 'http://scrapytest.org/a%C3%83%C2%A7%C3%83%C2%A3o'
    # assertEqual instead of the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(perc_encoded_utf8_url, req_result.url)
|
||||
|
||||
|
||||
class MetaRefreshMiddlewareTest(unittest.TestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user