1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 22:04:16 +00:00

Made encoding conversion more explicit; added a test for a Location header with UTF-8 encoding, replicating what browsers do

This commit is contained in:
Elias Dorneles 2015-09-11 18:51:48 -03:00
parent 179c4588ca
commit 81950f773d
2 changed files with 13 additions and 3 deletions

View File

@ -3,6 +3,7 @@ from six.moves.urllib.parse import urljoin
from scrapy.http import HtmlResponse
from scrapy.utils.response import get_meta_refresh
from scrapy.utils.python import to_native_str
from scrapy.exceptions import IgnoreRequest, NotConfigured
logger = logging.getLogger(__name__)
@ -62,7 +63,8 @@ class RedirectMiddleware(BaseRedirectMiddleware):
location = None
if 'Location' in response.headers:
location = response.headers['location'].decode('latin1')
# HTTP headers are ASCII or latin1; the redirected URL will be percent-encoded UTF-8
location = to_native_str(response.headers['location'].decode('latin1'))
if location is not None and response.status in [301, 302, 303, 307]:
redirected_url = urljoin(request.url, location)

View File

@ -154,12 +154,20 @@ class RedirectMiddlewareTest(unittest.TestCase):
def test_latin1_location(self):
    """Check that a latin1-encoded Location header yields a percent-encoded UTF-8 URL."""
    req = Request('http://scrapytest.org/first')
    latin1_location = u'/ação'.encode('latin1')  # HTTP historically supports latin1
    resp = Response('http://scrapytest.org/first', headers={'Location': latin1_location}, status=302)
    req_result = self.mw.process_response(req, resp, self.spider)
    # 'ç' and 'ã' appear as their UTF-8 byte sequences, percent-encoded.
    perc_encoded_utf8_url = 'http://scrapytest.org/a%C3%A7%C3%A3o'
    self.assertEqual(perc_encoded_utf8_url, req_result.url)
def test_location_with_wrong_encoding(self):
    """Check the redirect URL produced when the Location header is sent as UTF-8.

    The expected URL is the double-encoded form of the path: the test pins
    that the header bytes are not interpreted as UTF-8.
    """
    req = Request('http://scrapytest.org/first')
    utf8_location = u'/ação'  # header with wrong encoding (utf-8)
    resp = Response('http://scrapytest.org/first', headers={'Location': utf8_location}, status=302)
    req_result = self.mw.process_response(req, resp, self.spider)
    # Each UTF-8 byte of the header (e.g. C3 A7 for 'ç') shows up as its own
    # character, itself percent-encoded as UTF-8 (%C3%83 %C2%A7).
    # NOTE(review): presumably the headers container stores the text as UTF-8
    # bytes and the middleware decodes them as latin1 -- confirm.
    perc_encoded_utf8_url = 'http://scrapytest.org/a%C3%83%C2%A7%C3%83%C2%A3o'
    self.assertEqual(perc_encoded_utf8_url, req_result.url)
class MetaRefreshMiddlewareTest(unittest.TestCase):