mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 20:44:18 +00:00
Merge pull request #1923 from scrapy/request-new-safe_url_string
[MRG+1] Use newer w3lib.url.safe_url_string() and re-enable HTTP request tests
This commit is contained in:
commit
73a5571c60
@ -2,7 +2,7 @@ Twisted>=10.0.0
|
|||||||
lxml
|
lxml
|
||||||
pyOpenSSL
|
pyOpenSSL
|
||||||
cssselect>=0.9
|
cssselect>=0.9
|
||||||
w3lib>=1.13.0
|
w3lib>=1.14.2
|
||||||
queuelib
|
queuelib
|
||||||
six>=1.5.2
|
six>=1.5.2
|
||||||
PyDispatcher>=2.0.5
|
PyDispatcher>=2.0.5
|
||||||
|
@ -8,7 +8,7 @@ import six
|
|||||||
from w3lib.url import safe_url_string
|
from w3lib.url import safe_url_string
|
||||||
|
|
||||||
from scrapy.http.headers import Headers
|
from scrapy.http.headers import Headers
|
||||||
from scrapy.utils.python import to_native_str, to_bytes
|
from scrapy.utils.python import to_bytes
|
||||||
from scrapy.utils.trackref import object_ref
|
from scrapy.utils.trackref import object_ref
|
||||||
from scrapy.utils.url import escape_ajax
|
from scrapy.utils.url import escape_ajax
|
||||||
from scrapy.http.common import obsolete_setter
|
from scrapy.http.common import obsolete_setter
|
||||||
@ -50,8 +50,8 @@ class Request(object_ref):
|
|||||||
if not isinstance(url, six.string_types):
|
if not isinstance(url, six.string_types):
|
||||||
raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
|
raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
|
||||||
|
|
||||||
url = to_native_str(url, self.encoding)
|
s = safe_url_string(url, self.encoding)
|
||||||
self._url = escape_ajax(safe_url_string(url))
|
self._url = escape_ajax(s)
|
||||||
|
|
||||||
if ':' not in self._url:
|
if ':' not in self._url:
|
||||||
raise ValueError('Missing scheme in request url: %s' % self._url)
|
raise ValueError('Missing scheme in request url: %s' % self._url)
|
||||||
|
@ -1,10 +1,13 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
import cgi
|
import cgi
|
||||||
import unittest
|
import unittest
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import six
|
import six
|
||||||
from six.moves import xmlrpc_client as xmlrpclib
|
from six.moves import xmlrpc_client as xmlrpclib
|
||||||
from six.moves.urllib.parse import urlparse
|
from six.moves.urllib.parse import urlparse, parse_qs, unquote
|
||||||
|
if six.PY3:
|
||||||
|
from urllib.parse import unquote_to_bytes
|
||||||
|
|
||||||
from scrapy.http import Request, FormRequest, XmlRpcRequest, Headers, HtmlResponse
|
from scrapy.http import Request, FormRequest, XmlRpcRequest, Headers, HtmlResponse
|
||||||
from scrapy.utils.python import to_bytes, to_native_str
|
from scrapy.utils.python import to_bytes, to_native_str
|
||||||
@ -86,12 +89,60 @@ class RequestTest(unittest.TestCase):
|
|||||||
r = self.request_class(url="http://www.scrapy.org/blank space")
|
r = self.request_class(url="http://www.scrapy.org/blank space")
|
||||||
self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")
|
self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")
|
||||||
|
|
||||||
@unittest.skipUnless(six.PY2, "TODO")
|
|
||||||
def test_url_encoding(self):
|
def test_url_encoding(self):
|
||||||
r1 = self.request_class(url=u"http://www.scrapy.org/price/\xa3", encoding="utf-8")
|
r = self.request_class(url=u"http://www.scrapy.org/price/£")
|
||||||
r2 = self.request_class(url=u"http://www.scrapy.org/price/\xa3", encoding="latin1")
|
self.assertEqual(r.url, "http://www.scrapy.org/price/%C2%A3")
|
||||||
self.assertEqual(r1.url, "http://www.scrapy.org/price/%C2%A3")
|
|
||||||
self.assertEqual(r2.url, "http://www.scrapy.org/price/%A3")
|
def test_url_encoding_other(self):
|
||||||
|
# encoding affects only query part of URI, not path
|
||||||
|
# path part should always be UTF-8 encoded before percent-escaping
|
||||||
|
r = self.request_class(url=u"http://www.scrapy.org/price/£", encoding="utf-8")
|
||||||
|
self.assertEqual(r.url, "http://www.scrapy.org/price/%C2%A3")
|
||||||
|
|
||||||
|
r = self.request_class(url=u"http://www.scrapy.org/price/£", encoding="latin1")
|
||||||
|
self.assertEqual(r.url, "http://www.scrapy.org/price/%C2%A3")
|
||||||
|
|
||||||
|
def test_url_encoding_query(self):
|
||||||
|
r1 = self.request_class(url=u"http://www.scrapy.org/price/£?unit=µ")
|
||||||
|
self.assertEqual(r1.url, "http://www.scrapy.org/price/%C2%A3?unit=%C2%B5")
|
||||||
|
|
||||||
|
# should be same as above
|
||||||
|
r2 = self.request_class(url=u"http://www.scrapy.org/price/£?unit=µ", encoding="utf-8")
|
||||||
|
self.assertEqual(r2.url, "http://www.scrapy.org/price/%C2%A3?unit=%C2%B5")
|
||||||
|
|
||||||
|
def test_url_encoding_query_latin1(self):
|
||||||
|
# encoding is used for encoding query-string before percent-escaping;
|
||||||
|
# path is still UTF-8 encoded before percent-escaping
|
||||||
|
r3 = self.request_class(url=u"http://www.scrapy.org/price/µ?currency=£", encoding="latin1")
|
||||||
|
self.assertEqual(r3.url, "http://www.scrapy.org/price/%C2%B5?currency=%A3")
|
||||||
|
|
||||||
|
def test_url_encoding_nonutf8_untouched(self):
|
||||||
|
# percent-escaping sequences that do not match valid UTF-8 sequences
|
||||||
|
# should be kept untouched (just upper-cased perhaps)
|
||||||
|
#
|
||||||
|
# See https://tools.ietf.org/html/rfc3987#section-3.2
|
||||||
|
#
|
||||||
|
# "Conversions from URIs to IRIs MUST NOT use any character encoding
|
||||||
|
# other than UTF-8 in steps 3 and 4, even if it might be possible to
|
||||||
|
# guess from the context that another character encoding than UTF-8 was
|
||||||
|
# used in the URI. For example, the URI
|
||||||
|
# "http://www.example.org/r%E9sum%E9.html" might with some guessing be
|
||||||
|
# interpreted to contain two e-acute characters encoded as iso-8859-1.
|
||||||
|
# It must not be converted to an IRI containing these e-acute
|
||||||
|
# characters. Otherwise, in the future the IRI will be mapped to
|
||||||
|
# "http://www.example.org/r%C3%A9sum%C3%A9.html", which is a different
|
||||||
|
# URI from "http://www.example.org/r%E9sum%E9.html".
|
||||||
|
r1 = self.request_class(url=u"http://www.scrapy.org/price/%a3")
|
||||||
|
self.assertEqual(r1.url, "http://www.scrapy.org/price/%a3")
|
||||||
|
|
||||||
|
r2 = self.request_class(url=u"http://www.scrapy.org/r%C3%A9sum%C3%A9/%a3")
|
||||||
|
self.assertEqual(r2.url, "http://www.scrapy.org/r%C3%A9sum%C3%A9/%a3")
|
||||||
|
|
||||||
|
r3 = self.request_class(url=u"http://www.scrapy.org/résumé/%a3")
|
||||||
|
self.assertEqual(r3.url, "http://www.scrapy.org/r%C3%A9sum%C3%A9/%a3")
|
||||||
|
|
||||||
|
r4 = self.request_class(url=u"http://www.example.org/r%E9sum%E9.html")
|
||||||
|
self.assertEqual(r4.url, "http://www.example.org/r%E9sum%E9.html")
|
||||||
|
|
||||||
def test_body(self):
|
def test_body(self):
|
||||||
r1 = self.request_class(url="http://www.example.com/")
|
r1 = self.request_class(url="http://www.example.com/")
|
||||||
@ -198,19 +249,46 @@ class FormRequestTest(RequestTest):
|
|||||||
r1 = self.request_class("http://www.example.com", formdata={})
|
r1 = self.request_class("http://www.example.com", formdata={})
|
||||||
self.assertEqual(r1.body, b'')
|
self.assertEqual(r1.body, b'')
|
||||||
|
|
||||||
@unittest.skipUnless(six.PY2, "TODO")
|
def test_default_encoding_bytes(self):
|
||||||
def test_default_encoding(self):
|
|
||||||
# using default encoding (utf-8)
|
# using default encoding (utf-8)
|
||||||
data = {'one': 'two', 'price': '\xc2\xa3 100'}
|
data = {b'one': b'two', b'price': b'\xc2\xa3 100'}
|
||||||
r2 = self.request_class("http://www.example.com", formdata=data)
|
r2 = self.request_class("http://www.example.com", formdata=data)
|
||||||
self.assertEqual(r2.method, 'POST')
|
self.assertEqual(r2.method, 'POST')
|
||||||
self.assertEqual(r2.encoding, 'utf-8')
|
self.assertEqual(r2.encoding, 'utf-8')
|
||||||
self.assertQueryEqual(r2.body, b'price=%C2%A3+100&one=two')
|
self.assertQueryEqual(r2.body, b'price=%C2%A3+100&one=two')
|
||||||
self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')
|
self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
def test_custom_encoding(self):
|
def test_default_encoding_textual_data(self):
|
||||||
data = {'price': u'\xa3 100'}
|
# using default encoding (utf-8)
|
||||||
r3 = self.request_class("http://www.example.com", formdata=data, encoding='latin1')
|
data = {u'µ one': u'two', u'price': u'£ 100'}
|
||||||
|
r2 = self.request_class("http://www.example.com", formdata=data)
|
||||||
|
self.assertEqual(r2.method, 'POST')
|
||||||
|
self.assertEqual(r2.encoding, 'utf-8')
|
||||||
|
self.assertQueryEqual(r2.body, b'price=%C2%A3+100&%C2%B5+one=two')
|
||||||
|
self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
|
def test_default_encoding_mixed_data(self):
|
||||||
|
# using default encoding (utf-8)
|
||||||
|
data = {u'\u00b5one': b'two', b'price\xc2\xa3': u'\u00a3 100'}
|
||||||
|
r2 = self.request_class("http://www.example.com", formdata=data)
|
||||||
|
self.assertEqual(r2.method, 'POST')
|
||||||
|
self.assertEqual(r2.encoding, 'utf-8')
|
||||||
|
self.assertQueryEqual(r2.body, b'%C2%B5one=two&price%C2%A3=%C2%A3+100')
|
||||||
|
self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
|
def test_custom_encoding_bytes(self):
|
||||||
|
data = {b'\xb5 one': b'two', b'price': b'\xa3 100'}
|
||||||
|
r2 = self.request_class("http://www.example.com", formdata=data,
|
||||||
|
encoding='latin1')
|
||||||
|
self.assertEqual(r2.method, 'POST')
|
||||||
|
self.assertEqual(r2.encoding, 'latin1')
|
||||||
|
self.assertQueryEqual(r2.body, b'price=%A3+100&%B5+one=two')
|
||||||
|
self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')
|
||||||
|
|
||||||
|
def test_custom_encoding_textual_data(self):
|
||||||
|
data = {'price': u'£ 100'}
|
||||||
|
r3 = self.request_class("http://www.example.com", formdata=data,
|
||||||
|
encoding='latin1')
|
||||||
self.assertEqual(r3.encoding, 'latin1')
|
self.assertEqual(r3.encoding, 'latin1')
|
||||||
self.assertEqual(r3.body, b'price=%A3+100')
|
self.assertEqual(r3.body, b'price=%A3+100')
|
||||||
|
|
||||||
@ -231,15 +309,78 @@ class FormRequestTest(RequestTest):
|
|||||||
url="http://www.example.com/this/list.html")
|
url="http://www.example.com/this/list.html")
|
||||||
req = self.request_class.from_response(response,
|
req = self.request_class.from_response(response,
|
||||||
formdata={'one': ['two', 'three'], 'six': 'seven'})
|
formdata={'one': ['two', 'three'], 'six': 'seven'})
|
||||||
|
|
||||||
self.assertEqual(req.method, 'POST')
|
self.assertEqual(req.method, 'POST')
|
||||||
self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
|
self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
|
||||||
self.assertEqual(req.url, "http://www.example.com/this/post.php")
|
self.assertEqual(req.url, "http://www.example.com/this/post.php")
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(set(fs[b"test"]), {b"val1", b"val2"})
|
self.assertEqual(set(fs[b'test']), {b'val1', b'val2'})
|
||||||
self.assertEqual(set(fs[b"one"]), {b"two", b"three"})
|
self.assertEqual(set(fs[b'one']), {b'two', b'three'})
|
||||||
self.assertEqual(fs[b'test2'], [b'xxx'])
|
self.assertEqual(fs[b'test2'], [b'xxx'])
|
||||||
self.assertEqual(fs[b'six'], [b'seven'])
|
self.assertEqual(fs[b'six'], [b'seven'])
|
||||||
|
|
||||||
|
def test_from_response_post_nonascii_bytes_utf8(self):
|
||||||
|
response = _buildresponse(
|
||||||
|
b"""<form action="post.php" method="POST">
|
||||||
|
<input type="hidden" name="test \xc2\xa3" value="val1">
|
||||||
|
<input type="hidden" name="test \xc2\xa3" value="val2">
|
||||||
|
<input type="hidden" name="test2" value="xxx \xc2\xb5">
|
||||||
|
</form>""",
|
||||||
|
url="http://www.example.com/this/list.html")
|
||||||
|
req = self.request_class.from_response(response,
|
||||||
|
formdata={'one': ['two', 'three'], 'six': 'seven'})
|
||||||
|
|
||||||
|
self.assertEqual(req.method, 'POST')
|
||||||
|
self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
|
||||||
|
self.assertEqual(req.url, "http://www.example.com/this/post.php")
|
||||||
|
fs = _qs(req, to_unicode=True)
|
||||||
|
self.assertEqual(set(fs[u'test £']), {u'val1', u'val2'})
|
||||||
|
self.assertEqual(set(fs[u'one']), {u'two', u'three'})
|
||||||
|
self.assertEqual(fs[u'test2'], [u'xxx µ'])
|
||||||
|
self.assertEqual(fs[u'six'], [u'seven'])
|
||||||
|
|
||||||
|
def test_from_response_post_nonascii_bytes_latin1(self):
|
||||||
|
response = _buildresponse(
|
||||||
|
b"""<form action="post.php" method="POST">
|
||||||
|
<input type="hidden" name="test \xa3" value="val1">
|
||||||
|
<input type="hidden" name="test \xa3" value="val2">
|
||||||
|
<input type="hidden" name="test2" value="xxx \xb5">
|
||||||
|
</form>""",
|
||||||
|
url="http://www.example.com/this/list.html",
|
||||||
|
encoding='latin1',
|
||||||
|
)
|
||||||
|
req = self.request_class.from_response(response,
|
||||||
|
formdata={'one': ['two', 'three'], 'six': 'seven'})
|
||||||
|
|
||||||
|
self.assertEqual(req.method, 'POST')
|
||||||
|
self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
|
||||||
|
self.assertEqual(req.url, "http://www.example.com/this/post.php")
|
||||||
|
fs = _qs(req, to_unicode=True, encoding='latin1')
|
||||||
|
self.assertEqual(set(fs[u'test £']), {u'val1', u'val2'})
|
||||||
|
self.assertEqual(set(fs[u'one']), {u'two', u'three'})
|
||||||
|
self.assertEqual(fs[u'test2'], [u'xxx µ'])
|
||||||
|
self.assertEqual(fs[u'six'], [u'seven'])
|
||||||
|
|
||||||
|
def test_from_response_post_nonascii_unicode(self):
|
||||||
|
response = _buildresponse(
|
||||||
|
u"""<form action="post.php" method="POST">
|
||||||
|
<input type="hidden" name="test £" value="val1">
|
||||||
|
<input type="hidden" name="test £" value="val2">
|
||||||
|
<input type="hidden" name="test2" value="xxx µ">
|
||||||
|
</form>""",
|
||||||
|
url="http://www.example.com/this/list.html")
|
||||||
|
req = self.request_class.from_response(response,
|
||||||
|
formdata={'one': ['two', 'three'], 'six': 'seven'})
|
||||||
|
|
||||||
|
self.assertEqual(req.method, 'POST')
|
||||||
|
self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
|
||||||
|
self.assertEqual(req.url, "http://www.example.com/this/post.php")
|
||||||
|
fs = _qs(req, to_unicode=True)
|
||||||
|
self.assertEqual(set(fs[u'test £']), {u'val1', u'val2'})
|
||||||
|
self.assertEqual(set(fs[u'one']), {u'two', u'three'})
|
||||||
|
self.assertEqual(fs[u'test2'], [u'xxx µ'])
|
||||||
|
self.assertEqual(fs[u'six'], [u'seven'])
|
||||||
|
|
||||||
def test_from_response_extra_headers(self):
|
def test_from_response_extra_headers(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
"""<form action="post.php" method="POST">
|
"""<form action="post.php" method="POST">
|
||||||
@ -268,10 +409,10 @@ class FormRequestTest(RequestTest):
|
|||||||
self.assertEqual(urlparse(r1.url).hostname, "www.example.com")
|
self.assertEqual(urlparse(r1.url).hostname, "www.example.com")
|
||||||
self.assertEqual(urlparse(r1.url).path, "/this/get.php")
|
self.assertEqual(urlparse(r1.url).path, "/this/get.php")
|
||||||
fs = _qs(r1)
|
fs = _qs(r1)
|
||||||
self.assertEqual(set(fs['test']), set(['val1', 'val2']))
|
self.assertEqual(set(fs[b'test']), set([b'val1', b'val2']))
|
||||||
self.assertEqual(set(fs['one']), set(['two', 'three']))
|
self.assertEqual(set(fs[b'one']), set([b'two', b'three']))
|
||||||
self.assertEqual(fs['test2'], ['xxx'])
|
self.assertEqual(fs[b'test2'], [b'xxx'])
|
||||||
self.assertEqual(fs['six'], ['seven'])
|
self.assertEqual(fs[b'six'], [b'seven'])
|
||||||
|
|
||||||
def test_from_response_override_params(self):
|
def test_from_response_override_params(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -315,9 +456,9 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response)
|
req = self.request_class.from_response(response)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickable1'], ['clicked1'])
|
self.assertEqual(fs[b'clickable1'], [b'clicked1'])
|
||||||
self.assertFalse('i1' in fs, fs) # xpath in _get_inputs()
|
self.assertFalse(b'i1' in fs, fs) # xpath in _get_inputs()
|
||||||
self.assertFalse('clickable2' in fs, fs) # xpath in _get_clickable()
|
self.assertFalse(b'clickable2' in fs, fs) # xpath in _get_clickable()
|
||||||
|
|
||||||
def test_from_response_submit_first_clickable(self):
|
def test_from_response_submit_first_clickable(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -329,10 +470,10 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response, formdata={'two': '2'})
|
req = self.request_class.from_response(response, formdata={'two': '2'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickable1'], ['clicked1'])
|
self.assertEqual(fs[b'clickable1'], [b'clicked1'])
|
||||||
self.assertFalse('clickable2' in fs, fs)
|
self.assertFalse(b'clickable2' in fs, fs)
|
||||||
self.assertEqual(fs['one'], ['1'])
|
self.assertEqual(fs[b'one'], [b'1'])
|
||||||
self.assertEqual(fs['two'], ['2'])
|
self.assertEqual(fs[b'two'], [b'2'])
|
||||||
|
|
||||||
def test_from_response_submit_not_first_clickable(self):
|
def test_from_response_submit_not_first_clickable(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -345,10 +486,10 @@ class FormRequestTest(RequestTest):
|
|||||||
req = self.request_class.from_response(response, formdata={'two': '2'}, \
|
req = self.request_class.from_response(response, formdata={'two': '2'}, \
|
||||||
clickdata={'name': 'clickable2'})
|
clickdata={'name': 'clickable2'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickable2'], ['clicked2'])
|
self.assertEqual(fs[b'clickable2'], [b'clicked2'])
|
||||||
self.assertFalse('clickable1' in fs, fs)
|
self.assertFalse(b'clickable1' in fs, fs)
|
||||||
self.assertEqual(fs['one'], ['1'])
|
self.assertEqual(fs[b'one'], [b'1'])
|
||||||
self.assertEqual(fs['two'], ['2'])
|
self.assertEqual(fs[b'two'], [b'2'])
|
||||||
|
|
||||||
def test_from_response_dont_submit_image_as_input(self):
|
def test_from_response_dont_submit_image_as_input(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -359,7 +500,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response, dont_click=True)
|
req = self.request_class.from_response(response, dont_click=True)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['i1v']})
|
self.assertEqual(fs, {b'i1': [b'i1v']})
|
||||||
|
|
||||||
def test_from_response_dont_submit_reset_as_input(self):
|
def test_from_response_dont_submit_reset_as_input(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -371,7 +512,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response, dont_click=True)
|
req = self.request_class.from_response(response, dont_click=True)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['i1v'], 'i2': ['i2v']})
|
self.assertEqual(fs, {b'i1': [b'i1v'], b'i2': [b'i2v']})
|
||||||
|
|
||||||
def test_from_response_multiple_clickdata(self):
|
def test_from_response_multiple_clickdata(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -382,11 +523,11 @@ class FormRequestTest(RequestTest):
|
|||||||
<input type="hidden" name="two" value="clicked2">
|
<input type="hidden" name="two" value="clicked2">
|
||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response, \
|
req = self.request_class.from_response(response, \
|
||||||
clickdata={'name': 'clickable', 'value': 'clicked2'})
|
clickdata={u'name': u'clickable', u'value': u'clicked2'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickable'], ['clicked2'])
|
self.assertEqual(fs[b'clickable'], [b'clicked2'])
|
||||||
self.assertEqual(fs['one'], ['clicked1'])
|
self.assertEqual(fs[b'one'], [b'clicked1'])
|
||||||
self.assertEqual(fs['two'], ['clicked2'])
|
self.assertEqual(fs[b'two'], [b'clicked2'])
|
||||||
|
|
||||||
def test_from_response_unicode_clickdata(self):
|
def test_from_response_unicode_clickdata(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -397,9 +538,24 @@ class FormRequestTest(RequestTest):
|
|||||||
<input type="hidden" name="eurosign" value="\u20ac">
|
<input type="hidden" name="eurosign" value="\u20ac">
|
||||||
</form>""")
|
</form>""")
|
||||||
req = self.request_class.from_response(response, \
|
req = self.request_class.from_response(response, \
|
||||||
clickdata={'name': u'price in \u00a3'})
|
clickdata={u'name': u'price in \u00a3'})
|
||||||
fs = _qs(req)
|
fs = _qs(req, to_unicode=True)
|
||||||
self.assertTrue(fs[to_native_str(u'price in \u00a3')])
|
self.assertTrue(fs[u'price in \u00a3'])
|
||||||
|
|
||||||
|
def test_from_response_unicode_clickdata_latin1(self):
|
||||||
|
response = _buildresponse(
|
||||||
|
u"""<form action="get.php" method="GET">
|
||||||
|
<input type="submit" name="price in \u00a3" value="\u00a3 1000">
|
||||||
|
<input type="submit" name="price in \u00a5" value="\u00a5 2000">
|
||||||
|
<input type="hidden" name="poundsign" value="\u00a3">
|
||||||
|
<input type="hidden" name="yensign" value="\u00a5">
|
||||||
|
</form>""",
|
||||||
|
encoding='latin1')
|
||||||
|
req = self.request_class.from_response(response, \
|
||||||
|
clickdata={u'name': u'price in \u00a5'})
|
||||||
|
fs = _qs(req, to_unicode=True, encoding='latin1')
|
||||||
|
self.assertTrue(fs[u'price in \u00a5'])
|
||||||
|
|
||||||
|
|
||||||
def test_from_response_multiple_forms_clickdata(self):
|
def test_from_response_multiple_forms_clickdata(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -413,18 +569,18 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>
|
</form>
|
||||||
""")
|
""")
|
||||||
req = self.request_class.from_response(response, formname='form2', \
|
req = self.request_class.from_response(response, formname='form2', \
|
||||||
clickdata={'name': 'clickable'})
|
clickdata={u'name': u'clickable'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickable'], ['clicked2'])
|
self.assertEqual(fs[b'clickable'], [b'clicked2'])
|
||||||
self.assertEqual(fs['field2'], ['value2'])
|
self.assertEqual(fs[b'field2'], [b'value2'])
|
||||||
self.assertFalse('field1' in fs, fs)
|
self.assertFalse(b'field1' in fs, fs)
|
||||||
|
|
||||||
def test_from_response_override_clickable(self):
|
def test_from_response_override_clickable(self):
|
||||||
response = _buildresponse('''<form><input type="submit" name="clickme" value="one"> </form>''')
|
response = _buildresponse('''<form><input type="submit" name="clickme" value="one"> </form>''')
|
||||||
req = self.request_class.from_response(response, \
|
req = self.request_class.from_response(response, \
|
||||||
formdata={'clickme': 'two'}, clickdata={'name': 'clickme'})
|
formdata={'clickme': 'two'}, clickdata={'name': 'clickme'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs['clickme'], ['two'])
|
self.assertEqual(fs[b'clickme'], [b'two'])
|
||||||
|
|
||||||
def test_from_response_dont_click(self):
|
def test_from_response_dont_click(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -436,8 +592,8 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>""")
|
</form>""")
|
||||||
r1 = self.request_class.from_response(response, dont_click=True)
|
r1 = self.request_class.from_response(response, dont_click=True)
|
||||||
fs = _qs(r1)
|
fs = _qs(r1)
|
||||||
self.assertFalse('clickable1' in fs, fs)
|
self.assertFalse(b'clickable1' in fs, fs)
|
||||||
self.assertFalse('clickable2' in fs, fs)
|
self.assertFalse(b'clickable2' in fs, fs)
|
||||||
|
|
||||||
def test_from_response_ambiguous_clickdata(self):
|
def test_from_response_ambiguous_clickdata(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -468,8 +624,8 @@ class FormRequestTest(RequestTest):
|
|||||||
""")
|
""")
|
||||||
req = self.request_class.from_response(response, clickdata={'nr': 1})
|
req = self.request_class.from_response(response, clickdata={'nr': 1})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertIn('clickable2', fs)
|
self.assertIn(b'clickable2', fs)
|
||||||
self.assertNotIn('clickable1', fs)
|
self.assertNotIn(b'clickable1', fs)
|
||||||
|
|
||||||
def test_from_response_invalid_nr_index_clickdata(self):
|
def test_from_response_invalid_nr_index_clickdata(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -490,7 +646,7 @@ class FormRequestTest(RequestTest):
|
|||||||
"""</form></body></html>""")
|
"""</form></body></html>""")
|
||||||
req = self.request_class.from_response(response, formdata={'bar': 'buz'})
|
req = self.request_class.from_response(response, formdata={'bar': 'buz'})
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'foo': ['xxx'], 'bar': ['buz']})
|
self.assertEqual(fs, {b'foo': [b'xxx'], b'bar': [b'buz']})
|
||||||
|
|
||||||
def test_from_response_errors_formnumber(self):
|
def test_from_response_errors_formnumber(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -634,7 +790,7 @@ class FormRequestTest(RequestTest):
|
|||||||
<select name="i7"/>
|
<select name="i7"/>
|
||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req, to_unicode=True)
|
||||||
self.assertEqual(fs, {'i1': ['i1v2'], 'i2': ['i2v1'], 'i4': ['i4v2', 'i4v3']})
|
self.assertEqual(fs, {'i1': ['i1v2'], 'i2': ['i2v1'], 'i4': ['i4v2', 'i4v3']})
|
||||||
|
|
||||||
def test_from_response_radio(self):
|
def test_from_response_radio(self):
|
||||||
@ -651,7 +807,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['iv2'], 'i2': ['on']})
|
self.assertEqual(fs, {b'i1': [b'iv2'], b'i2': [b'on']})
|
||||||
|
|
||||||
def test_from_response_checkbox(self):
|
def test_from_response_checkbox(self):
|
||||||
res = _buildresponse(
|
res = _buildresponse(
|
||||||
@ -667,7 +823,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['iv2'], 'i2': ['on']})
|
self.assertEqual(fs, {b'i1': [b'iv2'], b'i2': [b'on']})
|
||||||
|
|
||||||
def test_from_response_input_text(self):
|
def test_from_response_input_text(self):
|
||||||
res = _buildresponse(
|
res = _buildresponse(
|
||||||
@ -680,7 +836,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['i1v1'], 'i2': [''], 'i4': ['i4v1']})
|
self.assertEqual(fs, {b'i1': [b'i1v1'], b'i2': [b''], b'i4': [b'i4v1']})
|
||||||
|
|
||||||
def test_from_response_input_hidden(self):
|
def test_from_response_input_hidden(self):
|
||||||
res = _buildresponse(
|
res = _buildresponse(
|
||||||
@ -692,7 +848,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['i1v1'], 'i2': ['']})
|
self.assertEqual(fs, {b'i1': [b'i1v1'], b'i2': [b'']})
|
||||||
|
|
||||||
def test_from_response_input_textarea(self):
|
def test_from_response_input_textarea(self):
|
||||||
res = _buildresponse(
|
res = _buildresponse(
|
||||||
@ -704,7 +860,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(fs, {'i1': ['i1v'], 'i2': [''], 'i3': ['']})
|
self.assertEqual(fs, {b'i1': [b'i1v'], b'i2': [b''], b'i3': [b'']})
|
||||||
|
|
||||||
def test_from_response_descendants(self):
|
def test_from_response_descendants(self):
|
||||||
res = _buildresponse(
|
res = _buildresponse(
|
||||||
@ -725,7 +881,7 @@ class FormRequestTest(RequestTest):
|
|||||||
</form>''')
|
</form>''')
|
||||||
req = self.request_class.from_response(res)
|
req = self.request_class.from_response(res)
|
||||||
fs = _qs(req)
|
fs = _qs(req)
|
||||||
self.assertEqual(set(fs), set(['h2', 'i2', 'i1', 'i3', 'h1', 'i5', 'i4']))
|
self.assertEqual(set(fs), set([b'h2', b'i2', b'i1', b'i3', b'h1', b'i5', b'i4']))
|
||||||
|
|
||||||
def test_from_response_xpath(self):
|
def test_from_response_xpath(self):
|
||||||
response = _buildresponse(
|
response = _buildresponse(
|
||||||
@ -873,12 +1029,18 @@ def _buildresponse(body, **kwargs):
|
|||||||
kwargs.setdefault('encoding', 'utf-8')
|
kwargs.setdefault('encoding', 'utf-8')
|
||||||
return HtmlResponse(**kwargs)
|
return HtmlResponse(**kwargs)
|
||||||
|
|
||||||
def _qs(req):
|
def _qs(req, encoding='utf-8', to_unicode=False):
|
||||||
if req.method == 'POST':
|
if req.method == 'POST':
|
||||||
qs = req.body
|
qs = req.body
|
||||||
else:
|
else:
|
||||||
qs = req.url.partition('?')[2]
|
qs = req.url.partition('?')[2]
|
||||||
return cgi.parse_qs(qs, True)
|
if six.PY2:
|
||||||
|
uqs = unquote(to_native_str(qs, encoding))
|
||||||
|
elif six.PY3:
|
||||||
|
uqs = unquote_to_bytes(qs)
|
||||||
|
if to_unicode:
|
||||||
|
uqs = uqs.decode(encoding)
|
||||||
|
return parse_qs(uqs, True)
|
||||||
|
|
||||||
|
|
||||||
class XmlRpcRequestTest(RequestTest):
|
class XmlRpcRequestTest(RequestTest):
|
||||||
@ -890,7 +1052,9 @@ class XmlRpcRequestTest(RequestTest):
|
|||||||
def _test_request(self, **kwargs):
|
def _test_request(self, **kwargs):
|
||||||
r = self.request_class('http://scrapytest.org/rpc2', **kwargs)
|
r = self.request_class('http://scrapytest.org/rpc2', **kwargs)
|
||||||
self.assertEqual(r.headers[b'Content-Type'], b'text/xml')
|
self.assertEqual(r.headers[b'Content-Type'], b'text/xml')
|
||||||
self.assertEqual(r.body, to_bytes(xmlrpclib.dumps(**kwargs)))
|
self.assertEqual(r.body,
|
||||||
|
to_bytes(xmlrpclib.dumps(**kwargs),
|
||||||
|
encoding=kwargs.get('encoding', 'utf-8')))
|
||||||
self.assertEqual(r.method, 'POST')
|
self.assertEqual(r.method, 'POST')
|
||||||
self.assertEqual(r.encoding, kwargs.get('encoding', 'utf-8'))
|
self.assertEqual(r.encoding, kwargs.get('encoding', 'utf-8'))
|
||||||
self.assertTrue(r.dont_filter, True)
|
self.assertTrue(r.dont_filter, True)
|
||||||
@ -899,14 +1063,13 @@ class XmlRpcRequestTest(RequestTest):
|
|||||||
self._test_request(params=('value',))
|
self._test_request(params=('value',))
|
||||||
self._test_request(params=('username', 'password'), methodname='login')
|
self._test_request(params=('username', 'password'), methodname='login')
|
||||||
self._test_request(params=('response', ), methodresponse='login')
|
self._test_request(params=('response', ), methodresponse='login')
|
||||||
self._test_request(params=(u'pas\xa3',), encoding='utf-8')
|
self._test_request(params=(u'pas£',), encoding='utf-8')
|
||||||
self._test_request(params=(None,), allow_none=1)
|
self._test_request(params=(None,), allow_none=1)
|
||||||
self.assertRaises(TypeError, self._test_request)
|
self.assertRaises(TypeError, self._test_request)
|
||||||
self.assertRaises(TypeError, self._test_request, params=(None,))
|
self.assertRaises(TypeError, self._test_request, params=(None,))
|
||||||
|
|
||||||
@unittest.skipUnless(six.PY2, "TODO")
|
|
||||||
def test_latin1(self):
|
def test_latin1(self):
|
||||||
self._test_request(params=(u'pas\xa3',), encoding='latin')
|
self._test_request(params=(u'pas£',), encoding='latin1')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
Loading…
x
Reference in New Issue
Block a user