2008-06-28 23:37:28 +00:00
|
|
|
import unittest
|
2009-04-12 08:31:55 +00:00
|
|
|
import cgi
|
2009-08-18 20:40:04 -03:00
|
|
|
import weakref
|
2009-08-17 21:16:55 -03:00
|
|
|
from cStringIO import StringIO
|
|
|
|
from urlparse import urlparse
|
2009-04-12 08:31:55 +00:00
|
|
|
|
2009-08-17 21:16:55 -03:00
|
|
|
from scrapy.http import Request, FormRequest, XmlRpcRequest, Headers, Response
|
2008-06-28 23:37:28 +00:00
|
|
|
|
|
|
|
class RequestTest(unittest.TestCase):
    """Tests for the base ``Request`` class.

    Covers constructor validation and defaults, header handling, identity
    semantics, url/body encoding, ``copy()``, ``replace()`` and
    weak-referenceability.
    """

    def test_init(self):
        """Check constructor argument validation and default attributes."""
        # Request requires url in the constructor
        self.assertRaises(Exception, Request)
        # url argument must be basestring
        self.assertRaises(TypeError, Request, 123)

        r = Request("http://www.example.com")
        self.assertTrue(isinstance(r.url, str))
        self.assertEqual(r.url, "http://www.example.com")
        self.assertEqual(r.method, "GET")

        r.url = "http://www.example.com/other"
        self.assertTrue(isinstance(r.url, str))

        self.assertTrue(isinstance(r.headers, Headers))
        self.assertEqual(r.headers, {})
        self.assertEqual(r.meta, {})

        meta = {"lala": "lolo"}
        headers = {"caca": "coco"}
        r = Request("http://www.example.com", meta=meta, headers=headers, body="a body")

        # the request must hold equal-but-distinct meta and headers objects
        self.assertTrue(r.meta is not meta)
        self.assertEqual(r.meta, meta)
        self.assertTrue(r.headers is not headers)
        self.assertEqual(r.headers["caca"], "coco")

    def test_headers(self):
        """Check that headers are copied per request and stored as str."""
        # Different ways of setting headers attribute
        url = 'http://www.scrapy.org'
        headers = {'Accept': 'gzip', 'Custom-Header': 'nothing to tell you'}
        r = Request(url=url, headers=headers)
        p = Request(url=url, headers=r.headers)

        self.assertEqual(r.headers, p.headers)
        self.assertFalse(r.headers is headers)
        self.assertFalse(p.headers is r.headers)

        # headers must not be unicode
        h = Headers({'key1': u'val1', u'key2': 'val2'})
        h[u'newkey'] = u'newval'
        for k, v in h.iteritems():
            self.assertTrue(isinstance(k, str))
            # each header value is iterated item by item
            for s in v:
                self.assertTrue(isinstance(s, str))

    def test_eq(self):
        """Check that two requests for the same url are distinct objects."""
        url = 'http://www.scrapy.org'
        r1 = Request(url=url)
        r2 = Request(url=url)
        self.assertNotEqual(r1, r2)

        # both must survive as separate members of a set
        set_ = set()
        set_.add(r1)
        set_.add(r2)
        self.assertEqual(len(set_), 2)

    def test_url(self):
        """Request url tests"""
        r = Request(url="http://www.scrapy.org/path")
        self.assertEqual(r.url, "http://www.scrapy.org/path")

        # url quoting on attribute assign
        r.url = "http://www.scrapy.org/blank%20space"
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")
        r.url = "http://www.scrapy.org/blank space"
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")

        # url quoting on creation
        r = Request(url="http://www.scrapy.org/blank%20space")
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")
        r = Request(url="http://www.scrapy.org/blank space")
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")

        # url coercion to string
        r.url = u"http://www.scrapy.org/test"
        self.assertTrue(isinstance(r.url, str))

        # url encoding
        r1 = Request(url=u"http://www.scrapy.org/price/\xa3", encoding="utf-8")
        r2 = Request(url=u"http://www.scrapy.org/price/\xa3", encoding="latin1")
        self.assertEqual(r1.url, "http://www.scrapy.org/price/%C2%A3")
        self.assertEqual(r2.url, "http://www.scrapy.org/price/%A3")

    def test_body(self):
        """Check the default body and unicode body encoding."""
        r1 = Request(url="http://www.example.com/")
        self.assertTrue(r1.body == '')

        r2 = Request(url="http://www.example.com/", body="")
        self.assertTrue(isinstance(r2.body, str))
        self.assertEqual(r2.encoding, 'utf-8')  # default encoding

        r3 = Request(url="http://www.example.com/", body=u"Price: \xa3100", encoding='utf-8')
        self.assertTrue(isinstance(r3.body, str))
        self.assertEqual(r3.body, "Price: \xc2\xa3100")

        r4 = Request(url="http://www.example.com/", body=u"Price: \xa3100", encoding='latin1')
        self.assertTrue(isinstance(r4.body, str))
        self.assertEqual(r4.body, "Price: \xa3100")

    def test_copy(self):
        """Test Request copy"""

        def somecallback():
            pass

        r1 = Request("http://www.example.com", callback=somecallback)
        r1.meta['foo'] = 'bar'
        r2 = r1.copy()

        self.assertTrue(r1.deferred is not r2.deferred)

        # make sure meta dict is shallow copied
        self.assertTrue(r1.meta is not r2.meta,
                        "meta must be a shallow copy, not identical")
        self.assertEqual(r1.meta, r2.meta)

        # make sure headers attribute is shallow copied
        self.assertTrue(r1.headers is not r2.headers,
                        "headers must be a shallow copy, not identical")
        self.assertEqual(r1.headers, r2.headers)
        self.assertEqual(r1.encoding, r2.encoding)
        self.assertEqual(r1.dont_filter, r2.dont_filter)

        # Request.body can be identical since it's an immutable object (str)

    def test_copy_inherited_classes(self):
        """Test Request children copies preserve their class"""

        class CustomRequest(Request):
            pass

        # NOTE(review): two positional arguments are passed here; which
        # constructor parameter the second one binds to depends on
        # Request.__init__, which is not visible in this module -- confirm
        # this call is still intended.
        r1 = CustomRequest('example.com', 'http://www.example.com')
        r2 = r1.copy()

        self.assertTrue(type(r2) is CustomRequest)

    def test_replace(self):
        """Test Request.replace() method"""
        hdrs = Headers({"key": "value"})
        r1 = Request("http://www.example.com")
        r2 = r1.replace(method="POST", body="New body", headers=hdrs)
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.method, r2.method), ("GET", "POST"))
        self.assertEqual((r1.body, r2.body), ('', "New body"))
        self.assertEqual((r1.headers, r2.headers), ({}, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = Request("http://www.example.com", meta={'a': 1}, dont_filter=True)
        r4 = r3.replace(url="http://www.example.com/2", body='', meta={}, dont_filter=False)
        self.assertEqual(r4.url, "http://www.example.com/2")
        self.assertEqual(r4.body, '')
        self.assertEqual(r4.meta, {})
        self.assertTrue(r4.dont_filter is False)

    def test_weakref_slots(self):
        """Check that classes are using slots and are weak-referenceable"""
        for cls in [Request, FormRequest]:
            x = cls('http://www.example.com')
            # weakref.ref() raises TypeError for objects that cannot be
            # weakly referenced, which would fail the test here
            weakref.ref(x)
            self.assertFalse(hasattr(x, '__dict__'),
                             "%s does not use __slots__" % x.__class__.__name__)
|
|
|
|
|
2009-01-26 02:57:03 +00:00
|
|
|
|
2009-03-03 00:37:06 +00:00
|
|
|
class FormRequestTest(unittest.TestCase):
    """Tests for ``FormRequest``.

    Covers url-encoding of ``formdata`` into the request body and building
    requests from an HTML form via ``FormRequest.from_response()``.
    """

    def test_empty_formdata(self):
        """Check that an empty formdata dict gives an empty body."""
        r1 = FormRequest("http://www.example.com", formdata={})
        self.assertEqual(r1.body, '')

    def test_default_encoding(self):
        """Check method, encoding, body and content type with defaults."""
        # using default encoding (utf-8)
        data = {'one': 'two', 'price': '\xc2\xa3 100'}
        r2 = FormRequest("http://www.example.com", formdata=data)
        self.assertEqual(r2.method, 'POST')
        self.assertEqual(r2.encoding, 'utf-8')
        # formdata is a plain dict, so the order of the encoded pairs is not
        # something this test should depend on: compare them sorted
        self.assertEqual(sorted(r2.body.split('&')),
                         ['one=two', 'price=%C2%A3+100'])
        self.assertEqual(r2.headers['Content-Type'], 'application/x-www-form-urlencoded')

    def test_custom_encoding(self):
        """Check that unicode values are encoded with the given encoding."""
        data = {'price': u'\xa3 100'}
        r3 = FormRequest("http://www.example.com", formdata=data, encoding='latin1')
        self.assertEqual(r3.encoding, 'latin1')
        self.assertEqual(r3.body, 'price=%A3+100')

    def test_multi_key_values(self):
        """Check that a list value is encoded as a repeated key."""
        # using multiples values for a single key
        data = {'price': u'\xa3 100', 'colours': ['red', 'blue', 'green']}
        r3 = FormRequest("http://www.example.com", formdata=data)
        pairs = r3.body.split('&')
        # values of the repeated key must keep the order of the input list
        self.assertEqual([p for p in pairs if p.startswith('colours=')],
                         ['colours=red', 'colours=blue', 'colours=green'])
        # the order between different keys comes from dict iteration, so
        # compare the complete set of pairs order-insensitively
        self.assertEqual(sorted(pairs),
                         ['colours=blue', 'colours=green', 'colours=red',
                          'price=%C2%A3+100'])

    def test_from_response_post(self):
        """Check that a POST form is merged with the extra formdata."""
        respbody = """
        <form action="post.php" method="POST">
        <input type="hidden" name="test" value="val1">
        <input type="hidden" name="test" value="val2">
        <input type="hidden" name="test2" value="xxx">
        </form>
        """
        response = Response("http://www.example.com/this/list.html", body=respbody)
        r1 = FormRequest.from_response(response, formdata={'one': ['two', 'three'], 'six': 'seven'}, callback=lambda x: x)
        self.assertEqual(r1.method, 'POST')
        self.assertEqual(r1.headers['Content-type'], 'application/x-www-form-urlencoded')
        # parse the encoded body back into fields to check them by name
        fs = cgi.FieldStorage(StringIO(r1.body), r1.headers, environ={"REQUEST_METHOD": "POST"})
        self.assertEqual(r1.url, "http://www.example.com/this/post.php")
        self.assertEqual(set([f.value for f in fs["test"]]), set(["val1", "val2"]))
        self.assertEqual(set([f.value for f in fs["one"]]), set(["two", "three"]))
        self.assertEqual(fs['test2'].value, 'xxx')
        self.assertEqual(fs['six'].value, 'seven')

    def test_from_response_get(self):
        """Check that a GET form puts all fields in the url query string."""
        respbody = """
        <form action="get.php" method="GET">
        <input type="hidden" name="test" value="val1">
        <input type="hidden" name="test" value="val2">
        <input type="hidden" name="test2" value="xxx">
        </form>
        """
        response = Response("http://www.example.com/this/list.html", body=respbody)
        r1 = FormRequest.from_response(response, formdata={'one': ['two', 'three'], 'six': 'seven'})
        self.assertEqual(r1.method, 'GET')
        self.assertEqual(urlparse(r1.url).hostname, "www.example.com")
        self.assertEqual(urlparse(r1.url).path, "/this/get.php")
        urlargs = cgi.parse_qs(urlparse(r1.url).query)
        self.assertEqual(set(urlargs['test']), set(['val1', 'val2']))
        self.assertEqual(set(urlargs['one']), set(['two', 'three']))
        self.assertEqual(urlargs['test2'], ['xxx'])
        self.assertEqual(urlargs['six'], ['seven'])

    def test_from_response_override_params(self):
        """Check that formdata overrides a form field of the same name."""
        respbody = """
        <form action="get.php" method="POST">
        <input type="hidden" name="one" value="1">
        <input type="hidden" name="two" value="3">
        </form>
        """
        response = Response("http://www.example.com/this/list.html", body=respbody)
        r1 = FormRequest.from_response(response, formdata={'two': '2'})
        fs = cgi.FieldStorage(StringIO(r1.body), r1.headers, environ={"REQUEST_METHOD": "POST"})
        self.assertEqual(fs['one'].value, '1')
        self.assertEqual(fs['two'].value, '2')

    def test_from_response_errors_noform(self):
        """Check that a response without any form raises ValueError."""
        respbody = """<html></html>"""
        response = Response("http://www.example.com/lala.html", body=respbody)
        self.assertRaises(ValueError, FormRequest.from_response, response)

    def test_from_response_errors_formnumber(self):
        """Check that an out-of-range formnumber raises IndexError."""
        respbody = """
        <form action="get.php" method="GET">
        <input type="hidden" name="test" value="val1">
        <input type="hidden" name="test" value="val2">
        <input type="hidden" name="test2" value="xxx">
        </form>
        """
        response = Response("http://www.example.com/lala.html", body=respbody)
        # the document holds a single form, so index 1 does not exist
        self.assertRaises(IndexError, FormRequest.from_response, response, formnumber=1)
|
2009-03-03 00:37:06 +00:00
|
|
|
|
|
|
|
class XmlRpcRequestTest(unittest.TestCase):
    """Tests for ``XmlRpcRequest``: generated XML-RPC payload and copying."""

    def test_basic(self):
        """Check content type, serialized body, method and dont_filter."""
        r = XmlRpcRequest('http://scrapytest.org/rpc2', methodname='login', params=('username', 'password'))
        self.assertEqual(r.headers['Content-Type'], 'text/xml')
        # adjacent literals concatenate into the single expected payload
        expected_body = (
            "<?xml version='1.0'?>\n"
            "<methodCall>\n"
            "<methodName>login</methodName>\n"
            "<params>\n"
            "<param>\n"
            "<value><string>username</string></value>\n"
            "</param>\n"
            "<param>\n"
            "<value><string>password</string></value>\n"
            "</param>\n"
            "</params>\n"
            "</methodCall>\n"
        )
        self.assertEqual(r.body, expected_body)
        self.assertEqual(r.method, 'POST')
        # assertTrue's second parameter is the failure message, not an
        # expected value, so only the checked expression is passed
        self.assertTrue(r.dont_filter)

    def test_copy(self):
        """Test XmlRpcRequest copy"""

        def somecallback():
            pass

        r1 = XmlRpcRequest("http://www.example.com", callback=somecallback,
                           methodname='login', params=('username', 'password'))
        r1.meta['foo'] = 'bar'
        r2 = r1.copy()

        self.assertTrue(r1.deferred is not r2.deferred)

        # make sure meta dict is shallow copied
        self.assertTrue(r1.meta is not r2.meta,
                        "meta must be a shallow copy, not identical")
        self.assertEqual(r1.meta, r2.meta)

        # make sure headers attribute is shallow copied
        self.assertTrue(r1.headers is not r2.headers,
                        "headers must be a shallow copy, not identical")
        self.assertEqual(r1.headers, r2.headers)
        self.assertEqual(r1.encoding, r2.encoding)
        self.assertEqual(r1.dont_filter, r2.dont_filter)
        self.assertEqual(r1.body, r2.body)
|
|
|
|
|
|
|
|
|
2008-06-28 23:37:28 +00:00
|
|
|
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|