1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 06:13:24 +00:00
scrapy/tests/test_utils_request.py

88 lines
4.2 KiB
Python

import unittest
from scrapy.http import Request
from scrapy.utils.request import request_fingerprint, _fingerprint_cache, \
request_authenticate, request_httprepr
class UtilsRequestTest(unittest.TestCase):
    """Tests for the helpers imported from ``scrapy.utils.request``."""

    def test_request_fingerprint(self):
        """Check which request attributes do and do not alter the fingerprint."""
        # Query-argument order must not matter, and the result must be stable.
        r1 = Request("http://www.example.com/query?id=111&cat=222")
        r2 = Request("http://www.example.com/query?cat=222&id=111")
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r1))
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2))

        # Different query strings must yield different fingerprints.
        r1 = Request('http://www.example.com/hnnoticiaj1.aspx?78132,199')
        r2 = Request('http://www.example.com/hnnoticiaj1.aspx?78160,199')
        self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2))

        # make sure caching is working
        # NOTE(review): the (None, False) key presumably corresponds to the
        # default (include_headers, keep_fragments) arguments -- confirm
        # against scrapy.utils.request.
        self.assertEqual(request_fingerprint(r1), _fingerprint_cache[r1][(None, False)])

        # Headers are not part of the fingerprint by default.
        r1 = Request("http://www.example.com/members/offers.html")
        r2 = Request("http://www.example.com/members/offers.html")
        r2.headers['SESSIONID'] = b"somehash"
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2))

        r1 = Request("http://www.example.com/")
        r2 = Request("http://www.example.com/")
        r2.headers['Accept-Language'] = b'en'
        r3 = Request("http://www.example.com/")
        r3.headers['Accept-Language'] = b'en'
        r3.headers['SESSIONID'] = b"somehash"

        # assertEqual() only compares its first two arguments (the third one
        # is the failure message), so every pair is asserted explicitly.
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2))
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r3))

        # Headers listed in include_headers do take part in the fingerprint,
        # but only when the request actually carries them.
        self.assertEqual(
            request_fingerprint(r1),
            request_fingerprint(r1, include_headers=['Accept-Language']),
        )
        self.assertNotEqual(
            request_fingerprint(r1),
            request_fingerprint(r2, include_headers=['Accept-Language']),
        )
        # Neither the case nor the order of include_headers matters.
        self.assertEqual(
            request_fingerprint(r3, include_headers=['accept-language', 'sessionid']),
            request_fingerprint(r3, include_headers=['SESSIONID', 'Accept-Language']),
        )

        # URL fragments are ignored unless keep_fragments=True is passed.
        r1 = Request("http://www.example.com/test.html")
        r2 = Request("http://www.example.com/test.html#fragment")
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2))
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r1, keep_fragments=True))
        self.assertNotEqual(request_fingerprint(r2), request_fingerprint(r2, keep_fragments=True))
        self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2, keep_fragments=True))

        # The HTTP method and the body both affect the fingerprint.
        r1 = Request("http://www.example.com")
        r2 = Request("http://www.example.com", method='POST')
        r3 = Request("http://www.example.com", method='POST', body=b'request body')
        self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2))
        self.assertNotEqual(request_fingerprint(r2), request_fingerprint(r3))

        # cached fingerprint must be cleared on request copy
        r1 = Request("http://www.example.com")
        fp1 = request_fingerprint(r1)
        r2 = r1.replace(url="http://www.example.com/other")
        fp2 = request_fingerprint(r2)
        self.assertNotEqual(fp1, fp2)

    def test_request_authenticate(self):
        """request_authenticate() must set a Basic ``Authorization`` header."""
        r = Request("http://www.example.com")
        request_authenticate(r, 'someuser', 'somepass')
        self.assertEqual(r.headers['Authorization'], b'Basic c29tZXVzZXI6c29tZXBhc3M=')

    def test_request_httprepr(self):
        """request_httprepr() must return the raw HTTP/1.1 bytes of a request."""
        r1 = Request("http://www.example.com")
        self.assertEqual(
            request_httprepr(r1),
            b'GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n',
        )

        r1 = Request("http://www.example.com/some/page.html?arg=1")
        self.assertEqual(
            request_httprepr(r1),
            b'GET /some/page.html?arg=1 HTTP/1.1\r\nHost: www.example.com\r\n\r\n',
        )

        # The header is supplied as "Content-type" and is expected back as
        # "Content-Type", followed by the body after the blank line.
        r1 = Request(
            "http://www.example.com",
            method='POST',
            headers={"Content-type": b"text/html"},
            body=b"Some body",
        )
        self.assertEqual(
            request_httprepr(r1),
            b'POST / HTTP/1.1\r\nHost: www.example.com\r\nContent-Type: text/html\r\n\r\nSome body',
        )

    def test_request_httprepr_for_non_http_request(self):
        """request_httprepr() must not raise for non-HTTP URL schemes."""
        # the representation is not important but it must not fail.
        request_httprepr(Request("file:///tmp/foo.txt"))
        request_httprepr(Request("ftp://localhost/tmp/foo.txt"))
if __name__ == "__main__":
    # Run the tests in this module when the file is executed as a script.
    unittest.main()