from __future__ import print_function import unittest from scrapy.http import Request from scrapy.utils.request import request_fingerprint, _fingerprint_cache, \ request_authenticate, request_httprepr class UtilsRequestTest(unittest.TestCase): def test_request_fingerprint(self): r1 = Request("http://www.example.com/query?id=111&cat=222") r2 = Request("http://www.example.com/query?cat=222&id=111") self.assertEqual(request_fingerprint(r1), request_fingerprint(r1)) self.assertEqual(request_fingerprint(r1), request_fingerprint(r2)) r1 = Request('http://www.example.com/hnnoticiaj1.aspx?78132,199') r2 = Request('http://www.example.com/hnnoticiaj1.aspx?78160,199') self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2)) # make sure caching is working self.assertEqual(request_fingerprint(r1), _fingerprint_cache[r1][None]) r1 = Request("http://www.example.com/members/offers.html") r2 = Request("http://www.example.com/members/offers.html") r2.headers['SESSIONID'] = b"somehash" self.assertEqual(request_fingerprint(r1), request_fingerprint(r2)) r1 = Request("http://www.example.com/") r2 = Request("http://www.example.com/") r2.headers['Accept-Language'] = b'en' r3 = Request("http://www.example.com/") r3.headers['Accept-Language'] = b'en' r3.headers['SESSIONID'] = b"somehash" self.assertEqual(request_fingerprint(r1), request_fingerprint(r2), request_fingerprint(r3)) self.assertEqual(request_fingerprint(r1), request_fingerprint(r1, include_headers=['Accept-Language'])) self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2, include_headers=['Accept-Language'])) self.assertEqual(request_fingerprint(r3, include_headers=['accept-language', 'sessionid']), request_fingerprint(r3, include_headers=['SESSIONID', 'Accept-Language'])) r1 = Request("http://www.example.com") r2 = Request("http://www.example.com", method='POST') r3 = Request("http://www.example.com", method='POST', body=b'request body') self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2)) self.assertNotEqual(request_fingerprint(r2), request_fingerprint(r3)) # cached fingerprint must be cleared on request copy r1 = Request("http://www.example.com") fp1 = request_fingerprint(r1) r2 = r1.replace(url="http://www.example.com/other") fp2 = request_fingerprint(r2) self.assertNotEqual(fp1, fp2) def test_request_authenticate(self): r = Request("http://www.example.com") request_authenticate(r, 'someuser', 'somepass') self.assertEqual(r.headers['Authorization'], b'Basic c29tZXVzZXI6c29tZXBhc3M=') def test_request_httprepr(self): r1 = Request("http://www.example.com") self.assertEqual(request_httprepr(r1), b'GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n') r1 = Request("http://www.example.com/some/page.html?arg=1") self.assertEqual(request_httprepr(r1), b'GET /some/page.html?arg=1 HTTP/1.1\r\nHost: www.example.com\r\n\r\n') r1 = Request("http://www.example.com", method='POST', headers={"Content-type": b"text/html"}, body=b"Some body") self.assertEqual(request_httprepr(r1), b'POST / HTTP/1.1\r\nHost: www.example.com\r\nContent-Type: text/html\r\n\r\nSome body') def test_request_httprepr_for_non_http_request(self): # the representation is not important but it must not fail. request_httprepr(Request("file:///tmp/foo.txt")) request_httprepr(Request("ftp://localhost/tmp/foo.txt")) if __name__ == "__main__": unittest.main()