2009-04-21 13:33:42 +00:00
|
|
|
from unittest import TestCase
|
2014-01-30 15:16:31 +01:00
|
|
|
import re
|
2009-04-21 13:33:42 +00:00
|
|
|
|
|
|
|
from scrapy.http import Response, Request
|
2013-12-28 00:47:32 +06:00
|
|
|
from scrapy.spider import Spider
|
2015-04-20 21:23:05 -03:00
|
|
|
from scrapy.downloadermiddlewares.cookies import CookiesMiddleware
|
2009-04-21 13:33:42 +00:00
|
|
|
|
|
|
|
|
|
|
|
class CookiesMiddlewareTest(TestCase):
    """Exercise ``CookiesMiddleware`` through its request/response hooks.

    Every test drives ``process_request`` / ``process_response`` on a fresh
    middleware instance (see ``setUp``) and inspects the ``Cookie`` header
    found on the outgoing requests afterwards.
    """

    def assertCookieValEqual(self, first, second, msg=None):
        """Assert that two ``Cookie`` header values hold the same cookies.

        Both values are split on ``;`` (and any whitespace following it) and
        the resulting pieces are compared as sorted lists, so the order in
        which the cookies appear in the header does not matter.

        :param first: the header value under test.
        :param second: the expected header value.
        :param msg: optional failure message passed on to ``assertEqual``.
        """
        def split_cookies(value):
            return sorted(re.split(r';\s*', value))

        return self.assertEqual(
            split_cookies(first), split_cookies(second), msg)

    def setUp(self):
        """Create the spider and the middleware instance used by each test."""
        self.spider = Spider('foo')
        self.mw = CookiesMiddleware()

    def tearDown(self):
        """Drop the middleware so the next test starts from a new instance."""
        del self.mw

    def test_basic(self):
        """A cookie set by a response is sent with a later request."""
        headers = {'Set-Cookie': 'C1=value1; path=/'}
        req = Request('http://scrapytest.org/')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertNotIn('Cookie', req.headers)

        res = Response('http://scrapytest.org/', headers=headers)
        self.assertIs(self.mw.process_response(req, res, self.spider), res)

        req2 = Request('http://scrapytest.org/sub1/')
        self.assertIsNone(self.mw.process_request(req2, self.spider))
        self.assertEqual(req2.headers.get('Cookie'), "C1=value1")

    def test_dont_merge_cookies(self):
        """``dont_merge_cookies`` keeps a request/response pair off the jar."""
        # merge some cookies into jar
        headers = {'Set-Cookie': 'C1=value1; path=/'}
        req = Request('http://scrapytest.org/')
        res = Response('http://scrapytest.org/', headers=headers)
        self.assertIs(self.mw.process_response(req, res, self.spider), res)

        # test Cookie header is not set on the request
        req = Request('http://scrapytest.org/dontmerge',
                      meta={'dont_merge_cookies': 1})
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertNotIn('Cookie', req.headers)

        # check that returned cookies are not merged back to jar
        res = Response('http://scrapytest.org/dontmerge',
                       headers={'Set-Cookie': 'dont=mergeme; path=/'})
        self.assertIs(self.mw.process_response(req, res, self.spider), res)

        # check that a regular request only gets the cookie stored earlier
        req = Request('http://scrapytest.org/mergeme')
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers.get('Cookie'), 'C1=value1')

        # check that cookies are merged when dont_merge_cookies is passed as 0
        req = Request('http://scrapytest.org/mergeme',
                      meta={'dont_merge_cookies': 0})
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers.get('Cookie'), 'C1=value1')

    def test_complex_cookies(self):
        """Cookies given as dicts are only sent to URLs under their path."""
        # merge some cookies into jar
        cookies = [
            {'name': 'C1', 'value': 'value1', 'path': '/foo',
             'domain': 'scrapytest.org'},
            {'name': 'C2', 'value': 'value2', 'path': '/bar',
             'domain': 'scrapytest.org'},
            {'name': 'C3', 'value': 'value3', 'path': '/foo',
             'domain': 'scrapytest.org'},
            {'name': 'C4', 'value': 'value4', 'path': '/foo',
             'domain': 'scrapy.org'},
        ]

        req = Request('http://scrapytest.org/', cookies=cookies)
        self.mw.process_request(req, self.spider)

        # embed C1 and C3 for scrapytest.org/foo (either order is accepted)
        req = Request('http://scrapytest.org/foo')
        self.mw.process_request(req, self.spider)
        self.assertIn(
            req.headers.get('Cookie'),
            ('C1=value1; C3=value3', 'C3=value3; C1=value1'))

        # embed C2 for scrapytest.org/bar
        req = Request('http://scrapytest.org/bar')
        self.mw.process_request(req, self.spider)
        self.assertEqual(req.headers.get('Cookie'), 'C2=value2')

        # embed nothing for scrapytest.org/baz
        req = Request('http://scrapytest.org/baz')
        self.mw.process_request(req, self.spider)
        self.assertNotIn('Cookie', req.headers)

    def test_merge_request_cookies(self):
        """Request-supplied cookies and response cookies end up together."""
        req = Request('http://scrapytest.org/', cookies={'galleta': 'salada'})
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers.get('Cookie'), 'galleta=salada')

        headers = {'Set-Cookie': 'C1=value1; path=/'}
        res = Response('http://scrapytest.org/', headers=headers)
        self.assertIs(self.mw.process_response(req, res, self.spider), res)

        req2 = Request('http://scrapytest.org/sub1/')
        self.assertIsNone(self.mw.process_request(req2, self.spider))
        self.assertCookieValEqual(
            req2.headers.get('Cookie'), "C1=value1; galleta=salada")

    def test_cookiejar_key(self):
        """Requests with different ``cookiejar`` meta values stay separate.

        Also covers cookies received from a host with an explicit port and a
        request with a non-HTTP (``file://``) URL.
        """
        req = Request('http://scrapytest.org/',
                      cookies={'galleta': 'salada'},
                      meta={'cookiejar': "store1"})
        self.assertIsNone(self.mw.process_request(req, self.spider))
        self.assertEqual(req.headers.get('Cookie'), 'galleta=salada')

        headers = {'Set-Cookie': 'C1=value1; path=/'}
        res = Response('http://scrapytest.org/', headers=headers, request=req)
        self.assertIs(self.mw.process_response(req, res, self.spider), res)

        # reusing the response meta carries the "store1" cookiejar key over
        req2 = Request('http://scrapytest.org/', meta=res.meta)
        self.assertIsNone(self.mw.process_request(req2, self.spider))
        self.assertCookieValEqual(
            req2.headers.get('Cookie'), 'C1=value1; galleta=salada')

        req3 = Request('http://scrapytest.org/',
                       cookies={'galleta': 'dulce'},
                       meta={'cookiejar': "store2"})
        self.assertIsNone(self.mw.process_request(req3, self.spider))
        self.assertEqual(req3.headers.get('Cookie'), 'galleta=dulce')

        headers = {'Set-Cookie': 'C2=value2; path=/'}
        res2 = Response('http://scrapytest.org/', headers=headers,
                        request=req3)
        self.assertIs(self.mw.process_response(req3, res2, self.spider), res2)

        req4 = Request('http://scrapytest.org/', meta=res2.meta)
        self.assertIsNone(self.mw.process_request(req4, self.spider))
        self.assertCookieValEqual(
            req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')

        # cookies from hosts with port
        req5_1 = Request('http://scrapytest.org:1104/')
        self.assertIsNone(self.mw.process_request(req5_1, self.spider))

        headers = {'Set-Cookie': 'C1=value1; path=/'}
        res5_1 = Response('http://scrapytest.org:1104/', headers=headers,
                          request=req5_1)
        self.assertIs(
            self.mw.process_response(req5_1, res5_1, self.spider), res5_1)

        req5_2 = Request('http://scrapytest.org:1104/some-redirected-path')
        self.assertIsNone(self.mw.process_request(req5_2, self.spider))
        self.assertEqual(req5_2.headers.get('Cookie'), 'C1=value1')

        req5_3 = Request('http://scrapytest.org/some-redirected-path')
        self.assertIsNone(self.mw.process_request(req5_3, self.spider))
        self.assertEqual(req5_3.headers.get('Cookie'), 'C1=value1')

        # skip cookie retrieval for non-HTTP requests
        req6 = Request('file:///scrapy/sometempfile')
        self.assertIsNone(self.mw.process_request(req6, self.spider))
        self.assertIsNone(req6.headers.get('Cookie'))

    def test_local_domain(self):
        """Cookies are sent to a host name that contains no dot."""
        request = Request("http://example-host/",
                          cookies={'currencyCookie': 'USD'})
        self.assertIsNone(self.mw.process_request(request, self.spider))
        self.assertIn('Cookie', request.headers)
        self.assertIn('currencyCookie', request.headers['Cookie'])
|
|
|
|
|