1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 14:24:35 +00:00

test for RFPDupeFilter.request_fingerprint overriding

This commit is contained in:
Alexander Chekunkov 2014-02-15 17:47:07 +02:00
parent 3932877bcd
commit 6e4c77c684

View File

@ -1,23 +1,55 @@
import hashlib
import unittest
from scrapy.http import Request
from scrapy.dupefilter import RFPDupeFilter
from scrapy.http import Request
class RFPDupeFilterTest(unittest.TestCase):
    """Tests for duplicate-request detection by ``RFPDupeFilter``."""

    def test_filter(self):
        """A request is unseen the first time and seen on every repeat."""
        dupefilter = RFPDupeFilter()
        dupefilter.open()

        r1 = Request('http://scrapytest.org/1')
        r2 = Request('http://scrapytest.org/2')
        r3 = Request('http://scrapytest.org/2')

        self.assertFalse(dupefilter.request_seen(r1))
        self.assertTrue(dupefilter.request_seen(r1))

        # r3 is a separate Request object built from the same URL as r2,
        # so it must be reported as already seen.
        self.assertFalse(dupefilter.request_seen(r2))
        self.assertTrue(dupefilter.request_seen(r3))

        dupefilter.close('finished')

    def test_request_fingerprint(self):
        """Overriding ``request_fingerprint`` changes what ``request_seen``
        reports as a duplicate.
        """
        r1 = Request('http://scrapytest.org/index.html')
        r2 = Request('http://scrapytest.org/INDEX.html')

        # Baseline: the stock filter is expected to treat URLs that differ
        # only by letter case as two different requests.
        dupefilter = RFPDupeFilter()
        dupefilter.open()

        self.assertFalse(dupefilter.request_seen(r1))
        self.assertFalse(dupefilter.request_seen(r2))

        dupefilter.close('finished')

        class CaseInsensitiveRFPDupeFilter(RFPDupeFilter):
            """Dupe filter whose fingerprint ignores the case of the URL."""

            def request_fingerprint(self, request):
                """Return the SHA-1 hex digest of the lower-cased URL."""
                fp = hashlib.sha1()
                # hashlib only accepts bytes on Python 3, so encode the
                # URL text before feeding it to the hash.
                fp.update(request.url.lower().encode('utf-8'))
                return fp.hexdigest()

        case_insensitive_dupefilter = CaseInsensitiveRFPDupeFilter()
        case_insensitive_dupefilter.open()

        # With the customized fingerprint both URLs collapse to one entry.
        self.assertFalse(case_insensitive_dupefilter.request_seen(r1))
        self.assertTrue(case_insensitive_dupefilter.request_seen(r2))

        case_insensitive_dupefilter.close('finished')