mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 12:03:40 +00:00
Conflict resolved: reverted r869 and applied the changes from r868
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40872
This commit is contained in:
parent
fefccfaa31
commit
c546585af0
@ -40,8 +40,7 @@ class DuplicatesFilterMiddleware(object):
|
||||
|
||||
def process_spider_output(self, response, result, spider):
|
||||
domain = spider.domain_name
|
||||
# FIXME there's a conflict between test_spidermiddleware_duplicatesfilter.py and test_engine.py
|
||||
#self.filter.add(domain, response.request)
|
||||
self.filter.add(domain, response.request)
|
||||
|
||||
for req in result:
|
||||
if isinstance(req, Request):
|
||||
|
@ -41,7 +41,7 @@ class RegexLinkExtractor(LinkExtractor):
|
||||
"""
|
||||
|
||||
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
|
||||
tags=('a', 'area'), attrs=('href',), canonicalize=True, unique=True):
|
||||
tags=('a', 'area'), attrs=('href'), canonicalize=True, unique=True):
|
||||
self.allow_res = [x if isinstance(x, _re_type) else re.compile(x) for x in allow]
|
||||
self.deny_res = [x if isinstance(x, _re_type) else re.compile(x) for x in deny]
|
||||
self.allow_domains = set(allow_domains)
|
||||
|
@ -15,16 +15,19 @@ class DuplicatesFilterMiddlewareTest(unittest.TestCase):
|
||||
mw = DuplicatesFilterMiddleware()
|
||||
mw.filter.open('scrapytest.org')
|
||||
|
||||
response = Response('')
|
||||
rq = Request('http://scrapytest.org/')
|
||||
response = Response('http://scrapytest.org/')
|
||||
response.request = rq
|
||||
r1 = Request('http://scrapytest.org/1')
|
||||
r2 = Request('http://scrapytest.org/2')
|
||||
r3 = Request('http://scrapytest.org/2')
|
||||
|
||||
filtered = list(mw.process_spider_output(response, [r1, r2, r3], self.spider))
|
||||
|
||||
assert r1 in filtered
|
||||
assert r2 in filtered
|
||||
assert r3 not in filtered
|
||||
self.assertFalse(rq in filtered)
|
||||
self.assertTrue(r1 in filtered)
|
||||
self.assertTrue(r2 in filtered)
|
||||
self.assertFalse(r3 in filtered)
|
||||
|
||||
mw.filter.close('scrapytest.org')
|
||||
|
||||
@ -35,16 +38,16 @@ class SimplePerDomainFilterTest(unittest.TestCase):
|
||||
domain = 'scrapytest.org'
|
||||
filter = SimplePerDomainFilter()
|
||||
filter.open(domain)
|
||||
assert domain in filter
|
||||
self.assertTrue(domain in filter)
|
||||
|
||||
r1 = Request('http://scrapytest.org/1')
|
||||
r2 = Request('http://scrapytest.org/2')
|
||||
r3 = Request('http://scrapytest.org/2')
|
||||
|
||||
assert filter.add(domain, r1)
|
||||
assert filter.add(domain, r2)
|
||||
assert not filter.add(domain, r3)
|
||||
self.assertTrue(filter.add(domain, r1))
|
||||
self.assertTrue(filter.add(domain, r2))
|
||||
self.assertFalse(filter.add(domain, r3))
|
||||
|
||||
filter.close(domain)
|
||||
assert domain not in filter
|
||||
self.assertFalse(domain in filter)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user