1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 15:43:44 +00:00

Conflict resolved: reverted r869 and applied the changes from r868

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40872
This commit is contained in:
samus_ 2009-02-19 05:12:55 +00:00
parent fefccfaa31
commit c546585af0
3 changed files with 14 additions and 12 deletions

View File

@ -40,8 +40,7 @@ class DuplicatesFilterMiddleware(object):
def process_spider_output(self, response, result, spider):
domain = spider.domain_name
# FIXME there's a conflict between test_spidermiddleware_duplicatesfilter.py and test_engine.py
#self.filter.add(domain, response.request)
self.filter.add(domain, response.request)
for req in result:
if isinstance(req, Request):

View File

@ -41,7 +41,7 @@ class RegexLinkExtractor(LinkExtractor):
"""
def __init__(self, allow=(), deny=(), allow_domains=(), deny_domains=(), restrict_xpaths=(),
tags=('a', 'area'), attrs=('href',), canonicalize=True, unique=True):
tags=('a', 'area'), attrs=('href'), canonicalize=True, unique=True):
self.allow_res = [x if isinstance(x, _re_type) else re.compile(x) for x in allow]
self.deny_res = [x if isinstance(x, _re_type) else re.compile(x) for x in deny]
self.allow_domains = set(allow_domains)

View File

@ -15,16 +15,19 @@ class DuplicatesFilterMiddlewareTest(unittest.TestCase):
mw = DuplicatesFilterMiddleware()
mw.filter.open('scrapytest.org')
response = Response('')
rq = Request('http://scrapytest.org/')
response = Response('http://scrapytest.org/')
response.request = rq
r1 = Request('http://scrapytest.org/1')
r2 = Request('http://scrapytest.org/2')
r3 = Request('http://scrapytest.org/2')
filtered = list(mw.process_spider_output(response, [r1, r2, r3], self.spider))
assert r1 in filtered
assert r2 in filtered
assert r3 not in filtered
self.assertFalse(rq in filtered)
self.assertTrue(r1 in filtered)
self.assertTrue(r2 in filtered)
self.assertFalse(r3 in filtered)
mw.filter.close('scrapytest.org')
@ -35,16 +38,16 @@ class SimplePerDomainFilterTest(unittest.TestCase):
domain = 'scrapytest.org'
filter = SimplePerDomainFilter()
filter.open(domain)
assert domain in filter
self.assertTrue(domain in filter)
r1 = Request('http://scrapytest.org/1')
r2 = Request('http://scrapytest.org/2')
r3 = Request('http://scrapytest.org/2')
assert filter.add(domain, r1)
assert filter.add(domain, r2)
assert not filter.add(domain, r3)
self.assertTrue(filter.add(domain, r1))
self.assertTrue(filter.add(domain, r2))
self.assertFalse(filter.add(domain, r3))
filter.close(domain)
assert domain not in filter
self.assertFalse(domain in filter)