1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 16:24:18 +00:00

RegexLinkExtractorTestCase

This commit is contained in:
alexanderlukanin13 2014-01-11 15:12:54 +06:00
parent 968141cd42
commit a54e31cebc

View File

@ -1,5 +1,6 @@
import re
import unittest
from scrapy.contrib.linkextractors.regex import RegexLinkExtractor
from scrapy.http import HtmlResponse
from scrapy.link import Link
from scrapy.contrib.linkextractors.htmlparser import HtmlParserLinkExtractor
@ -312,5 +313,23 @@ class HtmlParserLinkExtractorTestCase(unittest.TestCase):
])
class RegexLinkExtractorTestCase(unittest.TestCase):
    """Tests for RegexLinkExtractor run against the
    ``sgml_linkextractor.html`` test data."""

    def setUp(self):
        """Build the HtmlResponse that each test extracts links from."""
        self.response = HtmlResponse(
            url='http://example.com/index',
            body=get_testdata('link_extractor', 'sgml_linkextractor.html'),
        )

    def test_extraction(self):
        # Extractor constructed with its default arguments.
        extractor = RegexLinkExtractor()

        # RegexLinkExtractor returns links in arbitrary order, so put them
        # in a deterministic order (by URL) before comparing.
        found = list(extractor.extract_links(self.response))
        found.sort(key=lambda link: link.url)

        expected = [
            Link(url='http://example.com/sample2.html', text=u'sample 2'),
            Link(url='http://example.com/sample3.html', text=u'sample 3 repetition'),
            Link(url='http://www.google.com/something', text=u''),
        ]
        self.assertEqual(found, expected)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()