mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-21 07:52:49 +00:00
some improvements to XPathSelectors:
- x() method now returns the same XPathSelector type as its parent - added tests to check this - added tests to verify that XML and HTML XPathSelectors behave differently when parsing some non-trivial markup --HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4035
This commit is contained in:
parent
1f7f0d0994
commit
d993f493b5
@ -26,7 +26,7 @@ class XPathTestCase(unittest.TestCase):
|
||||
xl = xpath.x('//input')
|
||||
self.assertEqual(2, len(xl))
|
||||
for x in xl:
|
||||
assert isinstance(x, XPathSelector)
|
||||
assert isinstance(x, HtmlXPathSelector)
|
||||
|
||||
self.assertEqual(xpath.x('//input').extract(),
|
||||
[x.extract() for x in xpath.x('//input')])
|
||||
@ -41,6 +41,26 @@ class XPathTestCase(unittest.TestCase):
|
||||
self.assertEqual([x.extract() for x in xpath.x("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
|
||||
[u'12'])
|
||||
|
||||
def test_selector_same_type(self):
|
||||
"""Test XPathSelector returning the same type in x() method"""
|
||||
text = '<p>test<p>'
|
||||
assert isinstance(XmlXPathSelector(text=text).x("//p")[0],
|
||||
XmlXPathSelector)
|
||||
assert isinstance(HtmlXPathSelector(text=text).x("//p")[0],
|
||||
HtmlXPathSelector)
|
||||
|
||||
def test_selector_xml_html(self):
|
||||
"""Test that XML and HTML XPathSelector's behave differently"""
|
||||
|
||||
# some text which is parsed differently by XML and HTML flavors
|
||||
text = '<div><img src="a.jpg"><p>Hello</div>'
|
||||
|
||||
self.assertEqual(XmlXPathSelector(text=text).x("//div").extract(),
|
||||
[u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
|
||||
|
||||
self.assertEqual(HtmlXPathSelector(text=text).x("//div").extract(),
|
||||
[u'<div><img src="a.jpg"><p>Hello</p></div>'])
|
||||
|
||||
def test_selector_nested(self):
|
||||
"""Nested selector tests"""
|
||||
body = """<body>
|
||||
|
@ -39,10 +39,11 @@ class XPathSelector(object):
|
||||
if hasattr(self.xmlNode, 'xpathEval'):
|
||||
self.doc.xpathContext.setContextNode(self.xmlNode)
|
||||
xpath_result = self.doc.xpathContext.xpathEval(xpath)
|
||||
cls = type(self)
|
||||
if hasattr(xpath_result, '__iter__'):
|
||||
return XPathSelectorList([XPathSelector(node=node, parent=self, expr=xpath) for node in xpath_result])
|
||||
return XPathSelectorList([cls(node=node, parent=self, expr=xpath) for node in xpath_result])
|
||||
else:
|
||||
return XPathSelectorList([XPathSelector(node=xpath_result, parent=self, expr=xpath)])
|
||||
return XPathSelectorList([cls(node=xpath_result, parent=self, expr=xpath)])
|
||||
else:
|
||||
return XPathSelectorList([])
|
||||
|
||||
@ -77,7 +78,7 @@ class XPathSelector(object):
|
||||
self.doc.xpathContext.xpathRegisterNs(prefix, uri)
|
||||
|
||||
def __str__(self):
|
||||
return "<XPathSelector (%s) xpath=%s>" % (getattr(self.xmlNode, 'name'), self.expr)
|
||||
return "<%s (%s) xpath=%s>" % (type(self).__name__, getattr(self.xmlNode, 'name'), self.expr)
|
||||
|
||||
__repr__ = __str__
|
||||
|
||||
@ -100,12 +101,15 @@ class XPathSelectorList(list):
|
||||
XPathSelector of the list"""
|
||||
return [x.extract() if isinstance(x, XPathSelector) else x for x in self]
|
||||
|
||||
|
||||
class XmlXPathSelector(XPathSelector):
|
||||
"""XPathSelector for XML content"""
|
||||
def __init__(self, response=None, text=None):
|
||||
XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_xml)
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['constructor'] = xmlDoc_from_xml
|
||||
XPathSelector.__init__(self, *args, **kwargs)
|
||||
|
||||
class HtmlXPathSelector(XPathSelector):
|
||||
"""XPathSelector for HTML content"""
|
||||
def __init__(self, response=None, text=None):
|
||||
XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_html)
|
||||
def __init__(self, *args, **kwargs):
|
||||
kwargs['constructor'] = xmlDoc_from_html
|
||||
XPathSelector.__init__(self, *args, **kwargs)
|
||||
|
Loading…
x
Reference in New Issue
Block a user