1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-21 07:52:49 +00:00

some improvements to XPathSelectors:

- x() method now returns the same XPathSelector type as its parent
- added tests to check this
- added tests to verify that XML and HTML XPathSelector behave differently when
  parsing some non trivial markup

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4035
This commit is contained in:
Pablo Hoffman 2008-07-01 02:21:12 +00:00
parent 1f7f0d0994
commit d993f493b5
2 changed files with 32 additions and 8 deletions

View File

@ -26,7 +26,7 @@ class XPathTestCase(unittest.TestCase):
xl = xpath.x('//input')
self.assertEqual(2, len(xl))
for x in xl:
assert isinstance(x, XPathSelector)
assert isinstance(x, HtmlXPathSelector)
self.assertEqual(xpath.x('//input').extract(),
[x.extract() for x in xpath.x('//input')])
@ -41,6 +41,26 @@ class XPathTestCase(unittest.TestCase):
self.assertEqual([x.extract() for x in xpath.x("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
[u'12'])
def test_selector_same_type(self):
    """Test that x() returns selectors of the same class as the one it was called on."""
    # NOTE(review): the second tag is '<p>', not '</p>' -- presumably a typo
    # for a closing tag; left unchanged, confirm intent.
    text = '<p>test<p>'
    # Use TestCase assertions rather than bare ``assert`` so the checks are
    # not stripped when Python runs with -O.
    self.assertTrue(isinstance(XmlXPathSelector(text=text).x("//p")[0],
                               XmlXPathSelector))
    self.assertTrue(isinstance(HtmlXPathSelector(text=text).x("//p")[0],
                               HtmlXPathSelector))
def test_selector_xml_html(self):
    """Test that the XML and HTML selector flavors extract the same markup differently."""
    # Markup with an unclosed <img> and <p>; the expected values below show
    # the XML flavor nesting <p> inside <img> while the HTML flavor keeps
    # them as siblings.
    text = '<div><img src="a.jpg"><p>Hello</div>'
    expected_by_flavor = [
        (XmlXPathSelector,
         [u'<div><img src="a.jpg"><p>Hello</p></img></div>']),
        (HtmlXPathSelector,
         [u'<div><img src="a.jpg"><p>Hello</p></div>']),
    ]
    for selector_cls, expected in expected_by_flavor:
        extracted = selector_cls(text=text).x("//div").extract()
        self.assertEqual(extracted, expected)
def test_selector_nested(self):
"""Nested selector tests"""
body = """<body>

View File

@ -39,10 +39,11 @@ class XPathSelector(object):
if hasattr(self.xmlNode, 'xpathEval'):
self.doc.xpathContext.setContextNode(self.xmlNode)
xpath_result = self.doc.xpathContext.xpathEval(xpath)
cls = type(self)
if hasattr(xpath_result, '__iter__'):
return XPathSelectorList([XPathSelector(node=node, parent=self, expr=xpath) for node in xpath_result])
return XPathSelectorList([cls(node=node, parent=self, expr=xpath) for node in xpath_result])
else:
return XPathSelectorList([XPathSelector(node=xpath_result, parent=self, expr=xpath)])
return XPathSelectorList([cls(node=xpath_result, parent=self, expr=xpath)])
else:
return XPathSelectorList([])
@ -77,7 +78,7 @@ class XPathSelector(object):
self.doc.xpathContext.xpathRegisterNs(prefix, uri)
def __str__(self):
    """Return a short debug representation of this selector.

    The result has the form ``<ClassName (node-name) xpath=expr>``. The class
    name is taken from the runtime type, so subclasses are reported under
    their own name rather than a hard-coded "XPathSelector".
    """
    return "<%s (%s) xpath=%s>" % (type(self).__name__, self.xmlNode.name, self.expr)
__repr__ = __str__
@ -100,12 +101,15 @@ class XPathSelectorList(list):
XPathSelector of the list"""
return [x.extract() if isinstance(x, XPathSelector) else x for x in self]
class XmlXPathSelector(XPathSelector):
    """XPathSelector for XML content"""

    def __init__(self, *args, **kwargs):
        """Build a selector that always uses the XML document constructor.

        All positional and keyword arguments are forwarded to
        XPathSelector.__init__; any caller-supplied ``constructor`` is
        overridden with ``xmlDoc_from_xml``.
        """
        # Accepting arbitrary arguments (instead of only response/text) lets
        # XPathSelector.x() re-instantiate this class via type(self)(...)
        # with node/parent/expr keywords.
        kwargs['constructor'] = xmlDoc_from_xml
        XPathSelector.__init__(self, *args, **kwargs)
class HtmlXPathSelector(XPathSelector):
    """XPathSelector for HTML content"""

    def __init__(self, *args, **kwargs):
        """Build a selector that always uses the HTML document constructor.

        All positional and keyword arguments are forwarded to
        XPathSelector.__init__; any caller-supplied ``constructor`` is
        overridden with ``xmlDoc_from_html``.
        """
        # Accepting arbitrary arguments (instead of only response/text) lets
        # XPathSelector.x() re-instantiate this class via type(self)(...)
        # with node/parent/expr keywords.
        kwargs['constructor'] = xmlDoc_from_html
        XPathSelector.__init__(self, *args, **kwargs)