Some improvements to XPathSelectors:

- x() method now returns the same XPathSelector type as its parent
- added tests to check this
- added tests to verify that XML and HTML XPathSelectors behave differently when parsing some non-trivial markup

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4035
2025-02-21 07:52:49 +00:00 · 2008-07-01 02:21:12 +00:00 · 2008-07-01 02:21:12 +00:00 · d993f493b5
commit d993f493b5
parent 1f7f0d0994
2 changed files with 32 additions and 8 deletions
--- a/scrapy/trunk/scrapy/tests/test_xpath.py
+++ b/scrapy/trunk/scrapy/tests/test_xpath.py
@@ -26,7 +26,7 @@ class XPathTestCase(unittest.TestCase):
        xl = xpath.x('//input')
        self.assertEqual(2, len(xl))
        for x in xl:
-            assert isinstance(x, XPathSelector)
+            assert isinstance(x, HtmlXPathSelector)

        self.assertEqual(xpath.x('//input').extract(),
                         [x.extract() for x in xpath.x('//input')])
@@ -41,6 +41,26 @@ class XPathTestCase(unittest.TestCase):
        self.assertEqual([x.extract() for x in xpath.x("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
                         [u'12'])

+    def test_selector_same_type(self):
+        """Test XPathSelector returning the same type in x() method"""
+        text = '<p>test<p>'
+        assert isinstance(XmlXPathSelector(text=text).x("//p")[0],
+                          XmlXPathSelector)
+        assert isinstance(HtmlXPathSelector(text=text).x("//p")[0], 
+                          HtmlXPathSelector)
+
+    def test_selector_xml_html(self):
+        """Test that XML and HTML XPathSelector's behave differently"""
+
+        # some text which is parsed differently by XML and HTML flavors
+        text = '<div><img src="a.jpg"><p>Hello</div>'
+
+        self.assertEqual(XmlXPathSelector(text=text).x("//div").extract(),
+                         [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
+
+        self.assertEqual(HtmlXPathSelector(text=text).x("//div").extract(),
+                         [u'<div><img src="a.jpg"><p>Hello</p></div>'])
+
    def test_selector_nested(self):
        """Nested selector tests"""
        body = """<body>
--- a/scrapy/trunk/scrapy/xpath/selector.py
+++ b/scrapy/trunk/scrapy/xpath/selector.py
@@ -39,10 +39,11 @@ class XPathSelector(object):
        if hasattr(self.xmlNode, 'xpathEval'):
            self.doc.xpathContext.setContextNode(self.xmlNode)
            xpath_result = self.doc.xpathContext.xpathEval(xpath)
+            cls = type(self)
            if hasattr(xpath_result, '__iter__'):
-                return XPathSelectorList([XPathSelector(node=node, parent=self, expr=xpath) for node in xpath_result])
+                return XPathSelectorList([cls(node=node, parent=self, expr=xpath) for node in xpath_result])
            else:
-                return XPathSelectorList([XPathSelector(node=xpath_result, parent=self, expr=xpath)])
+                return XPathSelectorList([cls(node=xpath_result, parent=self, expr=xpath)])
        else:
            return XPathSelectorList([])

@@ -77,7 +78,7 @@ class XPathSelector(object):
        self.doc.xpathContext.xpathRegisterNs(prefix, uri)

    def __str__(self):
-        return "<XPathSelector (%s) xpath=%s>" % (getattr(self.xmlNode, 'name'), self.expr)
+        return "<%s (%s) xpath=%s>" % (type(self).__name__, getattr(self.xmlNode, 'name'), self.expr)

    __repr__ = __str__

@@ -100,12 +101,15 @@ class XPathSelectorList(list):
        XPathSelector of the list"""
        return [x.extract() if isinstance(x, XPathSelector) else x for x in self]

+
 class XmlXPathSelector(XPathSelector):
    """XPathSelector for XML content"""
-    def __init__(self, response=None, text=None):
-        XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_xml)
+    def __init__(self, *args, **kwargs):
+        kwargs['constructor'] = xmlDoc_from_xml
+        XPathSelector.__init__(self, *args, **kwargs)

 class HtmlXPathSelector(XPathSelector):
    """XPathSelector for HTML content"""
-    def __init__(self, response=None, text=None):
-        XPathSelector.__init__(self, response=response, text=text, constructor=xmlDoc_from_html)
+    def __init__(self, *args, **kwargs):
+        kwargs['constructor'] = xmlDoc_from_html
+        XPathSelector.__init__(self, *args, **kwargs)