1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 15:24:12 +00:00

Merge pull request #535 from redapple/xpath-smartstrings

Disable smart strings in lxml XPath evaluations
This commit is contained in:
Pablo Hoffman 2014-01-22 07:41:31 -08:00
commit 2d60f86084
2 changed files with 38 additions and 1 deletions

View File

@ -57,6 +57,7 @@ class Selector(object_ref):
# set:trailing
"set": "http://exslt.org/sets"
}
_lxml_smart_strings = False
def __init__(self, response=None, text=None, type=None, namespaces=None,
_root=None, _expr=None):
@ -85,7 +86,8 @@ class Selector(object_ref):
return SelectorList([])
try:
result = xpathev(query, namespaces=self.namespaces)
result = xpathev(query, namespaces=self.namespaces,
smart_strings=self._lxml_smart_strings)
except etree.XPathError:
raise ValueError("Invalid XPath: %s" % query)

View File

@ -297,6 +297,41 @@ class SelectorTestCase(unittest.TestCase):
sel.remove_namespaces()
self.assertEqual(len(sel.xpath("//link/@type")), 2)
def test_smart_strings(self):
    """Lxml smart strings return values

    Runs the same XPath queries with ``self.sscls`` and with a ``Selector``
    subclass that sets ``_lxml_smart_strings = True``. Results from the
    former must not expose ``getparent``; results from the latter must.
    """
    class SmartStringsSelector(Selector):
        _lxml_smart_strings = True

    def has_getparent(sel):
        # True when the value wrapped by the selector exposes getparent().
        return hasattr(sel._root, 'getparent')

    body = """<body>
<div class='one'>
<ul>
<li>one</li><li>two</li>
</ul>
</div>
<div class='two'>
<ul>
<li>four</li><li>five</li><li>six</li>
</ul>
</div>
</body>"""
    response = HtmlResponse(url="http://example.com", body=body)

    # .getparent() is available for text nodes and attributes
    # only when smart_strings are on
    x = self.sscls(response)
    li_text = x.xpath('//li/text()')
    # Pin the match counts first: any()/all() over an empty result would
    # let the assertions below pass without inspecting a single node.
    self.assertEqual(len(li_text), 5)
    self.assertFalse(any(has_getparent(e) for e in li_text))
    div_class = x.xpath('//div/@class')
    self.assertEqual(len(div_class), 2)
    self.assertFalse(any(has_getparent(e) for e in div_class))

    x = SmartStringsSelector(response)
    li_text = x.xpath('//li/text()')
    self.assertEqual(len(li_text), 5)
    self.assertTrue(all(has_getparent(e) for e in li_text))
    div_class = x.xpath('//div/@class')
    self.assertEqual(len(div_class), 2)
    self.assertTrue(all(has_getparent(e) for e in div_class))
class DeprecatedXpathSelectorTest(unittest.TestCase):