mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 11:03:45 +00:00
Merge pull request #535 from redapple/xpath-smartstrings
Disable smart strings in lxml XPath evaluations
This commit is contained in:
commit
2d60f86084
@ -57,6 +57,7 @@ class Selector(object_ref):
|
||||
# set:trailing
|
||||
"set": "http://exslt.org/sets"
|
||||
}
|
||||
_lxml_smart_strings = False
|
||||
|
||||
def __init__(self, response=None, text=None, type=None, namespaces=None,
|
||||
_root=None, _expr=None):
|
||||
@ -85,7 +86,8 @@ class Selector(object_ref):
|
||||
return SelectorList([])
|
||||
|
||||
try:
|
||||
result = xpathev(query, namespaces=self.namespaces)
|
||||
result = xpathev(query, namespaces=self.namespaces,
|
||||
smart_strings=self._lxml_smart_strings)
|
||||
except etree.XPathError:
|
||||
raise ValueError("Invalid XPath: %s" % query)
|
||||
|
||||
|
@ -297,6 +297,41 @@ class SelectorTestCase(unittest.TestCase):
|
||||
sel.remove_namespaces()
|
||||
self.assertEqual(len(sel.xpath("//link/@type")), 2)
|
||||
|
||||
def test_smart_strings(self):
    """Verify that lxml smart strings follow ``_lxml_smart_strings``.

    Text-node and attribute results are expected to expose
    ``getparent()`` only for a selector class that turns the
    ``_lxml_smart_strings`` flag on.
    """

    class SmartStringsSelector(Selector):
        # Opt in to smart strings for this local subclass only.
        _lxml_smart_strings = True

    body = """<body>
<div class='one'>
<ul>
<li>one</li><li>two</li>
</ul>
</div>
<div class='two'>
<ul>
<li>four</li><li>five</li><li>six</li>
</ul>
</div>
</body>"""

    response = HtmlResponse(url="http://example.com", body=body)

    # One query yielding text nodes and one yielding attribute values:
    # .getparent() is available on those results only when smart
    # strings are on.
    queries = ('//li/text()', '//div/@class')

    def exposes_getparent(selectors):
        # One boolean per result; built eagerly so every result is probed.
        return [hasattr(found._root, 'getparent') for found in selectors]

    # self.sscls is the selector class configured on the test case
    # (defined outside this excerpt); no result may carry getparent().
    plain_sel = self.sscls(response)
    for query in queries:
        self.assertFalse(any(exposes_getparent(plain_sel.xpath(query))))

    # With the flag enabled, every result must carry getparent().
    smart_sel = SmartStringsSelector(response)
    for query in queries:
        self.assertTrue(all(exposes_getparent(smart_sel.xpath(query))))
|
||||
|
||||
|
||||
class DeprecatedXpathSelectorTest(unittest.TestCase):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user