mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-24 22:43:57 +00:00
Fix xmliter_lxml
This commit is contained in:
parent
874bfa0284
commit
9b07b0ab0a
@@ -7,14 +7,14 @@ def xmliter_lxml(obj, nodename, namespace=None):
|
||||
reader = _StreamReader(obj)
|
||||
tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
|
||||
iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
|
||||
+ selxpath = '//' + ('x:%s' % nodename if namespace else nodename)
|
||||
for _, node in iterable:
|
||||
nodetext = etree.tostring(node)
|
||||
node.clear()
|
||||
xs = XmlXPathSelector(text=nodetext)
|
||||
if namespace:
|
||||
xs.register_namespace('x', namespace)
|
||||
- nodename = 'x:%s' % nodename
|
||||
- yield xs.select('//' + nodename)[0]
|
||||
+ yield xs.select(selxpath)[0]
|
||||
|
||||
|
||||
class _StreamReader(object):
|
||||
|
@@ -110,6 +110,7 @@ class LxmlXmliterTestCase(XmliterTestCase):
|
||||
<description>This is item 1</description>
|
||||
<link>http://www.mydummycompany.com/items/1</link>
|
||||
<image_link>http://www.mydummycompany.com/images/item1.jpg</image_link>
|
||||
+ <image_link>http://www.mydummycompany.com/images/item2.jpg</image_link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
@@ -122,6 +123,8 @@ class LxmlXmliterTestCase(XmliterTestCase):
|
||||
namespace_iter = self.xmliter(response, 'image_link', 'http://base.google.com/ns/1.0')
|
||||
node = namespace_iter.next()
|
||||
self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item1.jpg'])
|
||||
+ node = namespace_iter.next()
|
||||
+ self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item2.jpg'])
|
||||
|
||||
|
||||
class UtilsCsvTestCase(unittest.TestCase):
|
||||
|
Loading…
x
Reference in New Issue
Block a user