1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 22:43:57 +00:00

Fix xmliter_lxml

This commit is contained in:
Ismael Carnales 2011-02-11 11:41:44 -02:00
parent 874bfa0284
commit 9b07b0ab0a
2 changed files with 5 additions and 2 deletions

View File

def xmliter_lxml(obj, nodename, namespace=None):
    """Iterate over the ``nodename`` elements of an XML document.

    The input is parsed incrementally with ``etree.iterparse``. Each matching
    element is serialized with ``etree.tostring``, cleared from the parse
    tree, and wrapped in its own ``XmlXPathSelector``.

    :param obj: source object handed to ``_StreamReader``.
    :param nodename: local name of the elements to yield.
    :param namespace: optional namespace URI of ``nodename``. When given, it
        is registered on every yielded selector under the prefix ``x``.
    :return: a generator yielding one selector per matching element.
    """
    reader = _StreamReader(obj)
    tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
    iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
    # Build the XPath once, before the loop. Rebinding ``nodename`` inside the
    # loop would compound the prefix ("x:x:name") from the second node on.
    selxpath = '//' + ('x:%s' % nodename if namespace else nodename)
    for _, node in iterable:
        nodetext = etree.tostring(node)
        # The node has been serialized already; clear it to release its
        # content from the incrementally built tree.
        node.clear()
        xs = XmlXPathSelector(text=nodetext)
        if namespace:
            xs.register_namespace('x', namespace)
        yield xs.select(selxpath)[0]
class _StreamReader(object):

View File

@ -110,6 +110,7 @@ class LxmlXmliterTestCase(XmliterTestCase):
<description>This is item 1</description>
<link>http://www.mydummycompany.com/items/1</link>
<image_link>http://www.mydummycompany.com/images/item1.jpg</image_link>
<image_link>http://www.mydummycompany.com/images/item2.jpg</image_link>
</item>
</channel>
</rss>
@ -122,6 +123,8 @@ class LxmlXmliterTestCase(XmliterTestCase):
namespace_iter = self.xmliter(response, 'image_link', 'http://base.google.com/ns/1.0')
node = namespace_iter.next()
self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item1.jpg'])
node = namespace_iter.next()
self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item2.jpg'])
class UtilsCsvTestCase(unittest.TestCase):