1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-21 05:13:16 +00:00

added (yet another) XML node iterator based entirely on regex

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4030
This commit is contained in:
Pablo Hoffman 2008-06-29 06:08:48 +00:00
parent f9fc8a1b65
commit 4caadf6b67

View File

@ -114,3 +114,19 @@ class expat_XMLNodeIterator():
start, end = self._byte_offset_buffer.pop(0)
yield response_body[start:end]
self._parser.Parse('', 1)
# TESTING (pablo) #
# Yet another node iterator: this one is based entirely on regular expressions,
# which means it should be faster but needs some profiling to confirm.
class re_XMLNodeIterator():
    """Iterate over the occurrences of an XML node using only regular expressions.

    The response body is scanned with a single compiled pattern that matches
    from an opening ``<node`` tag (followed by whitespace or ``>``) up to the
    first following ``</node>`` closing tag.  Each matched fragment is then
    wrapped in an ``XmlXPathSelector`` and the selector for the node itself is
    yielded.

    Limitations that follow from the pattern: the match is non-greedy, so an
    element nested inside another element of the same name ends the match at
    the inner closing tag, and self-closing elements (``<node/>``) are not
    matched on their own.
    """

    def __init__(self, response, node):
        """Store the response and compile the pattern for ``node``.

        response -- object whose ``body.to_string()`` returns the XML text
        node     -- name of the XML element to iterate over
        """
        self.response = response
        self.node = node
        # Escape the name so that characters which are legal in XML names but
        # special in regular expressions (e.g. ".") are matched literally
        # instead of being interpreted as pattern syntax.
        tag = re.escape(node)
        self.re = re.compile(r"<%s[\s>].*?</%s>" % (tag, tag), re.DOTALL)

    def __iter__(self):
        """Yield one selector per matched node, in document order."""
        body = self.response.body.to_string()
        for match in self.re.finditer(body):
            # Parse only the matched fragment; '/<node>' selects its root
            # element, and [0] takes that single result.
            yield XmlXPathSelector(text=match.group()).x('/' + self.node)[0]