added (yet another) xml node iterator based entirely on regex

--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4030
2025-02-21 05:13:16 +00:00 · 2008-06-29 06:08:48 +00:00 · 2008-06-29 06:08:48 +00:00 · 4caadf6b67
commit 4caadf6b67
parent f9fc8a1b65
1 changed files with 16 additions and 0 deletions
--- a/scrapy/trunk/scrapy/xpath/iterator.py
+++ b/scrapy/trunk/scrapy/xpath/iterator.py
@ -114,3 +114,19 @@ class expat_XMLNodeIterator():
                start, end = self._byte_offset_buffer.pop(0)
                yield response_body[start:end]
        self._parser.Parse('', 1)
+
+
+# TESTING (pablo) #
+# Yet another node iterator: this one is based entirely on regular expressions,
+# which means it should be faster but needs some profiling to confirm.
+
+class re_XMLNodeIterator():
+
+    def __init__(self, response, node):
+        self.response = response
+        self.node = node
+        self.re = re.compile(r"<%s[\s>].*?</%s>" % (node, node), re.DOTALL)
+
+    def __iter__(self):
+        for match in self.re.finditer(self.response.body.to_string()):
+            yield XmlXPathSelector(text=match.group()).x('/' + self.node)[0]