mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-21 05:13:16 +00:00
added (yet another) xml node iterator based entirely in regex
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%4030
This commit is contained in:
parent
f9fc8a1b65
commit
4caadf6b67
@ -114,3 +114,19 @@ class expat_XMLNodeIterator():
|
||||
start, end = self._byte_offset_buffer.pop(0)
|
||||
yield response_body[start:end]
|
||||
self._parser.Parse('', 1)
|
||||
|
||||
|
||||
# TESTING (pablo) #
|
||||
# Yet another node iterator: this one is based entirely on regular expressions,
|
||||
# which means it should be faster but needs some profiling to confirm.
|
||||
|
||||
class re_XMLNodeIterator():
    """Iterate over the ``node`` elements of a response body using a regex.

    The body is scanned with a non-greedy pattern that matches from an
    opening ``<node`` tag (followed by whitespace or ``>``) up to the first
    ``</node>`` closing tag. Each matched fragment is wrapped in an
    ``XmlXPathSelector`` and the first result of the ``/node`` query on it
    is yielded.

    Limitations that follow from the pattern itself:

    * an element nested inside another element of the same name ends the
      match at the inner closing tag;
    * an element without a ``</node>`` closing tag of its own (for example a
      self-closing ``<node/>``) is not matched as a standalone element.

    Arguments:
        response: object whose ``body.to_string()`` returns the text to scan.
        node: name of the element to iterate over; matched literally.
    """

    def __init__(self, response, node):
        self.response = response
        self.node = node
        # Escape the node name so that regex metacharacters in it (such as
        # the "." in "media.content") are matched literally rather than
        # being interpreted as pattern syntax.
        escaped = re.escape(node)
        self.re = re.compile(
            r"<%s[\s>].*?</%s>" % (escaped, escaped), re.DOTALL)

    def __iter__(self):
        for match in self.re.finditer(self.response.body.to_string()):
            # The XPath query uses the raw (unescaped) node name.
            yield XmlXPathSelector(text=match.group()).x('/' + self.node)[0]
|
||||
|
Loading…
x
Reference in New Issue
Block a user