1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 08:44:28 +00:00

lxml selectors: cache and reuse XPathEvaluator object, for performance. refs #147

This commit is contained in:
Pablo Hoffman 2010-10-27 05:51:30 -02:00
parent 1ead888db8
commit bd7def8fd4

View File

@ -16,7 +16,8 @@ __all__ = ['HtmlXPathSelector', 'XmlXPathSelector', 'XPathSelector', \
class XPathSelector(object_ref):
__slots__ = ['response', 'text', 'expr', 'namespaces', '_root', '__weakref__']
__slots__ = ['response', 'text', 'expr', 'namespaces', '_root', '_xpathev', \
'__weakref__']
_parser = etree.HTMLParser
_tostring_method = 'html'
@ -27,6 +28,7 @@ class XPathSelector(object_ref):
else:
self.response = response
self._root = root
self._xpathev = None
self.namespaces = namespaces
self.expr = expr
@ -38,10 +40,15 @@ class XPathSelector(object_ref):
base_url=self.response.url)
return self._root
@property
def xpathev(self):
    """Return the XPathEvaluator for this selector, creating it on first use.

    The evaluator is built from ``self.root`` and ``self.namespaces`` the
    first time the property is read and stored in ``self._xpathev``; later
    reads return the stored object instead of constructing a new one.
    Nothing in this property rebuilds the evaluator afterwards.
    """
    # Fast path: an evaluator was already built for this selector.
    if self._xpathev is not None:
        return self._xpathev
    # First access: build the evaluator once and remember it.
    self._xpathev = etree.XPathEvaluator(self.root, namespaces=self.namespaces)
    return self._xpathev
def select(self, xpath):
xpatheval = etree.XPathEvaluator(self.root, namespaces=self.namespaces)
try:
result = xpatheval(xpath)
result = self.xpathev(xpath)
except etree.XPathError:
raise ValueError("Invalid XPath: %s" % xpath)
if hasattr(result, '__iter__'):