1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 16:24:18 +00:00

response.selector, response.xpath(), response.css() and response.re()

This commit is contained in:
Mikhail Korobov 2014-04-10 04:11:23 +06:00 committed by Daniel Graña
parent 21d073d03b
commit 134bd8a9e0
2 changed files with 83 additions and 1 deletions

View File

@ -19,6 +19,7 @@ class TextResponse(Response):
self._encoding = kwargs.pop('encoding', None)
self._cached_benc = None
self._cached_ubody = None
self._cached_selector = None
super(TextResponse, self).__init__(*args, **kwargs)
def _set_url(self, url):
@ -88,3 +89,19 @@ class TextResponse(Response):
@memoizemethod_noargs
def _body_declared_encoding(self):
    """Return ``html_body_declared_encoding`` applied to this response's body.

    The method is wrapped with ``memoizemethod_noargs``; presumably that
    caches the result per instance -- confirm against the decorator.
    """
    raw_body = self.body
    return html_body_declared_encoding(raw_body)
@property
def selector(self):
    """Return a ``Selector`` built from this response, creating it lazily.

    The instance is stored in ``self._cached_selector`` on first access and
    the same object is returned on every later access.
    """
    # Imported at call time rather than at module level; presumably this
    # sidesteps a circular import with scrapy.selector -- TODO confirm.
    from scrapy.selector import Selector

    cached = self._cached_selector
    if cached is None:
        cached = self._cached_selector = Selector(self)
    return cached
def xpath(self, query):
    """Shortcut for ``self.selector.xpath(query)``.

    :param query: passed unchanged to the selector's ``xpath`` method.
    :returns: whatever the selector's ``xpath`` method returns.
    """
    sel = self.selector
    return sel.xpath(query)
def css(self, query):
    """Shortcut for ``self.selector.css(query)``.

    :param query: passed unchanged to the selector's ``css`` method.
    :returns: whatever the selector's ``css`` method returns.
    """
    sel = self.selector
    return sel.css(query)
def re(self, regex):
    """Shortcut for ``self.selector.re(regex)``.

    :param regex: passed unchanged to the selector's ``re`` method.
    :returns: whatever the selector's ``re`` method returns.
    """
    sel = self.selector
    return sel.re(regex)

View File

@ -2,6 +2,7 @@ import unittest
from w3lib.encoding import resolve_encoding
from scrapy.http import Request, Response, TextResponse, HtmlResponse, XmlResponse, Headers
from scrapy.selector import Selector
class BaseResponseTest(unittest.TestCase):
@ -112,6 +113,7 @@ class BaseResponseTest(unittest.TestCase):
self.assertRaises(AttributeError, setattr, r, 'url', 'http://example2.com')
self.assertRaises(AttributeError, setattr, r, 'body', 'xxx')
class ResponseText(BaseResponseTest):
def test_no_unicode_url(self):
@ -258,13 +260,52 @@ class TextResponseTest(BaseResponseTest):
#r = self.response_class("http://www.example.com", body='PREFIX\xe3\xabSUFFIX')
#assert u'\ufffd' in r.body_as_unicode(), repr(r.body_as_unicode())
def test_selector(self):
    """``response.selector`` is a cached HTML ``Selector`` bound to its response.

    Also checks that xpath, css and re queries run through the selector
    pull the expected text out of the page title.
    """
    markup = "<html><head><title>Some page</title><body></body></html>"
    resp = self.response_class("http://www.example.com", body=markup)

    self.assertIsInstance(resp.selector, Selector)
    self.assertEqual(resp.selector.type, 'html')
    # Two separate accesses must yield the very same object (cached property).
    self.assertIs(resp.selector, resp.selector)
    self.assertIs(resp.selector.response, resp)

    xpath_titles = resp.selector.xpath("//title/text()").extract()
    self.assertEqual(xpath_titles, [u'Some page'])

    css_titles = resp.selector.css("title::text").extract()
    self.assertEqual(css_titles, [u'Some page'])

    re_matches = resp.selector.re("Some (.*)</title>")
    self.assertEqual(re_matches, [u'page'])
def test_selector_shortcuts(self):
    """``response.xpath/css/re`` agree with the same calls on ``response.selector``."""
    markup = "<html><head><title>Some page</title><body></body></html>"
    resp = self.response_class("http://www.example.com", body=markup)

    # Each pair runs the shortcut first, then the explicit selector call.
    xpath_shortcut = resp.xpath("//title/text()").extract()
    xpath_explicit = resp.selector.xpath("//title/text()").extract()
    self.assertEqual(xpath_shortcut, xpath_explicit)

    css_shortcut = resp.css("title::text").extract()
    css_explicit = resp.selector.css("title::text").extract()
    self.assertEqual(css_shortcut, css_explicit)

    re_shortcut = resp.re("Some (.*)</title>")
    re_explicit = resp.selector.re("Some (.*)</title>")
    self.assertEqual(re_shortcut, re_explicit)
class HtmlResponseTest(TextResponseTest):
response_class = HtmlResponse
def test_html_encoding(self):
body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head><body>Price: \xa3100</body></html>'
"""
@ -328,6 +369,30 @@ class XmlResponseTest(TextResponseTest):
self._assert_response_values(r6, 'iso-8859-1', body2)
self._assert_response_values(r7, 'utf-8', body2)
def test_selector(self):
    """``response.selector`` is a cached XML ``Selector`` bound to its response.

    Also checks that an xpath query through the selector returns the text
    of the ``<elem>`` node.
    """
    document = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
    resp = self.response_class("http://www.example.com", body=document)

    self.assertIsInstance(resp.selector, Selector)
    self.assertEqual(resp.selector.type, 'xml')
    # Two separate accesses must yield the very same object (cached property).
    self.assertIs(resp.selector, resp.selector)
    self.assertIs(resp.selector.response, resp)

    elem_texts = resp.selector.xpath("//elem/text()").extract()
    self.assertEqual(elem_texts, [u'value'])
def test_selector_shortcuts(self):
    """``response.xpath`` agrees with ``response.selector.xpath`` on an XML body."""
    document = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
    resp = self.response_class("http://www.example.com", body=document)

    # Shortcut is evaluated first, then the explicit selector call.
    xpath_shortcut = resp.xpath("//elem/text()").extract()
    xpath_explicit = resp.selector.xpath("//elem/text()").extract()
    self.assertEqual(xpath_shortcut, xpath_explicit)
if __name__ == "__main__":
    # Executed directly as a script: hand control to unittest's runner.
    unittest.main()