mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 16:24:18 +00:00
response.selector, response.xpath(), response.css() and response.re()
This commit is contained in:
parent
21d073d03b
commit
134bd8a9e0
@ -19,6 +19,7 @@ class TextResponse(Response):
|
||||
self._encoding = kwargs.pop('encoding', None)
|
||||
self._cached_benc = None
|
||||
self._cached_ubody = None
|
||||
self._cached_selector = None
|
||||
super(TextResponse, self).__init__(*args, **kwargs)
|
||||
|
||||
def _set_url(self, url):
|
||||
@ -88,3 +89,19 @@ class TextResponse(Response):
|
||||
@memoizemethod_noargs
def _body_declared_encoding(self):
    """Return what ``html_body_declared_encoding`` reports for ``self.body``.

    NOTE(review): the ``memoizemethod_noargs`` decorator presumably caches
    the result per instance -- confirm against its definition.
    """
    raw_body = self.body
    return html_body_declared_encoding(raw_body)
|
||||
|
||||
@property
def selector(self):
    """Return the ``Selector`` for this response, building it on first access.

    The instance is stored in ``self._cached_selector``, so every later
    access returns the very same object.
    """
    if self._cached_selector is None:
        # Import only when a selector actually has to be built, so accesses
        # that hit the cache skip the import machinery entirely.
        # NOTE(review): the function-scope import presumably avoids a
        # circular import between scrapy.http and scrapy.selector -- confirm.
        from scrapy.selector import Selector
        self._cached_selector = Selector(self)
    return self._cached_selector
|
||||
|
||||
def xpath(self, query):
    """Shortcut for ``self.selector.xpath(query)``."""
    selector = self.selector
    return selector.xpath(query)
|
||||
|
||||
def css(self, query):
    """Shortcut for ``self.selector.css(query)``."""
    selector = self.selector
    return selector.css(query)
|
||||
|
||||
def re(self, regex):
    """Shortcut for ``self.selector.re(regex)``."""
    selector = self.selector
    return selector.re(regex)
|
||||
|
@ -2,6 +2,7 @@ import unittest
|
||||
|
||||
from w3lib.encoding import resolve_encoding
|
||||
from scrapy.http import Request, Response, TextResponse, HtmlResponse, XmlResponse, Headers
|
||||
from scrapy.selector import Selector
|
||||
|
||||
|
||||
class BaseResponseTest(unittest.TestCase):
|
||||
@ -112,6 +113,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
self.assertRaises(AttributeError, setattr, r, 'url', 'http://example2.com')
|
||||
self.assertRaises(AttributeError, setattr, r, 'body', 'xxx')
|
||||
|
||||
|
||||
class ResponseText(BaseResponseTest):
|
||||
|
||||
def test_no_unicode_url(self):
|
||||
@ -258,13 +260,52 @@ class TextResponseTest(BaseResponseTest):
|
||||
#r = self.response_class("http://www.example.com", body='PREFIX\xe3\xabSUFFIX')
|
||||
#assert u'\ufffd' in r.body_as_unicode(), repr(r.body_as_unicode())
|
||||
|
||||
def test_selector(self):
    """``response.selector`` is a cached 'html' ``Selector`` tied to its response."""
    markup = "<html><head><title>Some page</title><body></body></html>"
    response = self.response_class("http://www.example.com", body=markup)

    # Type, flavour, caching and back-reference of the selector object.
    self.assertIsInstance(response.selector, Selector)
    self.assertEqual(response.selector.type, 'html')
    self.assertIs(response.selector, response.selector)  # property is cached
    self.assertIs(response.selector.response, response)

    # Query the document through XPath, CSS and a regular expression.
    xpath_titles = response.selector.xpath("//title/text()").extract()
    self.assertEqual(xpath_titles, [u'Some page'])

    css_titles = response.selector.css("title::text").extract()
    self.assertEqual(css_titles, [u'Some page'])

    regex_matches = response.selector.re("Some (.*)</title>")
    self.assertEqual(regex_matches, [u'page'])
|
||||
|
||||
def test_selector_shortcuts(self):
    """``response.xpath/css/re`` give the same results as the selector's methods."""
    markup = "<html><head><title>Some page</title><body></body></html>"
    response = self.response_class("http://www.example.com", body=markup)

    # For each query style, run the shortcut first and the selector second.
    shortcut_result = response.xpath("//title/text()").extract()
    selector_result = response.selector.xpath("//title/text()").extract()
    self.assertEqual(shortcut_result, selector_result)

    shortcut_result = response.css("title::text").extract()
    selector_result = response.selector.css("title::text").extract()
    self.assertEqual(shortcut_result, selector_result)

    shortcut_result = response.re("Some (.*)</title>")
    selector_result = response.selector.re("Some (.*)</title>")
    self.assertEqual(shortcut_result, selector_result)
|
||||
|
||||
|
||||
class HtmlResponseTest(TextResponseTest):
|
||||
|
||||
response_class = HtmlResponse
|
||||
|
||||
def test_html_encoding(self):
|
||||
|
||||
|
||||
body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
|
||||
</head><body>Price: \xa3100</body></html>'
|
||||
"""
|
||||
@ -328,6 +369,30 @@ class XmlResponseTest(TextResponseTest):
|
||||
self._assert_response_values(r6, 'iso-8859-1', body2)
|
||||
self._assert_response_values(r7, 'utf-8', body2)
|
||||
|
||||
def test_selector(self):
    """``response.selector`` is a cached 'xml' ``Selector`` tied to its response."""
    document = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
    response = self.response_class("http://www.example.com", body=document)

    # Type, flavour, caching and back-reference of the selector object.
    self.assertIsInstance(response.selector, Selector)
    self.assertEqual(response.selector.type, 'xml')
    self.assertIs(response.selector, response.selector)  # property is cached
    self.assertIs(response.selector.response, response)

    elem_texts = response.selector.xpath("//elem/text()").extract()
    self.assertEqual(elem_texts, [u'value'])
|
||||
|
||||
def test_selector_shortcuts(self):
    """``response.xpath`` gives the same result as ``response.selector.xpath``."""
    document = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
    response = self.response_class("http://www.example.com", body=document)

    # Run the shortcut first, then the explicit selector call.
    shortcut_result = response.xpath("//elem/text()").extract()
    selector_result = response.selector.xpath("//elem/text()").extract()
    self.assertEqual(shortcut_result, selector_result)
|
||||
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user