mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-11 14:51:32 +00:00
reapplying black again and removing line length on pre-commit black config
This commit is contained in:
parent
ef6eb48b2d
commit
c5cdd0d30c
@ -17,4 +17,3 @@ repos:
|
||||
rev: 22.12.0
|
||||
hooks:
|
||||
- id: black
|
||||
args: [--line-length=79]
|
||||
|
@ -61,9 +61,7 @@ class ImagesPipeline(FilesPipeline):
|
||||
"ImagesPipeline requires installing Pillow 4.0.0 or later"
|
||||
)
|
||||
|
||||
super().__init__(
|
||||
store_uri, settings=settings, download_func=download_func
|
||||
)
|
||||
super().__init__(store_uri, settings=settings, download_func=download_func)
|
||||
|
||||
if isinstance(settings, dict) or settings is None:
|
||||
settings = Settings(settings)
|
||||
@ -86,12 +84,8 @@ class ImagesPipeline(FilesPipeline):
|
||||
self.images_result_field = settings.get(
|
||||
resolve("IMAGES_RESULT_FIELD"), self.IMAGES_RESULT_FIELD
|
||||
)
|
||||
self.min_width = settings.getint(
|
||||
resolve("IMAGES_MIN_WIDTH"), self.MIN_WIDTH
|
||||
)
|
||||
self.min_height = settings.getint(
|
||||
resolve("IMAGES_MIN_HEIGHT"), self.MIN_HEIGHT
|
||||
)
|
||||
self.min_width = settings.getint(resolve("IMAGES_MIN_WIDTH"), self.MIN_WIDTH)
|
||||
self.min_height = settings.getint(resolve("IMAGES_MIN_HEIGHT"), self.MIN_HEIGHT)
|
||||
self.thumbs = settings.get(resolve("IMAGES_THUMBS"), self.THUMBS)
|
||||
|
||||
self._deprecated_convert_image = None
|
||||
@ -125,9 +119,7 @@ class ImagesPipeline(FilesPipeline):
|
||||
|
||||
def image_downloaded(self, response, request, info, *, item=None):
|
||||
checksum = None
|
||||
for path, image, buf in self.get_images(
|
||||
response, request, info, item=item
|
||||
):
|
||||
for path, image, buf in self.get_images(response, request, info, item=item):
|
||||
if checksum is None:
|
||||
buf.seek(0)
|
||||
checksum = md5sum(buf)
|
||||
@ -154,8 +146,8 @@ class ImagesPipeline(FilesPipeline):
|
||||
)
|
||||
|
||||
if self._deprecated_convert_image is None:
|
||||
self._deprecated_convert_image = (
|
||||
"response_body" not in get_func_args(self.convert_image)
|
||||
self._deprecated_convert_image = "response_body" not in get_func_args(
|
||||
self.convert_image
|
||||
)
|
||||
if self._deprecated_convert_image:
|
||||
warnings.warn(
|
||||
@ -226,17 +218,13 @@ class ImagesPipeline(FilesPipeline):
|
||||
|
||||
def item_completed(self, results, item, info):
|
||||
with suppress(KeyError):
|
||||
ItemAdapter(item)[self.images_result_field] = [
|
||||
x for ok, x in results if ok
|
||||
]
|
||||
ItemAdapter(item)[self.images_result_field] = [x for ok, x in results if ok]
|
||||
return item
|
||||
|
||||
def file_path(self, request, response=None, info=None, *, item=None):
|
||||
image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
|
||||
return f"full/{image_guid}.jpg"
|
||||
|
||||
def thumb_path(
|
||||
self, request, thumb_id, response=None, info=None, *, item=None
|
||||
):
|
||||
def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
|
||||
thumb_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
|
||||
return f"thumbs/{thumb_id}/{thumb_guid}.jpg"
|
||||
|
@ -40,9 +40,7 @@ class Shell:
|
||||
self.code = code
|
||||
self.vars = {}
|
||||
|
||||
def start(
|
||||
self, url=None, request=None, response=None, spider=None, redirect=True
|
||||
):
|
||||
def start(self, url=None, request=None, response=None, spider=None, redirect=True):
|
||||
# disable accidental Ctrl-C key press from shutting down the engine
|
||||
signal.signal(signal.SIGINT, signal.SIG_IGN)
|
||||
if url:
|
||||
|
@ -20,23 +20,17 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_response_body(self):
|
||||
_, out, _ = yield self.execute(
|
||||
[self.url("/text"), "-c", "response.body"]
|
||||
)
|
||||
_, out, _ = yield self.execute([self.url("/text"), "-c", "response.body"])
|
||||
assert b"Works" in out
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_response_type_text(self):
|
||||
_, out, _ = yield self.execute(
|
||||
[self.url("/text"), "-c", "type(response)"]
|
||||
)
|
||||
_, out, _ = yield self.execute([self.url("/text"), "-c", "type(response)"])
|
||||
assert b"TextResponse" in out
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_response_type_html(self):
|
||||
_, out, _ = yield self.execute(
|
||||
[self.url("/html"), "-c", "type(response)"]
|
||||
)
|
||||
_, out, _ = yield self.execute([self.url("/html"), "-c", "type(response)"])
|
||||
assert b"HtmlResponse" in out
|
||||
|
||||
@defer.inlineCallbacks
|
||||
@ -54,9 +48,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def test_redirect(self):
|
||||
_, out, _ = yield self.execute(
|
||||
[self.url("/redirect"), "-c", "response.url"]
|
||||
)
|
||||
_, out, _ = yield self.execute([self.url("/redirect"), "-c", "response.url"])
|
||||
assert out.strip().endswith(b"/redirected")
|
||||
|
||||
@defer.inlineCallbacks
|
||||
@ -100,9 +92,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
|
||||
@defer.inlineCallbacks
|
||||
def test_request_replace(self):
|
||||
url = self.url("/text")
|
||||
code = (
|
||||
f"fetch('{url}') or fetch(response.request.replace(method='POST'))"
|
||||
)
|
||||
code = f"fetch('{url}') or fetch(response.request.replace(method='POST'))"
|
||||
errcode, out, _ = yield self.execute(["-c", code])
|
||||
self.assertEqual(errcode, 0, out)
|
||||
|
||||
@ -133,9 +123,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
|
||||
if NON_EXISTING_RESOLVABLE:
|
||||
raise unittest.SkipTest("Non-existing hosts are resolvable")
|
||||
url = "www.somedomainthatdoesntexi.st"
|
||||
errcode, out, err = yield self.execute(
|
||||
[url, "-c", "item"], check_code=False
|
||||
)
|
||||
errcode, out, err = yield self.execute([url, "-c", "item"], check_code=False)
|
||||
self.assertEqual(errcode, 1, out or err)
|
||||
self.assertIn(b"DNS lookup failed", err)
|
||||
|
||||
@ -146,6 +134,4 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
|
||||
code = f"fetch('{url}')"
|
||||
args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
|
||||
_, _, err = yield self.execute(args, check_code=True)
|
||||
self.assertNotIn(
|
||||
b"RuntimeError: There is no current event loop in thread", err
|
||||
)
|
||||
self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
|
||||
|
@ -30,13 +30,9 @@ class BaseResponseTest(unittest.TestCase):
|
||||
# Response requires url in the constructor
|
||||
self.assertRaises(Exception, self.response_class)
|
||||
self.assertTrue(
|
||||
isinstance(
|
||||
self.response_class("http://example.com/"), self.response_class
|
||||
)
|
||||
)
|
||||
self.assertRaises(
|
||||
TypeError, self.response_class, b"http://example.com"
|
||||
isinstance(self.response_class("http://example.com/"), self.response_class)
|
||||
)
|
||||
self.assertRaises(TypeError, self.response_class, b"http://example.com")
|
||||
# body can be str or None
|
||||
self.assertTrue(
|
||||
isinstance(
|
||||
@ -70,9 +66,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
|
||||
headers = {"foo": "bar"}
|
||||
body = b"a body"
|
||||
r = self.response_class(
|
||||
"http://www.example.com", headers=headers, body=body
|
||||
)
|
||||
r = self.response_class("http://www.example.com", headers=headers, body=body)
|
||||
|
||||
assert r.headers is not headers
|
||||
self.assertEqual(r.headers[b"foo"], b"bar")
|
||||
@ -99,9 +93,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
self.assertEqual(r1.body, r2.body)
|
||||
|
||||
# make sure flags list is shallow copied
|
||||
assert (
|
||||
r1.flags is not r2.flags
|
||||
), "flags must be a shallow copy, not identical"
|
||||
assert r1.flags is not r2.flags, "flags must be a shallow copy, not identical"
|
||||
self.assertEqual(r1.flags, r2.flags)
|
||||
|
||||
# make sure headers attribute is shallow copied
|
||||
@ -128,9 +120,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
|
||||
def test_unavailable_meta(self):
|
||||
r1 = self.response_class("http://www.example.com", body=b"Some body")
|
||||
with self.assertRaisesRegex(
|
||||
AttributeError, r"Response\.meta not available"
|
||||
):
|
||||
with self.assertRaisesRegex(AttributeError, r"Response\.meta not available"):
|
||||
r1.meta
|
||||
|
||||
def test_unavailable_cb_kwargs(self):
|
||||
@ -187,9 +177,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
|
||||
def test_immutable_attributes(self):
|
||||
r = self.response_class("http://example.com")
|
||||
self.assertRaises(
|
||||
AttributeError, setattr, r, "url", "http://example2.com"
|
||||
)
|
||||
self.assertRaises(AttributeError, setattr, r, "url", "http://example2.com")
|
||||
self.assertRaises(AttributeError, setattr, r, "body", "xxx")
|
||||
|
||||
def test_urljoin(self):
|
||||
@ -213,9 +201,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
# Response.follow
|
||||
|
||||
def test_follow_url_absolute(self):
|
||||
self._assert_followed_url(
|
||||
"http://foo.example.com", "http://foo.example.com"
|
||||
)
|
||||
self._assert_followed_url("http://foo.example.com", "http://foo.example.com")
|
||||
|
||||
def test_follow_url_relative(self):
|
||||
self._assert_followed_url("foo", "http://example.com/foo")
|
||||
@ -343,9 +329,7 @@ class BaseResponseTest(unittest.TestCase):
|
||||
self.assertEqual(req.url, target_url)
|
||||
return req
|
||||
|
||||
def _assert_followed_all_urls(
|
||||
self, follow_obj, target_urls, response=None
|
||||
):
|
||||
def _assert_followed_all_urls(self, follow_obj, target_urls, response=None):
|
||||
if response is None:
|
||||
response = self._links_response()
|
||||
followed = response.follow_all(follow_obj)
|
||||
@ -385,22 +369,16 @@ class TextResponseTest(BaseResponseTest):
|
||||
def test_unicode_url(self):
|
||||
# instantiate with unicode url without encoding (should set default encoding)
|
||||
resp = self.response_class("http://www.example.com/")
|
||||
self._assert_response_encoding(
|
||||
resp, self.response_class._DEFAULT_ENCODING
|
||||
)
|
||||
self._assert_response_encoding(resp, self.response_class._DEFAULT_ENCODING)
|
||||
|
||||
# make sure urls are converted to str
|
||||
resp = self.response_class(
|
||||
url="http://www.example.com/", encoding="utf-8"
|
||||
)
|
||||
resp = self.response_class(url="http://www.example.com/", encoding="utf-8")
|
||||
assert isinstance(resp.url, str)
|
||||
|
||||
resp = self.response_class(
|
||||
url="http://www.example.com/price/\xa3", encoding="utf-8"
|
||||
)
|
||||
self.assertEqual(
|
||||
resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3")
|
||||
)
|
||||
self.assertEqual(resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3"))
|
||||
resp = self.response_class(
|
||||
url="http://www.example.com/price/\xa3", encoding="latin-1"
|
||||
)
|
||||
@ -409,9 +387,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
"http://www.example.com/price/\xa3",
|
||||
headers={"Content-type": ["text/html; charset=utf-8"]},
|
||||
)
|
||||
self.assertEqual(
|
||||
resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3")
|
||||
)
|
||||
self.assertEqual(resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3"))
|
||||
resp = self.response_class(
|
||||
"http://www.example.com/price/\xa3",
|
||||
headers={"Content-type": ["text/html; charset=iso-8859-1"]},
|
||||
@ -597,17 +573,13 @@ class TextResponseTest(BaseResponseTest):
|
||||
|
||||
self.assertIsInstance(response.selector, Selector)
|
||||
self.assertEqual(response.selector.type, "html")
|
||||
self.assertIs(
|
||||
response.selector, response.selector
|
||||
) # property is cached
|
||||
self.assertIs(response.selector, response.selector) # property is cached
|
||||
self.assertIs(response.selector.response, response)
|
||||
|
||||
self.assertEqual(
|
||||
response.selector.xpath("//title/text()").getall(), ["Some page"]
|
||||
)
|
||||
self.assertEqual(
|
||||
response.selector.css("title::text").getall(), ["Some page"]
|
||||
)
|
||||
self.assertEqual(response.selector.css("title::text").getall(), ["Some page"])
|
||||
self.assertEqual(response.selector.re("Some (.*)</title>"), ["page"])
|
||||
|
||||
def test_selector_shortcuts(self):
|
||||
@ -647,23 +619,23 @@ class TextResponseTest(BaseResponseTest):
|
||||
def test_urljoin_with_base_url(self):
|
||||
"""Test urljoin shortcut which also evaluates base-url through get_base_url()."""
|
||||
body = b'<html><body><base href="https://example.net"></body></html>'
|
||||
joined = self.response_class(
|
||||
"http://www.example.com", body=body
|
||||
).urljoin("/test")
|
||||
joined = self.response_class("http://www.example.com", body=body).urljoin(
|
||||
"/test"
|
||||
)
|
||||
absolute = "https://example.net/test"
|
||||
self.assertEqual(joined, absolute)
|
||||
|
||||
body = b'<html><body><base href="/elsewhere"></body></html>'
|
||||
joined = self.response_class(
|
||||
"http://www.example.com", body=body
|
||||
).urljoin("test")
|
||||
joined = self.response_class("http://www.example.com", body=body).urljoin(
|
||||
"test"
|
||||
)
|
||||
absolute = "http://www.example.com/test"
|
||||
self.assertEqual(joined, absolute)
|
||||
|
||||
body = b'<html><body><base href="/elsewhere/"></body></html>'
|
||||
joined = self.response_class(
|
||||
"http://www.example.com", body=body
|
||||
).urljoin("test")
|
||||
joined = self.response_class("http://www.example.com", body=body).urljoin(
|
||||
"test"
|
||||
)
|
||||
absolute = "http://www.example.com/elsewhere/test"
|
||||
self.assertEqual(joined, absolute)
|
||||
|
||||
@ -700,9 +672,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
|
||||
def test_follow_selector_list(self):
|
||||
resp = self._links_response()
|
||||
self.assertRaisesRegex(
|
||||
ValueError, "SelectorList", resp.follow, resp.css("a")
|
||||
)
|
||||
self.assertRaisesRegex(ValueError, "SelectorList", resp.follow, resp.css("a"))
|
||||
|
||||
def test_follow_selector_invalid(self):
|
||||
resp = self._links_response()
|
||||
@ -723,9 +693,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
url="http://example.com",
|
||||
body=b"<html><body><a name=123>click me</a></body></html>",
|
||||
)
|
||||
self.assertRaisesRegex(
|
||||
ValueError, "no href", resp.follow, resp.css("a")[0]
|
||||
)
|
||||
self.assertRaisesRegex(ValueError, "no href", resp.follow, resp.css("a")[0])
|
||||
|
||||
def test_follow_whitespace_selector(self):
|
||||
resp = self.response_class(
|
||||
@ -792,9 +760,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
"http://example.com/innertag.html",
|
||||
]
|
||||
response = self._links_response()
|
||||
extracted = [
|
||||
r.url for r in response.follow_all(css='a[href*="example.com"]')
|
||||
]
|
||||
extracted = [r.url for r in response.follow_all(css='a[href*="example.com"]')]
|
||||
self.assertEqual(expected, extracted)
|
||||
|
||||
def test_follow_all_css_skip_invalid(self):
|
||||
@ -806,9 +772,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
response = self._links_response_no_href()
|
||||
extracted1 = [r.url for r in response.follow_all(css=".pagination a")]
|
||||
self.assertEqual(expected, extracted1)
|
||||
extracted2 = [
|
||||
r.url for r in response.follow_all(response.css(".pagination a"))
|
||||
]
|
||||
extracted2 = [r.url for r in response.follow_all(response.css(".pagination a"))]
|
||||
self.assertEqual(expected, extracted2)
|
||||
|
||||
def test_follow_all_xpath(self):
|
||||
@ -817,9 +781,7 @@ class TextResponseTest(BaseResponseTest):
|
||||
"http://example.com/innertag.html",
|
||||
]
|
||||
response = self._links_response()
|
||||
extracted = response.follow_all(
|
||||
xpath='//a[contains(@href, "example.com")]'
|
||||
)
|
||||
extracted = response.follow_all(xpath='//a[contains(@href, "example.com")]')
|
||||
self.assertEqual(expected, [r.url for r in extracted])
|
||||
|
||||
def test_follow_all_xpath_skip_invalid(self):
|
||||
@ -830,15 +792,12 @@ class TextResponseTest(BaseResponseTest):
|
||||
]
|
||||
response = self._links_response_no_href()
|
||||
extracted1 = [
|
||||
r.url
|
||||
for r in response.follow_all(xpath='//div[@id="pagination"]/a')
|
||||
r.url for r in response.follow_all(xpath='//div[@id="pagination"]/a')
|
||||
]
|
||||
self.assertEqual(expected, extracted1)
|
||||
extracted2 = [
|
||||
r.url
|
||||
for r in response.follow_all(
|
||||
response.xpath('//div[@id="pagination"]/a')
|
||||
)
|
||||
for r in response.follow_all(response.xpath('//div[@id="pagination"]/a'))
|
||||
]
|
||||
self.assertEqual(expected, extracted2)
|
||||
|
||||
@ -852,15 +811,11 @@ class TextResponseTest(BaseResponseTest):
|
||||
|
||||
def test_json_response(self):
|
||||
json_body = b"""{"ip": "109.187.217.200"}"""
|
||||
json_response = self.response_class(
|
||||
"http://www.example.com", body=json_body
|
||||
)
|
||||
json_response = self.response_class("http://www.example.com", body=json_body)
|
||||
self.assertEqual(json_response.json(), {"ip": "109.187.217.200"})
|
||||
|
||||
text_body = b"""<html><body>text</body></html>"""
|
||||
text_response = self.response_class(
|
||||
"http://www.example.com", body=text_body
|
||||
)
|
||||
text_response = self.response_class("http://www.example.com", body=text_body)
|
||||
with self.assertRaises(ValueError):
|
||||
text_response.json()
|
||||
|
||||
@ -927,9 +882,7 @@ class XmlResponseTest(TextResponseTest):
|
||||
def test_xml_encoding(self):
|
||||
body = b"<xml></xml>"
|
||||
r1 = self.response_class("http://www.example.com", body=body)
|
||||
self._assert_response_values(
|
||||
r1, self.response_class._DEFAULT_ENCODING, body
|
||||
)
|
||||
self._assert_response_values(r1, self.response_class._DEFAULT_ENCODING, body)
|
||||
|
||||
body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
|
||||
r2 = self.response_class("http://www.example.com", body=body)
|
||||
@ -937,9 +890,7 @@ class XmlResponseTest(TextResponseTest):
|
||||
|
||||
# make sure replace() preserves the explicit encoding passed in the __init__ method
|
||||
body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
|
||||
r3 = self.response_class(
|
||||
"http://www.example.com", body=body, encoding="utf-8"
|
||||
)
|
||||
r3 = self.response_class("http://www.example.com", body=body, encoding="utf-8")
|
||||
body2 = b"New body"
|
||||
r4 = r3.replace(body=body2)
|
||||
self._assert_response_values(r4, "utf-8", body2)
|
||||
@ -961,14 +912,10 @@ class XmlResponseTest(TextResponseTest):
|
||||
|
||||
self.assertIsInstance(response.selector, Selector)
|
||||
self.assertEqual(response.selector.type, "xml")
|
||||
self.assertIs(
|
||||
response.selector, response.selector
|
||||
) # property is cached
|
||||
self.assertIs(response.selector, response.selector) # property is cached
|
||||
self.assertIs(response.selector.response, response)
|
||||
|
||||
self.assertEqual(
|
||||
response.selector.xpath("//elem/text()").getall(), ["value"]
|
||||
)
|
||||
self.assertEqual(response.selector.xpath("//elem/text()").getall(), ["value"])
|
||||
|
||||
def test_selector_shortcuts(self):
|
||||
body = b'<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
|
||||
|
Loading…
x
Reference in New Issue
Block a user