1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-11 14:51:32 +00:00

reapplying black again and removing line length on pre-commit black config

This commit is contained in:
Emmanuel Rondan 2022-12-29 12:46:57 -03:00
parent ef6eb48b2d
commit c5cdd0d30c
5 changed files with 52 additions and 134 deletions

View File

@ -17,4 +17,3 @@ repos:
rev: 22.12.0
hooks:
- id: black
args: [--line-length=79]

View File

@ -61,9 +61,7 @@ class ImagesPipeline(FilesPipeline):
"ImagesPipeline requires installing Pillow 4.0.0 or later"
)
super().__init__(
store_uri, settings=settings, download_func=download_func
)
super().__init__(store_uri, settings=settings, download_func=download_func)
if isinstance(settings, dict) or settings is None:
settings = Settings(settings)
@ -86,12 +84,8 @@ class ImagesPipeline(FilesPipeline):
self.images_result_field = settings.get(
resolve("IMAGES_RESULT_FIELD"), self.IMAGES_RESULT_FIELD
)
self.min_width = settings.getint(
resolve("IMAGES_MIN_WIDTH"), self.MIN_WIDTH
)
self.min_height = settings.getint(
resolve("IMAGES_MIN_HEIGHT"), self.MIN_HEIGHT
)
self.min_width = settings.getint(resolve("IMAGES_MIN_WIDTH"), self.MIN_WIDTH)
self.min_height = settings.getint(resolve("IMAGES_MIN_HEIGHT"), self.MIN_HEIGHT)
self.thumbs = settings.get(resolve("IMAGES_THUMBS"), self.THUMBS)
self._deprecated_convert_image = None
@ -125,9 +119,7 @@ class ImagesPipeline(FilesPipeline):
def image_downloaded(self, response, request, info, *, item=None):
checksum = None
for path, image, buf in self.get_images(
response, request, info, item=item
):
for path, image, buf in self.get_images(response, request, info, item=item):
if checksum is None:
buf.seek(0)
checksum = md5sum(buf)
@ -154,8 +146,8 @@ class ImagesPipeline(FilesPipeline):
)
if self._deprecated_convert_image is None:
self._deprecated_convert_image = (
"response_body" not in get_func_args(self.convert_image)
self._deprecated_convert_image = "response_body" not in get_func_args(
self.convert_image
)
if self._deprecated_convert_image:
warnings.warn(
@ -226,17 +218,13 @@ class ImagesPipeline(FilesPipeline):
def item_completed(self, results, item, info):
with suppress(KeyError):
ItemAdapter(item)[self.images_result_field] = [
x for ok, x in results if ok
]
ItemAdapter(item)[self.images_result_field] = [x for ok, x in results if ok]
return item
def file_path(self, request, response=None, info=None, *, item=None):
image_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
return f"full/{image_guid}.jpg"
def thumb_path(
self, request, thumb_id, response=None, info=None, *, item=None
):
def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
thumb_guid = hashlib.sha1(to_bytes(request.url)).hexdigest()
return f"thumbs/{thumb_id}/{thumb_guid}.jpg"

View File

@ -40,9 +40,7 @@ class Shell:
self.code = code
self.vars = {}
def start(
self, url=None, request=None, response=None, spider=None, redirect=True
):
def start(self, url=None, request=None, response=None, spider=None, redirect=True):
# disable accidental Ctrl-C key press from shutting down the engine
signal.signal(signal.SIGINT, signal.SIG_IGN)
if url:

View File

@ -20,23 +20,17 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@defer.inlineCallbacks
def test_response_body(self):
_, out, _ = yield self.execute(
[self.url("/text"), "-c", "response.body"]
)
_, out, _ = yield self.execute([self.url("/text"), "-c", "response.body"])
assert b"Works" in out
@defer.inlineCallbacks
def test_response_type_text(self):
_, out, _ = yield self.execute(
[self.url("/text"), "-c", "type(response)"]
)
_, out, _ = yield self.execute([self.url("/text"), "-c", "type(response)"])
assert b"TextResponse" in out
@defer.inlineCallbacks
def test_response_type_html(self):
_, out, _ = yield self.execute(
[self.url("/html"), "-c", "type(response)"]
)
_, out, _ = yield self.execute([self.url("/html"), "-c", "type(response)"])
assert b"HtmlResponse" in out
@defer.inlineCallbacks
@ -54,9 +48,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@defer.inlineCallbacks
def test_redirect(self):
_, out, _ = yield self.execute(
[self.url("/redirect"), "-c", "response.url"]
)
_, out, _ = yield self.execute([self.url("/redirect"), "-c", "response.url"])
assert out.strip().endswith(b"/redirected")
@defer.inlineCallbacks
@ -100,9 +92,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@defer.inlineCallbacks
def test_request_replace(self):
url = self.url("/text")
code = (
f"fetch('{url}') or fetch(response.request.replace(method='POST'))"
)
code = f"fetch('{url}') or fetch(response.request.replace(method='POST'))"
errcode, out, _ = yield self.execute(["-c", code])
self.assertEqual(errcode, 0, out)
@ -133,9 +123,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
if NON_EXISTING_RESOLVABLE:
raise unittest.SkipTest("Non-existing hosts are resolvable")
url = "www.somedomainthatdoesntexi.st"
errcode, out, err = yield self.execute(
[url, "-c", "item"], check_code=False
)
errcode, out, err = yield self.execute([url, "-c", "item"], check_code=False)
self.assertEqual(errcode, 1, out or err)
self.assertIn(b"DNS lookup failed", err)
@ -146,6 +134,4 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
code = f"fetch('{url}')"
args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
_, _, err = yield self.execute(args, check_code=True)
self.assertNotIn(
b"RuntimeError: There is no current event loop in thread", err
)
self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)

View File

@ -30,13 +30,9 @@ class BaseResponseTest(unittest.TestCase):
# Response requires url in the constructor
self.assertRaises(Exception, self.response_class)
self.assertTrue(
isinstance(
self.response_class("http://example.com/"), self.response_class
)
)
self.assertRaises(
TypeError, self.response_class, b"http://example.com"
isinstance(self.response_class("http://example.com/"), self.response_class)
)
self.assertRaises(TypeError, self.response_class, b"http://example.com")
# body can be str or None
self.assertTrue(
isinstance(
@ -70,9 +66,7 @@ class BaseResponseTest(unittest.TestCase):
headers = {"foo": "bar"}
body = b"a body"
r = self.response_class(
"http://www.example.com", headers=headers, body=body
)
r = self.response_class("http://www.example.com", headers=headers, body=body)
assert r.headers is not headers
self.assertEqual(r.headers[b"foo"], b"bar")
@ -99,9 +93,7 @@ class BaseResponseTest(unittest.TestCase):
self.assertEqual(r1.body, r2.body)
# make sure flags list is shallow copied
assert (
r1.flags is not r2.flags
), "flags must be a shallow copy, not identical"
assert r1.flags is not r2.flags, "flags must be a shallow copy, not identical"
self.assertEqual(r1.flags, r2.flags)
# make sure headers attribute is shallow copied
@ -128,9 +120,7 @@ class BaseResponseTest(unittest.TestCase):
def test_unavailable_meta(self):
r1 = self.response_class("http://www.example.com", body=b"Some body")
with self.assertRaisesRegex(
AttributeError, r"Response\.meta not available"
):
with self.assertRaisesRegex(AttributeError, r"Response\.meta not available"):
r1.meta
def test_unavailable_cb_kwargs(self):
@ -187,9 +177,7 @@ class BaseResponseTest(unittest.TestCase):
def test_immutable_attributes(self):
r = self.response_class("http://example.com")
self.assertRaises(
AttributeError, setattr, r, "url", "http://example2.com"
)
self.assertRaises(AttributeError, setattr, r, "url", "http://example2.com")
self.assertRaises(AttributeError, setattr, r, "body", "xxx")
def test_urljoin(self):
@ -213,9 +201,7 @@ class BaseResponseTest(unittest.TestCase):
# Response.follow
def test_follow_url_absolute(self):
self._assert_followed_url(
"http://foo.example.com", "http://foo.example.com"
)
self._assert_followed_url("http://foo.example.com", "http://foo.example.com")
def test_follow_url_relative(self):
self._assert_followed_url("foo", "http://example.com/foo")
@ -343,9 +329,7 @@ class BaseResponseTest(unittest.TestCase):
self.assertEqual(req.url, target_url)
return req
def _assert_followed_all_urls(
self, follow_obj, target_urls, response=None
):
def _assert_followed_all_urls(self, follow_obj, target_urls, response=None):
if response is None:
response = self._links_response()
followed = response.follow_all(follow_obj)
@ -385,22 +369,16 @@ class TextResponseTest(BaseResponseTest):
def test_unicode_url(self):
# instantiate with unicode url without encoding (should set default encoding)
resp = self.response_class("http://www.example.com/")
self._assert_response_encoding(
resp, self.response_class._DEFAULT_ENCODING
)
self._assert_response_encoding(resp, self.response_class._DEFAULT_ENCODING)
# make sure urls are converted to str
resp = self.response_class(
url="http://www.example.com/", encoding="utf-8"
)
resp = self.response_class(url="http://www.example.com/", encoding="utf-8")
assert isinstance(resp.url, str)
resp = self.response_class(
url="http://www.example.com/price/\xa3", encoding="utf-8"
)
self.assertEqual(
resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3")
)
self.assertEqual(resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3"))
resp = self.response_class(
url="http://www.example.com/price/\xa3", encoding="latin-1"
)
@ -409,9 +387,7 @@ class TextResponseTest(BaseResponseTest):
"http://www.example.com/price/\xa3",
headers={"Content-type": ["text/html; charset=utf-8"]},
)
self.assertEqual(
resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3")
)
self.assertEqual(resp.url, to_unicode(b"http://www.example.com/price/\xc2\xa3"))
resp = self.response_class(
"http://www.example.com/price/\xa3",
headers={"Content-type": ["text/html; charset=iso-8859-1"]},
@ -597,17 +573,13 @@ class TextResponseTest(BaseResponseTest):
self.assertIsInstance(response.selector, Selector)
self.assertEqual(response.selector.type, "html")
self.assertIs(
response.selector, response.selector
) # property is cached
self.assertIs(response.selector, response.selector) # property is cached
self.assertIs(response.selector.response, response)
self.assertEqual(
response.selector.xpath("//title/text()").getall(), ["Some page"]
)
self.assertEqual(
response.selector.css("title::text").getall(), ["Some page"]
)
self.assertEqual(response.selector.css("title::text").getall(), ["Some page"])
self.assertEqual(response.selector.re("Some (.*)</title>"), ["page"])
def test_selector_shortcuts(self):
@ -647,23 +619,23 @@ class TextResponseTest(BaseResponseTest):
def test_urljoin_with_base_url(self):
"""Test urljoin shortcut which also evaluates base-url through get_base_url()."""
body = b'<html><body><base href="https://example.net"></body></html>'
joined = self.response_class(
"http://www.example.com", body=body
).urljoin("/test")
joined = self.response_class("http://www.example.com", body=body).urljoin(
"/test"
)
absolute = "https://example.net/test"
self.assertEqual(joined, absolute)
body = b'<html><body><base href="/elsewhere"></body></html>'
joined = self.response_class(
"http://www.example.com", body=body
).urljoin("test")
joined = self.response_class("http://www.example.com", body=body).urljoin(
"test"
)
absolute = "http://www.example.com/test"
self.assertEqual(joined, absolute)
body = b'<html><body><base href="/elsewhere/"></body></html>'
joined = self.response_class(
"http://www.example.com", body=body
).urljoin("test")
joined = self.response_class("http://www.example.com", body=body).urljoin(
"test"
)
absolute = "http://www.example.com/elsewhere/test"
self.assertEqual(joined, absolute)
@ -700,9 +672,7 @@ class TextResponseTest(BaseResponseTest):
def test_follow_selector_list(self):
resp = self._links_response()
self.assertRaisesRegex(
ValueError, "SelectorList", resp.follow, resp.css("a")
)
self.assertRaisesRegex(ValueError, "SelectorList", resp.follow, resp.css("a"))
def test_follow_selector_invalid(self):
resp = self._links_response()
@ -723,9 +693,7 @@ class TextResponseTest(BaseResponseTest):
url="http://example.com",
body=b"<html><body><a name=123>click me</a></body></html>",
)
self.assertRaisesRegex(
ValueError, "no href", resp.follow, resp.css("a")[0]
)
self.assertRaisesRegex(ValueError, "no href", resp.follow, resp.css("a")[0])
def test_follow_whitespace_selector(self):
resp = self.response_class(
@ -792,9 +760,7 @@ class TextResponseTest(BaseResponseTest):
"http://example.com/innertag.html",
]
response = self._links_response()
extracted = [
r.url for r in response.follow_all(css='a[href*="example.com"]')
]
extracted = [r.url for r in response.follow_all(css='a[href*="example.com"]')]
self.assertEqual(expected, extracted)
def test_follow_all_css_skip_invalid(self):
@ -806,9 +772,7 @@ class TextResponseTest(BaseResponseTest):
response = self._links_response_no_href()
extracted1 = [r.url for r in response.follow_all(css=".pagination a")]
self.assertEqual(expected, extracted1)
extracted2 = [
r.url for r in response.follow_all(response.css(".pagination a"))
]
extracted2 = [r.url for r in response.follow_all(response.css(".pagination a"))]
self.assertEqual(expected, extracted2)
def test_follow_all_xpath(self):
@ -817,9 +781,7 @@ class TextResponseTest(BaseResponseTest):
"http://example.com/innertag.html",
]
response = self._links_response()
extracted = response.follow_all(
xpath='//a[contains(@href, "example.com")]'
)
extracted = response.follow_all(xpath='//a[contains(@href, "example.com")]')
self.assertEqual(expected, [r.url for r in extracted])
def test_follow_all_xpath_skip_invalid(self):
@ -830,15 +792,12 @@ class TextResponseTest(BaseResponseTest):
]
response = self._links_response_no_href()
extracted1 = [
r.url
for r in response.follow_all(xpath='//div[@id="pagination"]/a')
r.url for r in response.follow_all(xpath='//div[@id="pagination"]/a')
]
self.assertEqual(expected, extracted1)
extracted2 = [
r.url
for r in response.follow_all(
response.xpath('//div[@id="pagination"]/a')
)
for r in response.follow_all(response.xpath('//div[@id="pagination"]/a'))
]
self.assertEqual(expected, extracted2)
@ -852,15 +811,11 @@ class TextResponseTest(BaseResponseTest):
def test_json_response(self):
json_body = b"""{"ip": "109.187.217.200"}"""
json_response = self.response_class(
"http://www.example.com", body=json_body
)
json_response = self.response_class("http://www.example.com", body=json_body)
self.assertEqual(json_response.json(), {"ip": "109.187.217.200"})
text_body = b"""<html><body>text</body></html>"""
text_response = self.response_class(
"http://www.example.com", body=text_body
)
text_response = self.response_class("http://www.example.com", body=text_body)
with self.assertRaises(ValueError):
text_response.json()
@ -927,9 +882,7 @@ class XmlResponseTest(TextResponseTest):
def test_xml_encoding(self):
body = b"<xml></xml>"
r1 = self.response_class("http://www.example.com", body=body)
self._assert_response_values(
r1, self.response_class._DEFAULT_ENCODING, body
)
self._assert_response_values(r1, self.response_class._DEFAULT_ENCODING, body)
body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
r2 = self.response_class("http://www.example.com", body=body)
@ -937,9 +890,7 @@ class XmlResponseTest(TextResponseTest):
# make sure replace() preserves the explicit encoding passed in the __init__ method
body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
r3 = self.response_class(
"http://www.example.com", body=body, encoding="utf-8"
)
r3 = self.response_class("http://www.example.com", body=body, encoding="utf-8")
body2 = b"New body"
r4 = r3.replace(body=body2)
self._assert_response_values(r4, "utf-8", body2)
@ -961,14 +912,10 @@ class XmlResponseTest(TextResponseTest):
self.assertIsInstance(response.selector, Selector)
self.assertEqual(response.selector.type, "xml")
self.assertIs(
response.selector, response.selector
) # property is cached
self.assertIs(response.selector, response.selector) # property is cached
self.assertIs(response.selector.response, response)
self.assertEqual(
response.selector.xpath("//elem/text()").getall(), ["value"]
)
self.assertEqual(response.selector.xpath("//elem/text()").getall(), ["value"])
def test_selector_shortcuts(self):
body = b'<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'