mirror of https://github.com/scrapy/scrapy.git
Fix tests

parent 63acd07209
commit 26a16f2c43
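This commit applies two mechanical refactors across the test suite: dict() calls built from keyword arguments become dict literals, and identity list comprehensions become direct list() calls. A minimal sketch of both patterns, using illustrative variable names rather than ones taken from the diff:

    # Before: dict() keys are limited to valid Python identifiers,
    # and the name `dict` is looked up and called at runtime.
    settings = dict(DOWNLOAD_DELAY=2, RANDOMIZE_DOWNLOAD_DELAY=True)

    # After: a literal allows arbitrary string keys and skips the call.
    settings = {"DOWNLOAD_DELAY": 2, "RANDOMIZE_DOWNLOAD_DELAY": True}

    # Before: an identity comprehension that only copies an iterable.
    result = [row for row in rows]

    # After: list() states the conversion directly.
    result = list(rows)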
@@ -76,11 +76,11 @@ class CrawlTestCase(TestCase):

     @defer.inlineCallbacks
     def _test_delay(self, total, delay, randomize=False):
-        crawl_kwargs = dict(
-            maxlatency=delay * 2,
-            mockserver=self.mockserver,
-            total=total,
-        )
+        crawl_kwargs = {
+            "maxlatency": delay * 2,
+            "mockserver": self.mockserver,
+            "total": total,
+        }
         tolerance = 1 - (0.6 if randomize else 0.2)

         settings = {"DOWNLOAD_DELAY": delay, "RANDOMIZE_DOWNLOAD_DELAY": randomize}

@@ -320,7 +320,7 @@ class CookiesMiddlewareTest(TestCase):

     @pytest.mark.xfail(reason="Cookie header is not currently being processed")
     def test_keep_cookie_from_default_request_headers_middleware(self):
-        DEFAULT_REQUEST_HEADERS = dict(Cookie="default=value; asdf=qwerty")
+        DEFAULT_REQUEST_HEADERS = {"Cookie": "default=value; asdf=qwerty"}
         mw_default_headers = DefaultHeadersMiddleware(DEFAULT_REQUEST_HEADERS.items())
         # overwrite with values from 'cookies' request argument
         req1 = Request("http://example.org", cookies={"default": "something"})

@@ -59,7 +59,7 @@ class HttpAuthMiddlewareTest(unittest.TestCase):
         self.assertEqual(req.headers["Authorization"], basic_auth_header("foo", "bar"))

     def test_auth_already_set(self):
-        req = Request("http://example.com/", headers=dict(Authorization="Digest 123"))
+        req = Request("http://example.com/", headers={"Authorization": "Digest 123"})
         assert self.mw.process_request(req, self.spider) is None
         self.assertEqual(req.headers["Authorization"], b"Digest 123")

@@ -79,6 +79,6 @@ class HttpAuthAnyMiddlewareTest(unittest.TestCase):
         self.assertEqual(req.headers["Authorization"], basic_auth_header("foo", "bar"))

     def test_auth_already_set(self):
-        req = Request("http://example.com/", headers=dict(Authorization="Digest 123"))
+        req = Request("http://example.com/", headers={"Authorization": "Digest 123"})
         assert self.mw.process_request(req, self.spider) is None
         self.assertEqual(req.headers["Authorization"], b"Digest 123")

@@ -152,7 +152,7 @@ class PythonItemExporterTest(BaseItemExporterTest):

     def test_nested_item(self):
         i1 = self.item_class(name="Joseph", age="22")
-        i2 = dict(name="Maria", age=i1)
+        i2 = {"name": "Maria", "age": i1}
         i3 = self.item_class(name="Jesus", age=i2)
         ie = self._get_exporter()
         exported = ie.export_item(i3)

@@ -185,7 +185,7 @@ class PythonItemExporterTest(BaseItemExporterTest):

     def test_export_item_dict_list(self):
         i1 = self.item_class(name="Joseph", age="22")
-        i2 = dict(name="Maria", age=[i1])
+        i2 = {"name": "Maria", "age": [i1]}
         i3 = self.item_class(name="Jesus", age=[i2])
         ie = self._get_exporter()
         exported = ie.export_item(i3)

@@ -373,7 +373,7 @@ class CsvItemExporterTest(BaseItemExporterTest):

     def test_join_multivalue_not_strings(self):
         self.assertExportResult(
-            item=dict(name="John", friends=[4, 8]),
+            item={"name": "John", "friends": [4, 8]},
             include_headers_line=False,
             expected='"[4, 8]",John\r\n',
         )

@@ -388,14 +388,14 @@ class CsvItemExporterTest(BaseItemExporterTest):
     def test_errors_default(self):
         with self.assertRaises(UnicodeEncodeError):
             self.assertExportResult(
-                item=dict(text="W\u0275\u200Brd"),
+                item={"text": "W\u0275\u200Brd"},
                 expected=None,
                 encoding="windows-1251",
             )

     def test_errors_xmlcharrefreplace(self):
         self.assertExportResult(
-            item=dict(text="W\u0275\u200Brd"),
+            item={"text": "W\u0275\u200Brd"},
             include_headers_line=False,
             expected="W&#629;&#8203;rd\r\n",
             encoding="windows-1251",
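Note on the xmlcharrefreplace test above: the expected string contains XML numeric character references because neither U+0275 nor U+200B exists in windows-1251 (the page render had collapsed the references into the literal characters). A standalone sketch of the underlying codec behavior, plain Python with no Scrapy needed:

    text = "W\u0275\u200Brd"  # "W" + LATIN SMALL LETTER BARRED O + ZERO WIDTH SPACE + "rd"

    try:
        text.encode("windows-1251")  # default strict errors: raises
    except UnicodeEncodeError as exc:
        print(exc)

    # xmlcharrefreplace substitutes decimal character references instead:
    print(text.encode("windows-1251", errors="xmlcharrefreplace"))
    # b'W&#629;&#8203;rd'  (0x0275 == 629, 0x200B == 8203)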
@@ -455,8 +455,8 @@ class XmlItemExporterTest(BaseItemExporterTest):
         )

     def test_nested_item(self):
-        i1 = dict(name="foo\xa3hoo", age="22")
-        i2 = dict(name="bar", age=i1)
+        i1 = {"name": "foo\xa3hoo", "age": "22"}
+        i2 = {"name": "bar", "age": i1}
         i3 = self.item_class(name="buz", age=i2)

         self.assertExportResult(

@@ -478,8 +478,8 @@ class XmlItemExporterTest(BaseItemExporterTest):
         )

     def test_nested_list_item(self):
-        i1 = dict(name="foo")
-        i2 = dict(name="bar", v2={"egg": ["spam"]})
+        i1 = {"name": "foo"}
+        i2 = {"name": "bar", "v2": {"egg": ["spam"]}}
         i3 = self.item_class(name="buz", age=[i1, i2])

         self.assertExportResult(

@@ -534,7 +534,7 @@ class JsonLinesItemExporterTest(BaseItemExporterTest):

     def test_nested_item(self):
         i1 = self.item_class(name="Joseph", age="22")
-        i2 = dict(name="Maria", age=i1)
+        i2 = {"name": "Maria", "age": i1}
         i3 = self.item_class(name="Jesus", age=i2)
         self.ie.start_exporting()
         self.ie.export_item(i3)

@@ -622,9 +622,9 @@ class JsonItemExporterTest(JsonLinesItemExporterTest):
         self.assertEqual(exported, [expected])

     def test_nested_dict_item(self):
-        i1 = dict(name="Joseph\xa3", age="22")
+        i1 = {"name": "Joseph\xa3", "age": "22"}
         i2 = self.item_class(name="Maria", age=i1)
-        i3 = dict(name="Jesus", age=i2)
+        i3 = {"name": "Jesus", "age": i2}
         self.ie.start_exporting()
         self.ie.export_item(i3)
         self.ie.finish_exporting()

@@ -37,7 +37,7 @@ class Base:
         page4_url = "http://example.com/page%204.html"

         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -55,7 +55,7 @@ class Base:
     def test_extract_filter_allow(self):
         lx = self.extractor_cls(allow=("sample",))
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -70,7 +70,7 @@ class Base:
     def test_extract_filter_allow_with_duplicates(self):
         lx = self.extractor_cls(allow=("sample",), unique=False)
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -93,7 +93,7 @@ class Base:
     def test_extract_filter_allow_with_duplicates_canonicalize(self):
         lx = self.extractor_cls(allow=("sample",), unique=False, canonicalize=True)
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -116,7 +116,7 @@ class Base:
     def test_extract_filter_allow_no_duplicates_canonicalize(self):
         lx = self.extractor_cls(allow=("sample",), unique=True, canonicalize=True)
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -127,7 +127,7 @@ class Base:
     def test_extract_filter_allow_and_deny(self):
         lx = self.extractor_cls(allow=("sample",), deny=("3",))
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -137,7 +137,7 @@ class Base:
     def test_extract_filter_allowed_domains(self):
         lx = self.extractor_cls(allow_domains=("google.com",))
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://www.google.com/something", text=""),
             ],

@@ -148,7 +148,7 @@ class Base:

         lx = self.extractor_cls(allow="sample")
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -162,7 +162,7 @@ class Base:

         lx = self.extractor_cls(allow="sample", deny="3")
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -171,7 +171,7 @@ class Base:

         lx = self.extractor_cls(allow_domains="google.com")
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://www.google.com/something", text=""),
             ],

@@ -179,7 +179,7 @@ class Base:

         lx = self.extractor_cls(deny_domains="example.com")
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://www.google.com/something", text=""),
             ],

@@ -265,7 +265,7 @@ class Base:
     def test_restrict_xpaths(self):
         lx = self.extractor_cls(restrict_xpaths=('//div[@id="subwrapper"]',))
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -337,7 +337,7 @@ class Base:
             restrict_css=("#subwrapper + a",),
         )
         self.assertEqual(
-            [link for link in lx.extract_links(self.response)],
+            list(lx.extract_links(self.response)),
             [
                 Link(url="http://example.com/sample1.html", text=""),
                 Link(url="http://example.com/sample2.html", text="sample 2"),

@@ -705,7 +705,7 @@ class Base:
         response = HtmlResponse("http://example.org/index.html", body=html)
         lx = self.extractor_cls()
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item1.html",

@@ -758,7 +758,7 @@ class LxmlLinkExtractorTestCase(Base.LinkExtractorTestCase):
         response = HtmlResponse("http://example.org/index.html", body=html)
         lx = self.extractor_cls()
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item1.html", text="Item 1", nofollow=False

@@ -779,7 +779,7 @@ class LxmlLinkExtractorTestCase(Base.LinkExtractorTestCase):
         # Simple text inclusion test
        lx = self.extractor_cls(restrict_text="dog")
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item2.html",

@@ -791,7 +791,7 @@ class LxmlLinkExtractorTestCase(Base.LinkExtractorTestCase):
         # Unique regex test
         lx = self.extractor_cls(restrict_text=r"of.*dog")
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item2.html",

@@ -803,7 +803,7 @@ class LxmlLinkExtractorTestCase(Base.LinkExtractorTestCase):
         # Multiple regex test
         lx = self.extractor_cls(restrict_text=[r"of.*dog", r"of.*cat"])
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item1.html",

@@ -834,7 +834,7 @@ class LxmlLinkExtractorTestCase(Base.LinkExtractorTestCase):
         response = HtmlResponse("http://example.org/index.html", body=html)
         lx = self.extractor_cls()
         self.assertEqual(
-            [link for link in lx.extract_links(response)],
+            list(lx.extract_links(response)),
             [
                 Link(
                     url="http://example.org/item2.html",

@@ -565,37 +565,37 @@ class NoInputReprocessingFromDictTest(unittest.TestCase):
     """

     def test_avoid_reprocessing_with_initial_values_single(self):
-        il = NoInputReprocessingDictLoader(item=dict(title="foo"))
+        il = NoInputReprocessingDictLoader(item={"title": "foo"})
         il_loaded = il.load_item()
-        self.assertEqual(il_loaded, dict(title="foo"))
+        self.assertEqual(il_loaded, {"title": "foo"})
         self.assertEqual(
-            NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="foo")
+            NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"}
         )

     def test_avoid_reprocessing_with_initial_values_list(self):
-        il = NoInputReprocessingDictLoader(item=dict(title=["foo", "bar"]))
+        il = NoInputReprocessingDictLoader(item={"title": ["foo", "bar"]})
         il_loaded = il.load_item()
-        self.assertEqual(il_loaded, dict(title="foo"))
+        self.assertEqual(il_loaded, {"title": "foo"})
         self.assertEqual(
-            NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="foo")
+            NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "foo"}
         )

     def test_avoid_reprocessing_without_initial_values_single(self):
         il = NoInputReprocessingDictLoader()
         il.add_value("title", "foo")
         il_loaded = il.load_item()
-        self.assertEqual(il_loaded, dict(title="FOO"))
+        self.assertEqual(il_loaded, {"title": "FOO"})
         self.assertEqual(
-            NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="FOO")
+            NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"}
         )

     def test_avoid_reprocessing_without_initial_values_list(self):
         il = NoInputReprocessingDictLoader()
         il.add_value("title", ["foo", "bar"])
         il_loaded = il.load_item()
-        self.assertEqual(il_loaded, dict(title="FOO"))
+        self.assertEqual(il_loaded, {"title": "FOO"})
         self.assertEqual(
-            NoInputReprocessingDictLoader(item=il_loaded).load_item(), dict(title="FOO")
+            NoInputReprocessingDictLoader(item=il_loaded).load_item(), {"title": "FOO"}
         )

@@ -91,7 +91,7 @@ class MailSenderTest(unittest.TestCase):
         self.assertEqual(attach.get_payload(decode=True), b"content")

     def _catch_mail_sent(self, **kwargs):
-        self.catched_msg = dict(**kwargs)
+        self.catched_msg = {**kwargs}

     def test_send_utf8(self):
         subject = "sübjèçt"
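Note on the _catch_mail_sent change above: dict(**kwargs) and {**kwargs} build the same shallow copy of the keyword arguments, so only the spelling changes. A quick sketch with an invented helper name:

    def catch(**kwargs):
        assert dict(**kwargs) == {**kwargs}  # equivalent shallow copies
        return {**kwargs}

    assert catch(to=["a@example.com"], subject="hi") == {
        "to": ["a@example.com"],
        "subject": "hi",
    }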
@@ -140,7 +140,7 @@ class FileDownloadCrawlTestCase(TestCase):
         self.assertEqual(logs.count(file_dl_failure), 3)

         # check that no files were written to the media store
-        self.assertEqual([x for x in self.tmpmediastore.iterdir()], [])
+        self.assertEqual(list(self.tmpmediastore.iterdir()), [])

     @defer.inlineCallbacks
     def test_download_media(self):

@@ -221,7 +221,7 @@ class FilesPipelineTestCase(unittest.TestCase):
         file_path = CustomFilesPipeline.from_settings(
             Settings({"FILES_STORE": self.tempdir})
         ).file_path
-        item = dict(path="path-to-store-file")
+        item = {"path": "path-to-store-file"}
         request = Request("http://example.com")
         self.assertEqual(file_path(request, item=item), "full/path-to-store-file")

@@ -132,7 +132,7 @@ class ImagesPipelineTestCase(unittest.TestCase):
         thumb_path = CustomImagesPipeline.from_settings(
             Settings({"IMAGES_STORE": self.tempdir})
         ).thumb_path
-        item = dict(path="path-to-store-file")
+        item = {"path": "path-to-store-file"}
         request = Request("http://example.com")
         self.assertEqual(
             thumb_path(request, "small", item=item), "thumb/small/path-to-store-file"

@@ -433,14 +433,14 @@ class ImagesPipelineTestCaseCustomSettings(unittest.TestCase):
     ]

     # This should match what is defined in ImagesPipeline.
-    default_pipeline_settings = dict(
-        MIN_WIDTH=0,
-        MIN_HEIGHT=0,
-        EXPIRES=90,
-        THUMBS={},
-        IMAGES_URLS_FIELD="image_urls",
-        IMAGES_RESULT_FIELD="images",
-    )
+    default_pipeline_settings = {
+        "MIN_WIDTH": 0,
+        "MIN_HEIGHT": 0,
+        "EXPIRES": 90,
+        "THUMBS": {},
+        "IMAGES_URLS_FIELD": "image_urls",
+        "IMAGES_RESULT_FIELD": "images",
+    }

     def setUp(self):
         self.tempdir = mkdtemp()

@@ -59,7 +59,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):
         assert self.pipe.media_to_download(request, self.info) is None

     def test_default_get_media_requests(self):
-        item = dict(name="name")
+        item = {"name": "name"}
         assert self.pipe.get_media_requests(item, self.info) is None

     def test_default_media_downloaded(self):

@@ -73,7 +73,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):
         assert self.pipe.media_failed(fail, request, self.info) is fail

     def test_default_item_completed(self):
-        item = dict(name="name")
+        item = {"name": "name"}
         assert self.pipe.item_completed([], item, self.info) is item

         # Check that failures are logged by default

@@ -98,7 +98,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):

     @inlineCallbacks
     def test_default_process_item(self):
-        item = dict(name="name")
+        item = {"name": "name"}
         new_item = yield self.pipe.process_item(item, self.spider)
         assert new_item is item

@@ -226,11 +226,11 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
         rsp = Response("http://url1")
         req = Request(
             "http://url1",
-            meta=dict(response=rsp),
+            meta={"response": rsp},
             callback=self._callback,
             errback=self._errback,
         )
-        item = dict(requests=req)
+        item = {"requests": req}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertEqual(new_item["results"], [(True, rsp)])
         self.assertEqual(

@@ -250,11 +250,11 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
         fail = Failure(Exception())
         req = Request(
             "http://url1",
-            meta=dict(response=fail),
+            meta={"response": fail},
             callback=self._callback,
             errback=self._errback,
         )
-        item = dict(requests=req)
+        item = {"requests": req}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertEqual(new_item["results"], [(False, fail)])
         self.assertEqual(

@@ -272,10 +272,10 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
     def test_mix_of_success_and_failure(self):
         self.pipe.LOG_FAILED_RESULTS = False
         rsp1 = Response("http://url1")
-        req1 = Request("http://url1", meta=dict(response=rsp1))
+        req1 = Request("http://url1", meta={"response": rsp1})
         fail = Failure(Exception())
-        req2 = Request("http://url2", meta=dict(response=fail))
-        item = dict(requests=[req1, req2])
+        req2 = Request("http://url2", meta={"response": fail})
+        item = {"requests": [req1, req2]}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertEqual(new_item["results"], [(True, rsp1), (False, fail)])
         m = self.pipe._mockcalled

@@ -294,7 +294,7 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
     def test_get_media_requests(self):
         # returns single Request (without callback)
         req = Request("http://url")
-        item = dict(requests=req)  # pass a single item
+        item = {"requests": req}  # pass a single item
         new_item = yield self.pipe.process_item(item, self.spider)
         assert new_item is item
         self.assertIn(self.fingerprint(req), self.info.downloaded)

@@ -302,7 +302,7 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
         # returns iterable of Requests
         req1 = Request("http://url1")
         req2 = Request("http://url2")
-        item = dict(requests=iter([req1, req2]))
+        item = {"requests": iter([req1, req2])}
         new_item = yield self.pipe.process_item(item, self.spider)
         assert new_item is item
         assert self.fingerprint(req1) in self.info.downloaded

@@ -311,17 +311,17 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
     @inlineCallbacks
     def test_results_are_cached_across_multiple_items(self):
         rsp1 = Response("http://url1")
-        req1 = Request("http://url1", meta=dict(response=rsp1))
-        item = dict(requests=req1)
+        req1 = Request("http://url1", meta={"response": rsp1})
+        item = {"requests": req1}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertTrue(new_item is item)
         self.assertEqual(new_item["results"], [(True, rsp1)])

         # rsp2 is ignored, rsp1 must be in results because request fingerprints are the same
         req2 = Request(
-            req1.url, meta=dict(response=Response("http://donot.download.me"))
+            req1.url, meta={"response": Response("http://donot.download.me")}
         )
-        item = dict(requests=req2)
+        item = {"requests": req2}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertTrue(new_item is item)
         self.assertEqual(self.fingerprint(req1), self.fingerprint(req2))

@@ -330,11 +330,11 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
     @inlineCallbacks
     def test_results_are_cached_for_requests_of_single_item(self):
         rsp1 = Response("http://url1")
-        req1 = Request("http://url1", meta=dict(response=rsp1))
+        req1 = Request("http://url1", meta={"response": rsp1})
         req2 = Request(
-            req1.url, meta=dict(response=Response("http://donot.download.me"))
+            req1.url, meta={"response": Response("http://donot.download.me")}
         )
-        item = dict(requests=[req1, req2])
+        item = {"requests": [req1, req2]}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertTrue(new_item is item)
         self.assertEqual(new_item["results"], [(True, rsp1), (True, rsp1)])

@@ -359,16 +359,16 @@ class MediaPipelineTestCase(BaseMediaPipelineTestCase):
         def rsp2_func():
             self.fail("it must cache rsp1 result and must not try to redownload")

-        req1 = Request("http://url", meta=dict(response=rsp1_func))
-        req2 = Request(req1.url, meta=dict(response=rsp2_func))
-        item = dict(requests=[req1, req2])
+        req1 = Request("http://url", meta={"response": rsp1_func})
+        req2 = Request(req1.url, meta={"response": rsp2_func})
+        item = {"requests": [req1, req2]}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertEqual(new_item["results"], [(True, rsp1), (True, rsp1)])

     @inlineCallbacks
     def test_use_media_to_download_result(self):
-        req = Request("http://url", meta=dict(result="ITSME", response=self.fail))
-        item = dict(requests=req)
+        req = Request("http://url", meta={"result": "ITSME", "response": self.fail})
+        item = {"requests": req}
         new_item = yield self.pipe.process_item(item, self.spider)
         self.assertEqual(new_item["results"], [(True, "ITSME")])
         self.assertEqual(
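Note on the caching tests above: the media pipeline deduplicates by request fingerprint, which is derived from the request method, URL, and body but not from meta, so two requests for the same URL share one cached result even when their meta differs. A hedged sketch using scrapy.utils.request.fingerprint (the helper available in recent Scrapy versions; the tests themselves go through a self.fingerprint wrapper):

    from scrapy import Request
    from scrapy.utils.request import fingerprint

    req1 = Request("http://url1", meta={"response": "first"})
    req2 = Request("http://url1", meta={"response": "second"})
    assert fingerprint(req1) == fingerprint(req2)  # meta is not hashed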
@@ -45,15 +45,15 @@ class MockDownloader:

 class MockCrawler(Crawler):
     def __init__(self, priority_queue_cls, jobdir):
-        settings = dict(
-            SCHEDULER_DEBUG=False,
-            SCHEDULER_DISK_QUEUE="scrapy.squeues.PickleLifoDiskQueue",
-            SCHEDULER_MEMORY_QUEUE="scrapy.squeues.LifoMemoryQueue",
-            SCHEDULER_PRIORITY_QUEUE=priority_queue_cls,
-            JOBDIR=jobdir,
-            DUPEFILTER_CLASS="scrapy.dupefilters.BaseDupeFilter",
-            REQUEST_FINGERPRINTER_IMPLEMENTATION="2.7",
-        )
+        settings = {
+            "SCHEDULER_DEBUG": False,
+            "SCHEDULER_DISK_QUEUE": "scrapy.squeues.PickleLifoDiskQueue",
+            "SCHEDULER_MEMORY_QUEUE": "scrapy.squeues.LifoMemoryQueue",
+            "SCHEDULER_PRIORITY_QUEUE": priority_queue_cls,
+            "JOBDIR": jobdir,
+            "DUPEFILTER_CLASS": "scrapy.dupefilters.BaseDupeFilter",
+            "REQUEST_FINGERPRINTER_IMPLEMENTATION": "2.7",
+        }
         super().__init__(Spider, settings)
         self.engine = MockEngine(downloader=MockDownloader())
         self.stats = load_object(self.settings["STATS_CLASS"])(self)

@@ -338,10 +338,10 @@ class TestIntegrationWithDownloaderAwareInMemory(TestCase):

 class TestIncompatibility(unittest.TestCase):
     def _incompatible(self):
-        settings = dict(
-            SCHEDULER_PRIORITY_QUEUE="scrapy.pqueues.DownloaderAwarePriorityQueue",
-            CONCURRENT_REQUESTS_PER_IP=1,
-        )
+        settings = {
+            "SCHEDULER_PRIORITY_QUEUE": "scrapy.pqueues.DownloaderAwarePriorityQueue",
+            "CONCURRENT_REQUESTS_PER_IP": 1,
+        }
         crawler = get_crawler(Spider, settings)
         scheduler = Scheduler.from_crawler(crawler)
         spider = Spider(name="spider")

@@ -16,10 +16,10 @@ class TestOffsiteMiddleware(TestCase):
         self.mw.spider_opened(self.spider)

     def _get_spiderargs(self):
-        return dict(
-            name="foo",
-            allowed_domains=["scrapytest.org", "scrapy.org", "scrapy.test.org"],
-        )
+        return {
+            "name": "foo",
+            "allowed_domains": ["scrapytest.org", "scrapy.org", "scrapy.test.org"],
+        }

     def test_process_spider_output(self):
         res = Response("http://scrapytest.org")

@@ -50,7 +50,7 @@ class TestOffsiteMiddleware(TestCase):

 class TestOffsiteMiddleware2(TestOffsiteMiddleware):
     def _get_spiderargs(self):
-        return dict(name="foo", allowed_domains=None)
+        return {"name": "foo", "allowed_domains": None}

     def test_process_spider_output(self):
         res = Response("http://scrapytest.org")

@@ -61,13 +61,16 @@ class TestOffsiteMiddleware2(TestOffsiteMiddleware):

 class TestOffsiteMiddleware3(TestOffsiteMiddleware2):
     def _get_spiderargs(self):
-        return dict(name="foo")
+        return {"name": "foo"}


 class TestOffsiteMiddleware4(TestOffsiteMiddleware3):
     def _get_spiderargs(self):
         bad_hostname = urlparse("http:////scrapytest.org").hostname
-        return dict(name="foo", allowed_domains=["scrapytest.org", None, bad_hostname])
+        return {
+            "name": "foo",
+            "allowed_domains": ["scrapytest.org", None, bad_hostname],
+        }

     def test_process_spider_output(self):
         res = Response("http://scrapytest.org")

@@ -355,7 +355,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         response = TextResponse(url="http://example.com/", body=body)
         csv = csviter(response)

-        result = [row for row in csv]
+        result = list(csv)
         self.assertEqual(
             result,
             [

@@ -377,7 +377,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv = csviter(response, delimiter="\t")

         self.assertEqual(
-            [row for row in csv],
+            list(csv),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -394,7 +394,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv1 = csviter(response1, quotechar="'")

         self.assertEqual(
-            [row for row in csv1],
+            list(csv1),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -407,7 +407,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv2 = csviter(response2, delimiter="|", quotechar="'")

         self.assertEqual(
-            [row for row in csv2],
+            list(csv2),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -422,7 +422,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv = csviter(response)

         self.assertEqual(
-            [row for row in csv],
+            list(csv),
             [
                 {"'id'": "1", "'name'": "'alpha'", "'value'": "'foobar'"},
                 {

@@ -441,7 +441,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv = csviter(response, delimiter="\t")

         self.assertEqual(
-            [row for row in csv],
+            list(csv),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -458,7 +458,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv = csviter(response, headers=[h.decode("utf-8") for h in headers])

         self.assertEqual(
-            [row for row in csv],
+            list(csv),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -475,7 +475,7 @@ class UtilsCsvTestCase(unittest.TestCase):
         csv = csviter(response)

         self.assertEqual(
-            [row for row in csv],
+            list(csv),
             [
                 {"id": "1", "name": "alpha", "value": "foobar"},
                 {"id": "2", "name": "unicode", "value": "\xfan\xedc\xf3d\xe9\u203d"},

@@ -16,7 +16,7 @@ class UtilsRenderTemplateFileTestCase(unittest.TestCase):
         rmtree(self.tmp_path)

     def test_simple_render(self):
-        context = dict(project_name="proj", name="spi", classname="TheSpider")
+        context = {"project_name": "proj", "name": "spi", "classname": "TheSpider"}
         template = "from ${project_name}.spiders.${name} import ${classname}"
         rendered = "from proj.spiders.spi import TheSpider"
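Note on the UtilsCsvTestCase hunks: csviter wraps a Response and yields one dict per CSV row, taking keys from the first row unless an explicit headers list is passed. An illustrative sketch exercising the delimiter and quotechar options seen above:

    from scrapy.http import TextResponse
    from scrapy.utils.iterators import csviter

    body = b"id|'name'|'value'\n1|'alpha'|'foobar'"
    response = TextResponse(url="http://example.com/", body=body)

    for row in csviter(response, delimiter="|", quotechar="'"):
        print(row)  # {'id': '1', 'name': 'alpha', 'value': 'foobar'}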