mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-06 10:24:24 +00:00
165 lines
5.8 KiB
Python
165 lines
5.8 KiB
Python
import os
|
|
import sys
|
|
from io import BytesIO
|
|
from pathlib import Path
|
|
|
|
from pexpect.popen_spawn import PopenSpawn
|
|
from twisted.internet import defer
|
|
from twisted.trial import unittest
|
|
|
|
from scrapy.utils.testproc import ProcessTest
|
|
from scrapy.utils.testsite import SiteTest
|
|
from tests import NON_EXISTING_RESOLVABLE, tests_datadir
|
|
from tests.mockserver import MockServer
|
|
|
|
|
|
class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
    """Non-interactive checks of the ``shell`` command.

    Every test hands an argument list to ``self.execute`` and inspects the
    ``(exit code, stdout, stderr)`` triple it yields.  Most tests use ``-c``
    to evaluate a single expression in the shell and then look for the
    expected bytes in the captured output.

    All checks use ``unittest`` assertion methods rather than bare ``assert``
    statements so that they are not stripped under ``python -O`` and so that a
    failure reports the captured output.
    """

    # NOTE(review): presumably read by ProcessTest to pick the scrapy
    # subcommand to run -- confirm against scrapy.utils.testproc.
    command = "shell"

    @defer.inlineCallbacks
    def test_empty(self):
        """Without a URL, evaluating ``item`` prints an empty mapping."""
        _, out, _ = yield self.execute(["-c", "item"])
        self.assertIn(b"{}", out)

    @defer.inlineCallbacks
    def test_response_body(self):
        """``response.body`` of ``/text`` contains the page text."""
        _, out, _ = yield self.execute([self.url("/text"), "-c", "response.body"])
        self.assertIn(b"Works", out)

    @defer.inlineCallbacks
    def test_response_type_text(self):
        """``/text`` is exposed in the shell as a ``TextResponse``."""
        _, out, _ = yield self.execute([self.url("/text"), "-c", "type(response)"])
        self.assertIn(b"TextResponse", out)

    @defer.inlineCallbacks
    def test_response_type_html(self):
        """``/html`` is exposed in the shell as an ``HtmlResponse``."""
        _, out, _ = yield self.execute([self.url("/html"), "-c", "type(response)"])
        self.assertIn(b"HtmlResponse", out)

    @defer.inlineCallbacks
    def test_response_selector_html(self):
        """An XPath query evaluated on ``response`` prints the matched text."""
        xpath = "response.xpath(\"//p[@class='one']/text()\").get()"
        _, out, _ = yield self.execute([self.url("/html"), "-c", xpath])
        self.assertEqual(out.strip(), b"Works")

    @defer.inlineCallbacks
    def test_response_encoding_gb18030(self):
        """``response.encoding`` for ``/enc-gb18030`` prints ``gb18030``."""
        _, out, _ = yield self.execute(
            [self.url("/enc-gb18030"), "-c", "response.encoding"]
        )
        self.assertEqual(out.strip(), b"gb18030")

    @defer.inlineCallbacks
    def test_redirect(self):
        """Fetching ``/redirect`` leaves ``response.url`` at ``/redirected``."""
        _, out, _ = yield self.execute([self.url("/redirect"), "-c", "response.url"])
        self.assertTrue(out.strip().endswith(b"/redirected"), out)

    @defer.inlineCallbacks
    def test_redirect_follow_302(self):
        """By default the shell follows the redirect and ends on a 200."""
        _, out, _ = yield self.execute(
            [self.url("/redirect-no-meta-refresh"), "-c", "response.status"]
        )
        self.assertTrue(out.strip().endswith(b"200"), out)

    @defer.inlineCallbacks
    def test_redirect_not_follow_302(self):
        """With ``--no-redirect`` the shell keeps the 302 response."""
        _, out, _ = yield self.execute(
            [
                "--no-redirect",
                self.url("/redirect-no-meta-refresh"),
                "-c",
                "response.status",
            ]
        )
        self.assertTrue(out.strip().endswith(b"302"), out)

    @defer.inlineCallbacks
    def test_fetch_redirect_follow_302(self):
        """Test that calling ``fetch(url)`` follows HTTP redirects by default."""
        url = self.url("/redirect-no-meta-refresh")
        code = f"fetch('{url}')"
        errcode, out, errout = yield self.execute(["-c", code])
        self.assertEqual(errcode, 0, out)
        # The redirect and the final fetch are both reported on stderr.
        self.assertIn(b"Redirecting (302)", errout)
        self.assertIn(b"Crawled (200)", errout)

    @defer.inlineCallbacks
    def test_fetch_redirect_not_follow_302(self):
        """Test that calling ``fetch(url, redirect=False)`` disables automatic redirects."""
        url = self.url("/redirect-no-meta-refresh")
        code = f"fetch('{url}', redirect=False)"
        errcode, out, errout = yield self.execute(["-c", code])
        self.assertEqual(errcode, 0, out)
        self.assertIn(b"Crawled (302)", errout)

    @defer.inlineCallbacks
    def test_request_replace(self):
        """A request built with ``response.request.replace`` can be fetched."""
        url = self.url("/text")
        # ``or`` chains the second fetch after the first one in a single
        # ``-c`` expression.
        code = f"fetch('{url}') or fetch(response.request.replace(method='POST'))"
        errcode, out, _ = yield self.execute(["-c", code])
        self.assertEqual(errcode, 0, out)

    @defer.inlineCallbacks
    def test_scrapy_import(self):
        """The name ``scrapy`` is usable inside the shell without importing it."""
        url = self.url("/text")
        code = f"fetch(scrapy.Request('{url}'))"
        errcode, out, _ = yield self.execute(["-c", code])
        self.assertEqual(errcode, 0, out)

    @defer.inlineCallbacks
    def test_local_file(self):
        """A filesystem path is accepted in place of a URL."""
        filepath = Path(tests_datadir, "test_site", "index.html")
        _, out, _ = yield self.execute([str(filepath), "-c", "item"])
        self.assertIn(b"{}", out)

    @defer.inlineCallbacks
    def test_local_nofile(self):
        """A ``file://`` URL to a missing file exits with code 1 and an error."""
        filepath = "file:///tests/sample_data/test_site/nothinghere.html"
        # check_code=False: a non-zero exit status is the expected outcome.
        errcode, out, err = yield self.execute(
            [filepath, "-c", "item"], check_code=False
        )
        self.assertEqual(errcode, 1, out or err)
        self.assertIn(b"No such file or directory", err)

    @defer.inlineCallbacks
    def test_dns_failures(self):
        """An unresolvable host exits with code 1 and reports the DNS failure."""
        if NON_EXISTING_RESOLVABLE:
            raise unittest.SkipTest("Non-existing hosts are resolvable")
        url = "www.somedomainthatdoesntexi.st"
        # check_code=False: a non-zero exit status is the expected outcome.
        errcode, out, err = yield self.execute([url, "-c", "item"], check_code=False)
        self.assertEqual(errcode, 1, out or err)
        self.assertIn(b"DNS lookup failed", err)

    @defer.inlineCallbacks
    def test_shell_fetch_async(self):
        """``fetch()`` under the asyncio reactor does not hit the event-loop error."""
        reactor_path = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
        url = self.url("/html")
        code = f"fetch('{url}')"
        args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
        _, _, err = yield self.execute(args, check_code=True)
        self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
|
|
|
|
|
|
class InteractiveShellTest(unittest.TestCase):
    """Drives an interactive ``scrapy shell`` session through pexpect."""

    def test_fetch(self):
        """Run ``fetch()`` in an interactive shell and check for tracebacks.

        The shell is started as ``python -m scrapy.cmdline shell`` with
        ``SCRAPY_PYTHON_SHELL=python``.  After the banner appears, a page is
        fetched from a ``MockServer`` and ``type(response)`` must print
        ``HtmlResponse``.  Once the shell has exited, everything it wrote must
        be free of the word ``Traceback``.

        If any step fails, the child process is terminated before the error
        propagates, so a failing run does not leave a shell behind.
        """
        # Local imports: only the failure-path cleanup below needs them.
        import signal
        from contextlib import suppress

        args = (
            sys.executable,
            "-m",
            "scrapy.cmdline",
            "shell",
        )
        env = os.environ.copy()
        env["SCRAPY_PYTHON_SHELL"] = "python"
        # Collects everything read from the child for the final check.
        logfile = BytesIO()
        p = PopenSpawn(args, env=env, timeout=5)
        p.logfile_read = logfile
        try:
            p.expect_exact("Available Scrapy objects")
            with MockServer() as mockserver:
                p.sendline(f"fetch('{mockserver.url('/')}')")
                p.sendline("type(response)")
                p.expect_exact("HtmlResponse")
            p.sendeof()
            p.wait()
        except BaseException:
            # An expectation timed out or another step failed: stop the child
            # instead of leaking it.  OSError is ignored because the child may
            # already be gone, and the original error must not be masked.
            with suppress(OSError):
                p.kill(signal.SIGTERM)
            raise
        self.assertNotIn("Traceback", logfile.getvalue().decode())
|