1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 01:23:57 +00:00
scrapy/tests/test_commands.py

258 lines
8.6 KiB
Python
Raw Normal View History

2009-08-28 14:42:07 -03:00
import os
import sys
2009-08-28 14:42:07 -03:00
import subprocess
import tempfile
from time import sleep
from os.path import exists, join, abspath
2009-08-28 14:42:07 -03:00
from shutil import rmtree
from tempfile import mkdtemp
import six
2010-08-19 02:30:15 -03:00
from twisted.trial import unittest
from twisted.internet import defer
2009-08-28 14:42:07 -03:00
from scrapy.utils.python import to_native_str
from scrapy.utils.python import retry_on_eintr
from scrapy.utils.test import get_testenv
from scrapy.utils.testsite import SiteTest
from scrapy.utils.testproc import ProcessTest
2009-08-28 14:42:07 -03:00
class ProjectTest(unittest.TestCase):
    """Base fixture for running ``scrapy`` commands in a scratch directory.

    ``setUp`` creates a fresh temporary directory and records the paths where
    a project named ``project_name`` would be generated; ``tearDown`` removes
    the whole directory tree again.
    """

    project_name = 'testproject'

    def setUp(self):
        self.temp_path = mkdtemp()
        # Working directory handed to every spawned command.
        self.cwd = self.temp_path
        self.proj_path = join(self.temp_path, self.project_name)
        self.proj_mod_path = join(self.proj_path, self.project_name)
        self.env = get_testenv()

    def tearDown(self):
        rmtree(self.temp_path)

    def call(self, *new_args, **kwargs):
        """Run ``python -m scrapy.cmdline <new_args>`` and return its exit code.

        Both stdout and stderr of the child are redirected to a temporary file
        that is discarded when the call returns, so only the status survives.
        Extra keyword arguments are forwarded to ``subprocess.call``.
        """
        with tempfile.TemporaryFile() as out:
            args = (sys.executable, '-m', 'scrapy.cmdline') + new_args
            return subprocess.call(args, stdout=out, stderr=out, cwd=self.cwd,
                                   env=self.env, **kwargs)

    def proc(self, *new_args, **kwargs):
        """Run ``python -m scrapy.cmdline <new_args>`` and return the process.

        The child is started with piped stdout/stderr and polled every 0.2
        seconds. Once it has exited, the finished ``subprocess.Popen`` object
        is returned so callers can read ``p.stdout`` / ``p.stderr``. Extra
        keyword arguments are forwarded to ``subprocess.Popen``.

        If the command is still running after more than 15 seconds of
        accumulated polling, it is killed and the test fails.

        NOTE: the pipes are not drained while polling, so a command that
        writes more than the OS pipe buffer can hold will block and end up
        hitting the timeout.
        """
        args = (sys.executable, '-m', 'scrapy.cmdline') + new_args
        p = subprocess.Popen(args, cwd=self.cwd, env=self.env,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             **kwargs)
        waited = 0
        interval = 0.2
        while p.poll() is None:
            sleep(interval)
            waited += interval
            if waited > 15:
                p.kill()
                # Reap the killed child and close its pipes so a timeout does
                # not leave a zombie process or open descriptors behind.
                p.communicate()
                # self.fail() still raises when Python runs with -O, unlike a
                # bare ``assert False``.
                self.fail('Command took too much time to complete')
        return p
2010-08-19 02:30:15 -03:00
2009-08-28 14:42:07 -03:00
class StartprojectTest(ProjectTest):
    """Checks for the ``startproject`` command."""

    def test_startproject(self):
        """Generate a project, verify its skeleton, then try rejected names."""
        status = self.call('startproject', self.project_name)
        self.assertEqual(0, status)

        # Files and directories the command is expected to have generated.
        skeleton = [
            join(self.proj_path, 'scrapy.cfg'),
            join(self.proj_path, 'testproject'),
        ]
        module_members = (
            ('__init__.py',),
            ('items.py',),
            ('pipelines.py',),
            ('settings.py',),
            ('spiders', '__init__.py'),
        )
        for parts in module_members:
            skeleton.append(join(self.proj_mod_path, *parts))
        for path in skeleton:
            assert exists(path)

        # Each of these must exit with status 1: the project that now already
        # exists, a name that is not a valid identifier, and 'sys'
        # (presumably rejected because it clashes with an importable module).
        rejected_names = (self.project_name, 'wrong---project---name', 'sys')
        for name in rejected_names:
            self.assertEqual(1, self.call('startproject', name))
2009-08-28 14:42:07 -03:00
class CommandTest(ProjectTest):
    """Fixture for commands that run from inside a generated project."""

    def setUp(self):
        super(CommandTest, self).setUp()
        # Generate the project first, then run later commands from its root.
        self.call('startproject', self.project_name)
        project_root = join(self.temp_path, self.project_name)
        self.cwd = project_root
        settings_module = '{0}.settings'.format(self.project_name)
        self.env['SCRAPY_SETTINGS_MODULE'] = settings_module
2009-08-31 08:58:29 -03:00
class GenspiderCommandTest(CommandTest):
    """Checks for the ``genspider`` command."""

    def test_arguments(self):
        """A name alone is rejected; name plus domain creates the spider."""
        spider_file = join(self.proj_mod_path, 'spiders', 'test_name.py')

        # only pass one argument. spider script shouldn't be created
        status = self.call('genspider', 'test_name')
        self.assertEqual(2, status)
        assert not exists(spider_file)

        # pass two arguments <name> <domain>. spider script should be created
        status = self.call('genspider', 'test_name', 'test.com')
        self.assertEqual(0, status)
        assert exists(spider_file)

    def test_template(self, tplname='crawl'):
        """Generate a spider from ``tplname`` and check both runs' output.

        The first run must report the created spider; the second, identical
        run must report that the spider already exists.
        """
        spname = 'test_spider'
        args = []
        if tplname:
            args.append('--template=%s' % tplname)

        first = self.proc('genspider', spname, 'test.com', *args)
        first_out = to_native_str(retry_on_eintr(first.stdout.read))
        created_msg = "Created spider %r using template %r in module" % (
            spname, tplname)
        self.assertIn(created_msg, first_out)
        spider_file = join(self.proj_mod_path, 'spiders', 'test_spider.py')
        self.assertTrue(exists(spider_file))

        second = self.proc('genspider', spname, 'test.com', *args)
        second_out = to_native_str(retry_on_eintr(second.stdout.read))
        exists_msg = "Spider %r already exists in module" % spname
        self.assertIn(exists_msg, second_out)

    def test_template_basic(self):
        self.test_template('basic')

    def test_template_csvfeed(self):
        self.test_template('csvfeed')

    def test_template_xmlfeed(self):
        self.test_template('xmlfeed')

    def test_list(self):
        status = self.call('genspider', '--list')
        self.assertEqual(0, status)

    def test_dump(self):
        # Long and short spellings of the dump option must both succeed.
        for option in (('--dump=basic',), ('-d', 'basic')):
            self.assertEqual(0, self.call('genspider', *option))

    def test_same_name_as_project(self):
        """A spider named like the project is rejected and not written."""
        status = self.call('genspider', self.project_name)
        self.assertEqual(2, status)
        spider_file = join(self.proj_mod_path, 'spiders',
                           '%s.py' % self.project_name)
        assert not exists(spider_file)
2009-08-28 14:42:07 -03:00
2009-08-31 08:58:29 -03:00
class MiscCommandsTest(CommandTest):
    """Smoke checks for commands that have no dedicated test class."""

    def test_list(self):
        """``list`` must exit with status 0 inside a fresh project."""
        status = self.call('list')
        self.assertEqual(0, status)
2009-08-28 14:42:07 -03:00
2010-08-19 02:30:15 -03:00
class RunSpiderCommandTest(CommandTest):
    """Checks for the ``runspider`` command."""

    def _runspider_log(self, target):
        """Run ``runspider`` on ``target`` and return its stderr as text."""
        p = self.proc('runspider', target)
        return to_native_str(p.stderr.read())

    def _runspider_file_log(self, filename, source):
        """Write ``source`` to ``filename`` in a new dir and run it.

        Returns the stderr text of the ``runspider`` invocation.
        """
        workdir = self.mktemp()
        os.mkdir(workdir)
        path = abspath(join(workdir, filename))
        with open(path, 'w') as f:
            f.write(source)
        return self._runspider_log(path)

    def test_runspider(self):
        log = self._runspider_file_log('myspider.py', """
import scrapy

class MySpider(scrapy.Spider):
    name = 'myspider'

    def start_requests(self):
        self.logger.debug("It Works!")
        return []
""")
        expected_lines = (
            "DEBUG: It Works!",
            "INFO: Spider opened",
            "INFO: Closing spider (finished)",
            "INFO: Spider closed (finished)",
        )
        for line in expected_lines:
            self.assertIn(line, log)

    def test_runspider_no_spider_found(self):
        # The file only imports Spider; it defines no spider class itself.
        log = self._runspider_file_log('myspider.py', """
from scrapy.spiders import Spider
""")
        self.assertIn("No spider found in file", log)

    def test_runspider_file_not_found(self):
        log = self._runspider_log('some_non_existent_file')
        self.assertIn("File not found: some_non_existent_file", log)

    def test_runspider_unable_to_load(self):
        # An empty file with a .txt extension instead of .py.
        log = self._runspider_file_log('myspider.txt', "")
        self.assertIn("Unable to load", log)
class ParseCommandTest(ProcessTest, SiteTest, CommandTest):
    """Checks for the ``parse`` command; skipped unless running on Python 2."""

    skip = not six.PY2
    command = 'parse'

    def _write_module_file(self, relative_parts, text, mode='w'):
        """Write ``text`` to a file below the project module directory."""
        fname = abspath(join(self.proj_mod_path, *relative_parts))
        with open(fname, mode) as f:
            f.write(text)

    def setUp(self):
        super(ParseCommandTest, self).setUp()
        self.spider_name = 'parse_spider'

        # Spider that logs at DEBUG level only when given the `test_arg`
        # spider argument, and always returns one Item and one dict.
        self._write_module_file(('spiders', 'myspider.py'), """
import scrapy

class MySpider(scrapy.Spider):
    name = '{0}'

    def parse(self, response):
        if getattr(self, 'test_arg', None):
            self.logger.debug('It Works!')
        return [scrapy.Item(), dict(foo='bar')]
""".format(self.spider_name))

        # Pipeline that logs at INFO level for every item it processes.
        self._write_module_file(('pipelines.py',), """
import logging

class MyPipeline(object):
    component_name = 'my_pipeline'

    def process_item(self, item, spider):
        logging.info('It Works!')
        return item
""")

        # Append to the generated settings so the pipeline is enabled.
        self._write_module_file(('settings.py',), """
ITEM_PIPELINES = {'%s.pipelines.MyPipeline': 1}
""" % self.project_name, mode='a')

    @defer.inlineCallbacks
    def test_spider_arguments(self):
        """``-a test_arg=1`` must reach the spider and trigger its log."""
        cmd_args = [
            '--spider', self.spider_name,
            '-a', 'test_arg=1',
            '-c', 'parse',
            self.url('/html'),
        ]
        _, _, stderr = yield self.execute(cmd_args)
        self.assertIn("DEBUG: It Works!", to_native_str(stderr))

    @defer.inlineCallbacks
    def test_pipelines(self):
        """``--pipelines`` must run items through the project pipeline."""
        cmd_args = [
            '--spider', self.spider_name,
            '--pipelines',
            '-c', 'parse',
            self.url('/html'),
        ]
        _, _, stderr = yield self.execute(cmd_args)
        self.assertIn("INFO: It Works!", to_native_str(stderr))

    @defer.inlineCallbacks
    def test_parse_items(self):
        """The scraped items must be printed on standard output."""
        cmd_args = ['--spider', self.spider_name, '-c', 'parse',
                    self.url('/html')]
        _, out, _ = yield self.execute(cmd_args)
        self.assertIn("""[{}, {'foo': 'bar'}]""", to_native_str(out))
2013-05-28 12:42:50 -03:00
class BenchCommandTest(CommandTest):
    """Smoke check for the ``bench`` command."""

    def test_run(self):
        """Run ``bench`` with two setting overrides and look for crawl logs."""
        overrides = ('LOGSTATS_INTERVAL=0.001', 'CLOSESPIDER_TIMEOUT=0.01')
        cmd_args = ['bench']
        for setting in overrides:
            cmd_args.extend(['-s', setting])
        p = self.proc(*cmd_args)
        log = to_native_str(p.stderr.read())
        self.assertIn('INFO: Crawled', log)