# Scrapy setup.py script
#
# Most of the code here was taken from Django's setup.py

from distutils.core import setup
from distutils.command.install_data import install_data
from distutils.command.install import INSTALL_SCHEMES

import os
import sys

class osx_install_data(install_data):
    # On MacOS, the platform-specific lib dir is /System/Library/Framework/Python/.../
    # which is wrong. Python 2.5 supplied with MacOS 10.5 has an Apple-specific fix
    # for this in distutils.command.install_data#306. It fixes install_lib but not
    # install_data, which is why we roll our own install_data class.

    def finalize_options(self):
        # By the time finalize_options is called, install.install_lib is set to the
        # fixed directory, so we set the installdir to install_lib. The
        # install_data class uses ('install_data', 'install_dir') instead.
        self.set_undefined_options('install', ('install_lib', 'install_dir'))
        install_data.finalize_options(self)

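# With this override, data files on OS X follow the corrected install_lib
# location instead of the /System framework path.
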
if sys.platform == "darwin":
    cmdclasses = {'install_data': osx_install_data}
else:
    cmdclasses = {'install_data': install_data}

def fullsplit(path, result=None):
    """
    Split a pathname into components (the opposite of os.path.join) in a
    platform-neutral way.
    """
    if result is None:
        result = []
    head, tail = os.path.split(path)
    if head == '':
        return [tail] + result
    if head == path:
        return result
    return fullsplit(head, [tail] + result)

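# For example, on a POSIX-style path:
#   fullsplit('scrapy/contrib/downloadermiddleware')
#   -> ['scrapy', 'contrib', 'downloadermiddleware']
# and the package walk below joins such lists with '.' to build dotted
# package names.
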
# Tell distutils to put the data_files in platform-specific installation
# locations. See here for an explanation:
# http://groups.google.com/group/comp.lang.python/browse_thread/thread/35ec7b2fed36eaec/2105ee4d9e8042cb
for scheme in INSTALL_SCHEMES.values():
    scheme['data'] = scheme['purelib']

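# In effect, a data file found under scrapy/ is installed inside the
# package directory in site-packages, next to the code that uses it,
# rather than under a separate data prefix such as /usr/share.
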
# Compile the list of packages available, because distutils doesn't have
# an easy way to do this.
packages, data_files = [], []
root_dir = os.path.dirname(__file__)
if root_dir != '':
    os.chdir(root_dir)
scrapy_dir = 'scrapy'

# Generate dropin.cache for the spiders used by the tests. If dropin.cache
# is missing, running the tests from a Python site-packages install fails
# with permission errors.
os.environ['SCRAPY_SETTINGS_DISABLED'] = '1'
from twisted.plugin import getPlugins, IPlugin
import scrapy.tests.test_spiders
list(getPlugins(IPlugin, scrapy.tests.test_spiders))

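# Note: getPlugins returns a lazy iterator; consuming it with list() is
# what actually makes Twisted write the dropin.cache file.
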
def is_not_module(filename):
    return os.path.splitext(filename)[1] not in ['.py', '.pyc', '.pyo']

for dirpath, dirnames, filenames in os.walk(scrapy_dir):
    # Prune dirnames that start with '.' (modifying dirnames in place also
    # keeps os.walk from descending into them)
    dirnames[:] = [d for d in dirnames if not d.startswith('.')]
    if '__init__.py' in filenames:
        packages.append('.'.join(fullsplit(dirpath)))
        data = [f for f in filenames if is_not_module(f)]
        if data:
            data_files.append([dirpath, [os.path.join(dirpath, f) for f in data]])
    elif filenames:
        data_files.append([dirpath, [os.path.join(dirpath, f) for f in filenames]])

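# At this point packages holds dotted names such as 'scrapy' and
# 'scrapy.core', while data_files holds [directory, [file, ...]] pairs
# for the non-module files shipped inside the package tree.
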
# Small hack for working with bdist_wininst.
# See http://mail.python.org/pipermail/distutils-sig/2004-August/004134.html
if len(sys.argv) > 1 and sys.argv[1] == 'bdist_wininst':
    for file_info in data_files:
        file_info[0] = '\\PURELIB\\%s' % file_info[0]

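# The Windows installer expands the \PURELIB\ prefix to the target
# site-packages directory, so e.g. 'scrapy/templates' becomes
# '\PURELIB\scrapy/templates' and is installed next to the code.
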
# Dynamically calculate the version based on scrapy.__version__
version = __import__('scrapy').__version__
if 'SVN' in version:
    version = ' '.join(version.split(' ')[:-1])

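# e.g. assuming a development version string of the form '0.7.0 SVN 1234',
# the trailing revision token is dropped above, leaving '0.7.0 SVN';
# release version strings pass through unchanged.
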
setup(
    name = 'Scrapy',
    version = version,
    url = 'http://scrapy.org',
    download_url = 'http://scrapy.org/releases/scrapy-0.7.0.tar.gz',
    description = 'A high-level Python screen scraping framework',
    long_description = 'Scrapy is a high-level scraping and web crawling '
        'framework for writing spiders that crawl and parse web pages, for '
        'all kinds of purposes: from information retrieval to monitoring '
        'or testing web sites.',
    author = 'Scrapy developers',
    author_email = '',
    license = 'BSD',
    packages = packages,
    cmdclass = cmdclasses,
    data_files = data_files,
    scripts = ['bin/scrapy-ctl.py'],
)