1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-23 03:23:59 +00:00

Remove djangoitem since we moved it to scrapy/scrapy-djangoitem

This commit is contained in:
Julia Medina 2015-04-19 13:09:25 -03:00
parent bb4c8c33cc
commit ffc60910aa
9 changed files with 3 additions and 358 deletions

View File

@ -2,11 +2,7 @@ import six
import pytest
from twisted.python import log
from scrapy import optional_features
collect_ignore = ["scrapy/stats.py", "scrapy/project.py"]
if 'django' not in optional_features:
collect_ignore.append("tests/test_djangoitem/models.py")
if six.PY3:
for line in open('tests/py3-ignores.txt'):

View File

@ -4,146 +4,8 @@
DjangoItem
==========
:class:`DjangoItem` is a class of item that gets its field definitions from a
Django model; you simply create a :class:`DjangoItem` and specify what Django
model it relates to.
DjangoItem has been moved into a separate project.
Besides getting the model fields defined on your item, :class:`DjangoItem`
provides a method to create and populate a Django model instance with the item
data.
It is hosted at:
Using DjangoItem
================
:class:`DjangoItem` works much like ModelForms in Django: you create a subclass
and define its ``django_model`` attribute to be a valid Django model. With this
you will get an item with a field for each Django model field.
In addition, you can define fields that aren't present in the model and even
override fields that are present in the model by defining them in the item.
Let's see some examples:
Creating a Django model for the examples::
from django.db import models
class Person(models.Model):
name = models.CharField(max_length=255)
age = models.IntegerField()
Defining a basic :class:`DjangoItem`::
from scrapy.contrib.djangoitem import DjangoItem
class PersonItem(DjangoItem):
django_model = Person
:class:`DjangoItem` works just like :class:`~scrapy.item.Item`::
>>> p = PersonItem()
>>> p['name'] = 'John'
>>> p['age'] = '22'
To obtain the Django model from the item, we call the extra method
:meth:`~DjangoItem.save` of the :class:`DjangoItem`::
>>> person = p.save()
>>> person.name
'John'
>>> person.age
'22'
>>> person.id
1
The model is already saved when we call :meth:`~DjangoItem.save`. We can
prevent this by calling :meth:`~DjangoItem.save` with ``commit=False``, which
returns an unsaved model::
>>> person = p.save(commit=False)
>>> person.name
'John'
>>> person.age
'22'
>>> person.id
None
As said before, we can add other fields to the item::
import scrapy
from scrapy.contrib.djangoitem import DjangoItem
class PersonItem(DjangoItem):
django_model = Person
sex = scrapy.Field()
::
>>> p = PersonItem()
>>> p['name'] = 'John'
>>> p['age'] = '22'
>>> p['sex'] = 'M'
.. note:: fields added to the item won't be taken into account when doing a :meth:`~DjangoItem.save`
And we can override the fields of the model with our own::
class PersonItem(DjangoItem):
django_model = Person
name = scrapy.Field(default='No Name')
This is useful to provide properties to the field, like a default or any other
property that your project uses.
DjangoItem caveats
==================
DjangoItem is a rather convenient way to integrate Scrapy projects with Django
models, but bear in mind that Django ORM may not scale well if you scrape a lot
of items (i.e. millions) with Scrapy. This is because a relational backend is
often not a good choice for a write-intensive application (such as a web
crawler), especially if the database is highly normalized and with many indices.
Django settings set up
======================
To use the Django models outside the Django application you need to set up the
``DJANGO_SETTINGS_MODULE`` environment variable and --in most cases-- modify
the ``PYTHONPATH`` environment variable to be able to import the settings
module.
There are many ways to do this depending on your use case and preferences.
One of the simplest ways to do it is detailed below.
Suppose your Django project is named ``mysite``, is located in the path
``/home/projects/mysite`` and you have created an app ``myapp`` with the model
``Person``. That means your directory structure is something like this::
/home/projects/mysite
├── manage.py
├── myapp
│   ├── __init__.py
│   ├── models.py
│   ├── tests.py
│   └── views.py
└── mysite
├── __init__.py
├── settings.py
├── urls.py
└── wsgi.py
Then you need to add ``/home/projects/mysite`` to the ``PYTHONPATH``
environment variable and set up the environment variable
``DJANGO_SETTINGS_MODULE`` to ``mysite.settings``. That can be done in your
Scrapy's settings file by adding the lines below::
import sys
sys.path.append('/home/projects/mysite')
import os
os.environ['DJANGO_SETTINGS_MODULE'] = 'mysite.settings'
Notice that we modify the ``sys.path`` variable instead of the ``PYTHONPATH``
environment variable as we are already within the Python runtime. If everything
is right, you should be able to start the ``scrapy shell`` command and import
the model ``Person`` (i.e. ``from myapp.models import Person``).
https://github.com/scrapy/scrapy-djangoitem

View File

@ -38,13 +38,6 @@ except ImportError:
pass
else:
optional_features.add('boto')
try:
import django
del django
except ImportError:
pass
else:
optional_features.add('django')
from twisted import version as _txv
twisted_version = (_txv.major, _txv.minor, _txv.micro)

View File

@ -1,75 +0,0 @@
from scrapy.item import Field, Item, ItemMeta
from scrapy import optional_features
if 'django' in optional_features:
from django.core.exceptions import ValidationError
class DjangoItemMeta(ItemMeta):
    """Metaclass that adds an item field for each field of ``django_model``.

    Classes without a ``django_model`` only get their own copy of ``fields``.
    Otherwise every model field that is not auto-created is recorded in
    ``_model_fields``, and gets a fresh ``Field()`` unless the item class
    already declares a field with that name.
    """

    def __new__(mcs, class_name, bases, attrs):
        cls = super(DjangoItemMeta, mcs).__new__(mcs, class_name, bases, attrs)
        # Copy the mapping before adding entries to it.
        cls.fields = cls.fields.copy()

        if not cls.django_model:
            return cls

        cls._model_fields = []
        cls._model_meta = cls.django_model._meta
        for model_field in cls._model_meta.fields:
            if model_field.auto_created:
                continue
            name = model_field.name
            # Fields declared on the item take precedence over a plain Field().
            if name not in cls.fields:
                cls.fields[name] = Field()
            # Recorded even when overridden, so the value still reaches the
            # model instance.
            cls._model_fields.append(name)
        return cls
class DjangoItem(Item):
    """Item bound to the Django model named by ``django_model``.

    ``save`` returns (and optionally persists) a model instance built from
    the item's values; ``is_valid`` and ``errors`` run the model's own
    validation. Both the model instance and the error dict are created once
    and then cached on the item.
    """

    __metaclass__ = DjangoItemMeta

    django_model = None

    def __init__(self, *args, **kwargs):
        super(DjangoItem, self).__init__(*args, **kwargs)
        # Filled lazily by ``instance`` and ``_get_errors`` respectively.
        self._instance = None
        self._errors = None

    def save(self, commit=True):
        """Return the model instance, calling its ``save()`` when *commit*."""
        model = self.instance
        if commit:
            model.save()
        return model

    def is_valid(self, exclude=None):
        """Return True when validation produced no errors.

        The result comes from the cached error dict, so *exclude* only has
        an effect the first time validation runs.
        """
        self._get_errors(exclude)
        return not self._errors

    def _get_errors(self, exclude=None):
        """Validate the model instance once and return the error dict."""
        if self._errors is None:
            self._errors = {}
            skipped = [] if exclude is None else exclude
            checks = (
                lambda: self.instance.clean_fields(exclude=skipped),
                lambda: self.instance.clean(),
            )
            for check in checks:
                try:
                    check()
                except ValidationError as exc:
                    self._errors = exc.update_error_dict(self._errors)
            # uniqueness is not checked, because it is faster to check it when
            # saving object to database. Just beware, that failed save()
            # raises IntegrityError instead of ValidationError.
        return self._errors

    errors = property(_get_errors)

    @property
    def instance(self):
        """Model instance built from the item values that are model fields."""
        if self._instance is None:
            modelargs = {
                key: self.get(key)
                for key in self._values
                if key in self._model_fields
            }
            self._instance = self.django_model(**modelargs)
        return self._instance

View File

@ -9,7 +9,6 @@ tests/test_contrib_linkextractors.py
tests/test_contrib_loader.py
tests/test_crawl.py
tests/test_crawler.py
tests/test_djangoitem/__init__.py
tests/test_downloader_handlers.py
tests/test_downloadermiddleware_ajaxcrawlable.py
tests/test_downloadermiddleware_cookies.py

View File

@ -1,103 +0,0 @@
import os
from twisted.trial import unittest
from scrapy.contrib.djangoitem import DjangoItem, Field
from scrapy import optional_features
# Name the Django settings module for the tests; this runs before the models
# import below.
os.environ['DJANGO_SETTINGS_MODULE'] = 'tests.test_djangoitem.settings'

if 'django' in optional_features:
    # The items below are only defined when the 'django' feature is available.
    from .models import Person, IdentifiedPerson

    # Item backed by Person with no extra declarations.
    class BasePersonItem(DjangoItem):
        django_model = Person

    # Adds an item-only field named "other".
    class NewFieldPersonItem(BasePersonItem):
        other = Field()

    # Declares "age" explicitly on the item.
    class OverrideFieldPersonItem(BasePersonItem):
        age = Field()

    # Item backed by IdentifiedPerson.
    class IdentifiedPersonItem(DjangoItem):
        django_model = IdentifiedPerson
class DjangoItemTest(unittest.TestCase):
    """Tests for DjangoItem field discovery, saving and validation."""

    def assertSortedEqual(self, first, second, msg=None):
        """Assert that both iterables hold equal elements, ignoring order."""
        return self.assertEqual(sorted(first), sorted(second), msg)

    def setUp(self):
        # The item classes used below are only defined when the 'django'
        # feature is available, so skip every test otherwise.
        if 'django' not in optional_features:
            raise unittest.SkipTest("Django is not available")

    def test_base(self):
        i = BasePersonItem()
        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

    def test_new_fields(self):
        i = NewFieldPersonItem()
        self.assertSortedEqual(i.fields.keys(), ['age', 'other', 'name'])

    def test_override_field(self):
        i = OverrideFieldPersonItem()
        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

    def test_custom_primary_key_field(self):
        """
        Test that if a custom primary key exists, it is
        in the field list.
        """
        i = IdentifiedPersonItem()
        self.assertSortedEqual(i.fields.keys(), ['age', 'identifier', 'name'])

    def test_save(self):
        i = BasePersonItem()
        self.assertSortedEqual(i.fields.keys(), ['age', 'name'])

        i['name'] = 'John'
        i['age'] = '22'
        # commit=False returns the model instance without calling its save().
        person = i.save(commit=False)

        self.assertEqual(person.name, 'John')
        self.assertEqual(person.age, '22')

    def test_override_save(self):
        i = OverrideFieldPersonItem()

        i['name'] = 'John'
        # it is not obvious that "age" should be saved also, since it was
        # redefined in child class
        i['age'] = '22'
        person = i.save(commit=False)

        self.assertEqual(person.name, 'John')
        self.assertEqual(person.age, '22')

    def test_validation(self):
        long_name = 'z' * 300
        i = BasePersonItem(name=long_name)
        self.assertFalse(i.is_valid())
        self.assertEqual(set(i.errors), {'age', 'name'})

        i = BasePersonItem(name='John')
        self.assertTrue(i.is_valid(exclude=['age']))
        self.assertEqual({}, i.errors)

        # once the item is validated, it does not validate again
        i['name'] = long_name
        self.assertTrue(i.is_valid())

    def test_override_validation(self):
        i = OverrideFieldPersonItem()
        i['name'] = 'John'
        self.assertFalse(i.is_valid())

        # A fresh item is needed because validation results are cached.
        i = OverrideFieldPersonItem()
        i['name'] = 'John'
        i['age'] = '22'
        self.assertTrue(i.is_valid())

    def test_default_field_values(self):
        # With no values set on the item, the instance reports name 'Robot'.
        i = BasePersonItem()
        person = i.save(commit=False)
        self.assertEqual(person.name, 'Robot')

View File

@ -1,17 +0,0 @@
from django.db import models
# Model with a name (defaulting to 'Robot') and an integer age that has no
# default.
class Person(models.Model):
    name = models.CharField(max_length=255, default='Robot')
    age = models.IntegerField()

    class Meta:
        # Explicit app label for this model.
        app_label = 'test_djangoitem'
# Model whose primary key is the explicitly declared ``identifier`` field.
class IdentifiedPerson(models.Model):
    identifier = models.PositiveIntegerField(primary_key=True)
    name = models.CharField(max_length=255)
    age = models.IntegerField()

    class Meta:
        # Explicit app label for this model.
        app_label = 'test_djangoitem'

View File

@ -1,8 +0,0 @@
# Minimal Django settings: a single SQLite database and a fixed secret key.

# SQLite connection parameters; ':memory:' is SQLite's in-memory database name.
_in_memory_sqlite = {
    'NAME': ':memory:',
    'ENGINE': 'django.db.backends.sqlite3',
}

DATABASES = {'default': _in_memory_sqlite}

SECRET_KEY = 'top-secret'

View File

@ -12,7 +12,6 @@ deps =
# Extras
boto
Pillow
django
leveldb
-rtests/requirements.txt
commands =
@ -26,7 +25,6 @@ deps =
Twisted==11.1.0
boto==2.2.2
Pillow<2.0
django==1.3.1
cssselect==0.9.1
zope.interface==3.6.1
-rtests/requirements.txt