mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-23 03:23:59 +00:00
Remove djangoitem since we moved it to scrapy/scrapy-djangoitem
This commit is contained in:
parent
bb4c8c33cc
commit
ffc60910aa
@ -2,11 +2,7 @@ import six
|
||||
import pytest
|
||||
from twisted.python import log
|
||||
|
||||
from scrapy import optional_features
|
||||
|
||||
collect_ignore = ["scrapy/stats.py", "scrapy/project.py"]
|
||||
if 'django' not in optional_features:
|
||||
collect_ignore.append("tests/test_djangoitem/models.py")
|
||||
|
||||
if six.PY3:
|
||||
for line in open('tests/py3-ignores.txt'):
|
||||
|
@ -4,146 +4,8 @@
|
||||
DjangoItem
|
||||
==========
|
||||
|
||||
:class:`DjangoItem` is a class of item that gets its fields definition from a
|
||||
Django model, you simply create a :class:`DjangoItem` and specify what Django
|
||||
model it relates to.
|
||||
DjangoItem has been moved into a separate project.
|
||||
|
||||
Besides getting the model fields defined on your item, :class:`DjangoItem`
|
||||
provides a method to create and populate a Django model instance with the item
|
||||
data.
|
||||
It is hosted at:
|
||||
|
||||
Using DjangoItem
|
||||
================
|
||||
|
||||
:class:`DjangoItem` works much like ModelForms in Django, you create a subclass
|
||||
and define its ``django_model`` attribute to be a valid Django model. With this
|
||||
you will get an item with a field for each Django model field.
|
||||
|
||||
In addition, you can define fields that aren't present in the model and even
|
||||
override fields that are present in the model defining them in the item.
|
||||
|
||||
Let's see some examples:
|
||||
|
||||
Creating a Django model for the examples::
|
||||
|
||||
from django.db import models
|
||||
|
||||
class Person(models.Model):
|
||||
name = models.CharField(max_length=255)
|
||||
age = models.IntegerField()
|
||||
|
||||
Defining a basic :class:`DjangoItem`::
|
||||
|
||||
from scrapy.contrib.djangoitem import DjangoItem
|
||||
|
||||
class PersonItem(DjangoItem):
|
||||
django_model = Person
|
||||
|
||||
:class:`DjangoItem` works just like :class:`~scrapy.item.Item`::
|
||||
|
||||
>>> p = PersonItem()
|
||||
>>> p['name'] = 'John'
|
||||
>>> p['age'] = '22'
|
||||
|
||||
To obtain the Django model from the item, we call the extra method
|
||||
:meth:`~DjangoItem.save` of the :class:`DjangoItem`::
|
||||
|
||||
>>> person = p.save()
|
||||
>>> person.name
|
||||
'John'
|
||||
>>> person.age
|
||||
'22'
|
||||
>>> person.id
|
||||
1
|
||||
|
||||
The model is already saved when we call :meth:`~DjangoItem.save`, we
|
||||
can prevent this by calling it with ``commit=False``. We can use
|
||||
``commit=False`` in :meth:`~DjangoItem.save` method to obtain an unsaved model::
|
||||
|
||||
>>> person = p.save(commit=False)
|
||||
>>> person.name
|
||||
'John'
|
||||
>>> person.age
|
||||
'22'
|
||||
>>> person.id
|
||||
None
|
||||
|
||||
As said before, we can add other fields to the item::
|
||||
|
||||
import scrapy
|
||||
from scrapy.contrib.djangoitem import DjangoItem
|
||||
|
||||
class PersonItem(DjangoItem):
|
||||
django_model = Person
|
||||
sex = scrapy.Field()
|
||||
|
||||
::
|
||||
|
||||
>>> p = PersonItem()
|
||||
>>> p['name'] = 'John'
|
||||
>>> p['age'] = '22'
|
||||
>>> p['sex'] = 'M'
|
||||
|
||||
.. note:: fields added to the item won't be taken into account when doing a :meth:`~DjangoItem.save`
|
||||
|
||||
And we can override the fields of the model with our own::
|
||||
|
||||
class PersonItem(DjangoItem):
|
||||
django_model = Person
|
||||
name = scrapy.Field(default='No Name')
|
||||
|
||||
This is useful to provide properties to the field, like a default or any other
|
||||
property that your project uses.
|
||||
|
||||
DjangoItem caveats
|
||||
==================
|
||||
|
||||
DjangoItem is a rather convenient way to integrate Scrapy projects with Django
|
||||
models, but bear in mind that Django ORM may not scale well if you scrape a lot
|
||||
of items (i.e. millions) with Scrapy. This is because a relational backend is
|
||||
often not a good choice for a write intensive application (such as a web
|
||||
crawler), especially if the database is highly normalized and with many indices.
|
||||
|
||||
Django settings set up
|
||||
======================
|
||||
|
||||
To use the Django models outside the Django application you need to set up the
|
||||
``DJANGO_SETTINGS_MODULE`` environment variable and --in most cases-- modify
|
||||
the ``PYTHONPATH`` environment variable to be able to import the settings
|
||||
module.
|
||||
|
||||
There are many ways to do this depending on your use case and preferences.
|
||||
One of the simplest ways to do it is detailed below.
|
||||
|
||||
Suppose your Django project is named ``mysite``, is located in the path
|
||||
``/home/projects/mysite`` and you have created an app ``myapp`` with the model
|
||||
``Person``. That means your directory structure is something like this::
|
||||
|
||||
/home/projects/mysite
|
||||
├── manage.py
|
||||
├── myapp
|
||||
│ ├── __init__.py
|
||||
│ ├── models.py
|
||||
│ ├── tests.py
|
||||
│ └── views.py
|
||||
└── mysite
|
||||
├── __init__.py
|
||||
├── settings.py
|
||||
├── urls.py
|
||||
└── wsgi.py
|
||||
|
||||
Then you need to add ``/home/projects/mysite`` to the ``PYTHONPATH``
|
||||
environment variable and set up the environment variable
|
||||
``DJANGO_SETTINGS_MODULE`` to ``mysite.settings``. That can be done in your
|
||||
Scrapy's settings file by adding the lines below::
|
||||
|
||||
import sys
|
||||
sys.path.append('/home/projects/mysite')
|
||||
|
||||
import os
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'mysite.settings'
|
||||
|
||||
Notice that we modify the ``sys.path`` variable instead of the ``PYTHONPATH``
|
||||
environment variable as we are already within the python runtime. If everything
|
||||
is right, you should be able to start the ``scrapy shell`` command and import
|
||||
the model ``Person`` (i.e. ``from myapp.models import Person``).
|
||||
https://github.com/scrapy/scrapy-djangoitem
|
||||
|
@ -38,13 +38,6 @@ except ImportError:
|
||||
pass
|
||||
else:
|
||||
optional_features.add('boto')
|
||||
# Probe for Django: if the import works, advertise the 'django' feature and
# drop the temporary module name again so it does not stay bound here.
try:
    import django
except ImportError:
    pass
else:
    del django
    optional_features.add('django')
|
||||
|
||||
from twisted import version as _txv
|
||||
twisted_version = (_txv.major, _txv.minor, _txv.micro)
|
||||
|
@ -1,75 +0,0 @@
|
||||
from scrapy.item import Field, Item, ItemMeta
|
||||
from scrapy import optional_features
|
||||
if 'django' in optional_features:
|
||||
from django.core.exceptions import ValidationError
|
||||
|
||||
|
||||
class DjangoItemMeta(ItemMeta):
    """Metaclass that gives an item one ``Field`` per Django model field.

    When the class being created has a truthy ``django_model``, every model
    field that is not auto-created gets a ``Field()`` entry in ``cls.fields``
    unless the item already declares a field of that name. The names of all
    such model fields, whether overridden or not, are recorded in
    ``cls._model_fields``, and the model's ``_meta`` is kept in
    ``cls._model_meta``.
    """

    def __new__(mcs, class_name, bases, attrs):
        cls = super(DjangoItemMeta, mcs).__new__(mcs, class_name, bases, attrs)
        # Rebind ``fields`` to a copy before adding entries to it
        # (presumably so additions don't leak into a shared mapping -- confirm).
        cls.fields = cls.fields.copy()

        if not cls.django_model:
            return cls

        cls._model_fields = []
        cls._model_meta = cls.django_model._meta
        for model_field in cls._model_meta.fields:
            if model_field.auto_created:
                continue
            field_name = model_field.name
            # A field declared on the item itself takes precedence, but the
            # name is still tracked as a model field below.
            if field_name not in cls.fields:
                cls.fields[field_name] = Field()
            cls._model_fields.append(field_name)
        return cls
|
||||
|
||||
|
||||
class DjangoItem(Item):
    """Item backed by the Django model named in ``django_model``.

    The model instance is built lazily from the item's values (see
    ``instance``) and then cached. Validation results are cached as well:
    once ``_get_errors`` has run, later calls return the same dict without
    validating again, whatever ``exclude`` is passed.
    """

    __metaclass__ = DjangoItemMeta

    django_model = None

    def __init__(self, *args, **kwargs):
        super(DjangoItem, self).__init__(*args, **kwargs)
        self._instance = None  # cached model instance, built on first access
        self._errors = None  # cached validation errors; None = not validated

    def save(self, commit=True):
        """Return the model instance, calling its ``save()`` first if ``commit``."""
        model = self.instance
        if commit:
            model.save()
        return model

    def is_valid(self, exclude=None):
        """Return True when validation produced no errors."""
        self._get_errors(exclude)
        if self._errors:
            return False
        return True

    def _get_errors(self, exclude=None):
        """Validate the model instance once and return the error dict.

        ``exclude`` is handed to the model's ``clean_fields``; it is only
        honoured on the first call, because the result is cached afterwards.
        """
        if self._errors is None:
            self._errors = {}
            skipped = [] if exclude is None else exclude

            try:
                self.instance.clean_fields(exclude=skipped)
            except ValidationError as exc:
                self._errors = exc.update_error_dict(self._errors)

            try:
                self.instance.clean()
            except ValidationError as exc:
                self._errors = exc.update_error_dict(self._errors)

            # uniqueness is not checked, because it is faster to check it when
            # saving object to database. Just beware, that failed save()
            # raises IntegrityError instead of ValidationError.

        return self._errors

    errors = property(_get_errors)

    @property
    def instance(self):
        """Model instance built from the item values that are model fields."""
        if self._instance is None:
            modelargs = {}
            for key in self._values:
                if key in self._model_fields:
                    modelargs[key] = self.get(key)
            self._instance = self.django_model(**modelargs)
        return self._instance
|
@ -9,7 +9,6 @@ tests/test_contrib_linkextractors.py
|
||||
tests/test_contrib_loader.py
|
||||
tests/test_crawl.py
|
||||
tests/test_crawler.py
|
||||
tests/test_djangoitem/__init__.py
|
||||
tests/test_downloader_handlers.py
|
||||
tests/test_downloadermiddleware_ajaxcrawlable.py
|
||||
tests/test_downloadermiddleware_cookies.py
|
||||
|
@ -1,103 +0,0 @@
|
||||
import os
|
||||
from twisted.trial import unittest
|
||||
|
||||
from scrapy.contrib.djangoitem import DjangoItem, Field
|
||||
from scrapy import optional_features
|
||||
|
||||
os.environ['DJANGO_SETTINGS_MODULE'] = 'tests.test_djangoitem.settings'
|
||||
|
||||
# The models and the item classes built on them are only defined when the
# 'django' feature is available.
# NOTE(review): indentation is not visible in this view; the classes are kept
# under the guard because they reference the names imported here -- confirm.
if 'django' in optional_features:
    from .models import Person, IdentifiedPerson

    # Item whose fields come from the Person model.
    class BasePersonItem(DjangoItem):
        django_model = Person

    # Adds an item-only field on top of the model fields.
    class NewFieldPersonItem(BasePersonItem):
        other = Field()

    # Redeclares a field whose name matches a model field.
    class OverrideFieldPersonItem(BasePersonItem):
        age = Field()

    # Item for the model that declares its own primary key.
    class IdentifiedPersonItem(DjangoItem):
        django_model = IdentifiedPerson
|
||||
|
||||
|
||||
class DjangoItemTest(unittest.TestCase):
    """Checks field discovery, saving and validation of the DjangoItem classes."""

    def assertSortedEqual(self, first, second, msg=None):
        """Assert that both iterables compare equal once sorted."""
        return self.assertEqual(sorted(first), sorted(second), msg)

    def setUp(self):
        # Every test needs Django; skip when the feature is not available.
        if 'django' in optional_features:
            return
        raise unittest.SkipTest("Django is not available")

    def test_base(self):
        item = BasePersonItem()
        self.assertSortedEqual(item.fields.keys(), ['age', 'name'])

    def test_new_fields(self):
        item = NewFieldPersonItem()
        self.assertSortedEqual(item.fields.keys(), ['age', 'other', 'name'])

    def test_override_field(self):
        item = OverrideFieldPersonItem()
        self.assertSortedEqual(item.fields.keys(), ['age', 'name'])

    def test_custom_primary_key_field(self):
        """
        Test that if a custom primary key exists, it is
        in the field list.
        """
        item = IdentifiedPersonItem()
        self.assertSortedEqual(item.fields.keys(), ['age', 'identifier', 'name'])

    def test_save(self):
        item = BasePersonItem()
        self.assertSortedEqual(item.fields.keys(), ['age', 'name'])

        item['name'] = 'John'
        item['age'] = '22'
        model = item.save(commit=False)

        self.assertEqual(model.name, 'John')
        self.assertEqual(model.age, '22')

    def test_override_save(self):
        item = OverrideFieldPersonItem()

        item['name'] = 'John'
        # it is not obvious that "age" should be saved also, since it was
        # redefined in child class
        item['age'] = '22'
        model = item.save(commit=False)

        self.assertEqual(model.name, 'John')
        self.assertEqual(model.age, '22')

    def test_validation(self):
        long_name = 'z' * 300
        invalid_item = BasePersonItem(name=long_name)
        self.assertFalse(invalid_item.is_valid())
        self.assertEqual(set(invalid_item.errors), {'age', 'name'})

        valid_item = BasePersonItem(name='John')
        self.assertTrue(valid_item.is_valid(exclude=['age']))
        self.assertEqual({}, valid_item.errors)

        # once the item is validated, it does not validate again
        valid_item['name'] = long_name
        self.assertTrue(valid_item.is_valid())

    def test_override_validation(self):
        incomplete = OverrideFieldPersonItem()
        incomplete['name'] = 'John'
        self.assertFalse(incomplete.is_valid())

        complete = OverrideFieldPersonItem()
        complete['name'] = 'John'
        complete['age'] = '22'
        self.assertTrue(complete.is_valid())

    def test_default_field_values(self):
        item = BasePersonItem()
        model = item.save(commit=False)
        self.assertEqual(model.name, 'Robot')
|
@ -1,17 +0,0 @@
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Person(models.Model):
    """Model with a defaulted name and an age, under the 'test_djangoitem' app."""

    # Text of up to 255 characters; 'Robot' when no name is supplied.
    name = models.CharField(max_length=255, default='Robot')
    # Integer with no default declared.
    age = models.IntegerField()

    class Meta:
        # Set explicitly rather than derived from the module's location.
        app_label = 'test_djangoitem'
|
||||
|
||||
class IdentifiedPerson(models.Model):
    """Model that declares its own primary key field, ``identifier``."""

    # Explicit primary key declared on the model itself.
    identifier = models.PositiveIntegerField(primary_key=True)
    # Text of up to 255 characters; no default declared.
    name = models.CharField(max_length=255)
    # Integer with no default declared.
    age = models.IntegerField()

    class Meta:
        # Set explicitly rather than derived from the module's location.
        app_label = 'test_djangoitem'
|
@ -1,8 +0,0 @@
|
||||
# Single default database: SQLite with the ':memory:' name, so nothing is
# written to a database file.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': ':memory:',
    }
}

# Fixed placeholder value (presumably test-only settings -- confirm before
# reusing anywhere else).
SECRET_KEY = 'top-secret'
|
Loading…
x
Reference in New Issue
Block a user