2018-01-16 16:14:35 -03:00
|
|
|
from sys import version_info
|
2016-09-01 15:39:16 +03:00
|
|
|
import pickle
|
|
|
|
|
2013-04-23 17:48:09 -03:00
|
|
|
from queuelib.tests import test_queue as t
|
2015-04-23 11:51:07 -03:00
|
|
|
from scrapy.squeues import MarshalFifoDiskQueue, MarshalLifoDiskQueue, PickleFifoDiskQueue, PickleLifoDiskQueue
|
2011-09-01 14:27:29 -03:00
|
|
|
from scrapy.item import Item, Field
|
|
|
|
from scrapy.http import Request
|
2015-04-20 23:21:41 -03:00
|
|
|
from scrapy.loader import ItemLoader
|
2018-01-16 16:14:35 -03:00
|
|
|
from scrapy.selector import Selector
|
2011-08-05 20:32:22 -03:00
|
|
|
|
2011-09-01 14:27:29 -03:00
|
|
|
class TestItem(Item):
    """Item fixture declaring a single ``name`` field."""

    name = Field()
|
|
|
|
|
2014-04-03 05:58:15 -03:00
|
|
|
def _test_procesor(x):
    """Return ``x`` added to itself (for example ``'x'`` gives ``'xx'``)."""
    doubled = x + x
    return doubled
|
|
|
|
|
|
|
|
class TestLoader(ItemLoader):
    """Loader fixture that builds ``TestItem`` and whose ``name_out`` doubles its input."""

    # Wrapped in staticmethod so the module-level function is not bound to
    # the loader instance when accessed as an attribute.
    name_out = staticmethod(_test_procesor)
    default_item_class = TestItem
|
2011-08-05 20:32:22 -03:00
|
|
|
|
2016-09-01 15:39:16 +03:00
|
|
|
def nonserializable_object_test(self):
    """Check that pushing an object that cannot be serialized raises ValueError.

    Defined at module level so that test case classes can expose it by
    assigning it to ``test_nonserializable_object``. ``self`` must provide
    ``queue()`` and ``assertRaises``.
    """
    q = self.queue()
    try:
        try:
            pickle.dumps(lambda x: x)
        except Exception:
            # Trigger Twisted bug #7989
            import twisted.persisted.styles  # NOQA
            self.assertRaises(ValueError, q.push, lambda x: x)
        else:
            # Use a different unpickleable object
            class A(object):
                pass

            a = A()
            a.__reduce__ = a.__reduce_ex__ = None
            self.assertRaises(ValueError, q.push, a)
        if version_info >= (3, 6):
            # Selectors should fail (lxml.html.HtmlElement objects can't be pickled)
            sel = Selector(text='<html><body><p>some text</p></body></html>')
            self.assertRaises(ValueError, q.push, sel)
    finally:
        # Close the queue even when an assertion fails so its files are
        # released before the test case tears down.
        q.close()
|
2016-09-01 15:39:16 +03:00
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class MarshalFifoDiskQueueTest(t.FifoDiskQueueTest):
    """Run the inherited FIFO disk queue tests against ``MarshalFifoDiskQueue``.

    Adds checks for round-tripping plain values and for rejecting objects
    that cannot be serialized.
    """

    # Passed to the queue constructor; subclasses override it.
    chunksize = 100000

    def queue(self):
        """Return a ``MarshalFifoDiskQueue`` built from ``self.qpath`` and ``self.chunksize``."""
        return MarshalFifoDiskQueue(self.qpath, chunksize=self.chunksize)

    def test_serialize(self):
        """Pushed values are popped back unchanged, first in first out."""
        q = self.queue()
        try:
            q.push('a')
            q.push(123)
            q.push({'a': 'dict'})
            self.assertEqual(q.pop(), 'a')
            self.assertEqual(q.pop(), 123)
            self.assertEqual(q.pop(), {'a': 'dict'})
        finally:
            # Release the queue's files even if an assertion fails.
            q.close()

    test_nonserializable_object = nonserializable_object_test
|
2011-09-01 19:40:44 -03:00
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize1MarshalFifoDiskQueueTest(MarshalFifoDiskQueueTest):
    """Repeat the marshal FIFO disk queue tests with ``chunksize`` set to 1."""

    chunksize = 1
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize2MarshalFifoDiskQueueTest(MarshalFifoDiskQueueTest):
    """Repeat the marshal FIFO disk queue tests with ``chunksize`` set to 2."""

    chunksize = 2
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize3MarshalFifoDiskQueueTest(MarshalFifoDiskQueueTest):
    """Repeat the marshal FIFO disk queue tests with ``chunksize`` set to 3."""

    chunksize = 3
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize4MarshalFifoDiskQueueTest(MarshalFifoDiskQueueTest):
    """Repeat the marshal FIFO disk queue tests with ``chunksize`` set to 4."""

    chunksize = 4
|
2011-09-01 14:27:29 -03:00
|
|
|
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class PickleFifoDiskQueueTest(MarshalFifoDiskQueueTest):
    """Repeat the marshal FIFO tests against ``PickleFifoDiskQueue``.

    Adds round-trip checks for an item, an item loader and a
    self-referencing request.
    """

    # Passed to the queue constructor; subclasses override it.
    chunksize = 100000

    def queue(self):
        """Return a ``PickleFifoDiskQueue`` built from ``self.qpath`` and ``self.chunksize``."""
        return PickleFifoDiskQueue(self.qpath, chunksize=self.chunksize)

    def test_serialize_item(self):
        """An item pushed to the queue is popped as an equal ``TestItem``."""
        q = self.queue()
        try:
            item = TestItem(name='foo')
            q.push(item)
            restored = q.pop()
            self.assertIsInstance(restored, TestItem)
            self.assertEqual(item, restored)
        finally:
            # Release the queue's files even if an assertion fails.
            q.close()

    def test_serialize_loader(self):
        """A loader pushed to the queue is popped with its class attributes usable."""
        q = self.queue()
        try:
            loader = TestLoader()
            q.push(loader)
            restored = q.pop()
            self.assertIsInstance(restored, TestLoader)
            self.assertIs(restored.default_item_class, TestItem)
            self.assertEqual(restored.name_out('x'), 'xx')
        finally:
            q.close()

    def test_serialize_request_recursive(self):
        """A request whose ``meta`` references itself keeps that cycle after a round trip."""
        q = self.queue()
        try:
            r = Request('http://www.example.com')
            r.meta['request'] = r
            q.push(r)
            r2 = q.pop()
            self.assertIsInstance(r2, Request)
            self.assertEqual(r.url, r2.url)
            # The restored meta entry must point at the restored request itself.
            self.assertIs(r2.meta['request'], r2)
        finally:
            q.close()
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize1PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    """Repeat the pickle FIFO disk queue tests with ``chunksize`` set to 1."""

    chunksize = 1
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize2PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    """Repeat the pickle FIFO disk queue tests with ``chunksize`` set to 2."""

    chunksize = 2
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize3PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    """Repeat the pickle FIFO disk queue tests with ``chunksize`` set to 3."""

    chunksize = 3
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
class ChunkSize4PickleFifoDiskQueueTest(PickleFifoDiskQueueTest):
    """Repeat the pickle FIFO disk queue tests with ``chunksize`` set to 4."""

    chunksize = 4
|
|
|
|
|
2011-09-23 13:03:07 -03:00
|
|
|
|
|
|
|
class MarshalLifoDiskQueueTest(t.LifoDiskQueueTest):
    """Run the inherited LIFO disk queue tests against ``MarshalLifoDiskQueue``.

    Adds checks for round-tripping plain values and for rejecting objects
    that cannot be serialized.
    """

    def queue(self):
        """Return a ``MarshalLifoDiskQueue`` built from ``self.qpath``."""
        return MarshalLifoDiskQueue(self.qpath)

    def test_serialize(self):
        """Pushed values are popped back unchanged, last in first out."""
        q = self.queue()
        try:
            q.push('a')
            q.push(123)
            q.push({'a': 'dict'})
            self.assertEqual(q.pop(), {'a': 'dict'})
            self.assertEqual(q.pop(), 123)
            self.assertEqual(q.pop(), 'a')
        finally:
            # Release the queue's file even if an assertion fails.
            q.close()

    test_nonserializable_object = nonserializable_object_test
|
2011-09-23 13:03:07 -03:00
|
|
|
|
|
|
|
|
|
|
|
class PickleLifoDiskQueueTest(MarshalLifoDiskQueueTest):
    """Repeat the marshal LIFO tests against ``PickleLifoDiskQueue``.

    Adds round-trip checks for an item, an item loader and a
    self-referencing request.
    """

    def queue(self):
        """Return a ``PickleLifoDiskQueue`` built from ``self.qpath``."""
        return PickleLifoDiskQueue(self.qpath)

    def test_serialize_item(self):
        """An item pushed to the queue is popped as an equal ``TestItem``."""
        q = self.queue()
        try:
            item = TestItem(name='foo')
            q.push(item)
            restored = q.pop()
            self.assertIsInstance(restored, TestItem)
            self.assertEqual(item, restored)
        finally:
            # Release the queue's file even if an assertion fails.
            q.close()

    def test_serialize_loader(self):
        """A loader pushed to the queue is popped with its class attributes usable."""
        q = self.queue()
        try:
            loader = TestLoader()
            q.push(loader)
            restored = q.pop()
            self.assertIsInstance(restored, TestLoader)
            self.assertIs(restored.default_item_class, TestItem)
            self.assertEqual(restored.name_out('x'), 'xx')
        finally:
            q.close()

    def test_serialize_request_recursive(self):
        """A request whose ``meta`` references itself keeps that cycle after a round trip."""
        q = self.queue()
        try:
            r = Request('http://www.example.com')
            r.meta['request'] = r
            q.push(r)
            r2 = q.pop()
            self.assertIsInstance(r2, Request)
            self.assertEqual(r.url, r2.url)
            # The restored meta entry must point at the restored request itself.
            self.assertIs(r2.meta['request'], r2)
        finally:
            q.close()
|