mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-21 05:33:16 +00:00
Merge pull request #3899 from elacuesta/py3_single_argument_processors
[Py3] Item loaders: allow single argument functions as processors
This commit is contained in:
commit
8a1c99676e
@ -142,20 +142,6 @@ accept one (and only one) positional argument, which will be an iterable.
|
||||
containing the collected values (for that field). The result of the output
|
||||
processors is the value that will be finally assigned to the item.
|
||||
|
||||
If you want to use a plain function as a processor, make sure it receives
|
||||
``self`` as the first argument::
|
||||
|
||||
def lowercase_processor(self, values):
|
||||
for v in values:
|
||||
yield v.lower()
|
||||
|
||||
class MyItemLoader(ItemLoader):
|
||||
name_in = lowercase_processor
|
||||
|
||||
This is because whenever a function is assigned as a class variable, it becomes
|
||||
a method and would be passed the instance as the first argument when being
|
||||
called. See `this answer on stackoverflow`_ for more details.
|
||||
|
||||
The other thing you need to keep in mind is that the values returned by input
|
||||
processors are collected internally (in lists) and then passed to output
|
||||
processors to populate the fields.
|
||||
@ -163,7 +149,7 @@ processors to populate the fields.
|
||||
Last, but not least, Scrapy comes with some :ref:`commonly used processors
|
||||
<topics-loaders-available-processors>` built-in for convenience.
|
||||
|
||||
.. _this answer on stackoverflow: https://stackoverflow.com/a/35322635
|
||||
|
||||
|
||||
Declaring Item Loaders
|
||||
======================
|
||||
|
@ -4,8 +4,7 @@ Item Loader
|
||||
See documentation in docs/topics/loaders.rst
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
import six
|
||||
from contextlib import suppress
|
||||
|
||||
from scrapy.item import Item
|
||||
from scrapy.loader.common import wrap_loader_context
|
||||
@ -15,6 +14,17 @@ from scrapy.utils.misc import arg_to_iter, extract_regex
|
||||
from scrapy.utils.python import flatten
|
||||
|
||||
|
||||
def unbound_method(method):
    """
    Return the underlying function of ``method`` when its ``__qualname__``
    contains no dot and it exposes ``__func__``; otherwise return ``method``
    unchanged.

    This allows single-argument functions to be used as input or output
    processors (no need to define an unused first 'self' argument).
    """
    try:
        qualified_name = method.__qualname__
        if '.' in qualified_name:
            # Dotted qualified name: leave the object as it is.
            return method
        # Undotted name: hand back the wrapped function, if there is one.
        return method.__func__
    except AttributeError:
        # No ``__qualname__`` or no ``__func__``: nothing to unwrap.
        return method
|
||||
|
||||
|
||||
class ItemLoader(object):
|
||||
|
||||
default_item_class = Item
|
||||
@ -72,7 +82,7 @@ class ItemLoader(object):
|
||||
if value is None:
|
||||
return
|
||||
if not field_name:
|
||||
for k, v in six.iteritems(value):
|
||||
for k, v in value.items():
|
||||
self._add_value(k, v)
|
||||
else:
|
||||
self._add_value(field_name, value)
|
||||
@ -82,7 +92,7 @@ class ItemLoader(object):
|
||||
if value is None:
|
||||
return
|
||||
if not field_name:
|
||||
for k, v in six.iteritems(value):
|
||||
for k, v in value.items():
|
||||
self._replace_value(k, v)
|
||||
else:
|
||||
self._replace_value(field_name, value)
|
||||
@ -142,14 +152,14 @@ class ItemLoader(object):
|
||||
if not proc:
|
||||
proc = self._get_item_field_attr(field_name, 'input_processor',
|
||||
self.default_input_processor)
|
||||
return proc
|
||||
return unbound_method(proc)
|
||||
|
||||
def get_output_processor(self, field_name):
|
||||
proc = getattr(self, '%s_out' % field_name, None)
|
||||
if not proc:
|
||||
proc = self._get_item_field_attr(field_name, 'output_processor',
|
||||
self.default_output_processor)
|
||||
return proc
|
||||
return unbound_method(proc)
|
||||
|
||||
def _process_input_value(self, field_name, value):
|
||||
proc = self.get_input_processor(field_name)
|
||||
|
@ -994,5 +994,53 @@ class SelectJmesTestCase(unittest.TestCase):
|
||||
)
|
||||
|
||||
|
||||
# Functions as processors
|
||||
|
||||
def function_processor_strip(iterable):
    """Return a list holding ``strip()`` of every element of ``iterable``."""
    stripped = []
    for element in iterable:
        stripped.append(element.strip())
    return stripped
|
||||
|
||||
|
||||
def function_processor_upper(iterable):
    """Return a list holding ``upper()`` of every element of ``iterable``."""
    uppercased = []
    for element in iterable:
        uppercased.append(element.upper())
    return uppercased
|
||||
|
||||
|
||||
class FunctionProcessorItem(Item):
    """Item whose ``foo`` field declares plain functions as its processors."""

    # Both processors are ordinary single-argument module-level functions.
    foo = Field(
        input_processor=function_processor_strip,
        output_processor=function_processor_upper,
    )
|
||||
|
||||
|
||||
class FunctionProcessorItemLoader(ItemLoader):
    """Loader that sets ``FunctionProcessorItem`` as its default item class."""

    default_item_class = FunctionProcessorItem
|
||||
|
||||
|
||||
class FunctionProcessorDictLoader(ItemLoader):
    """Loader for plain dicts with function processors set on the loader."""

    default_item_class = dict

    # Plain functions assigned as class attributes for the ``foo`` field.
    foo_in = function_processor_strip
    foo_out = function_processor_upper
|
||||
|
||||
|
||||
class FunctionProcessorTestCase(unittest.TestCase):
    """Tests for plain single-argument functions used as processors."""

    def test_processor_defined_in_item(self):
        # The loader's default item class declares the processors on its field.
        loader = FunctionProcessorItemLoader()
        loader.add_value('foo', ' bar ')
        loader.add_value('foo', [' asdf ', ' qwerty '])
        loaded = dict(loader.load_item())
        expected = {'foo': ['BAR', 'ASDF', 'QWERTY']}
        self.assertEqual(loaded, expected)

    def test_processor_defined_in_item_loader(self):
        # The loader itself carries ``foo_in`` / ``foo_out`` as class attributes.
        loader = FunctionProcessorDictLoader()
        loader.add_value('foo', ' bar ')
        loader.add_value('foo', [' asdf ', ' qwerty '])
        loaded = dict(loader.load_item())
        expected = {'foo': ['BAR', 'ASDF', 'QWERTY']}
        self.assertEqual(loaded, expected)
|
||||
|
||||
|
||||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user