Merge branch 'master' into remove-six-code

2025-02-22 06:52:53 +00:00 · 2019-11-25 10:34:21 +01:00 · 2019-11-25 10:34:21 +01:00 · 6d9ed6146d
commit 6d9ed6146d
parent 55cc5c9068 8a1c99676e
3 changed files with 63 additions and 17 deletions
--- a/docs/topics/loaders.rst
+++ b/docs/topics/loaders.rst
@ -142,20 +142,6 @@ accept one (and only one) positional argument, which will be an iterable.
   containing the collected values (for that field). The result of the output
   processors is the value that will be finally assigned to the item.
 If you want to use a plain function as a processor, make sure it receives
 ``self`` as the first argument::
    def lowercase_processor(self, values):
        for v in values:
            yield v.lower()
    class MyItemLoader(ItemLoader):
        name_in = lowercase_processor
 This is because whenever a function is assigned as a class variable, it becomes
 a method and would be passed the instance as the first argument when being
 called. See `this answer on stackoverflow`_ for more details.
 The other thing you need to keep in mind is that the values returned by input
 processors are collected internally (in lists) and then passed to output
 processors to populate the fields.
@ -163,7 +149,7 @@ processors to populate the fields.
 Last, but not least, Scrapy comes with some :ref:`commonly used processors
 <topics-loaders-available-processors>` built-in for convenience.
-.. _this answer on stackoverflow: https://stackoverflow.com/a/35322635
+
 Declaring Item Loaders
 ======================
--- a/scrapy/loader/__init__.py
+++ b/scrapy/loader/__init__.py
@ -4,6 +4,7 @@ Item Loader
 See documentation in docs/topics/loaders.rst
 """
 from collections import defaultdict
 from contextlib import suppress
 from scrapy.item import Item
 from scrapy.loader.common import wrap_loader_context
@ -13,6 +14,17 @@ from scrapy.utils.misc import arg_to_iter, extract_regex
 from scrapy.utils.python import flatten
 def unbound_method(method):
    """
    Return the function wrapped by ``method`` when ``method`` exposes a
    ``__func__`` and its ``__qualname__`` contains no dot; otherwise return
    ``method`` unchanged.

    This allows single-argument functions to be used as input or output
    processors without declaring an unused leading ``self`` argument.
    """
    try:
        qualified_name = method.__qualname__
        if '.' in qualified_name:
            # Dotted qualified name: leave the callable exactly as received.
            return method
        return method.__func__
    except AttributeError:
        # Missing ``__qualname__`` or ``__func__``: nothing to unwrap.
        return method
 class ItemLoader(object):
    default_item_class = Item
@ -140,14 +152,14 @@ class ItemLoader(object):
        if not proc:
            proc = self._get_item_field_attr(field_name, 'input_processor',
                                             self.default_input_processor)
-        return proc
+        return unbound_method(proc)
    def get_output_processor(self, field_name):
        proc = getattr(self, '%s_out' % field_name, None)
        if not proc:
            proc = self._get_item_field_attr(field_name, 'output_processor',
                                             self.default_output_processor)
-        return proc
+        return unbound_method(proc)
    def _process_input_value(self, field_name, value):
        proc = self.get_input_processor(field_name)
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@ -992,5 +992,53 @@ class SelectJmesTestCase(unittest.TestCase):
            )
 # Functions as processors
 def function_processor_strip(iterable):
    """Return a list with ``.strip()`` applied to every element of ``iterable``."""
    stripped = []
    for value in iterable:
        stripped.append(value.strip())
    return stripped
 def function_processor_upper(iterable):
    """Return a list with ``.upper()`` applied to every element of ``iterable``."""
    uppercased = []
    for value in iterable:
        uppercased.append(value.upper())
    return uppercased
 class FunctionProcessorItem(Item):
    """Test item whose ``foo`` field uses plain functions as its processors."""
    # Both processors are ordinary functions taking a single iterable
    # argument, passed through the field's metadata.
    foo = Field(
        input_processor=function_processor_strip,
        output_processor=function_processor_upper,
    )
 class FunctionProcessorItemLoader(ItemLoader):
    """Test loader for ``FunctionProcessorItem``; it declares no processors of its own."""
    default_item_class = FunctionProcessorItem
 class FunctionProcessorDictLoader(ItemLoader):
    """Test loader that declares plain functions as the ``foo`` processors on the loader class itself."""
    default_item_class = dict
    # Plain functions stored as class attributes: when looked up through a
    # loader instance they are bound methods, not the bare functions.
    foo_in = function_processor_strip
    foo_out = function_processor_upper
 class FunctionProcessorTestCase(unittest.TestCase):
    """Plain functions must work as processors, whether declared on the
    item field or on the item loader class."""

    def test_processor_defined_in_item(self):
        loader = FunctionProcessorItemLoader()
        for raw_value in ('  bar  ', ['  asdf  ', '  qwerty  ']):
            loader.add_value('foo', raw_value)
        loaded = dict(loader.load_item())
        expected = {'foo': ['BAR', 'ASDF', 'QWERTY']}
        self.assertEqual(loaded, expected)

    def test_processor_defined_in_item_loader(self):
        loader = FunctionProcessorDictLoader()
        for raw_value in ('  bar  ', ['  asdf  ', '  qwerty  ']):
            loader.add_value('foo', raw_value)
        loaded = dict(loader.load_item())
        expected = {'foo': ['BAR', 'ASDF', 'QWERTY']}
        self.assertEqual(loaded, expected)
 if __name__ == "__main__":
    unittest.main()