Merge pull request #461 from redapple/selectorloader

Add "unified" SelectorItemLoader (supports .add_css() and .add_xpath())
2025-02-24 01:23:57 +00:00 · 2013-11-22 12:10:39 -08:00 · 2013-11-22 12:10:39 -08:00 · 36c8da2ad6
commit 36c8da2ad6
parent 545f2601b0 14f5817d6b
3 changed files with 297 additions and 115 deletions
--- a/docs/topics/loaders.rst
+++ b/docs/topics/loaders.rst
@ -39,15 +39,15 @@ Here is a typical Item Loader usage in a :ref:`Spider <topics-spiders>`, using
 the :ref:`Product item <topics-items-declaring>` declared in the :ref:`Items
 chapter <topics-items>`::

-    from scrapy.contrib.loader import XPathItemLoader
+    from scrapy.contrib.loader import ItemLoader
    from myproject.items import Product

    def parse(self, response):
-        l = XPathItemLoader(item=Product(), response=response)
+        l = ItemLoader(item=Product(), response=response)
        l.add_xpath('name', '//div[@class="product_name"]')
        l.add_xpath('name', '//div[@class="product_title"]')
        l.add_xpath('price', '//p[@id="price"]')
-        l.add_xpath('stock', '//p[@id="stock"]')
+        l.add_css('stock', 'p#stock')
        l.add_value('last_updated', 'today') # you can also use literal values
        return l.load_item()

@ -58,17 +58,18 @@ extracted from two different XPath locations in the page:
 2. ``//div[@class="product_title"]``

 In other words, data is being collected by extracting it from two XPath
-locations, using the :meth:`~XPathItemLoader.add_xpath` method. This is the
+locations, using the :meth:`~ItemLoader.add_xpath` method. This is the
 data that will be assigned to the ``name`` field later.

-Afterwords, similar calls are used for ``price`` and ``stock`` fields, and
-finally the ``last_update`` field is populated directly with a literal value
+Afterwards, similar calls are used for ``price`` and ``stock`` fields
+(the latter using a CSS selector with the :meth:`~ItemLoader.add_css` method),
+and finally the ``last_updated`` field is populated directly with a literal value
 (``today``) using a different method: :meth:`~ItemLoader.add_value`.

 Finally, when all data is collected, the :meth:`ItemLoader.load_item` method is
 called which actually populates and returns the item populated with the data
-previously extracted and collected with the :meth:`~XPathItemLoader.add_xpath`
-and :meth:`~ItemLoader.add_value` calls.
+previously extracted and collected with the :meth:`~ItemLoader.add_xpath`,
+:meth:`~ItemLoader.add_css`, and :meth:`~ItemLoader.add_value` calls.

 .. _topics-loaders-processors:

@ -77,7 +78,7 @@ Input and Output processors

 An Item Loader contains one input processor and one output processor for each
 (item) field. The input processor processes the extracted data as soon as it's
-received (through the :meth:`~XPathItemLoader.add_xpath` or
+received (through the :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css` or
 :meth:`~ItemLoader.add_value` methods) and the result of the input processor is
 collected and kept inside the ItemLoader. After collecting all data, the
 :meth:`ItemLoader.load_item` method is called to populate and get the populated
@ -89,11 +90,12 @@ assigned to the item.
 Let's see an example to illustrate how the input and output processors are
 called for a particular field (the same applies for any other field)::

-    l = XPathItemLoader(Product(), some_xpath_selector)
+    l = ItemLoader(Product(), some_selector)
    l.add_xpath('name', xpath1) # (1)
    l.add_xpath('name', xpath2) # (2)
-    l.add_value('name', 'test') # (3)
-    return l.load_item() # (4)
+    l.add_css('name', css) # (3)
+    l.add_value('name', 'test') # (4)
+    return l.load_item() # (5)

 So what happens is:

@ -105,16 +107,23 @@ So what happens is:
   processor* used in (1). The result of the input processor is appended to the
   data collected in (1) (if any).

-3. This case is similar to the previous ones, except that the value to be
-   collected is assigned directly, instead of being extracted from a XPath.
+3. This case is similar to the previous ones, except that the data is extracted
+   from the ``css`` CSS selector, and passed through the same *input
+   processor* used in (1) and (2). The result of the input processor is appended to the
+   data collected in (1) and (2) (if any).
+
+4. This case is also similar to the previous ones, except that the value to be
+   collected is assigned directly, instead of being extracted from an XPath
+   expression or a CSS selector.
   However, the value is still passed through the input processors. In this
   case, since the value is not iterable it is converted to an iterable of a
   single element before passing it to the input processor, because input
   processor always receive iterables.

-4. The data collected in (1) and (2) is passed through the *output processor* of
-   the ``name`` field. The result of the output processor is the value assigned to
-   the ``name`` field in the item.
+5. The data collected in steps (1), (2), (3) and (4) is passed through
+   the *output processor* of the ``name`` field.
+   The result of the output processor is the value assigned to the ``name``
+   field in the item.

 It's worth noticing that processors are just callable objects, which are called
 with the data to be parsed, and return a parsed value. So you can use any
@ -246,14 +255,35 @@ There are several ways to modify Item Loader context values:
 ItemLoader objects
 ==================

-.. class:: ItemLoader([item], \**kwargs)
+.. class:: ItemLoader([item, selector, response], \**kwargs)

    Return a new Item Loader for populating the given Item. If no item is
    given, one is instantiated automatically using the class in
    :attr:`default_item_class`.

-    The item and the remaining keyword arguments are assigned to the Loader
-    context (accessible through the :attr:`context` attribute).
+    When instantiated with a `selector` or a `response` parameter,
+    the :class:`ItemLoader` class provides convenient mechanisms for extracting
+    data from web pages using :ref:`selectors <topics-selectors>`.
+
+    :param item: The item instance to populate using subsequent calls to
+        :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
+        or :meth:`~ItemLoader.add_value`.
+    :type item: :class:`~scrapy.item.Item` object
+
+    :param selector: The selector to extract data from, when using the
+        :meth:`add_xpath` (resp. :meth:`add_css`) or :meth:`replace_xpath`
+        (resp. :meth:`replace_css`) method.
+    :type selector: :class:`~scrapy.selector.Selector` object
+
+    :param response: The response used to construct the selector using the
+        :attr:`default_selector_class`, unless the selector argument is given,
+        in which case this argument is ignored.
+    :type response: :class:`~scrapy.http.Response` object
+
+    The item, selector, response and the remaining keyword arguments are
+    assigned to the Loader context (accessible through the :attr:`context` attribute).
+
+    :class:`ItemLoader` instances have the following methods:

    .. method:: get_value(value, \*processors, \**kwargs)

@ -299,6 +329,91 @@ ItemLoader objects

        Similar to :meth:`add_value` but replaces the collected data with the
        new value instead of adding it.
+    .. method:: get_xpath(xpath, \*processors, \**kwargs)
+
+        Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
+        value, which is used to extract a list of unicode strings from the
+        selector associated with this :class:`ItemLoader`.
+
+        :param xpath: the XPath to extract data from
+        :type xpath: str
+
+        :param re: a regular expression to use for extracting data from the
+            selected XPath region
+        :type re: str or compiled regex
+
+        Examples::
+
+            # HTML snippet: <p class="product-name">Color TV</p>
+            loader.get_xpath('//p[@class="product-name"]')
+            # HTML snippet: <p id="price">the price is $1200</p>
+            loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
+
+    .. method:: add_xpath(field_name, xpath, \*processors, \**kwargs)
+
+        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
+        value, which is used to extract a list of unicode strings from the
+        selector associated with this :class:`ItemLoader`.
+
+        See :meth:`get_xpath` for ``kwargs``.
+
+        :param xpath: the XPath to extract data from
+        :type xpath: str
+
+        Examples::
+
+            # HTML snippet: <p class="product-name">Color TV</p>
+            loader.add_xpath('name', '//p[@class="product-name"]')
+            # HTML snippet: <p id="price">the price is $1200</p>
+            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
+
+    .. method:: replace_xpath(field_name, xpath, \*processors, \**kwargs)
+
+        Similar to :meth:`add_xpath` but replaces collected data instead of
+        adding it.
+
+    .. method:: get_css(css, \*processors, \**kwargs)
+
+        Similar to :meth:`ItemLoader.get_value` but receives a CSS selector
+        instead of a value, which is used to extract a list of unicode strings
+        from the selector associated with this :class:`ItemLoader`.
+
+        :param css: the CSS selector to extract data from
+        :type css: str
+
+        :param re: a regular expression to use for extracting data from the
+            selected CSS region
+        :type re: str or compiled regex
+
+        Examples::
+
+            # HTML snippet: <p class="product-name">Color TV</p>
+            loader.get_css('p.product-name')
+            # HTML snippet: <p id="price">the price is $1200</p>
+            loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
+
+    .. method:: add_css(field_name, css, \*processors, \**kwargs)
+
+        Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
+        instead of a value, which is used to extract a list of unicode strings
+        from the selector associated with this :class:`ItemLoader`.
+
+        See :meth:`get_css` for ``kwargs``.
+
+        :param css: the CSS selector to extract data from
+        :type css: str
+
+        Examples::
+
+            # HTML snippet: <p class="product-name">Color TV</p>
+            loader.add_css('name', 'p.product-name')
+            # HTML snippet: <p id="price">the price is $1200</p>
+            loader.add_css('price', 'p#price', re='the price is (.*)')
+
+    .. method:: replace_css(field_name, css, \*processors, \**kwargs)
+
+        Similar to :meth:`add_css` but replaces collected data instead of
+        adding it.

    .. method:: load_item()

@ -324,6 +439,8 @@ ItemLoader objects

        Return the output processor for the given field.

+    :class:`ItemLoader` instances have the following attributes:
+
    .. attribute:: item

        The :class:`~scrapy.item.Item` object being parsed by this Item Loader.
@ -348,71 +465,10 @@ ItemLoader objects
        The default output processor to use for those fields which don't specify
        one.

-.. class:: XPathItemLoader([item, selector, response], \**kwargs)
-
-    The :class:`XPathItemLoader` class extends the :class:`ItemLoader` class
-    providing more convenient mechanisms for extracting data from web pages
-    using :ref:`selectors <topics-selectors>`.
-
-    :class:`XPathItemLoader` objects accept two more additional parameters in
-    their constructors:
-
-    :param selector: The selector to extract data from, when using the
-        :meth:`add_xpath` or :meth:`replace_xpath` method.
-    :type selector: :class:`~scrapy.selector.Selector` object
-
-    :param response: The response used to construct the selector using the
-        :attr:`default_selector_class`, unless the selector argument is given,
-        in which case this argument is ignored.
-    :type response: :class:`~scrapy.http.Response` object
-
-    .. method:: get_xpath(xpath, \*processors, \**kwargs)
-
-        Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
-        value, which is used to extract a list of unicode strings from the
-        selector associated with this :class:`XPathItemLoader`.
-
-        :param xpath: the XPath to extract data from
-        :type xpath: str
-
-        :param re: a regular expression to use for extracting data from the
-            selected XPath region
-        :type re: str or compiled regex
-
-        Examples::
-
-            # HTML snippet: <p class="product-name">Color TV</p>
-            loader.get_xpath('//p[@class="product-name"]')
-            # HTML snippet: <p id="price">the price is $1200</p>
-            loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
-
-    .. method:: add_xpath(field_name, xpath, \*processors, \**kwargs)
-
-        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
-        value, which is used to extract a list of unicode strings from the
-        selector associated with this :class:`XPathItemLoader`.
-
-        See :meth:`get_xpath` for ``kwargs``.
-
-        :param xpath: the XPath to extract data from
-        :type xpath: str
-
-        Examples::
-
-            # HTML snippet: <p class="product-name">Color TV</p>
-            loader.add_xpath('name', '//p[@class="product-name"]')
-            # HTML snippet: <p id="price">the price is $1200</p>
-            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
-
-    .. method:: replace_xpath(field_name, xpath, \*processors, \**kwargs)
-
-        Similar to :meth:`add_xpath` but replaces collected data instead of
-        adding it.
-
    .. attribute:: default_selector_class

        The class used to construct the :attr:`selector` of this
-        :class:`XPathItemLoader`, if only a response is given in the constructor.
+        :class:`ItemLoader`, if only a response is given in the constructor.
        If a selector is given in the constructor this attribute is ignored.
        This attribute is sometimes overridden in subclasses.

--- a/scrapy/contrib/loader/init.py
+++ b/scrapy/contrib/loader/init.py
@ -11,16 +11,23 @@ from scrapy.item import Item
 from scrapy.selector import Selector
 from scrapy.utils.misc import arg_to_iter, extract_regex
 from scrapy.utils.python import flatten
+from scrapy.utils.decorator import deprecated
 from .common import wrap_loader_context
 from .processor import Identity

+
 class ItemLoader(object):

    default_item_class = Item
    default_input_processor = Identity()
    default_output_processor = Identity()
+    default_selector_class = Selector

-    def __init__(self, item=None, **context):
+    def __init__(self, item=None, selector=None, response=None, **context):
+        if selector is None and response is not None:
+            selector = self.default_selector_class(response)
+        self.selector = selector
+        context.update(selector=selector, response=response)
        if item is None:
            item = self.default_item_class()
        self.item = context['item'] = item
@ -114,32 +121,56 @@ class ItemLoader(object):
            value = default
        return value

-class XPathItemLoader(ItemLoader):
-
-    default_selector_class = Selector
-
-    def __init__(self, item=None, selector=None, response=None, **context):
-        if selector is None and response is None:
-            raise RuntimeError("%s must be instantiated with a selector " \
-                "or response" % self.__class__.__name__)
-        if selector is None:
-            selector = self.default_selector_class(response)
-        self.selector = selector
-        context.update(selector=selector, response=response)
-        super(XPathItemLoader, self).__init__(item, **context)
+    def _check_selector_method(self):
+        if self.selector is None:
+            raise RuntimeError("To use XPath or CSS selectors, "
+                "%s must be instantiated with a selector "
+                "or a response" % self.__class__.__name__)

    def add_xpath(self, field_name, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        self.add_value(field_name, values, *processors, **kw)

    def replace_xpath(self, field_name, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        self.replace_value(field_name, values, *processors, **kw)

    def get_xpath(self, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        return self.get_value(values, *processors, **kw)

+    @deprecated(use_instead='._get_xpathvalues()')
    def _get_values(self, xpaths, **kw):
+        return self._get_xpathvalues(xpaths, **kw)
+
+    def _get_xpathvalues(self, xpaths, **kw):
+        self._check_selector_method()
        xpaths = arg_to_iter(xpaths)
        return flatten([self.selector.xpath(xpath).extract() for xpath in xpaths])
+
+    def add_css(self, field_name, css, *processors, **kw):
+        values = self._get_cssvalues(css, **kw)
+        self.add_value(field_name, values, *processors, **kw)
+
+    def replace_css(self, field_name, css, *processors, **kw):
+        values = self._get_cssvalues(css, **kw)
+        self.replace_value(field_name, values, *processors, **kw)
+
+    def get_css(self, css, *processors, **kw):
+        values = self._get_cssvalues(css, **kw)
+        return self.get_value(values, *processors, **kw)
+
+    def _get_cssvalues(self, csss, **kw):
+        self._check_selector_method()
+        csss = arg_to_iter(csss)
+        return flatten([self.selector.css(css).extract() for css in csss])
+
+
+class XPathItemLoader(ItemLoader):
+    def __init__(self, *a, **kw):
+        import warnings
+        from scrapy.exceptions import ScrapyDeprecationWarning
+        warnings.warn('%s is deprecated, instanciate scrapy.contrib.loader.ItemLoader '
+                      'instead' % type(self).__name__,
+                      category=ScrapyDeprecationWarning, stacklevel=1)
+        super(XPathItemLoader, self).__init__(*a, **kw)
--- a/scrapy/tests/test_contrib_loader.py
+++ b/scrapy/tests/test_contrib_loader.py
@ -1,6 +1,6 @@
 import unittest

-from scrapy.contrib.loader import ItemLoader, XPathItemLoader
+from scrapy.contrib.loader import ItemLoader
 from scrapy.contrib.loader.processor import Join, Identity, TakeFirst, \
    Compose, MapCompose
 from scrapy.item import Item, Field
@ -38,7 +38,7 @@ def processor_with_args(value, other=None, loader_context=None):
    return value


-class ItemLoaderTest(unittest.TestCase):
+class BasicItemLoaderTest(unittest.TestCase):

    def test_load_item_using_default_loader(self):
        i = TestItem()
@ -367,37 +367,78 @@ class ProcessorsTest(unittest.TestCase):
                         [u'HELLO', u'THIS', u'IS', u'SCRAPY'])


-class TestXPathItemLoader(XPathItemLoader):
-    default_item_class = TestItem
-    name_in = MapCompose(lambda v: v.title())
+class SelectortemLoaderTest(unittest.TestCase):
+    response = HtmlResponse(url="", body="""
+    <html>
+    <body>
+    <div id="id">marta</div>
+    <p>paragraph</p>
+    <a href="http://www.scrapy.org">homepage</a>
+    <img src="/images/logo.png" width="244" height="65" alt="Scrapy">
+    </body>
+    </html>
+    """)

-
-class XPathItemLoaderTest(unittest.TestCase):
-    response = HtmlResponse(url="", body='<html><body><div id="id">marta</div><p>paragraph</p></body></html>')
+    def test_constructor(self):
+        l = TestItemLoader()
+        self.assertEqual(l.selector, None)

    def test_constructor_errors(self):
-        self.assertRaises(RuntimeError, XPathItemLoader)
+        l = TestItemLoader()
+        self.assertRaises(RuntimeError, l.add_xpath, 'url', '//a/@href')
+        self.assertRaises(RuntimeError, l.replace_xpath, 'url', '//a/@href')
+        self.assertRaises(RuntimeError, l.get_xpath, '//a/@href')
+        self.assertRaises(RuntimeError, l.add_css, 'name', '#name::text')
+        self.assertRaises(RuntimeError, l.replace_css, 'name', '#name::text')
+        self.assertRaises(RuntimeError, l.get_css, '#name::text')

    def test_constructor_with_selector(self):
        sel = Selector(text=u"<html><body><div>marta</div></body></html>")
-        l = TestXPathItemLoader(selector=sel)
+        l = TestItemLoader(selector=sel)
        self.assert_(l.selector is sel)
+
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])

+    def test_constructor_with_selector_css(self):
+        sel = Selector(text=u"<html><body><div>marta</div></body></html>")
+        l = TestItemLoader(selector=sel)
+        self.assert_(l.selector is sel)
+
+        l.add_css('name', 'div::text')
+        self.assertEqual(l.get_output_value('name'), [u'Marta'])
+
    def test_constructor_with_response(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
+
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])

+    def test_constructor_with_response_css(self):
+        l = TestItemLoader(response=self.response)
+        self.assert_(l.selector)
+
+        l.add_css('name', 'div::text')
+        self.assertEqual(l.get_output_value('name'), [u'Marta'])
+
+        l.add_css('url', 'a::attr(href)')
+        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
+
+        # combining/accumulating CSS selectors and XPath expressions
+        l.add_xpath('name', '//div/text()')
+        self.assertEqual(l.get_output_value('name'), [u'Marta', u'Marta'])
+
+        l.add_xpath('url', '//img/@src')
+        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org', u'/images/logo.png'])
+
    def test_add_xpath_re(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        l.add_xpath('name', '//div/text()', re='ma')
        self.assertEqual(l.get_output_value('name'), [u'Ma'])

    def test_replace_xpath(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
@ -408,7 +449,7 @@ class XPathItemLoaderTest(unittest.TestCase):
        self.assertEqual(l.get_output_value('name'), [u'Paragraph', 'Marta'])

    def test_get_xpath(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assertEqual(l.get_xpath('//p/text()'), [u'paragraph'])
        self.assertEqual(l.get_xpath('//p/text()', TakeFirst()), u'paragraph')
        self.assertEqual(l.get_xpath('//p/text()', TakeFirst(), re='pa'), u'pa')
@ -416,20 +457,74 @@ class XPathItemLoaderTest(unittest.TestCase):
        self.assertEqual(l.get_xpath(['//p/text()', '//div/text()']), [u'paragraph', 'marta'])

    def test_replace_xpath_multi_fields(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        l.add_xpath(None, '//div/text()', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_xpath(None, '//p/text()', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])

    def test_replace_xpath_re(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_xpath('name', '//div/text()', re='ma')
        self.assertEqual(l.get_output_value('name'), [u'Ma'])

+    def test_add_css_re(self):
+        l = TestItemLoader(response=self.response)
+        l.add_css('name', 'div::text', re='ma')
+        self.assertEqual(l.get_output_value('name'), [u'Ma'])
+
+        l.add_css('url', 'a::attr(href)', re='http://(.+)')
+        self.assertEqual(l.get_output_value('url'), [u'www.scrapy.org'])
+
+    def test_replace_css(self):
+        l = TestItemLoader(response=self.response)
+        self.assert_(l.selector)
+        l.add_css('name', 'div::text')
+        self.assertEqual(l.get_output_value('name'), [u'Marta'])
+        l.replace_css('name', 'p::text')
+        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])
+
+        l.replace_css('name', ['p::text', 'div::text'])
+        self.assertEqual(l.get_output_value('name'), [u'Paragraph', 'Marta'])
+
+        l.add_css('url', 'a::attr(href)', re='http://(.+)')
+        self.assertEqual(l.get_output_value('url'), [u'www.scrapy.org'])
+        l.replace_css('url', 'img::attr(src)')
+        self.assertEqual(l.get_output_value('url'), [u'/images/logo.png'])
+
+    def test_get_css(self):
+        l = TestItemLoader(response=self.response)
+        self.assertEqual(l.get_css('p::text'), [u'paragraph'])
+        self.assertEqual(l.get_css('p::text', TakeFirst()), u'paragraph')
+        self.assertEqual(l.get_css('p::text', TakeFirst(), re='pa'), u'pa')
+
+        self.assertEqual(l.get_css(['p::text', 'div::text']), [u'paragraph', 'marta'])
+        self.assertEqual(l.get_css(['a::attr(href)', 'img::attr(src)']),
+            [u'http://www.scrapy.org', u'/images/logo.png'])
+
+    def test_replace_css_multi_fields(self):
+        l = TestItemLoader(response=self.response)
+        l.add_css(None, 'div::text', TakeFirst(), lambda x: {'name': x})
+        self.assertEqual(l.get_output_value('name'), [u'Marta'])
+        l.replace_css(None, 'p::text', TakeFirst(), lambda x: {'name': x})
+        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])
+
+        l.add_css(None, 'a::attr(href)', TakeFirst(), lambda x: {'url': x})
+        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
+        l.replace_css(None, 'img::attr(src)', TakeFirst(), lambda x: {'url': x})
+        self.assertEqual(l.get_output_value('url'), [u'/images/logo.png'])
+
+    def test_replace_css_re(self):
+        l = TestItemLoader(response=self.response)
+        self.assert_(l.selector)
+        l.add_css('url', 'a::attr(href)')
+        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
+        l.replace_css('url', 'a::attr(href)', re='http://www\.(.+)')
+        self.assertEqual(l.get_output_value('url'), [u'scrapy.org'])
+

 if __name__ == "__main__":
    unittest.main()