Merge pull request #461 from redapple/selectorloader

Add "unified" SelectorItemLoader (supports .add_css() and .add_xpath())
2025-02-24 07:43:48 +00:00 · 2013-11-22 12:10:39 -08:00 · 2013-11-22 12:10:39 -08:00 · 36c8da2ad6
commit 36c8da2ad6
parent 545f2601b0 14f5817d6b
3 changed files with 297 additions and 115 deletions
--- a/docs/topics/loaders.rst
+++ b/docs/topics/loaders.rst
@ -39,15 +39,15 @@ Here is a typical Item Loader usage in a :ref:`Spider <topics-spiders>`, using
 the :ref:`Product item <topics-items-declaring>` declared in the :ref:`Items
 chapter <topics-items>`::
-    from scrapy.contrib.loader import XPathItemLoader
+    from scrapy.contrib.loader import ItemLoader
    from myproject.items import Product
    def parse(self, response):
-        l = XPathItemLoader(item=Product(), response=response)
+        l = ItemLoader(item=Product(), response=response)
        l.add_xpath('name', '//div[@class="product_name"]')
        l.add_xpath('name', '//div[@class="product_title"]')
        l.add_xpath('price', '//p[@id="price"]')
-        l.add_xpath('stock', '//p[@id="stock"]')
+        l.add_css('stock', 'p#stock')
        l.add_value('last_updated', 'today') # you can also use literal values
        return l.load_item()
@ -58,17 +58,18 @@ extracted from two different XPath locations in the page:
 2. ``//div[@class="product_title"]``
 In other words, data is being collected by extracting it from two XPath
-locations, using the :meth:`~XPathItemLoader.add_xpath` method. This is the
+locations, using the :meth:`~ItemLoader.add_xpath` method. This is the
 data that will be assigned to the ``name`` field later.
-Afterwords, similar calls are used for ``price`` and ``stock`` fields, and
+Afterwards, similar calls are used for ``price`` and ``stock`` fields
-finally the ``last_update`` field is populated directly with a literal value
+(the latter using a CSS selector with the :meth:`~ItemLoader.add_css` method),
 and finally the ``last_updated`` field is populated directly with a literal value
 (``today``) using a different method: :meth:`~ItemLoader.add_value`.
 Finally, when all data is collected, the :meth:`ItemLoader.load_item` method is
 called which actually populates and returns the item populated with the data
-previously extracted and collected with the :meth:`~XPathItemLoader.add_xpath`
+previously extracted and collected with the :meth:`~ItemLoader.add_xpath`,
-and :meth:`~ItemLoader.add_value` calls.
+:meth:`~ItemLoader.add_css`, and :meth:`~ItemLoader.add_value` calls.
 .. _topics-loaders-processors:
@ -77,7 +78,7 @@ Input and Output processors
 An Item Loader contains one input processor and one output processor for each
 (item) field. The input processor processes the extracted data as soon as it's
-received (through the :meth:`~XPathItemLoader.add_xpath` or
+received (through the :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css` or
 :meth:`~ItemLoader.add_value` methods) and the result of the input processor is
 collected and kept inside the ItemLoader. After collecting all data, the
 :meth:`ItemLoader.load_item` method is called to populate and get the populated
@ -89,11 +90,12 @@ assigned to the item.
 Let's see an example to illustrate how the input and output processors are
 called for a particular field (the same applies for any other field)::
-    l = XPathItemLoader(Product(), some_xpath_selector)
+    l = ItemLoader(Product(), some_selector)
    l.add_xpath('name', xpath1) # (1)
    l.add_xpath('name', xpath2) # (2)
-    l.add_value('name', 'test') # (3)
+    l.add_css('name', css) # (3)
-    return l.load_item() # (4)
+    l.add_value('name', 'test') # (4)
    return l.load_item() # (5)
 So what happens is:
@ -105,16 +107,23 @@ So what happens is:
   processor* used in (1). The result of the input processor is appended to the
   data collected in (1) (if any).
-3. This case is similar to the previous ones, except that the value to be
+3. This case is similar to the previous ones, except that the data is extracted
-   collected is assigned directly, instead of being extracted from a XPath.
+   from the ``css`` CSS selector, and passed through the same *input
   processor* used in (1) and (2). The result of the input processor is appended to the
   data collected in (1) and (2) (if any).
 4. This case is also similar to the previous ones, except that the value to be
   collected is assigned directly, instead of being extracted from an XPath
   expression or a CSS selector.
   However, the value is still passed through the input processors. In this
   case, since the value is not iterable it is converted to an iterable of a
   single element before passing it to the input processor, because input
   processors always receive iterables.
-4. The data collected in (1) and (2) is passed through the *output processor* of
+5. The data collected in steps (1), (2), (3) and (4) is passed through
-   the ``name`` field. The result of the output processor is the value assigned to
+   the *output processor* of the ``name`` field.
-   the ``name`` field in the item.
+   The result of the output processor is the value assigned to the ``name``
   field in the item.
 It's worth noticing that processors are just callable objects, which are called
 with the data to be parsed, and return a parsed value. So you can use any
@ -246,14 +255,35 @@ There are several ways to modify Item Loader context values:
 ItemLoader objects
 ==================
-.. class:: ItemLoader([item], \**kwargs)
+.. class:: ItemLoader([item, selector, response], \**kwargs)
    Return a new Item Loader for populating the given Item. If no item is
    given, one is instantiated automatically using the class in
    :attr:`default_item_class`.
-    The item and the remaining keyword arguments are assigned to the Loader
+    When instantiated with a `selector` or a `response` parameter,
-    context (accessible through the :attr:`context` attribute).
+    the :class:`ItemLoader` class provides convenient mechanisms for extracting
    data from web pages using :ref:`selectors <topics-selectors>`.
    :param item: The item instance to populate using subsequent calls to
        :meth:`~ItemLoader.add_xpath`, :meth:`~ItemLoader.add_css`,
        or :meth:`~ItemLoader.add_value`.
    :type item: :class:`~scrapy.item.Item` object
    :param selector: The selector to extract data from, when using the
        :meth:`add_xpath` (resp. :meth:`add_css`) or :meth:`replace_xpath`
        (resp. :meth:`replace_css`) method.
    :type selector: :class:`~scrapy.selector.Selector` object
    :param response: The response used to construct the selector using the
        :attr:`default_selector_class`, unless the selector argument is given,
        in which case this argument is ignored.
    :type response: :class:`~scrapy.http.Response` object
    The item, selector, response and the remaining keyword arguments are
    assigned to the Loader context (accessible through the :attr:`context` attribute).
    :class:`ItemLoader` instances have the following methods:
    .. method:: get_value(value, \*processors, \**kwargs)
@ -299,6 +329,91 @@ ItemLoader objects
        Similar to :meth:`add_value` but replaces the collected data with the
        new value instead of adding it.
    .. method:: get_xpath(xpath, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
        value, which is used to extract a list of unicode strings from the
        selector associated with this :class:`ItemLoader`.
        :param xpath: the XPath to extract data from
        :type xpath: str
        :param re: a regular expression to use for extracting data from the
            selected XPath region
        :type re: str or compiled regex
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.get_xpath('//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
    .. method:: add_xpath(field_name, xpath, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
        value, which is used to extract a list of unicode strings from the
        selector associated with this :class:`ItemLoader`.
        See :meth:`get_xpath` for ``kwargs``.
        :param xpath: the XPath to extract data from
        :type xpath: str
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_xpath('name', '//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
    .. method:: replace_xpath(field_name, xpath, \*processors, \**kwargs)
        Similar to :meth:`add_xpath` but replaces collected data instead of
        adding it.
    .. method:: get_css(css, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.get_value` but receives a CSS selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.
        :param css: the CSS selector to extract data from
        :type css: str
        :param re: a regular expression to use for extracting data from the
            selected CSS region
        :type re: str or compiled regex
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.get_css('p.product-name')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.get_css('p#price', TakeFirst(), re='the price is (.*)')
    .. method:: add_css(field_name, css, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.add_value` but receives a CSS selector
        instead of a value, which is used to extract a list of unicode strings
        from the selector associated with this :class:`ItemLoader`.
        See :meth:`get_css` for ``kwargs``.
        :param css: the CSS selector to extract data from
        :type css: str
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_css('name', 'p.product-name')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_css('price', 'p#price', re='the price is (.*)')
    .. method:: replace_css(field_name, css, \*processors, \**kwargs)
        Similar to :meth:`add_css` but replaces collected data instead of
        adding it.
    .. method:: load_item()
@ -324,6 +439,8 @@ ItemLoader objects
        Return the output processor for the given field.
    :class:`ItemLoader` instances have the following attributes:
    .. attribute:: item
        The :class:`~scrapy.item.Item` object being parsed by this Item Loader.
@ -348,71 +465,10 @@ ItemLoader objects
        The default output processor to use for those fields which don't specify
        one.
 .. class:: XPathItemLoader([item, selector, response], \**kwargs)
    The :class:`XPathItemLoader` class extends the :class:`ItemLoader` class
    providing more convenient mechanisms for extracting data from web pages
    using :ref:`selectors <topics-selectors>`.
    :class:`XPathItemLoader` objects accept two more additional parameters in
    their constructors:
    :param selector: The selector to extract data from, when using the
        :meth:`add_xpath` or :meth:`replace_xpath` method.
    :type selector: :class:`~scrapy.selector.Selector` object
    :param response: The response used to construct the selector using the
        :attr:`default_selector_class`, unless the selector argument is given,
        in which case this argument is ignored.
    :type response: :class:`~scrapy.http.Response` object
    .. method:: get_xpath(xpath, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.get_value` but receives an XPath instead of a
        value, which is used to extract a list of unicode strings from the
        selector associated with this :class:`XPathItemLoader`.
        :param xpath: the XPath to extract data from
        :type xpath: str
        :param re: a regular expression to use for extracting data from the
            selected XPath region
        :type re: str or compiled regex
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.get_xpath('//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.get_xpath('//p[@id="price"]', TakeFirst(), re='the price is (.*)')
    .. method:: add_xpath(field_name, xpath, \*processors, \**kwargs)
        Similar to :meth:`ItemLoader.add_value` but receives an XPath instead of a
        value, which is used to extract a list of unicode strings from the
        selector associated with this :class:`XPathItemLoader`.
        See :meth:`get_xpath` for ``kwargs``.
        :param xpath: the XPath to extract data from
        :type xpath: str
        Examples::
            # HTML snippet: <p class="product-name">Color TV</p>
            loader.add_xpath('name', '//p[@class="product-name"]')
            # HTML snippet: <p id="price">the price is $1200</p>
            loader.add_xpath('price', '//p[@id="price"]', re='the price is (.*)')
    .. method:: replace_xpath(field_name, xpath, \*processors, \**kwargs)
        Similar to :meth:`add_xpath` but replaces collected data instead of
        adding it.
    .. attribute:: default_selector_class
        The class used to construct the :attr:`selector` of this
-        :class:`XPathItemLoader`, if only a response is given in the constructor.
+        :class:`ItemLoader`, if only a response is given in the constructor.
        If a selector is given in the constructor this attribute is ignored.
        This attribute is sometimes overridden in subclasses.
--- a/scrapy/contrib/loader/init.py
+++ b/scrapy/contrib/loader/init.py
@ -11,16 +11,23 @@ from scrapy.item import Item
 from scrapy.selector import Selector
 from scrapy.utils.misc import arg_to_iter, extract_regex
 from scrapy.utils.python import flatten
 from scrapy.utils.decorator import deprecated
 from .common import wrap_loader_context
 from .processor import Identity
 class ItemLoader(object):
    default_item_class = Item
    default_input_processor = Identity()
    default_output_processor = Identity()
    default_selector_class = Selector
-    def __init__(self, item=None, **context):
+    def __init__(self, item=None, selector=None, response=None, **context):
        if selector is None and response is not None:
            selector = self.default_selector_class(response)
        self.selector = selector
        context.update(selector=selector, response=response)
        if item is None:
            item = self.default_item_class()
        self.item = context['item'] = item
@ -114,32 +121,56 @@ class ItemLoader(object):
            value = default
        return value
-class XPathItemLoader(ItemLoader):
+    def _check_selector_method(self):
-
+        if self.selector is None:
-    default_selector_class = Selector
+            raise RuntimeError("To use XPath or CSS selectors, "
-
+                "%s must be instantiated with a selector "
-    def __init__(self, item=None, selector=None, response=None, **context):
+                "or a response" % self.__class__.__name__)
        if selector is None and response is None:
            raise RuntimeError("%s must be instantiated with a selector " \
                "or response" % self.__class__.__name__)
        if selector is None:
            selector = self.default_selector_class(response)
        self.selector = selector
        context.update(selector=selector, response=response)
        super(XPathItemLoader, self).__init__(item, **context)
    def add_xpath(self, field_name, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        self.add_value(field_name, values, *processors, **kw)
    def replace_xpath(self, field_name, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        self.replace_value(field_name, values, *processors, **kw)
    def get_xpath(self, xpath, *processors, **kw):
-        values = self._get_values(xpath, **kw)
+        values = self._get_xpathvalues(xpath, **kw)
        return self.get_value(values, *processors, **kw)
    @deprecated(use_instead='._get_xpathvalues()')
    def _get_values(self, xpaths, **kw):
        return self._get_xpathvalues(xpaths, **kw)
    def _get_xpathvalues(self, xpaths, **kw):
        self._check_selector_method()
        xpaths = arg_to_iter(xpaths)
        return flatten([self.selector.xpath(xpath).extract() for xpath in xpaths])
    def add_css(self, field_name, css, *processors, **kw):
        values = self._get_cssvalues(css, **kw)
        self.add_value(field_name, values, *processors, **kw)
    def replace_css(self, field_name, css, *processors, **kw):
        values = self._get_cssvalues(css, **kw)
        self.replace_value(field_name, values, *processors, **kw)
    def get_css(self, css, *processors, **kw):
        values = self._get_cssvalues(css, **kw)
        return self.get_value(values, *processors, **kw)
    def _get_cssvalues(self, csss, **kw):
        self._check_selector_method()
        csss = arg_to_iter(csss)
        return flatten([self.selector.css(css).extract() for css in csss])
 class XPathItemLoader(ItemLoader):
    def __init__(self, *a, **kw):
        import warnings
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.warn('%s is deprecated, instanciate scrapy.contrib.loader.ItemLoader '
                      'instead' % type(self).__name__,
                      category=ScrapyDeprecationWarning, stacklevel=1)
        super(XPathItemLoader, self).__init__(*a, **kw)
--- a/scrapy/tests/test_contrib_loader.py
+++ b/scrapy/tests/test_contrib_loader.py
@ -1,6 +1,6 @@
 import unittest
-from scrapy.contrib.loader import ItemLoader, XPathItemLoader
+from scrapy.contrib.loader import ItemLoader
 from scrapy.contrib.loader.processor import Join, Identity, TakeFirst, \
    Compose, MapCompose
 from scrapy.item import Item, Field
@ -38,7 +38,7 @@ def processor_with_args(value, other=None, loader_context=None):
    return value
-class ItemLoaderTest(unittest.TestCase):
+class BasicItemLoaderTest(unittest.TestCase):
    def test_load_item_using_default_loader(self):
        i = TestItem()
@ -367,37 +367,78 @@ class ProcessorsTest(unittest.TestCase):
                         [u'HELLO', u'THIS', u'IS', u'SCRAPY'])
-class TestXPathItemLoader(XPathItemLoader):
+class SelectortemLoaderTest(unittest.TestCase):
-    default_item_class = TestItem
+    response = HtmlResponse(url="", body="""
-    name_in = MapCompose(lambda v: v.title())
+    <html>
    <body>
    <div id="id">marta</div>
    <p>paragraph</p>
    <a href="http://www.scrapy.org">homepage</a>
    <img src="/images/logo.png" width="244" height="65" alt="Scrapy">
    </body>
    </html>
    """)
-
+    def test_constructor(self):
-class XPathItemLoaderTest(unittest.TestCase):
+        l = TestItemLoader()
-    response = HtmlResponse(url="", body='<html><body><div id="id">marta</div><p>paragraph</p></body></html>')
+        self.assertEqual(l.selector, None)
    def test_constructor_errors(self):
-        self.assertRaises(RuntimeError, XPathItemLoader)
+        l = TestItemLoader()
        self.assertRaises(RuntimeError, l.add_xpath, 'url', '//a/@href')
        self.assertRaises(RuntimeError, l.replace_xpath, 'url', '//a/@href')
        self.assertRaises(RuntimeError, l.get_xpath, '//a/@href')
        self.assertRaises(RuntimeError, l.add_css, 'name', '#name::text')
        self.assertRaises(RuntimeError, l.replace_css, 'name', '#name::text')
        self.assertRaises(RuntimeError, l.get_css, '#name::text')
    def test_constructor_with_selector(self):
        sel = Selector(text=u"<html><body><div>marta</div></body></html>")
-        l = TestXPathItemLoader(selector=sel)
+        l = TestItemLoader(selector=sel)
        self.assert_(l.selector is sel)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
    def test_constructor_with_selector_css(self):
        sel = Selector(text=u"<html><body><div>marta</div></body></html>")
        l = TestItemLoader(selector=sel)
        self.assert_(l.selector is sel)
        l.add_css('name', 'div::text')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
    def test_constructor_with_response(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
    def test_constructor_with_response_css(self):
        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_css('name', 'div::text')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.add_css('url', 'a::attr(href)')
        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
        # combining/accumulating CSS selectors and XPath expressions
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta', u'Marta'])
        l.add_xpath('url', '//img/@src')
        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org', u'/images/logo.png'])
    def test_add_xpath_re(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        l.add_xpath('name', '//div/text()', re='ma')
        self.assertEqual(l.get_output_value('name'), [u'Ma'])
    def test_replace_xpath(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
@ -408,7 +449,7 @@ class XPathItemLoaderTest(unittest.TestCase):
        self.assertEqual(l.get_output_value('name'), [u'Paragraph', 'Marta'])
    def test_get_xpath(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assertEqual(l.get_xpath('//p/text()'), [u'paragraph'])
        self.assertEqual(l.get_xpath('//p/text()', TakeFirst()), u'paragraph')
        self.assertEqual(l.get_xpath('//p/text()', TakeFirst(), re='pa'), u'pa')
@ -416,20 +457,74 @@ class XPathItemLoaderTest(unittest.TestCase):
        self.assertEqual(l.get_xpath(['//p/text()', '//div/text()']), [u'paragraph', 'marta'])
    def test_replace_xpath_multi_fields(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        l.add_xpath(None, '//div/text()', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_xpath(None, '//p/text()', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])
    def test_replace_xpath_re(self):
-        l = TestXPathItemLoader(response=self.response)
+        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_xpath('name', '//div/text()')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_xpath('name', '//div/text()', re='ma')
        self.assertEqual(l.get_output_value('name'), [u'Ma'])
    def test_add_css_re(self):
        l = TestItemLoader(response=self.response)
        l.add_css('name', 'div::text', re='ma')
        self.assertEqual(l.get_output_value('name'), [u'Ma'])
        l.add_css('url', 'a::attr(href)', re='http://(.+)')
        self.assertEqual(l.get_output_value('url'), [u'www.scrapy.org'])
    def test_replace_css(self):
        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_css('name', 'div::text')
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_css('name', 'p::text')
        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])
        l.replace_css('name', ['p::text', 'div::text'])
        self.assertEqual(l.get_output_value('name'), [u'Paragraph', 'Marta'])
        l.add_css('url', 'a::attr(href)', re='http://(.+)')
        self.assertEqual(l.get_output_value('url'), [u'www.scrapy.org'])
        l.replace_css('url', 'img::attr(src)')
        self.assertEqual(l.get_output_value('url'), [u'/images/logo.png'])
    def test_get_css(self):
        l = TestItemLoader(response=self.response)
        self.assertEqual(l.get_css('p::text'), [u'paragraph'])
        self.assertEqual(l.get_css('p::text', TakeFirst()), u'paragraph')
        self.assertEqual(l.get_css('p::text', TakeFirst(), re='pa'), u'pa')
        self.assertEqual(l.get_css(['p::text', 'div::text']), [u'paragraph', 'marta'])
        self.assertEqual(l.get_css(['a::attr(href)', 'img::attr(src)']),
            [u'http://www.scrapy.org', u'/images/logo.png'])
    def test_replace_css_multi_fields(self):
        l = TestItemLoader(response=self.response)
        l.add_css(None, 'div::text', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Marta'])
        l.replace_css(None, 'p::text', TakeFirst(), lambda x: {'name': x})
        self.assertEqual(l.get_output_value('name'), [u'Paragraph'])
        l.add_css(None, 'a::attr(href)', TakeFirst(), lambda x: {'url': x})
        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
        l.replace_css(None, 'img::attr(src)', TakeFirst(), lambda x: {'url': x})
        self.assertEqual(l.get_output_value('url'), [u'/images/logo.png'])
    def test_replace_css_re(self):
        l = TestItemLoader(response=self.response)
        self.assert_(l.selector)
        l.add_css('url', 'a::attr(href)')
        self.assertEqual(l.get_output_value('url'), [u'http://www.scrapy.org'])
        l.replace_css('url', 'a::attr(href)', re='http://www\.(.+)')
        self.assertEqual(l.get_output_value('url'), [u'scrapy.org'])
 if __name__ == "__main__":
    unittest.main()