Modified the extract adaptor to read its options from "adaptor_args" (as it should), and added tests for AdaptorPipe

--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40673
2025-02-26 22:04:01 +00:00 · 2009-01-07 14:00:54 +00:00 · 2009-01-07 14:00:54 +00:00 · e4852bca78
commit e4852bca78
parent 41fbcbde48
3 changed files with 42 additions and 6 deletions
--- a/scrapy/trunk/scrapy/contrib/adaptors/extraction.py
+++ b/scrapy/trunk/scrapy/contrib/adaptors/extraction.py
@ -10,7 +10,7 @@ from scrapy.utils.url import is_url
 from scrapy.utils.python import flatten
 from scrapy.xpath.selector import XPathSelector, XPathSelectorList

-def extract(locations, use_unquote=True):
+def extract(locations, adaptor_args=None):
    """
    This adaptor tries to extract data from the given locations.
    Any XPathSelector in it will be extracted, and any other data
@ -23,8 +23,8 @@ def extract(locations, use_unquote=True):
    Input: anything
    Output: list of extracted selectors plus anything else in the input
    """
-
    locations = flatten([locations])
+    use_unquote = adaptor_args.get('use_unquote', True) if adaptor_args else True

    result = []
    for location in locations:
--- a/scrapy/trunk/scrapy/item/adaptors.py
+++ b/scrapy/trunk/scrapy/item/adaptors.py
@ -51,10 +51,11 @@ class AdaptorPipe(list):
        return value

    def __add__(self, other):
-        if isinstance(other, list):
-            return AdaptorPipe(super(AdaptorPipe, self).__add__(other))
-        elif callable(other):
-            return AdaptorPipe(self + [other])
+        if callable(other):
+            other = [other]
+        elif hasattr(other, '__iter__'):
+            other = list(other)
+        return AdaptorPipe(super(AdaptorPipe, self).__add__(other))

    def __repr__(self):
        return '<AdaptorPipe %s >' % super(AdaptorPipe, self).__repr__()
--- a/scrapy/trunk/scrapy/tests/test_adaptors.py
+++ b/scrapy/trunk/scrapy/tests/test_adaptors.py
@ -3,10 +3,38 @@ import os
 import unittest
 import re

+from scrapy.item.adaptors import AdaptorPipe
 from scrapy.contrib import adaptors
 from scrapy.http import Response, Headers
 from scrapy.xpath.selector import HtmlXPathSelector, XmlXPathSelector

+class AdaptorPipeTestCase(unittest.TestCase):
+    def test_pipe_init(self):
+        self.assertRaises(TypeError, AdaptorPipe, [adaptors.extract, 'a string'])
+
+    def test_adaptor_args(self):
+        def sample_adaptor(value, adaptor_args):
+            '''Dummy adaptor that joins the received value with the given string'''
+            sample_text = adaptor_args.get('sample_arg', 'sample text 1')
+            return '%s "%s"' % (value, sample_text)
+
+        sample_value = 'hi, this is my text:'
+        sample_pipe = AdaptorPipe([sample_adaptor])
+        self.assertEqual(sample_pipe(sample_value), 'hi, this is my text: "sample text 1"')
+        self.assertEqual(sample_pipe(sample_value, sample_arg='foobarfoobar'),
+            'hi, this is my text: "foobarfoobar"')
+
+    def test_add(self):
+        pipe1 = AdaptorPipe([adaptors.extract])
+        pipe2 = [adaptors.remove_tags]
+        pipe3 = (adaptors.remove_root, )
+        sample_callable = dir
+
+        self.assertTrue(isinstance(pipe1 + pipe1, AdaptorPipe))
+        self.assertTrue(isinstance(pipe1 + pipe2, AdaptorPipe))
+        self.assertTrue(isinstance(pipe1 + pipe3, AdaptorPipe))
+        self.assertTrue(isinstance(pipe1 + sample_callable, AdaptorPipe))
+
 class AdaptorsTestCase(unittest.TestCase):
    def setUp(self):
        self.samplesdir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'sample_data', 'adaptors'))
@ -75,9 +103,16 @@ class AdaptorsTestCase(unittest.TestCase):

    def test_extract_unquoted(self):
        x = self.get_selector('example.com', 'http://www.example.com/test_unquoted', 'extr_unquoted.xml', selector=XmlXPathSelector)
+
+        # test unquoting
        self.assertEqual(adaptors.extract(x.x('//tag1/text()')), [u'test text & &', u'more test text &amp; &gt;', u'blah&blah'])
        self.assertEqual(adaptors.extract(x.x('//tag2/text()')), [u'blaheawfds<'])

+        # test without unquoting
+        self.assertEqual(adaptors.extract(x.x('//tag1/text()'), {'use_unquote': False}),
+            [u'test text &amp; &amp;', u'<![CDATA[more test text &amp; &gt;]]>', u'blah&amp;blah'])
+        self.assertEqual(adaptors.extract(x.x('//tag2/text()'), {'use_unquote': False}), [u'blaheawfds&lt;'])
+
    def test_extract_links(self):
        test_data = """<html><body>
                         <div>