mirror of https://github.com/scrapy/scrapy.git synced 2025-02-24 06:43:43 +00:00

use sel name for Selector's instances in docs, internals and shell

Daniel Graña 2013-10-15 15:58:36 -02:00
parent 1abb1af0c6
commit 155ea08ea1
11 changed files with 95 additions and 95 deletions

View File

@@ -143,12 +143,12 @@ Finally, here's the spider code::
         rules = [Rule(SgmlLinkExtractor(allow=['/tor/\d+']), 'parse_torrent')]
 
         def parse_torrent(self, response):
-            ss = Selector(response)
+            sel = Selector(response)
             torrent = TorrentItem()
             torrent['url'] = response.url
-            torrent['name'] = ss.xpath("//h1/text()").extract()
-            torrent['description'] = ss.xpath("//div[@id='description']").extract()
-            torrent['size'] = ss.xpath("//div[@id='info-left']/p[2]/text()[2]").extract()
+            torrent['name'] = sel.xpath("//h1/text()").extract()
+            torrent['description'] = sel.xpath("//div[@id='description']").extract()
+            torrent['size'] = sel.xpath("//div[@id='info-left']/p[2]/text()[2]").extract()
             return torrent
 
 For brevity's sake, we intentionally left out the import statements. The
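
For reference, a minimal sketch of the omitted imports as they would look in this era of Scrapy; the spider base class and the item module path are assumptions, since neither appears in the hunk::

    # Sketch of the omitted imports; the item module path is hypothetical.
    from scrapy.contrib.spiders import CrawlSpider, Rule
    from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
    from scrapy.selector import Selector
    from myproject.items import TorrentItem  # hypothetical project module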

View File

@@ -255,7 +255,7 @@ This is what the shell looks like::
     [s] Available Scrapy objects:
     [s]   2010-08-19 21:45:59-0300 [default] INFO: Spider closed (finished)
-    [s]   ss         <Selector (http://www.dmoz.org/Computers/Programming/Languages/Python/Books/) xpath=None>
+    [s]   sel        <Selector (http://www.dmoz.org/Computers/Programming/Languages/Python/Books/) xpath=None>
     [s]   item       Item()
     [s]   request    <GET http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
     [s]   response   <200 http://www.dmoz.org/Computers/Programming/Languages/Python/Books/>
@@ -271,25 +271,25 @@ After the shell loads, you will have the response fetched in a local
 ``response`` variable, so if you type ``response.body`` you will see the body
 of the response, or you can type ``response.headers`` to see its headers.
 
-The shell also pre-instantiates a selector for this response in the variable ``ss``;
+The shell also pre-instantiates a selector for this response in the variable ``sel``;
 the selector automatically chooses the best parsing rules (XML vs HTML) based
 on the response's type.
 
 So let's try it::
 
-    In [1]: ss.xpath('//title')
+    In [1]: sel.xpath('//title')
     Out[1]: [<Selector (title) xpath=//title>]
 
-    In [2]: ss.xpath('//title').extract()
+    In [2]: sel.xpath('//title').extract()
     Out[2]: [u'<title>Open Directory - Computers: Programming: Languages: Python: Books</title>']
 
-    In [3]: ss.xpath('//title/text()')
+    In [3]: sel.xpath('//title/text()')
     Out[3]: [<Selector (text) xpath=//title/text()>]
 
-    In [4]: ss.xpath('//title/text()').extract()
+    In [4]: sel.xpath('//title/text()').extract()
     Out[4]: [u'Open Directory - Computers: Programming: Languages: Python: Books']
 
-    In [5]: ss.xpath('//title/text()').re('(\w+):')
+    In [5]: sel.xpath('//title/text()').re('(\w+):')
     Out[5]: [u'Computers', u'Programming', u'Languages', u'Python']
 
 Extracting the data
@@ -309,25 +309,25 @@ is inside a ``<ul>`` element, in fact the *second* ``<ul>`` element.
 So we can select each ``<li>`` element belonging to the sites list with this
 code::
 
-    ss.xpath('//ul/li')
+    sel.xpath('//ul/li')
 
 And from them, the sites descriptions::
 
-    ss.xpath('//ul/li/text()').extract()
+    sel.xpath('//ul/li/text()').extract()
 
 The sites titles::
 
-    ss.xpath('//ul/li/a/text()').extract()
+    sel.xpath('//ul/li/a/text()').extract()
 
 And the sites links::
 
-    ss.xpath('//ul/li/a/@href').extract()
+    sel.xpath('//ul/li/a/@href').extract()
 
 As we said before, each ``.xpath()`` call returns a list of selectors, so we can
 concatenate further ``.xpath()`` calls to dig deeper into a node. We are going to use
 that property here, so::
 
-    sites = ss.xpath('//ul/li')
+    sites = sel.xpath('//ul/li')
     for site in sites:
         title = site.xpath('a/text()').extract()
         link = site.xpath('a/@href').extract()
@@ -355,8 +355,8 @@ Let's add this code to our spider::
         ]
 
         def parse(self, response):
-            ss = Selector(response)
-            sites = ss.xpath('//ul/li')
+            sel = Selector(response)
+            sites = sel.xpath('//ul/li')
             for site in sites:
                 title = site.xpath('a/text()').extract()
                 link = site.xpath('a/@href').extract()
@@ -398,8 +398,8 @@ scraped so far, the final code for our Spider would be like this::
         ]
 
         def parse(self, response):
-            ss = Selector(response)
-            sites = ss.xpath('//ul/li')
+            sel = Selector(response)
+            sites = sel.xpath('//ul/li')
             items = []
             for site in sites:
                 item = DmozItem()
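
The hunk ends just after the item is instantiated. Based on the loop shown in the previous hunk, the final ``parse()`` presumably continues by filling the item fields and collecting the items, along these lines (the ``title``/``link``/``desc`` field names are assumptions taken from the surrounding tutorial, not from this diff)::

    def parse(self, response):
        sel = Selector(response)
        sites = sel.xpath('//ul/li')
        items = []
        for site in sites:
            item = DmozItem()
            # Assumed field assignments, mirroring the loop shown earlier
            item['title'] = site.xpath('a/text()').extract()
            item['link'] = site.xpath('a/@href').extract()
            item['desc'] = site.xpath('text()').extract()
            items.append(item)
        return items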

View File

@@ -146,10 +146,10 @@ that have that grey colour of the links,
 Finally, we can write our ``parse_category()`` method::
 
     def parse_category(self, response):
-        ss = Selector(response)
+        sel = Selector(response)
 
         # The path to website links in directory page
-        links = ss.xpath('//td[descendant::a[contains(@href, "#pagerank")]]/following-sibling::td/font')
+        links = sel.xpath('//td[descendant::a[contains(@href, "#pagerank")]]/following-sibling::td/font')
 
         for link in links:
             item = DirectoryItem()

View File

@@ -62,13 +62,13 @@ body is what they're going to be "selecting"::
     class MySpider(BaseSpider):
         # ...
         def parse(self, response):
-            ss = Selector(response)
+            sel = Selector(response)
 
             # Using XPath query
-            print ss.xpath('//p')
+            print sel.xpath('//p')
 
             # Using CSS query
-            print ss.css('p')
+            print sel.css('p')
 
             # Nesting queries
-            print ss.xpath('//div[@foo="bar"]').css('span#bold')
+            print sel.xpath('//div[@foo="bar"]').css('span#bold')
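
Note that a selector does not have to wrap a ``Response``: as the test suite later in this diff does with ``self.sscls(text=body)``, a ``Selector`` can presumably also be built straight from a string of markup via the ``text`` keyword. A minimal sketch::

    >>> from scrapy.selector import Selector
    >>> body = '<html><body><span>good</span></body></html>'
    >>> Selector(text=body).xpath('//span/text()').extract()
    [u'good']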
Using selectors
@@ -94,7 +94,7 @@ First, let's open the shell::
     scrapy shell http://doc.scrapy.org/en/latest/_static/selectors-sample1.html
 
 Then, after the shell loads, you'll have a selector already instantiated and
-ready to use in the ``ss`` shell variable.
+ready to use in the ``sel`` shell variable.
 
 Since we're dealing with HTML, the selector will automatically use an HTML parser.
@@ -104,7 +104,7 @@ So, by looking at the :ref:`HTML code <topics-selectors-htmlcode>` of that
 page, let's construct an XPath (using an HTML selector) for selecting the text
 inside the title tag::
 
-    >>> ss.xpath('//title/text()')
+    >>> sel.xpath('//title/text()')
     [<Selector (text) xpath=//title/text()>]
 
 As you can see, the ``.xpath()`` method returns an
@@ -114,45 +114,45 @@ selectors. This API can be used quickly for extracting nested data.
 To actually extract the textual data, you must call the selector ``.extract()``
 method, as follows::
 
-    >>> ss.xpath('//title/text()').extract()
+    >>> sel.xpath('//title/text()').extract()
     [u'Example website']
 
 Notice that CSS selectors can select text or attribute nodes using CSS3
 pseudo-elements::
 
-    >>> ss.css('title::text').extract()
+    >>> sel.css('title::text').extract()
     [u'Example website']
 
 Now we're going to get the base URL and some image links::
 
-    >>> ss.xpath('//base/@href').extract()
+    >>> sel.xpath('//base/@href').extract()
     [u'http://example.com/']
 
-    >>> ss.css('base::attr(href)').extract()
+    >>> sel.css('base::attr(href)').extract()
     [u'http://example.com/']
 
-    >>> ss.xpath('//a[contains(@href, "image")]/@href').extract()
+    >>> sel.xpath('//a[contains(@href, "image")]/@href').extract()
     [u'image1.html',
      u'image2.html',
     u'image3.html',
     u'image4.html',
     u'image5.html']
 
-    >>> ss.css('a[href*=image]::attr(href)').extract()
+    >>> sel.css('a[href*=image]::attr(href)').extract()
     [u'image1.html',
     u'image2.html',
     u'image3.html',
     u'image4.html',
     u'image5.html']
 
-    >>> ss.xpath('//a[contains(@href, "image")]/img/@src').extract()
+    >>> sel.xpath('//a[contains(@href, "image")]/img/@src').extract()
     [u'image1_thumb.jpg',
     u'image2_thumb.jpg',
    u'image3_thumb.jpg',
    u'image4_thumb.jpg',
    u'image5_thumb.jpg']
 
-    >>> ss.css('a[href*=image] img::attr(src)').extract()
+    >>> sel.css('a[href*=image] img::attr(src)').extract()
     [u'image1_thumb.jpg',
     u'image2_thumb.jpg',
    u'image3_thumb.jpg',
@@ -168,7 +168,7 @@ The selection methods (``.xpath()`` or ``.css()``) return a list of selectors
 of the same type, so you can call the selection methods for those selectors
 too. Here's an example::
 
-    >>> links = ss.xpath('//a[contains(@href, "image")]')
+    >>> links = sel.xpath('//a[contains(@href, "image")]')
     >>> links.extract()
     [u'<a href="image1.html">Name: My image 1 <br><img src="image1_thumb.jpg"></a>',
     u'<a href="image2.html">Name: My image 2 <br><img src="image2_thumb.jpg"></a>',
@@ -197,7 +197,7 @@ can't construct nested ``.re()`` calls.
 Here's an example used to extract image names from the :ref:`HTML code
 <topics-selectors-htmlcode>` above::
 
-    >>> ss.xpath('//a[contains(@href, "image")]/text()').re(r'Name:\s*(.*)')
+    >>> sel.xpath('//a[contains(@href, "image")]/text()').re(r'Name:\s*(.*)')
     [u'My image 1',
     u'My image 2',
    u'My image 3',
@@ -216,7 +216,7 @@ with ``/``, that XPath will be absolute to the document and not relative to the
 For example, suppose you want to extract all ``<p>`` elements inside ``<div>``
 elements. First, you would get all ``<div>`` elements::
 
-    >>> divs = ss.xpath('//div')
+    >>> divs = sel.xpath('//div')
 
 At first, you may be tempted to use the following approach, which is wrong, as
 it actually extracts all ``<p>`` elements from the document, not only those
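
The hunk cuts off mid-explanation; the docs presumably go on to contrast the wrong absolute query with a relative one. A sketch of that contrast, using the ``divs`` variable from above::

    >>> divs.xpath('//p')   # wrong: absolute, selects every <p> in the document
    >>> divs.xpath('.//p')  # relative: selects only <p> elements inside each <div>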

View File

@@ -83,7 +83,7 @@ Those objects are:
 * ``response`` - a :class:`~scrapy.http.Response` object containing the last
   fetched page
 
-* ``ss`` - a :class:`~scrapy.selector.Selector` object constructed
+* ``sel`` - a :class:`~scrapy.selector.Selector` object constructed
   with the last response fetched
 
 * ``settings`` - the current :ref:`Scrapy settings <topics-settings>`
@@ -111,7 +111,7 @@ list of available objects and useful shortcuts (you'll notice that these lines
 all start with the ``[s]`` prefix)::
 
     [s] Available objects
-    [s]   ss         <Selector (http://scrapy.org) xpath=None>
+    [s]   sel        <Selector (http://scrapy.org) xpath=None>
     [s]   item       Item()
     [s]   request    <http://scrapy.org>
     [s]   response   <http://scrapy.org>
@@ -126,12 +126,12 @@ all start with the ``[s]`` prefix)::
 
 After that, we can start playing with the objects::
 
-    >>> ss.xpath("//h2/text()").extract()[0]
+    >>> sel.xpath("//h2/text()").extract()[0]
     u'Welcome to Scrapy'
 
     >>> fetch("http://slashdot.org")
     [s] Available Scrapy objects:
-    [s]   ss         <Selector (http://slashdot.org) xpath=None>
+    [s]   sel        <Selector (http://slashdot.org) xpath=None>
     [s]   item       JobItem()
     [s]   request    <GET http://slashdot.org>
     [s]   response   <200 http://slashdot.org>
@@ -142,7 +142,7 @@ After that, we can start playing with the objects::
     [s]   fetch(req_or_url) Fetch request (or URL) and update local objects
     [s]   view(response)    View response in a browser
 
-    >>> ss.xpath("//h2/text()").extract()
+    >>> sel.xpath("//h2/text()").extract()
     [u'News for nerds, stuff that matters']
 
     >>> request = request.replace(method="POST")
@@ -180,7 +180,7 @@ When you run the spider, you will get something similar to this::
     2009-08-27 19:15:25-0300 [example.com] DEBUG: Crawled <http://www.example.com/> (referer: <None>)
     2009-08-27 19:15:26-0300 [example.com] DEBUG: Crawled <http://www.example.com/products.php> (referer: <http://www.example.com/>)
     [s] Available objects
-    [s]   ss         <Selector (http://www.example.com/products.php) xpath=None>
+    [s]   sel        <Selector (http://www.example.com/products.php) xpath=None>
     ...
 
     >>> response.url
@@ -188,7 +188,7 @@ When you run the spider, you will get something similar to this::
 Then, you can check if the extraction code is working::
 
-    >>> ss.xpath('//h1')
+    >>> sel.xpath('//h1')
     []
 
 Nope, it doesn't. So you can open the response in your web browser and see if
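
The context ends mid-sentence here; the ``view(response)`` shortcut listed in the earlier shell banner is the tool for exactly this check. A minimal sketch of its use::

    >>> view(response)   # opens the fetched response in the local web browser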

View File

@@ -231,11 +231,11 @@ Another example returning multiple Requests and Items from a single callback::
         ]
 
         def parse(self, response):
-            ss = Selector(response)
-            for h3 in ss.xpath('//h3').extract():
+            sel = Selector(response)
+            for h3 in sel.xpath('//h3').extract():
                 yield MyItem(title=h3)
 
-            for url in ss.xpath('//a/@href').extract():
+            for url in sel.xpath('//a/@href').extract():
                 yield Request(url, callback=self.parse)
 
 .. module:: scrapy.contrib.spiders
@@ -334,11 +334,11 @@ Let's now take a look at an example CrawlSpider with rules::
         def parse_item(self, response):
             self.log('Hi, this is an item page! %s' % response.url)
 
-            ss = Selector(response)
+            sel = Selector(response)
             item = Item()
-            item['id'] = ss.xpath('//td[@id="item_id"]/text()').re(r'ID: (\d+)')
-            item['name'] = ss.xpath('//td[@id="item_name"]/text()').extract()
-            item['description'] = ss.xpath('//td[@id="item_description"]/text()').extract()
+            item['id'] = sel.xpath('//td[@id="item_id"]/text()').re(r'ID: (\d+)')
+            item['name'] = sel.xpath('//td[@id="item_name"]/text()').extract()
+            item['description'] = sel.xpath('//td[@id="item_description"]/text()').extract()
             return item
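
Only the ``parse_item()`` half of the CrawlSpider example made it into this hunk; the ``rules`` attribute it refers to would sit above it in the class. A hedged sketch of what such rules typically look like (the URL patterns are illustrative, not from the diff)::

    rules = (
        # Follow links matching 'category.php' (no callback means follow the links)
        Rule(SgmlLinkExtractor(allow=('category\.php', ))),
        # Parse links matching 'item.php' with the spider's parse_item method
        Rule(SgmlLinkExtractor(allow=('item\.php', )), callback='parse_item'),
    )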

View File

@@ -116,11 +116,11 @@ class SgmlLinkExtractor(BaseSgmlLinkExtractor):
     def extract_links(self, response):
         base_url = None
         if self.restrict_xpaths:
-            ss = Selector(response)
+            sel = Selector(response)
             base_url = get_base_url(response)
             body = u''.join(f
                             for x in self.restrict_xpaths
-                            for f in ss.xpath(x).extract()
+                            for f in sel.xpath(x).extract()
                             ).encode(response.encoding)
         else:
             body = response.body
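
For context, ``restrict_xpaths`` is what triggers the ``Selector`` branch above: it narrows link extraction to the page regions matched by the given XPaths. A usage sketch (the XPath and the ``response`` object are illustrative)::

    from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor

    # Extract links only from the matched region of the page
    lx = SgmlLinkExtractor(restrict_xpaths=('//div[@id="content"]', ))
    links = lx.extract_links(response)  # response comes from a spider callback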

View File

@@ -17,7 +17,7 @@ from scrapy.utils.misc import load_object
 from scrapy.utils.response import open_in_browser
 from scrapy.utils.console import start_python_console
 from scrapy.settings import Settings
-from scrapy.http import Request, Response, HtmlResponse, XmlResponse
+from scrapy.http import Request, Response
 from scrapy.exceptions import IgnoreRequest
@@ -95,7 +95,7 @@ class Shell(object):
         self.vars['spider'] = spider
         self.vars['request'] = request
         self.vars['response'] = response
-        self.vars['ss'] = Selector(response)
+        self.vars['sel'] = Selector(response)
         if self.inthread:
             self.vars['fetch'] = self.fetch
             self.vars['view'] = open_in_browser

View File

@@ -31,7 +31,7 @@ class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
 
     @defer.inlineCallbacks
     def test_response_selector_html(self):
-        xpath = 'ss.xpath("//p[@class=\'one\']/text()").extract()[0]'
+        xpath = 'sel.xpath("//p[@class=\'one\']/text()").extract()[0]'
         _, out, _ = yield self.execute([self.url('/html'), '-c', xpath])
         self.assertEqual(out.strip(), 'Works')

View File

@@ -16,31 +16,31 @@ class SelectorTestCase(unittest.TestCase):
         """Simple selector tests"""
         body = "<p><input name='a'value='1'/><input name='b'value='2'/></p>"
         response = TextResponse(url="http://example.com", body=body)
-        ss = self.sscls(response)
+        sel = self.sscls(response)
 
-        xl = ss.xpath('//input')
+        xl = sel.xpath('//input')
         self.assertEqual(2, len(xl))
         for x in xl:
             assert isinstance(x, self.sscls)
 
-        self.assertEqual(ss.xpath('//input').extract(),
-                         [x.extract() for x in ss.xpath('//input')])
+        self.assertEqual(sel.xpath('//input').extract(),
+                         [x.extract() for x in sel.xpath('//input')])
 
-        self.assertEqual([x.extract() for x in ss.xpath("//input[@name='a']/@name")],
+        self.assertEqual([x.extract() for x in sel.xpath("//input[@name='a']/@name")],
                          [u'a'])
-        self.assertEqual([x.extract() for x in ss.xpath("number(concat(//input[@name='a']/@value, //input[@name='b']/@value))")],
+        self.assertEqual([x.extract() for x in sel.xpath("number(concat(//input[@name='a']/@value, //input[@name='b']/@value))")],
                          [u'12.0'])
-        self.assertEqual(ss.xpath("concat('xpath', 'rules')").extract(),
+        self.assertEqual(sel.xpath("concat('xpath', 'rules')").extract(),
                          [u'xpathrules'])
-        self.assertEqual([x.extract() for x in ss.xpath("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
+        self.assertEqual([x.extract() for x in sel.xpath("concat(//input[@name='a']/@value, //input[@name='b']/@value)")],
                          [u'12'])
 
     def test_select_unicode_query(self):
         body = u"<p><input name='\xa9' value='1'/></p>"
         response = TextResponse(url="http://example.com", body=body, encoding='utf8')
-        ss = self.sscls(response)
-        self.assertEqual(ss.xpath(u'//input[@name="\xa9"]/@value').extract(), [u'1'])
+        sel = self.sscls(response)
+        self.assertEqual(sel.xpath(u'//input[@name="\xa9"]/@value').extract(), [u'1'])
 
     def test_list_elements_type(self):
         """Test Selector returning the same type in selection methods"""
@@ -69,14 +69,14 @@ class SelectorTestCase(unittest.TestCase):
 
     def test_flavor_detection(self):
         text = '<div><img src="a.jpg"><p>Hello</div>'
-        ss = self.sscls(XmlResponse('http://example.com', body=text))
-        self.assertEqual(ss.contenttype, 'xml')
-        self.assertEqual(ss.xpath("//div").extract(),
+        sel = self.sscls(XmlResponse('http://example.com', body=text))
+        self.assertEqual(sel.contenttype, 'xml')
+        self.assertEqual(sel.xpath("//div").extract(),
                          [u'<div><img src="a.jpg"><p>Hello</p></img></div>'])
 
-        ss = self.sscls(HtmlResponse('http://example.com', body=text))
-        self.assertEqual(ss.contenttype, 'html')
-        self.assertEqual(ss.xpath("//div").extract(),
+        sel = self.sscls(HtmlResponse('http://example.com', body=text))
+        self.assertEqual(sel.contenttype, 'html')
+        self.assertEqual(sel.xpath("//div").extract(),
                          [u'<div><img src="a.jpg"><p>Hello</p></div>'])
 
     def test_nested_selectors(self):
@@ -110,13 +110,13 @@ class SelectorTestCase(unittest.TestCase):
                   <div id=1>not<span>me</span></div>
                   <div class="dos"><p>text</p><a href='#'>foo</a></div>
                </body>'''
-        ss = self.sscls(text=body)
-        self.assertEqual(ss.xpath('//div[@id="1"]').css('span::text').extract(), [u'me'])
-        self.assertEqual(ss.css('#1').xpath('./span/text()').extract(), [u'me'])
+        sel = self.sscls(text=body)
+        self.assertEqual(sel.xpath('//div[@id="1"]').css('span::text').extract(), [u'me'])
+        self.assertEqual(sel.css('#1').xpath('./span/text()').extract(), [u'me'])
 
     def test_dont_strip(self):
-        hxs = self.sscls(text='<div>fff: <a href="#">zzz</a></div>')
-        self.assertEqual(hxs.xpath("//text()").extract(), [u'fff: ', u'zzz'])
+        sel = self.sscls(text='<div>fff: <a href="#">zzz</a></div>')
+        self.assertEqual(sel.xpath("//text()").extract(), [u'fff: ', u'zzz'])
 
     def test_namespaces_simple(self):
         body = """
@@ -279,10 +279,10 @@ class SelectorTestCase(unittest.TestCase):
             <link type="application/atom+xml">
         </feed>
         """
-        xxs = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
-        self.assertEqual(len(xxs.xpath("//link")), 0)
-        xxs.remove_namespaces()
-        self.assertEqual(len(xxs.xpath("//link")), 2)
+        sel = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
+        self.assertEqual(len(sel.xpath("//link")), 0)
+        sel.remove_namespaces()
+        self.assertEqual(len(sel.xpath("//link")), 2)
 
     def test_remove_attributes_namespaces(self):
         xml = """<?xml version="1.0" encoding="UTF-8"?>
@@ -291,10 +291,10 @@ class SelectorTestCase(unittest.TestCase):
             <link atom:type="application/atom+xml">
         </feed>
         """
-        xxs = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
-        self.assertEqual(len(xxs.xpath("//link/@type")), 0)
-        xxs.remove_namespaces()
-        self.assertEqual(len(xxs.xpath("//link/@type")), 2)
+        sel = self.sscls(XmlResponse("http://example.com/feed.atom", body=xml))
+        self.assertEqual(len(sel.xpath("//link/@type")), 0)
+        sel.remove_namespaces()
+        self.assertEqual(len(sel.xpath("//link/@type")), 2)
 
 
 class DeprecatedXpathSelectorTest(unittest.TestCase):

View File

@@ -120,16 +120,16 @@ class CSSSelectorTest(unittest.TestCase):
 
     def setUp(self):
         self.htmlresponse = HtmlResponse('http://example.com', body=HTMLBODY)
-        self.ss = self.sscls(self.htmlresponse)
+        self.sel = self.sscls(self.htmlresponse)
 
     def x(self, *a, **kw):
-        return [v.strip() for v in self.ss.css(*a, **kw).extract() if v.strip()]
+        return [v.strip() for v in self.sel.css(*a, **kw).extract() if v.strip()]
 
     def test_selector_simple(self):
-        for x in self.ss.css('input'):
-            self.assertTrue(isinstance(x, self.ss.__class__), x)
-        self.assertEqual(self.ss.css('input').extract(),
-                         [x.extract() for x in self.ss.css('input')])
+        for x in self.sel.css('input'):
+            self.assertTrue(isinstance(x, self.sel.__class__), x)
+        self.assertEqual(self.sel.css('input').extract(),
+                         [x.extract() for x in self.sel.css('input')])
 
     def test_text_pseudo_element(self):
         self.assertEqual(self.x('#p-b2'), [u'<b id="p-b2">guy</b>'])
@@ -147,7 +147,7 @@ class CSSSelectorTest(unittest.TestCase):
         self.assertEqual(self.x('map[name="dummymap"] ::attr(shape)'), [u'circle', u'default'])
 
     def test_nested_selector(self):
-        self.assertEqual(self.ss.css('p').css('b::text').extract(),
+        self.assertEqual(self.sel.css('p').css('b::text').extract(),
                          [u'hi', u'guy'])
-        self.assertEqual(self.ss.css('div').css('area:last-child').extract(),
+        self.assertEqual(self.sel.css('div').css('area:last-child').extract(),
                          [u'<area shape="default" id="area-nohref">'])