From cd42bd7d0c8c8d9d5a4a3f38e442b5020a68116b Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Sun, 2 Jan 2011 17:21:31 -0200
Subject: [PATCH 01/14] Bumped version to 0.13

---
 scrapy/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scrapy/__init__.py b/scrapy/__init__.py
index 3d8a7a4ae..c73dae498 100644
--- a/scrapy/__init__.py
+++ b/scrapy/__init__.py
@@ -2,8 +2,8 @@
 Scrapy - a screen scraping framework written in Python
 """
 
-version_info = (0, 12, 0)
-__version__ = "0.12.0"
+version_info = (0, 13, 0)
+__version__ = "0.13.0"
 
 import sys, os, warnings

From d7f193cbeac9dba4eda7bb463b6590900bbb2998 Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Sun, 2 Jan 2011 17:29:43 -0200
Subject: [PATCH 02/14] bumped version to 0.13 in documentation

---
 docs/topics/scrapyd.rst | 2 +-
 docs/topics/ubuntu.rst  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/topics/scrapyd.rst b/docs/topics/scrapyd.rst
index 6bfe8d3f7..143e983a3 100644
--- a/docs/topics/scrapyd.rst
+++ b/docs/topics/scrapyd.rst
@@ -86,7 +86,7 @@ in your Ubuntu servers.
 So, if you plan to deploy Scrapyd on a Ubuntu server, just add the Ubuntu
 repositories as described in :ref:`topics-ubuntu` and then run::
 
-    aptitude install scrapyd-0.12
+    aptitude install scrapyd-0.13
 
 This will install Scrapyd in your Ubuntu server creating a ``scrapy`` user
 which Scrapyd will run as. It will also create some directories and files that
diff --git a/docs/topics/ubuntu.rst b/docs/topics/ubuntu.rst
index 13bdd4b7b..6cd164f7b 100644
--- a/docs/topics/ubuntu.rst
+++ b/docs/topics/ubuntu.rst
@@ -13,7 +13,7 @@ latest bug fixes.
 
 To use the packages, just add the following line to your
 ``/etc/apt/sources.list``, and then run ``aptitude update`` and ``aptitude
-install scrapy-0.12``::
+install scrapy-0.13``::
 
     deb http://archive.scrapy.org/ubuntu DISTRO main

From 579463aff252c30e97ccb2045e84f4f4d69690da Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Tue, 4 Jan 2011 13:57:32 -0200
Subject: [PATCH 03/14] make scrapy*-0.13 packages conflict with scrapy*-0.12
 packages

---
 debian/control | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/debian/control b/debian/control
index b4b159470..cc0d49040 100644
--- a/debian/control
+++ b/debian/control
@@ -9,7 +9,7 @@ Homepage: http://scrapy.org/
 Package: scrapy-SUFFIX
 Architecture: all
 Depends: ${python:Depends}, python-libxml2, python-twisted, python-openssl
-Conflicts: python-scrapy, scrapy, scrapy-0.11
+Conflicts: python-scrapy, scrapy, scrapy-0.11, scrapy-0.12
 Provides: python-scrapy, scrapy
 Description: Python web crawling and scraping framework
 Scrapy is a fast high-level screen scraping and web crawling framework,
@@ -20,7 +20,7 @@ Description: Python web crawling and scraping framework
 Package: scrapyd-SUFFIX
 Architecture: all
 Depends: scrapy, python-setuptools
-Conflicts: scrapyd, scrapyd-0.11
+Conflicts: scrapyd, scrapyd-0.11, scrapyd-0.12
 Provides: scrapyd
 Description: Scrapy Service
 The Scrapy service allows you to deploy your Scrapy projects by building
From 0ba9999cca6526bd1aea84eb5ce808fc840639b7 Mon Sep 17 00:00:00 2001
From: Martin Olveyra
Date: Wed, 5 Jan 2011 11:02:05 -0200
Subject: [PATCH 04/14] Handle badformed tags with no trailing >

---
 scrapy/contrib/ibl/htmlpage.py                      |  2 +-
 scrapy/tests/test_contrib_ibl/test_extraction.py    |  2 +-
 scrapy/tests/test_contrib_ibl/test_htmlpage.py      | 10 ++++++++++
 scrapy/tests/test_contrib_ibl/test_htmlpage_data.py |  9 +++++++++
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/scrapy/contrib/ibl/htmlpage.py b/scrapy/contrib/ibl/htmlpage.py
index 023030bdc..30c21c043 100644
--- a/scrapy/contrib/ibl/htmlpage.py
+++ b/scrapy/contrib/ibl/htmlpage.py
@@ -81,7 +81,7 @@ class HtmlTag(HtmlDataFragment):
         return str(self)
 
 _ATTR = "((?:[^=/>\s]|/(?!>))+)(?:\s*=(?:\s*\"(.*?)\"|\s*'(.*?)'|([^>\s]+))?)?"
-_TAG = "<(\/?)(\w+(?::\w+)?)((?:\s+" + _ATTR + ")+\s*|\s*)(\/?)>"
+_TAG = "<(\/?)(\w+(?::\w+)?)((?:\s+" + _ATTR + ")+\s*|\s*)(\/?)>?"
 _DOCTYPE = r"<!DOCTYPE.*?>"
 _SCRIPT = "(<script.*?>)(.*?)(</script.*?>)"
 _COMMENT = "(<!--.*?-->)"
diff --git a/scrapy/tests/test_contrib_ibl/test_extraction.py b/scrapy/tests/test_contrib_ibl/test_extraction.py
index f79c9dc78..e9ce86f7e 100644
--- a/scrapy/tests/test_contrib_ibl/test_extraction.py
+++ b/scrapy/tests/test_contrib_ibl/test_extraction.py
@@ -516,7 +516,7 @@
 ANNOTATED_PAGE19 = u"""
 [hunk body garbled in extraction: ANNOTATED_PAGE19's HTML markup was
 stripped, leaving only its visible text ("Product name", "60.00",
 "description") and a single -/+ pair changing one annotated tag]
diff --git a/scrapy/tests/test_contrib_ibl/test_htmlpage.py b/scrapy/tests/test_contrib_ibl/test_htmlpage.py
index cdba56853..b3bc91fa5 100644
--- a/scrapy/tests/test_contrib_ibl/test_htmlpage.py
+++ b/scrapy/tests/test_contrib_ibl/test_htmlpage.py
@@ -137,3 +137,13 @@ class TestParseHtml(TestCase):
         parsed = list(parse_html("<img src='http://images.play.com/banners/SAM550a.jpg' align='left' hspace='5'/ >"))
         self.assertEqual(parsed[0].attributes, {'src': 'http://images.play.com/banners/SAM550a.jpg', \
             'align': 'left', 'hspace': '5', '/': None})
+
+    def test_no_ending_body(self):
+        """Test case when no ending body nor html elements are present"""
+        parsed = [_decode_element(d) for d in PARSED7]
+        self._test_sample(PAGE7, parsed)
+
+    def test_malformed(self):
+        """Test parsing of some malformed cases"""
+        parsed = [_decode_element(d) for d in PARSED8]
+        self._test_sample(PAGE8, parsed)
diff --git a/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py b/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
index 62cd6b526..39771a666 100644
--- a/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
+++ b/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
@@ -246,3 +246,12 @@ PARSED7 = [
     {'end': 99, 'start': 85},
 ]
+
+PAGE8 = u"""<a href="/overview.asp?id=277"><img src="/img/5200814311.jpg" border="0" title="Vinyl Cornice"</a></td><table width="5">"""
+
+PARSED8 = [
+    {'attributes' : {u'href' : u"/overview.asp?id=277"}, 'end': 31, 'start': 0, 'tag': u'a', 'tag_type': 1},
+    {'attributes' : {u'src' : u"/img/5200814311.jpg", u'border' : u"0", u'title': u'Vinyl Cornice'}, 'end': 94, 'start': 31, 'tag': u'img', 'tag_type': 1},
+    {'attributes' : {}, 'end': 98, 'start': 94, 'tag': u'a', 'tag_type': 2},
+    {'attributes' : {}, 'end': 103, 'start': 98, 'tag': u'td', 'tag_type': 2},
+    {'attributes' : {u'width': u'5'}, 'end': 120, 'start': 103, 'tag': u'table', 'tag_type': 1}
+]
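A quick sketch of what this patch buys in practice (not part of the patch; it
assumes only the parse_html() and HtmlTag names already used by the tests
above)::

    from scrapy.contrib.ibl.htmlpage import parse_html, HtmlTag

    # A tag that never gets its closing ">", as in PAGE8 above. With the old
    # _TAG regex this fragment was not recognised as a tag at all; with the
    # optional ">?" the tag is closed implicitly at the next "<".
    page = u'<img src="/img/5200814311.jpg" border="0" title="Vinyl Cornice"</a>'
    tags = [f for f in parse_html(page) if isinstance(f, HtmlTag)]
    print tags[0].tag, tags[0].attributes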
From ebf5ad933e718198cfa557272693fe0aa57a775f Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Wed, 5 Jan 2011 11:59:19 -0200
Subject: [PATCH 05/14] fixed compatibility with python 2.5 and removed unused
 code

---
 scrapy/utils/memory.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scrapy/utils/memory.py b/scrapy/utils/memory.py
index bed6821f9..3340a0a6e 100644
--- a/scrapy/utils/memory.py
+++ b/scrapy/utils/memory.py
@@ -1,3 +1,5 @@
+from __future__ import with_statement
+
 import os
 import sys
 import struct
@@ -64,7 +66,4 @@ def _vmvalue_solaris(vmkey, pid):
 
     vm_in_kB = parts[vmkey_index[vmkey]]
 
-    def kB_to_Bytes(kB):
-        return kB * 1024
-
     return vm_in_kB * 1024

From 32adbea545e4c4780f7c3545c093a7fb93433a3b Mon Sep 17 00:00:00 2001
From: Martin Olveyra
Date: Mon, 24 Jan 2011 18:40:42 -0200
Subject: [PATCH 06/14] handle case when attributes are not separated by space
 (still recognizable because of quotes)

---
 scrapy/contrib/ibl/htmlpage.py                      |  4 ++--
 .../tests/test_contrib_ibl/test_htmlpage.py         |  6 ++++++
 .../test_contrib_ibl/test_htmlpage_data.py          | 20 +++++++++++++++++++
 3 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/scrapy/contrib/ibl/htmlpage.py b/scrapy/contrib/ibl/htmlpage.py
index 30c21c043..c86ec9670 100644
--- a/scrapy/contrib/ibl/htmlpage.py
+++ b/scrapy/contrib/ibl/htmlpage.py
@@ -80,8 +80,8 @@ class HtmlTag(HtmlDataFragment):
     def __repr__(self):
         return str(self)
 
-_ATTR = "((?:[^=/>\s]|/(?!>))+)(?:\s*=(?:\s*\"(.*?)\"|\s*'(.*?)'|([^>\s]+))?)?"
-_TAG = "<(\/?)(\w+(?::\w+)?)((?:\s+" + _ATTR + ")+\s*|\s*)(\/?)>?"
+_ATTR = "((?:[^=/<>\s]|/(?!>))+)(?:\s*=(?:\s*\"(.*?)\"|\s*'(.*?)'|([^>\s]+))?)?"
+_TAG = "<(\/?)(\w+(?::\w+)?)((?:\s*" + _ATTR + ")+\s*|\s*)(\/?)>?"
 _DOCTYPE = r"<!DOCTYPE.*?>"
 _SCRIPT = "(<script.*?>)(.*?)(</script.*?>)"
 _COMMENT = "(<!--.*?-->)"
diff --git a/scrapy/tests/test_contrib_ibl/test_htmlpage.py b/scrapy/tests/test_contrib_ibl/test_htmlpage.py
index b3bc91fa5..d85ae1afb 100644
--- a/scrapy/tests/test_contrib_ibl/test_htmlpage.py
+++ b/scrapy/tests/test_contrib_ibl/test_htmlpage.py
@@ -147,3 +147,9 @@ class TestParseHtml(TestCase):
         """Test parsing of some malformed cases"""
         parsed = [_decode_element(d) for d in PARSED8]
         self._test_sample(PAGE8, parsed)
+
+    def test_malformed2(self):
+        """Test case when attributes are not separated by space (still recognizable because of quotes)"""
+        parsed = [_decode_element(d) for d in PARSED9]
+        self._test_sample(PAGE9, parsed)
+
diff --git a/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py b/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
index 39771a666..f54dc9f8c 100644
--- a/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
+++ b/scrapy/tests/test_contrib_ibl/test_htmlpage_data.py
@@ -255,3 +255,23 @@ PARSED8 = [
     {'attributes' : {}, 'end': 103, 'start': 98, 'tag': u'td', 'tag_type': 2},
     {'attributes' : {u'width': u'5'}, 'end': 120, 'start': 103, 'tag': u'table', 'tag_type': 1}
 ]
+
+PAGE9 = u"""\
+<html>\
+<body>\
+<img width="230"height="150" src="/images/9589.jpg" >\
+<a href="/product/9589">Click here\
+</a>\
+</body>\
+</html>"""
+
+PARSED9 = [
+    {'attributes' : {}, 'end': 6, 'start': 0, 'tag': 'html', 'tag_type': 1},
+    {'attributes' : {}, 'end': 12, 'start': 6, 'tag': 'body', 'tag_type': 1},
+    {'attributes' : {'width': '230', 'height': '150', 'src': '/images/9589.jpg'}, 'end': 65, 'start': 12, 'tag': 'img', 'tag_type': 1},
+    {'attributes' : {'href': '/product/9589'}, 'end': 89, 'start': 65, 'tag': 'a', 'tag_type': 1},
+    {'end': 99, 'start': 89},
+    {'attributes' : {}, 'end': 103, 'start': 99, 'tag': 'a', 'tag_type': 2},
+    {'attributes' : {}, 'end': 110, 'start': 103, 'tag': 'body', 'tag_type': 2},
+    {'attributes' : {}, 'end': 117, 'start': 110, 'tag': 'html', 'tag_type': 2},
+]

From c5351d2f4882e423828e747b528179d5b1486f7e Mon Sep 17 00:00:00 2001
From: Shane Evans
Date: Tue, 25 Jan 2011 19:23:50 -0200
Subject: [PATCH 07/14] add __hash__ method to Link object to be compatible
 with the __eq__ method

---
 scrapy/link.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/scrapy/link.py b/scrapy/link.py
index ddae07823..371a987cb 100644
--- a/scrapy/link.py
+++ b/scrapy/link.py
@@ -18,6 +18,9 @@ class Link(object):
 
     def __eq__(self, other):
         return self.url == other.url and self.text == other.text
+
+    def __hash__(self):
+        return hash(self.url) ^ hash(self.text)
 
     def __repr__(self):
         return '<Link url=%r text=%r>' % (self.url, self.text)

From 632bc27deb6e2425f25a3c909a2d80ea4c5df0c4 Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Tue, 25 Jan 2011 19:51:17 -0200
Subject: [PATCH 08/14] added tests for Link object

---
 scrapy/tests/test_link.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 scrapy/tests/test_link.py

diff --git a/scrapy/tests/test_link.py b/scrapy/tests/test_link.py
new file mode 100644
index 000000000..32e0095e6
--- /dev/null
+++ b/scrapy/tests/test_link.py
@@ -0,0 +1,28 @@
+import unittest
+
+from scrapy.link import Link
+
+class LinkTest(unittest.TestCase):
+
+    def test_eq_and_hash(self):
+        l1 = Link("http://www.example.com")
+        l2 = Link("http://www.example.com/other")
+        l3 = Link("http://www.example.com")
+
+        self.assertEqual(l1, l1)
+        self.assertEqual(hash(l1), hash(l1))
+        self.assertNotEqual(l1, l2)
+        self.assertNotEqual(hash(l1), hash(l2))
+        self.assertEqual(l1, l3)
+        self.assertEqual(hash(l1), hash(l3))
+
+        l4 = Link("http://www.example.com", text="test")
+        l5 = Link("http://www.example.com", text="test2")
+        l6 = Link("http://www.example.com", text="test")
+
+        self.assertEqual(l4, l4)
+        self.assertEqual(hash(l4), hash(l4))
+        self.assertNotEqual(l4, l5)
+        self.assertNotEqual(hash(l4), hash(l5))
+        self.assertEqual(l4, l6)
+        self.assertEqual(hash(l4), hash(l6))
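Why patches 07 and 08 matter together, in one illustrative snippet (a sketch,
not part of either patch): defining __hash__ consistently with __eq__ makes
Link usable in sets and as dict keys, so equal links collapse to one entry::

    from scrapy.link import Link

    links = set([Link("http://www.example.com", text="test"),
                 Link("http://www.example.com", text="test")])
    print len(links)  # 1 -- equal url/text now implies equal hash as well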
From bfc6c3809b538ac72b5b677234ebcaacfd1190db Mon Sep 17 00:00:00 2001
From: Ismael Carnales
Date: Wed, 9 Feb 2011 16:20:48 -0200
Subject: [PATCH 09/14] Add namespace support to xmliter_lxml

---
 scrapy/contrib_exp/iterators.py      | 11 +++++++---
 scrapy/tests/test_utils_iterators.py | 32 +++++++++++++++++++++++++---
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/scrapy/contrib_exp/iterators.py b/scrapy/contrib_exp/iterators.py
index 0fc73e194..d42a25c3f 100644
--- a/scrapy/contrib_exp/iterators.py
+++ b/scrapy/contrib_exp/iterators.py
@@ -2,14 +2,19 @@ from scrapy.http import Response
 from scrapy.selector import XmlXPathSelector
 
 
-def xmliter_lxml(obj, nodename):
+def xmliter_lxml(obj, nodename, namespace=None):
     from lxml import etree
     reader = _StreamReader(obj)
-    iterable = etree.iterparse(reader, tag=nodename, encoding=reader.encoding)
+    tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
+    iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
     for _, node in iterable:
         nodetext = etree.tostring(node)
         node.clear()
-        yield XmlXPathSelector(text=nodetext).select('//' + nodename)[0]
+        xs = XmlXPathSelector(text=nodetext)
+        if namespace:
+            xs.register_namespace('x', namespace)
+            nodename = 'x:%s' % nodename
+        yield xs.select('//' + nodename)[0]
 
 
 class _StreamReader(object):
diff --git a/scrapy/tests/test_utils_iterators.py b/scrapy/tests/test_utils_iterators.py
index d32d1e658..79435d615 100644
--- a/scrapy/tests/test_utils_iterators.py
+++ b/scrapy/tests/test_utils_iterators.py
@@ -29,12 +29,12 @@ class XmliterTestCase(unittest.TestCase):
         for x in self.xmliter(response, 'product'):
             attrs.append((x.select("@id").extract(), x.select("name/text()").extract(), x.select("./type/text()").extract()))
 
-        self.assertEqual(attrs, 
+        self.assertEqual(attrs,
                          [(['001'], ['Name 1'], ['Type 1']), (['002'], ['Name 2'], ['Type 2'])])
 
     def test_xmliter_text(self):
         body = u"""<?xml version="1.0" encoding="UTF-8"?><products><product>one</product><product>two</product></products>"""
-        
+
         self.assertEqual([x.select("text()").extract() for x in self.xmliter(body, 'product')],
                          [[u'one'], [u'two']])
@@ -74,7 +74,7 @@
 
     def test_xmliter_exception(self):
         body = u"""<?xml version="1.0" encoding="UTF-8"?><products><product>one</product><product>two</product></products>"""
-        
+
         iter = self.xmliter(body, 'product')
         iter.next()
         iter.next()
@@ -97,6 +97,32 @@ class LxmlXmliterTestCase(XmliterTestCase):
     except ImportError:
         skip = "lxml not available"
 
+    def test_xmliter_iterate_namespace(self):
+        body = """\
+        <?xml version="1.0" encoding="UTF-8"?>
+        <rss version="2.0" xmlns:g="http://base.google.com/ns/1.0">
+            <channel>
+            <title>My Dummy Company</title>
+            <link>http://www.mydummycompany.com</link>
+            <description>This is a dummy company. We do nothing.</description>
+            <item>
+                <title>Item 1</title>
+                <description>This is item 1</description>
+                <link>http://www.mydummycompany.com/items/1</link>
+                <g:image_link>http://www.mydummycompany.com/images/item1.jpg</g:image_link>
+            </item>
+            </channel>
+        </rss>
+        """
+        response = XmlResponse(url='http://mydummycompany.com', body=body)
+
+        no_namespace_iter = self.xmliter(response, 'image_link')
+        self.assertEqual(len(list(no_namespace_iter)), 0)
+
+        namespace_iter = self.xmliter(response, 'image_link', 'http://base.google.com/ns/1.0')
+        node = namespace_iter.next()
+        self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item1.jpg'])
+
 class UtilsCsvTestCase(unittest.TestCase):
     sample_feeds_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'sample_data', 'feeds')
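At a call site the new namespace argument reads like this (a sketch; feed_body
stands for an XML document such as the Google Base feed built in the test
above)::

    from scrapy.http import XmlResponse
    from scrapy.contrib_exp.iterators import xmliter_lxml

    response = XmlResponse(url='http://mydummycompany.com', body=feed_body)
    # Without the third argument the namespaced g:image_link nodes are never
    # matched; with it, each yielded selector already has the namespace
    # registered internally under the 'x' prefix.
    for node in xmliter_lxml(response, 'image_link', 'http://base.google.com/ns/1.0'):
        print node.select('text()').extract()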
From 9b07b0ab0a218998191ec9b8537cdf0aea0455a5 Mon Sep 17 00:00:00 2001
From: Ismael Carnales
Date: Fri, 11 Feb 2011 11:41:44 -0200
Subject: [PATCH 10/14] Fix xmliter_lxml

---
 scrapy/contrib_exp/iterators.py      | 4 ++--
 scrapy/tests/test_utils_iterators.py | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/scrapy/contrib_exp/iterators.py b/scrapy/contrib_exp/iterators.py
index d42a25c3f..0f3a8c694 100644
--- a/scrapy/contrib_exp/iterators.py
+++ b/scrapy/contrib_exp/iterators.py
@@ -7,14 +7,14 @@ def xmliter_lxml(obj, nodename, namespace=None):
     reader = _StreamReader(obj)
     tag = '{%s}%s' % (namespace, nodename) if namespace else nodename
     iterable = etree.iterparse(reader, tag=tag, encoding=reader.encoding)
+    selxpath = '//' + ('x:%s' % nodename if namespace else nodename)
     for _, node in iterable:
         nodetext = etree.tostring(node)
         node.clear()
         xs = XmlXPathSelector(text=nodetext)
         if namespace:
             xs.register_namespace('x', namespace)
-            nodename = 'x:%s' % nodename
-        yield xs.select('//' + nodename)[0]
+        yield xs.select(selxpath)[0]
 
 
 class _StreamReader(object):
diff --git a/scrapy/tests/test_utils_iterators.py b/scrapy/tests/test_utils_iterators.py
index 79435d615..06fb4effe 100644
--- a/scrapy/tests/test_utils_iterators.py
+++ b/scrapy/tests/test_utils_iterators.py
@@ -110,6 +110,7 @@
                 <description>This is item 1</description>
                 <link>http://www.mydummycompany.com/items/1</link>
                 <g:image_link>http://www.mydummycompany.com/images/item1.jpg</g:image_link>
+                <g:image_link>http://www.mydummycompany.com/images/item2.jpg</g:image_link>
             </item>
             </channel>
         </rss>
@@ -122,6 +123,8 @@
 
         namespace_iter = self.xmliter(response, 'image_link', 'http://base.google.com/ns/1.0')
         node = namespace_iter.next()
         self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item1.jpg'])
+        node = namespace_iter.next()
+        self.assertEqual(node.select('text()').extract(), ['http://www.mydummycompany.com/images/item2.jpg'])
 
 class UtilsCsvTestCase(unittest.TestCase):

From a1c3fa5dd827e1f842e4270cadeec35539f09e38 Mon Sep 17 00:00:00 2001
From: Shane Evans
Date: Tue, 15 Feb 2011 15:42:10 -0200
Subject: [PATCH 11/14] small refactor of image extraction

---
 scrapy/contrib/ibl/extractors.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scrapy/contrib/ibl/extractors.py b/scrapy/contrib/ibl/extractors.py
index 9c460df10..858aa9d55 100644
--- a/scrapy/contrib/ibl/extractors.py
+++ b/scrapy/contrib/ibl/extractors.py
@@ -132,6 +132,10 @@ def image_url(txt):
     ['http://s7d5.scene7.com/is/image/wasserstrom/165133?wid=227&hei=227&defaultImage=noimage_wasserstrom']
 
     """
+    imgurl = extract_image_url(txt)
+    return [safe_url_string(remove_entities(url(imgurl)))] if imgurl else None
+
+def extract_image_url(txt):
     txt = url(txt)
     imgurl = None
     if txt:
@@ -153,4 +157,4 @@
         imgurl = urlparse.urlunparse(parsed)
     if not imgurl:
         imgurl = txt
-    return [safe_url_string(remove_entities(url(imgurl)))] if imgurl else None
+    return imgurl
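The refactor keeps image_url()'s list-returning contract while exposing the
bare-URL step for reuse. A sketch reusing the docstring's own example input::

    from scrapy.contrib.ibl.extractors import image_url, extract_image_url

    txt = 'http://s7d5.scene7.com/is/image/wasserstrom/165133?wid=227&hei=227&defaultImage=noimage_wasserstrom'
    print image_url(txt)          # one-element list, behaviour unchanged
    print extract_image_url(txt)  # the bare cleaned URL, or None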
From c55355642c26717265167db5acf002edf83ee7ea Mon Sep 17 00:00:00 2001
From: Daniel Grana
Date: Wed, 16 Feb 2011 08:57:42 -0200
Subject: [PATCH 12/14] fix FAQ typos reported by marlun_ at #scrapy IRC
 channel

---
 docs/faq.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/faq.rst b/docs/faq.rst
index 2444a3c05..5783fdcaf 100644
--- a/docs/faq.rst
+++ b/docs/faq.rst
@@ -3,7 +3,7 @@
 Frequently Asked Questions
 ==========================
 
-How does Scrapy compare to BeautifulSoul or lxml?
+How does Scrapy compare to BeautifulSoup or lxml?
 -------------------------------------------------
 
 `BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. Scrapy is
@@ -29,7 +29,7 @@ comparing `jinja2`_ to `Django`_.
 What Python versions does Scrapy support?
 -----------------------------------------
 
-Scrapy runs in Python 2.5, 2.6 and 2.6. But it's recommended you use Python 2.6
+Scrapy runs in Python 2.5, 2.6 and 2.7. But it's recommended you use Python 2.6
 or above, since the Python 2.5 standard library has a few bugs in their URL
 handling libraries. Some of these Python 2.5 bugs not only affect Scrapy but
 any user code, such as spiders. You can see a list of `Python 2.5 bugs that

From fe9febe2b10d787e00c7e48c0966f27082e82d6d Mon Sep 17 00:00:00 2001
From: Pablo Hoffman
Date: Wed, 23 Feb 2011 18:10:16 -0200
Subject: [PATCH 13/14] added --build-egg option to deploy command, to build
 the egg without deploying it

---
 scrapy/commands/deploy.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/scrapy/commands/deploy.py b/scrapy/commands/deploy.py
index 0dcf5037f..c22b9c710 100644
--- a/scrapy/commands/deploy.py
+++ b/scrapy/commands/deploy.py
@@ -57,6 +57,8 @@ class Command(ScrapyCommand):
             help="list available projects on TARGET")
         parser.add_option("--egg", metavar="FILE",
             help="use the given egg, instead of building it")
+        parser.add_option("--build-egg", metavar="FILE",
+            help="only build the egg, don't deploy it")
 
     def run(self, args, opts):
         try:
@@ -75,18 +77,26 @@ class Command(ScrapyCommand):
             projects = json.loads(f.read())['projects']
             print os.linesep.join(projects)
             return
-        target_name = _get_target_name(args)
-        target = _get_target(target_name)
-        project = _get_project(target, opts)
-        version = _get_version(target, opts)
+
+        tmpdir = None
+
-        if opts.egg:
-            _log("Using egg: %s" % opts.egg)
-            egg = opts.egg
-        else:
-            _log("Building egg of %s-%s" % (project, version))
+        if opts.build_egg: # build egg only
             egg, tmpdir = _build_egg()
-        _upload_egg(target, egg, project, version)
+            _log("Writing egg to %s" % opts.build_egg)
+            shutil.copyfile(egg, opts.build_egg)
+        else: # build egg and deploy
+            target_name = _get_target_name(args)
+            target = _get_target(target_name)
+            project = _get_project(target, opts)
+            version = _get_version(target, opts)
+            if opts.egg:
+                _log("Using egg: %s" % opts.egg)
+                egg = opts.egg
+            else:
+                _log("Building egg of %s-%s" % (project, version))
+                egg, tmpdir = _build_egg()
+            _upload_egg(target, egg, project, version)
 
         if tmpdir:
             shutil.rmtree(tmpdir)
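For the record, the new flag is invoked like this (an illustration; the
filename is arbitrary)::

    scrapy deploy --build-egg=myproject.egg

This builds the project egg locally into the given file without contacting
any deploy target, which is handy for inspecting exactly what would be
uploaded.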
From 32fa2add753306fd20da483c01532ac080488f14 Mon Sep 17 00:00:00 2001
From: Shane Evans
Date: Thu, 24 Feb 2011 14:21:23 -0200
Subject: [PATCH 14/14] style fix to ibl contrib

---
 scrapy/contrib/ibl/extraction/regionextract.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scrapy/contrib/ibl/extraction/regionextract.py b/scrapy/contrib/ibl/extraction/regionextract.py
index db5eb2a38..c748a6a79 100644
--- a/scrapy/contrib/ibl/extraction/regionextract.py
+++ b/scrapy/contrib/ibl/extraction/regionextract.py
@@ -75,7 +75,7 @@ class BasicTypeExtractor(object):
     [doctest setup lines garbled in extraction: two HTML string literals
     whose markup was stripped, leaving only the text fragments "x xx" and
     "a name id-9"]
     >>> ex = BasicTypeExtractor(template.annotations[0])
-    >>> ex.extract(page, 0, 3, [LabelledRegion(*(1,2))])
+    >>> ex.extract(page, 0, 3, [LabelledRegion(1, 2)])
     [(u'name', u'a name')]
     """
@@ -395,7 +395,7 @@ class RecordExtractor(object):
             s, p, e = similar_region(page.page_tokens, self.template_tokens, \
                     i, start, sindex)
             if s > 0:
-                similar_ignored_regions.append(LabelledRegion(*(p, e)))
+                similar_ignored_regions.append(LabelledRegion(p, e))
             start = e or start
         extracted_data = first_region.extract(page, pindex, sindex, similar_ignored_regions)
         if extracted_data:
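The two LabelledRegion spellings are equivalent calls; the patch only drops a
redundant tuple packing/unpacking. A stand-in sketch (the namedtuple fields
are illustrative, not the real LabelledRegion definition)::

    from collections import namedtuple

    LabelledRegion = namedtuple('LabelledRegion', 'start end')

    # Unpacking a freshly built tuple is the same call as passing the
    # positional arguments directly.
    assert LabelledRegion(*(1, 2)) == LabelledRegion(1, 2)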