Merge pull request #1384 from scrapy/tmp-py3

In-progress Python 3 port
2025-02-26 17:24:38 +00:00 · 2015-07-30 12:33:54 -03:00 · 2015-07-30 12:33:54 -03:00 · 8177387415
commit 8177387415
parent b6eb3404a2 dafcfd5be6
39 changed files with 387 additions and 1154 deletions
--- a/requirements.txt
+++ b/requirements.txt
@ -5,3 +5,4 @@ cssselect>=0.9
 w3lib>=1.8.0
 queuelib
 six>=1.5.2
+PyDispatcher>=2.0.5
--- a/scrapy/downloadermiddlewares/robotstxt.py
+++ b/scrapy/downloadermiddlewares/robotstxt.py
@ -65,5 +65,17 @@ class RobotsTxtMiddleware(object):

    def _parse_robots(self, response):
        rp = robotparser.RobotFileParser(response.url)
-        rp.parse(response.body.splitlines())
+        body = ''
+        if hasattr(response, 'body_as_unicode'):
+            body = response.body_as_unicode()
+        else: # last effort try
+            try:
+                body = response.body.decode('utf-8')
+            except UnicodeDecodeError:
+                # If we found garbage, disregard it,
+                # but keep the lookup cached (in self._parsers).
+                # Running rp.parse() will set rp state from
+                # 'disallow all' to 'allow any'.
+                pass
+        rp.parse(body.splitlines())
        self._parsers[urlparse_cached(response).netloc] = rp
--- a/scrapy/http/request/__init__.py
+++ b/scrapy/http/request/__init__.py
@ -8,6 +8,7 @@ import six
 from w3lib.url import safe_url_string

 from scrapy.http.headers import Headers
+from scrapy.utils.python import to_native_str, to_bytes
 from scrapy.utils.trackref import object_ref
 from scrapy.utils.url import escape_ajax
 from scrapy.http.common import obsolete_setter
@ -46,15 +47,12 @@ class Request(object_ref):
        return self._url

    def _set_url(self, url):
-        if isinstance(url, str):
-            self._url = escape_ajax(safe_url_string(url))
-        elif isinstance(url, six.text_type):
-            if self.encoding is None:
-                raise TypeError('Cannot convert unicode url - %s has no encoding' %
-                                type(self).__name__)
-            self._set_url(url.encode(self.encoding))
-        else:
+        if not isinstance(url, six.string_types):
            raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
+
+        url = to_native_str(url, self.encoding)
+        self._url = escape_ajax(safe_url_string(url))
+
        if ':' not in self._url:
            raise ValueError('Missing scheme in request url: %s' % self._url)

@ -64,17 +62,10 @@ class Request(object_ref):
        return self._body

    def _set_body(self, body):
-        if isinstance(body, str):
-            self._body = body
-        elif isinstance(body, six.text_type):
-            if self.encoding is None:
-                raise TypeError('Cannot convert unicode body - %s has no encoding' %
-                                type(self).__name__)
-            self._body = body.encode(self.encoding)
-        elif body is None:
-            self._body = ''
+        if body is None:
+            self._body = b''
        else:
-            raise TypeError("Request body must either str or unicode. Got: '%s'" % type(body).__name__)
+            self._body = to_bytes(body, self.encoding)

    body = property(_get_body, obsolete_setter(_set_body, 'body'))

--- a/scrapy/http/request/form.py
+++ b/scrapy/http/request/form.py
@ -9,7 +9,7 @@ from six.moves.urllib.parse import urljoin, urlencode
 import lxml.html
 import six
 from scrapy.http.request import Request
-from scrapy.utils.python import to_bytes
+from scrapy.utils.python import to_bytes, is_listlike


 class FormRequest(Request):
@ -25,7 +25,7 @@ class FormRequest(Request):
            items = formdata.items() if isinstance(formdata, dict) else formdata
            querystr = _urlencode(items, self.encoding)
            if self.method == 'POST':
-                self.headers.setdefault('Content-Type', 'application/x-www-form-urlencoded')
+                self.headers.setdefault(b'Content-Type', b'application/x-www-form-urlencoded')
                self._set_body(querystr)
            else:
                self._set_url(self.url + ('&' if '?' in self.url else '?') + querystr)
@ -50,7 +50,7 @@ def _get_form_url(form, url):
 def _urlencode(seq, enc):
    values = [(to_bytes(k, enc), to_bytes(v, enc))
              for k, vs in seq
-              for v in (vs if hasattr(vs, '__iter__') else [vs])]
+              for v in (vs if is_listlike(vs) else [vs])]
    return urlencode(values, doseq=1)


--- a/scrapy/http/response/__init__.py
+++ b/scrapy/http/response/__init__.py
@ -4,9 +4,6 @@ responses in Scrapy.

 See documentation in docs/topics/request-response.rst
 """
-
-import copy
-
 from six.moves.urllib.parse import urljoin

 from scrapy.http.headers import Headers
@ -15,7 +12,7 @@ from scrapy.http.common import obsolete_setter

 class Response(object_ref):

-    def __init__(self, url, status=200, headers=None, body='', flags=None, request=None):
+    def __init__(self, url, status=200, headers=None, body=b'', flags=None, request=None):
        self.headers = Headers(headers or {})
        self.status = int(status)
        self._set_body(body)
@ -28,8 +25,10 @@ class Response(object_ref):
        try:
            return self.request.meta
        except AttributeError:
-            raise AttributeError("Response.meta not available, this response " \
-                "is not tied to any request")
+            raise AttributeError(
+                "Response.meta not available, this response "
+                "is not tied to any request"
+            )

    def _get_url(self):
        return self._url
@ -38,7 +37,7 @@ class Response(object_ref):
        if isinstance(url, str):
            self._url = url
        else:
-            raise TypeError('%s url must be str, got %s:' % (type(self).__name__, \
+            raise TypeError('%s url must be str, got %s:' % (type(self).__name__,
                type(url).__name__))

    url = property(_get_url, obsolete_setter(_set_url, 'url'))
@ -47,16 +46,15 @@ class Response(object_ref):
        return self._body

    def _set_body(self, body):
-        if isinstance(body, str):
-            self._body = body
-        elif isinstance(body, unicode):
-            raise TypeError("Cannot assign a unicode body to a raw Response. " \
-                "Use TextResponse, HtmlResponse, etc")
-        elif body is None:
-            self._body = ''
+        if body is None:
+            self._body = b''
+        elif not isinstance(body, bytes):
+            raise TypeError(
+                "Response body must be bytes. "
+                "If you want to pass unicode body use TextResponse "
+                "or HtmlResponse.")
        else:
-            raise TypeError("Response body must either be str or unicode. Got: '%s'" \
-                % type(body).__name__)
+            self._body = body

    body = property(_get_body, obsolete_setter(_set_body, 'body'))

--- a/scrapy/http/response/text.py
+++ b/scrapy/http/response/text.py
@ -5,13 +5,14 @@ discovering (through HTTP headers) to base Response class.
 See documentation in docs/topics/request-response.rst
 """

+import six
 from six.moves.urllib.parse import urljoin

 from w3lib.encoding import html_to_unicode, resolve_encoding, \
    html_body_declared_encoding, http_content_type_encoding
 from scrapy.http.response import Response
 from scrapy.utils.response import get_base_url
-from scrapy.utils.python import memoizemethod_noargs
+from scrapy.utils.python import memoizemethod_noargs, to_native_str


 class TextResponse(Response):
@ -26,18 +27,18 @@ class TextResponse(Response):
        super(TextResponse, self).__init__(*args, **kwargs)

    def _set_url(self, url):
-        if isinstance(url, unicode):
-            if self.encoding is None:
-                raise TypeError('Cannot convert unicode url - %s has no encoding' %
-                    type(self).__name__)
-            self._url = url.encode(self.encoding)
+        if isinstance(url, six.text_type):
+            if six.PY2 and self.encoding is None:
+                raise TypeError("Cannot convert unicode url - %s "
+                                "has no encoding" % type(self).__name__)
+            self._url = to_native_str(url, self.encoding)
        else:
            super(TextResponse, self)._set_url(url)

    def _set_body(self, body):
-        self._body = ''
-        if isinstance(body, unicode):
-            if self.encoding is None:
+        self._body = b''  # used by encoding detection
+        if isinstance(body, six.text_type):
+            if self._encoding is None:
                raise TypeError('Cannot convert unicode body - %s has no encoding' %
                    type(self).__name__)
            self._body = body.encode(self._encoding)
@ -73,14 +74,14 @@ class TextResponse(Response):

    @memoizemethod_noargs
    def _headers_encoding(self):
-        content_type = self.headers.get('Content-Type')
-        return http_content_type_encoding(content_type)
+        content_type = self.headers.get(b'Content-Type', b'')
+        return http_content_type_encoding(to_native_str(content_type))

    def _body_inferred_encoding(self):
        if self._cached_benc is None:
-            content_type = self.headers.get('Content-Type')
-            benc, ubody = html_to_unicode(content_type, self.body, \
-                    auto_detect_fun=self._auto_detect_fun, \
+            content_type = to_native_str(self.headers.get(b'Content-Type', b''))
+            benc, ubody = html_to_unicode(content_type, self.body,
+                    auto_detect_fun=self._auto_detect_fun,
                    default_encoding=self._DEFAULT_ENCODING)
            self._cached_benc = benc
            self._cached_ubody = ubody
--- a/scrapy/pipelines/files.py
+++ b/scrapy/pipelines/files.py
@ -7,9 +7,9 @@ See documentation in topics/media-pipeline.rst
 import hashlib
 import os
 import os.path
-import rfc822
 import time
 import logging
+from email.utils import parsedate_tz, mktime_tz
 from six.moves.urllib.parse import urlparse
 from collections import defaultdict
 import six
@ -91,8 +91,8 @@ class S3FilesStore(object):
        def _onsuccess(boto_key):
            checksum = boto_key.etag.strip('"')
            last_modified = boto_key.last_modified
-            modified_tuple = rfc822.parsedate_tz(last_modified)
-            modified_stamp = int(rfc822.mktime_tz(modified_tuple))
+            modified_tuple = parsedate_tz(last_modified)
+            modified_stamp = int(mktime_tz(modified_tuple))
            return {'checksum': checksum, 'last_modified': modified_stamp}

        return self._get_boto_key(path).addCallback(_onsuccess)
--- a/scrapy/responsetypes.py
+++ b/scrapy/responsetypes.py
@ -92,9 +92,9 @@ class ResponseTypes(object):
        chunk = body[:5000]
        if isbinarytext(chunk):
            return self.from_mimetype('application/octet-stream')
-        elif "<html>" in chunk.lower():
+        elif b"<html>" in chunk.lower():
            return self.from_mimetype('text/html')
-        elif "<?xml" in chunk.lower():
+        elif b"<?xml" in chunk.lower():
            return self.from_mimetype('text/xml')
        else:
            return self.from_mimetype('text')
--- a/scrapy/selector/unified.py
+++ b/scrapy/selector/unified.py
@ -121,7 +121,7 @@ class Selector(object_ref):
        try:
            return etree.tostring(self._root,
                                  method=self._tostring_method,
-                                  encoding=unicode,
+                                  encoding="unicode",
                                  with_tail=False)
        except (AttributeError, TypeError):
            if self._root is True:
@ -129,7 +129,7 @@ class Selector(object_ref):
            elif self._root is False:
                return u'0'
            else:
-                return unicode(self._root)
+                return six.text_type(self._root)

    def register_namespace(self, prefix, uri):
        if self.namespaces is None:
--- a/scrapy/signalmanager.py
+++ b/scrapy/signalmanager.py
@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from scrapy.xlib.pydispatch import dispatcher
+from pydispatch import dispatcher
 from scrapy.utils import signal as _signal


--- a/scrapy/utils/defer.py
+++ b/scrapy/utils/defer.py
@ -61,7 +61,7 @@ def parallel(iterable, count, callable, *args, **named):
    """
    coop = task.Cooperator()
    work = (callable(elem, *args, **named) for elem in iterable)
-    return defer.DeferredList([coop.coiterate(work) for i in xrange(count)])
+    return defer.DeferredList([coop.coiterate(work) for i in range(count)])

 def process_chain(callbacks, input, *a, **kw):
    """Return a Deferred built by chaining the given callbacks"""
@ -97,7 +97,7 @@ def iter_errback(iterable, errback, *a, **kw):
    iterating it.
    """
    it = iter(iterable)
-    while 1:
+    while True:
        try:
            yield next(it)
        except StopIteration:
--- a/scrapy/utils/misc.py
+++ b/scrapy/utils/misc.py
@ -7,7 +7,7 @@ from pkgutil import iter_modules
 import six
 from w3lib.html import replace_entities

-from scrapy.utils.python import flatten
+from scrapy.utils.python import flatten, to_unicode
 from scrapy.item import BaseItem


@ -81,7 +81,7 @@ def extract_regex(regex, text, encoding='utf-8'):
    * if the regex doesn't contain any group the entire regex matching is returned
    """

-    if isinstance(regex, basestring):
+    if isinstance(regex, six.string_types):
        regex = re.compile(regex, re.UNICODE)

    try:
@ -90,10 +90,11 @@ def extract_regex(regex, text, encoding='utf-8'):
        strings = regex.findall(text)    # full regex or numbered groups
    strings = flatten(strings)

-    if isinstance(text, unicode):
+    if isinstance(text, six.text_type):
        return [replace_entities(s, keep=['lt', 'amp']) for s in strings]
    else:
-        return [replace_entities(unicode(s, encoding), keep=['lt', 'amp']) for s in strings]
+        return [replace_entities(to_unicode(s, encoding), keep=['lt', 'amp'])
+                for s in strings]


 def md5sum(file):
@ -105,7 +106,7 @@ def md5sum(file):
    '784406af91dd5a54fbb9c84c2236595a'
    """
    m = hashlib.md5()
-    while 1:
+    while True:
        d = file.read(8096)
        if not d:
            break
--- a/scrapy/utils/python.py
+++ b/scrapy/utils/python.py
@ -120,6 +120,15 @@ def to_bytes(text, encoding=None, errors='strict'):
    return text.encode(encoding, errors)


+def to_native_str(text, encoding=None, errors='strict'):
+    """ Return str representation of `text`
+    (bytes in Python 2.x and unicode in Python 3.x). """
+    if six.PY2:
+        return to_bytes(text, encoding, errors)
+    else:
+        return to_unicode(text, encoding, errors)
+
+
 def re_rsearch(pattern, text, chunk_size=1024):
    """
    This function does a reverse search in a text using a regular expression
--- a/scrapy/utils/request.py
+++ b/scrapy/utils/request.py
@ -10,6 +10,7 @@ from six.moves.urllib.parse import urlunparse

 from twisted.internet.defer import Deferred
 from w3lib.http import basic_auth_header
+from scrapy.utils.python import to_bytes, to_native_str

 from scrapy.utils.url import canonicalize_url
 from scrapy.utils.httpobj import urlparse_cached
@ -44,13 +45,14 @@ def request_fingerprint(request, include_headers=None):

    """
    if include_headers:
-        include_headers = tuple([h.lower() for h in sorted(include_headers)])
+        include_headers = tuple([to_bytes(h.lower())
+                                 for h in sorted(include_headers)])
    cache = _fingerprint_cache.setdefault(request, {})
    if include_headers not in cache:
        fp = hashlib.sha1()
-        fp.update(request.method)
-        fp.update(canonicalize_url(request.url))
-        fp.update(request.body or '')
+        fp.update(to_bytes(request.method))
+        fp.update(to_bytes(canonicalize_url(request.url)))
+        fp.update(request.body or b'')
        if include_headers:
            for hdr in include_headers:
                if hdr in request.headers:
@ -60,12 +62,14 @@ def request_fingerprint(request, include_headers=None):
        cache[include_headers] = fp.hexdigest()
    return cache[include_headers]

+
 def request_authenticate(request, username, password):
    """Autenticate the given request (in place) using the HTTP basic access
    authentication mechanism (RFC 2617) and the given username and password
    """
    request.headers['Authorization'] = basic_auth_header(username, password)

+
 def request_httprepr(request):
    """Return the raw HTTP representation (as string) of the given request.
    This is provided only for reference since it's not the actual stream of
@ -74,11 +78,11 @@ def request_httprepr(request):
    """
    parsed = urlparse_cached(request)
    path = urlunparse(('', '', parsed.path or '/', parsed.params, parsed.query, ''))
-    s  = "%s %s HTTP/1.1\r\n" % (request.method, path)
-    s += "Host: %s\r\n" % parsed.hostname
+    s = to_bytes(request.method) + b" " + to_bytes(path) + b" HTTP/1.1\r\n"
+    s += b"Host: " + to_bytes(parsed.hostname) + b"\r\n"
    if request.headers:
-        s += request.headers.to_string() + "\r\n"
-    s += "\r\n"
+        s += request.headers.to_string() + b"\r\n"
+    s += b"\r\n"
    s += request.body
    return s

--- a/scrapy/utils/signal.py
+++ b/scrapy/utils/signal.py
@ -5,19 +5,23 @@ import logging
 from twisted.internet.defer import maybeDeferred, DeferredList, Deferred
 from twisted.python.failure import Failure

-from scrapy.xlib.pydispatch.dispatcher import Any, Anonymous, liveReceivers, \
+from pydispatch.dispatcher import Any, Anonymous, liveReceivers, \
    getAllReceivers, disconnect
-from scrapy.xlib.pydispatch.robustapply import robustApply
+from pydispatch.robustapply import robustApply
 from scrapy.utils.log import failure_to_exc_info

 logger = logging.getLogger(__name__)


+class _IgnoredException(Exception):
+    pass
+
+
 def send_catch_log(signal=Any, sender=Anonymous, *arguments, **named):
    """Like pydispatcher.robust.sendRobust but it also logs errors and returns
    Failures instead of exceptions.
    """
-    dont_log = named.pop('dont_log', None)
+    dont_log = named.pop('dont_log', _IgnoredException)
    spider = named.get('spider', None)
    responses = []
    for receiver in liveReceivers(getAllReceivers(sender, signal)):
@ -39,6 +43,7 @@ def send_catch_log(signal=Any, sender=Anonymous, *arguments, **named):
        responses.append((receiver, result))
    return responses

+
 def send_catch_log_deferred(signal=Any, sender=Anonymous, *arguments, **named):
    """Like send_catch_log but supports returning deferreds on signal handlers.
    Returns a deferred that gets fired once all signal handlers deferreds were
@ -65,6 +70,7 @@ def send_catch_log_deferred(signal=Any, sender=Anonymous, *arguments, **named):
    d.addCallback(lambda out: [x[1] for x in out])
    return d

+
 def disconnect_all(signal=Any, sender=Any):
    """Disconnect all signal handlers. Useful for cleaning up after running
    tests
--- a/scrapy/utils/url.py
+++ b/scrapy/utils/url.py
@ -10,19 +10,20 @@ from six.moves.urllib.parse import (ParseResult, urlunparse, urldefrag,
                                    urlparse, parse_qsl, urlencode,
                                    unquote)

-# scrapy.utils.url was moved to w3lib.url and import * ensures this move doesn't break old code
+# scrapy.utils.url was moved to w3lib.url and import * ensures this
+# move doesn't break old code
 from w3lib.url import *
-from scrapy.utils.python import to_bytes
+from w3lib.url import _safe_chars
+from scrapy.utils.python import to_native_str


 def url_is_from_any_domain(url, domains):
    """Return True if the url belongs to any of the given domains"""
    host = parse_url(url).netloc.lower()
-
-    if host:
-        return any(((host == d.lower()) or (host.endswith('.%s' % d.lower())) for d in domains))
-    else:
+    if not host:
        return False
+    domains = [d.lower() for d in domains]
+    return any((host == d) or (host.endswith('.%s' % d)) for d in domains)


 def url_is_from_spider(url, spider):
@ -36,7 +37,7 @@ def url_has_any_extension(url, extensions):


 def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
-        encoding=None):
+                     encoding=None):
    """Canonicalize the given url by applying the following procedures:

    - sort query arguments, first by key, then by value
@ -57,6 +58,11 @@ def canonicalize_url(url, keep_blank_values=True, keep_fragments=False,
    keyvals = parse_qsl(query, keep_blank_values)
    keyvals.sort()
    query = urlencode(keyvals)
+
+    # XXX: copied from w3lib.url.safe_url_string to add encoding argument
+    # path = to_native_str(path, encoding)
+    # path = moves.urllib.parse.quote(path, _safe_chars, encoding='latin1') or '/'
+
    path = safe_url_string(_unquotepath(path)) or '/'
    fragment = '' if not keep_fragments else fragment
    return urlunparse((scheme, netloc.lower(), path, params, query, fragment))
@ -74,7 +80,7 @@ def parse_url(url, encoding=None):
    """
    if isinstance(url, ParseResult):
        return url
-    return urlparse(to_bytes(url, encoding))
+    return urlparse(to_native_str(url, encoding))


 def escape_ajax(url):
--- a/scrapy/xlib/pydispatch/__init__.py
+++ b/scrapy/xlib/pydispatch/__init__.py
@ -1,6 +0,0 @@
-"""Multi-consumer multi-producer dispatching mechanism
-"""
-__version__ = "2.0.0"
-__author__ = "Patrick K. O'Brien"
-__license__ = "BSD-style, see license.txt for details"
-
--- a/scrapy/xlib/pydispatch/dispatcher.py
+++ b/scrapy/xlib/pydispatch/dispatcher.py
@ -1,511 +0,0 @@
-"""Multiple-producer-multiple-consumer signal-dispatching
-
-dispatcher is the core of the PyDispatcher system,
-providing the primary API and the core logic for the
-system.
-
-Module attributes of note:
-
-    Any -- Singleton used to signal either "Any Sender" or
-        "Any Signal".  See documentation of the _Any class.
-    Anonymous -- Singleton used to signal "Anonymous Sender"
-        See documentation of the _Anonymous class.
-
-Internal attributes:
-    WEAKREF_TYPES -- tuple of types/classes which represent
-        weak references to receivers, and thus must be de-
-        referenced on retrieval to retrieve the callable
-        object
-    connections -- { senderkey (id) : { signal : [receivers...]}}
-    senders -- { senderkey (id) : weakref(sender) }
-        used for cleaning up sender references on sender
-        deletion
-    sendersBack -- { receiverkey (id) : [senderkey (id)...] }
-        used for cleaning up receiver references on receiver
-        deletion, (considerably speeds up the cleanup process
-        vs. the original code.)
-"""
-from __future__ import generators
-import types, weakref, six
-from scrapy.xlib.pydispatch import saferef, robustapply, errors
-
-__author__ = "Patrick K. O'Brien <pobrien@orbtech.com>"
-__cvsid__ = "$Id: dispatcher.py,v 1.1.1.1 2006/07/07 15:59:38 mcfletch Exp $"
-__version__ = "$Revision: 1.1.1.1 $"[11:-2]
-
-
-class _Parameter:
-    """Used to represent default parameter values."""
-
-    def __repr__(self):
-        return self.__class__.__name__
-
-
-class _Any(_Parameter):
-    """Singleton used to signal either "Any Sender" or "Any Signal"
-
-    The Any object can be used with connect, disconnect,
-    send, or sendExact to signal that the parameter given
-    Any should react to all senders/signals, not just
-    a particular sender/signal.
-    """
-
-
-Any = _Any()
-
-
-class _Anonymous(_Parameter):
-    """Singleton used to signal "Anonymous Sender"
-
-    The Anonymous object is used to signal that the sender
-    of a message is not specified (as distinct from being
-    "any sender").  Registering callbacks for Anonymous
-    will only receive messages sent without senders.  Sending
-    with anonymous will only send messages to those receivers
-    registered for Any or Anonymous.
-
-    Note:
-        The default sender for connect is Any, while the
-        default sender for send is Anonymous.  This has
-        the effect that if you do not specify any senders
-        in either function then all messages are routed
-        as though there was a single sender (Anonymous)
-        being used everywhere.
-    """
-
-
-Anonymous = _Anonymous()
-
-WEAKREF_TYPES = (weakref.ReferenceType, saferef.BoundMethodWeakref)
-
-connections = {}
-senders = {}
-sendersBack = {}
-
-
-def connect(receiver, signal=Any, sender=Any, weak=True):
-    """Connect receiver to sender for signal
-
-    receiver -- a callable Python object which is to receive
-        messages/signals/events.  Receivers must be hashable
-        objects.
-
-        if weak is True, then receiver must be weak-referencable
-        (more precisely saferef.safeRef() must be able to create
-        a reference to the receiver).
-
-        Receivers are fairly flexible in their specification,
-        as the machinery in the robustApply module takes care
-        of most of the details regarding figuring out appropriate
-        subsets of the sent arguments to apply to a given
-        receiver.
-
-        Note:
-            if receiver is itself a weak reference (a callable),
-            it will be de-referenced by the system's machinery,
-            so *generally* weak references are not suitable as
-            receivers, though some use might be found for the
-            facility whereby a higher-level library passes in
-            pre-weakrefed receiver references.
-
-    signal -- the signal to which the receiver should respond
-
-        if Any, receiver will receive any signal from the
-        indicated sender (which might also be Any, but is not
-        necessarily Any).
-
-        Otherwise must be a hashable Python object other than
-        None (DispatcherError raised on None).
-
-    sender -- the sender to which the receiver should respond
-
-        if Any, receiver will receive the indicated signals
-        from any sender.
-
-        if Anonymous, receiver will only receive indicated
-        signals from send/sendExact which do not specify a
-        sender, or specify Anonymous explicitly as the sender.
-
-        Otherwise can be any python object.
-
-    weak -- whether to use weak references to the receiver
-        By default, the module will attempt to use weak
-        references to the receiver objects.  If this parameter
-        is false, then strong references will be used.
-
-    returns None, may raise DispatcherTypeError
-    """
-    if signal is None:
-        raise errors.DispatcherTypeError(
-            'Signal cannot be None (receiver=%r sender=%r)' % (
-                receiver, sender)
-        )
-    if weak:
-        receiver = saferef.safeRef(receiver, onDelete=_removeReceiver)
-    senderkey = id(sender)
-    if senderkey in connections:
-        signals = connections[senderkey]
-    else:
-        connections[senderkey] = signals = {}
-    # Keep track of senders for cleanup.
-    # Is Anonymous something we want to clean up?
-    if sender not in (None, Anonymous, Any):
-        def remove(object, senderkey=senderkey):
-            _removeSender(senderkey=senderkey)
-
-        # Skip objects that can not be weakly referenced, which means
-        # they won't be automatically cleaned up, but that's too bad.
-        try:
-            weakSender = weakref.ref(sender, remove)
-            senders[senderkey] = weakSender
-        except:
-            pass
-
-    receiverID = id(receiver)
-    # get current set, remove any current references to
-    # this receiver in the set, including back-references
-    if signal in signals:
-        receivers = signals[signal]
-        _removeOldBackRefs(senderkey, signal, receiver, receivers)
-    else:
-        receivers = signals[signal] = []
-    try:
-        current = sendersBack.get(receiverID)
-        if current is None:
-            sendersBack[receiverID] = current = []
-        if senderkey not in current:
-            current.append(senderkey)
-    except:
-        pass
-
-    receivers.append(receiver)
-
-
-def disconnect(receiver, signal=Any, sender=Any, weak=True):
-    """Disconnect receiver from sender for signal
-
-    receiver -- the registered receiver to disconnect
-    signal -- the registered signal to disconnect
-    sender -- the registered sender to disconnect
-    weak -- the weakref state to disconnect
-
-    disconnect reverses the process of connect,
-    the semantics for the individual elements are
-    logically equivalent to a tuple of
-    (receiver, signal, sender, weak) used as a key
-    to be deleted from the internal routing tables.
-    (The actual process is slightly more complex
-    but the semantics are basically the same).
-
-    Note:
-        Using disconnect is not required to cleanup
-        routing when an object is deleted, the framework
-        will remove routes for deleted objects
-        automatically.  It's only necessary to disconnect
-        if you want to stop routing to a live object.
-
-    returns None, may raise DispatcherTypeError or
-        DispatcherKeyError
-    """
-    if signal is None:
-        raise errors.DispatcherTypeError(
-            'Signal cannot be None (receiver=%r sender=%r)' % (
-                receiver, sender)
-        )
-    if weak: receiver = saferef.safeRef(receiver)
-    senderkey = id(sender)
-    try:
-        signals = connections[senderkey]
-        receivers = signals[signal]
-    except KeyError:
-        raise errors.DispatcherKeyError(
-            """No receivers found for signal %r from sender %r""" % (
-                signal,
-                sender
-            )
-        )
-    try:
-        # also removes from receivers
-        _removeOldBackRefs(senderkey, signal, receiver, receivers)
-    except ValueError:
-        raise errors.DispatcherKeyError(
-            """No connection to receiver %s for signal %s from sender %s""" % (
-                receiver,
-                signal,
-                sender
-            )
-        )
-    _cleanupConnections(senderkey, signal)
-
-
-def getReceivers(sender=Any, signal=Any):
-    """Get list of receivers from global tables
-
-    This utility function allows you to retrieve the
-    raw list of receivers from the connections table
-    for the given sender and signal pair.
-
-    Note:
-        there is no guarantee that this is the actual list
-        stored in the connections table, so the value
-        should be treated as a simple iterable/truth value
-        rather than, for instance a list to which you
-        might append new records.
-
-    Normally you would use liveReceivers( getReceivers( ...))
-    to retrieve the actual receiver objects as an iterable
-    object.
-    """
-    try:
-        return connections[id(sender)][signal]
-    except KeyError:
-        return []
-
-
-def liveReceivers(receivers):
-    """Filter sequence of receivers to get resolved, live receivers
-
-    This is a generator which will iterate over
-    the passed sequence, checking for weak references
-    and resolving them, then returning all live
-    receivers.
-    """
-    for receiver in receivers:
-        if isinstance(receiver, WEAKREF_TYPES):
-            # Dereference the weak reference.
-            receiver = receiver()
-            if receiver is not None:
-                yield receiver
-        else:
-            yield receiver
-
-
-def getAllReceivers(sender=Any, signal=Any):
-    """Get list of all receivers from global tables
-
-    This gets all receivers which should receive
-    the given signal from sender, each receiver should
-    be produced only once by the resulting generator
-    """
-    receivers = {}
-    for set in (
-            # Get receivers that receive *this* signal from *this* sender.
-            getReceivers(sender, signal),
-            # Add receivers that receive *any* signal from *this* sender.
-            getReceivers(sender, Any),
-            # Add receivers that receive *this* signal from *any* sender.
-            getReceivers(Any, signal),
-            # Add receivers that receive *any* signal from *any* sender.
-            getReceivers(Any, Any),
-    ):
-        for receiver in set:
-            if receiver:  # filter out dead instance-method weakrefs
-                try:
-                    if receiver not in receivers:
-                        receivers[receiver] = 1
-                        yield receiver
-                except TypeError:
-                    # dead weakrefs raise TypeError on hash...
-                    pass
-
-
-def send(signal=Any, sender=Anonymous, *arguments, **named):
-    """Send signal from sender to all connected receivers.
-
-    signal -- (hashable) signal value, see connect for details
-
-    sender -- the sender of the signal
-
-        if Any, only receivers registered for Any will receive
-        the message.
-
-        if Anonymous, only receivers registered to receive
-        messages from Anonymous or Any will receive the message
-
-        Otherwise can be any python object (normally one
-        registered with a connect if you actually want
-        something to occur).
-
-    arguments -- positional arguments which will be passed to
-        *all* receivers. Note that this may raise TypeErrors
-        if the receivers do not allow the particular arguments.
-        Note also that arguments are applied before named
-        arguments, so they should be used with care.
-
-    named -- named arguments which will be filtered according
-        to the parameters of the receivers to only provide those
-        acceptable to the receiver.
-
-    Return a list of tuple pairs [(receiver, response), ... ]
-
-    if any receiver raises an error, the error propagates back
-    through send, terminating the dispatch loop, so it is quite
-    possible to not have all receivers called if a raises an
-    error.
-    """
-    # Call each receiver with whatever arguments it can accept.
-    # Return a list of tuple pairs [(receiver, response), ... ].
-    responses = []
-    for receiver in liveReceivers(getAllReceivers(sender, signal)):
-        response = robustapply.robustApply(
-            receiver,
-            signal=signal,
-            sender=sender,
-            *arguments,
-            **named
-        )
-        responses.append((receiver, response))
-    return responses
-
-
-def sendExact(signal=Any, sender=Anonymous, *arguments, **named):
-    """Send signal only to those receivers registered for exact message
-
-    sendExact allows for avoiding Any/Anonymous registered
-    handlers, sending only to those receivers explicitly
-    registered for a particular signal on a particular
-    sender.
-    """
-    responses = []
-    for receiver in liveReceivers(getReceivers(sender, signal)):
-        response = robustapply.robustApply(
-            receiver,
-            signal=signal,
-            sender=sender,
-            *arguments,
-            **named
-        )
-        responses.append((receiver, response))
-    return responses
-
-
-def _removeReceiver(receiver):
-    """Remove receiver from connections."""
-    if not sendersBack:
-        # During module cleanup the mapping will be replaced with None
-        return False
-    backKey = id(receiver)
-    try:
-        backSet = sendersBack.pop(backKey)
-    except KeyError as err:
-        return False
-    else:
-        for senderkey in backSet:
-            try:
-                signals = connections[senderkey].keys()
-            except KeyError as err:
-                pass
-            else:
-                for signal in signals:
-                    try:
-                        receivers = connections[senderkey][signal]
-                    except KeyError:
-                        pass
-                    else:
-                        try:
-                            receivers.remove(receiver)
-                        except Exception as err:
-                            pass
-                    _cleanupConnections(senderkey, signal)
-
-
-def _cleanupConnections(senderkey, signal):
-    """Delete any empty signals for senderkey. Delete senderkey if empty."""
-    try:
-        receivers = connections[senderkey][signal]
-    except:
-        pass
-    else:
-        if not receivers:
-            # No more connected receivers. Therefore, remove the signal.
-            try:
-                signals = connections[senderkey]
-            except KeyError:
-                pass
-            else:
-                del signals[signal]
-                if not signals:
-                    # No more signal connections. Therefore, remove the sender.
-                    _removeSender(senderkey)
-
-
-def _removeSender(senderkey):
-    """Remove senderkey from connections."""
-    _removeBackrefs(senderkey)
-    try:
-        del connections[senderkey]
-    except KeyError:
-        pass
-    # Senderkey will only be in senders dictionary if sender
-    # could be weakly referenced.
-    try:
-        del senders[senderkey]
-    except:
-        pass
-
-
-def _removeBackrefs(senderkey):
-    """Remove all back-references to this senderkey"""
-    try:
-        signals = connections[senderkey]
-    except KeyError:
-        signals = None
-    else:
-        items = signals.items()
-
-        def allReceivers():
-            for signal, set in items:
-                for item in set:
-                    yield item
-
-        for receiver in allReceivers():
-            _killBackref(receiver, senderkey)
-
-
-def _removeOldBackRefs(senderkey, signal, receiver, receivers):
-    """Kill old sendersBack references from receiver
-
-    This guards against multiple registration of the same
-    receiver for a given signal and sender leaking memory
-    as old back reference records build up.
-
-    Also removes old receiver instance from receivers
-    """
-    try:
-        index = receivers.index(receiver)
-        # need to scan back references here and remove senderkey
-    except ValueError:
-        return False
-    else:
-        oldReceiver = receivers[index]
-        del receivers[index]
-        found = 0
-        signals = connections.get(signal)
-        if signals is not None:
-            for sig, recs in six.iteritems(connections.get(signal, {})):
-                if sig != signal:
-                    for rec in recs:
-                        if rec is oldReceiver:
-                            found = 1
-                            break
-        if not found:
-            _killBackref(oldReceiver, senderkey)
-            return True
-        return False
-
-
-def _killBackref(receiver, senderkey):
-    """Do the actual removal of back reference from receiver to senderkey"""
-    receiverkey = id(receiver)
-    set = sendersBack.get(receiverkey, ())
-    while senderkey in set:
-        try:
-            set.remove(senderkey)
-        except:
-            break
-    if not set:
-        try:
-            del sendersBack[receiverkey]
-        except KeyError:
-            pass
-    return True
--- a/scrapy/xlib/pydispatch/errors.py
+++ b/scrapy/xlib/pydispatch/errors.py
@ -1,15 +0,0 @@
-"""Error types for dispatcher mechanism
-"""
-
-
-class DispatcherError(Exception):
-    """Base class for all Dispatcher errors"""
-
-
-class DispatcherKeyError(KeyError, DispatcherError):
-    """Error raised when unknown (sender,signal) set specified"""
-
-
-class DispatcherTypeError(TypeError, DispatcherError):
-    """Error raised when inappropriate signal-type specified (None)"""
-
--- a/scrapy/xlib/pydispatch/license.txt
+++ b/scrapy/xlib/pydispatch/license.txt
@ -1,34 +0,0 @@
-PyDispatcher License
-
-	Copyright (c) 2001-2006, Patrick K. O'Brien and Contributors
-	All rights reserved.
-	
-	Redistribution and use in source and binary forms, with or without
-	modification, are permitted provided that the following conditions
-	are met:
-	
-		Redistributions of source code must retain the above copyright
-		notice, this list of conditions and the following disclaimer.
-	
-		Redistributions in binary form must reproduce the above
-		copyright notice, this list of conditions and the following
-		disclaimer in the documentation and/or other materials
-		provided with the distribution.
-	
-		The name of Patrick K. O'Brien, or the name of any Contributor,
-		may not be used to endorse or promote products derived from this 
-		software without specific prior written permission.
-	
-	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-	``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-	FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-	COPYRIGHT HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-	INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-	(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-	SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-	HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-	STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-	OF THE POSSIBILITY OF SUCH DAMAGE. 
-
--- a/scrapy/xlib/pydispatch/robust.py
+++ b/scrapy/xlib/pydispatch/robust.py
@ -1,54 +0,0 @@
-"""Module implementing error-catching version of send (sendRobust)"""
-from scrapy.xlib.pydispatch.dispatcher import Any, Anonymous, liveReceivers, getAllReceivers
-from scrapy.xlib.pydispatch.robustapply import robustApply
-
-
-def sendRobust(signal=Any, sender=Anonymous, *arguments, **named):
-    """Send signal from sender to all connected receivers catching errors
-
-    signal -- (hashable) signal value, see connect for details
-
-    sender -- the sender of the signal
-
-        if Any, only receivers registered for Any will receive
-        the message.
-
-        if Anonymous, only receivers registered to receive
-        messages from Anonymous or Any will receive the message
-
-        Otherwise can be any python object (normally one
-        registered with a connect if you actually want
-        something to occur).
-
-    arguments -- positional arguments which will be passed to
-        *all* receivers. Note that this may raise TypeErrors
-        if the receivers do not allow the particular arguments.
-        Note also that arguments are applied before named
-        arguments, so they should be used with care.
-
-    named -- named arguments which will be filtered according
-        to the parameters of the receivers to only provide those
-        acceptable to the receiver.
-
-    Return a list of tuple pairs [(receiver, response), ... ]
-
-    if any receiver raises an error (specifically any subclass of Exception),
-    the error instance is returned as the result for that receiver.
-    """
-    # Call each receiver with whatever arguments it can accept.
-    # Return a list of tuple pairs [(receiver, response), ... ].
-    responses = []
-    for receiver in liveReceivers(getAllReceivers(sender, signal)):
-        try:
-            response = robustApply(
-                receiver,
-                signal=signal,
-                sender=sender,
-                *arguments,
-                **named
-            )
-        except Exception as err:
-            responses.append((receiver, err))
-        else:
-            responses.append((receiver, response))
-    return responses
--- a/scrapy/xlib/pydispatch/robustapply.py
+++ b/scrapy/xlib/pydispatch/robustapply.py
@ -1,58 +0,0 @@
-"""Robust apply mechanism
-
-Provides a function "call", which can sort out
-what arguments a given callable object can take,
-and subset the given arguments to match only
-those which are acceptable.
-"""
-import inspect
-
-
-def function(receiver):
-    """Get function-like callable object for given receiver
-
-    returns (function_or_method, codeObject, fromMethod)
-
-    If fromMethod is true, then the callable already
-    has its first argument bound
-    """
-    if inspect.isclass(receiver) and hasattr(receiver, '__call__'):
-        # receiver is a class instance; assume it is callable.
-        # Reassign receiver to the actual method that will be called.
-        if hasattr(receiver.__call__, 'im_func') or \
-                hasattr(receiver.__call__, 'im_code'):
-            receiver = receiver.__call__
-
-    if hasattr(receiver, 'im_func'):
-        # an instance-method...
-        return receiver, receiver.im_func.func_code, 1
-    elif not hasattr(receiver, 'func_code'):
-        raise ValueError(
-            'unknown receiver type %s %s' % (receiver, type(receiver)))
-
-    return receiver, receiver.func_code, 0
-
-
-def robustApply(receiver, *arguments, **named):
-    """Call receiver with arguments and an appropriate subset of named
-    """
-    receiver, codeObject, startIndex = function(receiver)
-    acceptable = codeObject.co_varnames[
-                 startIndex + len(arguments):codeObject.co_argcount]
-    for name in codeObject.co_varnames[startIndex:startIndex + len(arguments)]:
-        if name in named:
-            raise TypeError(
-                """Argument %r specified both positionally and as a keyword for calling %r""" % (
-                    name, receiver,
-                )
-            )
-
-    if not (codeObject.co_flags & 8):
-        # fc does not have a **kwds type parameter, therefore
-        # remove unacceptable arguments.
-        for arg in named.keys():
-            if arg not in acceptable:
-                del named[arg]
-
-    return receiver(*arguments, **named)
-
--- a/scrapy/xlib/pydispatch/saferef.py
+++ b/scrapy/xlib/pydispatch/saferef.py
@ -1,180 +0,0 @@
-"""Refactored "safe reference" from dispatcher.py"""
-from __future__ import print_function
-import weakref, traceback
-
-
-def safeRef(target, onDelete=None):
-    """Return a *safe* weak reference to a callable target
-
-    target -- the object to be weakly referenced, if it's a
-        bound method reference, will create a BoundMethodWeakref,
-        otherwise creates a simple weakref.
-    onDelete -- if provided, will have a hard reference stored
-        to the callable to be called after the safe reference
-        goes out of scope with the reference object, (either a
-        weakref or a BoundMethodWeakref) as argument.
-    """
-    if hasattr(target, 'im_self'):
-        if target.im_self is not None:
-            # Turn a bound method into a BoundMethodWeakref instance.
-            # Keep track of these instances for lookup by disconnect().
-            assert hasattr(target, 'im_func'), """safeRef target %r has im_self, but no im_func, don't know how to create reference"""%( target,)
-            reference = BoundMethodWeakref(
-                target=target,
-                onDelete=onDelete
-            )
-            return reference
-    if onDelete is not None:
-        return weakref.ref(target, onDelete)
-    else:
-        return weakref.ref(target)
-
-
-class BoundMethodWeakref(object):
-    """'Safe' and reusable weak references to instance methods
-
-    BoundMethodWeakref objects provide a mechanism for
-    referencing a bound method without requiring that the
-    method object itself (which is normally a transient
-    object) is kept alive.  Instead, the BoundMethodWeakref
-    object keeps weak references to both the object and the
-    function which together define the instance method.
-
-    Attributes:
-        key -- the identity key for the reference, calculated
-            by the class's calculateKey method applied to the
-            target instance method
-        deletionMethods -- sequence of callable objects taking
-            single argument, a reference to this object which
-            will be called when *either* the target object or
-            target function is garbage collected (i.e. when
-            this object becomes invalid).  These are specified
-            as the onDelete parameters of safeRef calls.
-        weakSelf -- weak reference to the target object
-        weakFunc -- weak reference to the target function
-
-    Class Attributes:
-        _allInstances -- class attribute pointing to all live
-            BoundMethodWeakref objects indexed by the class's
-            calculateKey(target) method applied to the target
-            objects.  This weak value dictionary is used to
-            short-circuit creation so that multiple references
-            to the same (object, function) pair produce the
-            same BoundMethodWeakref instance.
-
-    """
-    _allInstances = weakref.WeakValueDictionary()
-
-    def __new__(cls, target, onDelete=None, *arguments, **named):
-        """Create new instance or return current instance
-
-        Basically this method of construction allows us to
-        short-circuit creation of references to already-
-        referenced instance methods.  The key corresponding
-        to the target is calculated, and if there is already
-        an existing reference, that is returned, with its
-        deletionMethods attribute updated.  Otherwise the
-        new instance is created and registered in the table
-        of already-referenced methods.
-        """
-        key = cls.calculateKey(target)
-        current = cls._allInstances.get(key)
-        if current is not None:
-            current.deletionMethods.append(onDelete)
-            return current
-        else:
-            base = super(BoundMethodWeakref, cls).__new__(cls)
-            cls._allInstances[key] = base
-            base.__init__(target, onDelete, *arguments, **named)
-            return base
-
-    def __init__(self, target, onDelete=None):
-        """Return a weak-reference-like instance for a bound method
-
-        target -- the instance-method target for the weak
-            reference, must have im_self and im_func attributes
-            and be reconstructable via:
-                target.im_func.__get__( target.im_self )
-            which is true of built-in instance methods.
-        onDelete -- optional callback which will be called
-            when this weak reference ceases to be valid
-            (i.e. either the object or the function is garbage
-            collected).  Should take a single argument,
-            which will be passed a pointer to this object.
-        """
-
-        def remove(weak, self=self):
-            """Set self.isDead to true when method or instance is destroyed"""
-            methods = self.deletionMethods[:]
-            del self.deletionMethods[:]
-            try:
-                del self.__class__._allInstances[self.key]
-            except KeyError:
-                pass
-            for function in methods:
-                try:
-                    if callable(function):
-                        function(self)
-                except Exception as e:
-                    try:
-                        traceback.print_exc()
-                    except AttributeError as err:
-                        print(
-                            '''Exception during saferef %s cleanup function %s: %s''' % (
-                                self, function, e
-                            ))
-
-        self.deletionMethods = [onDelete]
-        self.key = self.calculateKey(target)
-        self.weakSelf = weakref.ref(target.im_self, remove)
-        self.weakFunc = weakref.ref(target.im_func, remove)
-        self.selfName = target.im_self.__class__.__name__
-        self.funcName = str(target.im_func.__name__)
-
-    def calculateKey(cls, target):
-        """Calculate the reference key for this reference
-
-        Currently this is a two-tuple of the id()'s of the
-        target object and the target function respectively.
-        """
-        return (id(target.im_self), id(target.im_func))
-
-    calculateKey = classmethod(calculateKey)
-
-    def __str__(self):
-        """Give a friendly representation of the object"""
-        return """%s( %s.%s )""" % (
-            self.__class__.__name__,
-            self.selfName,
-            self.funcName,
-        )
-
-    __repr__ = __str__
-
-    def __nonzero__(self):
-        """Whether we are still a valid reference"""
-        return self() is not None
-
-    def __cmp__(self, other):
-        """Compare with another reference"""
-        if not isinstance(other, self.__class__):
-            return cmp(self.__class__, type(other))
-        return cmp(self.key, other.key)
-
-    def __call__(self):
-        """Return a strong reference to the bound method
-
-        If the target cannot be retrieved, then will
-        return None, otherwise returns a bound instance
-        method for our object and function.
-
-        Note:
-            You may call this method any number of times,
-            as it does not invalidate the reference.
-        """
-        target = self.weakSelf()
-        if target is not None:
-            function = self.weakFunc()
-            if function is not None:
-                return function.__get__(target)
-        return None
--- a/setup.py
+++ b/setup.py
@ -44,5 +44,6 @@ setup(
        'pyOpenSSL',
        'cssselect>=0.9',
        'six>=1.5.2',
+        'PyDispatcher>=2.0.5',
    ],
 )
--- a/tests/py3-ignores.txt
+++ b/tests/py3-ignores.txt
@ -12,7 +12,6 @@ tests/test_crawler.py
 tests/test_downloader_handlers.py
 tests/test_downloadermiddleware_ajaxcrawlable.py
 tests/test_downloadermiddleware_cookies.py
-tests/test_downloadermiddleware_decompression.py
 tests/test_downloadermiddleware_defaultheaders.py
 tests/test_downloadermiddleware_downloadtimeout.py
 tests/test_downloadermiddleware_httpauth.py
@ -22,50 +21,30 @@ tests/test_downloadermiddleware_httpproxy.py
 tests/test_downloadermiddleware.py
 tests/test_downloadermiddleware_redirect.py
 tests/test_downloadermiddleware_retry.py
-tests/test_downloadermiddleware_robotstxt.py
 tests/test_downloadermiddleware_stats.py
 tests/test_downloadermiddleware_useragent.py
-tests/test_dupefilters.py
 tests/test_engine.py
 tests/test_http_cookies.py
-tests/test_http_request.py
-tests/test_http_response.py
 tests/test_logformatter.py
 tests/test_mail.py
-tests/test_middleware.py
 tests/test_pipeline_files.py
 tests/test_pipeline_images.py
-tests/test_pipeline_media.py
 tests/test_proxy_connect.py
 tests/test_responsetypes.py
 tests/test_selector_csstranslator.py
 tests/test_selector_lxmldocument.py
 tests/test_selector.py
-tests/test_settings/__init__.py
-tests/test_spiderloader/__init__.py
-tests/test_spiderloader/test_spiders/__init__.py
-tests/test_spiderloader/test_spiders/spider0.py
-tests/test_spiderloader/test_spiders/spider1.py
-tests/test_spiderloader/test_spiders/spider2.py
-tests/test_spiderloader/test_spiders/spider3.py
-tests/test_spiderloader/test_spiders/spider4.py
 tests/test_spidermiddleware_depth.py
 tests/test_spidermiddleware_httperror.py
 tests/test_spidermiddleware_offsite.py
 tests/test_spidermiddleware_referer.py
 tests/test_spider.py
 tests/test_stats.py
-tests/test_utils_defer.py
 tests/test_utils_iterators.py
-tests/test_utils_jsonrpc.py
 tests/test_utils_log.py
 tests/test_utils_reqser.py
-tests/test_utils_request.py
 tests/test_utils_response.py
-tests/test_utils_serialize.py
-tests/test_utils_signal.py
 tests/test_utils_template.py
-tests/test_utils_url.py
 tests/test_webclient.py

 scrapy/xlib/tx/iweb.py
@ -93,6 +72,5 @@ scrapy/downloadermiddlewares/httpproxy.py
 scrapy/downloadermiddlewares/cookies.py
 scrapy/extensions/statsmailer.py
 scrapy/extensions/memusage.py
-scrapy/commands/deploy.py
 scrapy/commands/bench.py
 scrapy/mail.py
--- a/tests/test_downloadermiddleware_decompression.py
+++ b/tests/test_downloadermiddleware_decompression.py
@ -39,7 +39,7 @@ class DecompressionMiddlewareTest(TestCase):
        assert_samelines(self, new.body, rsp.body)

    def test_empty_response(self):
-        rsp = Response(url='http://test.com', body='')
+        rsp = Response(url='http://test.com', body=b'')
        new = self.mw.process_response(None, rsp, self.spider)
        assert new is rsp
        assert not rsp.body
--- a/tests/test_downloadermiddleware_robotstxt.py
+++ b/tests/test_downloadermiddleware_robotstxt.py
@ -6,15 +6,44 @@ from twisted.python import failure
 from twisted.trial import unittest
 from scrapy.downloadermiddlewares.robotstxt import RobotsTxtMiddleware
 from scrapy.exceptions import IgnoreRequest, NotConfigured
-from scrapy.http import Request, Response
+from scrapy.http import Request, Response, TextResponse
 from scrapy.settings import Settings
 from tests import mock


 class RobotsTxtMiddlewareTest(unittest.TestCase):

+    def setUp(self):
+        self.crawler = mock.MagicMock()
+        self.crawler.settings = Settings()
+        self.crawler.engine.download = mock.MagicMock()
+
+    def tearDown(self):
+        del self.crawler
+
+    def test_robotstxt_settings(self):
+        self.crawler.settings = Settings()
+        self.crawler.settings.set('USER_AGENT', 'CustomAgent')
+        self.assertRaises(NotConfigured, RobotsTxtMiddleware, self.crawler)
+
+    def _get_successful_crawler(self):  # mock crawler whose robots.txt download succeeds
+        crawler = self.crawler
+        crawler.settings.set('ROBOTSTXT_OBEY', True)
+        ROBOTS = re.sub(br'(?m)^\s+', b'', b'''
+        User-Agent: *
+        Disallow: /admin/
+        Disallow: /static/
+        ''')  # (?m) must lead the pattern; a trailing global flag is an error on Python 3.11+
+        response = TextResponse('http://site.local/robots.txt', body=ROBOTS)
+        def return_response(request, spider):  # stands in for crawler.engine.download
+            deferred = Deferred()
+            reactor.callFromThread(deferred.callback, response)  # fired by the reactor, not inline
+            return deferred
+        crawler.engine.download.side_effect = return_response
+        return crawler
+
    def test_robotstxt(self):
-        middleware = self._get_middleware()
+        middleware = RobotsTxtMiddleware(self._get_successful_crawler())
        # There is a bit of neglect in robotstxt.py: robots.txt is fetched asynchronously,
        # and it is actually fetched only *after* first process_request completes.
        # So, first process_request will always succeed.
@ -30,8 +59,8 @@ class RobotsTxtMiddlewareTest(unittest.TestCase):
        return deferred

    def test_robotstxt_meta(self):
+        middleware = RobotsTxtMiddleware(self._get_successful_crawler())
        meta = {'dont_obey_robotstxt': True}
-        middleware = self._get_middleware()
        self.assertNotIgnored(Request('http://site.local', meta=meta), middleware)
        def test(r):
            self.assertNotIgnored(Request('http://site.local/allowed', meta=meta), middleware)
@ -42,19 +71,67 @@ class RobotsTxtMiddlewareTest(unittest.TestCase):
        reactor.callFromThread(deferred.callback, None)
        return deferred

-    def test_robotstxt_error(self):
-        crawler = mock.MagicMock()
-        crawler.settings = Settings()
+    def _get_garbage_crawler(self):
+        crawler = self.crawler
        crawler.settings.set('ROBOTSTXT_OBEY', True)
-        crawler.engine.download = mock.MagicMock()
+        response = Response('http://site.local/robots.txt', body=b'GIF89a\xd3\x00\xfe\x00\xa2')
+        def return_response(request, spider):
+            deferred = Deferred()
+            reactor.callFromThread(deferred.callback, response)
+            return deferred
+        crawler.engine.download.side_effect = return_response
+        return crawler
+
+    def test_robotstxt_garbage(self):
+        # garbage response should be discarded, equal 'allow all'
+        middleware = RobotsTxtMiddleware(self._get_garbage_crawler())
+        middleware._logerror = mock.MagicMock()
+        middleware.process_request(Request('http://site.local'), None)
+        self.assertNotIgnored(Request('http://site.local'), middleware)
+        def test(r):
+            self.assertNotIgnored(Request('http://site.local/allowed'), middleware)
+            self.assertNotIgnored(Request('http://site.local/admin/main'), middleware)
+            self.assertNotIgnored(Request('http://site.local/static/'), middleware)
+        deferred = Deferred()
+        deferred.addCallback(test)
+        deferred.addErrback(lambda _: self.assertIsNone(middleware._logerror.assert_any_call()))
+        reactor.callFromThread(deferred.callback, None)
+        return deferred
+
+    def _get_emptybody_crawler(self):
+        crawler = self.crawler
+        crawler.settings.set('ROBOTSTXT_OBEY', True)
+        response = Response('http://site.local/robots.txt')
+        def return_response(request, spider):
+            deferred = Deferred()
+            reactor.callFromThread(deferred.callback, response)
+            return deferred
+        crawler.engine.download.side_effect = return_response
+        return crawler
+
+    def test_robotstxt_empty_response(self):
+        # empty response should equal 'allow all'
+        middleware = RobotsTxtMiddleware(self._get_emptybody_crawler())
+        self.assertNotIgnored(Request('http://site.local'), middleware)
+        def test(r):
+            self.assertNotIgnored(Request('http://site.local/allowed'), middleware)
+            self.assertNotIgnored(Request('http://site.local/admin/main'), middleware)
+            self.assertNotIgnored(Request('http://site.local/static/'), middleware)
+        deferred = Deferred()
+        deferred.addCallback(test)
+        reactor.callFromThread(deferred.callback, None)
+        return deferred
+
+    def test_robotstxt_error(self):
+        self.crawler.settings.set('ROBOTSTXT_OBEY', True)
        err = error.DNSLookupError('Robotstxt address not found')
        def return_failure(request, spider):
            deferred = Deferred()
            reactor.callFromThread(deferred.errback, failure.Failure(err))
            return deferred
-        crawler.engine.download.side_effect = return_failure
+        self.crawler.engine.download.side_effect = return_failure

-        middleware = RobotsTxtMiddleware(crawler)
+        middleware = RobotsTxtMiddleware(self.crawler)
        middleware._logerror = mock.MagicMock()
        middleware.process_request(Request('http://site.local'), None)
        deferred = Deferred()
@ -69,27 +146,3 @@ class RobotsTxtMiddlewareTest(unittest.TestCase):
    def assertIgnored(self, request, middleware):
        spider = None  # not actually used
        self.assertRaises(IgnoreRequest, middleware.process_request, request, spider)
-
-    def _get_crawler(self):
-        crawler = mock.MagicMock()
-        crawler.settings = Settings()
-        crawler.settings.set('USER_AGENT', 'CustomAgent')
-        self.assertRaises(NotConfigured, RobotsTxtMiddleware, crawler)
-        crawler.settings.set('ROBOTSTXT_OBEY', True)
-        crawler.engine.download = mock.MagicMock()
-        ROBOTS = re.sub(r'^\s+(?m)', '', '''
-        User-Agent: *
-        Disallow: /admin/
-        Disallow: /static/
-        ''')
-        response = Response('http://site.local/robots.txt', body=ROBOTS)
-        def return_response(request, spider):
-            deferred = Deferred()
-            reactor.callFromThread(deferred.callback, response)
-            return deferred
-        crawler.engine.download.side_effect = return_response
-        return crawler
-
-    def _get_middleware(self):
-        crawler = self._get_crawler()
-        return RobotsTxtMiddleware(crawler)
--- a/tests/test_dupefilters.py
+++ b/tests/test_dupefilters.py
@ -3,6 +3,7 @@ import unittest

 from scrapy.dupefilters import RFPDupeFilter
 from scrapy.http import Request
+from scrapy.utils.python import to_bytes


 class RFPDupeFilterTest(unittest.TestCase):
@ -43,7 +44,7 @@ class RFPDupeFilterTest(unittest.TestCase):

            def request_fingerprint(self, request):
                fp = hashlib.sha1()
-                fp.update(request.url.lower())
+                fp.update(to_bytes(request.url.lower()))
                return fp.hexdigest()

        case_insensitive_dupefilter = CaseInsensitiveRFPDupeFilter()
--- a/tests/test_engine.py
+++ b/tests/test_engine.py
@@ -20,7 +20,7 @@ from twisted.trial import unittest

 from scrapy import signals
 from scrapy.utils.test import get_crawler
-from scrapy.xlib.pydispatch import dispatcher
+from pydispatch import dispatcher
 from tests import tests_datadir
 from scrapy.spiders import Spider
 from scrapy.item import Item, Field
--- a/tests/test_http_request.py
+++ b/tests/test_http_request.py
@@ -1,9 +1,12 @@
 import cgi
 import unittest
+
+import six
 from six.moves import xmlrpc_client as xmlrpclib
 from six.moves.urllib.parse import urlparse

 from scrapy.http import Request, FormRequest, XmlRpcRequest, Headers, HtmlResponse
+from scrapy.utils.python import to_bytes, to_native_str


 class RequestTest(unittest.TestCase):
@@ -31,13 +34,13 @@ class RequestTest(unittest.TestCase):
        self.assertEqual(r.meta, self.default_meta)

        meta = {"lala": "lolo"}
-        headers = {"caca": "coco"}
+        headers = {b"caca": b"coco"}
        r = self.request_class("http://www.example.com", meta=meta, headers=headers, body="a body")

        assert r.meta is not meta
        self.assertEqual(r.meta, meta)
        assert r.headers is not headers
-        self.assertEqual(r.headers["caca"], "coco")
+        self.assertEqual(r.headers[b"caca"], b"coco")

    def test_url_no_scheme(self):
        self.assertRaises(ValueError, self.request_class, 'foo')
@@ -45,7 +48,7 @@ class RequestTest(unittest.TestCase):
    def test_headers(self):
        # Different ways of setting headers attribute
        url = 'http://www.scrapy.org'
-        headers = {'Accept':'gzip', 'Custom-Header':'nothing to tell you'}
+        headers = {b'Accept':'gzip', b'Custom-Header':'nothing to tell you'}
        r = self.request_class(url=url, headers=headers)
        p = self.request_class(url=url, headers=r.headers)

@@ -57,9 +60,9 @@ class RequestTest(unittest.TestCase):
        h = Headers({'key1': u'val1', u'key2': 'val2'})
        h[u'newkey'] = u'newval'
        for k, v in h.iteritems():
-            self.assert_(isinstance(k, str))
+            self.assert_(isinstance(k, bytes))
            for s in v:
-                self.assert_(isinstance(s, str))
+                self.assert_(isinstance(s, bytes))

    def test_eq(self):
        url = 'http://www.scrapy.org'
@@ -73,17 +76,17 @@ class RequestTest(unittest.TestCase):
        self.assertEqual(len(set_), 2)

    def test_url(self):
-        """Request url tests"""
        r = self.request_class(url="http://www.scrapy.org/path")
        self.assertEqual(r.url, "http://www.scrapy.org/path")

-        # url quoting on creation
+    def test_url_quoting(self):
        r = self.request_class(url="http://www.scrapy.org/blank%20space")
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")
        r = self.request_class(url="http://www.scrapy.org/blank space")
        self.assertEqual(r.url, "http://www.scrapy.org/blank%20space")

-        # url encoding
+    @unittest.skipUnless(six.PY2, "TODO")
+    def test_url_encoding(self):
        r1 = self.request_class(url=u"http://www.scrapy.org/price/\xa3", encoding="utf-8")
        r2 = self.request_class(url=u"http://www.scrapy.org/price/\xa3", encoding="latin1")
        self.assertEqual(r1.url, "http://www.scrapy.org/price/%C2%A3")
@@ -91,19 +94,19 @@ class RequestTest(unittest.TestCase):

    def test_body(self):
        r1 = self.request_class(url="http://www.example.com/")
-        assert r1.body == ''
+        assert r1.body == b''

-        r2 = self.request_class(url="http://www.example.com/", body="")
-        assert isinstance(r2.body, str)
+        r2 = self.request_class(url="http://www.example.com/", body=b"")
+        assert isinstance(r2.body, bytes)
        self.assertEqual(r2.encoding, 'utf-8') # default encoding

        r3 = self.request_class(url="http://www.example.com/", body=u"Price: \xa3100", encoding='utf-8')
-        assert isinstance(r3.body, str)
-        self.assertEqual(r3.body, "Price: \xc2\xa3100")
+        assert isinstance(r3.body, bytes)
+        self.assertEqual(r3.body, b"Price: \xc2\xa3100")

        r4 = self.request_class(url="http://www.example.com/", body=u"Price: \xa3100", encoding='latin1')
-        assert isinstance(r4.body, str)
-        self.assertEqual(r4.body, "Price: \xa3100")
+        assert isinstance(r4.body, bytes)
+        self.assertEqual(r4.body, b"Price: \xa3100")

    def test_ajax_url(self):
        # ascii url
@@ -155,18 +158,19 @@ class RequestTest(unittest.TestCase):
    def test_replace(self):
        """Test Request.replace() method"""
        r1 = self.request_class("http://www.example.com", method='GET')
-        hdrs = Headers(dict(r1.headers, key='value'))
+        hdrs = Headers(r1.headers)
+        hdrs[b'key'] = b'value'
        r2 = r1.replace(method="POST", body="New body", headers=hdrs)
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.method, r2.method), ("GET", "POST"))
-        self.assertEqual((r1.body, r2.body), ('', "New body"))
+        self.assertEqual((r1.body, r2.body), (b'', b"New body"))
        self.assertEqual((r1.headers, r2.headers), (self.default_headers, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.request_class("http://www.example.com", meta={'a': 1}, dont_filter=True)
-        r4 = r3.replace(url="http://www.example.com/2", body='', meta={}, dont_filter=False)
+        r4 = r3.replace(url="http://www.example.com/2", body=b'', meta={}, dont_filter=False)
        self.assertEqual(r4.url, "http://www.example.com/2")
-        self.assertEqual(r4.body, '')
+        self.assertEqual(r4.body, b'')
        self.assertEqual(r4.meta, {})
        assert r4.dont_filter is False

@@ -184,39 +188,41 @@ class FormRequestTest(RequestTest):

    request_class = FormRequest

-    def assertSortedEqual(self, first, second, msg=None):
+    def assertQueryEqual(self, first, second, msg=None):
+        first = to_native_str(first).split("&")
+        second = to_native_str(second).split("&")
        return self.assertEqual(sorted(first), sorted(second), msg)

    def test_empty_formdata(self):
        r1 = self.request_class("http://www.example.com", formdata={})
-        self.assertEqual(r1.body, '')
+        self.assertEqual(r1.body, b'')

+    @unittest.skipUnless(six.PY2, "TODO")
    def test_default_encoding(self):
        # using default encoding (utf-8)
        data = {'one': 'two', 'price': '\xc2\xa3 100'}
        r2 = self.request_class("http://www.example.com", formdata=data)
        self.assertEqual(r2.method, 'POST')
        self.assertEqual(r2.encoding, 'utf-8')
-        self.assertSortedEqual(r2.body.split('&'),
-                               'price=%C2%A3+100&one=two'.split('&'))
-        self.assertEqual(r2.headers['Content-Type'], 'application/x-www-form-urlencoded')
+        self.assertQueryEqual(r2.body, b'price=%C2%A3+100&one=two')
+        self.assertEqual(r2.headers[b'Content-Type'], b'application/x-www-form-urlencoded')

    def test_custom_encoding(self):
        data = {'price': u'\xa3 100'}
        r3 = self.request_class("http://www.example.com", formdata=data, encoding='latin1')
        self.assertEqual(r3.encoding, 'latin1')
-        self.assertEqual(r3.body, 'price=%A3+100')
+        self.assertEqual(r3.body, b'price=%A3+100')

    def test_multi_key_values(self):
        # using multiples values for a single key
        data = {'price': u'\xa3 100', 'colours': ['red', 'blue', 'green']}
        r3 = self.request_class("http://www.example.com", formdata=data)
-        self.assertSortedEqual(r3.body.split('&'),
-            'colours=red&colours=blue&colours=green&price=%C2%A3+100'.split('&'))
+        self.assertQueryEqual(r3.body,
+            b'colours=red&colours=blue&colours=green&price=%C2%A3+100')

    def test_from_response_post(self):
        response = _buildresponse(
-            """<form action="post.php" method="POST">
+            b"""<form action="post.php" method="POST">
            <input type="hidden" name="test" value="val1">
            <input type="hidden" name="test" value="val2">
            <input type="hidden" name="test2" value="xxx">
@@ -225,13 +231,13 @@ class FormRequestTest(RequestTest):
        req = self.request_class.from_response(response,
                formdata={'one': ['two', 'three'], 'six': 'seven'})
        self.assertEqual(req.method, 'POST')
-        self.assertEqual(req.headers['Content-type'], 'application/x-www-form-urlencoded')
+        self.assertEqual(req.headers[b'Content-type'], b'application/x-www-form-urlencoded')
        self.assertEqual(req.url, "http://www.example.com/this/post.php")
        fs = _qs(req)
-        self.assertEqual(set(fs["test"]), set(["val1", "val2"]))
-        self.assertEqual(set(fs["one"]), set(["two", "three"]))
-        self.assertEqual(fs['test2'], ['xxx'])
-        self.assertEqual(fs['six'], ['seven'])
+        self.assertEqual(set(fs[b"test"]), {b"val1", b"val2"})
+        self.assertEqual(set(fs[b"one"]), {b"two", b"three"})
+        self.assertEqual(fs[b'test2'], [b'xxx'])
+        self.assertEqual(fs[b'six'], [b'seven'])

    def test_from_response_extra_headers(self):
        response = _buildresponse(
@@ -244,8 +250,8 @@ class FormRequestTest(RequestTest):
                formdata={'one': ['two', 'three'], 'six': 'seven'},
                headers={"Accept-Encoding": "gzip,deflate"})
        self.assertEqual(req.method, 'POST')
-        self.assertEqual(req.headers['Content-type'], 'application/x-www-form-urlencoded')
-        self.assertEqual(req.headers['Accept-Encoding'], 'gzip,deflate')
+        self.assertEqual(req.headers['Content-type'], b'application/x-www-form-urlencoded')
+        self.assertEqual(req.headers['Accept-Encoding'], b'gzip,deflate')

    def test_from_response_get(self):
        response = _buildresponse(
@@ -274,8 +280,8 @@ class FormRequestTest(RequestTest):
            </form>""")
        req = self.request_class.from_response(response, formdata={'two': '2'})
        fs = _qs(req)
-        self.assertEqual(fs['one'], ['1'])
-        self.assertEqual(fs['two'], ['2'])
+        self.assertEqual(fs[b'one'], [b'1'])
+        self.assertEqual(fs[b'two'], [b'2'])

    def test_from_response_override_method(self):
        response = _buildresponse(
@@ -379,7 +385,7 @@ class FormRequestTest(RequestTest):
        req = self.request_class.from_response(response, \
                clickdata={'name': u'price in \u00a3'})
        fs = _qs(req)
-        self.assertTrue(fs[u'price in \u00a3'.encode('utf-8')])
+        self.assertTrue(fs[to_native_str(u'price in \u00a3')])

    def test_from_response_multiple_forms_clickdata(self):
        response = _buildresponse(
@@ -489,9 +495,9 @@ class FormRequestTest(RequestTest):
            </form>""")
        r1 = self.request_class.from_response(response, formdata={'two':'3'})
        self.assertEqual(r1.method, 'POST')
-        self.assertEqual(r1.headers['Content-type'], 'application/x-www-form-urlencoded')
+        self.assertEqual(r1.headers['Content-type'], b'application/x-www-form-urlencoded')
        fs = _qs(r1)
-        self.assertEqual(fs, {'one': ['1'], 'two': ['3']})
+        self.assertEqual(fs, {b'one': [b'1'], b'two': [b'3']})

    def test_from_response_formname_exists(self):
        response = _buildresponse(
@@ -506,7 +512,7 @@ class FormRequestTest(RequestTest):
        r1 = self.request_class.from_response(response, formname="form2")
        self.assertEqual(r1.method, 'POST')
        fs = _qs(r1)
-        self.assertEqual(fs, {'four': ['4'], 'three': ['3']})
+        self.assertEqual(fs, {b'four': [b'4'], b'three': [b'3']})

    def test_from_response_formname_notexist(self):
        response = _buildresponse(
@@ -519,7 +525,7 @@ class FormRequestTest(RequestTest):
        r1 = self.request_class.from_response(response, formname="form3")
        self.assertEqual(r1.method, 'POST')
        fs = _qs(r1)
-        self.assertEqual(fs, {'one': ['1']})
+        self.assertEqual(fs, {b'one': [b'1']})

    def test_from_response_formname_errors_formnumber(self):
        response = _buildresponse(
@@ -664,11 +670,11 @@ class FormRequestTest(RequestTest):
            </form>""")
        r1 = self.request_class.from_response(response, formxpath="//form[@action='post.php']")
        fs = _qs(r1)
-        self.assertEqual(fs['one'], ['1'])
+        self.assertEqual(fs[b'one'], [b'1'])

        r1 = self.request_class.from_response(response, formxpath="//form/input[@name='four']")
        fs = _qs(r1)
-        self.assertEqual(fs['three'], ['3'])
+        self.assertEqual(fs[b'three'], [b'3'])

        self.assertRaises(ValueError, self.request_class.from_response,
                          response, formxpath="//form/input[@name='abc']")
@@ -691,12 +697,12 @@ class XmlRpcRequestTest(RequestTest):

    request_class = XmlRpcRequest
    default_method = 'POST'
-    default_headers = {'Content-Type': ['text/xml']}
+    default_headers = {b'Content-Type': [b'text/xml']}

    def _test_request(self, **kwargs):
        r = self.request_class('http://scrapytest.org/rpc2', **kwargs)
-        self.assertEqual(r.headers['Content-Type'], 'text/xml')
-        self.assertEqual(r.body, xmlrpclib.dumps(**kwargs))
+        self.assertEqual(r.headers[b'Content-Type'], b'text/xml')
+        self.assertEqual(r.body, to_bytes(xmlrpclib.dumps(**kwargs)))
        self.assertEqual(r.method, 'POST')
        self.assertEqual(r.encoding, kwargs.get('encoding', 'utf-8'))
        self.assertTrue(r.dont_filter, True)
@@ -706,11 +712,14 @@ class XmlRpcRequestTest(RequestTest):
        self._test_request(params=('username', 'password'), methodname='login')
        self._test_request(params=('response', ), methodresponse='login')
        self._test_request(params=(u'pas\xa3',), encoding='utf-8')
-        self._test_request(params=(u'pas\xa3',), encoding='latin')
        self._test_request(params=(None,), allow_none=1)
        self.assertRaises(TypeError, self._test_request)
        self.assertRaises(TypeError, self._test_request, params=(None,))

+    @unittest.skipUnless(six.PY2, "TODO")
+    def test_latin1(self):
+        self._test_request(params=(u'pas\xa3',), encoding='latin')
+

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_http_response.py
+++ b/tests/test_http_response.py
@@ -1,8 +1,12 @@
 import unittest

+import six
 from w3lib.encoding import resolve_encoding
-from scrapy.http import Request, Response, TextResponse, HtmlResponse, XmlResponse, Headers
+
+from scrapy.http import (Request, Response, TextResponse, HtmlResponse,
+                         XmlResponse, Headers)
 from scrapy.selector import Selector
+from scrapy.utils.python import to_native_str


 class BaseResponseTest(unittest.TestCase):
@@ -14,10 +18,10 @@ class BaseResponseTest(unittest.TestCase):
        self.assertRaises(Exception, self.response_class)
        self.assertTrue(isinstance(self.response_class('http://example.com/'), self.response_class))
        # body can be str or None
-        self.assertTrue(isinstance(self.response_class('http://example.com/', body=''), self.response_class))
-        self.assertTrue(isinstance(self.response_class('http://example.com/', body='body'), self.response_class))
+        self.assertTrue(isinstance(self.response_class('http://example.com/', body=b''), self.response_class))
+        self.assertTrue(isinstance(self.response_class('http://example.com/', body=b'body'), self.response_class))
        # test presence of all optional parameters
-        self.assertTrue(isinstance(self.response_class('http://example.com/', headers={}, status=200, body=''), self.response_class))
+        self.assertTrue(isinstance(self.response_class('http://example.com/', body=b'', headers={}, status=200), self.response_class))

        r = self.response_class("http://www.example.com")
        assert isinstance(r.url, str)
@@ -27,12 +31,12 @@ class BaseResponseTest(unittest.TestCase):
        assert isinstance(r.headers, Headers)
        self.assertEqual(r.headers, {})

-        headers = {"caca": "coco"}
-        body = "a body"
+        headers = {"foo": "bar"}
+        body = b"a body"
        r = self.response_class("http://www.example.com", headers=headers, body=body)

        assert r.headers is not headers
-        self.assertEqual(r.headers["caca"], "coco")
+        self.assertEqual(r.headers[b"foo"], b"bar")

        r = self.response_class("http://www.example.com", status=301)
        self.assertEqual(r.status, 301)
@@ -43,7 +47,7 @@ class BaseResponseTest(unittest.TestCase):
    def test_copy(self):
        """Test Response copy"""

-        r1 = self.response_class("http://www.example.com", body="Some body")
+        r1 = self.response_class("http://www.example.com", body=b"Some body")
        r1.flags.append('cached')
        r2 = r1.copy()

@@ -61,7 +65,7 @@ class BaseResponseTest(unittest.TestCase):
    def test_copy_meta(self):
        req = Request("http://www.example.com")
        req.meta['foo'] = 'bar'
-        r1 = self.response_class("http://www.example.com", body="Some body", request=req)
+        r1 = self.response_class("http://www.example.com", body=b"Some body", request=req)
        assert r1.meta is req.meta

    def test_copy_inherited_classes(self):
@@ -79,30 +83,30 @@ class BaseResponseTest(unittest.TestCase):
        """Test Response.replace() method"""
        hdrs = Headers({"key": "value"})
        r1 = self.response_class("http://www.example.com")
-        r2 = r1.replace(status=301, body="New body", headers=hdrs)
-        assert r1.body == ''
+        r2 = r1.replace(status=301, body=b"New body", headers=hdrs)
+        assert r1.body == b''
        self.assertEqual(r1.url, r2.url)
        self.assertEqual((r1.status, r2.status), (200, 301))
-        self.assertEqual((r1.body, r2.body), ('', "New body"))
+        self.assertEqual((r1.body, r2.body), (b'', b"New body"))
        self.assertEqual((r1.headers, r2.headers), ({}, hdrs))

        # Empty attributes (which may fail if not compared properly)
        r3 = self.response_class("http://www.example.com", flags=['cached'])
-        r4 = r3.replace(body='', flags=[])
-        self.assertEqual(r4.body, '')
+        r4 = r3.replace(body=b'', flags=[])
+        self.assertEqual(r4.body, b'')
        self.assertEqual(r4.flags, [])

    def _assert_response_values(self, response, encoding, body):
-        if isinstance(body, unicode):
+        if isinstance(body, six.text_type):
            body_unicode = body
-            body_str = body.encode(encoding)
+            body_bytes = body.encode(encoding)
        else:
            body_unicode = body.decode(encoding)
-            body_str = body
+            body_bytes = body

-        assert isinstance(response.body, str)
+        assert isinstance(response.body, bytes)
        self._assert_response_encoding(response, encoding)
-        self.assertEqual(response.body, body_str)
+        self.assertEqual(response.body, body_bytes)
        self.assertEqual(response.body_as_unicode(), body_unicode)

    def _assert_response_encoding(self, response, encoding):
@@ -120,12 +124,6 @@ class BaseResponseTest(unittest.TestCase):
        self.assertEqual(joined, absolute)


-class ResponseText(BaseResponseTest):
-
-    def test_no_unicode_url(self):
-        self.assertRaises(TypeError, self.response_class, u'http://www.example.com')
-
-
 class TextResponseTest(BaseResponseTest):

    response_class = TextResponse
@@ -152,11 +150,11 @@ class TextResponseTest(BaseResponseTest):
        assert isinstance(resp.url, str)

        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='utf-8')
-        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
+        self.assertEqual(resp.url, to_native_str(b'http://www.example.com/price/\xc2\xa3'))
        resp = self.response_class(url=u"http://www.example.com/price/\xa3", encoding='latin-1')
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=utf-8"]})
-        self.assertEqual(resp.url, 'http://www.example.com/price/\xc2\xa3')
+        self.assertEqual(resp.url, to_native_str(b'http://www.example.com/price/\xc2\xa3'))
        resp = self.response_class(u"http://www.example.com/price/\xa3", headers={"Content-type": ["text/html; charset=iso-8859-1"]})
        self.assertEqual(resp.url, 'http://www.example.com/price/\xa3')

@@ -168,17 +166,17 @@ class TextResponseTest(BaseResponseTest):
        r1 = self.response_class('http://www.example.com', body=original_string, encoding='cp1251')

        # check body_as_unicode
-        self.assertTrue(isinstance(r1.body_as_unicode(), unicode))
+        self.assertTrue(isinstance(r1.body_as_unicode(), six.text_type))
        self.assertEqual(r1.body_as_unicode(), unicode_string)

    def test_encoding(self):
-        r1 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=utf-8"]}, body="\xc2\xa3")
+        r1 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=utf-8"]}, body=b"\xc2\xa3")
        r2 = self.response_class("http://www.example.com", encoding='utf-8', body=u"\xa3")
-        r3 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=iso-8859-1"]}, body="\xa3")
-        r4 = self.response_class("http://www.example.com", body="\xa2\xa3")
-        r5 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=None"]}, body="\xc2\xa3")
-        r6 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=gb2312"]}, body="\xa8D")
-        r7 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=gbk"]}, body="\xa8D")
+        r3 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=iso-8859-1"]}, body=b"\xa3")
+        r4 = self.response_class("http://www.example.com", body=b"\xa2\xa3")
+        r5 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=None"]}, body=b"\xc2\xa3")
+        r6 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=gb2312"]}, body=b"\xa8D")
+        r7 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=gbk"]}, body=b"\xa8D")

        self.assertEqual(r1._headers_encoding(), "utf-8")
        self.assertEqual(r2._headers_encoding(), None)
@@ -203,21 +201,21 @@ class TextResponseTest(BaseResponseTest):
        """Check that unknown declared encodings are ignored"""
        r = self.response_class("http://www.example.com",
                                headers={"Content-type": ["text/html; charset=UKNOWN"]},
-                                body="\xc2\xa3")
+                                body=b"\xc2\xa3")
        self.assertEqual(r._declared_encoding(), None)
        self._assert_response_values(r, 'utf-8', u"\xa3")

    def test_utf16(self):
        """Test utf-16 because UnicodeDammit is known to have problems with"""
        r = self.response_class("http://www.example.com",
-                                body='\xff\xfeh\x00i\x00',
+                                body=b'\xff\xfeh\x00i\x00',
                                encoding='utf-16')
        self._assert_response_values(r, 'utf-16', u"hi")

    def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self):
        r6 = self.response_class("http://www.example.com",
                                 headers={"Content-type": ["text/html; charset=utf-8"]},
-                                 body="\xef\xbb\xbfWORD\xe3\xab")
+                                 body=b"\xef\xbb\xbfWORD\xe3\xab")
        self.assertEqual(r6.encoding, 'utf-8')
        self.assertEqual(r6.body_as_unicode(), u'WORD\ufffd\ufffd')

@@ -227,7 +225,7 @@ class TextResponseTest(BaseResponseTest):
        # response.body_as_unicode() in indistint order doesn't affect final
        # values for encoding and decoded body.
        url = 'http://example.com'
-        body = "\xef\xbb\xbfWORD"
+        body = b"\xef\xbb\xbfWORD"
        headers = {"Content-type": ["text/html; charset=utf-8"]}

        # Test response without content-type and BOM encoding
@@ -250,7 +248,7 @@ class TextResponseTest(BaseResponseTest):

    def test_replace_wrong_encoding(self):
        """Test invalid chars are replaced properly"""
-        r = self.response_class("http://www.example.com", encoding='utf-8', body='PREFIX\xe3\xabSUFFIX')
+        r = self.response_class("http://www.example.com", encoding='utf-8', body=b'PREFIX\xe3\xabSUFFIX')
        # XXX: Policy for replacing invalid chars may suffer minor variations
        # but it should always contain the unicode replacement char (u'\ufffd')
        assert u'\ufffd' in r.body_as_unicode(), repr(r.body_as_unicode())
@@ -259,7 +257,7 @@ class TextResponseTest(BaseResponseTest):

        # Do not destroy html tags due to encoding bugs
        r = self.response_class("http://example.com", encoding='utf-8', \
-                body='\xf0<span>value</span>')
+                body=b'\xf0<span>value</span>')
        assert u'<span>value</span>' in r.body_as_unicode(), repr(r.body_as_unicode())

        # FIXME: This test should pass once we stop using BeautifulSoup's UnicodeDammit in TextResponse
@@ -267,7 +265,7 @@ class TextResponseTest(BaseResponseTest):
        #assert u'\ufffd' in r.body_as_unicode(), repr(r.body_as_unicode())

    def test_selector(self):
-        body = "<html><head><title>Some page</title><body></body></html>"
+        body = b"<html><head><title>Some page</title><body></body></html>"
        response = self.response_class("http://www.example.com", body=body)

        self.assertIsInstance(response.selector, Selector)
@@ -289,7 +287,7 @@ class TextResponseTest(BaseResponseTest):
        )

    def test_selector_shortcuts(self):
-        body = "<html><head><title>Some page</title><body></body></html>"
+        body = b"<html><head><title>Some page</title><body></body></html>"
        response = self.response_class("http://www.example.com", body=body)

        self.assertEqual(
@@ -303,17 +301,17 @@ class TextResponseTest(BaseResponseTest):

    def test_urljoin_with_base_url(self):
        """Test urljoin shortcut which also evaluates base-url through get_base_url()."""
-        body = '<html><body><base href="https://example.net"></body></html>'
+        body = b'<html><body><base href="https://example.net"></body></html>'
        joined = self.response_class('http://www.example.com', body=body).urljoin('/test')
        absolute = 'https://example.net/test'
        self.assertEqual(joined, absolute)

-        body = '<html><body><base href="/elsewhere"></body></html>'
+        body = b'<html><body><base href="/elsewhere"></body></html>'
        joined = self.response_class('http://www.example.com', body=body).urljoin('test')
        absolute = 'http://www.example.com/test'
        self.assertEqual(joined, absolute)

-        body = '<html><body><base href="/elsewhere/"></body></html>'
+        body = b'<html><body><base href="/elsewhere/"></body></html>'
        joined = self.response_class('http://www.example.com', body=body).urljoin('test')
        absolute = 'http://www.example.com/elsewhere/test'
        self.assertEqual(joined, absolute)
@@ -325,13 +323,13 @@ class HtmlResponseTest(TextResponseTest):

    def test_html_encoding(self):

-        body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+        body = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
        </head><body>Price: \xa3100</body></html>'
        """
        r1 = self.response_class("http://www.example.com", body=body)
        self._assert_response_values(r1, 'iso-8859-1', body)

-        body = """<?xml version="1.0" encoding="iso-8859-1"?>
+        body = b"""<?xml version="1.0" encoding="iso-8859-1"?>
        <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
        Price: \xa3100
        """
@@ -339,19 +337,19 @@ class HtmlResponseTest(TextResponseTest):
        self._assert_response_values(r2, 'iso-8859-1', body)

        # for conflicting declarations headers must take precedence
-        body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+        body = b"""<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
        </head><body>Price: \xa3100</body></html>'
        """
        r3 = self.response_class("http://www.example.com", headers={"Content-type": ["text/html; charset=iso-8859-1"]}, body=body)
        self._assert_response_values(r3, 'iso-8859-1', body)

        # make sure replace() preserves the encoding of the original response
-        body = "New body \xa3"
+        body = b"New body \xa3"
        r4 = r3.replace(body=body)
        self._assert_response_values(r4, 'iso-8859-1', body)

    def test_html5_meta_charset(self):
-        body = """<html><head><meta charset="gb2312" /><title>Some page</title><body>bla bla</body>"""
+        body = b"""<html><head><meta charset="gb2312" /><title>Some page</title><body>bla bla</body>"""
        r1 = self.response_class("http://www.example.com", body=body)
        self._assert_response_values(r1, 'gb2312', body)

@@ -361,26 +359,25 @@ class XmlResponseTest(TextResponseTest):
    response_class = XmlResponse

    def test_xml_encoding(self):
-
-        body = "<xml></xml>"
+        body = b"<xml></xml>"
        r1 = self.response_class("http://www.example.com", body=body)
        self._assert_response_values(r1, self.response_class._DEFAULT_ENCODING, body)

-        body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
+        body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
        r2 = self.response_class("http://www.example.com", body=body)
        self._assert_response_values(r2, 'iso-8859-1', body)

        # make sure replace() preserves the explicit encoding passed in the constructor
-        body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
+        body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
        r3 = self.response_class("http://www.example.com", body=body, encoding='utf-8')
-        body2 = "New body"
+        body2 = b"New body"
        r4 = r3.replace(body=body2)
        self._assert_response_values(r4, 'utf-8', body2)

    def test_replace_encoding(self):
        # make sure replace() keeps the previous encoding unless overridden explicitly
-        body = """<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
-        body2 = """<?xml version="1.0" encoding="utf-8"?><xml></xml>"""
+        body = b"""<?xml version="1.0" encoding="iso-8859-1"?><xml></xml>"""
+        body2 = b"""<?xml version="1.0" encoding="utf-8"?><xml></xml>"""
        r5 = self.response_class("http://www.example.com", body=body)
        r6 = r5.replace(body=body2)
        r7 = r5.replace(body=body2, encoding='utf-8')
@@ -389,7 +386,7 @@ class XmlResponseTest(TextResponseTest):
        self._assert_response_values(r7, 'utf-8', body2)

    def test_selector(self):
-        body = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
+        body = b'<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
        response = self.response_class("http://www.example.com", body=body)

        self.assertIsInstance(response.selector, Selector)
@@ -403,15 +400,10 @@ class XmlResponseTest(TextResponseTest):
        )

    def test_selector_shortcuts(self):
-        body = '<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
+        body = b'<?xml version="1.0" encoding="utf-8"?><xml><elem>value</elem></xml>'
        response = self.response_class("http://www.example.com", body=body)

        self.assertEqual(
            response.xpath("//elem/text()").extract(),
            response.selector.xpath("//elem/text()").extract(),
        )
-
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/tests/test_middleware.py
+++ b/tests/test_middleware.py
@@ -3,6 +3,7 @@ from twisted.trial import unittest
 from scrapy.settings import Settings
 from scrapy.exceptions import NotConfigured
 from scrapy.middleware import MiddlewareManager
+import six

 class M1(object):

@@ -65,12 +66,20 @@ class MiddlewareManagerTest(unittest.TestCase):

    def test_methods(self):
        mwman = TestMiddlewareManager(M1(), M2(), M3())
-        self.assertEqual([x.im_class for x in mwman.methods['open_spider']],
-            [M1, M2])
-        self.assertEqual([x.im_class for x in mwman.methods['close_spider']],
-            [M2, M1])
-        self.assertEqual([x.im_class for x in mwman.methods['process']],
-            [M1, M3])
+        if six.PY2:
+            self.assertEqual([x.im_class for x in mwman.methods['open_spider']],
+                [M1, M2])
+            self.assertEqual([x.im_class for x in mwman.methods['close_spider']],
+                [M2, M1])
+            self.assertEqual([x.im_class for x in mwman.methods['process']],
+                [M1, M3])
+        else:
+            self.assertEqual([x.__self__.__class__ for x in mwman.methods['open_spider']],
+                [M1, M2])
+            self.assertEqual([x.__self__.__class__ for x in mwman.methods['close_spider']],
+                [M2, M1])
+            self.assertEqual([x.__self__.__class__ for x in mwman.methods['process']],
+                [M1, M3])

    def test_enabled(self):
        m1, m2, m3 = M1(), M2(), M3()
--- a/tests/test_pipeline_files.py
+++ b/tests/test_pipeline_files.py
@@ -192,7 +192,7 @@ def _create_item_with_files(*files):
 def _prepare_request_object(item_url):
    return Request(
        item_url,
-        meta={'response': Response(item_url, status=200, body='data')})
+        meta={'response': Response(item_url, status=200, body=b'data')})


 if __name__ == "__main__":
--- a/tests/test_pipeline_media.py
+++ b/tests/test_pipeline_media.py
@@ -44,7 +44,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):

    def test_default_media_downloaded(self):
        request = Request('http://url')
-        response = Response('http://url', body='')
+        response = Response('http://url', body=b'')
        assert self.pipe.media_downloaded(response, request, self.info) is response

    def test_default_media_failed(self):
--- a/tests/test_responsetypes.py
+++ b/tests/test_responsetypes.py
@@ -51,7 +51,7 @@ class ResponseTypesTest(unittest.TestCase):
        for source, cls in mappings:
            retcls = responsetypes.from_body(source)
            assert retcls is cls, "%s ==> %s != %s" % (source, retcls, cls)
-        
+
    def test_from_headers(self):
        mappings = [
            ({'Content-Type': ['text/html; charset=utf-8']}, HtmlResponse),
--- a/tests/test_utils_defer.py
+++ b/tests/test_utils_defer.py
@@ -5,6 +5,8 @@ from twisted.python.failure import Failure
 from scrapy.utils.defer import mustbe_deferred, process_chain, \
    process_chain_both, process_parallel, iter_errback

+from six.moves import xrange
+

 class MustbeDeferredTest(unittest.TestCase):
    def test_success_function(self):
@@ -86,7 +88,7 @@ class IterErrbackTest(unittest.TestCase):

        errors = []
        out = list(iter_errback(itergood(), errors.append))
-        self.assertEqual(out, range(10))
+        self.assertEqual(out, list(range(10)))
        self.failIf(errors)

    def test_iter_errback_bad(self):
--- a/tests/test_utils_request.py
+++ b/tests/test_utils_request.py
@@ -21,15 +21,15 @@ class UtilsRequestTest(unittest.TestCase):

        r1 = Request("http://www.example.com/members/offers.html")
        r2 = Request("http://www.example.com/members/offers.html")
-        r2.headers['SESSIONID'] = "somehash"
+        r2.headers['SESSIONID'] = b"somehash"
        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2))

        r1 = Request("http://www.example.com/")
        r2 = Request("http://www.example.com/")
-        r2.headers['Accept-Language'] = 'en'
+        r2.headers['Accept-Language'] = b'en'
        r3 = Request("http://www.example.com/")
-        r3.headers['Accept-Language'] = 'en'
-        r3.headers['SESSIONID'] = "somehash"
+        r3.headers['Accept-Language'] = b'en'
+        r3.headers['SESSIONID'] = b"somehash"

        self.assertEqual(request_fingerprint(r1), request_fingerprint(r2), request_fingerprint(r3))

@@ -44,7 +44,7 @@ class UtilsRequestTest(unittest.TestCase):

        r1 = Request("http://www.example.com")
        r2 = Request("http://www.example.com", method='POST')
-        r3 = Request("http://www.example.com", method='POST', body='request body')
+        r3 = Request("http://www.example.com", method='POST', body=b'request body')

        self.assertNotEqual(request_fingerprint(r1), request_fingerprint(r2))
        self.assertNotEqual(request_fingerprint(r2), request_fingerprint(r3))
@@ -52,24 +52,24 @@ class UtilsRequestTest(unittest.TestCase):
        # cached fingerprint must be cleared on request copy
        r1 = Request("http://www.example.com")
        fp1 = request_fingerprint(r1)
-        r2 = r1.replace(url = "http://www.example.com/other")
+        r2 = r1.replace(url="http://www.example.com/other")
        fp2 = request_fingerprint(r2)
        self.assertNotEqual(fp1, fp2)

    def test_request_authenticate(self):
        r = Request("http://www.example.com")
        request_authenticate(r, 'someuser', 'somepass')
-        self.assertEqual(r.headers['Authorization'], 'Basic c29tZXVzZXI6c29tZXBhc3M=')
+        self.assertEqual(r.headers['Authorization'], b'Basic c29tZXVzZXI6c29tZXBhc3M=')

    def test_request_httprepr(self):
        r1 = Request("http://www.example.com")
-        self.assertEqual(request_httprepr(r1), 'GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n')
+        self.assertEqual(request_httprepr(r1), b'GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n')

        r1 = Request("http://www.example.com/some/page.html?arg=1")
-        self.assertEqual(request_httprepr(r1), 'GET /some/page.html?arg=1 HTTP/1.1\r\nHost: www.example.com\r\n\r\n')
+        self.assertEqual(request_httprepr(r1), b'GET /some/page.html?arg=1 HTTP/1.1\r\nHost: www.example.com\r\n\r\n')

-        r1 = Request("http://www.example.com", method='POST', headers={"Content-type": "text/html"}, body="Some body")
-        self.assertEqual(request_httprepr(r1), 'POST / HTTP/1.1\r\nHost: www.example.com\r\nContent-Type: text/html\r\n\r\nSome body')
+        r1 = Request("http://www.example.com", method='POST', headers={"Content-type": b"text/html"}, body=b"Some body")
+        self.assertEqual(request_httprepr(r1), b'POST / HTTP/1.1\r\nHost: www.example.com\r\nContent-Type: text/html\r\n\r\nSome body')

 if __name__ == "__main__":
    unittest.main()
--- a/tests/test_utils_signal.py
+++ b/tests/test_utils_signal.py
@@ -2,8 +2,8 @@ from testfixtures import LogCapture
 from twisted.trial import unittest
 from twisted.python.failure import Failure
 from twisted.internet import defer, reactor
+from pydispatch import dispatcher

-from scrapy.xlib.pydispatch import dispatcher
 from scrapy.utils.signal import send_catch_log, send_catch_log_deferred


--- a/tests/test_utils_url.py
+++ b/tests/test_utils_url.py
@@ -1,7 +1,10 @@
+# -*- coding: utf-8 -*-
 import unittest

+import six
 from scrapy.spiders import Spider
-from scrapy.utils.url import url_is_from_any_domain, url_is_from_spider, canonicalize_url
+from scrapy.utils.url import (url_is_from_any_domain, url_is_from_spider,
+                              canonicalize_url)

 __doctests__ = ['scrapy.utils.url']

@@ -70,18 +73,23 @@ class UrlUtilsTest(unittest.TestCase):
        self.assertTrue(url_is_from_spider('http://www.example.net/some/page.html', MySpider))
        self.assertFalse(url_is_from_spider('http://www.example.us/some/page.html', MySpider))

+
+class CanonicalizeUrlTest(unittest.TestCase):
+
    def test_canonicalize_url(self):
        # simplest case
        self.assertEqual(canonicalize_url("http://www.example.com/"),
                                          "http://www.example.com/")

-        # always return a str
+    def test_return_str(self):
        assert isinstance(canonicalize_url(u"http://www.example.com"), str)
+        assert isinstance(canonicalize_url(b"http://www.example.com"), str)

-        # append missing path
+    def test_append_missing_path(self):
        self.assertEqual(canonicalize_url("http://www.example.com"),
                                          "http://www.example.com/")
-        # typical usage
+
+    def test_typical_usage(self):
        self.assertEqual(canonicalize_url("http://www.example.com/do?a=1&b=2&c=3"),
                                          "http://www.example.com/do?a=1&b=2&c=3")
        self.assertEqual(canonicalize_url("http://www.example.com/do?c=1&b=2&a=3"),
@@ -89,11 +97,11 @@ class UrlUtilsTest(unittest.TestCase):
        self.assertEqual(canonicalize_url("http://www.example.com/do?&a=1"),
                                          "http://www.example.com/do?a=1")

-        # sorting by argument values
+    def test_sorting(self):
        self.assertEqual(canonicalize_url("http://www.example.com/do?c=3&b=5&b=2&a=50"),
                                          "http://www.example.com/do?a=50&b=2&b=5&c=3")

-        # using keep_blank_values
+    def test_keep_blank_values(self):
        self.assertEqual(canonicalize_url("http://www.example.com/do?b=&a=2", keep_blank_values=False),
                                          "http://www.example.com/do?a=2")
        self.assertEqual(canonicalize_url("http://www.example.com/do?b=&a=2"),
@@ -106,7 +114,7 @@ class UrlUtilsTest(unittest.TestCase):
        self.assertEqual(canonicalize_url(u'http://www.example.com/do?1750,4'),
                                           'http://www.example.com/do?1750%2C4=')

-        # spaces
+    def test_spaces(self):
        self.assertEqual(canonicalize_url("http://www.example.com/do?q=a space&a=1"),
                                          "http://www.example.com/do?a=1&q=a+space")
        self.assertEqual(canonicalize_url("http://www.example.com/do?q=a+space&a=1"),
@@ -114,43 +122,52 @@ class UrlUtilsTest(unittest.TestCase):
        self.assertEqual(canonicalize_url("http://www.example.com/do?q=a%20space&a=1"),
                                          "http://www.example.com/do?a=1&q=a+space")

-        # normalize percent-encoding case (in paths)
+    @unittest.skipUnless(six.PY2, "TODO")
+    def test_normalize_percent_encoding_in_paths(self):
        self.assertEqual(canonicalize_url("http://www.example.com/a%a3do"),
                                          "http://www.example.com/a%A3do"),
-        # normalize percent-encoding case (in query arguments)
+
+    @unittest.skipUnless(six.PY2, "TODO")
+    def test_normalize_percent_encoding_in_query_arguments(self):
        self.assertEqual(canonicalize_url("http://www.example.com/do?k=b%a3"),
                                          "http://www.example.com/do?k=b%A3")

-        # non-ASCII percent-encoding in paths
+    def test_non_ascii_percent_encoding_in_paths(self):
        self.assertEqual(canonicalize_url("http://www.example.com/a do?a=1"),
                                          "http://www.example.com/a%20do?a=1"),
        self.assertEqual(canonicalize_url("http://www.example.com/a %20do?a=1"),
                                          "http://www.example.com/a%20%20do?a=1"),
-        self.assertEqual(canonicalize_url("http://www.example.com/a do\xc2\xa3.html?a=1"),
+        self.assertEqual(canonicalize_url(u"http://www.example.com/a do£.html?a=1"),
                                          "http://www.example.com/a%20do%C2%A3.html?a=1")
-        # non-ASCII percent-encoding in query arguments
+        self.assertEqual(canonicalize_url(b"http://www.example.com/a do\xc2\xa3.html?a=1"),
+                                          "http://www.example.com/a%20do%C2%A3.html?a=1")
+
+    def test_non_ascii_percent_encoding_in_query_arguments(self):
        self.assertEqual(canonicalize_url(u"http://www.example.com/do?price=\xa3500&a=5&z=3"),
                                          u"http://www.example.com/do?a=5&price=%C2%A3500&z=3")
-        self.assertEqual(canonicalize_url("http://www.example.com/do?price=\xc2\xa3500&a=5&z=3"),
+        self.assertEqual(canonicalize_url(b"http://www.example.com/do?price=\xc2\xa3500&a=5&z=3"),
                                          "http://www.example.com/do?a=5&price=%C2%A3500&z=3")
-        self.assertEqual(canonicalize_url("http://www.example.com/do?price(\xc2\xa3)=500&a=1"),
+        self.assertEqual(canonicalize_url(b"http://www.example.com/do?price(\xc2\xa3)=500&a=1"),
                                          "http://www.example.com/do?a=1&price%28%C2%A3%29=500")

-        # urls containing auth and ports
+    def test_urls_with_auth_and_ports(self):
        self.assertEqual(canonicalize_url(u"http://user:pass@www.example.com:81/do?now=1"),
                                          u"http://user:pass@www.example.com:81/do?now=1")

-        # remove fragments
+    def test_remove_fragments(self):
        self.assertEqual(canonicalize_url(u"http://user:pass@www.example.com/do?a=1#frag"),
                                          u"http://user:pass@www.example.com/do?a=1")
        self.assertEqual(canonicalize_url(u"http://user:pass@www.example.com/do?a=1#frag", keep_fragments=True),
                                          u"http://user:pass@www.example.com/do?a=1#frag")

+    def test_dont_convert_safe_characters(self):
        # dont convert safe characters to percent encoding representation
        self.assertEqual(canonicalize_url(
            "http://www.simplybedrooms.com/White-Bedroom-Furniture/Bedroom-Mirror:-Josephine-Cheval-Mirror.html"),
            "http://www.simplybedrooms.com/White-Bedroom-Furniture/Bedroom-Mirror:-Josephine-Cheval-Mirror.html")

+    @unittest.skipUnless(six.PY2, "TODO")
+    def test_safe_characters_unicode(self):
        # urllib.quote uses a mapping cache of encoded characters. when parsing
        # an already percent-encoded url, it will fail if that url was not
        # percent-encoded as utf-8, that's why canonicalize_url must always
@@ -159,11 +176,11 @@ class UrlUtilsTest(unittest.TestCase):
        self.assertEqual(canonicalize_url(u'http://www.example.com/caf%E9-con-leche.htm'),
                                           'http://www.example.com/caf%E9-con-leche.htm')

-        # domains are case insensitive
+    def test_domains_are_case_insensitive(self):
        self.assertEqual(canonicalize_url("http://www.EXAMPLE.com/"),
                                          "http://www.example.com/")

-        # quoted slash and question sign
+    def test_quoted_slash_and_question_sign(self):
        self.assertEqual(canonicalize_url("http://foo.com/AC%2FDC+rocks%3f/?yeah=1"),
                         "http://foo.com/AC%2FDC+rocks%3F/?yeah=1")
        self.assertEqual(canonicalize_url("http://foo.com/AC%2FDC/"),