mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-26 07:44:38 +00:00
headers: cleanup Headers class and add normvalue method to CaselessDict
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40912
This commit is contained in:
parent
f4b55bfd20
commit
934535740b
@ -10,10 +10,10 @@ from pydispatch import dispatcher
|
||||
from scrapy.core import signals
|
||||
from scrapy import log
|
||||
from scrapy.http import Response, Headers
|
||||
from scrapy.http.headers import headers_dict_to_raw
|
||||
from scrapy.core.exceptions import NotConfigured, HttpException, IgnoreRequest
|
||||
from scrapy.core.downloader.responsetypes import responsetypes
|
||||
from scrapy.utils.request import request_fingerprint
|
||||
from scrapy.utils.http import headers_dict_to_raw, headers_raw_to_dict
|
||||
from scrapy.conf import settings
|
||||
|
||||
class CacheMiddleware(object):
|
||||
@ -149,7 +149,7 @@ class Cache(object):
|
||||
responseheaders = f.read()
|
||||
|
||||
url = metadata['url']
|
||||
headers = Headers(responseheaders)
|
||||
headers = Headers(headers_raw_to_dict(responseheaders))
|
||||
status = metadata['status']
|
||||
|
||||
respcls = responsetypes.from_args(headers=headers, url=url)
|
||||
|
@ -1,90 +1,23 @@
|
||||
from scrapy.utils.datatypes import CaselessDict
|
||||
|
||||
def headers_raw_to_dict(headers_raw):
|
||||
"""
|
||||
Convert raw headers (single multi-line string)
|
||||
to the dictionary.
|
||||
|
||||
For example:
|
||||
>>> headers_raw_to_dict("Content-type: text/html\\n\\rAccept: gzip\\n\\n")
|
||||
{'Content-type': ['text/html'], 'Accept': ['gzip']}
|
||||
|
||||
Incorrect input:
|
||||
>>> headers_raw_to_dict("Content-typt gzip\\n\\n")
|
||||
{}
|
||||
|
||||
Argument is None:
|
||||
>>> headers_raw_to_dict(None)
|
||||
"""
|
||||
if headers_raw is None:
|
||||
return None
|
||||
return dict([
|
||||
(header_item[0].strip(), [header_item[1].strip()])
|
||||
for header_item
|
||||
in [
|
||||
header.split(':', 1)
|
||||
for header
|
||||
in headers_raw.splitlines()]
|
||||
if len(header_item) == 2])
|
||||
|
||||
def headers_dict_to_raw(headers_dict):
|
||||
"""
|
||||
Returns a raw HTTP headers representation of headers
|
||||
|
||||
For example:
|
||||
>>> headers_dict_to_raw({'Content-type': 'text/html', 'Accept': 'gzip'})
|
||||
'Content-type: text/html\\r\\nAccept: gzip'
|
||||
>>> from twisted.python.util import InsensitiveDict
|
||||
>>> td = InsensitiveDict({'Content-type': ['text/html'], 'Accept': ['gzip']})
|
||||
>>> headers_dict_to_raw(td)
|
||||
'Content-type: text/html\\r\\nAccept: gzip'
|
||||
|
||||
Argument is None:
|
||||
>>> headers_dict_to_raw(None)
|
||||
|
||||
"""
|
||||
if headers_dict is None:
|
||||
return None
|
||||
raw_lines = []
|
||||
for key, value in headers_dict.items():
|
||||
if isinstance(value, (str, unicode)):
|
||||
raw_lines.append("%s: %s" % (key, value))
|
||||
elif isinstance(value, (list, tuple)):
|
||||
for v in value:
|
||||
raw_lines.append("%s: %s" % (key, v))
|
||||
return '\r\n'.join(raw_lines)
|
||||
from scrapy.utils.http import headers_dict_to_raw
|
||||
|
||||
|
||||
class Headers(CaselessDict):
|
||||
def __init__(self, dictorstr=None, fromdict=None, fromstr=None, encoding='utf-8'):
|
||||
def __init__(self, seq=None, encoding='utf-8'):
|
||||
self.encoding = encoding
|
||||
|
||||
if dictorstr is not None:
|
||||
if isinstance(dictorstr, dict):
|
||||
d = dictorstr
|
||||
elif isinstance(dictorstr, basestring):
|
||||
d = headers_raw_to_dict(dictorstr)
|
||||
elif fromdict is not None:
|
||||
d = fromdict
|
||||
elif fromstr is not None:
|
||||
d = headers_raw_to_dict(fromstr)
|
||||
else:
|
||||
d = {}
|
||||
|
||||
# can't use CaselessDict.__init__(self, d) because it doesn't call __setitem__
|
||||
for k,v in d.iteritems():
|
||||
self.__setitem__(k.lower(), v)
|
||||
super(Headers, self).__init__(seq)
|
||||
|
||||
def normkey(self, key):
|
||||
return key.title()
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
"""Headers must not be unicode"""
|
||||
if isinstance(key, unicode):
|
||||
key = key.encode(self.encoding)
|
||||
return key.title()
|
||||
|
||||
def normvalue(self, value):
|
||||
"""Headers must not be unicode"""
|
||||
if isinstance(value, unicode):
|
||||
value = value.encode(self.encoding)
|
||||
super(Headers, self).__setitem__(key, value)
|
||||
return value
|
||||
|
||||
def to_string(self):
|
||||
return headers_dict_to_raw(self)
|
||||
|
@ -81,3 +81,41 @@ class CaselessDictTest(unittest.TestCase):
|
||||
self.assertEqual(d.pop('A'), 1)
|
||||
self.assertRaises(KeyError, d.pop, 'A')
|
||||
|
||||
def test_normkey(self):
|
||||
class MyDict(CaselessDict):
|
||||
def normkey(self, key):
|
||||
return key.title()
|
||||
|
||||
d = MyDict()
|
||||
d['key-one'] = 2
|
||||
self.assertEqual(list(d.keys()), ['Key-One'])
|
||||
|
||||
def test_normvalue(self):
|
||||
class MyDict(CaselessDict):
|
||||
def normvalue(self, value):
|
||||
if value is not None:
|
||||
return value + 1
|
||||
|
||||
d = MyDict({'key': 1})
|
||||
self.assertEqual(d['key'], 2)
|
||||
self.assertEqual(d.get('key'), 2)
|
||||
|
||||
d = MyDict()
|
||||
d['key'] = 1
|
||||
self.assertEqual(d['key'], 2)
|
||||
self.assertEqual(d.get('key'), 2)
|
||||
|
||||
d = MyDict()
|
||||
d.setdefault('key', 1)
|
||||
self.assertEqual(d['key'], 2)
|
||||
self.assertEqual(d.get('key'), 2)
|
||||
|
||||
d = MyDict()
|
||||
d.update({'key': 1})
|
||||
self.assertEqual(d['key'], 2)
|
||||
self.assertEqual(d.get('key'), 2)
|
||||
|
||||
d = MyDict.fromkeys(('key',), 1)
|
||||
self.assertEqual(d['key'], 2)
|
||||
self.assertEqual(d.get('key'), 2)
|
||||
|
||||
|
@ -183,6 +183,7 @@ class SiteNode(object):
|
||||
s += node.to_string(level+1)
|
||||
return s
|
||||
|
||||
|
||||
class CaselessDict(dict):
|
||||
def __init__(self, seq=None):
|
||||
dict.__init__(self)
|
||||
@ -193,7 +194,7 @@ class CaselessDict(dict):
|
||||
return dict.__getitem__(self, self.normkey(key))
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
dict.__setitem__(self, self.normkey(key), value)
|
||||
dict.__setitem__(self, self.normkey(key), self.normvalue(value))
|
||||
|
||||
def __delitem__(self, key):
|
||||
dict.__delitem__(self, self.normkey(key))
|
||||
@ -203,17 +204,23 @@ class CaselessDict(dict):
|
||||
has_key = __contains__
|
||||
|
||||
def normkey(self, key):
|
||||
"""Method to normalize dictionary key access"""
|
||||
return key.lower()
|
||||
|
||||
def normvalue(self, value):
|
||||
"""Method to normalize values prior to be setted"""
|
||||
return value
|
||||
|
||||
def get(self, key, def_val=None):
|
||||
return dict.get(self, self.normkey(key), def_val)
|
||||
return dict.get(self, self.normkey(key), self.normvalue(def_val))
|
||||
|
||||
def setdefault(self, key, def_val=None):
|
||||
return dict.setdefault(self, self.normkey(key), def_val)
|
||||
return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
|
||||
|
||||
def update(self, seq):
|
||||
items = seq.iteritems() if isinstance(seq, dict) else seq
|
||||
dict.update(self, ((self.normkey(k), v) for k, v in items))
|
||||
seq = seq.iteritems() if isinstance(seq, dict) else seq
|
||||
iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
|
||||
super(CaselessDict, self).update(iseq)
|
||||
|
||||
@classmethod
|
||||
def fromkeys(cls, keys, value=None):
|
||||
@ -222,6 +229,7 @@ class CaselessDict(dict):
|
||||
def pop(self, key, *args):
|
||||
return dict.pop(self, self.normkey(key), *args)
|
||||
|
||||
|
||||
class PriorityQueue(object):
|
||||
"""A simple priority queue"""
|
||||
|
||||
|
58
scrapy/trunk/scrapy/utils/http.py
Normal file
58
scrapy/trunk/scrapy/utils/http.py
Normal file
@ -0,0 +1,58 @@
|
||||
|
||||
def headers_raw_to_dict(headers_raw):
    """
    Convert raw headers (a single multi-line string) to a dictionary.

    Each line is split on its first ':'. The stripped header name maps to a
    list of stripped values. Lines that contain no ':' are ignored. When the
    same header name appears on several lines, every value is kept in order
    of appearance, so the result can be fed back to headers_dict_to_raw()
    without losing repeated headers. Header names are compared exactly
    (no case folding is done here).

    Returns None when headers_raw is None, otherwise a plain dict.

    For example:
    >>> headers_raw_to_dict("Content-type: text/html\\n\\rAccept: gzip\\n\\n")
    {'Content-type': ['text/html'], 'Accept': ['gzip']}

    Repeated header:
    >>> headers_raw_to_dict("Set-Cookie: a=1\\r\\nSet-Cookie: b=2")
    {'Set-Cookie': ['a=1', 'b=2']}

    Incorrect input:
    >>> headers_raw_to_dict("Content-typt gzip\\n\\n")
    {}

    Argument is None:
    >>> headers_raw_to_dict(None)

    """
    if headers_raw is None:
        return None
    headers = {}
    for line in headers_raw.splitlines():
        name, colon, value = line.partition(':')
        if not colon:
            # No colon on this line (blank or malformed): not a header.
            continue
        # Append instead of assigning so repeated headers are not overwritten.
        headers.setdefault(name.strip(), []).append(value.strip())
    return headers
|
||||
|
||||
|
||||
def headers_dict_to_raw(headers_dict):
    """
    Returns a raw HTTP headers representation of headers

    headers_dict is any mapping exposing items(). A string value produces a
    single "Name: value" line; a list or tuple value produces one line per
    element. Values of any other type are skipped. Lines are joined with
    CRLF and no trailing line terminator is added.

    Returns None when headers_dict is None.

    For example:
    >>> headers_dict_to_raw({'Content-type': 'text/html', 'Accept': 'gzip'})
    'Content-type: text/html\\r\\nAccept: gzip'
    >>> from twisted.python.util import InsensitiveDict
    >>> td = InsensitiveDict({'Content-type': ['text/html'], 'Accept': ['gzip']})
    >>> headers_dict_to_raw(td)
    'Content-type: text/html\\r\\nAccept: gzip'

    Argument is None:
    >>> headers_dict_to_raw(None)

    """
    if headers_dict is None:
        return None
    # `type(u'')` is `unicode` on Python 2 and `str` on Python 3, so this
    # avoids naming the Python 2-only `unicode` builtin directly.
    string_types = (str, type(u''))
    raw_lines = []
    for key, value in headers_dict.items():
        if isinstance(value, string_types):
            raw_lines.append("%s: %s" % (key, value))
        elif isinstance(value, (list, tuple)):
            # Multi-valued header: emit one line per value.
            raw_lines.extend("%s: %s" % (key, v) for v in value)
    return '\r\n'.join(raw_lines)
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user