1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 07:44:38 +00:00

headers: cleanup Headers class and add normvalue method to CaselessDict

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40912
This commit is contained in:
Daniel Grana 2009-02-24 06:58:12 +00:00
parent f4b55bfd20
commit 934535740b
5 changed files with 119 additions and 82 deletions

View File

@ -10,10 +10,10 @@ from pydispatch import dispatcher
from scrapy.core import signals
from scrapy import log
from scrapy.http import Response, Headers
from scrapy.http.headers import headers_dict_to_raw
from scrapy.core.exceptions import NotConfigured, HttpException, IgnoreRequest
from scrapy.core.downloader.responsetypes import responsetypes
from scrapy.utils.request import request_fingerprint
from scrapy.utils.http import headers_dict_to_raw, headers_raw_to_dict
from scrapy.conf import settings
class CacheMiddleware(object):
@ -149,7 +149,7 @@ class Cache(object):
responseheaders = f.read()
url = metadata['url']
headers = Headers(responseheaders)
headers = Headers(headers_raw_to_dict(responseheaders))
status = metadata['status']
respcls = responsetypes.from_args(headers=headers, url=url)

View File

@ -1,90 +1,23 @@
from scrapy.utils.datatypes import CaselessDict
def headers_raw_to_dict(headers_raw):
    """
    Convert raw headers (a single multi-line string) to a dictionary.

    Every line is split on its first colon: the stripped text before it is
    the key and the stripped text after it is appended to that key's list
    of values. Lines without a colon (blank or malformed) are ignored.
    A header name that occurs on several lines keeps all of its values, in
    order of appearance. Keys are compared exactly (case-sensitively).

    For example:

    >>> headers_raw_to_dict("Content-type: text/html\\n\\rAccept: gzip\\n\\n")
    {'Content-type': ['text/html'], 'Accept': ['gzip']}

    Repeated header:

    >>> headers_raw_to_dict("Set-Cookie: a=1\\r\\nSet-Cookie: b=2")
    {'Set-Cookie': ['a=1', 'b=2']}

    Incorrect input:

    >>> headers_raw_to_dict("Content-typt gzip\\n\\n")
    {}

    Argument is None:

    >>> headers_raw_to_dict(None)

    """
    if headers_raw is None:
        return None
    headers = {}
    for line in headers_raw.splitlines():
        name, colon, value = line.partition(':')
        if not colon:
            # No colon on this line (blank or malformed): skip it.
            continue
        # Accumulate instead of overwriting so repeated headers keep
        # every value.
        headers.setdefault(name.strip(), []).append(value.strip())
    return headers
def headers_dict_to_raw(headers_dict):
    """
    Returns a raw HTTP headers representation of headers

    String values produce one "Key: value" line; list or tuple values
    produce one line per element. Values of any other type are skipped.
    Lines are joined with CRLF, without a trailing line terminator.

    For example:

    >>> headers_dict_to_raw({'Content-type': 'text/html', 'Accept': 'gzip'})
    'Content-type: text/html\\r\\nAccept: gzip'

    >>> from twisted.python.util import InsensitiveDict
    >>> td = InsensitiveDict({'Content-type': ['text/html'], 'Accept': ['gzip']})
    >>> headers_dict_to_raw(td)
    'Content-type: text/html\\r\\nAccept: gzip'

    Argument is None:

    >>> headers_dict_to_raw(None)

    """
    if headers_dict is None:
        return None
    # (str, unicode) on Python 2; on Python 3 both entries are str. Avoids
    # naming the `unicode` builtin, which does not exist on Python 3.
    string_types = (str, type(u''))
    raw_lines = []
    for key, value in headers_dict.items():
        if isinstance(value, string_types):
            raw_lines.append("%s: %s" % (key, value))
        elif isinstance(value, (list, tuple)):
            raw_lines.extend("%s: %s" % (key, v) for v in value)
    return '\r\n'.join(raw_lines)
from scrapy.utils.http import headers_dict_to_raw
class Headers(CaselessDict):
def __init__(self, dictorstr=None, fromdict=None, fromstr=None, encoding='utf-8'):
def __init__(self, seq=None, encoding='utf-8'):
self.encoding = encoding
if dictorstr is not None:
if isinstance(dictorstr, dict):
d = dictorstr
elif isinstance(dictorstr, basestring):
d = headers_raw_to_dict(dictorstr)
elif fromdict is not None:
d = fromdict
elif fromstr is not None:
d = headers_raw_to_dict(fromstr)
else:
d = {}
# can't use CaselessDict.__init__(self, d) because it doesn't call __setitem__
for k,v in d.iteritems():
self.__setitem__(k.lower(), v)
super(Headers, self).__init__(seq)
def normkey(self, key):
return key.title()
def __setitem__(self, key, value):
"""Headers must not be unicode"""
if isinstance(key, unicode):
key = key.encode(self.encoding)
return key.title()
def normvalue(self, value):
"""Headers must not be unicode"""
if isinstance(value, unicode):
value = value.encode(self.encoding)
super(Headers, self).__setitem__(key, value)
return value
def to_string(self):
    """Return these headers as raw text, as built by headers_dict_to_raw."""
    return headers_dict_to_raw(self)

View File

@ -81,3 +81,41 @@ class CaselessDictTest(unittest.TestCase):
self.assertEqual(d.pop('A'), 1)
self.assertRaises(KeyError, d.pop, 'A')
def test_normkey(self):
    # A subclass overriding normkey() controls the key items are stored under.
    class TitleKeyDict(CaselessDict):
        def normkey(self, key):
            return key.title()

    titled = TitleKeyDict()
    titled['key-one'] = 2
    stored_keys = list(titled.keys())
    self.assertEqual(stored_keys, ['Key-One'])
def test_normvalue(self):
    # normvalue() must be applied whichever way a value gets stored.
    class IncrementingDict(CaselessDict):
        def normvalue(self, value):
            if value is not None:
                return value + 1

    def check_incremented(mapping):
        self.assertEqual(mapping['key'], 2)
        self.assertEqual(mapping.get('key'), 2)

    # via the constructor
    check_incremented(IncrementingDict({'key': 1}))

    # via item assignment
    assigned = IncrementingDict()
    assigned['key'] = 1
    check_incremented(assigned)

    # via setdefault()
    defaulted = IncrementingDict()
    defaulted.setdefault('key', 1)
    check_incremented(defaulted)

    # via update()
    updated = IncrementingDict()
    updated.update({'key': 1})
    check_incremented(updated)

    # via the fromkeys() alternate constructor
    check_incremented(IncrementingDict.fromkeys(('key',), 1))

View File

@ -183,6 +183,7 @@ class SiteNode(object):
s += node.to_string(level+1)
return s
class CaselessDict(dict):
def __init__(self, seq=None):
dict.__init__(self)
@ -193,7 +194,7 @@ class CaselessDict(dict):
return dict.__getitem__(self, self.normkey(key))
def __setitem__(self, key, value):
dict.__setitem__(self, self.normkey(key), value)
dict.__setitem__(self, self.normkey(key), self.normvalue(value))
def __delitem__(self, key):
    """Delete the entry stored under the normalized form of `key`."""
    dict.__delitem__(self, self.normkey(key))
@ -203,17 +204,23 @@ class CaselessDict(dict):
has_key = __contains__
def normkey(self, key):
    """Normalize a key for dictionary access.

    The default implementation lower-cases the key.
    """
    return key.lower()
def normvalue(self, value):
    """Normalize a value prior to it being set.

    The default implementation returns the value unchanged.
    """
    return value
def get(self, key, def_val=None):
return dict.get(self, self.normkey(key), def_val)
return dict.get(self, self.normkey(key), self.normvalue(def_val))
def setdefault(self, key, def_val=None):
return dict.setdefault(self, self.normkey(key), def_val)
return dict.setdefault(self, self.normkey(key), self.normvalue(def_val))
def update(self, seq):
items = seq.iteritems() if isinstance(seq, dict) else seq
dict.update(self, ((self.normkey(k), v) for k, v in items))
seq = seq.iteritems() if isinstance(seq, dict) else seq
iseq = ((self.normkey(k), self.normvalue(v)) for k, v in seq)
super(CaselessDict, self).update(iseq)
@classmethod
def fromkeys(cls, keys, value=None):
@ -222,6 +229,7 @@ class CaselessDict(dict):
def pop(self, key, *args):
    """Pop the entry stored under the normalized form of `key`.

    Any extra positional arguments are forwarded unchanged to dict.pop.
    """
    return dict.pop(self, self.normkey(key), *args)
class PriorityQueue(object):
"""A simple priority queue"""

View File

@ -0,0 +1,58 @@
def headers_raw_to_dict(headers_raw):
    """
    Convert raw headers (a single multi-line string) to a dictionary.

    Every line is split on its first colon: the stripped text before it is
    the key and the stripped text after it is appended to that key's list
    of values. Lines without a colon (blank or malformed) are ignored.
    A header name that occurs on several lines keeps all of its values, in
    order of appearance. Keys are compared exactly (case-sensitively).

    For example:

    >>> headers_raw_to_dict("Content-type: text/html\\n\\rAccept: gzip\\n\\n")
    {'Content-type': ['text/html'], 'Accept': ['gzip']}

    Repeated header:

    >>> headers_raw_to_dict("Set-Cookie: a=1\\r\\nSet-Cookie: b=2")
    {'Set-Cookie': ['a=1', 'b=2']}

    Incorrect input:

    >>> headers_raw_to_dict("Content-typt gzip\\n\\n")
    {}

    Argument is None:

    >>> headers_raw_to_dict(None)

    """
    if headers_raw is None:
        return None
    headers = {}
    for line in headers_raw.splitlines():
        name, colon, value = line.partition(':')
        if not colon:
            # No colon on this line (blank or malformed): skip it.
            continue
        # Accumulate instead of overwriting so repeated headers keep
        # every value.
        headers.setdefault(name.strip(), []).append(value.strip())
    return headers
def headers_dict_to_raw(headers_dict):
    """
    Returns a raw HTTP headers representation of headers

    String values produce one "Key: value" line; list or tuple values
    produce one line per element. Values of any other type are skipped.
    Lines are joined with CRLF, without a trailing line terminator.

    For example:

    >>> headers_dict_to_raw({'Content-type': 'text/html', 'Accept': 'gzip'})
    'Content-type: text/html\\r\\nAccept: gzip'

    >>> from twisted.python.util import InsensitiveDict
    >>> td = InsensitiveDict({'Content-type': ['text/html'], 'Accept': ['gzip']})
    >>> headers_dict_to_raw(td)
    'Content-type: text/html\\r\\nAccept: gzip'

    Argument is None:

    >>> headers_dict_to_raw(None)

    """
    if headers_dict is None:
        return None
    # (str, unicode) on Python 2; on Python 3 both entries are str. Avoids
    # naming the `unicode` builtin, which does not exist on Python 3.
    string_types = (str, type(u''))
    raw_lines = []
    for key, value in headers_dict.items():
        if isinstance(value, string_types):
            raw_lines.append("%s: %s" % (key, value))
        elif isinstance(value, (list, tuple)):
            raw_lines.extend("%s: %s" % (key, v) for v in value)
    return '\r\n'.join(raw_lines)