mirror of
https://github.com/scrapy/scrapy.git
synced 2025-03-14 02:39:27 +00:00
Remove deprecated scrapy.utils.gz.is_gzipped function
This commit is contained in:
parent
4075e1eadd
commit
51ca4d0138
@ -1,7 +1,6 @@
|
||||
import struct
|
||||
from gzip import GzipFile
|
||||
from io import BytesIO
|
||||
import re
|
||||
import struct
|
||||
|
||||
from scrapy.utils.decorators import deprecated
|
||||
|
||||
@ -42,17 +41,5 @@ def gunzip(data):
|
||||
return b''.join(output_list)
|
||||
|
||||
|
||||
_is_gzipped = re.compile(br'^application/(x-)?gzip\b', re.I).search
|
||||
_is_octetstream = re.compile(br'^(application|binary)/octet-stream\b', re.I).search
|
||||
|
||||
|
||||
@deprecated
|
||||
def is_gzipped(response):
|
||||
"""Return True if the response is gzipped, or False otherwise"""
|
||||
ctype = response.headers.get('Content-Type', b'')
|
||||
cenc = response.headers.get('Content-Encoding', b'').lower()
|
||||
return _is_gzipped(ctype) or _is_octetstream(ctype) and cenc in (b'gzip', b'x-gzip')
|
||||
|
||||
|
||||
def gzip_magic_number(response):
|
||||
return response.body[:3] == b'\x1f\x8b\x08'
|
||||
|
@ -3,10 +3,11 @@ from os.path import join
|
||||
|
||||
from w3lib.encoding import html_to_unicode
|
||||
|
||||
from scrapy.utils.gz import gunzip, is_gzipped
|
||||
from scrapy.http import Response, Headers
|
||||
from scrapy.utils.gz import gunzip, gzip_magic_number
|
||||
from scrapy.http import Response
|
||||
from tests import tests_datadir
|
||||
|
||||
|
||||
SAMPLEDIR = join(tests_datadir, 'compressed')
|
||||
|
||||
|
||||
@ -14,8 +15,12 @@ class GunzipTest(unittest.TestCase):
|
||||
|
||||
def test_gunzip_basic(self):
|
||||
with open(join(SAMPLEDIR, 'feed-sample1.xml.gz'), 'rb') as f:
|
||||
text = gunzip(f.read())
|
||||
self.assertEqual(len(text), 9950)
|
||||
r1 = Response("http://www.example.com", body=f.read())
|
||||
self.assertTrue(gzip_magic_number(r1))
|
||||
|
||||
r2 = Response("http://www.example.com", body=gunzip(r1.body))
|
||||
self.assertFalse(gzip_magic_number(r2))
|
||||
self.assertEqual(len(r2.body), 9950)
|
||||
|
||||
def test_gunzip_truncated(self):
|
||||
with open(join(SAMPLEDIR, 'truncated-crc-error.gz'), 'rb') as f:
|
||||
@ -28,46 +33,16 @@ class GunzipTest(unittest.TestCase):
|
||||
|
||||
def test_gunzip_truncated_short(self):
|
||||
with open(join(SAMPLEDIR, 'truncated-crc-error-short.gz'), 'rb') as f:
|
||||
text = gunzip(f.read())
|
||||
assert text.endswith(b'</html>')
|
||||
r1 = Response("http://www.example.com", body=f.read())
|
||||
self.assertTrue(gzip_magic_number(r1))
|
||||
|
||||
def test_is_x_gzipped_right(self):
|
||||
hdrs = Headers({"Content-Type": "application/x-gzip"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertTrue(is_gzipped(r1))
|
||||
|
||||
def test_is_gzipped_right(self):
|
||||
hdrs = Headers({"Content-Type": "application/gzip"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertTrue(is_gzipped(r1))
|
||||
|
||||
def test_is_gzipped_not_quite(self):
|
||||
hdrs = Headers({"Content-Type": "application/gzippppp"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertFalse(is_gzipped(r1))
|
||||
|
||||
def test_is_gzipped_case_insensitive(self):
|
||||
hdrs = Headers({"Content-Type": "Application/X-Gzip"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertTrue(is_gzipped(r1))
|
||||
|
||||
hdrs = Headers({"Content-Type": "application/X-GZIP ; charset=utf-8"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertTrue(is_gzipped(r1))
|
||||
r2 = Response("http://www.example.com", body=gunzip(r1.body))
|
||||
assert r2.body.endswith(b'</html>')
|
||||
self.assertFalse(gzip_magic_number(r2))
|
||||
|
||||
def test_is_gzipped_empty(self):
|
||||
r1 = Response("http://www.example.com")
|
||||
self.assertFalse(is_gzipped(r1))
|
||||
|
||||
def test_is_gzipped_wrong(self):
|
||||
hdrs = Headers({"Content-Type": "application/javascript"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertFalse(is_gzipped(r1))
|
||||
|
||||
def test_is_gzipped_with_charset(self):
|
||||
hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"})
|
||||
r1 = Response("http://www.example.com", headers=hdrs)
|
||||
self.assertTrue(is_gzipped(r1))
|
||||
self.assertFalse(gzip_magic_number(r1))
|
||||
|
||||
def test_gunzip_illegal_eof(self):
|
||||
with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f:
|
||||
|
Loading…
x
Reference in New Issue
Block a user