1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-03-14 02:39:27 +00:00

Remove deprecated scrapy.utils.gz.is_gzipped function

This commit is contained in:
Eugenio Lacuesta 2020-11-23 15:47:08 -03:00
parent 4075e1eadd
commit 51ca4d0138
No known key found for this signature in database
GPG Key ID: DA3EF2D0913E9810
2 changed files with 16 additions and 54 deletions

View File

@ -1,7 +1,6 @@
import struct
from gzip import GzipFile
from io import BytesIO
import re
import struct
from scrapy.utils.decorators import deprecated
@ -42,17 +41,5 @@ def gunzip(data):
return b''.join(output_list)
# Bound ``search`` of a case-insensitive bytes pattern: finds a match when the
# value starts with ``application/gzip`` or ``application/x-gzip`` followed by
# a word boundary. Returns a match object or None.
_is_gzipped = re.compile(br'^application/(x-)?gzip\b', re.I).search
# Bound ``search`` of a case-insensitive bytes pattern: finds a match when the
# value starts with ``application/octet-stream`` or ``binary/octet-stream``
# followed by a word boundary. Returns a match object or None.
_is_octetstream = re.compile(br'^(application|binary)/octet-stream\b', re.I).search
@deprecated
def is_gzipped(response):
    """Return True if the response is gzipped, or False otherwise.

    A response counts as gzipped when either:

    * its ``Content-Type`` header starts with ``application/gzip`` or
      ``application/x-gzip``; or
    * its ``Content-Type`` header starts with ``application/octet-stream``
      or ``binary/octet-stream`` AND its lower-cased ``Content-Encoding``
      header is exactly ``gzip`` or ``x-gzip``.

    Missing headers are treated as empty byte strings.

    :param response: object exposing a ``headers`` mapping with a
        ``get(name, default)`` method that yields bytes values.
    :return: a real ``bool`` (never a regex match object or ``None``).
    """
    ctype = response.headers.get('Content-Type', b'')
    cenc = response.headers.get('Content-Encoding', b'').lower()
    # A gzip content type is sufficient on its own.
    if _is_gzipped(ctype):
        return True
    # Otherwise a generic octet-stream body must also be declared as
    # gzip-encoded. bool() keeps a match object / None from leaking out.
    return bool(_is_octetstream(ctype)) and cenc in (b'gzip', b'x-gzip')
def gzip_magic_number(response):
    """Tell whether ``response.body`` begins with the gzip signature.

    The signature checked is the three bytes ``1f 8b 08``: the two gzip
    identification bytes followed by the "deflate" compression-method byte.

    :param response: object exposing a ``body`` attribute (bytes).
    :return: True when the first three bytes of the body equal the
        signature, False otherwise (including bodies shorter than three
        bytes).
    """
    signature = b'\x1f\x8b\x08'
    leading = response.body[:len(signature)]
    return leading == signature

View File

@ -3,10 +3,11 @@ from os.path import join
from w3lib.encoding import html_to_unicode
from scrapy.utils.gz import gunzip, is_gzipped
from scrapy.http import Response, Headers
from scrapy.utils.gz import gunzip, gzip_magic_number
from scrapy.http import Response
from tests import tests_datadir
SAMPLEDIR = join(tests_datadir, 'compressed')
@ -14,8 +15,12 @@ class GunzipTest(unittest.TestCase):
# Reads the feed-sample1.xml.gz fixture and checks that the gunzip() output is
# 9950 bytes long; also checks gzip_magic_number() is true for the compressed
# body and false for the decompressed one.
# NOTE(review): this span appears to interleave two versions of the body (one
# calling gunzip() on the file directly, one wrapping the data in Response
# objects) — confirm against the real file before relying on statement order.
def test_gunzip_basic(self):
with open(join(SAMPLEDIR, 'feed-sample1.xml.gz'), 'rb') as f:
text = gunzip(f.read())
self.assertEqual(len(text), 9950)
r1 = Response("http://www.example.com", body=f.read())
self.assertTrue(gzip_magic_number(r1))
r2 = Response("http://www.example.com", body=gunzip(r1.body))
self.assertFalse(gzip_magic_number(r2))
self.assertEqual(len(r2.body), 9950)
def test_gunzip_truncated(self):
with open(join(SAMPLEDIR, 'truncated-crc-error.gz'), 'rb') as f:
@ -28,46 +33,16 @@ class GunzipTest(unittest.TestCase):
# Reads the truncated-crc-error-short.gz fixture; checks that gunzip() output
# ends with b'</html>' and that the raw body passes gzip_magic_number().
# NOTE(review): this span appears to interleave two versions of the body —
# confirm against the real file before relying on statement order.
def test_gunzip_truncated_short(self):
with open(join(SAMPLEDIR, 'truncated-crc-error-short.gz'), 'rb') as f:
text = gunzip(f.read())
assert text.endswith(b'</html>')
r1 = Response("http://www.example.com", body=f.read())
self.assertTrue(gzip_magic_number(r1))
# A response whose Content-Type is "application/x-gzip" must be reported as
# gzipped by is_gzipped().
def test_is_x_gzipped_right(self):
hdrs = Headers({"Content-Type": "application/x-gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
# A response whose Content-Type is "application/gzip" must be reported as
# gzipped by is_gzipped().
def test_is_gzipped_right(self):
hdrs = Headers({"Content-Type": "application/gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
# A Content-Type that merely starts with the letters "application/gzip" but
# continues with more word characters ("application/gzippppp") must NOT be
# reported as gzipped.
def test_is_gzipped_not_quite(self):
hdrs = Headers({"Content-Type": "application/gzippppp"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertFalse(is_gzipped(r1))
# Content-Type matching must ignore letter case: both "Application/X-Gzip" and
# "application/X-GZIP ; charset=utf-8" (with trailing parameters) are expected
# to be reported as gzipped.
def test_is_gzipped_case_insensitive(self):
hdrs = Headers({"Content-Type": "Application/X-Gzip"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
hdrs = Headers({"Content-Type": "application/X-GZIP ; charset=utf-8"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
r2 = Response("http://www.example.com", body=gunzip(r1.body))
assert r2.body.endswith(b'</html>')
self.assertFalse(gzip_magic_number(r2))
# A response built with only a URL (no headers passed) must not be reported as
# gzipped by is_gzipped().
def test_is_gzipped_empty(self):
r1 = Response("http://www.example.com")
self.assertFalse(is_gzipped(r1))
# A non-gzip Content-Type ("application/javascript") must not be reported as
# gzipped by is_gzipped().
def test_is_gzipped_wrong(self):
hdrs = Headers({"Content-Type": "application/javascript"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertFalse(is_gzipped(r1))
# A gzip Content-Type followed directly by a parameter
# ("application/x-gzip;charset=utf-8") must still be reported as gzipped.
def test_is_gzipped_with_charset(self):
hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"})
r1 = Response("http://www.example.com", headers=hdrs)
self.assertTrue(is_gzipped(r1))
self.assertFalse(gzip_magic_number(r1))
def test_gunzip_illegal_eof(self):
with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f: