From 51ca4d0138e5c9cf637074f59c839ff9b5839db6 Mon Sep 17 00:00:00 2001 From: Eugenio Lacuesta Date: Mon, 23 Nov 2020 15:47:08 -0300 Subject: [PATCH] Remove deprecated scrapy.utils.gz.is_gzipped function --- scrapy/utils/gz.py | 15 +----------- tests/test_utils_gz.py | 55 ++++++++++++------------------------------ 2 files changed, 16 insertions(+), 54 deletions(-) diff --git a/scrapy/utils/gz.py b/scrapy/utils/gz.py index 11d433cf5..76156a4b8 100644 --- a/scrapy/utils/gz.py +++ b/scrapy/utils/gz.py @@ -1,7 +1,6 @@ +import struct from gzip import GzipFile from io import BytesIO -import re -import struct from scrapy.utils.decorators import deprecated @@ -42,17 +41,5 @@ def gunzip(data): return b''.join(output_list) -_is_gzipped = re.compile(br'^application/(x-)?gzip\b', re.I).search -_is_octetstream = re.compile(br'^(application|binary)/octet-stream\b', re.I).search - - -@deprecated -def is_gzipped(response): - """Return True if the response is gzipped, or False otherwise""" - ctype = response.headers.get('Content-Type', b'') - cenc = response.headers.get('Content-Encoding', b'').lower() - return _is_gzipped(ctype) or _is_octetstream(ctype) and cenc in (b'gzip', b'x-gzip') - - def gzip_magic_number(response): return response.body[:3] == b'\x1f\x8b\x08' diff --git a/tests/test_utils_gz.py b/tests/test_utils_gz.py index 7148185f4..4943731cb 100644 --- a/tests/test_utils_gz.py +++ b/tests/test_utils_gz.py @@ -3,10 +3,11 @@ from os.path import join from w3lib.encoding import html_to_unicode -from scrapy.utils.gz import gunzip, is_gzipped -from scrapy.http import Response, Headers +from scrapy.utils.gz import gunzip, gzip_magic_number +from scrapy.http import Response from tests import tests_datadir + SAMPLEDIR = join(tests_datadir, 'compressed') @@ -14,8 +15,12 @@ class GunzipTest(unittest.TestCase): def test_gunzip_basic(self): with open(join(SAMPLEDIR, 'feed-sample1.xml.gz'), 'rb') as f: - text = gunzip(f.read()) - self.assertEqual(len(text), 9950) + r1 = Response("http://www.example.com", body=f.read()) + self.assertTrue(gzip_magic_number(r1)) + + r2 = Response("http://www.example.com", body=gunzip(r1.body)) + self.assertFalse(gzip_magic_number(r2)) + self.assertEqual(len(r2.body), 9950) def test_gunzip_truncated(self): with open(join(SAMPLEDIR, 'truncated-crc-error.gz'), 'rb') as f: @@ -28,46 +33,16 @@ class GunzipTest(unittest.TestCase): def test_gunzip_truncated_short(self): with open(join(SAMPLEDIR, 'truncated-crc-error-short.gz'), 'rb') as f: - text = gunzip(f.read()) - assert text.endswith(b'') + r1 = Response("http://www.example.com", body=f.read()) + self.assertTrue(gzip_magic_number(r1)) - def test_is_x_gzipped_right(self): - hdrs = Headers({"Content-Type": "application/x-gzip"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertTrue(is_gzipped(r1)) - - def test_is_gzipped_right(self): - hdrs = Headers({"Content-Type": "application/gzip"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertTrue(is_gzipped(r1)) - - def test_is_gzipped_not_quite(self): - hdrs = Headers({"Content-Type": "application/gzippppp"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertFalse(is_gzipped(r1)) - - def test_is_gzipped_case_insensitive(self): - hdrs = Headers({"Content-Type": "Application/X-Gzip"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertTrue(is_gzipped(r1)) - - hdrs = Headers({"Content-Type": "application/X-GZIP ; charset=utf-8"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertTrue(is_gzipped(r1)) + r2 = Response("http://www.example.com", body=gunzip(r1.body)) + assert r2.body.endswith(b'') + self.assertFalse(gzip_magic_number(r2)) def test_is_gzipped_empty(self): r1 = Response("http://www.example.com") - self.assertFalse(is_gzipped(r1)) - - def test_is_gzipped_wrong(self): - hdrs = Headers({"Content-Type": "application/javascript"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertFalse(is_gzipped(r1)) - - def test_is_gzipped_with_charset(self): - hdrs = Headers({"Content-Type": "application/x-gzip;charset=utf-8"}) - r1 = Response("http://www.example.com", headers=hdrs) - self.assertTrue(is_gzipped(r1)) + self.assertFalse(gzip_magic_number(r1)) def test_gunzip_illegal_eof(self): with open(join(SAMPLEDIR, 'unexpected-eof.gz'), 'rb') as f: