IOError and other cleanup (#4716)

2025-02-06 10:24:24 +00:00 · 2023-06-21 11:08:53 -07:00 · 2023-06-21 11:08:53 -07:00 · 5360ba34bc
commit 5360ba34bc
parent ee215a2970
12 changed files with 14 additions and 17 deletions
--- a/docs/utils/linkfix.py
+++ b/docs/utils/linkfix.py
@@ -30,7 +30,7 @@ def main():
    try:
        with Path("build/linkcheck/output.txt").open(encoding="utf-8") as out:
            output_lines = out.readlines()
-    except IOError:
+    except OSError:
        print("linkcheck output not found; please run linkcheck first.")
        sys.exit(1)

--- a/scrapy/downloadermiddlewares/decompression.py
+++ b/scrapy/downloadermiddlewares/decompression.py
@@ -63,7 +63,7 @@ class DecompressionMiddleware:
        archive = BytesIO(response.body)
        try:
            body = gzip.GzipFile(fileobj=archive).read()
-        except IOError:
+        except OSError:
            return

        respcls = responsetypes.from_args(body=body)
@@ -72,7 +72,7 @@ class DecompressionMiddleware:
    def _is_bzip2(self, response):
        try:
            body = bz2.decompress(response.body)
-        except IOError:
+        except OSError:
            return

        respcls = responsetypes.from_args(body=body)
--- a/scrapy/downloadermiddlewares/httpcache.py
+++ b/scrapy/downloadermiddlewares/httpcache.py
@@ -37,7 +37,7 @@ class HttpCacheMiddleware:
        ConnectionLost,
        TCPTimedOutError,
        ResponseFailed,
-        IOError,
+        OSError,
    )

    def __init__(self, settings: Settings, stats: StatsCollector) -> None:
--- a/scrapy/settings/default_settings.py
+++ b/scrapy/settings/default_settings.py
@@ -268,9 +268,9 @@ RETRY_EXCEPTIONS = [
    "twisted.internet.error.ConnectionLost",
    "twisted.internet.error.TCPTimedOutError",
    "twisted.web.client.ResponseFailed",
-    # IOError is raised by the HttpCompression middleware when trying to
+    # OSError is raised by the HttpCompression middleware when trying to
    # decompress an empty response
-    IOError,
+    OSError,
    "scrapy.core.downloader.handlers.http11.TunnelError",
 ]

--- a/scrapy/utils/gz.py
+++ b/scrapy/utils/gz.py
@@ -15,7 +15,7 @@ def gunzip(data):
        try:
            chunk = f.read1(8196)
            output_list.append(chunk)
-        except (IOError, EOFError, struct.error):
+        except (OSError, EOFError, struct.error):
            # complete only if there is some data, otherwise re-raise
            # see issue 87 about catching struct.error
            # some pages are quite small so output_list is empty and f.extrabuf
--- a/scrapy/utils/python.py
+++ b/scrapy/utils/python.py
@@ -291,7 +291,7 @@ def without_none_values(iterable):
    try:
        return {k: v for k, v in iterable.items() if v is not None}
    except AttributeError:
-        return type(iterable)((v for v in iterable if v is not None))
+        return type(iterable)(v for v in iterable if v is not None)


 def global_object_name(obj):
--- a/tests/test_downloader_handlers.py
+++ b/tests/test_downloader_handlers.py
@@ -129,7 +129,7 @@ class FileTestCase(unittest.TestCase):
    def test_non_existent(self):
        request = Request(f"file://{self.mktemp()}")
        d = self.download_request(request, Spider("foo"))
-        return self.assertFailure(d, IOError)
+        return self.assertFailure(d, OSError)


 class ContentLengthHeaderResource(resource.Resource):
--- a/tests/test_downloadermiddleware.py
+++ b/tests/test_downloadermiddleware.py
@@ -70,7 +70,7 @@ class DefaultsTest(ManagerTestCase):
        In particular when some website returns a 30x response with header
        'Content-Encoding: gzip' giving as result the error below:

-            exceptions.IOError: Not a gzipped file
+            BadGzipFile: Not a gzipped file (...)

        """
        req = Request("http://example.com")
@@ -108,7 +108,7 @@ class DefaultsTest(ManagerTestCase):
                "Location": "http://example.com/login",
            },
        )
-        self.assertRaises(IOError, self._download, request=req, response=resp)
+        self.assertRaises(OSError, self._download, request=req, response=resp)


 class ResponseFromProcessRequestTest(ManagerTestCase):
--- a/tests/test_mail.py
+++ b/tests/test_mail.py
@@ -1,5 +1,3 @@
-# coding=utf-8
-
 import unittest
 from email.charset import Charset
 from io import BytesIO
--- a/tests/test_robotstxt_interface.py
+++ b/tests/test_robotstxt_interface.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 from twisted.trial import unittest


--- a/tests/test_utils_gz.py
+++ b/tests/test_utils_gz.py
@@ -28,7 +28,7 @@ class GunzipTest(unittest.TestCase):

    def test_gunzip_no_gzip_file_raises(self):
        self.assertRaises(
-            IOError, gunzip, (SAMPLEDIR / "feed-sample1.xml").read_bytes()
+            OSError, gunzip, (SAMPLEDIR / "feed-sample1.xml").read_bytes()
        )

    def test_gunzip_truncated_short(self):
--- a/tests/test_utils_iterators.py
+++ b/tests/test_utils_iterators.py
@@ -346,8 +346,8 @@ class UtilsCsvTestCase(unittest.TestCase):

        # explicit type check cuz' we no like stinkin' autocasting! yarrr
        for result_row in result:
-            self.assertTrue(all((isinstance(k, str) for k in result_row.keys())))
-            self.assertTrue(all((isinstance(v, str) for v in result_row.values())))
+            self.assertTrue(all(isinstance(k, str) for k in result_row.keys()))
+            self.assertTrue(all(isinstance(v, str) for v in result_row.values()))

    def test_csviter_delimiter(self):
        body = get_testdata("feeds", "feed-sample3.csv").replace(b",", b"\t")