1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-06 23:51:35 +00:00

Replace _DefGen_Return exception handling

Handle StopIteration instead
This commit is contained in:
Eugenio Lacuesta 2020-04-13 06:35:17 -03:00
parent 1bd8f392c9
commit 4023d5db33
No known key found for this signature in database
GPG Key ID: DA3EF2D0913E9810
2 changed files with 18 additions and 17 deletions

View File

@ -1,7 +1,7 @@
import functools
import logging
from collections import defaultdict
from twisted.internet.defer import Deferred, DeferredList, _DefGen_Return
from twisted.internet.defer import Deferred, DeferredList
from twisted.python.failure import Failure
from scrapy.settings import Settings
@ -141,24 +141,26 @@ class MediaPipeline:
# This code fixes a memory leak by avoiding to keep references to
# the Request and Response objects on the Media Pipeline cache.
#
# Twisted inline callbacks pass return values using the function
# twisted.internet.defer.returnValue, which encapsulates the return
# value inside a _DefGen_Return base exception.
#
# What happens when the media_downloaded callback raises another
# What happens when the media_downloaded callback raises an
# exception, for example a FileException('download-error') when
# the Response status code is not 200 OK, is that it stores the
# _DefGen_Return exception on the FileException context.
# the Response status code is not 200 OK, is that the original
# StopIteration exception (which in turn contains the failed
# Response and by extension, the original Request) gets encapsulated
# within the FileException context.
#
# Originally, Scrapy was using twisted.internet.defer.returnValue
# inside functions decorated with twisted.internet.defer.inlineCallbacks,
# encapsulating the returned Response in a _DefGen_Return exception
# instead of a StopIteration.
#
# To avoid keeping references to the Response and therefore Request
# objects on the Media Pipeline cache, we should wipe the context of
# the exception encapsulated by the Twisted Failure when its a
# _DefGen_Return instance.
# the encapsulated exception when it is a StopIteration instance
#
# This problem does not occur in Python 2.7 since we don't have
# Exception Chaining (https://www.python.org/dev/peps/pep-3134/).
context = getattr(result.value, '__context__', None)
if isinstance(context, _DefGen_Return):
if isinstance(context, StopIteration):
setattr(result.value, '__context__', None)
info.downloading.remove(fp)

View File

@ -2,7 +2,7 @@ from testfixtures import LogCapture
from twisted.trial import unittest
from twisted.python.failure import Failure
from twisted.internet import reactor
from twisted.internet.defer import Deferred, inlineCallbacks, returnValue
from twisted.internet.defer import Deferred, inlineCallbacks
from scrapy.http import Request, Response
from scrapy.settings import Settings
@ -124,9 +124,8 @@ class BaseMediaPipelineTestCase(unittest.TestCase):
# Simulate the Media Pipeline behavior to produce a Twisted Failure
try:
# Simulate a Twisted inline callback returning a Response
# The returnValue method raises an exception encapsulating the value
returnValue(response)
except BaseException as exc:
raise StopIteration(response)
except StopIteration as exc:
def_gen_return_exc = exc
try:
# Simulate the media_downloaded callback raising a FileException
@ -140,7 +139,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):
# The Failure should encapsulate a FileException ...
self.assertEqual(failure.value, file_exc)
# ... and it should have the returnValue exception set as its context
# ... and it should have the StopIteration exception set as its context
self.assertEqual(failure.value.__context__, def_gen_return_exc)
# Let's calculate the request fingerprint and fake some runtime data...
@ -155,7 +154,7 @@ class BaseMediaPipelineTestCase(unittest.TestCase):
self.assertEqual(info.downloaded[fp], failure)
# ... encapsulating the original FileException ...
self.assertEqual(info.downloaded[fp].value, file_exc)
# ... but it should not store the returnValue exception on its context
# ... but it should not store the StopIteration exception on its context
context = getattr(info.downloaded[fp].value, '__context__', None)
self.assertIsNone(context)