mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-25 20:04:28 +00:00
Merge pull request #1851 from nyov/binary_or_text
[MRG+1] Rename isbinarytext function to binary_is_text for clarity
This commit is contained in:
commit
3ba5671fbc
@ -10,7 +10,7 @@ import six
|
||||
|
||||
from scrapy.http import Response
|
||||
from scrapy.utils.misc import load_object
|
||||
from scrapy.utils.python import isbinarytext, to_bytes, to_native_str
|
||||
from scrapy.utils.python import binary_is_text, to_bytes, to_native_str
|
||||
|
||||
|
||||
class ResponseTypes(object):
|
||||
@ -94,7 +94,7 @@ class ResponseTypes(object):
|
||||
cannot be guessed using more straightforward methods."""
|
||||
chunk = body[:5000]
|
||||
chunk = to_bytes(chunk)
|
||||
if isbinarytext(chunk):
|
||||
if not binary_is_text(chunk):
|
||||
return self.from_mimetype('application/octet-stream')
|
||||
elif b"<html>" in chunk.lower():
|
||||
return self.from_mimetype('text/html')
|
||||
|
@ -174,17 +174,25 @@ def memoizemethod_noargs(method):
|
||||
return cache[self]
|
||||
return new_method
|
||||
|
||||
|
||||
_BINARYCHARS = {six.b(chr(i)) for i in range(32)} - {b"\0", b"\t", b"\n", b"\r"}
|
||||
_BINARYCHARS |= {ord(ch) for ch in _BINARYCHARS}
|
||||
|
||||
|
||||
@deprecated("scrapy.utils.python.binary_is_text")
|
||||
def isbinarytext(text):
|
||||
"""Return True if the given text is considered binary, or False
|
||||
otherwise, by looking for binary bytes at their chars
|
||||
""" This function is deprecated.
|
||||
Please use scrapy.utils.python.binary_is_text, which was created to be more
|
||||
clear about the function's behavior: it returns the inverse of this one. """
|
||||
return not binary_is_text(text)
|
||||
|
||||
|
||||
def binary_is_text(data):
|
||||
""" Returns `True` if the given ``data`` argument (a ``bytes`` object)
|
||||
does not contain unprintable control characters.
|
||||
"""
|
||||
if not isinstance(text, bytes):
|
||||
raise TypeError("text must be bytes, got '%s'" % type(text).__name__)
|
||||
return any(c in _BINARYCHARS for c in text)
|
||||
if not isinstance(data, bytes):
|
||||
raise TypeError("data must be bytes, got '%s'" % type(data).__name__)
|
||||
return all(c not in _BINARYCHARS for c in data)
|
||||
|
||||
|
||||
def get_func_args(func, stripself=False):
|
||||
|
@ -5,7 +5,7 @@ from itertools import count
|
||||
import six
|
||||
|
||||
from scrapy.utils.python import (
|
||||
memoizemethod_noargs, isbinarytext, equal_attributes,
|
||||
memoizemethod_noargs, binary_is_text, equal_attributes,
|
||||
WeakKeyCache, stringify_dict, get_func_args, to_bytes, to_unicode,
|
||||
without_none_values)
|
||||
|
||||
@ -71,18 +71,18 @@ class MemoizedMethodTest(unittest.TestCase):
|
||||
assert one is not three
|
||||
|
||||
|
||||
class IsBinaryTextTest(unittest.TestCase):
|
||||
def test_isbinarytext(self):
|
||||
assert not isbinarytext(b"hello")
|
||||
class BinaryIsTextTest(unittest.TestCase):
|
||||
def test_binaryistext(self):
|
||||
assert binary_is_text(b"hello")
|
||||
|
||||
def test_utf_16_strings_contain_null_bytes(self):
|
||||
assert not isbinarytext(u"hello".encode('utf-16'))
|
||||
assert binary_is_text(u"hello".encode('utf-16'))
|
||||
|
||||
def test_one_with_encoding(self):
|
||||
assert not isbinarytext(b"<div>Price \xa3</div>")
|
||||
assert binary_is_text(b"<div>Price \xa3</div>")
|
||||
|
||||
def test_real_binary_bytes(self):
|
||||
assert isbinarytext(b"\x02\xa3")
|
||||
assert not binary_is_text(b"\x02\xa3")
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user