1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-25 20:04:28 +00:00

Merge pull request #1851 from nyov/binary_or_text

[MRG+1] Rename isbinarytext function to binary_is_text for clarity
This commit is contained in:
Paul Tremberth 2016-03-31 11:55:09 +02:00
commit 3ba5671fbc
3 changed files with 23 additions and 15 deletions

View File

@ -10,7 +10,7 @@ import six
from scrapy.http import Response
from scrapy.utils.misc import load_object
from scrapy.utils.python import isbinarytext, to_bytes, to_native_str
from scrapy.utils.python import binary_is_text, to_bytes, to_native_str
class ResponseTypes(object):
@ -94,7 +94,7 @@ class ResponseTypes(object):
cannot be guessed using more straightforward methods."""
chunk = body[:5000]
chunk = to_bytes(chunk)
if isbinarytext(chunk):
if not binary_is_text(chunk):
return self.from_mimetype('application/octet-stream')
elif b"<html>" in chunk.lower():
return self.from_mimetype('text/html')

View File

@ -174,17 +174,25 @@ def memoizemethod_noargs(method):
return cache[self]
return new_method
_BINARYCHARS = {six.b(chr(i)) for i in range(32)} - {b"\0", b"\t", b"\n", b"\r"}
_BINARYCHARS |= {ord(ch) for ch in _BINARYCHARS}
@deprecated("scrapy.utils.python.binary_is_text")
def isbinarytext(text):
"""Return True if the given text is considered binary, or False
otherwise, by looking for binary bytes at their chars
""" This function is deprecated.
Please use scrapy.utils.python.binary_is_text, which was created to be
clearer about the function's behavior: it returns the inverse of this one. """
return not binary_is_text(text)
def binary_is_text(data):
""" Returns `True` if the given ``data`` argument (a ``bytes`` object)
does not contain unprintable control characters.
"""
if not isinstance(text, bytes):
raise TypeError("text must be bytes, got '%s'" % type(text).__name__)
return any(c in _BINARYCHARS for c in text)
if not isinstance(data, bytes):
raise TypeError("data must be bytes, got '%s'" % type(data).__name__)
return all(c not in _BINARYCHARS for c in data)
def get_func_args(func, stripself=False):

View File

@ -5,7 +5,7 @@ from itertools import count
import six
from scrapy.utils.python import (
memoizemethod_noargs, isbinarytext, equal_attributes,
memoizemethod_noargs, binary_is_text, equal_attributes,
WeakKeyCache, stringify_dict, get_func_args, to_bytes, to_unicode,
without_none_values)
@ -71,18 +71,18 @@ class MemoizedMethodTest(unittest.TestCase):
assert one is not three
class IsBinaryTextTest(unittest.TestCase):
def test_isbinarytext(self):
assert not isbinarytext(b"hello")
class BinaryIsTextTest(unittest.TestCase):
def test_binaryistext(self):
assert binary_is_text(b"hello")
def test_utf_16_strings_contain_null_bytes(self):
assert not isbinarytext(u"hello".encode('utf-16'))
assert binary_is_text(u"hello".encode('utf-16'))
def test_one_with_encoding(self):
assert not isbinarytext(b"<div>Price \xa3</div>")
assert binary_is_text(b"<div>Price \xa3</div>")
def test_real_binary_bytes(self):
assert isbinarytext(b"\x02\xa3")
assert not binary_is_text(b"\x02\xa3")