mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-22 20:44:29 +00:00
Made the private HTML entity regex public (renamed `_ent_re` to `ent_re`)
--HG-- extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40281
This commit is contained in:
parent
c07937c4df
commit
ad00d5e632
@ -5,7 +5,7 @@ Functions for dealing with markup text
|
||||
import re
|
||||
import htmlentitydefs
|
||||
|
||||
_ent_re = re.compile(r'&(#?)([^&;]+);')
|
||||
ent_re = re.compile(r'&(#?)([^&;]+);')
|
||||
_tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL)
|
||||
|
||||
def remove_entities(text, keep=(), remove_illegal=True):
|
||||
@ -26,7 +26,7 @@ def remove_entities(text, keep=(), remove_illegal=True):
|
||||
"""
|
||||
|
||||
def convert_entity(m):
|
||||
if m.group(1)=='#':
|
||||
if m.group(1) == '#':
|
||||
try:
|
||||
return unichr(int(m.group(2)))
|
||||
except ValueError:
|
||||
@ -45,7 +45,7 @@ def remove_entities(text, keep=(), remove_illegal=True):
|
||||
else:
|
||||
return u'&%s;' % m.group(2)
|
||||
|
||||
return _ent_re.sub(convert_entity, text.decode('utf-8'))
|
||||
return ent_re.sub(convert_entity, text.decode('utf-8'))
|
||||
|
||||
|
||||
def replace_tags(text, token=''):
|
||||
|
Loading…
x
Reference in New Issue
Block a user