1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 20:44:29 +00:00

Changed the private HTML entity regex (`_ent_re`) to a public name (`ent_re`)

--HG--
extra : convert_revision : svn%3Ab85faa78-f9eb-468e-a121-7cced6da292c%40281
This commit is contained in:
Damian Canabal 2008-09-29 12:52:54 +00:00
parent c07937c4df
commit ad00d5e632

View File

@@ -5,7 +5,7 @@ Functions for dealing with markup text
import re
import htmlentitydefs
_ent_re = re.compile(r'&(#?)([^&;]+);')
ent_re = re.compile(r'&(#?)([^&;]+);')
_tag_re = re.compile(r'<[a-zA-Z\/!].*?>', re.DOTALL)
def remove_entities(text, keep=(), remove_illegal=True):
@@ -26,7 +26,7 @@ def remove_entities(text, keep=(), remove_illegal=True):
"""
def convert_entity(m):
if m.group(1)=='#':
if m.group(1) == '#':
try:
return unichr(int(m.group(2)))
except ValueError:
@@ -45,7 +45,7 @@ def remove_entities(text, keep=(), remove_illegal=True):
else:
return u'&%s;' % m.group(2)
return _ent_re.sub(convert_entity, text.decode('utf-8'))
return ent_re.sub(convert_entity, text.decode('utf-8'))
def replace_tags(text, token=''):