1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-22 20:44:29 +00:00

Remove duplicate code now handled by newer w3lib

see f3029a6a10
This commit is contained in:
nyov 2016-03-26 22:22:01 +00:00
parent a38a99e0e2
commit 3787fec460
4 changed files with 6 additions and 13 deletions

View File

@@ -2,7 +2,7 @@ Twisted>=10.0.0
lxml
pyOpenSSL
cssselect>=0.9
w3lib>=1.8.0
w3lib>=1.13.0
queuelib
six>=1.5.2
PyDispatcher>=2.0.5

View File

@@ -8,7 +8,6 @@ from w3lib import html
from scrapy.exceptions import NotConfigured
from scrapy.http import HtmlResponse
from scrapy.utils.response import _noscript_re, _script_re
logger = logging.getLogger(__name__)
@@ -89,8 +88,7 @@ def _has_ajaxcrawlable_meta(text):
if 'content' not in text:
return False
text = _script_re.sub(u'', text)
text = _noscript_re.sub(u'', text)
text = html.remove_comments(html.replace_entities(text))
text = html.remove_tags_with_content(text, ('script', 'noscript'))
text = html.replace_entities(text)
text = html.remove_comments(text)
return _ajax_crawlable_re.search(text) is not None

View File

@@ -3,7 +3,6 @@ This module provides some useful functions for working with
scrapy.http.Response objects
"""
import os
import re
import weakref
import webbrowser
import tempfile
@@ -31,17 +30,13 @@ def get_base_url(response):
return _baseurl_cache[response]
_noscript_re = re.compile(u'<noscript>.*?</noscript>', re.IGNORECASE | re.DOTALL)
_script_re = re.compile(u'<script.*?>.*?</script>', re.IGNORECASE | re.DOTALL)
_metaref_cache = weakref.WeakKeyDictionary()
def get_meta_refresh(response):
"""Parse the http-equiv refresh parameter from the given response"""
if response not in _metaref_cache:
text = response.text[0:4096]
text = _noscript_re.sub(u'', text)
text = _script_re.sub(u'', text)
_metaref_cache[response] = html.get_meta_refresh(text, response.url,
response.encoding)
response.encoding, ignore_tags=('script', 'noscript'))
return _metaref_cache[response]

View File

@@ -42,7 +42,7 @@ setup(
],
install_requires=[
'Twisted>=10.0.0',
'w3lib>=1.8.0',
'w3lib>=1.13.0',
'queuelib',
'lxml',
'pyOpenSSL',