1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-28 09:03:45 +00:00

issue GH #1550 - rewritten add_http_if_no_scheme()

This commit is contained in:
Leonid Amirov 2015-11-03 14:32:30 +03:00
parent bc9db65358
commit dd45b31fe4

View File

@ -6,7 +6,7 @@ Some of the functions that used to be imported from this module have been moved
to the w3lib.url module. Always import those from there instead.
"""
import posixpath
from urlparse import urlsplit, urlunsplit
import re
from six.moves.urllib.parse import (ParseResult, urlunparse, urldefrag,
urlparse, parse_qsl, urlencode,
unquote)
@ -115,16 +115,10 @@ def escape_ajax(url):
def add_http_if_no_scheme(url):
    """Add http as the default scheme if it is missing from the url.

    A url is treated as already having a scheme when it starts with one or
    more word characters followed by ``://`` (for example ``https://`` or
    ``ftp://``); such urls are returned unchanged.

    Otherwise the url is prefixed so that the result starts with
    ``http://``:

    * protocol-relative urls (``//host/path``), for which ``urlparse``
      reports a network location, only get ``http:`` prepended;
    * everything else (``host``, ``host:port``, ``host/path`` ...) gets
      ``http://`` prepended.

    :param url: the url string to normalise.
    :return: the url, guaranteed to start with ``<scheme>://``.
    """
    # Detect the scheme textually instead of trusting the parser's notion
    # of a scheme: inputs such as "host:8080" contain a colon but carry no
    # "://" separator and still need the default scheme.
    if re.match(r"^\w+://", url, flags=re.I):
        return url

    # A non-empty netloc here means the url already begins with "//", so
    # adding the slashes again would produce "http:////host".
    prefix = "http:" if urlparse(url).netloc else "http://"
    return prefix + url