mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-28 09:03:45 +00:00
issue GH #1550 - rewritten add_http_if_no_scheme()
This commit is contained in:
parent
bc9db65358
commit
dd45b31fe4
@ -6,7 +6,7 @@ Some of the functions that used to be imported from this module have been moved
|
||||
to the w3lib.url module. Always import those from there instead.
|
||||
"""
|
||||
import posixpath
|
||||
from urlparse import urlsplit, urlunsplit
|
||||
import re
|
||||
from six.moves.urllib.parse import (ParseResult, urlunparse, urldefrag,
|
||||
urlparse, parse_qsl, urlencode,
|
||||
unquote)
|
||||
@ -115,16 +115,10 @@ def escape_ajax(url):
|
||||
|
||||
def add_http_if_no_scheme(url):
    """Add http as the default scheme if it is missing from the url.

    A url is considered to already carry a scheme only when it starts
    with ``<word characters>://``; such urls are returned unchanged.

    Otherwise the url is prefixed so that it becomes an absolute http url:

    * protocol-relative urls (``//host/path``), for which ``urlparse``
      reports a network location, only get ``http:`` prepended;
    * everything else (``host``, ``host/path``, ``host:port``, ...)
      gets ``http://`` prepended.

    :param url: the url string to normalise
    :return: ``url`` itself when it has a scheme, otherwise the url
        prefixed with ``http:`` or ``http://``
    """
    # A plain prefix check is used instead of trusting urlparse().scheme,
    # because inputs such as "localhost:8000" can be parsed as having the
    # scheme "localhost".
    match = re.match(r"^\w+://", url, flags=re.I)
    if not match:
        # Only parse when a prefix must be added: netloc is non-empty
        # solely for urls that already start with "//".
        parts = urlparse(url)
        scheme = "http:" if parts.netloc else "http://"
        url = scheme + url

    return url
|
||||
|
Loading…
x
Reference in New Issue
Block a user