mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-28 18:38:43 +00:00
Issue GH #1550: rewrite add_http_if_no_scheme()
This commit is contained in:
parent
bc9db65358
commit
dd45b31fe4
@ -6,7 +6,7 @@ Some of the functions that used to be imported from this module have been moved
|
|||||||
to the w3lib.url module. Always import those from there instead.
|
to the w3lib.url module. Always import those from there instead.
|
||||||
"""
|
"""
|
||||||
import posixpath
|
import posixpath
|
||||||
from urlparse import urlsplit, urlunsplit
|
import re
|
||||||
from six.moves.urllib.parse import (ParseResult, urlunparse, urldefrag,
|
from six.moves.urllib.parse import (ParseResult, urlunparse, urldefrag,
|
||||||
urlparse, parse_qsl, urlencode,
|
urlparse, parse_qsl, urlencode,
|
||||||
unquote)
|
unquote)
|
||||||
@ -115,16 +115,10 @@ def escape_ajax(url):
|
|||||||
|
|
||||||
def add_http_if_no_scheme(url):
|
def add_http_if_no_scheme(url):
|
||||||
"""Add http as the default scheme if it is missing from the url."""
|
"""Add http as the default scheme if it is missing from the url."""
|
||||||
parts = urlsplit(url)
|
match = re.match(r"^\w+://", url, flags=re.I)
|
||||||
scheme = parts.scheme or "http"
|
parts = urlparse(url)
|
||||||
if parts.netloc:
|
if not match:
|
||||||
netloc = parts.netloc
|
scheme = "http:" if parts.netloc else "http://"
|
||||||
path = parts.path
|
url = scheme + url
|
||||||
else:
|
|
||||||
path_parts = url.split("/", 1)
|
|
||||||
netloc = path_parts[0]
|
|
||||||
path = path_parts[1] if len(path_parts) > 1 else "/"
|
|
||||||
|
|
||||||
return urlunsplit((
|
return url
|
||||||
scheme, netloc, path, parts.query, parts.fragment
|
|
||||||
))
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user