1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-28 09:23:51 +00:00

issue GH #1550 - scrapy shell argument fixes: "example.com" requests "http://example.com"; "example" requests "file://example"; "./example.com" requests "file://example.com"

This commit is contained in:
Leonid Amirov 2015-11-02 16:08:19 +03:00
parent a41c64bfb9
commit bc9db65358

View File

@ -5,11 +5,13 @@ See documentation in docs/topics/shell.rst
"""
from threading import Thread
import urlparse
from w3lib.url import any_to_uri
from scrapy.commands import ScrapyCommand
from scrapy.shell import Shell
from scrapy.http import Request
from scrapy.utils.url import add_http_if_no_scheme
from scrapy.utils.spider import spidercls_for_request, DefaultSpider
@ -43,7 +45,16 @@ class Command(ScrapyCommand):
def run(self, args, opts):
url = args[0] if args else None
if url:
url = any_to_uri(url)
parts = urlparse.urlsplit(url)
if not parts.scheme:
if "." not in parts.path.split("/", 1)[0]:
url = any_to_uri(url)
for pattern in ["/", "./", "../"]:
if url.startswith(pattern):
url = any_to_uri(url)
break
url = add_http_if_no_scheme(url)
spider_loader = self.crawler_process.spider_loader