1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-26 17:44:02 +00:00

Fix issue #1562: scrapy.http.FormRequest.from_response picked an incorrect URL when the response contains a <base> tag

This commit is contained in:
Pengyu CHEN 2015-10-29 14:18:59 +08:00
parent dd9f777ba7
commit c34dbe955d

View File

@@ -11,6 +11,7 @@ from parsel.selector import create_root_node
import six import six
from scrapy.http.request import Request from scrapy.http.request import Request
from scrapy.utils.python import to_bytes, is_listlike from scrapy.utils.python import to_bytes, is_listlike
from scrapy.utils.response import get_base_url
class FormRequest(Request): class FormRequest(Request):
@@ -44,7 +45,7 @@ class FormRequest(Request):
def _get_form_url(form, url): def _get_form_url(form, url):
if url is None: if url is None:
return form.action or form.base_url return urljoin(form.base_url, form.action)
return urljoin(form.base_url, url) return urljoin(form.base_url, url)
@@ -58,7 +59,7 @@ def _urlencode(seq, enc):
def _get_form(response, formname, formid, formnumber, formxpath): def _get_form(response, formname, formid, formnumber, formxpath):
"""Find the form element """ """Find the form element """
text = response.body_as_unicode() text = response.body_as_unicode()
root = create_root_node(text, lxml.html.HTMLParser, base_url=response.url) root = create_root_node(text, lxml.html.HTMLParser, base_url=get_base_url(response))
forms = root.xpath('//form') forms = root.xpath('//form')
if not forms: if not forms:
raise ValueError("No <form> element found in %s" % response) raise ValueError("No <form> element found in %s" % response)