mirror of
https://github.com/scrapy/scrapy.git
synced 2025-02-27 08:24:22 +00:00
lxml form request cleanup. #111
* remove the unused _nons function copied from lxml.html
* compute clickables only if dont_click is False
* reduce branch nesting in the _get_clickable function
This commit is contained in:
parent
e4d22cb16a
commit
32b9f788be
@ -12,8 +12,6 @@ from lxml import html
|
|||||||
from scrapy.http.request import Request
|
from scrapy.http.request import Request
|
||||||
from scrapy.utils.python import unicode_to_str
|
from scrapy.utils.python import unicode_to_str
|
||||||
|
|
||||||
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
|
|
||||||
|
|
||||||
|
|
||||||
class MultipleElementsFound(Exception):
|
class MultipleElementsFound(Exception):
|
||||||
pass
|
pass
|
||||||
@ -53,13 +51,6 @@ class FormRequest(Request):
|
|||||||
url = form.action or form.base_url
|
url = form.action or form.base_url
|
||||||
return cls(url, method=form.method, formdata=formdata, **kwargs)
|
return cls(url, method=form.method, formdata=formdata, **kwargs)
|
||||||
|
|
||||||
# Copied from lxml.html to avoid relying on a non-public function
|
|
||||||
def _nons(tag):
|
|
||||||
if isinstance(tag, basestring):
|
|
||||||
if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE:
|
|
||||||
return tag.split('}')[-1]
|
|
||||||
return tag
|
|
||||||
|
|
||||||
def _urlencode(seq, enc):
|
def _urlencode(seq, enc):
|
||||||
values = [(unicode_to_str(k, enc), unicode_to_str(v, enc))
|
values = [(unicode_to_str(k, enc), unicode_to_str(v, enc))
|
||||||
for k, vs in seq
|
for k, vs in seq
|
||||||
@ -92,13 +83,12 @@ def _get_form(hxs, formname, formnumber, response):
|
|||||||
|
|
||||||
def _get_inputs(form, formdata, dont_click, clickdata, response):
|
def _get_inputs(form, formdata, dont_click, clickdata, response):
|
||||||
inputs = [(n, v) for n, v in form.form_values() if n not in formdata]
|
inputs = [(n, v) for n, v in form.form_values() if n not in formdata]
|
||||||
clickables = [el for el in form.inputs if el.type == 'submit']
|
|
||||||
|
|
||||||
# If we are allowed to click on buttons and we have clickable
|
if not dont_click:
|
||||||
# elements, we move on to see if we have any clickdata
|
clickables = [el for el in form.inputs if el.type == 'submit']
|
||||||
if not dont_click and clickables:
|
if clickables:
|
||||||
clickable = _get_clickable(clickdata, clickables, form)
|
clickable = _get_clickable(clickdata, clickables, form)
|
||||||
inputs.append(clickable)
|
inputs.append(clickable)
|
||||||
|
|
||||||
inputs.extend(formdata.iteritems())
|
inputs.extend(formdata.iteritems())
|
||||||
return inputs
|
return inputs
|
||||||
@ -109,42 +99,36 @@ def _get_clickable(clickdata, clickables, form):
|
|||||||
if the latter is given. If not, it returns the first
|
if the latter is given. If not, it returns the first
|
||||||
clickable element found
|
clickable element found
|
||||||
"""
|
"""
|
||||||
# If clickdata is given, we compare it to the clickable elements
|
# If we don't have clickdata, we just use the first clickable element
|
||||||
# to find a match
|
if clickdata is None:
|
||||||
if clickdata is not None:
|
|
||||||
# We first look to see if the number is specified in
|
|
||||||
# clickdata, because that uniquely identifies the element
|
|
||||||
nr = clickdata.get('nr', None)
|
|
||||||
if nr is not None:
|
|
||||||
try:
|
|
||||||
el = list(form.inputs)[nr]
|
|
||||||
except IndexError:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
return (el.name, el.value)
|
|
||||||
|
|
||||||
# We didn't find it, so now we build an XPath expression
|
|
||||||
# out of the other arguments, because they can be used
|
|
||||||
# as such
|
|
||||||
else:
|
|
||||||
xpath_pred = []
|
|
||||||
for k, v in clickdata.items():
|
|
||||||
if k == 'coord':
|
|
||||||
v = ','.join(str(c) for c in v)
|
|
||||||
xpath_pred.append('[@%s="%s"]' % (k, v))
|
|
||||||
|
|
||||||
xpath_expr = '//*%s' % ''.join(xpath_pred)
|
|
||||||
el = form.xpath(xpath_expr)
|
|
||||||
if len(el) > 1:
|
|
||||||
raise MultipleElementsFound(
|
|
||||||
"Multiple elements found (%r) matching the criteria"
|
|
||||||
" in clickdata: %r" % (el, clickdata)
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return (el[0].name, el[0].value)
|
|
||||||
|
|
||||||
# If we don't have clickdata, we just use the first
|
|
||||||
# clickable element
|
|
||||||
else:
|
|
||||||
el = clickables.pop(0)
|
el = clickables.pop(0)
|
||||||
return (el.name, el.value)
|
return (el.name, el.value)
|
||||||
|
|
||||||
|
# If clickdata is given, we compare it to the clickable elements to find a
|
||||||
|
# match. We first look to see if the number is specified in clickdata,
|
||||||
|
# because that uniquely identifies the element
|
||||||
|
nr = clickdata.get('nr', None)
|
||||||
|
if nr is not None:
|
||||||
|
try:
|
||||||
|
el = list(form.inputs)[nr]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
return (el.name, el.value)
|
||||||
|
|
||||||
|
# We didn't find it, so now we build an XPath expression out of the other
|
||||||
|
# arguments, because they can be used as such
|
||||||
|
xpath_pred = []
|
||||||
|
for k, v in clickdata.items():
|
||||||
|
if k == 'coord':
|
||||||
|
v = ','.join(str(c) for c in v)
|
||||||
|
xpath_pred.append('[@%s="%s"]' % (k, v))
|
||||||
|
|
||||||
|
xpath_expr = '//*%s' % ''.join(xpath_pred)
|
||||||
|
el = form.xpath(xpath_expr)
|
||||||
|
if len(el) > 1:
|
||||||
|
raise MultipleElementsFound("Multiple elements found (%r) "
|
||||||
|
"matching the criteria in clickdata: %r"
|
||||||
|
% (el, clickdata))
|
||||||
|
else:
|
||||||
|
return (el[0].name, el[0].value)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user