1
0
mirror of https://github.com/scrapy/scrapy.git synced 2025-02-27 12:24:51 +00:00

lxml form request cleanup. #111

* remove unused _nons function copied from lxml.html
* compute clickables only if dont_click is False
* reduce branch nesting in the _get_clickable function
This commit is contained in:
Daniel Graña 2012-04-13 10:52:39 -03:00
parent e4d22cb16a
commit 32b9f788be

View File

@ -12,8 +12,6 @@ from lxml import html
from scrapy.http.request import Request from scrapy.http.request import Request
from scrapy.utils.python import unicode_to_str from scrapy.utils.python import unicode_to_str
XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
class MultipleElementsFound(Exception): class MultipleElementsFound(Exception):
pass pass
@ -53,13 +51,6 @@ class FormRequest(Request):
url = form.action or form.base_url url = form.action or form.base_url
return cls(url, method=form.method, formdata=formdata, **kwargs) return cls(url, method=form.method, formdata=formdata, **kwargs)
# Copied from lxml.html to avoid relying on a non-public function
def _nons(tag):
if isinstance(tag, basestring):
if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE:
return tag.split('}')[-1]
return tag
def _urlencode(seq, enc): def _urlencode(seq, enc):
values = [(unicode_to_str(k, enc), unicode_to_str(v, enc)) values = [(unicode_to_str(k, enc), unicode_to_str(v, enc))
for k, vs in seq for k, vs in seq
@ -92,13 +83,12 @@ def _get_form(hxs, formname, formnumber, response):
def _get_inputs(form, formdata, dont_click, clickdata, response): def _get_inputs(form, formdata, dont_click, clickdata, response):
inputs = [(n, v) for n, v in form.form_values() if n not in formdata] inputs = [(n, v) for n, v in form.form_values() if n not in formdata]
clickables = [el for el in form.inputs if el.type == 'submit']
# If we are allowed to click on buttons and we have clickable if not dont_click:
# elements, we move on to see if we have any clickdata clickables = [el for el in form.inputs if el.type == 'submit']
if not dont_click and clickables: if clickables:
clickable = _get_clickable(clickdata, clickables, form) clickable = _get_clickable(clickdata, clickables, form)
inputs.append(clickable) inputs.append(clickable)
inputs.extend(formdata.iteritems()) inputs.extend(formdata.iteritems())
return inputs return inputs
@ -109,42 +99,36 @@ def _get_clickable(clickdata, clickables, form):
if the latter is given. If not, it returns the first if the latter is given. If not, it returns the first
clickable element found clickable element found
""" """
# If clickdata is given, we compare it to the clickable elements # If we don't have clickdata, we just use the first clickable element
# to find a match if clickdata is None:
if clickdata is not None:
# We first look to see if the number is specified in
# clickdata, because that uniquely identifies the element
nr = clickdata.get('nr', None)
if nr is not None:
try:
el = list(form.inputs)[nr]
except IndexError:
pass
else:
return (el.name, el.value)
# We didn't find it, so now we build an XPath expression
# out of the other arguments, because they can be used
# as such
else:
xpath_pred = []
for k, v in clickdata.items():
if k == 'coord':
v = ','.join(str(c) for c in v)
xpath_pred.append('[@%s="%s"]' % (k, v))
xpath_expr = '//*%s' % ''.join(xpath_pred)
el = form.xpath(xpath_expr)
if len(el) > 1:
raise MultipleElementsFound(
"Multiple elements found (%r) matching the criteria"
" in clickdata: %r" % (el, clickdata)
)
else:
return (el[0].name, el[0].value)
# If we don't have clickdata, we just use the first
# clickable element
else:
el = clickables.pop(0) el = clickables.pop(0)
return (el.name, el.value) return (el.name, el.value)
# If clickdata is given, we compare it to the clickable elements to find a
# match. We first look to see if the number is specified in clickdata,
# because that uniquely identifies the element
nr = clickdata.get('nr', None)
if nr is not None:
try:
el = list(form.inputs)[nr]
except IndexError:
pass
else:
return (el.name, el.value)
# We didn't find it, so now we build an XPath expression out of the other
# arguments, because they can be used as such
xpath_pred = []
for k, v in clickdata.items():
if k == 'coord':
v = ','.join(str(c) for c in v)
xpath_pred.append('[@%s="%s"]' % (k, v))
xpath_expr = '//*%s' % ''.join(xpath_pred)
el = form.xpath(xpath_expr)
if len(el) > 1:
raise MultipleElementsFound("Multiple elements found (%r) "
"matching the criteria in clickdata: %r"
% (el, clickdata))
else:
return (el[0].name, el[0].value)