[extractor/common] Case insensitive inputs extraction

This commit is contained in:
Sergey M․ 2015-09-11 20:43:05 +06:00
parent 1721fef28b
commit 73eb13dfc7

View file

@@ -732,7 +732,7 @@ def _twitter_search_player(self, html):
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):
hidden_inputs = {} hidden_inputs = {}
for input in re.findall(r'<input([^>]+)>', html): for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
continue continue
name = re.search(r'name=(["\'])(?P<value>.+?)\1', input) name = re.search(r'name=(["\'])(?P<value>.+?)\1', input)
@@ -746,7 +746,7 @@ def _hidden_inputs(html):
def _form_hidden_inputs(self, form_id, html): def _form_hidden_inputs(self, form_id, html):
form = self._search_regex( form = self._search_regex(
r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id, r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
html, '%s form' % form_id, group='form') html, '%s form' % form_id, group='form')
return self._hidden_inputs(form) return self._hidden_inputs(form)