mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-03 06:30:23 +00:00
Markup is not being passed in pieces to the parser, so simplify the interface and reduce the state further.
97 lines
3.3 KiB
Python
97 lines
3.3 KiB
Python
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2014 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""
|
|
HTML form utils
|
|
"""
|
|
from ..HtmlParser import htmlsax
|
|
from .. import log, LOG_CHECK
|
|
|
|
class Form(object):
    """Container for an HTML form: its target URL plus its field data."""

    def __init__(self, url):
        """Remember the form URL and start with no form values."""
        self.url = url
        self.data = dict()

    def add_value(self, key, value):
        """Store value under key, replacing any earlier value for key."""
        self.data.update({key: value})

    def __repr__(self):
        """Return a string showing the URL and the form data."""
        shown = (self.url, self.data)
        return "<url=%s data=%s>" % shown
|
|
|
|
|
|
class FormFinder(object):
    """Collect HTML forms and their named input fields.

    FormFinder instances are used as HtmlParser handlers: the parser
    calls start_element(), start_end_element() and end_element() while
    processing the markup. Every finished form that had an ``action``
    attribute is stored as a Form object in ``self.forms``.
    """

    def __init__(self):
        """Initialize the list of found forms and the current form."""
        super(FormFinder, self).__init__()
        # Finished forms, in the order their end tags were seen.
        self.forms = []
        # Form currently being filled, or None when there is none
        # (outside of a form, or the form had no action attribute).
        self.form = None

    def start_element(self, tag, attrs, element_text, lineno, column):
        """Handle a start tag.

        A <form> tag with an ``action`` attribute starts a new Form with
        that action as URL. An <input> tag with a ``name`` attribute adds
        its name and value (None if missing) to the current form.
        Inputs without a name or outside of a form are logged as
        warnings and otherwise ignored. All other tags are ignored.
        """
        if tag == u'form':
            if u'action' in attrs:
                self.form = Form(attrs['action'])
        elif tag == u'input':
            if self.form is None:
                log.warn(LOG_CHECK, "formless input %s" % attrs)
            elif 'name' in attrs:
                self.form.add_value(attrs['name'], attrs.get('value'))
            else:
                log.warn(LOG_CHECK, "nameless form input %s" % attrs)

    def start_end_element(self, tag, attrs, element_text, lineno, column):
        """Delegate a combined start/end element (eg. <input .../>) to
        the start_element method. Ignore the end element part."""
        self.start_element(tag, attrs, element_text, lineno, column)

    def end_element(self, tag):
        """Store the current form when a </form> end tag is seen."""
        if tag == u'form':
            # A <form> without an action attribute (or a stray </form>)
            # has no Form object; never store None in the forms list,
            # since consumers access form.data on every entry.
            if self.form is not None:
                self.forms.append(self.form)
            self.form = None
|
|
|
|
|
|
def search_form(content, cgiuser, cgipassword):
    """Search for a HTML form in the given HTML content that has the given
    CGI fields.

    The content is parsed with a FormFinder handler. The first found form
    having an input whose name equals cgiuser or cgipassword (compared
    case-insensitively) is returned. A field name of None is ignored.
    If no form is found return None.
    """
    handler = FormFinder()
    parser = htmlsax.parser(handler)
    # parse
    parser.feed_soup(htmlsax.make_soup(content))
    parser.flush()
    log.debug(LOG_CHECK, "Found forms %s", handler.forms)
    # Lowercase the wanted field names once; skip names that are None
    # instead of failing on None.lower().
    cginames = set(
        name.lower() for name in (cgiuser, cgipassword) if name is not None
    )
    for form in handler.forms:
        # Be defensive: skip entries that are not Form objects (None).
        if form is None:
            continue
        # Only the field names matter here, not their values.
        if any(key.lower() in cginames for key in form.data):
            return form
    # not found
    return None
|