# -*- coding: iso-8859-1 -*-
# Copyright (C) 2006-2010 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Management of checking a queue of links with several threads.
"""
import time
import os
import thread
import urlparse
from cStringIO import StringIO
from .. import log, LOG_CHECK, LinkCheckerInterrupt, cookies, dummy
from ..cache import urlqueue, robots_txt, cookie, connection
from . import aggregator, console
from ..httplib2 import HTTPMessage


def visit_loginurl (aggregate):
    """Visit the configured login URL, if any, and submit its login form.

    Reads config["loginurl"] from aggregate.config and returns immediately
    when it is not set. Otherwise twill is used to fetch the page, fill in
    and submit the login form, store the resulting cookies in
    aggregate.cookies and put the URL reached after the form POST into
    aggregate.urlqueue.
    A missing twill installation or a response code other than 200 is
    logged as a warning and ends the visit without raising.

    @param aggregate: object providing the config, cookies and urlqueue
        attributes used here
    @return: None
    """
    config = aggregate.config
    url = config["loginurl"]
    if not url:
        return
    try:
        # imported lazily so twill is only required when a login URL is set
        from twill import commands as tc
    except ImportError:
        log.warn(LOG_CHECK, _("Could not import twill for login URL visit"))
        return
    log.debug(LOG_CHECK, u"Visiting login URL %s", url)
    configure_twill(tc)
    tc.go(url)
    if tc.get_browser().get_code() != 200:
        log.warn(LOG_CHECK,
                 _("Error visiting login URL %(url)s.") % {"url": url})
        return
    submit_login_form(config, url, tc)
    if tc.get_browser().get_code() != 200:
        log.warn(LOG_CHECK,
                 _("Error posting form at login URL %(url)s.") % {"url": url})
        return
    store_cookies(tc.get_browser().cj, aggregate.cookies, url)
    resulturl = tc.get_browser().get_url()
    # Hand the URL over as a logging argument, like the other debug calls in
    # this module, instead of interpolating it eagerly: the message is then
    # not built here with "%" regardless of the debug setting.
    log.debug(LOG_CHECK, u"URL after POST is %s", resulturl)
    # add result URL to check list
    from ..checker import get_url_from
    aggregate.urlqueue.put(get_url_from(resulturl, 0, aggregate))


def configure_twill (tc):
    """Prepare the twill command module for use by LinkChecker.

    Makes readonly form controls writeable, sets a browser-like user agent,
    silences twill's output objects and enables HTTP debugging when the
    check log is in debug mode.
    No proxy is configured here; per the original note twill uses the same
    ones (provided from urllib) as LinkChecker does.

    @param tc: the twill commands module
    @return: None
    """
    # readonly controls may have to be filled in, so allow writing to them
    tc.config("readonly_controls_writeable", True)
    # pretend to be IE 6.0; some sites (eg. SourceForge) need this
    tc.agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)")
    # silence twill: replace both output objects with dummies
    tc.OUT = dummy.Dummy()
    from twill import browser as twill_browser
    twill_browser.OUT = dummy.Dummy()
    # HTTP debugging is only wanted when the check log is in debug mode
    if not log.is_debug(LOG_CHECK):
        return
    tc.debug("http", 1)


def submit_login_form (config, url, tc):
    """Fill in the login form found by twill and submit it.

    The user and password for url come from config.get_user_password();
    the names of the form fields come from the "loginuserfield" and
    "loginpasswordfield" configuration entries. All entries of
    config["loginextrafields"] are set on the same form before submitting.

    @param config: configuration object
    @param url: the login URL
    @param tc: the twill commands module
    @return: None
    """
    user, password = config.get_user_password(url)
    userfield = config["loginuserfield"]
    passwordfield = config["loginpasswordfield"]
    # the form to fill is the one containing both credential fields
    formname = search_formname((userfield, passwordfield), tc)
    for fieldname, fieldvalue in ((userfield, user),
                                  (passwordfield, password)):
        tc.formvalue(formname, fieldname, fieldvalue)
    for fieldname, fieldvalue in config["loginextrafields"].items():
        tc.formvalue(formname, fieldname, fieldvalue)
    tc.submit()


def search_formname (fieldnames, tc):
    """Find the first form that contains all given CGI field names.

    @param fieldnames: iterable of field names that must all be present
    @param tc: the twill commands module
    @return: the name of the first matching form, or its 'id' attribute
        when the name is empty; None if no form matches
    """
    browser = tc.get_browser()
    for candidate in browser.get_all_forms():
        has_all_fields = True
        for fieldname in fieldnames:
            try:
                browser.get_form_field(candidate, fieldname)
            except tc.TwillException:
                # a lookup failure means this form lacks the field
                has_all_fields = False
                break
        if has_all_fields:
            return candidate.name or candidate.attrs.get('id')
    # no form has every requested field
    return None


def store_cookies (cookiejar, cookiecache, url):
    """Copy all cookies of cookiejar into the cookiecache.

    Each cookie is rendered as a "Set-Cookie2" header line; the lines are
    parsed into an HTTPMessage and added to the cache together with the
    scheme, host and path of url.

    @param cookiejar: iterable of cookies
    @param cookiecache: cache object providing add()
    @param url: URL the cookies belong to
    @return: None
    """
    header_lines = ["Set-Cookie2: %s" % cookies.cookie_str(entry)
                    for entry in cookiejar]
    log.debug(LOG_CHECK, "Store cookies %s", header_lines)
    raw_headers = StringIO("\r\n".join(header_lines))
    headers = HTTPMessage(raw_headers)
    # the first three URL parts are scheme, host and path
    scheme, host, path = urlparse.urlsplit(url)[:3]
    cookiecache.add(headers, scheme, host, path)


def check_urls (aggregate):
    """Main check function; checks all configured URLs until interrupted
    with Ctrl-C.
    @return: None
    """
    try:
        visit_loginurl(aggregate)
    except Exception, msg:
        log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % \
                 {'msg': str(msg)})
        raise
    try:
        aggregate.logger.start_log_output()
        if not aggregate.urlqueue.empty():
            aggregate.start_threads()
        check_url(aggregate)
        aggregate.finish()
        aggregate.logger.end_log_output()
    except LinkCheckerInterrupt:
        raise
    except KeyboardInterrupt:
        interrupt(aggregate)
    except thread.error:
        log.warn(LOG_CHECK,
             _("Could not start a new thread. Check that the current user" \
               " is allowed to start new threads."))
        abort(aggregate)
    except Exception:
        # Catching "Exception" is intentionally done. This saves the program
        # from badly-programmed libraries that raise all kinds of strange
        # exceptions.
        console.internal_error()
        abort(aggregate)
    # Not catched exceptions at this point are SystemExit and GeneratorExit,
    # and both should be handled by the calling layer.


def check_url (aggregate):
    """Wait until the URL queue is done or no checker thread is left.

    @param aggregate: the aggregate holding the URL queue and threads
    @return: None
    @raise KeyboardInterrupt: when aggregate.wanted_stop is set while
        threads are still running
    """
    finished = False
    while not finished:
        try:
            aggregate.urlqueue.join(timeout=0.5)
        except urlqueue.Timeout:
            # urlqueue.join() is not interruptable, hence the timeout
            # followed by a one-second sleep before looking at the threads.
            time.sleep(1)
            aggregate.remove_stopped_threads()
            if not aggregate.threads:
                finished = True
            elif aggregate.wanted_stop:
                # another thread asked for a stop
                raise KeyboardInterrupt
        else:
            finished = True


def interrupt (aggregate):
    """Handle a user interrupt: warn, then shut down via abort().

    A KeyboardInterrupt arriving in this function itself is ignored and
    the warnings and the abort() call are repeated.

    @param aggregate: the aggregate to shut down
    @return: None
    """
    shutdown_done = False
    while not shutdown_done:
        try:
            log.warn(LOG_CHECK,
               _("user interrupt; waiting for active threads to finish"))
            log.warn(LOG_CHECK,
               _("another interrupt will exit immediately"))
            abort(aggregate)
            shutdown_done = True
        except KeyboardInterrupt:
            # start over with the warnings and the shutdown
            continue


def abort (aggregate):
    """Shut the aggregate down cleanly: abort, finish and end log output.

    A KeyboardInterrupt during the shutdown is logged and answered with
    abort_now().

    @param aggregate: the aggregate to shut down
    @return: None
    """
    shutdown_complete = False
    while not shutdown_complete:
        try:
            aggregate.abort()
            aggregate.finish()
            aggregate.logger.end_log_output()
            shutdown_complete = True
        except KeyboardInterrupt:
            log.warn(LOG_CHECK, _("user abort; force shutdown"))
            abort_now()


def abort_now ():
    """Terminate the current process at once, without any cleanup.

    Sends SIGKILL to the own process when os.name is 'posix', calls
    os.abort() when it is 'nt', and os._exit(3) otherwise.
    """
    platform = os.name
    if platform == 'posix':
        # Unix systems: kill our own process
        import signal
        os.kill(os.getpid(), signal.SIGKILL)
        return
    if platform == 'nt':
        # NT: use os.abort()
        os.abort()
        return
    # everything else: os._exit() is the best shot
    os._exit(3)


def get_aggregate (config):
    """Build an aggregator instance for the given configuration.

    Creates a fresh URL queue, a connection pool using config["wait"],
    a cookie jar and a robots.txt cache and passes them to the Aggregate.

    @param config: configuration object
    @return: the new aggregator.Aggregate instance
    """
    url_queue = urlqueue.UrlQueue()
    connection_pool = connection.ConnectionPool(wait=config["wait"])
    cookie_jar = cookie.CookieJar()
    robots_cache = robots_txt.RobotsTxt()
    parts = (config, url_queue, connection_pool, cookie_jar, robots_cache)
    return aggregator.Aggregate(*parts)