new debugging

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@266 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-05-23 21:20:44 +00:00
parent 631f7a732d
commit f455f8ddeb
23 changed files with 322 additions and 275 deletions

6
debian/changelog vendored
View file

@ -1,8 +1,12 @@
linkchecker (1.3.2) unstable; urgency=low
* new option --pause
* only enable threading with 2 or more threads, not with only 1 thread
* new debug function with variable arguments
* more than one debug level; enable them with multiple -D options
* workaround broken HEAD with some Apache servers by using GET
-- Bastian Kleineidam <calvin@debian.org> Fri, 18 May 2001 21:05:24 +0200
-- Bastian Kleineidam <calvin@debian.org> Wed, 23 May 2001 21:30:06 +0200
linkchecker (1.3.1) unstable; urgency=low

View file

@ -1,19 +1,19 @@
"""store metadata and options"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import ConfigParser, sys, os, re, UserDict, string, time
import Logging, _linkchecker_configdata
@ -21,6 +21,7 @@ from os.path import expanduser,normpath,normcase,join,isfile
from types import StringType
from urllib import getproxies
from linkcheck import _
from debuglevels import *
Version = _linkchecker_configdata.version
AppName = _linkchecker_configdata.name
@ -54,12 +55,14 @@ LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
# debug options
DebugDelim = "==========================================================\n"
DebugFlag = 0
DebugLevel = 0
# note: debugging with more than 1 thread can be painful
def debug(msg):
if DebugFlag:
sys.stderr.write(msg)
def debug(level, *args):
if DebugLevel > level:
for arg in args:
sys.stderr.write(" %s"%arg)
sys.stderr.write("\n")
sys.stderr.flush()
# path util function
@ -91,9 +94,10 @@ class Configuration(UserDict.UserDict):
self["externlinks"] = []
self["internlinks"] = []
self["denyallow"] = 0
self["authentication"] = [(re.compile(r'^.+'),
'anonymous',
'joe@')]
self["authentication"] = {'pattern': re.compile(r'^.+'),
'user': 'anonymous',
'pass': 'joe@',
}
self["proxy"] = getproxies()
self["recursionlevel"] = 1
self["wait"] = 0
@ -382,7 +386,7 @@ class Configuration(UserDict.UserDict):
def readConfig(self, files):
"""this big function reads all the configuration parameters
used in the linkchecker module."""
debug("DEBUG: reading configuration from %s\n" % files)
debug(BRING_IT_ON, "reading configuration from", files)
try:
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
@ -392,15 +396,14 @@ class Configuration(UserDict.UserDict):
section="output"
for key in Loggers.keys():
if cfgparser.has_section(key):
debug(key+": ")
for opt in cfgparser.options(key):
try: self[key][opt] = cfgparser.get(key, opt)
except ConfigParser.Error, msg: debug(str(msg)+"\n")
try:
self[key]['fields'] = map(string.strip,
string.split(cfgparser.get(key, 'fields'), ','))
debug("fields %s\n"%str(self[key]['fields']))
except ConfigParser.Error, msg: debug(str(msg)+"\n")
except ConfigParser.Error, msg:
debug(BRING_IT_ON, msg)
try:
log = cfgparser.get(section, "log")
if Loggers.has_key(log):
@ -462,10 +465,12 @@ class Configuration(UserDict.UserDict):
try:
i=1
while 1:
tuple = string.split(cfgparser.get(section, "entry%d" % i))
if len(tuple)!=3: break
tuple[0] = re.compile(tuple[0])
self["authentication"].insert(0, tuple)
auth = string.split(cfgparser.get(section, "entry%d" % i))
if len(auth)!=3: break
auth[0] = re.compile(auth[0])
self["authentication"].insert(0, {'pattern': auth[0],
'user': auth[1],
'pass': auth[2]})
i += 1
except ConfigParser.Error: pass

View file

@ -1,19 +1,19 @@
"""Handle local file: links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re,string,os,urlparse,urllib
from UrlData import UrlData

View file

@ -1,19 +1,19 @@
"""Handle Mozilla-specific find: links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from UrlData import UrlData
from linkcheck import _

View file

@ -1,19 +1,19 @@
"""Handle FTP links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import ftplib,linkcheck
from UrlData import UrlData,ExcList

View file

@ -1,19 +1,19 @@
"""Handle Gopher links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from UrlData import UrlData
from linkcheck import _

View file

@ -1,19 +1,19 @@
"""Base handle for links with a hostname"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import socket,string
from UrlData import UrlData

View file

@ -1,25 +1,26 @@
"""Handle http links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import httplib,urlparse,sys,time,re
import Config,StringUtil,robotparser2
from UrlData import UrlData
from urllib import splittype, splithost
from linkcheck import _
from debuglevels import *
class HttpUrlData(UrlData):
"Url link with http scheme"
@ -82,7 +83,7 @@ class HttpUrlData(UrlData):
# first try
status, statusText, self.mime = self._getHttpRequest()
Config.debug(str(status)+", "+str(statusText)+", "+str(self.mime)+"\n")
Config.debug(BRING_IT_ON, status, statusText, self.mime)
has301status = 0
while 1:
# proxy enforcement
@ -118,10 +119,11 @@ class HttpUrlData(UrlData):
# some servers get the HEAD request wrong:
# - Netscape Enterprise Server III (no HEAD implemented, 404 error)
# - Hyperwave Information Server (501 error)
# - Apache/1.3.14 (Unix) (500 error, http://www.rhino3d.de/)
# - some advertisings (they want only GET, dont ask why ;)
# - Zope server (it has to render the page to get the correct
# content-type
elif status in [405,501]:
elif status in [405,501,500]:
# HEAD method not allowed ==> try get
status, statusText, self.mime = self._getHttpRequest("GET")
Config.debug("DEBUG: HEAD not supported\n")

View file

@ -1,19 +1,19 @@
"""Handle https links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from UrlData import UrlData
from HttpUrlData import HttpUrlData

View file

@ -1,19 +1,19 @@
"""Handle Javascript links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from UrlData import UrlData
from linkcheck import _

View file

@ -1,19 +1,19 @@
"""Output logging support for different formats"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys,time,string
from types import ListType

View file

@ -1,25 +1,26 @@
"""Handle for mailto: links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import os,re,string,DNS,sys,Config,cgi,urllib,linkcheck
from rfc822 import AddressList
from HostCheckingUrlData import HostCheckingUrlData
from smtplib import SMTP
from linkcheck import _
from debuglevels import *
# regular expression for RFC2368 compliant mailto: scanning
word = r"[-a-zA-Z0-9,./%]+"
@ -42,7 +43,7 @@ class MailtoUrlData(HostCheckingUrlData):
for val in self.headers[key]:
a = urllib.unquote(val)
self.adresses.extend(AddressList(a).addresslist)
Config.debug("DEBUG: mailto headers: %s\n" % self.headers)
Config.debug(BRING_IT_ON, "mailto headers:", self.headers)
def _cutout_adresses(self):

View file

@ -1,24 +1,25 @@
"""Handle nntp: and news: links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re,string,time,sys,nntplib,urlparse,linkcheck
from linkcheck import _
from UrlData import ExcList,UrlData
debug = linkcheck.Config.debug
from debuglevels import *
ExcList.extend([nntplib.error_reply,
nntplib.error_temp,
@ -41,7 +42,7 @@ class NntpUrlData(UrlData):
else:
self.url = self.urlName
self.urlTuple = urlparse.urlparse(self.url)
debug("DEBUG: %s\n" % `self.urlTuple`)
debug(BRING_IT_ON, self.urlTuple)
def checkConnection(self, config):
@ -79,7 +80,7 @@ class NntpUrlData(UrlData):
timeout = 0
except nntplib.error_perm:
value = sys.exc_info()[1]
debug("NNTP: %s\n" % value)
debug(BRING_IT_ON, "NNTP:", value)
if re.compile("^505").search(str(value)):
import whrandom
time.sleep(whrandom.randint(10,20))

View file

@ -1,19 +1,19 @@
"""various string utils"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import string,re,sys,htmlentitydefs

View file

@ -1,19 +1,19 @@
"""Handle telnet: links"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import telnetlib,re,string,linkcheck
from HostCheckingUrlData import HostCheckingUrlData

View file

@ -1,19 +1,19 @@
"""Threading support"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from threading import *

View file

@ -1,24 +1,25 @@
"""Base URL handler"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys,re,string,urlparse,urllib,time,DNS
import Config,StringUtil,linkcheck,linkname
from linkcheck import _
debug = linkcheck.Config.debug
from debuglevels import *
ExcList = [
IOError,
@ -194,7 +195,7 @@ class UrlData:
def logMe(self, config):
debug("DEBUG: logging url\n")
debug(BRING_IT_ON, "logging url")
config.incrementLinknumber()
if config["verbose"] or not self.valid or \
(self.warningString and config["warnings"]):
@ -202,13 +203,13 @@ class UrlData:
def check(self, config):
debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
Config.DebugDelim)
if self.recursionlevel and config['wait']:
time.sleep(config['wait']);
debug(BRING_IT_ON, "Checking", self)
if self.recursionLevel and config['wait']:
debug(BRING_IT_ON, "sleeping for", config['wait'], "seconds")
time.sleep(config['wait'])
t = time.time()
# check syntax
debug("DEBUG: checking syntax\n")
debug(BRING_IT_ON, "checking syntax")
if not self.urlName or self.urlName=="":
self.setError(_("URL is null or empty"))
self.logMe(config)
@ -223,7 +224,7 @@ class UrlData:
return
# check the cache
debug("DEBUG: checking cache\n")
debug(BRING_IT_ON, "checking cache")
if config.urlCache_has_key(self.getCacheKey()):
self.copyFrom(config.urlCache_get(self.getCacheKey()))
self.cached = 1
@ -231,15 +232,14 @@ class UrlData:
return
# apply filter
debug("DEBUG: checking filter\n")
debug("DEBUG: extern = %s\n" % str(self.extern))
debug(BRING_IT_ON, "extern =", self.extern)
if self.extern and (config["strict"] or self.extern[1]):
self.setWarning(_("outside of domain filter, checked only syntax"))
self.logMe(config)
return
# check connection
debug("DEBUG: checking connection\n")
debug(BRING_IT_ON, "checking connection")
try:
self.checkConnection(config)
if self.urlTuple and config["anchors"]:
@ -251,7 +251,7 @@ class UrlData:
# check content
warningregex = config["warningregex"]
if warningregex and self.valid:
debug("DEBUG: checking content\n")
debug(BRING_IT_ON, "checking content")
try: self.checkContent(warningregex)
except tuple(ExcList):
type, value = sys.exc_info()[:2]
@ -259,7 +259,7 @@ class UrlData:
self.checktime = time.time() - t
# check recursion
debug("DEBUG: checking recursion\n")
debug(BRING_IT_ON, "checking recursion")
if self.allowsRecursion(config):
self.parseUrl(config)
self.closeConnection()
@ -294,7 +294,6 @@ class UrlData:
def allowsRecursion(self, config):
Config.debug("extern: %s\n" % str(self.extern))
return self.valid and \
self.isHtml() and \
not self.cached and \
@ -347,7 +346,7 @@ class UrlData:
self.data = self.urlConnection.read()
self.downloadtime = time.time() - t
self._init_html_comments()
debug("DEBUG: comment spans %s\n" % self.html_comments)
debug(NIGHTMARE, "comment spans", self.html_comments)
return self.data
@ -379,8 +378,7 @@ class UrlData:
def parseUrl(self, config):
debug(Config.DebugDelim+"Parsing recursively into\n"+\
str(self)+"\n"+Config.DebugDelim)
debug(BRING_IT_ON, "Parsing recursively into", self)
# search for a possible base reference
bases = self.searchInForTag(BasePattern)
@ -399,11 +397,10 @@ class UrlData:
def searchInForTag(self, pattern):
debug("Searching for tag %s, attribute %s\n" \
% (pattern['tag'], pattern['attr']))
debug(HURT_ME_PLENTY, "Searching for tag", pattern['tag'],
"attribute", pattern['attr'])
urls = []
index = 0
debug("hulla")
while 1:
match = pattern['pattern'].search(self.getContent(), index)
if not match: break
@ -450,9 +447,9 @@ class UrlData:
def _getUserPassword(self, config):
for rx, user, password in config["authentication"]:
if rx.match(self.url):
return user, password
for auth in config["authentication"]:
if auth['pattern'].match(self.url):
return auth['user'], auth['password']
return None,None

View file

@ -1,19 +1,19 @@
"""main function module for link checking"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
class error(Exception):
pass

19
linkcheck/debuglevels.py Normal file
View file

@ -0,0 +1,19 @@
# Copyright (C) 2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
ALWAYS = 0
BRING_IT_ON = 1
HURT_ME_PLENTY = 2
NIGHTMARE = 3

View file

@ -1,19 +1,19 @@
"""common CGI functions used by the CGI scripts"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re,time,urlparse
from linkcheck import _

View file

@ -1,3 +1,19 @@
# Copyright (C) 2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re,StringUtil
imgtag_re = re.compile("(?i)\s+alt\s*=\s*(?P<name>(\".*?\"|'.*?'|[^\s>]+))", re.DOTALL)

View file

@ -1,19 +1,19 @@
"""spam"""
# Copyright (C) 2000,2001 Bastian Kleineidam
# Copyright (C) 2000,2001 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys,time,rotor,types

View file

@ -191,7 +191,7 @@ for opt,arg in options:
if opt=="-f" or opt=="--config":
configfiles.append(arg)
elif opt=="-D" or opt=="--debug":
linkcheck.Config.DebugFlag = 1
linkcheck.Config.DebugLevel += 1
config.disableThreading()
config.read(configfiles)
@ -266,8 +266,8 @@ for opt,arg in options:
elif opt=="-t" or opt=="--threads":
num = int(arg)
if config["threads"] and not linkcheck.Config.DebugFlag:
if num>0:
if config["threads"] and not linkcheck.Config.DebugLevel:
if num>1:
config.enableThreading(num)
else:
config.disableThreading()
@ -295,7 +295,9 @@ for opt,arg in options:
config["warnings"] = 1
if constructauth:
config["authentication"].insert(0, (re.compile(".*"), _user, _password))
config["authentication"].insert(0, {'pattern': re.compile(".*"),
'user': _user,
'pass': _password})
# construct the url list
# if we use blacklist mode, try to read ~/.blacklist