mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-24 01:40:23 +00:00
CSV output
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@76 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
7094fe3ec0
commit
461b37ac33
10 changed files with 558 additions and 50 deletions
437
CSV.py
Normal file
437
CSV.py
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
#
|
||||
# CSV 0.17 8 June 1999 Copyright ©Laurence Tratt 1998 - 1999
|
||||
# e-mail: tratt@dcs.kcl.ac.uk
|
||||
# home-page: http://eh.org/~laurie/comp/python/csv/index.html
|
||||
#
|
||||
#
|
||||
#
|
||||
# CSV.py is copyright ©1998 - 1999 by Laurence Tratt
|
||||
#
|
||||
# All rights reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted, provided that
|
||||
# the above copyright notice appear in all copies and that both that copyright
|
||||
# notice and this permission notice appear in supporting documentation.
|
||||
#
|
||||
# THE AUTHOR - LAURENCE TRATT - DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL THE AUTHOR FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR
|
||||
# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
import re, string, types, UserList
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
###################################################################################################
|
||||
#
|
||||
# CSV class
|
||||
#
|
||||
|
||||
|
||||
class CSV(UserList):

    """Manage a CSV (comma separated values) file.

    The parsed entries are held in ``self.data`` (normally one ``Entry``
    per data line of the file).

    Methods:
        __init__()              set the separator, start empty
        load()                  load from file
        save()                  save to file
        input()                 parse CSV data from a string
        output()                render the internal data to a CSV string
        append()                append one entry
        field__append()         append a computed field to every entry
        duplicates__eliminate() remove duplicate entries
        __str__()               printable representation
    """

    def __init__(self, separator=','):
        """Initialise CSV class instance.

        Arguments:
            separator : the field delimiter; defaults to ','
        """
        self.separator = separator
        self.data = []
        # Both stay None until input()/load() decides whether titles exist.
        self.fields__title__have = self.fields__title = None

    def load(self, file__data__name, fields__title__have, convert_numbers=0,
             separator=None, comments=None):
        """Load up a CSV file.

        Arguments:
            file__data__name    : the name of the CSV file
            fields__title__have : true if the file has a title line
            convert_numbers     : if true, fields that look like ints or
                                  floats are stored as that Python type
                                  (defaults to 0: everything stays a string)
            separator           : the field delimiter (optional)
            comments            : list of strings / compiled regular
                                  expressions that mark comments to remove
        """
        # 'with' guarantees the file is closed even if parsing raises.
        with open(file__data__name, 'r') as file__data:
            self.input(file__data.read(), fields__title__have,
                       convert_numbers, separator or self.separator,
                       comments or ["#"])

    def save(self, file__data__name, separator=None):
        """Save data to CSV file.

        Arguments:
            file__data__name : the name of the CSV file to save to
            separator        : the field delimiter (optional)
        """
        with open(file__data__name, 'w') as file__data:
            file__data.write(self.output(separator or self.separator))

    def input(self, data, fields__title__have, convert_numbers=0,
              separator=None, comments=None):
        """Take a wodge of CSV data and convert it into the internal format.

        Arguments:
            data                : a string containing the CSV data
            fields__title__have : true if the data has a title line
            convert_numbers     : if true, fields that look like ints or
                                  floats are stored as that Python type
                                  (defaults to 0: everything stays a string)
            separator           : the field delimiter (optional)
            comments            : list of strings / compiled regular
                                  expressions that mark comments to remove
                                  (defaults to ["#"])
        """

        def field__convert(field):
            """Return field as int or float where possible, else unchanged."""
            if any(char not in "0123456789.-" for char in field):
                return field
            try:
                if "." not in field:
                    return int(field)
                return float(field)
            # narrow except: only a failed numeric conversion falls back
            # (the original used a bare 'except:' here)
            except ValueError:
                return field

        def line__process(line, convert_numbers, separator):
            """Split one CSV line into a list of field values."""
            fields = []
            line__pos = 0
            while line__pos < len(line):
                # Leading space is not part of a field unless quoted.
                while line__pos < len(line) and line[line__pos] == " ":
                    line__pos = line__pos + 1
                field = ""
                quotes__level = 0
                while line__pos < len(line):
                    # Space running up to a separator or end-of-line is
                    # trailing space and is skipped, unless quoted.
                    if quotes__level == 0 and line[line__pos] == " ":
                        line__pos__temp = line__pos
                        while (line__pos__temp < len(line)
                               and line[line__pos__temp] == " "):
                            line__pos__temp = line__pos__temp + 1
                        if line__pos__temp >= len(line):
                            break
                        elif (line[line__pos__temp : line__pos__temp + len(separator)]
                              == separator):
                            line__pos = line__pos__temp
                    if (quotes__level == 0 and
                            line[line__pos : line__pos + len(separator)] == separator):
                        break
                    elif line[line__pos] == "\"":
                        quotes__level = 1 - quotes__level   # toggle quoting
                    else:
                        field = field + line[line__pos]
                    line__pos = line__pos + 1
                line__pos = line__pos + len(separator)
                if convert_numbers:
                    fields.append(field__convert(field))
                else:
                    fields.append(field)
            # A separator at the very end of the line means a final empty
            # field; the loop above has already consumed the field before it.
            # (The original indexed line[-len(separator)] -- wrong for
            # multi-char separators and an IndexError on empty lines -- and
            # appended the *previous* field instead of an empty one.)
            if line.endswith(separator):
                fields.append("")
            return fields

        separator = separator or self.separator
        comments = comments or ["#"]

        self.fields__title__have = fields__title__have

        # Regular-expression comments are stripped from the raw data first;
        # plain-string comments remove whole lines below.
        comments__strings = []
        for comment in comments:
            if isinstance(comment, str):
                comments__strings.append(comment)
            elif hasattr(comment, "sub"):       # compiled regular expression
                data = comment.sub("", data)
            else:
                raise Exception("Invalid comment type '" + repr(comment) + "'")

        lines = [line.strip() for line in data.split("\n")]

        # Remove every line whose first content is a string comment.  (The
        # original had an off-by-one, '<' instead of '<=', which kept lines
        # where the comment marker ended exactly at end-of-line.)
        lines = [line for line in lines
                 if not any(line.startswith(comment)
                            for comment in comments__strings)]

        # Process the input data: optional title line, then the entries.
        if fields__title__have:
            self.fields__title = line__process(lines[0], convert_numbers, separator)
            pos__start = 1
        else:
            self.fields__title = []
            pos__start = 0
        self.data = []
        for line in lines[pos__start:]:
            if line != "":
                self.data.append(Entry(line__process(line, convert_numbers, separator),
                                       self.fields__title))

    def output(self, separator=None):
        """Convert internal data into CSV string.

        Arguments:
            separator : the field delimiter (optional)

        Returns:
            String containing CSV data
        """
        separator = separator or self.separator

        def line__make(entry, separator=separator):
            """Render one entry as a CSV line, quoting where necessary."""
            parts = []
            for field in entry:
                if not isinstance(field, str):
                    field = repr(field)
                # Quote fields containing the separator or with leading or
                # trailing space so they survive a round trip via input().
                if field and (separator in field
                              or field[0] == " " or field[-1] == " "):
                    field = "\"" + field + "\""
                parts.append(field)
            return separator.join(parts)

        if self.fields__title__have:
            result = line__make(self.fields__title) + "\n\n"
        else:
            result = ""
        return result + "\n".join(map(line__make, self.data)) + "\n"

    def append(self, entry):
        """Add an entry, giving it this file's field titles (if any)."""
        if self.fields__title:
            entry.fields__title = self.fields__title
        self.data.append(entry)

    def field__append(self, func, field__title=None):
        """Append a field with values specified by a function.

        Arguments:
            func         : called as func(entry) to get the new field's value
            field__title : name of the new field (if titles are in use)
        """
        for entry in self.data:
            # entries are mutated in place; no write-back needed
            entry.append(func(entry))
        if self.fields__title__have:
            self.fields__title.append(field__title)

    def duplicates__eliminate(self):
        """Eliminate duplicates (this may result in a reordering of the entries).

        The list is sorted first so equal entries become adjacent; a single
        sweep then drops them: O(sort) + O(n) instead of the obvious O(n*n).
        """
        # XXX Could be done more efficiently for multiple duplicates by
        # deleting a slice of equal elements rather than one at a time.
        if not self.data:       # the original crashed on an empty list here
            return
        self.sort()
        data__pos = 1
        entry__last = self.data[0]
        while data__pos < len(self.data):
            if self.data[data__pos] == entry__last:
                del self.data[data__pos]
            else:
                entry__last = self.data[data__pos]
                data__pos = data__pos + 1

    def __str__(self):
        """Construct a printable, column-aligned representation of the data."""
        if not self.data:       # the original crashed on an empty list here
            return ""

        def printable(value):
            """Strings are shown as-is, everything else via repr()."""
            if isinstance(value, str):
                return value
            return repr(value)

        # Work out the maximum width of each column.
        columns__width = []
        for column in range(len(self.data[0])):
            if self.fields__title__have:
                width = len(repr(self.fields__title[column]))
            else:
                width = 0
            for entry in self:
                width = max(width, len(repr(entry.data[column])))
            columns__width.append(width)

        if self.fields__title__have:
            result = " ".join(
                printable(title).ljust(width)
                for title, width in zip(self.fields__title, columns__width)) + "\n\n"
        else:
            result = ""
        for entry in self:
            result = result + " ".join(
                printable(value).ljust(width)
                for value, width in zip(entry.data, columns__width)) + "\n"
        return result
|
||||
|
||||
|
||||
|
||||
###################################################################################################
|
||||
#
|
||||
# CSV data entry class
|
||||
#
|
||||
#
|
||||
|
||||
|
||||
class Entry(UserList):

    """CSV data entry, UserList subclass.

    Has the same properties as a list, but adds a few dictionary-like
    properties for easy access of fields by title (when titles exist).

    Methods (overridden):
        __init__
        __getitem__
        __setitem__
        __delitem__
        __str__
    """

    def __init__(self, fields, fields__title=None):
        """Initialise with fields data and field titles.

        Arguments:
            fields        : a list containing the data for each field of
                            this entry (aliased, not copied)
            fields__title : a list with the titles of each field
                            (an empty list means there are no titles)
        """
        self.data = fields
        # 'is not None' so that an explicitly passed empty list is kept
        # (the original compared with '!= None').
        if fields__title is not None:
            self.fields__title = fields__title
        else:
            self.fields__title = []

    def __getitem__(self, x):
        """Index with an integer position or a field title."""
        # isinstance instead of 'type(x) == types.IntType': also accepts
        # int subclasses.
        if isinstance(x, int):
            return self.data[x]
        return self.data[self.fields__title.index(x)]

    def __setitem__(self, x, item):
        """Assign by integer position or field title."""
        if isinstance(x, int):
            self.data[x] = item
        else:
            self.data[self.fields__title.index(x)] = item

    def __delitem__(self, x):
        """Delete by integer position or field title."""
        if isinstance(x, int):
            del self.data[x]
        else:
            del self.data[self.fields__title.index(x)]

    def __str__(self):
        """repr() of the underlying field list."""
        return repr(self.data)
|
||||
11
INSTALL
11
INSTALL
|
|
@ -9,19 +9,22 @@ Optionally packages:
|
|||
Distutils >= 0.8.1 from http://www.python.org/sigs/distutils-sig/
|
||||
OpenSSL from http://www.openssl.org
|
||||
|
||||
|
||||
Install with Distutils:
|
||||
If you have the Distutils, run "python setup.py install".
|
||||
How do you run this? Type the three words without the quotes in
|
||||
a command shell and press Return. If that is unfamiliar, consult your
operating system's documentation on using a command shell.
|
||||
|
||||
|
||||
Install without Distutils:
|
||||
Adjust the sys.path.append argument in the file 'linkchecker' to point
|
||||
to the distribution directory.
|
||||
Now you can run "python linkchecker" to run LinkChecker.
|
||||
Now you can type "python linkchecker" (or on Unix: just "./linkchecker") to
|
||||
run LinkChecker.
|
||||
|
||||
Running LinkChecker from any directory:
|
||||
Unix users can put the "linkchecker" script somewhere in a directory in
|
||||
their $path.
|
||||
For Windows users, I included a batch script 'linkchecker.bat'. You have to
|
||||
adjust the distribution directory in this script to point to the directory
|
||||
where the 'linkchecker' file is. Now you can copy 'linkchecker.bat' in
|
||||
a directory in your PATH and run it from anywhere.
|
||||
a directory in your PATH and run it.
|
||||
|
||||
|
|
|
|||
3
README
3
README
|
|
@ -5,7 +5,8 @@ With LinkChecker you can check your HTML documents for broken links.
|
|||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output can be colored or normal text, HTML, SQL or a GML sitemap graph
|
||||
o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
|
||||
graph
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, Gopher, Telnet and local file links
|
||||
are supported
|
||||
Javascript links are currently ignored
|
||||
|
|
|
|||
5
debian/changelog
vendored
5
debian/changelog
vendored
|
|
@ -1,4 +1,4 @@
|
|||
linkchecker (1.2.3) unstable; urgency=low
|
||||
linkchecker (1.3.0) unstable; urgency=low
|
||||
|
||||
* Blacklist output support
|
||||
* typo fix for adjustWinPath
|
||||
|
|
@ -8,8 +8,9 @@ linkchecker (1.2.3) unstable; urgency=low
|
|||
* linkchecker.bat installation support for windows
|
||||
* included test suite in distribution
|
||||
* Improved mailto: link parsing
|
||||
* CSV output support
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 27 Apr 2000 10:18:52 +0200
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 28 Apr 2000 12:59:13 +0200
|
||||
|
||||
linkchecker (1.2.2) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
|
|
@ -8,9 +8,10 @@ This module stores
|
|||
|
||||
import ConfigParser,sys,os,re,UserDict,string
|
||||
from os.path import expanduser,normpath,normcase,join,isfile
|
||||
from types import StringType
|
||||
import Logging
|
||||
|
||||
Version = "1.2.3"
|
||||
Version = "1.3.0"
|
||||
AppName = "LinkChecker"
|
||||
App = AppName+" "+Version
|
||||
UserAgent = AppName+"/"+Version
|
||||
|
|
@ -32,20 +33,12 @@ Loggers = {
|
|||
"colored": Logging.ColoredLogger,
|
||||
"gml": Logging.GMLLogger,
|
||||
"sql": Logging.SQLLogger,
|
||||
"csv": Logging.CSVLogger,
|
||||
"blacklist": Logging.BlacklistLogger,
|
||||
}
|
||||
# for easy printing: a comma separated logger list
|
||||
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
|
||||
|
||||
# File output names
|
||||
FileOutput = {
|
||||
"text": "linkchecker-out.txt",
|
||||
"html": "linkchecker-out.html",
|
||||
"colored": "linkchecker-out.asc",
|
||||
"gml": "linkchecker-out.gml",
|
||||
"sql": "linkchecker-out.sql"
|
||||
}
|
||||
|
||||
# debug options
|
||||
DebugDelim = "==========================================================\n"
|
||||
DebugFlag = 0
|
||||
|
|
@ -86,6 +79,14 @@ class Configuration(UserDict.UserDict):
|
|||
self.data["robotstxt"] = 0
|
||||
self.data["strict"] = 0
|
||||
self.data["fileoutput"] = []
|
||||
self.data["fileoutputnames"] = {
|
||||
"text": "linkchecker-out.txt",
|
||||
"html": "linkchecker-out.html",
|
||||
"colored": "linkchecker-out.asc",
|
||||
"gml": "linkchecker-out.gml",
|
||||
"sql": "linkchecker-out.sql",
|
||||
"csv": "linkchecker-out.csv",
|
||||
}
|
||||
self.data["quiet"] = 0
|
||||
self.data["warningregex"] = None
|
||||
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
|
||||
|
|
@ -338,13 +339,20 @@ class Configuration(UserDict.UserDict):
|
|||
except ConfigParser.Error: pass
|
||||
try: self.data["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
filenames = eval(cfgparser.get(section, "fileoutputnames"))
|
||||
for key in filenames.keys():
|
||||
if self.data["fileoutputnames"].has_key(key) and \
|
||||
type(filenames[key]) == StringType:
|
||||
self.data["fileoutputnames"] = filenames[key]
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
filelist = string.split(cfgparser.get(section, "fileoutput"))
|
||||
for arg in filelist:
|
||||
# no file output for the blacklist Logger
|
||||
if Loggers.has_key(arg) and arg != "blacklist":
|
||||
self.data["fileoutput"].append(Loggers[arg](
|
||||
open(FileOutput[arg], "w")))
|
||||
open(self.data["fileoutputnames"][arg], "w")))
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
section="checking"
|
||||
|
|
|
|||
|
|
@ -425,3 +425,33 @@ class BlacklistLogger:
|
|||
if self.blacklist[url] is None:
|
||||
fd.write(url+"\n")
|
||||
|
||||
|
||||
class CSVLogger(StandardLogger):
    """ CSV output. CSV consists of one line per entry. Entries are
    separated by a semicolon.
    """

    def init(self):
        # Header comment: creation time, download URL, contact address.
        header = ("# created by " + Config.AppName + " at " +
                  _strtime(time.time()) +
                  "\n# you get " + Config.AppName + " at " + Config.Url +
                  "\n# write comments and bugs to " + Config.Email + "\n\n")
        self.fd.write(header)
        self.fd.flush()

    def newUrl(self, urlData):
        # One semicolon-separated line per checked URL; every value is
        # written through repr().
        values = (urlData.urlName,
                  urlData.recursionLevel,
                  urlData.parentName,
                  urlData.baseRef,
                  urlData.errorString,
                  urlData.validString,
                  urlData.warningString,
                  urlData.infoString,
                  urlData.valid,
                  urlData.url,
                  urlData.line,
                  urlData.cached)
        self.fd.write(";".join(map(repr, values)) + "\n")
        self.fd.flush()

    def endOfOutput(self):
        # Drop the reference to the output stream.
        self.fd = None
|
||||
|
||||
|
|
|
|||
|
|
@ -3,29 +3,31 @@ from HostCheckingUrlData import HostCheckingUrlData
|
|||
from smtplib import SMTP
|
||||
from UrlData import LinkCheckerException
|
||||
|
||||
mailto_re = re.compile(r"^mailto:"
|
||||
r"(['\-\w.]+@[\-\w.]+(\?.+)?|"
|
||||
r"[\w\s]+<['\-\w.]+@[\-\w.]+(\?.+)?>)$")
|
||||
# regular expression strings
|
||||
tag_str = r"^mailto:"
|
||||
adress_str = r"([a-zA-Z]['\-\w.]*)@([\w\-]+(\.[\w\-]+)*))"
|
||||
complete_adress_str = "("+adress_str+"|[\w\-\s]*<"+adress_str+">)"
|
||||
suffix_str = r"(\?.+)?"
|
||||
mailto_str = tag_str+complete_adress_str+\
|
||||
"(\s*,"+complete_adress_str+")*"+suffix_str
|
||||
|
||||
# compiled
|
||||
mailto_re = re.compile(mailto_str)
|
||||
adress_re = re.compile(adress_str)
|
||||
|
||||
class MailtoUrlData(HostCheckingUrlData):
|
||||
"Url link with mailto scheme"
|
||||
|
||||
def buildUrl(self):
|
||||
HostCheckingUrlData.buildUrl(self)
|
||||
if not mailto_re.match(self.urlName):
|
||||
mo = mailto_re.match(self.urlName)
|
||||
if not mo:
|
||||
raise LinkCheckerException, "Illegal mailto link syntax"
|
||||
self.host = self.urlName[7:]
|
||||
i = string.find(self.host, "<")
|
||||
j = string.find(self.host, ">")
|
||||
if i!=-1 and j!=-1 and i<j:
|
||||
self.host = self.host[i+1:j]
|
||||
i = string.find(self.host, "@")
|
||||
self.user = self.host[:i]
|
||||
self.host = self.host[(i+1):]
|
||||
i = string.find(self.host, "?")
|
||||
if i!=-1:
|
||||
self.host = self.host[:i]
|
||||
self.host = string.lower(self.host)
|
||||
# do not lower the user name
|
||||
self.adresses = re.findall(adress_re, self.urlName)
|
||||
Config.debug(str(self.adresses))
|
||||
raise Exception, "Nix"
|
||||
self.host = None
|
||||
self.user = None
|
||||
|
||||
def checkConnection(self, config):
|
||||
DNS.ParseResolvConf()
|
||||
|
|
|
|||
37
setup.py
37
setup.py
|
|
@ -4,9 +4,15 @@ from distutils.dist import Distribution
|
|||
from Template import Template
|
||||
import sys
|
||||
|
||||
# Hack for linkchecker.bat
|
||||
# Autodetect the existence of an SSL library (this is pretty shitty)
|
||||
# Autodetect Windows platforms to include the linkchecker.bat script
|
||||
class LCDistribution(Distribution):
|
||||
def run_commands (self):
|
||||
if self.has_ssl():
|
||||
self.ext_modules = [('ssl', {'sources': ['ssl.c'],
|
||||
'include_dirs': ['/usr/include/openssl'],
|
||||
'library_dirs': ['/usr/lib'],
|
||||
'libs': ['ssl']})]
|
||||
if sys.platform=='win32':
|
||||
inst = self.find_command_obj("install")
|
||||
inst.ensure_ready()
|
||||
|
|
@ -18,23 +24,34 @@ class LCDistribution(Distribution):
|
|||
for cmd in self.commands:
|
||||
self.run_command (cmd)
|
||||
|
||||
def has_ssl(self):
|
||||
return 1
|
||||
|
||||
|
||||
setup (name = "linkchecker",
|
||||
version = "1.2.3",
|
||||
version = "1.3.0",
|
||||
description = "check links of HTML pages",
|
||||
author = "Bastian Kleineidam",
|
||||
author_email = "calvin@users.sourceforge.net",
|
||||
url = "http://linkchecker.sourceforge.net/",
|
||||
licence = "GPL",
|
||||
long_description =
|
||||
"""With LinkChecker you can check your HTML documents for broken links.
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
|
||||
graph
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, Gopher, Telnet and local file links
|
||||
are supported.
|
||||
Javascript links are currently ignored
|
||||
o restrict link checking to your local domain
|
||||
o HTTP proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
"""
|
||||
|
||||
distclass = LCDistribution,
|
||||
packages = ['','DNS','linkcheck'],
|
||||
# uncomment ext_modules to enable HTTPS support
|
||||
# you must have an SSL library and the Python header
|
||||
# files installed
|
||||
ext_modules = [('ssl', {'sources': ['ssl.c'],
|
||||
'include_dirs': ['/usr/include/openssl'],
|
||||
'library_dirs': ['/usr/lib'],
|
||||
'libs': ['ssl']})],
|
||||
scripts = ['linkchecker'],
|
||||
)
|
||||
)
|
||||
|
|
|
|||
14
test/mail.html
Normal file
14
test/mail.html
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
<!-- extra mail checking -->
|
||||
<html><head></head>
|
||||
<body>
|
||||
<!-- legal -->
|
||||
<a href=mailto:calvin@localhost?subject=Hallo!%%&to=Pfuscher>1</a>
|
||||
<a href="mailto:Dude <calvin@studcs.uni-sb.de> , Killer <calvin@cs.uni-sb.de>?subject=bla">2</a>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@host1>?foo=bar">3</a>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">4</a>
|
||||
<a href="mailto:o'hara@doctor.fraggle-hause?subject=äöü">5</a>
|
||||
<!-- illegal -->
|
||||
<a href="mailto:Bastian Kleineidam <calvin@host1?foo=bar>">3</a>
|
||||
<a href="mailto:">6</a>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -9,11 +9,6 @@
|
|||
<a href="test1.html">
|
||||
<a href="test1.html#isnix">
|
||||
<a href="test1.html#iswas">
|
||||
<a href=mailto:calvin@localhost?subject=Hallo!%%&to=Pfuscher>
|
||||
<a href="mailto:Bastian Kleineidam <calvin@host1?foo=bar>">
|
||||
<a href="mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>">
|
||||
<a href="mailto:o'hara@doctor.fraggle-hause?subject=äöü">
|
||||
<a href="mailto:">
|
||||
<a href="telnet:localhost">
|
||||
<a href="telnet:">
|
||||
<a href="ftp:/treasure.calvinsplayground.de/pub">
|
||||
|
|
|
|||
Loading…
Reference in a new issue