mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-23 23:54:44 +00:00
release 1.2.13
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@222 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
d82d4c4d97
commit
7f226fc478
12 changed files with 210 additions and 149 deletions
2
Makefile
2
Makefile
|
|
@ -45,7 +45,7 @@ package:
|
|||
cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz
|
||||
|
||||
files: locale
|
||||
./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
|
||||
env http_proxy="" ./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
VERSION:
|
||||
echo $(VERSION) > VERSION
|
||||
|
|
|
|||
|
|
@ -14,5 +14,6 @@ create table linksdb (
|
|||
valid int,
|
||||
url varchar(50),
|
||||
line int,
|
||||
name varchar(50),
|
||||
cached int
|
||||
);
|
||||
|
|
|
|||
8
debian/changelog
vendored
8
debian/changelog
vendored
|
|
@ -11,8 +11,14 @@ linkchecker (1.2.13) unstable; urgency=low
|
|||
- two licenses: GPL and Python 2.0
|
||||
* linkcheck/__init__.py: user friendly warning for keyboard interrupts
|
||||
* debian/control: standards version 3.2.1
|
||||
* support Mozilla-specific find: links
|
||||
* print link names for:
|
||||
- <a href> links (<a href="..">link name</a>)
|
||||
- <img> links (the alt= attribute)
|
||||
this makes it easier for bookmarks.html files to find the link
|
||||
WARNING: this changed the output format!
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sat, 6 Jan 2001 18:57:43 +0100
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sun, 7 Jan 2001 12:51:19 +0100
|
||||
|
||||
linkchecker (1.2.12) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
|
|
@ -124,6 +124,7 @@ class Configuration(UserDict.UserDict):
|
|||
"filename": "linkchecker-out.ansi",
|
||||
'colorparent': ESC+"[37m", # white
|
||||
'colorurl': ESC+"[0m", # standard
|
||||
'colorname': ESC+"[0m", # standard
|
||||
'colorreal': ESC+"[36m", # cyan
|
||||
'colorbase': ESC+"[35m", # magenta
|
||||
'colorvalid': ESC+"[1;32m", # green
|
||||
|
|
|
|||
|
|
@ -29,12 +29,12 @@ class FileUrlData(UrlData):
|
|||
urlName,
|
||||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None, line=0):
|
||||
baseRef = None, line=0, name=""):
|
||||
UrlData.__init__(self,
|
||||
urlName,
|
||||
recursionLevel,
|
||||
parentName=parentName,
|
||||
baseRef=baseRef, line=line)
|
||||
baseRef=baseRef, line=line, name=name)
|
||||
if not parentName and not baseRef and \
|
||||
not re.compile("^file:").search(self.urlName):
|
||||
self.urlName = os.path.expanduser(self.urlName)
|
||||
|
|
|
|||
|
|
@ -23,13 +23,11 @@ from linkcheck import _
|
|||
class HostCheckingUrlData(UrlData):
|
||||
"Url link for which we have to connect to a specific host"
|
||||
|
||||
def __init__(self,
|
||||
urlName,
|
||||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None, line=0):
|
||||
def __init__(self, urlName, recursionLevel, parentName = None,
|
||||
baseRef = None, line=0, name=""):
|
||||
UrlData.__init__(self, urlName, recursionLevel,
|
||||
parentName=parentName, baseRef=baseRef, line=line)
|
||||
parentName=parentName, baseRef=baseRef, line=line,
|
||||
name=name)
|
||||
self.host = None
|
||||
self.url = urlName
|
||||
|
||||
|
|
|
|||
|
|
@ -41,13 +41,11 @@ import Config, StringUtil
|
|||
import linkcheck
|
||||
_ = linkcheck._
|
||||
|
||||
# HTML shortcuts
|
||||
RowEnd="</td></tr>\n"
|
||||
|
||||
# keywords
|
||||
KeyWords = ["Real URL",
|
||||
"Result",
|
||||
"Base",
|
||||
"Name",
|
||||
"Parent URL",
|
||||
"Info",
|
||||
"Warning",
|
||||
|
|
@ -109,44 +107,46 @@ class StandardLogger:
|
|||
self.fd.flush()
|
||||
|
||||
|
||||
def newUrl(self, urldata):
|
||||
self.fd.write("\n"+_("URL")+Spaces["URL"]+urldata.urlName)
|
||||
if urldata.cached:
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write("\n"+_("URL")+Spaces["URL"]+urlData.urlName)
|
||||
if urlData.cached:
|
||||
self.fd.write(_(" (cached)\n"))
|
||||
else:
|
||||
self.fd.write("\n")
|
||||
if urldata.parentName:
|
||||
if urlData.name:
|
||||
self.fd.write(_("Name")+Spaces["Name"]+urlData.name+"\n")
|
||||
if urlData.parentName:
|
||||
self.fd.write(_("Parent URL")+Spaces["Parent URL"]+
|
||||
urldata.parentName+_(", line ")+
|
||||
str(urldata.line)+"\n")
|
||||
if urldata.baseRef:
|
||||
self.fd.write(_("Base")+Spaces["Base"]+urldata.baseRef+"\n")
|
||||
if urldata.url:
|
||||
self.fd.write(_("Real URL")+Spaces["Real URL"]+urldata.url+"\n")
|
||||
if urldata.downloadtime:
|
||||
urlData.parentName+_(", line ")+
|
||||
str(urlData.line)+"\n")
|
||||
if urlData.baseRef:
|
||||
self.fd.write(_("Base")+Spaces["Base"]+urlData.baseRef+"\n")
|
||||
if urlData.url:
|
||||
self.fd.write(_("Real URL")+Spaces["Real URL"]+urlData.url+"\n")
|
||||
if urlData.downloadtime:
|
||||
self.fd.write(_("D/L Time")+Spaces["D/L Time"]+
|
||||
_("%.3f seconds\n") % urldata.downloadtime)
|
||||
if urldata.checktime:
|
||||
_("%.3f seconds\n") % urlData.downloadtime)
|
||||
if urlData.checktime:
|
||||
self.fd.write(_("Check Time")+Spaces["Check Time"]+
|
||||
_("%.3f seconds\n") % urldata.checktime)
|
||||
if urldata.infoString:
|
||||
_("%.3f seconds\n") % urlData.checktime)
|
||||
if urlData.infoString:
|
||||
self.fd.write(_("Info")+Spaces["Info"]+
|
||||
StringUtil.indent(
|
||||
StringUtil.blocktext(urldata.infoString, 65),
|
||||
StringUtil.blocktext(urlData.infoString, 65),
|
||||
MaxIndent)+"\n")
|
||||
if urldata.warningString:
|
||||
if urlData.warningString:
|
||||
self.warnings = self.warnings+1
|
||||
self.fd.write(_("Warning")+Spaces["Warning"]+
|
||||
StringUtil.indent(
|
||||
StringUtil.blocktext(urldata.warningString, 65),
|
||||
StringUtil.blocktext(urlData.warningString, 65),
|
||||
MaxIndent)+"\n")
|
||||
|
||||
self.fd.write(_("Result")+Spaces["Result"])
|
||||
if urldata.valid:
|
||||
self.fd.write(urldata.validString+"\n")
|
||||
if urlData.valid:
|
||||
self.fd.write(urlData.validString+"\n")
|
||||
else:
|
||||
self.errors = self.errors+1
|
||||
self.fd.write(urldata.errorString+"\n")
|
||||
self.fd.write(urlData.errorString+"\n")
|
||||
self.fd.flush()
|
||||
|
||||
|
||||
|
|
@ -207,7 +207,7 @@ class HtmlLogger(StandardLogger):
|
|||
"body { font-family: Arial,sans-serif; font-size: 11pt }\n"
|
||||
"td { font-family: Arial,sans-serif; font-size: 11pt }\n"
|
||||
"code { font-family: Courier }\n"
|
||||
"a:hover { color: #34a4ef }\n"
|
||||
"a:hover { color: #34a4ef }\n"
|
||||
"//-->\n</style>\n</head>\n"+
|
||||
"<body bgcolor="+self.colorbackground+" link="+self.colorlink+
|
||||
" vlink="+self.colorlink+" alink="+self.colorlink+">"+
|
||||
|
|
@ -224,47 +224,53 @@ class HtmlLogger(StandardLogger):
|
|||
'><tr><td><table align="left" border="0" cellspacing="0"'
|
||||
' cellpadding="3" summary="checked link" bgcolor='+
|
||||
self.colorbackground+
|
||||
"><tr><td bgcolor="+self.colorurl+">"+
|
||||
"URL</td><td bgcolor="+self.colorurl+">"+urlData.urlName)
|
||||
"><tr><td bgcolor="+self.colorurl+">"+_("URL")+
|
||||
"</td><td bgcolor="+self.colorurl+">"+urlData.urlName)
|
||||
if urlData.cached:
|
||||
self.fd.write(_(" (cached)\n"))
|
||||
self.fd.write(RowEnd)
|
||||
|
||||
self.fd.write("</td></tr>\n")
|
||||
if urlData.name:
|
||||
self.fd.write("<tr><td>"+_("Name")+"</td><td>"+
|
||||
urlData.name+"</td></tr>\n")
|
||||
if urlData.parentName:
|
||||
self.fd.write("<tr><td>"+_("Parent URL")+"</td><td>"+
|
||||
'<a href="'+urlData.parentName+'">'+
|
||||
urlData.parentName+"</a> line "+str(urlData.line)+
|
||||
RowEnd)
|
||||
"</td></tr>\n")
|
||||
if urlData.baseRef:
|
||||
self.fd.write("<tr><td>"+_("Base")+"</td><td>"+
|
||||
urlData.baseRef+RowEnd)
|
||||
urlData.baseRef+"</td></tr>\n")
|
||||
if urlData.url:
|
||||
self.fd.write("<tr><td>"+_("Real URL")+"</td><td>"+
|
||||
"<a href=\""+urlData.url+
|
||||
'">'+urlData.url+"</a>"+RowEnd)
|
||||
'">'+urlData.url+"</a></td></tr>\n")
|
||||
if urlData.downloadtime:
|
||||
self.fd.write("<tr><td>"+_("D/L Time")+"</td><td>"+
|
||||
(_("%.3f seconds") % urlData.downloadtime)+RowEnd)
|
||||
(_("%.3f seconds") % urlData.downloadtime)+
|
||||
"</td></tr>\n")
|
||||
if urlData.checktime:
|
||||
self.fd.write("<tr><td>"+_("Check Time")+
|
||||
"</td><td>"+
|
||||
(_("%.3f seconds") % urlData.checktime)+RowEnd)
|
||||
(_("%.3f seconds") % urlData.checktime)+
|
||||
"</td></tr>\n")
|
||||
if urlData.infoString:
|
||||
self.fd.write("<tr><td>"+_("Info")+"</td><td>"+
|
||||
StringUtil.htmlify(urlData.infoString)+RowEnd)
|
||||
StringUtil.htmlify(urlData.infoString)+
|
||||
"</td></tr>\n")
|
||||
if urlData.warningString:
|
||||
self.warnings = self.warnings+1
|
||||
self.fd.write("<tr>"+self.tablewarning+_("Warning")+
|
||||
"</td>"+self.tablewarning+
|
||||
urlData.warningString+RowEnd)
|
||||
string.replace(urlData.warningString,"\n", "<br>")+
|
||||
"</td></tr>\n")
|
||||
if urlData.valid:
|
||||
self.fd.write("<tr>"+self.tableok+_("Result")+"</td>"+
|
||||
self.tableok+urlData.validString+RowEnd)
|
||||
self.tableok+urlData.validString+"</td></tr>\n")
|
||||
else:
|
||||
self.errors = self.errors+1
|
||||
self.fd.write("<tr>"+self.tableerror+_("Result")+
|
||||
"</td>"+self.tableerror+
|
||||
urlData.errorString++RowEnd)
|
||||
urlData.errorString+"</td></tr>\n")
|
||||
|
||||
self.fd.write("</table></td></tr></table><br clear=all><br>")
|
||||
self.fd.flush()
|
||||
|
|
@ -316,6 +322,7 @@ class ColoredLogger(StandardLogger):
|
|||
apply(StandardLogger.__init__, (self,), args)
|
||||
self.colorparent = args['colorparent']
|
||||
self.colorurl = args['colorurl']
|
||||
self.colorname = args['colorname']
|
||||
self.colorreal = args['colorreal']
|
||||
self.colorbase = args['colorbase']
|
||||
self.colorvalid = args['colorvalid']
|
||||
|
|
@ -354,7 +361,12 @@ class ColoredLogger(StandardLogger):
|
|||
self.fd.write(_(" (cached)\n"))
|
||||
else:
|
||||
self.fd.write("\n")
|
||||
|
||||
|
||||
if urlData.name:
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write(_("Name")+Spaces["Name"]+self.colorname+
|
||||
urlData.name+self.colorreset+"\n")
|
||||
if urlData.baseRef:
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
|
|
@ -590,8 +602,8 @@ class SQLLogger(StandardLogger):
|
|||
def newUrl(self, urlData):
|
||||
self.fd.write("insert into %s(urlname,recursionlevel,parentname,"
|
||||
"baseref,errorstring,validstring,warningstring,infoString,"
|
||||
"valid,url,line,checktime,downloadtime,cached) values "
|
||||
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%d,%d)%s\n" % \
|
||||
"valid,url,line,name,checktime,downloadtime,cached) values "
|
||||
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%s,%d,%d,%d)%s\n" % \
|
||||
(self.dbname,
|
||||
StringUtil.sqlify(urlData.urlName),
|
||||
urlData.recursionLevel,
|
||||
|
|
@ -604,6 +616,7 @@ class SQLLogger(StandardLogger):
|
|||
urlData.valid,
|
||||
StringUtil.sqlify(urlData.url),
|
||||
urlData.line,
|
||||
StringUtil.sqlify(urlData.name),
|
||||
urlData.checktime,
|
||||
urlData.downloadtime,
|
||||
urlData.cached,
|
||||
|
|
@ -682,6 +695,7 @@ class CSVLogger(StandardLogger):
|
|||
"# valid;\n"
|
||||
"# url;\n"
|
||||
"# line;\n"
|
||||
"# name;\n"
|
||||
"# downloadtime;\n"
|
||||
"# checktime;\n"
|
||||
"# cached;\n")
|
||||
|
|
@ -689,7 +703,7 @@ class CSVLogger(StandardLogger):
|
|||
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write(
|
||||
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%s%d%s%d%s%d\n" % (
|
||||
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%s%s%s%d%s%d%s%d\n" % (
|
||||
urlData.urlName, self.separator,
|
||||
urlData.recursionLevel, self.separator,
|
||||
urlData.parentName, self.separator,
|
||||
|
|
@ -701,6 +715,7 @@ class CSVLogger(StandardLogger):
|
|||
urlData.valid, self.separator,
|
||||
urlData.url, self.separator,
|
||||
urlData.line, self.separator,
|
||||
urlData.name, self.separator,
|
||||
urlData.downloadtime, self.separator,
|
||||
urlData.checktime, self.separator,
|
||||
urlData.cached))
|
||||
|
|
|
|||
|
|
@ -15,24 +15,17 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import string,re,sys
|
||||
import string,re,sys,htmlentitydefs
|
||||
|
||||
HtmlTable = [
|
||||
("ä","&auml;"),
|
||||
("ö","&ouml;"),
|
||||
("ü","&uuml;"),
|
||||
("Ä","&Auml;"),
|
||||
("Ö","&Ouml;"),
|
||||
("Ü","&Uuml;"),
|
||||
("ß","&szlig;"),
|
||||
("&","&amp;"),
|
||||
("<","&lt;"),
|
||||
(">","&gt;"),
|
||||
("é","&eacute;"),
|
||||
("è","&egrave;"),
|
||||
("à","&agrave;"),
|
||||
("ç","&ccedil;"),
|
||||
]
|
||||
HtmlTable = []
|
||||
UnHtmlTable = []
|
||||
for ent,ch in htmlentitydefs.entitydefs.items():
|
||||
HtmlTable.append((ch, "&"+ent+";"))
|
||||
UnHtmlTable.append(("&"+ent+";", ch))
|
||||
# order matters!
|
||||
HtmlTable.sort()
|
||||
UnHtmlTable.sort()
|
||||
UnHtmlTable.reverse()
|
||||
|
||||
SQLTable = [
|
||||
("'","''")
|
||||
|
|
@ -160,6 +153,8 @@ def htmlify(str):
|
|||
"Escape special HTML chars and strings"
|
||||
return applyTable(HtmlTable, str)
|
||||
|
||||
def unhtmlify(str):
|
||||
return applyTable(UnHtmlTable, str)
|
||||
|
||||
def getLineNumber(str, index):
|
||||
"return the line number of str[index]"
|
||||
|
|
@ -184,3 +179,7 @@ def paginate(text, lines=22):
|
|||
print "press return to continue..."
|
||||
sys.stdin.read(1)
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
print htmlify("äöü")
|
||||
print unhtmlify("ä ä&auml;")
|
||||
|
|
|
|||
|
|
@ -52,19 +52,25 @@ _linkMatcher = r"""
|
|||
> # close tag
|
||||
"""
|
||||
|
||||
LinkPatterns = (
|
||||
re.compile(_linkMatcher % ("a", "href"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("img", "src"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("form", "action"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("body", "background"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("frame", "src"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("link", "href"), re.VERBOSE),
|
||||
# <meta http-equiv="refresh" content="x; url=...">
|
||||
re.compile(_linkMatcher % ("meta", "url"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("area", "href"), re.VERBOSE),
|
||||
re.compile(_linkMatcher % ("script", "src"), re.VERBOSE),
|
||||
LinkTags = (
|
||||
("a", "href"),
|
||||
("img", "src"),
|
||||
("form", "action"),
|
||||
("body", "background"),
|
||||
("frame", "src"),
|
||||
("link", "href"),
|
||||
("meta", "url"), # <meta http-equiv="refresh" content="x; url=...">
|
||||
("area", "href"),
|
||||
("script", "src"),
|
||||
)
|
||||
|
||||
LinkPatterns = []
|
||||
for tag,attr in LinkTags:
|
||||
pattern = re.compile(_linkMatcher % (tag, attr), re.VERBOSE)
|
||||
pattern.tag = tag
|
||||
pattern.attr = attr
|
||||
LinkPatterns.append(pattern)
|
||||
|
||||
class UrlData:
|
||||
"Representing a URL with additional information like validity etc"
|
||||
|
||||
|
|
@ -73,7 +79,8 @@ class UrlData:
|
|||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None,
|
||||
line = 0):
|
||||
line = 0,
|
||||
name = ""):
|
||||
self.urlName = urlName
|
||||
self.recursionLevel = recursionLevel
|
||||
self.parentName = parentName
|
||||
|
|
@ -85,6 +92,7 @@ class UrlData:
|
|||
self.valid = 1
|
||||
self.url = None
|
||||
self.line = line
|
||||
self.name = name
|
||||
self.downloadtime = 0
|
||||
self.checktime = 0
|
||||
self.cached = 0
|
||||
|
|
@ -252,8 +260,10 @@ class UrlData:
|
|||
if not (anchor!="" and self.isHtml() and self.valid):
|
||||
return
|
||||
self.getContent()
|
||||
for cur_anchor,line in self.searchInForTag(
|
||||
re.compile(_linkMatcher % ("a", "name"), re.VERBOSE)):
|
||||
pattern = re.compile(_linkMatcher % ("a", "name"), re.VERBOSE)
|
||||
pattern.tag = "a"
|
||||
pattern.attr = "name"
|
||||
for cur_anchor,line in self.searchInForTag(pattern):
|
||||
if cur_anchor == anchor:
|
||||
return
|
||||
self.setWarning("anchor #"+anchor+" not found")
|
||||
|
|
@ -321,8 +331,11 @@ class UrlData:
|
|||
debug(Config.DebugDelim+"Parsing recursively into\n"+\
|
||||
str(self)+"\n"+Config.DebugDelim)
|
||||
# search for a possible base reference
|
||||
bases = self.searchInForTag(re.compile(_linkMatcher % ("base",
|
||||
"href"), re.VERBOSE))
|
||||
pattern = re.compile(_linkMatcher % ("base", "href"), re.VERBOSE)
|
||||
pattern.tag = "base"
|
||||
pattern.attr = "href"
|
||||
bases = self.searchInForTag(pattern)
|
||||
|
||||
baseRef = None
|
||||
if len(bases)>=1:
|
||||
baseRef = bases[0][0]
|
||||
|
|
@ -332,12 +345,14 @@ class UrlData:
|
|||
# search for tags and add found tags to URL queue
|
||||
for pattern in LinkPatterns:
|
||||
urls = self.searchInForTag(pattern)
|
||||
for url,line in urls:
|
||||
for url,line,name in urls:
|
||||
config.appendUrl(GetUrlDataFrom(url,
|
||||
self.recursionLevel+1, self.url, baseRef, line))
|
||||
self.recursionLevel+1, self.url, baseRef, line, name))
|
||||
|
||||
|
||||
def searchInForTag(self, pattern):
|
||||
debug("Searching for tag %s, attribute %s" \
|
||||
% (pattern.tag, pattern.attr))
|
||||
urls = []
|
||||
index = 0
|
||||
while 1:
|
||||
|
|
@ -346,26 +361,46 @@ class UrlData:
|
|||
index = match.end()
|
||||
if self._isInComment(match.start()): continue
|
||||
# need to strip optional ending quotes for the meta tag
|
||||
urls.append((string.strip(StringUtil.stripQuotes(match.group('value'))),
|
||||
StringUtil.getLineNumber(self.getContent(),
|
||||
match.start())))
|
||||
url = string.strip(StringUtil.stripQuotes(match.group('value')))
|
||||
lineno=StringUtil.getLineNumber(self.getContent(), match.start())
|
||||
# extra feature: get optional name for this bookmark
|
||||
name = self.searchInForName(pattern.tag, pattern.attr,
|
||||
match.start(), match.end())
|
||||
urls.append((url, lineno, name))
|
||||
return urls
|
||||
|
||||
def searchInForName(self, tag, attr, start, end):
|
||||
name=""
|
||||
if tag=='img':
|
||||
all = self.getContent()[start:end]
|
||||
mo = re.search("(?i)\s+alt\s*=\s*(?P<name>(\".+?\"|[^\s>]+))", all)
|
||||
if mo:
|
||||
name = StringUtil.stripQuotes(mo.group('name'))
|
||||
name = StringUtil.unhtmlify(name)
|
||||
elif tag=='a' and attr=='href':
|
||||
all = self.getContent()[end:]
|
||||
mo = re.search("(?i)(?P<name>.*?)</a\s*>", all)
|
||||
if mo:
|
||||
name = mo.group('name')
|
||||
return name
|
||||
|
||||
|
||||
def get_scheme(self):
|
||||
return "no"
|
||||
return "none"
|
||||
|
||||
def __str__(self):
|
||||
return """%s link
|
||||
urlname=%s
|
||||
parentName=%s
|
||||
baseRef=%s
|
||||
cached=%s
|
||||
recursionLevel=%s
|
||||
urlConnection=%s
|
||||
line=%s""" % \
|
||||
(self.get_scheme(), self.urlName, self.parentName, self.baseRef,
|
||||
self.cached, self.recursionLevel, self.urlConnection, self.line)
|
||||
return ("%s link\n"
|
||||
"urlname=%s\n"
|
||||
"parentName=%s\n"
|
||||
"baseRef=%s\n"
|
||||
"cached=%s\n"
|
||||
"recursionLevel=%s\n"
|
||||
"urlConnection=%s\n"
|
||||
"line=%s\n"
|
||||
"name=%s" % \
|
||||
(self.get_scheme(), self.urlName, self.parentName, self.baseRef,
|
||||
self.cached, self.recursionLevel, self.urlConnection, self.line,
|
||||
self.name))
|
||||
|
||||
|
||||
def _getUserPassword(self, config):
|
||||
|
|
@ -376,6 +411,7 @@ line=%s""" % \
|
|||
|
||||
|
||||
from FileUrlData import FileUrlData
|
||||
from FindUrlData import FindUrlData
|
||||
from FtpUrlData import FtpUrlData
|
||||
from GopherUrlData import GopherUrlData
|
||||
from HttpUrlData import HttpUrlData
|
||||
|
|
@ -385,37 +421,38 @@ from MailtoUrlData import MailtoUrlData
|
|||
from TelnetUrlData import TelnetUrlData
|
||||
from NntpUrlData import NntpUrlData
|
||||
|
||||
def GetUrlDataFrom(urlName,
|
||||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None, line = 0):
|
||||
def GetUrlDataFrom(urlName, recursionLevel, parentName = None,
|
||||
baseRef = None, line = 0, name = None):
|
||||
# search for the absolute url
|
||||
name=""
|
||||
url=""
|
||||
if urlName and ":" in urlName:
|
||||
name = string.lower(urlName)
|
||||
url = string.lower(urlName)
|
||||
elif baseRef and ":" in baseRef:
|
||||
name = string.lower(baseRef)
|
||||
url = string.lower(baseRef)
|
||||
elif parentName and ":" in parentName:
|
||||
name = string.lower(parentName)
|
||||
url = string.lower(parentName)
|
||||
# test scheme
|
||||
if re.search("^http:", name):
|
||||
return HttpUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^ftp:", name):
|
||||
return FtpUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^file:", name):
|
||||
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^telnet:", name):
|
||||
return TelnetUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^mailto:", name):
|
||||
return MailtoUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^gopher:", name):
|
||||
return GopherUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^javascript:", name):
|
||||
return JavascriptUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^https:", name):
|
||||
return HttpsUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^(s?news|nntp):", name):
|
||||
return NntpUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^http:", url):
|
||||
return HttpUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^ftp:", url):
|
||||
return FtpUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^file:", url):
|
||||
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^telnet:", url):
|
||||
return TelnetUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^mailto:", url):
|
||||
return MailtoUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^gopher:", url):
|
||||
return GopherUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^javascript:", url):
|
||||
return JavascriptUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^https:", url):
|
||||
return HttpsUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
if re.search("^(s?news|nntp):", url):
|
||||
return NntpUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
# Mozilla's Technology links start with "find:"
|
||||
if re.search("^find:", url):
|
||||
return FindUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
# assume local file
|
||||
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line, name)
|
||||
|
||||
|
|
|
|||
3
po/de.po
3
po/de.po
|
|
@ -130,6 +130,9 @@ msgstr "Sekunden"
|
|||
msgid "URL"
|
||||
msgstr "URL"
|
||||
|
||||
msgid "Name"
|
||||
msgstr ""
|
||||
|
||||
msgid "URL is null or empty"
|
||||
msgstr "URL ist Null oder leer"
|
||||
|
||||
|
|
|
|||
3
po/fr.po
3
po/fr.po
|
|
@ -122,6 +122,9 @@ msgstr "secondes"
|
|||
msgid "URL"
|
||||
msgstr ""
|
||||
|
||||
msgid "Name"
|
||||
msgstr ""
|
||||
|
||||
msgid "URL is null or empty"
|
||||
msgstr "L'URL est nulle ou vide"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,25 +1,23 @@
|
|||
Just some HTTP links
|
||||
<a href="http://www.garantiertnixgutt.bla"> <!-- bad url -->
|
||||
<a href="http://www.heise.de"> <!-- ok -->
|
||||
<a href="http:/www.heise.de"> <!-- one slash -->
|
||||
<a href="http:www.heise.de"> <!-- no slash -->
|
||||
<a href="http://"> <!-- no url -->
|
||||
<a href="http:/"> <!-- no url, one slash -->
|
||||
<a href="http:"> <!-- no url, no slash -->
|
||||
<a href="http://www.blubb.de/stalter&sohn"> <!-- unquoted ampersand -->
|
||||
<a name="iswas"> <!-- anchor for test2.html -->
|
||||
<a href=http://slashdot.org/> <!-- unquoted -->
|
||||
<!-- invalid anchor -->
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix">
|
||||
<!-- authorization (user=calvin, pass=calvin) -->
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/isnich/">
|
||||
<a href="https://www.heise.de"> <!-- https -->
|
||||
<a href="HtTP://WWW.hEIsE.DE"> <!-- should be cached -->
|
||||
<a href="HTTP://WWW.HEISE.DE"> <!-- should be cached -->
|
||||
<a href="http://www.garantiertnixgutt.bla">bad url</a>
|
||||
<a href="http://www.heise.de">ok</a>
|
||||
<a href="http:/www.heise.de">one slash</a>
|
||||
<a href="http:www.heise.de">no slash</a>
|
||||
<a href="http://">no url</a>
|
||||
<a href="http:/">no url, one slash</a>
|
||||
<a href="http:">no url, no slash</a>
|
||||
<a href="http://www.blubb.de/stalter&sohn">unquoted ampersand</a>
|
||||
<a name="iswas">anchor for test2.html</a>
|
||||
<a href=http://slashdot.org/>unquoted</a>
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/software/#isnix"
|
||||
>invalid anchor</a>
|
||||
<a href="http://treasure.calvinsplayground.de/~calvin/isnich/"
|
||||
>authorization (user=calvin, pass=calvin)</a>
|
||||
<a href="https://www.heise.de">https</a>
|
||||
<a href="HtTP://WWW.hEIsE.DE">should be cached</a>
|
||||
<a href="HTTP://WWW.HEISE.DE">should be cached</a>
|
||||
<!-- <a href=http://nocheckin> no check because of comment -->
|
||||
<!-- no beginning quote -->
|
||||
<a href=illegalquote1">
|
||||
<!-- no ending quote -->
|
||||
<a href="illegalquote2>
|
||||
<a href=illegalquote1">no beginning quote</a>
|
||||
<a href="illegalquote2>no ending quote</a>
|
||||
<!-- check the parser at end of file -->
|
||||
<a href="g
|
||||
Loading…
Reference in a new issue