news: link support

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@52 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-03-30 17:10:35 +00:00
parent 78acafb299
commit 1ad9675c2d
9 changed files with 31 additions and 13 deletions

View file

@@ -1,3 +1,6 @@
30.3.2000
* support for NNTP news: links
30.3.2000 Version 1.1.4
* fixed missing self.mime assignment in HttpUrlData.py

View file

@@ -6,7 +6,8 @@ You need Python >= 1.5.2
You get Python from http://www.python.org
Optionally packages:
Distutils from http://www.python.org/sigs/distutils-sig/
Distutils from http://www.python.org/sigs/distutils-sig/ (be sure to
get the CVS version, not the latest release!)
OpenSSL from http://www.openssl.org
Installation with Distutils:

2
TODO
View file

@@ -15,4 +15,6 @@ o Internationalization
o Parse GML output and make a site map graphic (PNG format)
o Blacklist mode: only report links who fail over a certain amount of time
Low priority

View file

@@ -2,7 +2,7 @@ import ConfigParser,sys,os,re,UserDict,string
from os.path import expanduser,normpath,normcase,join,isfile
import Logging
Version = "1.1.4"
Version = "1.2.0"
AppName = "LinkChecker"
App = AppName+" "+Version
UserAgent = AppName+"/"+Version
@@ -20,9 +20,9 @@ under certain conditions. Look at the file `LICENSE' whithin this
distribution."""
Loggers = {"text": Logging.StandardLogger,
"html": Logging.HtmlLogger,
"colored": Logging.ColoredLogger,
"gml": Logging.GMLLogger,
"sql": Logging.SQLLogger}
"colored": Logging.ColoredLogger,
"gml": Logging.GMLLogger,
"sql": Logging.SQLLogger}
LoggerKeys = reduce(lambda x, y: x+", "+y, Loggers.keys())
DebugDelim = "==========================================================\n"
DebugFlag = 0
@@ -57,6 +57,7 @@ class Configuration(UserDict.UserDict):
self.data["fileoutput"] = []
self.data["quiet"] = 0
self.data["warningregex"] = None
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
self.urlCache = {}
self.robotsTxtCache = {}
try:
@@ -302,6 +303,9 @@ class Configuration(UserDict.UserDict):
self.data["warningregex"] = re.compile(cfgparser.get(section,
"warningregex"))
except ConfigParser.Error: pass
try:
self.data["nntpserver"] = cfgparser.get(section, "nntpserver")
except ConfigParser.Error: pass
section = "authentication"
try:

View file

@@ -25,9 +25,6 @@ class HostCheckingUrlData(UrlData):
ip = socket.gethostbyname(self.host)
self.setValid(self.host+"("+ip+") found")
def closeConnection(self):
UrlData.closeConnection(self)
def __str__(self):
return "host="+`self.host`+"\n"+UrlData.__str__(self)

View file

@@ -126,7 +126,7 @@ class HttpUrlData(UrlData):
self.urlConnection = self.urlConnection.getfile()
self.data = StringUtil.stripHtmlComments(self.urlConnection.read())
self.downloadtime = time.time() - t
Config.debug(Config.DebugDelim+self.data+Config.DebugDelim)
#Config.debug(Config.DebugDelim+self.data+Config.DebugDelim)
def isHtml(self):
if not (self.valid and self.mime):

View file

@@ -291,6 +291,7 @@ from HttpsUrlData import HttpsUrlData
from JavascriptUrlData import JavascriptUrlData
from MailtoUrlData import MailtoUrlData
from TelnetUrlData import TelnetUrlData
from NntpUrlData import NntpUrlData
def GetUrlDataFrom(urlName,
recursionLevel,
@@ -321,6 +322,8 @@ def GetUrlDataFrom(urlName,
return JavascriptUrlData(urlName, recursionLevel, parentName, baseRef, line)
if re.compile("^https:").search(name):
return HttpsUrlData(urlName, recursionLevel, parentName, baseRef, line)
if re.compile("^news:").search(name):
return NntpUrlData(urlName, recursionLevel, parentName, baseRef, line)
# assume local file
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line)

View file

@@ -36,6 +36,10 @@ OPTIONS
-l, --allowdeny
Swap checking order to intern/extern. Default checking order
is extern/intern.
-N, --nntp-server
Specify an NNTP server for news: links. Default is the
environment variable NNTP_SERVER. If the variable is not defined,
only the syntax of the link is checked.
-o name, --output=name
Specify output as """+linkcheck.Config.LoggerKeys+""".
Default is text.
@@ -89,8 +93,7 @@ o If you have your system configured to automatically establish a
o Javascript links are currently ignored
o If your platform does not support threading, linkchecker assumes -t0
o You can supply multiple user/password pairs in a configuration file
o Cookies are not accepted by LinkChecker and never will be. Cookies
are dumb. Use Zope with session support!
o Cookies are not accepted by LinkChecker.
"""
Examples = """EXAMPLES
@@ -122,12 +125,13 @@ def printUsage(msg):
try:
# Note: cut out the name of the script
options, args = getopt.getopt(sys.argv[1:],
"aDe:f:F:hi:lP:o:p:qr:Rst:u:VvwW:", # short options
"aDe:f:F:hi:lN:P:o:p:qr:Rst:u:VvwW:", # short options
["anchors", # long options
"config=",
"debug",
"extern=",
"file-output=",
"nntp-server=",
"help",
"intern=",
"allowdeny",
@@ -197,6 +201,9 @@ for opt,arg in options:
elif opt=="-l" or opt=="--allowdeny":
config["allowdeny"] = 1
elif opt=="-N" or opt=="--nntp-server":
config["nntpserver"] = arg
elif opt=="-P" or opt=="--proxy":
proxy = re.compile("(.+):(.+)").match(arg)
if proxy:

View file

@@ -17,7 +17,8 @@
#strict=0
#proxy=www-proxy.uni-sb.de
#proxyport=3128
#warningregex=
#warningregex="Request failed"
#nntpserver=news.uni-stuttgart.de
# for each extern link we can specify if it is strict or not
[filtering]