mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-04 23:20:34 +00:00
nntp: support
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@136 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
dd4544ed81
commit
2ded7acf7f
5 changed files with 76 additions and 67 deletions
4
debian/changelog
vendored
4
debian/changelog
vendored
|
|
@ -5,8 +5,10 @@ linkchecker (1.2.4) unstable; urgency=low
|
|||
* fix for HTTP HEAD requests from bad/dumb servers
|
||||
* CGI script fixes
|
||||
* LinkChecker Online HTML pages added
|
||||
* news: link support
|
||||
* fixed parsing of extern? config file options
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Tue, 11 Jul 2000 18:15:07 +0200
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 14 Jul 2000 05:23:08 +0200
|
||||
|
||||
linkchecker (1.2.3) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
|
|
@ -188,7 +188,6 @@ class Configuration(UserDict.UserDict):
|
|||
self.logLock = None
|
||||
self.urls = []
|
||||
self.threader = None
|
||||
self.connectNntp = self.connectNntp_NoThreads
|
||||
self.dataLock = None
|
||||
|
||||
def enableThreading(self, num):
|
||||
|
|
@ -217,7 +216,6 @@ class Configuration(UserDict.UserDict):
|
|||
self.logLock = Lock()
|
||||
self.urls = Queue.Queue(0)
|
||||
self.threader = Threader.Threader(num)
|
||||
self.connectNntp = self.connectNntp_Threads
|
||||
self.dataLock = Lock()
|
||||
|
||||
def hasMoreUrls_NoThreads(self):
|
||||
|
|
@ -278,41 +276,12 @@ class Configuration(UserDict.UserDict):
|
|||
for log in self.data["fileoutput"]:
|
||||
log.endOfOutput(linknumber=self.data['linknumber'])
|
||||
|
||||
def connectNntp_NoThreads(self):
|
||||
if not self.data.has_key("nntp"):
|
||||
self._do_connectNntp()
|
||||
|
||||
def connectNntp_Threads(self):
|
||||
if not self.data.has_key("nntp"):
|
||||
try:
|
||||
self.dataLock.acquire()
|
||||
self._do_connectNntp()
|
||||
finally:
|
||||
self.dataLock.release()
|
||||
|
||||
def incrementLinknumber_Threads(self):
|
||||
try:
|
||||
self.dataLock.acquire()
|
||||
self.data['linknumber'] = self.data['linknumber'] + 1
|
||||
finally:
|
||||
self.dataLock.release()
|
||||
|
||||
def _do_connectNntp(self):
|
||||
"""This is done only once per checking task."""
|
||||
import nntplib
|
||||
timeout = 1
|
||||
while timeout:
|
||||
try:
|
||||
self.data["nntp"]=nntplib.NNTP(self.data["nntpserver"] or "")
|
||||
timeout = 0
|
||||
except nntplib.error_perm:
|
||||
value = sys.exc_info()[1]
|
||||
debug("NNTP: "+value+"\n")
|
||||
if re.compile("^505").search(str(value)):
|
||||
import whrandom
|
||||
time.sleep(whrandom.randint(10,20))
|
||||
else:
|
||||
raise
|
||||
|
||||
def hasMoreUrls_Threads(self):
|
||||
return not self.urls.empty()
|
||||
|
|
@ -320,7 +289,7 @@ class Configuration(UserDict.UserDict):
|
|||
def finished_Threads(self):
|
||||
time.sleep(0.1)
|
||||
self.threader.reduceThreads()
|
||||
debug("finished?\n")
|
||||
#debug("finished?\n")
|
||||
return self.threader.finished() and self.urls.empty()
|
||||
|
||||
def finish_Threads(self):
|
||||
|
|
|
|||
|
|
@ -15,10 +15,10 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import re,string,time,nntplib,linkcheck
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
import re,string,time,nntplib,urlparse,linkcheck
|
||||
from linkcheck import _
|
||||
from UrlData import ExcList
|
||||
from UrlData import ExcList,UrlData
|
||||
debug = linkcheck.Config.debug
|
||||
|
||||
ExcList.extend([nntplib.error_reply,
|
||||
nntplib.error_temp,
|
||||
|
|
@ -26,32 +26,65 @@ ExcList.extend([nntplib.error_reply,
|
|||
nntplib.error_proto,
|
||||
])
|
||||
|
||||
nntp_re = re.compile("^news:[\w.\-]+$")
|
||||
|
||||
class NntpUrlData(HostCheckingUrlData):
|
||||
class NntpUrlData(UrlData):
|
||||
"Url link with NNTP scheme"
|
||||
|
||||
def buildUrl(self):
|
||||
HostCheckingUrlData.buildUrl(self)
|
||||
if not nntp_re.match(self.urlName):
|
||||
raise linkcheck.error, _("Illegal NNTP link syntax")
|
||||
self.host = string.lower(self.urlName[5:])
|
||||
# use nntp instead of news to comply with the unofficial internet
|
||||
# draft of Alfred Gilman which unifies (s)news and nntp URLs
|
||||
# note: we use this only internally (for parsing and caching)
|
||||
if string.lower(self.urlName[:4])=='news':
|
||||
self.url = 'nntp'+self.urlName[4:]
|
||||
else:
|
||||
self.url = self.urlName
|
||||
self.urlTuple = urlparse.urlparse(self.url)
|
||||
debug("DEBUG: %s\n" % `self.urlTuple`)
|
||||
|
||||
|
||||
def checkConnection(self, config):
|
||||
if not config["nntpserver"]:
|
||||
self.setWarning(_("No NNTP server specified, checked only syntax"))
|
||||
config.connectNntp()
|
||||
nntp = config["nntp"]
|
||||
resp,count,first,last,name = nntp.group(self.host)
|
||||
self.setInfo(_("Group %s has %s articles, range %s to %s") % \
|
||||
(name, count, first, last))
|
||||
nntpserver = self.urlTuple[1] or config["nntpserver"]
|
||||
if not nntpserver:
|
||||
self.setWarning(_("No NNTP server specified, skipping this URL"))
|
||||
return
|
||||
nntp = self._connectNntp(nntpserver)
|
||||
group = self.urlTuple[2]
|
||||
if group[:1]=='/':
|
||||
group = group[1:]
|
||||
if '@' in group:
|
||||
# request article
|
||||
resp,number,id = nntp.stat("<"+group+">")
|
||||
self.setInfo(_('Articel number %s found' % number))
|
||||
else:
|
||||
# split off trailing article span
|
||||
group = string.split(group,'/',1)[0]
|
||||
# request group info
|
||||
resp,count,first,last,name = nntp.group(self.urlTuple[2])
|
||||
self.setInfo(_("Group %s has %s articles, range %s to %s") % \
|
||||
(name, count, first, last))
|
||||
|
||||
|
||||
def _connectNntp(self, nntpserver):
|
||||
"""This is done only once per checking task."""
|
||||
timeout = 1
|
||||
while timeout:
|
||||
try:
|
||||
nntp=nntplib.NNTP(nntpserver)
|
||||
timeout = 0
|
||||
except nntplib.error_perm:
|
||||
value = sys.exc_info()[1]
|
||||
debug("NNTP: "+value+"\n")
|
||||
if re.compile("^505").search(str(value)):
|
||||
import whrandom
|
||||
time.sleep(whrandom.randint(10,20))
|
||||
else:
|
||||
raise
|
||||
return nntp
|
||||
|
||||
|
||||
def getCacheKey(self):
|
||||
return "news:"+HostCheckingUrlData.getCacheKey(self)
|
||||
return self.url
|
||||
|
||||
|
||||
def __str__(self):
|
||||
return "NNTP link\n"+HostCheckingUrlData.__str__(self)
|
||||
return "NNTP link\n"+self.urlName
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
import sys,re,string,urlparse,urllib,time
|
||||
import Config,StringUtil,linkcheck
|
||||
from linkcheck import _
|
||||
debug = linkcheck.Config.debug
|
||||
|
||||
ExcList = [
|
||||
IOError,
|
||||
|
|
@ -136,7 +137,7 @@ class UrlData:
|
|||
|
||||
|
||||
def logMe(self, config):
|
||||
Config.debug("DEBUG: logging url\n")
|
||||
debug("DEBUG: logging url\n")
|
||||
config.incrementLinknumber()
|
||||
if config["verbose"] or not self.valid or \
|
||||
(self.warningString and config["warnings"]):
|
||||
|
|
@ -144,11 +145,11 @@ class UrlData:
|
|||
|
||||
|
||||
def check(self, config):
|
||||
Config.debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
|
||||
debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
|
||||
Config.DebugDelim)
|
||||
t = time.time()
|
||||
# check syntax
|
||||
Config.debug("DEBUG: checking syntax\n")
|
||||
debug("DEBUG: checking syntax\n")
|
||||
if not self.urlName or self.urlName=="":
|
||||
self.setError(_("URL is null or empty"))
|
||||
self.logMe(config)
|
||||
|
|
@ -163,7 +164,7 @@ class UrlData:
|
|||
return
|
||||
|
||||
# check the cache
|
||||
Config.debug("DEBUG: checking cache\n")
|
||||
debug("DEBUG: checking cache\n")
|
||||
if config.urlCache_has_key(self.getCacheKey()):
|
||||
self.copyFrom(config.urlCache_get(self.getCacheKey()))
|
||||
self.cached = 1
|
||||
|
|
@ -171,14 +172,14 @@ class UrlData:
|
|||
return
|
||||
|
||||
# apply filter
|
||||
Config.debug("DEBUG: checking filter\n")
|
||||
debug("DEBUG: checking filter\n")
|
||||
if self.extern and (config["strict"] or self.extern[1]):
|
||||
self.setWarning(_("outside of domain filter, checked only syntax"))
|
||||
self.logMe(config)
|
||||
return
|
||||
|
||||
# check connection
|
||||
Config.debug("DEBUG: checking connection\n")
|
||||
debug("DEBUG: checking connection\n")
|
||||
try:
|
||||
self.checkConnection(config)
|
||||
if self.urlTuple and config["anchors"]:
|
||||
|
|
@ -190,12 +191,12 @@ class UrlData:
|
|||
# check content
|
||||
warningregex = config["warningregex"]
|
||||
if warningregex and self.valid:
|
||||
Config.debug("DEBUG: checking content\n")
|
||||
debug("DEBUG: checking content\n")
|
||||
self.checkContent(warningregex)
|
||||
|
||||
self.checktime = time.time() - t
|
||||
# check recursion
|
||||
Config.debug("DEBUG: checking recursion\n")
|
||||
debug("DEBUG: checking recursion\n")
|
||||
if self.allowsRecursion(config):
|
||||
self.parseUrl(config)
|
||||
self.closeConnection()
|
||||
|
|
@ -280,7 +281,7 @@ class UrlData:
|
|||
self.data = self.urlConnection.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self._init_html_comments()
|
||||
Config.debug("DEBUG: comment spans %s\n" % self.html_comments)
|
||||
debug("DEBUG: comment spans %s\n" % self.html_comments)
|
||||
return self.data
|
||||
|
||||
|
||||
|
|
@ -309,8 +310,8 @@ class UrlData:
|
|||
|
||||
|
||||
def parseUrl(self, config):
|
||||
Config.debug(Config.DebugDelim+"Parsing recursively into\n"+\
|
||||
str(self)+"\n"+Config.DebugDelim)
|
||||
debug(Config.DebugDelim+"Parsing recursively into\n"+\
|
||||
str(self)+"\n"+Config.DebugDelim)
|
||||
# search for a possible base reference
|
||||
bases = self.searchInForTag(re.compile(_linkMatcher % ("base",
|
||||
"href"), re.VERBOSE))
|
||||
|
|
@ -397,7 +398,7 @@ def GetUrlDataFrom(urlName,
|
|||
return JavascriptUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^https:", name):
|
||||
return HttpsUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
if re.search("^news:", name):
|
||||
if re.search("^(s?news|nntp):", name):
|
||||
return NntpUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
# assume local file
|
||||
return FileUrlData(urlName, recursionLevel, parentName, baseRef, line)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
<a href="news:comp.os.linux.misc">
|
||||
<a href="news:de.comp.os.unix.linux.misc">
|
||||
<a href="news:comp.lang.python">
|
||||
<a href="news:ist.garantiert.nix">
|
||||
<a href="snews:de.comp.os.unix.linux.misc">
|
||||
<a href="news:">
|
||||
<a href="news:§$%&/´`(§%">
|
||||
<a href="nntp://news.rz.uni-sb.de/comp.lang.python">
|
||||
<a href="nntp://news.rz.uni-sb.de/comp.lang.python/1-5">
|
||||
<a href="nntp://news.rz.uni-sb.de/EFGJG4.7A@deshaw.com">
|
||||
<a href="nntp://news.rz.uni-sb.de/">
|
||||
<a href="news:comp.lang.python/1-5">
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue