linkchecker/linkcheck/OutputReader.py
2000-03-21 11:38:22 +00:00

78 lines
2.5 KiB
Python

import string,re
import UrlData
class ParseException(Exception):
    """Signals that the text handed to the output parser is malformed."""
# Line classifiers for the parser below.  All of them are applied with
# ``match`` and are therefore anchored at the start of the line.
# Raw string literals keep the backslash in ``\s`` from being treated as a
# (invalid) string escape.

# A line that starts with whitespace; this includes a blank line, which
# consists only of its newline character.
ws = re.compile(r"\s+")

# Keyword lines.  Each pattern requires at least one more character after
# the keyword itself.
regex_realUrl = re.compile(r"^Real URL.+")
regex_result = re.compile(r"^Result.+")
regex_base = re.compile(r"^Base.+")
regex_info = re.compile(r"^Info.+")
regex_warning = re.compile(r"^Warning.+")
regex_parentUrl = re.compile(r"^Parent URL.+")

# Applied to the text of a result to tell valid results from errors;
# unlike the patterns above, nothing has to follow the keyword.
regex_valid = re.compile(r"^Valid.*")
class OutputReader:
    """Rebuild url data objects from keyword-prefixed text output.

    The input is read line by line.  Lines starting with one of the
    keywords "Real URL", "Result", "Info", "Base", "Warning" or
    "Parent URL" fill in the fields of the current record; a line that
    starts with whitespace (such as a blank line) ends the record.  Lines
    matching none of these are ignored.
    """

    def resetState(self):
        """Clear every field collected for the record being read."""
        self.urlName = None
        self.parentName = None
        self.baseRef = None
        self.info = None
        self.warning = None
        self.result = None
        self.linenumber = 0
        # Number of "Real URL" and "Result" lines seen in this record.
        self.state = 0

    def parse(self, file):
        """Read all records from *file* and return them as a list.

        *file* only needs a ``readline()`` method that returns an empty
        string at end of input.  Each complete record is turned into an
        object via ``UrlData.GetUrlDataFrom``.  A record still pending at
        end of input is handled exactly like one terminated by a blank
        line, so the last record is not lost when the trailing blank
        line is missing.

        Raises ParseException when a record ends although it contains
        only one of the mandatory "Real URL" / "Result" entries.
        """
        urls = []
        self.resetState()
        line = file.readline()
        while line:
            if ws.match(line):
                # Leading whitespace terminates the current record.
                self._finishRecord(urls)
            elif regex_realUrl.match(line):
                self.state = self.state + 1
                # The slices below cut off the keyword by its length.
                self.urlName = line[8:].strip()
            elif regex_result.match(line):
                self.state = self.state + 1
                self.result = line[6:].strip()
            elif regex_info.match(line):
                self.info = line[4:].strip()
            elif regex_base.match(line):
                self.baseRef = line[4:].strip()
            elif regex_warning.match(line):
                self.warning = line[7:].strip()
            elif regex_parentUrl.match(line):
                self.parentName = line[10:].strip()
                if ',' in self.parentName:
                    # Everything after the first comma is kept, as an
                    # unconverted string, in ``linenumber``.
                    self.parentName, self.linenumber = \
                        self.parentName.split(",", 1)
            line = file.readline()
        # Input may end without a terminating blank line.
        self._finishRecord(urls)
        return urls

    def _finishRecord(self, urls):
        """Append the pending record to *urls* (if any) and reset state."""
        complete = (self.state >= 2 and self.urlName is not None
                    and self.result is not None)
        if complete:
            # The literal 0 is passed through unchanged; its meaning is
            # defined by UrlData.GetUrlDataFrom.
            urldata = UrlData.GetUrlDataFrom(self.urlName, 0,
                                             self.parentName, self.baseRef,
                                             self.linenumber)
            if self.info:
                urldata.setInfo(self.info)
            if self.warning:
                urldata.setWarning(self.warning)
            if regex_valid.match(self.result):
                urldata.valid = 1
                urldata.validString = self.result
            else:
                urldata.valid = 0
                urldata.errorString = self.result
            urls.append(urldata)
        elif self.state:
            # Only one of the two mandatory entries was present.
            raise ParseException("No Real URL and Result keyword found")
        self.resetState()