linkchecker/linkcheck/OutputReader.py
2000-05-01 11:35:08 +00:00

95 lines
3.3 KiB
Python

"""
Copyright (C) 2000 Bastian Kleineidam
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import string,re
import UrlData
class ParseException(Exception):
pass
ws = re.compile("\s+")
regex_realUrl = re.compile("^Real URL.+")
regex_result = re.compile("^Result.+")
regex_base = re.compile("^Base.+")
regex_info = re.compile("^Info.+")
regex_warning = re.compile("^Warning.+")
regex_parentUrl = re.compile("^Parent URL.+")
regex_valid = re.compile("^Valid.*")
class OutputReader:
def resetState(self):
self.urlName = None
self.parentName = None
self.baseRef = None
self.info = None
self.warning = None
self.result = None
self.linenumber = 0
self.state = 0
def parse(self, file):
line = file.readline()
url = None
urls = []
self.resetState()
while line:
if ws.match(line):
if self.state>=2:
#append url
urldata = UrlData.GetUrlDataFrom(self.urlName, 0,
self.parentName, self.baseRef, self.linenumber)
if self.info:
urldata.setInfo(self.info)
if self.warning:
urldata.setWarning(self.info)
if OutputReader.regex_valid.match(self.result):
urldata.valid=1
urldata.validString = self.result
else:
urldata.valid=0
urldata.errorString = self.result
urls.append(urldata)
elif self.state:
raise ParseException, "No Real URL and Result keyword found"
self.resetState()
elif regex_realUrl.match(line):
self.state = self.state+1
self.urlName = string.strip(line[8:])
elif regex_result.match(line):
self.state = self.state+1
self.result = string.strip(line[6:])
elif regex_info.match(line):
self.info = string.strip(line[4:])
elif regex_base.match(line):
self.baseRef = string.strip(line[4:])
elif regex_warning.match(line):
self.warning = string.strip(line[7:])
elif regex_parentUrl.match(line):
self.parentName = string.strip(line[10:])
if ',' in self.parentName:
self.parentName,self.linenumber = string.split(self.parentName,",",1)
else:
pass
line = file.readline()
return urls