From e25ea13fa7d2a7b6dd5256aa4bd2bdc947c7817e Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 16 Aug 2004 19:28:42 +0000 Subject: [PATCH] added git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1426 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- config/reindent.py | 283 + linkcheck/HtmlParser/Makefile | 24 + linkcheck/HtmlParser/__init__.py | 114 + linkcheck/HtmlParser/htmllex.c | 12138 +++++++++++++++++++ linkcheck/HtmlParser/htmllex.l | 1042 ++ linkcheck/HtmlParser/htmllib.py | 101 + linkcheck/HtmlParser/htmlparse.c | 2057 ++++ linkcheck/HtmlParser/htmlparse.h | 72 + linkcheck/HtmlParser/htmlparse.y | 852 ++ linkcheck/HtmlParser/htmlsax.h | 83 + linkcheck/HtmlParser/s_util.c | 52 + linkcheck/HtmlParser/s_util.h | 14 + linkcheck/dns/ChangeLog | 27 + linkcheck/dns/__init__.py | 50 + linkcheck/dns/dnssec.py | 63 + linkcheck/dns/exception.py | 41 + linkcheck/dns/flags.py | 107 + linkcheck/dns/ifconfig.py | 104 + linkcheck/dns/inet.py | 91 + linkcheck/dns/ipv4.py | 37 + linkcheck/dns/ipv6.py | 158 + linkcheck/dns/message.py | 905 ++ linkcheck/dns/name.py | 586 + linkcheck/dns/namedict.py | 60 + linkcheck/dns/node.py | 173 + linkcheck/dns/opcode.py | 105 + linkcheck/dns/query.py | 304 + linkcheck/dns/rcode.py | 120 + linkcheck/dns/rdata.py | 441 + linkcheck/dns/rdataclass.py | 115 + linkcheck/dns/rdataset.py | 325 + linkcheck/dns/rdatatype.py | 216 + linkcheck/dns/rdtypes/ANY/AFSDB.py | 52 + linkcheck/dns/rdtypes/ANY/CERT.py | 132 + linkcheck/dns/rdtypes/ANY/CNAME.py | 25 + linkcheck/dns/rdtypes/ANY/DNAME.py | 22 + linkcheck/dns/rdtypes/ANY/DNSKEY.py | 21 + linkcheck/dns/rdtypes/ANY/DS.py | 86 + linkcheck/dns/rdtypes/ANY/GPOS.py | 157 + linkcheck/dns/rdtypes/ANY/HINFO.py | 84 + linkcheck/dns/rdtypes/ANY/ISDN.py | 97 + linkcheck/dns/rdtypes/ANY/KEY.py | 21 + linkcheck/dns/rdtypes/ANY/LOC.py | 342 + linkcheck/dns/rdtypes/ANY/MX.py | 21 + linkcheck/dns/rdtypes/ANY/NS.py | 21 + linkcheck/dns/rdtypes/ANY/NSEC.py | 141 + 
linkcheck/dns/rdtypes/ANY/NXT.py | 97 + linkcheck/dns/rdtypes/ANY/PTR.py | 21 + linkcheck/dns/rdtypes/ANY/RP.py | 83 + linkcheck/dns/rdtypes/ANY/RRSIG.py | 21 + linkcheck/dns/rdtypes/ANY/RT.py | 21 + linkcheck/dns/rdtypes/ANY/SIG.py | 21 + linkcheck/dns/rdtypes/ANY/SOA.py | 122 + linkcheck/dns/rdtypes/ANY/SSHFP.py | 78 + linkcheck/dns/rdtypes/ANY/TXT.py | 86 + linkcheck/dns/rdtypes/ANY/X25.py | 63 + linkcheck/dns/rdtypes/ANY/__init__.py | 44 + linkcheck/dns/rdtypes/IN/A.py | 60 + linkcheck/dns/rdtypes/IN/AAAA.py | 61 + linkcheck/dns/rdtypes/IN/APL.py | 170 + linkcheck/dns/rdtypes/IN/KX.py | 21 + linkcheck/dns/rdtypes/IN/NAPTR.py | 133 + linkcheck/dns/rdtypes/IN/NSAP.py | 60 + linkcheck/dns/rdtypes/IN/NSAP_PTR.py | 21 + linkcheck/dns/rdtypes/IN/PX.py | 98 + linkcheck/dns/rdtypes/IN/SRV.py | 90 + linkcheck/dns/rdtypes/IN/WKS.py | 114 + linkcheck/dns/rdtypes/IN/__init__.py | 30 + linkcheck/dns/rdtypes/__init__.py | 26 + linkcheck/dns/rdtypes/keybase.py | 151 + linkcheck/dns/rdtypes/mxbase.py | 88 + linkcheck/dns/rdtypes/nsbase.py | 72 + linkcheck/dns/rdtypes/sigbase.py | 169 + linkcheck/dns/renderer.py | 299 + linkcheck/dns/resolver.py | 640 + linkcheck/dns/rrset.py | 169 + linkcheck/dns/set.py | 251 + linkcheck/dns/tests/__init__.py | 1 + linkcheck/dns/tests/example | 204 + linkcheck/dns/tests/example1.good | 105 + linkcheck/dns/tests/example2.good | 105 + linkcheck/dns/tests/test_flags.py | 68 + linkcheck/dns/tests/test_message.py | 160 + linkcheck/dns/tests/test_name.py | 603 + linkcheck/dns/tests/test_namedict.py | 110 + linkcheck/dns/tests/test_ntoaaton.py | 164 + linkcheck/dns/tests/test_rdtypeandclass.py | 133 + linkcheck/dns/tests/test_resolver.py | 89 + linkcheck/dns/tests/test_rrset.py | 62 + linkcheck/dns/tests/test_set.py | 181 + linkcheck/dns/tests/test_tokenizer.py | 184 + linkcheck/dns/tests/test_update.py | 122 + linkcheck/dns/tests/test_zone.py | 360 + linkcheck/dns/tokenizer.py | 421 + linkcheck/dns/tsig.py | 124 + linkcheck/dns/tsigkeyring.py | 45 + 
linkcheck/dns/ttl.py | 62 + linkcheck/dns/update.py | 241 + linkcheck/dns/version.py | 35 + linkcheck/dns/zone.py | 833 ++ linkcheck/logger/blacklist.py | 85 + linkcheck/logger/colored.py | 174 + linkcheck/logger/csvlog.py | 108 + linkcheck/logger/gml.py | 132 + linkcheck/logger/html.py | 192 + linkcheck/logger/none.py | 35 + linkcheck/logger/sql.py | 106 + linkcheck/logger/standard.py | 188 + linkcheck/logger/xmllog.py | 181 + 109 files changed, 31075 insertions(+) create mode 100644 config/reindent.py create mode 100644 linkcheck/HtmlParser/Makefile create mode 100644 linkcheck/HtmlParser/__init__.py create mode 100644 linkcheck/HtmlParser/htmllex.c create mode 100644 linkcheck/HtmlParser/htmllex.l create mode 100644 linkcheck/HtmlParser/htmllib.py create mode 100644 linkcheck/HtmlParser/htmlparse.c create mode 100644 linkcheck/HtmlParser/htmlparse.h create mode 100644 linkcheck/HtmlParser/htmlparse.y create mode 100644 linkcheck/HtmlParser/htmlsax.h create mode 100644 linkcheck/HtmlParser/s_util.c create mode 100644 linkcheck/HtmlParser/s_util.h create mode 100644 linkcheck/dns/ChangeLog create mode 100644 linkcheck/dns/__init__.py create mode 100644 linkcheck/dns/dnssec.py create mode 100644 linkcheck/dns/exception.py create mode 100644 linkcheck/dns/flags.py create mode 100644 linkcheck/dns/ifconfig.py create mode 100644 linkcheck/dns/inet.py create mode 100644 linkcheck/dns/ipv4.py create mode 100644 linkcheck/dns/ipv6.py create mode 100644 linkcheck/dns/message.py create mode 100644 linkcheck/dns/name.py create mode 100644 linkcheck/dns/namedict.py create mode 100644 linkcheck/dns/node.py create mode 100644 linkcheck/dns/opcode.py create mode 100644 linkcheck/dns/query.py create mode 100644 linkcheck/dns/rcode.py create mode 100644 linkcheck/dns/rdata.py create mode 100644 linkcheck/dns/rdataclass.py create mode 100644 linkcheck/dns/rdataset.py create mode 100644 linkcheck/dns/rdatatype.py create mode 100644 linkcheck/dns/rdtypes/ANY/AFSDB.py create mode 
100644 linkcheck/dns/rdtypes/ANY/CERT.py create mode 100644 linkcheck/dns/rdtypes/ANY/CNAME.py create mode 100644 linkcheck/dns/rdtypes/ANY/DNAME.py create mode 100644 linkcheck/dns/rdtypes/ANY/DNSKEY.py create mode 100644 linkcheck/dns/rdtypes/ANY/DS.py create mode 100644 linkcheck/dns/rdtypes/ANY/GPOS.py create mode 100644 linkcheck/dns/rdtypes/ANY/HINFO.py create mode 100644 linkcheck/dns/rdtypes/ANY/ISDN.py create mode 100644 linkcheck/dns/rdtypes/ANY/KEY.py create mode 100644 linkcheck/dns/rdtypes/ANY/LOC.py create mode 100644 linkcheck/dns/rdtypes/ANY/MX.py create mode 100644 linkcheck/dns/rdtypes/ANY/NS.py create mode 100644 linkcheck/dns/rdtypes/ANY/NSEC.py create mode 100644 linkcheck/dns/rdtypes/ANY/NXT.py create mode 100644 linkcheck/dns/rdtypes/ANY/PTR.py create mode 100644 linkcheck/dns/rdtypes/ANY/RP.py create mode 100644 linkcheck/dns/rdtypes/ANY/RRSIG.py create mode 100644 linkcheck/dns/rdtypes/ANY/RT.py create mode 100644 linkcheck/dns/rdtypes/ANY/SIG.py create mode 100644 linkcheck/dns/rdtypes/ANY/SOA.py create mode 100644 linkcheck/dns/rdtypes/ANY/SSHFP.py create mode 100644 linkcheck/dns/rdtypes/ANY/TXT.py create mode 100644 linkcheck/dns/rdtypes/ANY/X25.py create mode 100644 linkcheck/dns/rdtypes/ANY/__init__.py create mode 100644 linkcheck/dns/rdtypes/IN/A.py create mode 100644 linkcheck/dns/rdtypes/IN/AAAA.py create mode 100644 linkcheck/dns/rdtypes/IN/APL.py create mode 100644 linkcheck/dns/rdtypes/IN/KX.py create mode 100644 linkcheck/dns/rdtypes/IN/NAPTR.py create mode 100644 linkcheck/dns/rdtypes/IN/NSAP.py create mode 100644 linkcheck/dns/rdtypes/IN/NSAP_PTR.py create mode 100644 linkcheck/dns/rdtypes/IN/PX.py create mode 100644 linkcheck/dns/rdtypes/IN/SRV.py create mode 100644 linkcheck/dns/rdtypes/IN/WKS.py create mode 100644 linkcheck/dns/rdtypes/IN/__init__.py create mode 100644 linkcheck/dns/rdtypes/__init__.py create mode 100644 linkcheck/dns/rdtypes/keybase.py create mode 100644 linkcheck/dns/rdtypes/mxbase.py create mode 100644 
linkcheck/dns/rdtypes/nsbase.py create mode 100644 linkcheck/dns/rdtypes/sigbase.py create mode 100644 linkcheck/dns/renderer.py create mode 100644 linkcheck/dns/resolver.py create mode 100644 linkcheck/dns/rrset.py create mode 100644 linkcheck/dns/set.py create mode 100644 linkcheck/dns/tests/__init__.py create mode 100644 linkcheck/dns/tests/example create mode 100644 linkcheck/dns/tests/example1.good create mode 100644 linkcheck/dns/tests/example2.good create mode 100644 linkcheck/dns/tests/test_flags.py create mode 100644 linkcheck/dns/tests/test_message.py create mode 100644 linkcheck/dns/tests/test_name.py create mode 100644 linkcheck/dns/tests/test_namedict.py create mode 100644 linkcheck/dns/tests/test_ntoaaton.py create mode 100644 linkcheck/dns/tests/test_rdtypeandclass.py create mode 100644 linkcheck/dns/tests/test_resolver.py create mode 100644 linkcheck/dns/tests/test_rrset.py create mode 100644 linkcheck/dns/tests/test_set.py create mode 100644 linkcheck/dns/tests/test_tokenizer.py create mode 100644 linkcheck/dns/tests/test_update.py create mode 100644 linkcheck/dns/tests/test_zone.py create mode 100644 linkcheck/dns/tokenizer.py create mode 100644 linkcheck/dns/tsig.py create mode 100644 linkcheck/dns/tsigkeyring.py create mode 100644 linkcheck/dns/ttl.py create mode 100644 linkcheck/dns/update.py create mode 100644 linkcheck/dns/version.py create mode 100644 linkcheck/dns/zone.py create mode 100644 linkcheck/logger/blacklist.py create mode 100644 linkcheck/logger/colored.py create mode 100644 linkcheck/logger/csvlog.py create mode 100644 linkcheck/logger/gml.py create mode 100644 linkcheck/logger/html.py create mode 100644 linkcheck/logger/none.py create mode 100644 linkcheck/logger/sql.py create mode 100644 linkcheck/logger/standard.py create mode 100644 linkcheck/logger/xmllog.py diff --git a/config/reindent.py b/config/reindent.py new file mode 100644 index 00000000..39b52517 --- /dev/null +++ b/config/reindent.py @@ -0,0 +1,283 @@ +#! 
/usr/bin/python2.3 + +# Released to the public domain, by Tim Peters, 03 October 2000. + +"""reindent [-d][-r][-v] [ path ... ] + +-d Dry run. Analyze, but don't make any changes to, files. +-r Recurse. Search for all .py files in subdirectories too. +-v Verbose. Print informative msgs; else no output. + +Change Python (.py) files to use 4-space indents and no hard tab characters. +Also trim excess spaces and tabs from ends of lines, and remove empty lines +at the end of files. Also ensure the last line ends with a newline. + +If no paths are given on the command line, reindent operates as a filter, +reading a single source file from standard input and writing the transformed +source to standard output. In this case, the -d, -r and -v flags are +ignored. + +You can pass one or more file and/or directory paths. When a directory +path, all .py files within the directory will be examined, and, if the -r +option is given, likewise recursively for subdirectories. + +If output is not to standard output, reindent overwrites files in place, +renaming the originals with a .bak extension. If it finds nothing to +change, the file is left alone. If reindent does change a file, the changed +file is a fixed-point for future runs (i.e., running reindent on the +resulting .py file won't change it again). + +The hard part of reindenting is figuring out what to do with comment +lines. So long as the input files get a clean bill of health from +tabnanny.py, reindent should do a good job. 
+""" + +__version__ = "1" + +import tokenize +import os +import sys + +verbose = 0 +recurse = 0 +dryrun = 0 + +def errprint(*args): + sep = "" + for arg in args: + sys.stderr.write(sep + str(arg)) + sep = " " + sys.stderr.write("\n") + +def main(): + import getopt + global verbose, recurse, dryrun + try: + opts, args = getopt.getopt(sys.argv[1:], "drv") + except getopt.error, msg: + errprint(msg) + return + for o, a in opts: + if o == '-d': + dryrun += 1 + elif o == '-r': + recurse += 1 + elif o == '-v': + verbose += 1 + if not args: + r = Reindenter(sys.stdin) + r.run() + r.write(sys.stdout) + return + for arg in args: + check(arg) + +def check(file): + if os.path.isdir(file) and not os.path.islink(file): + if verbose: + print "listing directory", file + names = os.listdir(file) + for name in names: + fullname = os.path.join(file, name) + if ((recurse and os.path.isdir(fullname) and + not os.path.islink(fullname)) + or name.lower().endswith(".py")): + check(fullname) + return + + if verbose: + print "checking", file, "...", + try: + f = open(file) + except IOError, msg: + errprint("%s: I/O Error: %s" % (file, str(msg))) + return + + r = Reindenter(f) + f.close() + if r.run(): + if verbose: + print "changed." + if dryrun: + print "But this is a dry run, so leaving it alone." + if not dryrun: + bak = file + ".bak" + if os.path.exists(bak): + os.remove(bak) + os.rename(file, bak) + if verbose: + print "renamed", file, "to", bak + f = open(file, "w") + r.write(f) + f.close() + if verbose: + print "wrote new", file + else: + if verbose: + print "unchanged." + +def _rstrip(line, JUNK='\n \t'): + """Return line stripped of trailing spaces, tabs, newlines. + + Note that line.rstrip() instead also strips sundry control characters, + but at least one known Emacs user expects to keep junk like that, not + mentioning Barry by name or anything . 
+ """ + + i = len(line) + while i > 0 and line[i-1] in JUNK: + i -= 1 + return line[:i] + +class Reindenter: + + def __init__(self, f): + self.find_stmt = 1 # next token begins a fresh stmt? + self.level = 0 # current indent level + + # Raw file lines. + self.raw = f.readlines() + + # File lines, rstripped & tab-expanded. Dummy at start is so + # that we can use tokenize's 1-based line numbering easily. + # Note that a line is all-blank iff it's "\n". + self.lines = [_rstrip(line).expandtabs() + "\n" + for line in self.raw] + self.lines.insert(0, None) + self.index = 1 # index into self.lines of next line + + # List of (lineno, indentlevel) pairs, one for each stmt and + # comment line. indentlevel is -1 for comment lines, as a + # signal that tokenize doesn't know what to do about them; + # indeed, they're our headache! + self.stats = [] + + def run(self): + tokenize.tokenize(self.getline, self.tokeneater) + # Remove trailing empty lines. + lines = self.lines + while lines and lines[-1] == "\n": + lines.pop() + # Sentinel. + stats = self.stats + stats.append((len(lines), 0)) + # Map count of leading spaces to # we want. + have2want = {} + # Program after transformation. + after = self.after = [] + # Copy over initial empty lines -- there's nothing to do until + # we see a line with *something* on it. + i = stats[0][0] + after.extend(lines[1:i]) + for i in range(len(stats)-1): + thisstmt, thislevel = stats[i] + nextstmt = stats[i+1][0] + have = getlspace(lines[thisstmt]) + want = thislevel * 4 + if want < 0: + # A comment line. + if have: + # An indented comment line. If we saw the same + # indentation before, reuse what it most recently + # mapped to. + want = have2want.get(have, -1) + if want < 0: + # Then it probably belongs to the next real stmt. 
+ for j in xrange(i+1, len(stats)-1): + jline, jlevel = stats[j] + if jlevel >= 0: + if have == getlspace(lines[jline]): + want = jlevel * 4 + break + if want < 0: # Maybe it's a hanging + # comment like this one, + # in which case we should shift it like its base + # line got shifted. + for j in xrange(i-1, -1, -1): + jline, jlevel = stats[j] + if jlevel >= 0: + want = have + getlspace(after[jline-1]) - \ + getlspace(lines[jline]) + break + if want < 0: + # Still no luck -- leave it alone. + want = have + else: + want = 0 + assert want >= 0 + have2want[have] = want + diff = want - have + if diff == 0 or have == 0: + after.extend(lines[thisstmt:nextstmt]) + else: + for line in lines[thisstmt:nextstmt]: + if diff > 0: + if line == "\n": + after.append(line) + else: + after.append(" " * diff + line) + else: + remove = min(getlspace(line), -diff) + after.append(line[remove:]) + return self.raw != self.after + + def write(self, f): + f.writelines(self.after) + + # Line-getter for tokenize. + def getline(self): + if self.index >= len(self.lines): + line = "" + else: + line = self.lines[self.index] + self.index += 1 + return line + + # Line-eater for tokenize. + def tokeneater(self, type, token, (sline, scol), end, line, + INDENT=tokenize.INDENT, + DEDENT=tokenize.DEDENT, + NEWLINE=tokenize.NEWLINE, + COMMENT=tokenize.COMMENT, + NL=tokenize.NL): + + if type == NEWLINE: + # A program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? 
+ self.find_stmt = 1 + + elif type == INDENT: + self.find_stmt = 1 + self.level += 1 + + elif type == DEDENT: + self.find_stmt = 1 + self.level -= 1 + + elif type == COMMENT: + if self.find_stmt: + self.stats.append((sline, -1)) + # but we're still looking for a new stmt, so leave + # find_stmt alone + + elif type == NL: + pass + + elif self.find_stmt: + # This is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER. + self.find_stmt = 0 + if line: # not endmarker + self.stats.append((sline, self.level)) + +# Count number of leading blanks. +def getlspace(line): + i, n = 0, len(line) + while i < n and line[i] == " ": + i += 1 + return i + +if __name__ == '__main__': + main() diff --git a/linkcheck/HtmlParser/Makefile b/linkcheck/HtmlParser/Makefile new file mode 100644 index 00000000..651778ff --- /dev/null +++ b/linkcheck/HtmlParser/Makefile @@ -0,0 +1,24 @@ +# this parser needs flex >= 2.5.xx from http://lex.sf.net/ +# for reentrant bison parser support! +FLEX=flex +PYVER=2.3 +PYTHON=python$(PYVER) + +all: htmllex.c htmlparse.c + +%.o: %.c + gcc -g -O3 -Wall -pedantic -Wstrict-prototypes -fPIC -I. -I/usr/include/$(PYTHON) -c $< -o $@ + +htmlparse.h htmlparse.c: htmlparse.y htmlsax.h + bison htmlparse.y + +htmllex.l: htmlparse.h + +htmllex.c: htmllex.l htmlsax.h + $(FLEX) htmllex.l + +clean: + rm -f htmlparse.c htmlparse.h htmllex.c *.o *.so *.pyc *.pyo *.output + +splint: + splint -initallelements +posixlib -I/usr/include/linux -I. 
-I/usr/include/$(PYTHON) htmllex.c | less diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py new file mode 100644 index 00000000..b0ab0cc8 --- /dev/null +++ b/linkcheck/HtmlParser/__init__.py @@ -0,0 +1,114 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2004 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +"""Fast HTML parser module written in C with the following features: + +1. Reentrant + + As soon as any HTML string data is available, we try to feed it + to the HTML parser. This means that the parser has to scan possibly + incomplete data, recognizing as much as it can. Incomplete trailing + data is saved for subsequent calls, or it is just flushed into the + output buffer with the flush() function. + A reset() brings the parser back to its initial state, throwing away all + buffered data. + +2. Coping with HTML syntax errors + + The parser recognizes as much as it can and passes the rest + of the data as TEXT tokens. + The scanner only passes complete recognized HTML syntax elements to + the parser. Invalid syntax elements are passed as TEXT. This way we do + not need the bison error recovery. + Incomplete data is rescanned the next time the parser calls yylex() or + when it is being flush()ed. 
+ + The following syntax errors will be recognized correctly: + + a) missing quotes around attribute values + b) "" end tags in script modus + c) missing ">" in tags + d) invalid tag names + e) invalid characters inside tags or tag attributes + + Additionally the parser has the following features: + + a) NULL bytes are changed into spaces + b) inside a