From 62cbdf15d91a9b66159b39374c597941bc088641 Mon Sep 17 00:00:00 2001 From: calvin Date: Tue, 3 Apr 2001 18:59:38 +0000 Subject: [PATCH] remove DNS Tests git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@244 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- MANIFEST.in | 4 +- Makefile | 12 +++--- debian/changelog | 23 +++++++++- debian/control | 11 +---- debian/copyright | 6 +-- debian/linkchecker-ssl.files | 5 ++- debian/linkchecker.postinst | 14 +++--- debian/linkchecker.prerm | 2 +- linkcheck/HttpUrlData.py | 5 ++- linkcheck/StringUtil.py | 10 ++--- linkcheck/UrlData.py | 21 ++++++--- linkcheck/__init__.py | 4 +- linkcheckssl/__init__.py | 1 + {linkcheck => linkcheckssl}/httpslib.py | 0 ssl.c => linkcheckssl/ssl.c | 0 po/de.po | 57 +++++++++++++++++++------ po/fr.po | 56 ++++++++++++++++++------ setup.py | 17 ++++---- test/html/http.html | 1 + tests/test.py | 22 ---------- tests/test2.py | 17 -------- tests/test3.py | 13 ------ tests/test4.py | 7 --- tests/test5.py | 52 ---------------------- 24 files changed, 170 insertions(+), 190 deletions(-) create mode 100644 linkcheckssl/__init__.py rename {linkcheck => linkcheckssl}/httpslib.py (100%) rename ssl.c => linkcheckssl/ssl.c (100%) delete mode 100755 tests/test.py delete mode 100755 tests/test2.py delete mode 100755 tests/test3.py delete mode 100755 tests/test4.py delete mode 100755 tests/test5.py diff --git a/MANIFEST.in b/MANIFEST.in index 8951efc8..56fe3b8a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,7 +6,7 @@ include lc.cgi lc.fcgi lc.sz_fcgi include Makefile include create.sql include debian/rules debian/changelog debian/copyright debian/control -include debian/dirs debian/docs debian/links debian/postinst +include debian/docs debian/links debian/postinst include debian/prerm include DNS/README include test/viewprof.py test/profiletest.py test/*.html test/robots.txt @@ -15,4 +15,4 @@ recursive-include locale *.mo recursive-include po *.po *.py Makefile recursive-include lconline * recursive-include tests *.py -exclude linkcheckerConf.py +exclude _linkchecker_configdata.py diff --git a/Makefile b/Makefile index 4a8c02dd..56128b90 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,6 @@ NAME=$(shell ./setup.py --name) HOST=treasure.calvinsplayground.de #LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s -DEBPACKAGE = $(PACKAGE)_$(VERSION)_all.deb $(PACKAGE)-ssl_$(VERSION)_i386.deb OFFLINETESTS = test_base test_misc test_file test_frames ONLINETESTS = test_mail test_http test_https test_news test_ftp @@ -24,7 +23,7 @@ clean: distclean: clean cleandeb rm -rf dist build # just to be sure clean also the build dir - rm -f $(PACKAGE)-out.* VERSION $(PACKAGE)Conf.py MANIFEST Packages.gz + rm -f $(PACKAGE)-out.* VERSION _$(PACKAGE)_configdata.py MANIFEST Packages.gz cleandeb: rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp @@ -43,8 +42,11 @@ deb: fakeroot debian/rules binary fakeroot dpkg-buildpackage -sgpg -pgpg -k959C340F -package: - cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz +packages: + -cd .. && dpkg-scanpackages . | gzip --best > Packages.gz + +sources: + -cd .. && dpkg-scansources . | gzip --best > Sources.gz files: locale env http_proxy="" ./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/ @@ -52,7 +54,7 @@ files: locale VERSION: echo $(VERSION) > VERSION -upload: distclean dist package files VERSION +upload: distclean dist files VERSION scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt scp README shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/readme.txt scp linkchecker-out.* shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs diff --git a/debian/changelog b/debian/changelog index cc746fc9..079ade23 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,7 +6,28 @@ linkchecker (1.3.0) unstable; urgency=low Jamie Heilman ) * config file option to control which fields the loggers should print - -- Bastian Kleineidam Thu, 22 Feb 2001 01:35:50 +0100 + -- Bastian Kleineidam Thu, 22 Feb 2001 01:35:50 +0100 + +linkchecker (1.2.15) unstable; urgency=low + + * remove ssl modules for main linkchecker package; make a separate + package linkchecker-ssl for non-US + * adjust build-depends and depends + * fix anchor checking + * debian standards version 3.5.2.0 + * require python-distutils >= 1.0.1, removed old compatibility code + * added FAQ entry for broken links + * change email address to calvin@debian.org + * add DNS.Error to caught exceptions + * add all html link tags (ripped from HTML::Tagset.pm) + * updated robotparser2.py to newest version + * catch errors when calling UrlData.checkContent + * added linkcheckerrc to the conffiles + * disable threading in the lc.cgi script so the machine wont be + hogged (closes: Bug#86788) + * fix https support: add -lssl when compiling ssl module + + -- Bastian Kleineidam Sun, 4 Mar 2001 20:55:02 +0100 linkchecker (1.2.14) unstable; urgency=low diff --git a/debian/control b/debian/control index a9520c6b..cfca6e96 100644 --- a/debian/control +++ b/debian/control @@ -2,14 +2,14 @@ Source: linkchecker Section: web Priority: optional Maintainer: Bastian Kleineidam -Build-Depends: python2-base, python2-dev, debhelper (>= 3.0.0), libssl096-dev +Build-Depends: python2-base, python2-dev, debhelper (>= 3.0.0) Build-Depends-Indep: gettext Standards-Version: 3.5.2 Package: linkchecker Architecture: all Depends: python2-base -Suggests: linkchecker-ssl +Suggests: linkchecker-ssl (>= ${Source-Version}) Description: check HTML documents for broken links Features: o recursive checking @@ -25,10 +25,3 @@ Description: check HTML documents for broken links o i18n support o command line interface o (Fast)CGI web interface (requires HTTP server) - -Package: linkchecker-ssl -Architecture: any -Depends: linkchecker (>= 1.3.0) -Description: HTTPS support for LinkChecker - Includes the Python modules 'ssl' and 'httpslib' to support https:// - links. diff --git a/debian/copyright b/debian/copyright index 3450af59..ebf99137 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,10 +1,10 @@ -This is LinkChecker, written and maintained by Bastian Kleineidam - on Sat, 8 Jan 2000 11:00:35 +0100. +This is linkchecker, written and maintained by Bastian Kleineidam + on Sat, 8 Jan 2000 11:00:35 +0100. The original source can always be found at: http://linkchecker.sourceforge.net/ -Copyright (C) 2000 Bastian Kleineidam +Copyright (C) 2000,2001 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/debian/linkchecker-ssl.files b/debian/linkchecker-ssl.files index 1555f63e..2f932ab1 100644 --- a/debian/linkchecker-ssl.files +++ b/debian/linkchecker-ssl.files @@ -1,2 +1,3 @@ -usr/lib/python2.0/site-packages/linkcheck/httpslib.py -usr/lib/python2.0/site-packages/ssl.so +usr/lib/python2.0/site-packages/linkcheckssl/httpslib.py +usr/lib/python2.0/site-packages/linkcheckssl/ssl.so +usr/lib/python2.0/site-packages/linkcheckssl/__init__.py diff --git a/debian/linkchecker.postinst b/debian/linkchecker.postinst index 1bba02db..b189dfa9 100755 --- a/debian/linkchecker.postinst +++ b/debian/linkchecker.postinst @@ -3,21 +3,21 @@ # Written 1998 by Gregor Hoffleit . # used by Bastian Kleineidam for LinkChecker -DIRLIST="/usr/lib/python1.5/site-packages/linkcheck" -FILELIST="linkcheckerConf.py" -SITEPACKAGES="/usr/lib/python1.5/site-packages" +DIRLIST="/usr/lib/python2.0/site-packages/linkcheck" +FILELIST="_linkchecker_configdata.py" +SITEPACKAGES="/usr/lib/python2.0/site-packages" COMMAND="'import sys,py_compile;py_compile.compile(sys.argv[1])'" case "$1" in configure|abort-upgrade|abort-remove|abort-deconfigure) for i in $DIRLIST; do - python -O /usr/lib/python1.5/compileall.py -q $i - python /usr/lib/python1.5/compileall.py -q $i + python2 -O /usr/lib/python2.0/compileall.py -q $i + python2 /usr/lib/python2.0/compileall.py -q $i done # use /bin/sh -c, otherwise I get a SyntaxError from Python for i in $FILELIST; do - /bin/sh -c "python -O -c $COMMAND $SITEPACKAGES/$i" - /bin/sh -c "python -c $COMMAND $SITEPACKAGES/$i" + /bin/sh -c "python2 -O -c $COMMAND $SITEPACKAGES/$i" + /bin/sh -c "python2 -c $COMMAND $SITEPACKAGES/$i" done ;; *) diff --git a/debian/linkchecker.prerm b/debian/linkchecker.prerm index 39c4d266..dd932d49 100755 --- a/debian/linkchecker.prerm +++ b/debian/linkchecker.prerm @@ -9,7 +9,7 @@ dpkg --listfiles $PACKAGE | awk '$0~/\.py$/ {print $0"c\n" $0"o"}' | xargs rm -f >&2 -rmdir /usr/lib/python1.5/site-packages/linkcheck 2>/dev/null || true +rmdir /usr/lib/python2.0/site-packages/linkcheck 2>/dev/null || true #DEBHELPER# diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 4573f554..fa536e5f 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -188,7 +188,8 @@ class HttpUrlData(UrlData): return httplib.HTTP(host) def getContent(self): - if not self.data: + if not self.has_content: + self.has_content = 1 self.closeConnection() t = time.time() status, statusText, self.mime = self._getHttpRequest("GET") @@ -198,12 +199,14 @@ class HttpUrlData(UrlData): self._init_html_comments() Config.debug("DEBUG: comment spans %s\n" % self.html_comments) return self.data + def isHtml(self): if not (self.valid and self.mime): return 0 return self.mime.gettype()[:9]=="text/html" + def robotsTxtAllowsUrl(self, config): roboturl="%s://%s/robots.txt" % self.urlTuple[0:2] if not config.robotsTxtCache_has_key(roboturl): diff --git a/linkcheck/StringUtil.py b/linkcheck/StringUtil.py index 5ef5375c..8c4904b5 100644 --- a/linkcheck/StringUtil.py +++ b/linkcheck/StringUtil.py @@ -17,11 +17,9 @@ import string,re,sys,htmlentitydefs -HtmlTable = [] -UnHtmlTable = [] -for ent,ch in htmlentitydefs.entitydefs.items(): - HtmlTable.append((ch, "&"+ent+";")) - UnHtmlTable.append(("&"+ent+";", ch)) +entities = htmlentitydefs.entitydefs.items() +HtmlTable = map(lambda x: (x[1], "&"+x[0]+";"), entities) +UnHtmlTable = map(lambda x: ("&"+x[0]+";", x[1]), entities) # order matters! HtmlTable.sort() UnHtmlTable.sort() @@ -153,9 +151,11 @@ def htmlify(str): "Escape special HTML chars and strings" return applyTable(HtmlTable, str) + def unhtmlify(str): return applyTable(UnHtmlTable, str) + def getLineNumber(str, index): "return the line number of str[index]" i=0 diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index adfc6f20..a43a65ff 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -52,6 +52,7 @@ _linkMatcher = r""" > # close tag """ + # ripped mainly from HTML::Tagset.pm LinkTags = ( ("a", ["href"]), @@ -103,6 +104,11 @@ BasePattern = { 'attr': 'href', } +CommentPattern = re.compile(" no beginning quote