From a64e1dcab16dbdd0288cab8192550feb53e7dc5a Mon Sep 17 00:00:00 2001 From: calvin Date: Fri, 13 Apr 2001 11:39:21 +0000 Subject: [PATCH] resolve html entities and fix offline tests git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@246 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- MANIFEST.in | 9 ++++----- Makefile | 23 +++++++++++++++++++---- TODO | 2 ++ debian/rules | 20 +------------------- linkcheck/Config.py | 14 +++++++------- linkcheck/UrlData.py | 8 +++++--- linkcheck/__init__.py | 4 ++-- test/html/frames.html | 4 ++-- test/output/test_base | 38 ++++++++++++++++---------------------- 9 files changed, 58 insertions(+), 64 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index 56fe3b8a..266b4217 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -include MANIFEST.in include README FAQ INSTALL LICENSE TODO draft-gilman-news-url-00.txt include norobots-rfc.html include linkcheckerrc linkchecker linkchecker.bat linkchecker.1 create.sql @@ -6,13 +5,13 @@ include lc.cgi lc.fcgi lc.sz_fcgi include Makefile include create.sql include debian/rules debian/changelog debian/copyright debian/control -include debian/docs debian/links debian/postinst +include debian/linkchecker.* debian/linkchecker-ssl.* debian/*-ssl include debian/prerm include DNS/README -include test/viewprof.py test/profiletest.py test/*.html test/robots.txt +include test/*.py test/*.txt +include test/output/test_* test/html/*.html include rpm_build_script +include lconline/*.html recursive-include locale *.mo recursive-include po *.po *.py Makefile -recursive-include lconline * -recursive-include tests *.py exclude _linkchecker_configdata.py diff --git a/Makefile b/Makefile index 56128b90..4fbc3d5d 100644 --- a/Makefile +++ b/Makefile @@ -9,32 +9,40 @@ HOST=treasure.calvinsplayground.de LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s OFFLINETESTS = test_base test_misc test_file test_frames ONLINETESTS = test_mail test_http test_https test_news test_ftp - DESTDIR=/. -.PHONY: test clean distclean package files upload dist locale all +.PHONY: all all: @echo "Read the file INSTALL to see how to build and install" +.PHONY: clean clean: -./setup.py clean --all # ignore errors of this command $(MAKE) -C po clean find . -name '*.py[co]' | xargs rm -f +.PHONY: distclean distclean: clean cleandeb rm -rf dist build # just to be sure clean also the build dir rm -f $(PACKAGE)-out.* VERSION _$(PACKAGE)_configdata.py MANIFEST Packages.gz +.PHONY: cleandeb cleandeb: rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp rm -f debian/*.debhelper debian/{files,substvars} rm -f configure-stamp build-stamp -dist: locale +.PHONY: config +config: + ./setup.py config -lcrypto + +.PHONY: dist +dist: locale config ./setup.py sdist --formats=gztar,zip bdist_rpm # extra run without SSL compilation - python setup.py bdist_wininst + ./setup.py bdist_wininst +.PHONY: deb deb: # cleandeb because distutils choke on dangling symlinks # (linkchecker.1 -> undocumented.1) @@ -42,18 +50,22 @@ deb: fakeroot debian/rules binary fakeroot dpkg-buildpackage -sgpg -pgpg -k959C340F +.PHONY: packages packages: -cd .. && dpkg-scanpackages . | gzip --best > Packages.gz +.PHONY: sources sources: -cd .. && dpkg-scansources . | gzip --best > Sources.gz +.PHONY: files files: locale env http_proxy="" ./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/ VERSION: echo $(VERSION) > VERSION +.PHONY: upload upload: distclean dist files VERSION scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt scp README shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/readme.txt @@ -62,11 +74,14 @@ upload: distclean dist files VERSION scp dist/* shell1.sourceforge.net:/home/groups/ftp/pub/$(PACKAGE)/ ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make" +.PHONY: test test: python2 test/regrtest.py $(OFFLINETESTS) +.PHONY: onlinetest onlinetest: python2 test/regrtest.py $(ONLINETESTS) +.PHONY: locale locale: $(MAKE) -C po diff --git a/TODO b/TODO index c5f11cdf..c99d3792 100644 --- a/TODO +++ b/TODO @@ -1,2 +1,4 @@ Better link name parsing Embed the Mozilla spidermonkey JavaScript engine for JS links +Warning if HTML source download is too slow +Warning if HTML source is too big diff --git a/debian/rules b/debian/rules index 4607e49c..80185276 100755 --- a/debian/rules +++ b/debian/rules @@ -29,25 +29,7 @@ ssl: @echo done -configure: configure-stamp -configure-stamp: - dh_testdir - ./setup.py config -lcrypto - touch configure-stamp - - -build: configure-stamp build-stamp -build-stamp: - dh_testdir - rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl - ./setup.py build - touch build-stamp - -clean: - dh_testdir - rm -f build-stamp configure-stamp - $(MAKE) clean - dh_clean +include debian/rules.mk install: build dh_testdir diff --git a/linkcheck/Config.py b/linkcheck/Config.py index 3974db94..af8593cf 100644 --- a/linkcheck/Config.py +++ b/linkcheck/Config.py @@ -16,24 +16,24 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. import ConfigParser, sys, os, re, UserDict, string, time -import Logging, linkcheckerConf +import Logging, _linkchecker_configdata from os.path import expanduser,normpath,normcase,join,isfile from types import StringType from urllib import getproxies from linkcheck import _ -Version = linkcheckerConf.version -AppName = linkcheckerConf.name +Version = _linkchecker_configdata.version +AppName = _linkchecker_configdata.name App = AppName+" "+Version UserAgent = AppName+"/"+Version -Author = linkcheckerConf.author +Author = _linkchecker_configdata.author HtmlAuthor = string.replace(Author, ' ', ' ') Copyright = "Copyright © 2000,2001 by "+Author HtmlCopyright = "Copyright © 2000,2001 by "+HtmlAuthor AppInfo = App+" "+Copyright HtmlAppInfo = App+", "+HtmlCopyright -Url = linkcheckerConf.url -Email = linkcheckerConf.author_email +Url = _linkchecker_configdata.url +Email = _linkchecker_configdata.author_email Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY! This is free software, and you are welcome to redistribute it under certain conditions. Look at the file `LICENSE' whithin this @@ -360,7 +360,7 @@ class Configuration(UserDict.UserDict): def read(self, files = []): if not files: # system wide config settings - config_dir = join(linkcheckerConf.install_data, 'linkchecker') + config_dir = join(_linkchecker_configdata.install_data, 'linkchecker') files.append(norm(join(config_dir, "linkcheckerrc"))) # per user config settings files.append(norm("~/.linkcheckerrc")) diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index a43a65ff..900d08b6 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -356,7 +356,7 @@ class UrlData: end = CommentPatternEnd.search(self.getContent(), index) if not match: break index = match.end() + 1 - self.html_comments.append(start, match.end()) + self.html_comments.append((start, match.end())) def _isInComment(self, index): for low,high in self.html_comments: @@ -376,13 +376,13 @@ class UrlData: str(self)+"\n"+Config.DebugDelim) # search for a possible base reference bases = self.searchInForTag(BasePattern) - + baseRef = None if len(bases)>=1: baseRef = bases[0][0] if len(bases)>1: self.setWarning("more than one base tag found") - + # search for tags and add found tags to URL queue for pattern in LinkPatterns: urls = self.searchInForTag(pattern) @@ -403,6 +403,8 @@ class UrlData: if self._isInComment(match.start()): continue # need to strip optional ending quotes for the meta tag url = string.strip(StringUtil.stripQuotes(match.group('value'))) + # need to resolve HTML entities + url = StringUtil.unhtmlify(url) lineno=StringUtil.getLineNumber(self.getContent(), match.start()) # extra feature: get optional name for this bookmark name = self.searchInForName(pattern['tag'], pattern['attr'], diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index b4ee2030..59136512 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -20,12 +20,12 @@ class error(Exception): # i18n suppport LANG="EN" # default language (used for HTML output) -import linkcheckerConf +import _linkchecker_configdata try: import fintl,os,string gettext = fintl.gettext domain = 'linkcheck' - localedir = os.path.join(linkcheckerConf.install_data, 'locale') + localedir = os.path.join(_linkchecker_configdata.install_data, 'locale') fintl.bindtextdomain(domain, localedir) fintl.textdomain(domain) languages = [] diff --git a/test/html/frames.html b/test/html/frames.html index 114cd286..d90c2663 100644 --- a/test/html/frames.html +++ b/test/html/frames.html @@ -1,5 +1,5 @@ - - + + diff --git a/test/output/test_base b/test/output/test_base index 01583d8f..bcf93064 100644 --- a/test/output/test_base +++ b/test/output/test_base @@ -1,26 +1,20 @@ test_base url file:///home/calvin/projects/linkchecker/test/html/base1.html -valid Valid +valid url file:///home/calvin/projects/linkchecker/test/html/base2.html -valid Valid +valid url file:///home/calvin/projects/linkchecker/test/html/base3.html -valid Valid -url file:/etc -parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html -line 6 -valid Valid -url http://www.calvinandhobbes.com/ -parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html -line 4 -warning Effective URL http://www.ucomics.com/calvinandhobbes/ -valid Valid: 200 OK -url passwd -parenturl file:/home/calvin/projects/linkchecker/test/html/base2.html -line 4 -baseurl file:/etc/ -valid Valid -url blubba.shtml -parenturl file:/home/calvin/projects/linkchecker/test/html/base3.html -line 4 -baseurl http://treasure.calvinsplayground.de/~calvin/ -error Error: 404 Not Found +valid +url base2.html +cached +valid +url base2.html +cached +valid +url html/base1.html +baseurl .. +error +url html/base1.html +cached +baseurl .. +error