resolve html entities and fix offline tests

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@246 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-04-13 11:39:21 +00:00
parent 2047e8d973
commit a64e1dcab1
9 changed files with 58 additions and 64 deletions

View file

@ -1,4 +1,3 @@
include MANIFEST.in
include README FAQ INSTALL LICENSE TODO draft-gilman-news-url-00.txt
include norobots-rfc.html
include linkcheckerrc linkchecker linkchecker.bat linkchecker.1 create.sql
@ -6,13 +5,13 @@ include lc.cgi lc.fcgi lc.sz_fcgi
include Makefile
include create.sql
include debian/rules debian/changelog debian/copyright debian/control
include debian/docs debian/links debian/postinst
include debian/linkchecker.* debian/linkchecker-ssl.* debian/*-ssl
include debian/prerm
include DNS/README
include test/viewprof.py test/profiletest.py test/*.html test/robots.txt
include test/*.py test/*.txt
include test/output/test_* test/html/*.html
include rpm_build_script
include lconline/*.html
recursive-include locale *.mo
recursive-include po *.po *.py Makefile
recursive-include lconline *
recursive-include tests *.py
exclude _linkchecker_configdata.py

View file

@ -9,32 +9,40 @@ HOST=treasure.calvinsplayground.de
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s
OFFLINETESTS = test_base test_misc test_file test_frames
ONLINETESTS = test_mail test_http test_https test_news test_ftp
DESTDIR=/.
.PHONY: test clean distclean package files upload dist locale all
.PHONY: all
all:
@echo "Read the file INSTALL to see how to build and install"
.PHONY: clean
clean:
-./setup.py clean --all # ignore errors of this command
$(MAKE) -C po clean
find . -name '*.py[co]' | xargs rm -f
.PHONY: distclean
distclean: clean cleandeb
rm -rf dist build # just to be sure clean also the build dir
rm -f $(PACKAGE)-out.* VERSION _$(PACKAGE)_configdata.py MANIFEST Packages.gz
.PHONY: cleandeb
cleandeb:
rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp
rm -f debian/*.debhelper debian/{files,substvars}
rm -f configure-stamp build-stamp
dist: locale
.PHONY: config
config:
./setup.py config -lcrypto
.PHONY: dist
dist: locale config
./setup.py sdist --formats=gztar,zip bdist_rpm
# extra run without SSL compilation
python setup.py bdist_wininst
./setup.py bdist_wininst
.PHONY: deb
deb:
# cleandeb because distutils choke on dangling symlinks
# (linkchecker.1 -> undocumented.1)
@ -42,18 +50,22 @@ deb:
fakeroot debian/rules binary
fakeroot dpkg-buildpackage -sgpg -pgpg -k959C340F
.PHONY: packages
packages:
-cd .. && dpkg-scanpackages . | gzip --best > Packages.gz
.PHONY: sources
sources:
-cd .. && dpkg-scansources . | gzip --best > Sources.gz
.PHONY: files
files: locale
env http_proxy="" ./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
VERSION:
echo $(VERSION) > VERSION
.PHONY: upload
upload: distclean dist files VERSION
scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt
scp README shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/readme.txt
@ -62,11 +74,14 @@ upload: distclean dist files VERSION
scp dist/* shell1.sourceforge.net:/home/groups/ftp/pub/$(PACKAGE)/
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make"
.PHONY: test
test:
python2 test/regrtest.py $(OFFLINETESTS)
.PHONY: onlinetest
onlinetest:
python2 test/regrtest.py $(ONLINETESTS)
.PHONY: locale
locale:
$(MAKE) -C po

2
TODO
View file

@ -1,2 +1,4 @@
Better link name parsing
Embed the Mozilla spidermonkey JavaScript engine for JS links
Warning if HTML source download is too slow
Warning if HTML source is too big

20
debian/rules vendored
View file

@ -29,25 +29,7 @@ ssl:
@echo done
configure: configure-stamp
configure-stamp:
dh_testdir
./setup.py config -lcrypto
touch configure-stamp
build: configure-stamp build-stamp
build-stamp:
dh_testdir
rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl
./setup.py build
touch build-stamp
clean:
dh_testdir
rm -f build-stamp configure-stamp
$(MAKE) clean
dh_clean
include debian/rules.mk
install: build
dh_testdir

View file

@ -16,24 +16,24 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import ConfigParser, sys, os, re, UserDict, string, time
import Logging, linkcheckerConf
import Logging, _linkchecker_configdata
from os.path import expanduser,normpath,normcase,join,isfile
from types import StringType
from urllib import getproxies
from linkcheck import _
Version = linkcheckerConf.version
AppName = linkcheckerConf.name
Version = _linkchecker_configdata.version
AppName = _linkchecker_configdata.name
App = AppName+" "+Version
UserAgent = AppName+"/"+Version
Author = linkcheckerConf.author
Author = _linkchecker_configdata.author
HtmlAuthor = string.replace(Author, ' ', '&nbsp;')
Copyright = "Copyright © 2000,2001 by "+Author
HtmlCopyright = "Copyright &copy; 2000,2001 by "+HtmlAuthor
AppInfo = App+" "+Copyright
HtmlAppInfo = App+", "+HtmlCopyright
Url = linkcheckerConf.url
Email = linkcheckerConf.author_email
Url = _linkchecker_configdata.url
Email = _linkchecker_configdata.author_email
Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY!
This is free software, and you are welcome to redistribute it
under certain conditions. Look at the file `LICENSE' whithin this
@ -360,7 +360,7 @@ class Configuration(UserDict.UserDict):
def read(self, files = []):
if not files:
# system wide config settings
config_dir = join(linkcheckerConf.install_data, 'linkchecker')
config_dir = join(_linkchecker_configdata.install_data, 'linkchecker')
files.append(norm(join(config_dir, "linkcheckerrc")))
# per user config settings
files.append(norm("~/.linkcheckerrc"))

View file

@ -356,7 +356,7 @@ class UrlData:
end = CommentPatternEnd.search(self.getContent(), index)
if not match: break
index = match.end() + 1
self.html_comments.append(start, match.end())
self.html_comments.append((start, match.end()))
def _isInComment(self, index):
for low,high in self.html_comments:
@ -376,13 +376,13 @@ class UrlData:
str(self)+"\n"+Config.DebugDelim)
# search for a possible base reference
bases = self.searchInForTag(BasePattern)
baseRef = None
if len(bases)>=1:
baseRef = bases[0][0]
if len(bases)>1:
self.setWarning("more than one base tag found")
# search for tags and add found tags to URL queue
for pattern in LinkPatterns:
urls = self.searchInForTag(pattern)
@ -403,6 +403,8 @@ class UrlData:
if self._isInComment(match.start()): continue
# need to strip optional ending quotes for the meta tag
url = string.strip(StringUtil.stripQuotes(match.group('value')))
# need to resolve HTML entities
url = StringUtil.unhtmlify(url)
lineno=StringUtil.getLineNumber(self.getContent(), match.start())
# extra feature: get optional name for this bookmark
name = self.searchInForName(pattern['tag'], pattern['attr'],

View file

@ -20,12 +20,12 @@ class error(Exception):
# i18n support
LANG="EN" # default language (used for HTML output)
import linkcheckerConf
import _linkchecker_configdata
try:
import fintl,os,string
gettext = fintl.gettext
domain = 'linkcheck'
localedir = os.path.join(linkcheckerConf.install_data, 'locale')
localedir = os.path.join(_linkchecker_configdata.install_data, 'locale')
fintl.bindtextdomain(domain, localedir)
fintl.textdomain(domain)
languages = []

View file

@ -1,5 +1,5 @@
<!-- frame src urls -->
<frameset border="0" frameborder="0" framespacing="0">
<frame name="top" src="test1.html" frameborder="0">
<frame name="bottom" src="test2.html" frameborder="0">
<frame name="top" src="base1.html" frameborder="0">
<frame name="bottom" src="http.html" frameborder="0">
</frameset>

View file

@ -1,26 +1,20 @@
test_base
url file:///home/calvin/projects/linkchecker/test/html/base1.html
valid Valid
valid
url file:///home/calvin/projects/linkchecker/test/html/base2.html
valid Valid
valid
url file:///home/calvin/projects/linkchecker/test/html/base3.html
valid Valid
url file:/etc
parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html
line 6
valid Valid
url http://www.calvinandhobbes.com/
parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html
line 4
warning Effective URL http://www.ucomics.com/calvinandhobbes/
valid Valid: 200 OK
url passwd
parenturl file:/home/calvin/projects/linkchecker/test/html/base2.html
line 4
baseurl file:/etc/
valid Valid
url blubba.shtml
parenturl file:/home/calvin/projects/linkchecker/test/html/base3.html
line 4
baseurl http://treasure.calvinsplayground.de/~calvin/
error Error: 404 Not Found
valid
url base2.html
cached
valid
url base2.html
cached
valid
url html/base1.html
baseurl ..
error
url html/base1.html
cached
baseurl ..
error