mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-24 01:40:23 +00:00
resolve html entities and fix offline tests
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@246 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2047e8d973
commit
a64e1dcab1
9 changed files with 58 additions and 64 deletions
|
|
@ -1,4 +1,3 @@
|
|||
include MANIFEST.in
|
||||
include README FAQ INSTALL LICENSE TODO draft-gilman-news-url-00.txt
|
||||
include norobots-rfc.html
|
||||
include linkcheckerrc linkchecker linkchecker.bat linkchecker.1 create.sql
|
||||
|
|
@ -6,13 +5,13 @@ include lc.cgi lc.fcgi lc.sz_fcgi
|
|||
include Makefile
|
||||
include create.sql
|
||||
include debian/rules debian/changelog debian/copyright debian/control
|
||||
include debian/docs debian/links debian/postinst
|
||||
include debian/linkchecker.* debian/linkchecker-ssl.* debian/*-ssl
|
||||
include debian/prerm
|
||||
include DNS/README
|
||||
include test/viewprof.py test/profiletest.py test/*.html test/robots.txt
|
||||
include test/*.py test/*.txt
|
||||
include test/output/test_* test/html/*.html
|
||||
include rpm_build_script
|
||||
include lconline/*.html
|
||||
recursive-include locale *.mo
|
||||
recursive-include po *.po *.py Makefile
|
||||
recursive-include lconline *
|
||||
recursive-include tests *.py
|
||||
exclude _linkchecker_configdata.py
|
||||
|
|
|
|||
23
Makefile
23
Makefile
|
|
@ -9,32 +9,40 @@ HOST=treasure.calvinsplayground.de
|
|||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s
|
||||
OFFLINETESTS = test_base test_misc test_file test_frames
|
||||
ONLINETESTS = test_mail test_http test_https test_news test_ftp
|
||||
|
||||
DESTDIR=/.
|
||||
.PHONY: test clean distclean package files upload dist locale all
|
||||
|
||||
.PHONY: all
|
||||
all:
|
||||
@echo "Read the file INSTALL to see how to build and install"
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
-./setup.py clean --all # ignore errors of this command
|
||||
$(MAKE) -C po clean
|
||||
find . -name '*.py[co]' | xargs rm -f
|
||||
|
||||
.PHONY: distclean
|
||||
distclean: clean cleandeb
|
||||
rm -rf dist build # just to be sure clean also the build dir
|
||||
rm -f $(PACKAGE)-out.* VERSION _$(PACKAGE)_configdata.py MANIFEST Packages.gz
|
||||
|
||||
.PHONY: cleandeb
|
||||
cleandeb:
|
||||
rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp
|
||||
rm -f debian/*.debhelper debian/{files,substvars}
|
||||
rm -f configure-stamp build-stamp
|
||||
|
||||
dist: locale
|
||||
.PHONY: config
|
||||
config:
|
||||
./setup.py config -lcrypto
|
||||
|
||||
.PHONY: dist
|
||||
dist: locale config
|
||||
./setup.py sdist --formats=gztar,zip bdist_rpm
|
||||
# extra run without SSL compilation
|
||||
python setup.py bdist_wininst
|
||||
./setup.py bdist_wininst
|
||||
|
||||
.PHONY: deb
|
||||
deb:
|
||||
# cleandeb because distutils choke on dangling symlinks
|
||||
# (linkchecker.1 -> undocumented.1)
|
||||
|
|
@ -42,18 +50,22 @@ deb:
|
|||
fakeroot debian/rules binary
|
||||
fakeroot dpkg-buildpackage -sgpg -pgpg -k959C340F
|
||||
|
||||
.PHONY: packages
|
||||
packages:
|
||||
-cd .. && dpkg-scanpackages . | gzip --best > Packages.gz
|
||||
|
||||
.PHONY: sources
|
||||
sources:
|
||||
-cd .. && dpkg-scansources . | gzip --best > Sources.gz
|
||||
|
||||
.PHONY: files
|
||||
files: locale
|
||||
env http_proxy="" ./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
VERSION:
|
||||
echo $(VERSION) > VERSION
|
||||
|
||||
.PHONY: upload
|
||||
upload: distclean dist files VERSION
|
||||
scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt
|
||||
scp README shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/readme.txt
|
||||
|
|
@ -62,11 +74,14 @@ upload: distclean dist files VERSION
|
|||
scp dist/* shell1.sourceforge.net:/home/groups/ftp/pub/$(PACKAGE)/
|
||||
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make"
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
python2 test/regrtest.py $(OFFLINETESTS)
|
||||
|
||||
.PHONY: onlinetest
|
||||
onlinetest:
|
||||
python2 test/regrtest.py $(ONLINETESTS)
|
||||
|
||||
.PHONY: locale
|
||||
locale:
|
||||
$(MAKE) -C po
|
||||
|
|
|
|||
2
TODO
2
TODO
|
|
@ -1,2 +1,4 @@
|
|||
Better link name parsing
|
||||
Embed the Mozilla spidermonkey JavaScript engine for JS links
|
||||
Warning if HTML source download is too slow
|
||||
Warning if HTML source is too big
|
||||
|
|
|
|||
20
debian/rules
vendored
20
debian/rules
vendored
|
|
@ -29,25 +29,7 @@ ssl:
|
|||
@echo done
|
||||
|
||||
|
||||
configure: configure-stamp
|
||||
configure-stamp:
|
||||
dh_testdir
|
||||
./setup.py config -lcrypto
|
||||
touch configure-stamp
|
||||
|
||||
|
||||
build: configure-stamp build-stamp
|
||||
build-stamp:
|
||||
dh_testdir
|
||||
rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl
|
||||
./setup.py build
|
||||
touch build-stamp
|
||||
|
||||
clean:
|
||||
dh_testdir
|
||||
rm -f build-stamp configure-stamp
|
||||
$(MAKE) clean
|
||||
dh_clean
|
||||
include debian/rules.mk
|
||||
|
||||
install: build
|
||||
dh_testdir
|
||||
|
|
|
|||
|
|
@ -16,24 +16,24 @@
|
|||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import ConfigParser, sys, os, re, UserDict, string, time
|
||||
import Logging, linkcheckerConf
|
||||
import Logging, _linkchecker_configdata
|
||||
from os.path import expanduser,normpath,normcase,join,isfile
|
||||
from types import StringType
|
||||
from urllib import getproxies
|
||||
from linkcheck import _
|
||||
|
||||
Version = linkcheckerConf.version
|
||||
AppName = linkcheckerConf.name
|
||||
Version = _linkchecker_configdata.version
|
||||
AppName = _linkchecker_configdata.name
|
||||
App = AppName+" "+Version
|
||||
UserAgent = AppName+"/"+Version
|
||||
Author = linkcheckerConf.author
|
||||
Author = _linkchecker_configdata.author
|
||||
HtmlAuthor = string.replace(Author, ' ', '&nbsp;')
|
||||
Copyright = "Copyright © 2000,2001 by "+Author
|
||||
HtmlCopyright = "Copyright &copy; 2000,2001 by "+HtmlAuthor
|
||||
AppInfo = App+" "+Copyright
|
||||
HtmlAppInfo = App+", "+HtmlCopyright
|
||||
Url = linkcheckerConf.url
|
||||
Email = linkcheckerConf.author_email
|
||||
Url = _linkchecker_configdata.url
|
||||
Email = _linkchecker_configdata.author_email
|
||||
Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY!
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions. Look at the file `LICENSE' whithin this
|
||||
|
|
@ -360,7 +360,7 @@ class Configuration(UserDict.UserDict):
|
|||
def read(self, files = []):
|
||||
if not files:
|
||||
# system wide config settings
|
||||
config_dir = join(linkcheckerConf.install_data, 'linkchecker')
|
||||
config_dir = join(_linkchecker_configdata.install_data, 'linkchecker')
|
||||
files.append(norm(join(config_dir, "linkcheckerrc")))
|
||||
# per user config settings
|
||||
files.append(norm("~/.linkcheckerrc"))
|
||||
|
|
|
|||
|
|
@ -356,7 +356,7 @@ class UrlData:
|
|||
end = CommentPatternEnd.search(self.getContent(), index)
|
||||
if not match: break
|
||||
index = match.end() + 1
|
||||
self.html_comments.append(start, match.end())
|
||||
self.html_comments.append((start, match.end()))
|
||||
|
||||
def _isInComment(self, index):
|
||||
for low,high in self.html_comments:
|
||||
|
|
@ -376,13 +376,13 @@ class UrlData:
|
|||
str(self)+"\n"+Config.DebugDelim)
|
||||
# search for a possible base reference
|
||||
bases = self.searchInForTag(BasePattern)
|
||||
|
||||
|
||||
baseRef = None
|
||||
if len(bases)>=1:
|
||||
baseRef = bases[0][0]
|
||||
if len(bases)>1:
|
||||
self.setWarning("more than one base tag found")
|
||||
|
||||
|
||||
# search for tags and add found tags to URL queue
|
||||
for pattern in LinkPatterns:
|
||||
urls = self.searchInForTag(pattern)
|
||||
|
|
@ -403,6 +403,8 @@ class UrlData:
|
|||
if self._isInComment(match.start()): continue
|
||||
# need to strip optional ending quotes for the meta tag
|
||||
url = string.strip(StringUtil.stripQuotes(match.group('value')))
|
||||
# need to resolve HTML entities
|
||||
url = StringUtil.unhtmlify(url)
|
||||
lineno=StringUtil.getLineNumber(self.getContent(), match.start())
|
||||
# extra feature: get optional name for this bookmark
|
||||
name = self.searchInForName(pattern['tag'], pattern['attr'],
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ class error(Exception):
|
|||
|
||||
# i18n suppport
|
||||
LANG="EN" # default language (used for HTML output)
|
||||
import linkcheckerConf
|
||||
import _linkchecker_configdata
|
||||
try:
|
||||
import fintl,os,string
|
||||
gettext = fintl.gettext
|
||||
domain = 'linkcheck'
|
||||
localedir = os.path.join(linkcheckerConf.install_data, 'locale')
|
||||
localedir = os.path.join(_linkchecker_configdata.install_data, 'locale')
|
||||
fintl.bindtextdomain(domain, localedir)
|
||||
fintl.textdomain(domain)
|
||||
languages = []
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
<!-- frame src urls -->
|
||||
<frameset border="0" frameborder="0" framespacing="0">
|
||||
<frame name="top" src="test1.html" frameborder="0">
|
||||
<frame name="bottom" src="test2.html" frameborder="0">
|
||||
<frame name="top" src="base1.html" frameborder="0">
|
||||
<frame name="bottom" src="http.html" frameborder="0">
|
||||
</frameset>
|
||||
|
|
|
|||
|
|
@ -1,26 +1,20 @@
|
|||
test_base
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base1.html
|
||||
valid Valid
|
||||
valid
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base2.html
|
||||
valid Valid
|
||||
valid
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base3.html
|
||||
valid Valid
|
||||
url file:/etc
|
||||
parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html
|
||||
line 6
|
||||
valid Valid
|
||||
url http://www.calvinandhobbes.com/
|
||||
parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html
|
||||
line 4
|
||||
warning Effective URL http://www.ucomics.com/calvinandhobbes/
|
||||
valid Valid: 200 OK
|
||||
url passwd
|
||||
parenturl file:/home/calvin/projects/linkchecker/test/html/base2.html
|
||||
line 4
|
||||
baseurl file:/etc/
|
||||
valid Valid
|
||||
url blubba.shtml
|
||||
parenturl file:/home/calvin/projects/linkchecker/test/html/base3.html
|
||||
line 4
|
||||
baseurl http://treasure.calvinsplayground.de/~calvin/
|
||||
error Error: 404 Not Found
|
||||
valid
|
||||
url base2.html
|
||||
cached
|
||||
valid
|
||||
url base2.html
|
||||
cached
|
||||
valid
|
||||
url html/base1.html
|
||||
baseurl ..
|
||||
error
|
||||
url html/base1.html
|
||||
cached
|
||||
baseurl ..
|
||||
error
|
||||
|
|
|
|||
Loading…
Reference in a new issue