support url list parsing in text files

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1709 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-09-03 14:54:59 +00:00
parent 594797b5e2
commit 79b0ef54f5
11 changed files with 76 additions and 27 deletions

View file

@ -25,6 +25,21 @@
Changed: setup.py, setup.cfg, doc/install.txt
Added: install-linkchecker.py
* Do not raise an error when user and/or password of ftp URLs is not
specified.
Type: bugfix
Changed: linkcheck/checker/ftpurl.py
* Honor the anchor part of the cache URL key, and handle the recursion
check with an extra cache key.
Type: bugfix
Changed: linkcheck/checker/{urlbase,cache,fileurl}.py
* Support URL lists in text files with one URL per line. Empty lines
or comment lines starting with '#' are ignored.
Type: feature
Changed: linkcheck/checker/fileurl.py
1.13.0 "The Butterfly Effect" (released 1.9.2004)
* lots of internal code restructuring
Type: code cleanup

3
TODO
View file

@ -4,9 +4,6 @@ Next releases:
- recursion into FTP links
- recursion into text files which have one URL per line
(make a magic for easier detection)
- SF bug #992389 bit me when I wanted to do absolute imports
at the checker classes :/
When the bug is fixed we can import absolute classes

View file

@ -86,6 +86,8 @@ class FileUrl (urlbase.UrlBase):
super(FileUrl, self).check_connection()
def get_content (self):
if not self.valid:
return ""
if self.is_directory() and not self.has_content:
return self.get_directory_content()
return super(FileUrl, self).get_content()

View file

@ -378,11 +378,11 @@ class UrlBase (object):
def allows_recursion (self):
"""return True iff we can recurse into the url's content"""
# note: test self.valid before self.is_parseable()
#linkcheck.log.debug(linkcheck.LOG_CHECK, "valid=%s, parseable=%s, "\
# "content=%s, robots=%s",
# self.valid, self.is_parseable(),
# self.can_get_content(),
# self.content_allows_robots())
linkcheck.log.debug(linkcheck.LOG_CHECK, "valid=%s, parseable=%s, "\
"content=%s, robots=%s",
self.valid, self.is_parseable(),
self.can_get_content(),
self.content_allows_robots())
return self.valid and \
self.is_parseable() and \
self.can_get_content() and \
@ -568,15 +568,14 @@ class UrlBase (object):
url = line[4:]
if url:
url_data = linkcheck.checker.get_url_from(url,
self.recursion_level+1, self.consumer, parent_url=self.url,
base_ref=None, line=lineno, name=name)
self.recursion_level+1, self.consumer,
parent_url=self.url, line=lineno, name=name)
self.consumer.append_url(url_data)
name = ""
def parse_text (self):
"""parse a text file with one url per line; comment and blank
lines are ignored
UNUSED and UNTESTED, just use linkchecker `cat file.txt`
"""
lineno = 0
for line in self.get_content().splitlines():

View file

@ -0,0 +1,9 @@
# LinkChecker url list
# empty lines and
# comments are ignored
file.html
file.html#isnix
file.html#iswas
javascript:loadthis()

View file

@ -0,0 +1,26 @@
url 'file://%(curdir)s/linkcheck/ftests/data/urllist.txt'
cache key file://%(curdir)s/linkcheck/ftests/data/urllist.txt
real url file://%(curdir)s/linkcheck/ftests/data/urllist.txt
valid
url 'file.html'
cache key file://%(curdir)s/linkcheck/ftests/data/file.html
real url file://%(curdir)s/linkcheck/ftests/data/file.html
valid
url 'file.html#isnix'
cache key file://%(curdir)s/linkcheck/ftests/data/file.html#isnix
real url file://%(curdir)s/linkcheck/ftests/data/file.html
warning anchor #isnix not found
valid
url 'file.html#iswas'
cache key file://%(curdir)s/linkcheck/ftests/data/file.html#iswas
real url file://%(curdir)s/linkcheck/ftests/data/file.html
valid
url 'javascript:loadthis()'
cache key javascript:loadthis()
real url javascript:loadthis()
warning Javascript url ignored
valid

View file

@ -41,6 +41,10 @@ class TestFile (linkcheck.ftests.StandardTest):
"""test links of file.css"""
self.file_test("file.css")
def test_urllist (self):
"""test url list parsing"""
self.file_test("urllist.txt")
def test_files (self):
"""test some direct file links"""
attrs = {'curdir': os.getcwd(),

View file

@ -4,7 +4,7 @@
msgid ""
msgstr ""
"Project-Id-Version: $Id$\n"
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
"PO-Revision-Date: 2004-08-31 22:33+0100\n"
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
"Language-Team: de <de@li.org>\n"
@ -30,9 +30,6 @@ msgstr "Schr
msgid "directory"
msgstr "Verzeichnis"
msgid "No user or password found"
msgstr "Kein Benutzername oder Passwort gefunden"
msgid "Remote host has closed connection"
msgstr "Entfernter Rechner hat die Verbindung geschlossen"
@ -765,3 +762,6 @@ msgstr "keine Dateien oder URLs angegeben"
msgid "Hit RETURN to finish"
msgstr "Drücken Sie RETURN zum Beenden"
#~ msgid "No user or password found"
#~ msgstr "Kein Benutzername oder Passwort gefunden"

View file

@ -4,7 +4,7 @@
msgid ""
msgstr ""
"Project-Id-Version: $Id$\n"
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
"PO-Revision-Date: 2004-08-31 22:34+0100\n"
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
"Language-Team: fr <fr@li.org>\n"
@ -28,9 +28,6 @@ msgstr ""
msgid "directory"
msgstr ""
msgid "No user or password found"
msgstr "Aucun utilisateur ou mot de passe trouvé"
msgid "Remote host has closed connection"
msgstr ""
@ -654,3 +651,6 @@ msgstr "aucun fichier ou url donn
msgid "Hit RETURN to finish"
msgstr ""
#~ msgid "No user or password found"
#~ msgstr "Aucun utilisateur ou mot de passe trouvé"

View file

@ -5,7 +5,7 @@
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -30,9 +30,6 @@ msgstr ""
msgid "directory"
msgstr ""
msgid "No user or password found"
msgstr ""
msgid "Remote host has closed connection"
msgstr ""

View file

@ -4,7 +4,7 @@
msgid ""
msgstr ""
"Project-Id-Version: $Id$\n"
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
"PO-Revision-Date: 2004-08-31 22:34+0100\n"
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
"Language-Team: nl <nl@li.org>\n"
@ -28,9 +28,6 @@ msgstr ""
msgid "directory"
msgstr ""
msgid "No user or password found"
msgstr "Geen gebruikernaam of wachtwoord gevonden"
msgid "Remote host has closed connection"
msgstr "Remote host heeft de verbinding beëindigd"
@ -667,3 +664,6 @@ msgstr "geen bestanden of URLs gegeven"
msgid "Hit RETURN to finish"
msgstr "Toets RETURN om te beëindigen"
#~ msgid "No user or password found"
#~ msgstr "Geen gebruikernaam of wachtwoord gevonden"