mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-21 06:41:00 +00:00
support url list parsing in text files
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1709 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
594797b5e2
commit
79b0ef54f5
11 changed files with 76 additions and 27 deletions
15
ChangeLog
15
ChangeLog
|
|
@ -25,6 +25,21 @@
|
|||
Changed: setup.py, setup.cfg, doc/install.txt
|
||||
Added: install-linkchecker.py
|
||||
|
||||
* Do not raise an error when user and/or password of ftp URLs is not
|
||||
specified.
|
||||
Type: bugfix
|
||||
Changed: linkcheck/checker/ftpurl.py
|
||||
|
||||
* honor anchor part of cache url key, handle the recursion check
|
||||
with an extra cache key
|
||||
Type: bugfix
|
||||
Changed: linkcheck/checker/{urlbase,cache,fileurl}.py
|
||||
|
||||
* Support URL lists in text files with one URL per line. Empty lines
|
||||
or comment lines starting with '#' are ignored.
|
||||
Type: feature
|
||||
Changed: linkcheck/checker/fileurl.py
|
||||
|
||||
1.13.0 "The Butterfly Effect" (released 1.9.2004)
|
||||
* lots of internal code restructuring
|
||||
Type: code cleanup
|
||||
|
|
|
|||
3
TODO
3
TODO
|
|
@ -4,9 +4,6 @@ Next releases:
|
|||
|
||||
- recursion into FTP links
|
||||
|
||||
- recursion into text files which have one URL per line
|
||||
(make a magic for easier detection)
|
||||
|
||||
- SF bug #992389 bit me when I wanted to do absolute imports
|
||||
at the checker classes :/
|
||||
When the bug is fixed we can import absolute classes
|
||||
|
|
|
|||
|
|
@ -86,6 +86,8 @@ class FileUrl (urlbase.UrlBase):
|
|||
super(FileUrl, self).check_connection()
|
||||
|
||||
def get_content (self):
|
||||
if not self.valid:
|
||||
return ""
|
||||
if self.is_directory() and not self.has_content:
|
||||
return self.get_directory_content()
|
||||
return super(FileUrl, self).get_content()
|
||||
|
|
|
|||
|
|
@ -378,11 +378,11 @@ class UrlBase (object):
|
|||
def allows_recursion (self):
|
||||
"""return True iff we can recurse into the url's content"""
|
||||
# note: test self.valid before self.is_parseable()
|
||||
#linkcheck.log.debug(linkcheck.LOG_CHECK, "valid=%s, parseable=%s, "\
|
||||
# "content=%s, robots=%s",
|
||||
# self.valid, self.is_parseable(),
|
||||
# self.can_get_content(),
|
||||
# self.content_allows_robots())
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "valid=%s, parseable=%s, "\
|
||||
"content=%s, robots=%s",
|
||||
self.valid, self.is_parseable(),
|
||||
self.can_get_content(),
|
||||
self.content_allows_robots())
|
||||
return self.valid and \
|
||||
self.is_parseable() and \
|
||||
self.can_get_content() and \
|
||||
|
|
@ -568,15 +568,14 @@ class UrlBase (object):
|
|||
url = line[4:]
|
||||
if url:
|
||||
url_data = linkcheck.checker.get_url_from(url,
|
||||
self.recursion_level+1, self.consumer, parent_url=self.url,
|
||||
base_ref=None, line=lineno, name=name)
|
||||
self.recursion_level+1, self.consumer,
|
||||
parent_url=self.url, line=lineno, name=name)
|
||||
self.consumer.append_url(url_data)
|
||||
name = ""
|
||||
|
||||
def parse_text (self):
|
||||
"""parse a text file with one url per line; comment and blank
|
||||
lines are ignored
|
||||
UNUSED and UNTESTED, just use linkchecker `cat file.txt`
|
||||
"""
|
||||
lineno = 0
|
||||
for line in self.get_content().splitlines():
|
||||
|
|
|
|||
9
linkcheck/ftests/data/urllist.txt
Normal file
9
linkcheck/ftests/data/urllist.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# LinkChecker url list
|
||||
# empty lines and
|
||||
# comments are ignored
|
||||
|
||||
file.html
|
||||
file.html#isnix
|
||||
file.html#iswas
|
||||
|
||||
javascript:loadthis()
|
||||
26
linkcheck/ftests/data/urllist.txt.result
Normal file
26
linkcheck/ftests/data/urllist.txt.result
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
url 'file://%(curdir)s/linkcheck/ftests/data/urllist.txt'
|
||||
cache key file://%(curdir)s/linkcheck/ftests/data/urllist.txt
|
||||
real url file://%(curdir)s/linkcheck/ftests/data/urllist.txt
|
||||
valid
|
||||
|
||||
url 'file.html'
|
||||
cache key file://%(curdir)s/linkcheck/ftests/data/file.html
|
||||
real url file://%(curdir)s/linkcheck/ftests/data/file.html
|
||||
valid
|
||||
|
||||
url 'file.html#isnix'
|
||||
cache key file://%(curdir)s/linkcheck/ftests/data/file.html#isnix
|
||||
real url file://%(curdir)s/linkcheck/ftests/data/file.html
|
||||
warning anchor #isnix not found
|
||||
valid
|
||||
|
||||
url 'file.html#iswas'
|
||||
cache key file://%(curdir)s/linkcheck/ftests/data/file.html#iswas
|
||||
real url file://%(curdir)s/linkcheck/ftests/data/file.html
|
||||
valid
|
||||
|
||||
url 'javascript:loadthis()'
|
||||
cache key javascript:loadthis()
|
||||
real url javascript:loadthis()
|
||||
warning Javascript url ignored
|
||||
valid
|
||||
|
|
@ -41,6 +41,10 @@ class TestFile (linkcheck.ftests.StandardTest):
|
|||
"""test links of file.css"""
|
||||
self.file_test("file.css")
|
||||
|
||||
def test_urllist (self):
|
||||
"""test url list parsing"""
|
||||
self.file_test("urllist.txt")
|
||||
|
||||
def test_files (self):
|
||||
"""test some direct file links"""
|
||||
attrs = {'curdir': os.getcwd(),
|
||||
|
|
|
|||
8
po/de.po
8
po/de.po
|
|
@ -4,7 +4,7 @@
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: $Id$\n"
|
||||
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
|
||||
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
|
||||
"PO-Revision-Date: 2004-08-31 22:33+0100\n"
|
||||
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
|
||||
"Language-Team: de <de@li.org>\n"
|
||||
|
|
@ -30,9 +30,6 @@ msgstr "Schr
|
|||
msgid "directory"
|
||||
msgstr "Verzeichnis"
|
||||
|
||||
msgid "No user or password found"
|
||||
msgstr "Kein Benutzername oder Passwort gefunden"
|
||||
|
||||
msgid "Remote host has closed connection"
|
||||
msgstr "Entfernter Rechner hat die Verbindung geschlossen"
|
||||
|
||||
|
|
@ -765,3 +762,6 @@ msgstr "keine Dateien oder URLs angegeben"
|
|||
|
||||
msgid "Hit RETURN to finish"
|
||||
msgstr "Drücken Sie RETURN zum Beenden"
|
||||
|
||||
#~ msgid "No user or password found"
|
||||
#~ msgstr "Kein Benutzername oder Passwort gefunden"
|
||||
|
|
|
|||
8
po/fr.po
8
po/fr.po
|
|
@ -4,7 +4,7 @@
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: $Id$\n"
|
||||
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
|
||||
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
|
||||
"PO-Revision-Date: 2004-08-31 22:34+0100\n"
|
||||
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
|
||||
"Language-Team: fr <fr@li.org>\n"
|
||||
|
|
@ -28,9 +28,6 @@ msgstr ""
|
|||
msgid "directory"
|
||||
msgstr ""
|
||||
|
||||
msgid "No user or password found"
|
||||
msgstr "Aucun utilisateur ou mot de passe trouvé"
|
||||
|
||||
msgid "Remote host has closed connection"
|
||||
msgstr ""
|
||||
|
||||
|
|
@ -654,3 +651,6 @@ msgstr "aucun fichier ou url donné"
|
|||
|
||||
msgid "Hit RETURN to finish"
|
||||
msgstr ""
|
||||
|
||||
#~ msgid "No user or password found"
|
||||
#~ msgstr "Aucun utilisateur ou mot de passe trouvé"
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\n"
|
||||
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
|
||||
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
|
|
@ -30,9 +30,6 @@ msgstr ""
|
|||
msgid "directory"
|
||||
msgstr ""
|
||||
|
||||
msgid "No user or password found"
|
||||
msgstr ""
|
||||
|
||||
msgid "Remote host has closed connection"
|
||||
msgstr ""
|
||||
|
||||
|
|
|
|||
8
po/nl.po
8
po/nl.po
|
|
@ -4,7 +4,7 @@
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: $Id$\n"
|
||||
"POT-Creation-Date: 2004-09-01 00:23+CEST\n"
|
||||
"POT-Creation-Date: 2004-09-03 16:54+CEST\n"
|
||||
"PO-Revision-Date: 2004-08-31 22:34+0100\n"
|
||||
"Last-Translator: Bastian Kleineidam <calvin@debian.org>\n"
|
||||
"Language-Team: nl <nl@li.org>\n"
|
||||
|
|
@ -28,9 +28,6 @@ msgstr ""
|
|||
msgid "directory"
|
||||
msgstr ""
|
||||
|
||||
msgid "No user or password found"
|
||||
msgstr "Geen gebruikernaam of wachtwoord gevonden"
|
||||
|
||||
msgid "Remote host has closed connection"
|
||||
msgstr "Remote host heeft de verbinding beëindigd"
|
||||
|
||||
|
|
@ -667,3 +664,6 @@ msgstr "geen bestanden of URLs gegeven"
|
|||
|
||||
msgid "Hit RETURN to finish"
|
||||
msgstr "Toets RETURN om te beëindigen"
|
||||
|
||||
#~ msgid "No user or password found"
|
||||
#~ msgstr "Geen gebruikernaam of wachtwoord gevonden"
|
||||
|
|
|
|||
Loading…
Reference in a new issue