mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-12 18:40:57 +00:00
quote url in output
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1255 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
7d5ba1da34
commit
5187dbc4c2
7 changed files with 127 additions and 7 deletions
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
from StandardLogger import StandardLogger
|
||||
from linkcheck import StringUtil, i18n, AnsiColor
|
||||
from linkcheck.url import url_quote
|
||||
|
||||
|
||||
class ColoredLogger (StandardLogger):
|
||||
|
|
@ -88,7 +89,8 @@ class ColoredLogger (StandardLogger):
|
|||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write(self.field("realurl")+self.spaces("realurl")+
|
||||
self.colorreal+urlData.url+self.colorreset+"\n")
|
||||
self.colorreal+url_quote(urlData.url)+
|
||||
self.colorreset+"\n")
|
||||
if urlData.dltime>=0 and self.has_field("dltime"):
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
import time
|
||||
from linkcheck import Config, i18n
|
||||
from linkcheck.log import strtime, strduration
|
||||
from linkcheck.url import url_quote
|
||||
from StandardLogger import StandardLogger
|
||||
from Logger import Logger
|
||||
|
||||
|
|
@ -56,7 +57,7 @@ class GMLLogger (StandardLogger):
|
|||
self.fd.write(" node [\n")
|
||||
self.fd.write(" id %d\n" % node.id)
|
||||
if self.has_field("realurl"):
|
||||
self.fd.write(' label "%s"\n' % node.url)
|
||||
self.fd.write(' label "%s"\n' % url_quote(node.url))
|
||||
if node.dltime>=0 and self.has_field("dltime"):
|
||||
self.fd.write(" dltime %d\n" % node.dltime)
|
||||
if node.dlsize>=0 and self.has_field("dlsize"):
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
from StandardLogger import StandardLogger
|
||||
from Logger import Logger
|
||||
from linkcheck.log import strtime, strduration
|
||||
from linkcheck.url import url_quote
|
||||
from linkcheck import StringUtil, i18n, Config
|
||||
import time
|
||||
|
||||
|
|
@ -96,8 +97,8 @@ class HtmlLogger (StandardLogger):
|
|||
urlData.baseRef+"</td>\n</tr>\n")
|
||||
if urlData.url and self.has_field("realurl"):
|
||||
self.fd.write("<tr>\n<td>"+self.field("realurl")+"</td>\n<td>"+
|
||||
'<a target="top" href="'+urlData.url+
|
||||
'">'+urlData.url+"</a></td>\n</tr>\n")
|
||||
'<a target="top" href="'+url_quote(urlData.url)+
|
||||
'">'+url_quote(urlData.url)+"</a></td>\n</tr>\n")
|
||||
if urlData.dltime>=0 and self.has_field("dltime"):
|
||||
self.fd.write("<tr>\n<td>"+self.field("dltime")+"</td>\n<td>"+
|
||||
(i18n._("%.3f seconds") % urlData.dltime)+
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from StandardLogger import StandardLogger
|
|||
from Logger import Logger
|
||||
import time
|
||||
from linkcheck.log import strtime, strduration
|
||||
from linkcheck.url import url_quote
|
||||
from linkcheck import StringUtil, i18n, Config
|
||||
|
||||
class SQLLogger (StandardLogger):
|
||||
|
|
@ -59,7 +60,7 @@ class SQLLogger (StandardLogger):
|
|||
StringUtil.sqlify(urlData.warningString),
|
||||
StringUtil.sqlify(urlData.infoString),
|
||||
urlData.valid,
|
||||
StringUtil.sqlify(urlData.url),
|
||||
StringUtil.sqlify(url_quote(urlData.url)),
|
||||
urlData.line,
|
||||
urlData.column,
|
||||
StringUtil.sqlify(urlData.name),
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import sys, time
|
||||
from linkcheck import Config, i18n
|
||||
from linkcheck.url import url_quote
|
||||
from Logger import Logger
|
||||
from linkcheck.log import strtime, strduration
|
||||
from linkcheck import StringUtil
|
||||
|
|
@ -101,7 +102,7 @@ __init__(self, **args)
|
|||
urlData.baseRef+"\n")
|
||||
if urlData.url and self.has_field('realurl'):
|
||||
self.fd.write(self.field("realurl")+self.spaces("realurl")+
|
||||
urlData.url+"\n")
|
||||
url_quote(urlData.url)+"\n")
|
||||
if urlData.dltime>=0 and self.has_field('dltime'):
|
||||
self.fd.write(self.field("dltime")+self.spaces("dltime")+
|
||||
i18n._("%.3f seconds\n") % urlData.dltime)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import time
|
|||
from linkcheck import Config, i18n
|
||||
from linkcheck.StringUtil import xmlify
|
||||
from linkcheck.log import strtime, strduration
|
||||
from linkcheck.url import url_quote
|
||||
from StandardLogger import StandardLogger
|
||||
from Logger import Logger
|
||||
|
||||
|
|
@ -59,7 +60,8 @@ class XMLLogger (StandardLogger):
|
|||
self.fd.write(' <node name="%d" ' % node.id)
|
||||
self.fd.write(">\n")
|
||||
if self.has_field("realurl"):
|
||||
self.fd.write(" <label>%s</label>\n" % xmlify(node.url))
|
||||
self.fd.write(" <label>%s</label>\n" %\
|
||||
xmlify(url_quote(node.url)))
|
||||
self.fd.write(" <data>\n")
|
||||
if node.dltime>=0 and self.has_field("dltime"):
|
||||
self.fd.write(" <dltime>%f</dltime>\n" % node.dltime)
|
||||
|
|
|
|||
112
linkcheck/url.py
Normal file
112
linkcheck/url.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""url utils"""
|
||||
# Copyright (C) 2000-2004 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
__version__ = "$Revision$"[11:-2]
|
||||
__date__ = "$Date$"[7:-2]
|
||||
|
||||
import re, urlparse, os
|
||||
from urllib import splittype, splithost, splitnport, splitquery, quote, unquote
|
||||
|
||||
# adapted from David Wheelers "Secure Programming for Linux and Unix HOWTO"
# character classes used to compose the safe-URL regular expressions below
_az09 = r"a-z0-9"
_path = r"\-\_\.\!\~\*\'\(\)"
# first hex digit of a "safe" %-escape starts at 2, which excludes the
# %00-%1f control-character escapes
_hex_safe = r"2-9a-f"
_hex_full = r"0-9a-f"
_safe_scheme_pattern = r"(https?|ftp)"
_safe_host_pattern = r"([%(_az09)s][%(_az09)s\-]*(\.[%(_az09)s][%(_az09)s\-]*)*\.?)"%locals()
_safe_path_pattern = r"((/([%(_az09)s%(_path)s]|(%%[%(_hex_safe)s][%(_hex_full)s]))+)*/?)"%locals()
_safe_fragment_pattern = r"(\#([%(_az09)s%(_path)s\+]|(%%[%(_hex_safe)s][%(_hex_full)s]))+)?"%locals()
# full case-insensitive pattern: scheme://host/path#fragment
safe_url_pattern = "(?i)"+_safe_scheme_pattern+"://"+_safe_host_pattern+\
                   _safe_path_pattern+_safe_fragment_pattern

# is_valid_url(url) returns a match object (true) only if the complete
# string matches the safe-URL pattern
is_valid_url = re.compile("^%s$"%safe_url_pattern).match
|
||||
|
||||
def safe_host_pattern (host):
    """return a safe-URL regex pattern restricted to the given host
       regex fragment"""
    return "%s://%s%s%s" % (_safe_scheme_pattern, host,
                            _safe_path_pattern, _safe_fragment_pattern)
|
||||
|
||||
|
||||
# XXX better name/implementation for this function
|
||||
def stripsite (url):
|
||||
"""remove scheme and host from url. return host, newurl"""
|
||||
url = urlparse.urlsplit(url)
|
||||
return url[1], urlparse.urlunsplit( (0,0,url[2],url[3],url[4]) )
|
||||
|
||||
|
||||
def url_norm (url):
    """unquote and normalize url which must be quoted"""
    urlparts = list(urlparse.urlsplit(url))
    # unquote scheme (0), netloc (1), path (2) and fragment (4);
    # the query string (index 3) is deliberately left quoted
    urlparts[0] = unquote(urlparts[0])
    urlparts[1] = unquote(urlparts[1])
    urlparts[2] = unquote(urlparts[2])
    urlparts[4] = unquote(urlparts[4])
    # treat backslashes as path separators before normalizing
    path = urlparts[2].replace('\\', '/')
    if not path or path=='/':
        urlparts[2] = '/'
    else:
        # XXX this works only under windows and posix??
        # collapse redundant path segments
        urlparts[2] = os.path.normpath(path).replace('\\', '/')
        # normpath strips a trailing slash; restore it if present
        if path.endswith('/'):
            urlparts[2] += '/'
    return urlparse.urlunsplit(urlparts)
|
||||
|
||||
|
||||
def url_quote (url):
    """quote given url"""
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    # quote each component with the characters that are safe for it;
    # the query string is left untouched
    quoted = (quote(scheme),
              quote(netloc, ':'),
              quote(path, '/'),
              query,
              quote(fragment))
    return urlparse.urlunsplit(quoted)
|
||||
|
||||
|
||||
def document_quote (document):
    """quote given document"""
    doc, query = splitquery(document)
    quoted = quote(doc, '/')
    # reattach the (unquoted) query string only when it is non-empty
    if not query:
        return quoted
    return "%s?%s" % (quoted, query)
|
||||
|
||||
|
||||
# default port for each scheme known to spliturl(); unlisted schemes
# fall back to port 80 there
default_ports = {
    'http' : 80,
    'https' : 443,
    'nntps' : 563,
}
|
||||
|
||||
def spliturl (url):
    """split url in a tuple (scheme, hostname, port, document) where
       hostname is always lowercased
       precondition: url is syntactically correct URI (eg has no whitespace)"""
    scheme, rest = splittype(url)
    hostport, document = splithost(rest)
    # fall back to the scheme's default port (80 for unknown schemes)
    port = default_ports.get(scheme, 80)
    if hostport:
        hostport = hostport.lower()
        # strip an explicit :port from the host, overriding the default
        hostport, port = splitnport(hostport, port)
    return scheme, hostport, port, document
|
||||
|
||||
|
||||
# constants defining url part indexes
# (indexes into the 4-tuple returned by spliturl())
SCHEME = 0
HOSTNAME = DOMAIN = 1
PORT = 2
DOCUMENT = 3
|
||||
Loading…
Reference in a new issue