diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index af7129f6..da839958 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -209,25 +209,6 @@ class FileUrl (urlbase.UrlBase): data = super(FileUrl, self).read_content() return data - def is_html (self): - """Check if file is a HTML file.""" - return self.ContentMimetypes.get(self.get_content_type()) == "html" - - def is_css (self): - """ - Check if file is a CSS file. - """ - return self.ContentMimetypes.get(self.get_content_type()) == "css" - - def is_file (self): - """ - This is a file. - - @return: True - @rtype: bool - """ - return True - def get_os_filename (self): """ Construct os specific file path out of the file:// URL. diff --git a/linkcheck/checker/ftpurl.py b/linkcheck/checker/ftpurl.py index db0eb431..af21b61b 100644 --- a/linkcheck/checker/ftpurl.py +++ b/linkcheck/checker/ftpurl.py @@ -161,14 +161,6 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): self.url_connection.dir(add_entry) return files - def is_html (self): - """See if URL target is a HTML file by looking at the extension.""" - return self.ContentMimetypes.get(self.get_content_type()) == "html" - - def is_css (self): - """See if URL target is a CSS file by looking at the extension.""" - return self.ContentMimetypes.get(self.get_content_type()) == "css" - def is_parseable (self): """See if URL target is parseable for recursion.""" if self.is_directory(): diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 9ac311dc..49fcb673 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -217,35 +217,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): buf.write(data) return buf.getvalue() - def is_html (self): - """ - See if this URL points to a HTML file by looking at the - Content-Type header, file extension and file content. - - @return: True if URL points to HTML file - @rtype: bool - """ - if not self.valid: - return False - mime = self.get_content_type() - return self.ContentMimetypes.get(mime) == "html" - - def is_css (self): - """Return True iff content of this url is CSS stylesheet.""" - if not self.valid: - return False - mime = self.get_content_type() - return self.ContentMimetypes.get(mime) == "css" - - def is_http (self): - """ - This is a HTTP file. - - @return: True - @rtype: bool - """ - return True - def is_parseable (self): """ Check if content is parseable for recursion. diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index afb44da6..1a181a51 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -234,22 +234,27 @@ class UrlBase (object): return False def is_html (self): - """ - Return True iff content of this url is HTML formatted. - """ - return False + """Return True iff content of this url is HTML formatted.""" + return self._is_ctype("html") def is_css (self): """Return True iff content of this url is CSS stylesheet.""" - return False + return self._is_ctype("css") + + def _is_ctype(self, ctype): + """Return True iff content is valid and of the given type.""" + if not self.valid: + return False + mime = self.get_content_type() + return self.ContentMimetypes.get(mime) == ctype def is_http (self): - """Return True for http:// URLs.""" - return False + """Return True for http:// or https:// URLs.""" + return self.scheme in ("http", "https") def is_file (self): """Return True for file:// URLs.""" - return False + return self.scheme == "file" def is_directory(self): """Return True if current URL represents a directory."""