diff --git a/TODO b/TODO
index f7c02b9d..3e06114d 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,4 @@
+Don't assume .html on local files: guess mime, parse URIs
 Check why threaded app wont exit resp. is stalled
 Another Profiling roundup
 Named constants for ANSI Color codes
diff --git a/debian/changelog b/debian/changelog
index 582f769e..7631863d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,12 @@
+linkchecker (1.3.23) unstable; urgency=low
+
+  * linkcheck/linkname.py: workaround for a bug in regex matching with
+    re.DOTALL. This could result in href="" names not being found correctly.
+  * linkcheck/linkname.py: immediately return on <img> tags inside <a>.
+  * linkchecker: interpolate %s in help text
+
+ -- Bastian Kleineidam <calvin@debian.org>  Wed, 13 Mar 2002 21:31:57 +0100
+
 linkchecker (1.3.22) unstable; urgency=low
 
   * last release before 1.4.0
diff --git a/linkcheck/FileUrlData.py b/linkcheck/FileUrlData.py
index 29eb23af..0bbba12e 100644
--- a/linkcheck/FileUrlData.py
+++ b/linkcheck/FileUrlData.py
@@ -21,12 +21,69 @@ from UrlData import UrlData, ExcList
 # OSError is thrown on Windows when a file is not found
 ExcList.append(OSError)
 
-html_re = re.compile(r'(?i)\.s?html?$')
-html_content_re = re.compile(r'(?i)<html>.*</html>')
-opera_re = re.compile(r'^(?i)opera.adr$')
-opera_content_re = re.compile(r'(?i)Opera Hotlist')
+# file extensions we can parse recursively
+extensions = {
+    "html": r'(?i)\.s?html?$',
+    "opera": r'^(?i)opera.adr$', # opera bookmark file
+    "text": r'(?i)\.(txt|xml|tsv|csv|sgml?|py|java|cc?|cpp|h)$',
+}
+for key in extensions.keys():
+    extensions[key] = re.compile(extensions[key])
 
-class FileUrlData(UrlData):
+# if the file extension did not identify the type, look at the content
+contents = {
+    "html": r'(?i)<html>.*</html>',
+    "opera" : r'Opera Hotlist',
+    "text" : r'[\w\s]+',
+}
+for key in contents.keys():
+    contents[key] = re.compile(contents[key])
+
+_schemes = r"""(
+acap        # application configuration access protocol
+|afs        # Andrew File System global file names
+|cid        # content identifier
+|data       # data
+|dav        # dav
+|fax        # fax
+|imap       # internet message access protocol
+|ldap       # Lightweight Directory Access Protocol
+|mailserver # Access to data available from mail servers
+|mid        # message identifier
+|modem      # modem
+|nfs        # network file system protocol
+|opaquelocktoken # opaquelocktoken
+|pop        # Post Office Protocol v3
+|prospero   # Prospero Directory Service
+|rtsp       # real time streaming protocol
+|service    # service location
+|sip        # session initiation protocol
+|tel        # telephone
+|tip        # Transaction Internet Protocol
+|tn3270     # Interactive 3270 emulation sessions
+|vemmi      # versatile multimedia interface
+|wais       # Wide Area Information Servers
+|z39\.50r   # Z39.50 Retrieval
+|z39\.50s   # Z39.50 Session
+|chrome     # Mozilla specific
+|find       # Mozilla specific
+|clsid      # Microsoft specific
+|javascript # JavaScript
+|isbn       # ISBN (int. book numbers)
+|https?     # HTTP/HTTPS
+|ftp        # FTP
+|file       # local file
+|telnet     # telnet
+|mailto     # mailto
+|gopher     # gopher
+|s?news     # news
+|nntp       # news
+)"""
+_url = r"(?i)%s:[-a-zA-Z0-9$_.+!*'/(),;]+" % _schemes
+_url_re = re.compile(_url, re.VERBOSE)
+
+
+class FileUrlData (UrlData):
     "Url link with file scheme"
 
     def __init__(self,
@@ -50,42 +107,51 @@ class FileUrlData(UrlData):
                     self.urlName = os.getcwd()+"/"+self.urlName
                     if winre.search(self.urlName):
                         self.adjustWinPath()
-            self.urlName = self.urlName.replace("\\", "/")
-            self.urlName = "file://"+self.urlName
+            self.urlName = "file://"+self.urlName.replace("\\", "/")
 
 
-    def buildUrl(self):
+    def buildUrl (self):
         UrlData.buildUrl(self)
         # cut off parameter, query and fragment
         self.url = urlparse.urlunparse(self.urlTuple[:3] + ('','',''))
 
 
-    def adjustWinPath(self):
+    def adjustWinPath (self):
         "c:\\windows ==> /c|\\windows"
         self.urlName = "/"+self.urlName[0]+"|"+self.urlName[2:]
 
 
-    def isHtml(self):
-        if html_re.search(self.url) or opera_re.search(self.url):
-            return 1
+    def isHtml (self):
+        # guess by extension
+        for ro in extensions.values():
+            if ro.search(self.url):
+                return 1
         # try to read content (can fail, so catch error)
         try:
-            return html_content_re.search(self.getContent()) or \
-                   opera_content_re.search(self.getContent())
+            for ro in contents.values():
+                if ro.search(self.getContent()):
+                    return 1
         except IOError:
             pass
         return None
 
 
-    def parseUrl(self, config):
-        if html_re.search(self.url) or \
-           html_content_re.search(self.getContent()):
-            UrlData.parseUrl(self, config)
-            return
+    def parseUrl (self, config):
+        for key,ro in extensions.items():
+            if ro.search(self.url):
+                return getattr(self, "parse_"+key)(config)
+        for key,ro in contents.items():
+            if ro.search(self.getContent()):
+                return getattr(self, "parse_"+key)(config)
+
+    def parse_html (self, config):
+        UrlData.parseUrl(self, config)
+
+    def parse_opera (self, config):
         # parse an opera bookmark file
         name = ""
         lineno = 0
-        for line in self.getContent().split("\n"):
+        for line in self.getContent().splitlines():
             lineno += 1
             line = line.strip()
             if line.startswith("NAME="):
@@ -93,7 +159,21 @@ class FileUrlData(UrlData):
             elif line.startswith("URL="):
                 url = line[4:]
                 if url:
-                    from UrlData import GetUrlDataFrom
-                    config.appendUrl(GetUrlDataFrom(url,
+                    config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url,
                         self.recursionLevel+1, self.url, None, lineno, name))
                 name = ""
+
+    def parse_text (self, config):
+        lineno = 0
+        for line in self.getContent().splitlines():
+            lineno += 1
+            i = 0
+            while 1:
+                mo = _url_re.search(line, i)
+                if not mo: break
+                config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(mo.group(),
+                        self.recursionLevel+1, self.url, None, lineno, ""))
+                i = mo.end()
+
+        return
+
diff --git a/linkcheck/IgnoredUrlData.py b/linkcheck/IgnoredUrlData.py
index 2dcb97fa..ef7f9cc5 100644
--- a/linkcheck/IgnoredUrlData.py
+++ b/linkcheck/IgnoredUrlData.py
@@ -18,7 +18,7 @@
 import re, linkcheck
 from UrlData import UrlData
 
-ignored_schemes_re = re.compile(r"""^(
+ignored_schemes = r"""^(
 acap        # application configuration access protocol
 |afs        # Andrew File System global file names
 |cid        # content identifier
@@ -49,7 +49,9 @@ acap        # application configuration access protocol
 |clsid      # Microsoft specific
 |javascript # JavaScript
 |isbn       # ISBN (int. book numbers)
-):""", re.VERBOSE)
+):"""
+
+ignored_schemes_re = re.compile(ignored_schemes, re.VERBOSE)
 
 
 class IgnoredUrlData(UrlData):
diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py
index 5820f43c..7804134c 100644
--- a/linkcheck/UrlData.py
+++ b/linkcheck/UrlData.py
@@ -171,8 +171,8 @@ class UrlData:
         self.html_comments = []
         self.has_content = 0
         url = get_absolute_url(self.urlName, self.baseRef, self.parentName)
-        self.scheme = url.split(":", 1)[0] or "unknown"
-
+        # assume file link if no scheme is found
+        self.scheme = url.split(":", 1)[0] or "file"
 
     def setError(self, s):
         self.valid=0
@@ -191,14 +191,12 @@ class UrlData:
         else:
             self.warningString = s
 
-
     def setInfo(self, s):
         if self.infoString:
             self.infoString += "\n"+s
         else:
             self.infoString = s
 
-
     def copyFrom(self, urlData):
         self.errorString = urlData.errorString
         self.validString = urlData.validString
diff --git a/linkchecker b/linkchecker
index 5aac4fb3..6bcfe952 100755
--- a/linkchecker
+++ b/linkchecker
@@ -64,7 +64,8 @@ For single-letter option arguments the space is not a necessity. So
         environment variable NNTP_SERVER. If no host is given,
         only the syntax of the link is checked.
 -o type, --output=type
-        Specify output type as %s. Default type is text.
+        Specify output type as %s.
+        Default type is text.
 -p pwd, --password=pwd
         Try password pwd for HTML and FTP authorization.
         Default password is 'joe@'. See also -u.
@@ -105,8 +106,8 @@ For single-letter option arguments the space is not a necessity. So
         Use this to check for pages that contain some form of error
         message, for example 'This page has moved' or 'Oracle
         Application Server error'.
-        This option implies -w.\n") % linkcheck.Config.LoggerKeys
-""")
+        This option implies -w.
+""") % linkcheck.Config.LoggerKeys
 
 Notes = linkcheck._("""NOTES
 o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL
diff --git a/test/output/test_file b/test/output/test_file
index 733f7852..ece6d675 100644
--- a/test/output/test_file
+++ b/test/output/test_file
@@ -1,6 +1,10 @@
 test_file
 url file:///home/calvin/projects/linkchecker/test/html/file.html
 valid
+url file:///home/calvin/projects/linkchecker/test/html/file.txt
+valid
+url file:///home/calvin/projects/linkchecker/test/html/file.asc
+valid
 url http.html
 name relative url
 valid
@@ -27,3 +31,9 @@ error
 url file:/etc/
 name good dir
 valid
+url file:///etc/group
+cached
+valid
+url file:///etc/group
+cached
+valid
diff --git a/test/output/test_http b/test/output/test_http
index 81ff5370..6bf1229f 100644
--- a/test/output/test_http
+++ b/test/output/test_http
@@ -4,7 +4,6 @@ valid
 url http://www.garantiertnixgutt.bla
 name bad url
 warning Missing '/' at end of URL
-Server does not support HEAD request (got 500 status), falling back to GET
 error
 url http://www.heise.de
 name ok
diff --git a/test/output/test_mail b/test/output/test_mail
index f4c26728..419dce22 100644
--- a/test/output/test_mail
+++ b/test/output/test_mail
@@ -3,13 +3,15 @@ url file:///home/calvin/projects/linkchecker/test/html/mail.html
 valid
 url mailto:calvin@LocalHost?subject=Hallo&to=michi
 name 1
-error
+warning No MX mail host for LocalHost found
+valid
 url mailto:Dude <calvin@studcs.uni-sb.de> , Killer <calvin@cs.uni-sb.de>?subject=bla
 name 2
 valid
 url mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>?bcc=jsmith%40company.com
 name 3
-error
+warning No MX mail host for company.com found
+valid
 url mailto:Bastian Kleineidam <calvin@studcs.uni-sb.de>
 name 4
 valid
@@ -20,9 +22,9 @@ valid
 url mailto:o'hara@cs.uni-sb.de
 name 5
 valid
-url mailto:?to=calvin@studcs.uni-sb.de?subject=blubb
+url mailto:?to=calvin@studcs.uni-sb.de&subject=blubb&cc=calvin_cc@studcs.uni-sb.de&CC=calvin_CC@studcs.uni-sb.de
 name ...
-error
+valid
 url mailto:news-admins@freshmeat.net?subject=Re:%20[fm%20#11093]%20(news-admins)%20Submission%20report%20-%20Pretty%20CoLoRs
 name ...
 valid
@@ -31,10 +33,12 @@ name ...
 valid
 url mailto:a@d?subject=äöü
 name 5
-error
+warning No MX mail host for d found
+valid
 url mailto:calvin@cs.uni-sb.de?subject=Halli hallo
 name _
 valid
 url mailto:Bastian Kleineidam <calvin@host1?foo=bar>
 name 3
-error
+warning No MX mail host for host1 found
+valid
diff --git a/test/test_file.py b/test/test_file.py
index 0ff66e60..9fa25365 100644
--- a/test/test_file.py
+++ b/test/test_file.py
@@ -8,7 +8,7 @@ config["anchors"] = 1
 config["verbose"] = 1
 config.disableThreading()
 htmldir = "test/html"
-for file in ('file.html',):
+for file in ('file.html',"file.txt","file.asc"):
     url = os.path.join(htmldir, file)
     config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
 linkcheck.checkUrls(config)