From 380f14453be48b7b0d7af625fa63d0714bfadaf4 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam <bastian.kleineidam@web.de>
Date: Wed, 5 Mar 2014 19:23:58 +0100
Subject: [PATCH] Fix mimetype guessing from content.

---
 linkcheck/fileutil.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py
index 6e796d29..35d15508 100644
--- a/linkcheck/fileutil.py
+++ b/linkcheck/fileutil.py
@@ -233,16 +233,11 @@ def guess_mimetype (filename, read=None):
     # Special case for Google Chrome Bookmark files.
     if not mime and basename == 'Bookmarks':
         mime = 'text/plain'
-    # Mime type text/plain can be differentiated further with content reading.
-    if mime == "text/plain" and read is not None:
-        # try to read some content and do a poor man's file(1)
-        try:
-            data = read()[:30]
-            for mime, ro in PARSE_CONTENTS.items():
-                if ro.search(data):
-                    break
-        except Exception:
-            pass
+    # Some mime types can be differentiated further with content reading.
+    if mime in ("text/plain", "application/xml", "text/xml") and read is not None:
+        read_mime = guess_mimetype_read(read)
+        if read_mime is not None:
+            mime = read_mime
     if not mime:
         mime = "application/octet-stream"
     elif ";" in mime:
@@ -251,6 +246,21 @@ def guess_mimetype (filename, read=None):
     return mime.strip().lower()
 
 
+def guess_mimetype_read(read):
+    """Guess a mime type from the start of the content returned by read().
+    Return None if reading fails or no known content pattern matches."""
+    try:
+        head = read()[:70]
+    except Exception:
+        # Content could not be read, so no guess is possible.
+        return None
+    # Report the first mime type whose pattern matches the content head.
+    for candidate, pattern in PARSE_CONTENTS.items():
+        if pattern.search(head):
+            return candidate
+    return None
+
+
 def get_temp_file (mode='r', **kwargs):
     """Return tuple (open file object, filename) pointing to a temporary
     file."""