diff --git a/doc/changelog.txt b/doc/changelog.txt
index cd1d3e7c..4a7343ee 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -3,13 +3,14 @@
 Features:
 - checking: Allow specification of maximum checking time or maximum
   number of checked URLs.
+- checking: Send an HTTP Do-Not-Track header.
+- checking: Check URL length. Print error on URL longer than 2000 characters,
+  warning for longer than 255 characters.
+- checking: Warn about duplicate URL contents.
 
 Changes:
 - doc: Mention 7-zip to extract the .tar.xz under Windows.
   Closes: SF bug #3564733
-- checking: Send a HTTP Do-Not-Track header.
-- checking: Check URL length. Print error on URL longer than 2000 characters,
-  warning for longer than 255 characters.
 - logging: Print download and cache statistics in text output logger.
 - logging: Print warning tag in text output logger. Makes warning filtering
   more easy.
diff --git a/linkcheck/cache/content.py b/linkcheck/cache/content.py
new file mode 100644
index 00000000..55fc3110
--- /dev/null
+++ b/linkcheck/cache/content.py
@@ -0,0 +1,71 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2012 Bastian Kleineidam
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Cache for content checksums.
+"""
+import hashlib
+from ..lock import get_lock
+from ..decorators import synchronized
+
+_lock = get_lock("checksums")
+
+class ChecksumInfo(object):
+    """Map content checksums to the URLs whose content produced them."""
+
+    def __init__(self):
+        """Start with an empty checksum table and zeroed counters."""
+        # {hash -> [URL]}
+        self.checksums = {}
+        self.hits = 0
+        self.misses = 0
+
+    def get_checksum_urls(self, url, checksum):
+        """Record the URL under its checksum and report the other URLs.
+        @param url: the URL for the checksum
+        @ptype url: unicode
+        @param checksum: the URL content checksum
+        @ptype checksum: str
+        @return: URLs already stored for the checksum (except the given URL)
+        @rtype: list of unicode
+        """
+        known = self.checksums.get(checksum)
+        if known is None:
+            # first time this checksum is seen: count a miss, start its list
+            self.misses += 1
+            self.checksums[checksum] = [url]
+            return []
+        self.hits += 1
+        others = [x for x in known if x != url]
+        if url not in known:
+            known.append(url)
+        return others
+
+
+_checksuminfo = ChecksumInfo()
+
+@synchronized(_lock)
+def get_checksum_urls(url, content):
+    """Hash the content with SHA-1 and look up URLs stored for that digest.
+    @param url: the URL for which the content is valid
+    @ptype url: unicode
+    @param content: the content to hash
+    @ptype content: str
+    @return: list of URLs with the same content (except the given URL)
+    @rtype: list of unicode"""
+    digest = hashlib.sha1(content).hexdigest()
+    return _checksuminfo.get_checksum_urls(url, digest)
+
diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py
index 9600f6d4..81557c59 100644
--- a/linkcheck/checker/const.py
+++ b/linkcheck/checker/const.py
@@ -80,6 +80,7 @@ WARN_URL_EFFECTIVE_URL = "url-effective-url"
 WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content"
 WARN_URL_ANCHOR_NOT_FOUND = "url-anchor-not-found"
 WARN_URL_WARNREGEX_FOUND = "url-warnregex-found"
+WARN_URL_CONTENT_DUPLICATE = "url-content-duplicate"
 WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
 WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
 WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
@@ -115,6 +116,7 @@ Warnings = {
     WARN_URL_ANCHOR_NOT_FOUND: _("URL anchor was not found."),
     WARN_URL_WARNREGEX_FOUND:
         _("The warning regular expression was found in the URL contents."),
+    WARN_URL_CONTENT_DUPLICATE: _("The URL content is a duplicate of another URL."),
     WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."),
     WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."),
     WARN_URL_CONTENT_SIZE_UNEQUAL: _("The URL content size and download size are unequal."),
diff --git a/linkcheck/checker/ftpurl.py b/linkcheck/checker/ftpurl.py
index 9a32f621..fcb42c3e 100644
--- a/linkcheck/checker/ftpurl.py
+++ b/linkcheck/checker/ftpurl.py
@@ -220,7 +220,8 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
             buf = StringIO()
             def stor_data (s):
                 """Helper method storing given data"""
-                self.aggregate.add_download_bytes(len(s))
+                urls = self.aggregate.add_download_data(self.cache_content_key, s)
+                self.warn_duplicate_content(urls)
                 # limit the download size
                 if (buf.tell() + len(s)) > self.MaxFilesizeBytes:
                     raise LinkCheckerError(_("FTP file size too large"))
diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py
index 5a2a6dc8..68d0637d 100644
--- a/linkcheck/checker/httpurl.py
+++ b/linkcheck/checker/httpurl.py
@@ -695,7 +695,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
         read_content()"""
         data = response.read()
         self._size = len(data)
-        self.aggregate.add_download_bytes(self._size)
+        urls = self.aggregate.add_download_data(self.cache_content_key, data)
+        self.warn_duplicate_content(urls)
         encoding = headers.get_content_encoding(self.headers)
         if encoding in _supported_encodings:
             try:
diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py
index f44fc869..d9003b0b 100644
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -41,6 +41,7 @@ from .const import (WARN_URL_EFFECTIVE_URL,
     WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
     WARN_URL_CONTENT_SIZE_UNEQUAL, WARN_URL_WHITESPACE,
     WARN_URL_TOO_LONG, URL_MAX_LENGTH, URL_WARN_LENGTH,
+    WARN_URL_CONTENT_DUPLICATE,
     ExcList, ExcSyntaxList, ExcNoCacheList)
 
 # helper alias
@@ -745,9 +746,19 @@ class UrlBase (object):
             raise LinkCheckerError(_("File size too large"))
         data = self.url_connection.read()
         if not self.is_local():
-            self.aggregate.add_download_bytes(len(data))
+            urls = self.aggregate.add_download_data(self.cache_content_key, data)
+            self.warn_duplicate_content(urls)
         return data, len(data)
 
+    def warn_duplicate_content(self, urls):
+        """Add a duplicate-content warning unless the given URL list is empty.
+        @param urls: URLs with duplicate content
+        @ptype urls: list of unicode"""
+        if not urls:
+            return
+        details = dict(urls=u",".join(urls), size=strformat.strsize(self.size))
+        self.add_warning(_("Content with %(size)s is the same as in URLs (%(urls)s).") % details, tag=WARN_URL_CONTENT_DUPLICATE)
+
     def check_content (self):
         """Check content data for warnings, syntax errors, viruses etc."""
         if not (self.valid and self.can_get_content()):
diff --git a/linkcheck/director/aggregator.py b/linkcheck/director/aggregator.py
index cdb5d615..ad7e223e 100644
--- a/linkcheck/director/aggregator.py
+++ b/linkcheck/director/aggregator.py
@@ -21,7 +21,7 @@ import time
 import threading
 from .. import log, LOG_CHECK
 from ..decorators import synchronized
-from ..cache import urlqueue, addrinfo
+from ..cache import urlqueue, addrinfo, content
 from . import logger, status, checker, cleanup
 
 
@@ -136,12 +136,17 @@ class Aggregate (object):
         self.last_w3_call = time.time()
 
     @synchronized(_download_lock)
-    def add_download_bytes(self, bytes):
-        """Add gibven bytes to number of downloaded bytes.
-        @param bytes: number of bytes downloaded
-        @ptype bytes: int
+    def add_download_data(self, url, data):
+        """Count the downloaded data and look up URLs with the same content.
+        @param url: URL which data belongs to
+        @ptype url: unicode
+        @param data: downloaded data; its length is added to downloaded_bytes
+        @ptype data: str
+        @return: URLs with duplicate contents
+        @rtype: list of unicode
         """
-        self.downloaded_bytes += bytes
+        self.downloaded_bytes += len(data)
+        return content.get_checksum_urls(url, data)
 
     def gather_statistics(self):
         """Gather download and cache statistics and send them to the
diff --git a/tests/checker/data/http.html.result b/tests/checker/data/http.html.result
index b91a0e2c..8b80ce5c 100644
--- a/tests/checker/data/http.html.result
+++ b/tests/checker/data/http.html.result
@@ -123,12 +123,14 @@ url /?d=directory&p=page1
 cache key http://localhost:%(port)d/?d=directory&p=page1
 real url http://localhost:%(port)d/?d=directory&p=page1
 name should not be cached
+warning Content with 1KB is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page).
 valid
 
 url /?quoted=ü
 cache key http://localhost:%(port)d/?quoted=%%C3%%BC
 real url http://localhost:%(port)d/?quoted=%%C3%%BC
 name html entities
+warning Content with 1KB is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page,http://localhost:%(port)d/?d=directory&p=page1).
 valid
 
 url clsid:12345-67890
diff --git a/tests/checker/test_mail.py b/tests/checker/test_mail.py
index 3d99599c..3b253068 100644
--- a/tests/checker/test_mail.py
+++ b/tests/checker/test_mail.py
@@ -152,9 +152,14 @@ class TestMail (LinkCheckTest):
         self.mail_error(u"mailto:@")
         self.mail_error(u"mailto:@example.org")
         self.mail_error(u"mailto:a@")
-        self.mail_error(u"mailto:%s@%s" % (u"a"*60, u"b"*200))
+        url_too_long = "URL length %d is longer than 255."
+        url = u"mailto:%s@%s" % (u"a"*60, u"b"*200)
+        warning = url_too_long % len(url)
+        self.mail_error(url, warning=warning)
+        url = u"mailto:a@%s" % (u"a"*256)
+        warning = url_too_long % len(url)
+        self.mail_error(url, warning=warning)
         self.mail_error(u"mailto:%s@example.org" % (u"a"*65))
-        self.mail_error(u"mailto:a@%s" % (u"a"*256))
         self.mail_error(u'mailto:a@%s.com' % (u"a"*64))
         # local part quoted
         self.mail_error(u'mailto:"a""@example.com', cache_key=u'mailto:a')