Another fix to ensure get_content() is only called when allowed.

2026-04-18 21:31:00 +00:00 · 2010-10-13 22:14:43 +02:00 · 2010-10-13 22:14:43 +02:00 · a8aa3bdb00
commit a8aa3bdb00
parent 4c62b00e30
2 changed files with 18 additions and 11 deletions
--- a/linkcheck/checker/httpurl.py
+++ b/linkcheck/checker/httpurl.py
@@ -651,6 +651,10 @@ Use URL `%(newurl)s' instead for checking.""") % {
            return False
        return super(HttpUrl, self).content_allows_robots()

+    def check_warningregex (self):
+        if self.method_get_allowed:
+            super(HttpUrl, self).check_warningregex()
+
    def is_html (self):
        """
        See if this URL points to a HTML file by looking at the
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -679,17 +679,7 @@ class UrlBase (object):
                self.get_anchors()
        if self.anchor:
            self.check_anchor()
-        warningregex = self.aggregate.config["warningregex"]
-        if warningregex:
-            log.debug(LOG_CHECK, "checking content")
-            try:
-                match = warningregex.search(self.get_content())
-                if match:
-                    self.add_warning(_("Found %(match)r in link contents.") %
-                       {"match": match.group()}, tag=WARN_URL_WARNREGEX_FOUND)
-            except tuple(ExcList):
-                value = self.handle_exception()
-                self.set_result(unicode_safe(value), valid=False)
+        self.check_warningregex()
        # is it an intern URL?
        if not self.extern[0]:
            # check HTML/CSS syntax
@@ -705,6 +695,19 @@ class UrlBase (object):
            if self.aggregate.config["scanvirus"]:
                self.scan_virus()

+    def check_warningregex (self):
+        warningregex = self.aggregate.config["warningregex"]
+        if warningregex:
+            log.debug(LOG_CHECK, "checking content")
+            try:
+                match = warningregex.search(self.get_content())
+                if match:
+                    self.add_warning(_("Found %(match)r in link contents.") %
+                       {"match": match.group()}, tag=WARN_URL_WARNREGEX_FOUND)
+            except tuple(ExcList):
+                value = self.handle_exception()
+                self.set_result(unicode_safe(value), valid=False)
+
    def check_size (self):
        """Check content size if it is zero or larger than a given
        maximum size.