mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-01 03:24:43 +00:00
Detect sitemaps that do not start with an XML declaration
This commit is contained in:
parent
a977e4d712
commit
8c804c35a5
4 changed files with 22 additions and 2 deletions
|
|
@@ -56,8 +56,9 @@ PARSE_CONTENTS = {
|
|||
"text/plain+opera": re.compile(r'^Opera Hotlist'),
|
||||
"text/plain+chromium": re.compile(r'^{\s*"checksum":'),
|
||||
"text/plain+linkchecker": re.compile(r'^# LinkChecker URL list', re.IGNORECASE),
|
||||
"application/xml+sitemapindex": re.compile(r'(?i)<\?xml[^<]+<sitemapindex\s+'),
|
||||
"application/xml+sitemap": re.compile(r'<\?xml[^<]+<urlset\s+', re.IGNORECASE),
|
||||
"application/xml+sitemapindex": re.compile(r'(<\?xml[^<]+)?<sitemapindex\s+',
|
||||
re.IGNORECASE),
|
||||
"application/xml+sitemap": re.compile(r'(<\?xml[^<]+)?<urlset\s+', re.IGNORECASE),
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
14
tests/checker/data/no_decl_sitemap.xml
Normal file
14
tests/checker/data/no_decl_sitemap.xml
Normal file
|
|
@@ -0,0 +1,14 @@
|
|||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>http://www.example.com/</loc>
|
||||
<lastmod>2005-01-01</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
<url>
|
||||
<loc>http://www.example.com/?ascii=nø</loc>
|
||||
<lastmod>2005-01-01</lastmod>
|
||||
<changefreq>monthly</changefreq>
|
||||
<priority>0.8</priority>
|
||||
</url>
|
||||
</urlset>
|
||||
3
tests/checker/data/no_decl_sitemapindex.xml
Normal file
3
tests/checker/data/no_decl_sitemapindex.xml
Normal file
|
|
@@ -0,0 +1,3 @@
|
|||
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<sitemap><loc>sitemap.xml</loc></sitemap>
|
||||
</sitemapindex>
|
||||
|
|
@@ -40,3 +40,5 @@ class TestMiMeutil(unittest.TestCase):
|
|||
self.mime_test("file.wml", "text/vnd.wap.wml")
|
||||
self.mime_test("sitemap.xml", "application/xml+sitemap")
|
||||
self.mime_test("sitemapindex.xml", "application/xml+sitemapindex")
|
||||
self.mime_test("no_decl_sitemap.xml", "application/xml+sitemap")
|
||||
self.mime_test("no_decl_sitemapindex.xml", "application/xml+sitemapindex")
|
||||
|
|
|
|||
Loading…
Reference in a new issue