From f4245815ba546153e9551afa6db9a9397a042348 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 17 Jun 2014 09:37:35 +0100 Subject: [PATCH 1/9] Added sitemap generator --- wagtail/contrib/wagtailsitemaps/__init__.py | 0 wagtail/contrib/wagtailsitemaps/models.py | 0 .../wagtailsitemaps/sitemap_generator.py | 25 +++++++++++++++++++ .../templates/wagtailsitemaps/sitemap.xml | 13 ++++++++++ wagtail/contrib/wagtailsitemaps/views.py | 23 +++++++++++++++++ 5 files changed, 61 insertions(+) create mode 100644 wagtail/contrib/wagtailsitemaps/__init__.py create mode 100644 wagtail/contrib/wagtailsitemaps/models.py create mode 100644 wagtail/contrib/wagtailsitemaps/sitemap_generator.py create mode 100644 wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml create mode 100644 wagtail/contrib/wagtailsitemaps/views.py diff --git a/wagtail/contrib/wagtailsitemaps/__init__.py b/wagtail/contrib/wagtailsitemaps/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/wagtail/contrib/wagtailsitemaps/models.py b/wagtail/contrib/wagtailsitemaps/models.py new file mode 100644 index 000000000..e69de29bb diff --git a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py new file mode 100644 index 000000000..53b5dd664 --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py @@ -0,0 +1,25 @@ +from django.template.loader import render_to_string + + +class Sitemap(object): + template = 'wagtailsitemaps/sitemap.xml' + + def __init__(self, site): + self.site = site + + def get_pages(self): + return self.site.root_page.get_descendants(inclusive=True).live().order_by('path') + + def get_urls(self): + for page in self.get_pages(): + latest_revision = page.get_latest_revision() + + yield { + 'location': page.url, + 'lastmod': latest_revision.created_at if latest_revision else None + } + + def render(self): + return render_to_string(self.template, { + 'urlset': self.get_urls() + }) diff --git a/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml b/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml new file mode 100644 index 000000000..30ca3c024 --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml @@ -0,0 +1,13 @@ + + +{% spaceless %} +{% for url in urlset %} + + {{ url.location }} + {% if url.lastmod %}{{ url.lastmod|date:"Y-m-d" }}{% endif %} + {% if url.changefreq %}{{ url.changefreq }}{% endif %} + {% if url.priority %}{{ url.priority }}{% endif %} + +{% endfor %} +{% endspaceless %} + diff --git a/wagtail/contrib/wagtailsitemaps/views.py b/wagtail/contrib/wagtailsitemaps/views.py new file mode 100644 index 000000000..aa5fc2e71 --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/views.py @@ -0,0 +1,23 @@ +from django.shortcuts import render +from django.http import HttpResponse +from django.core.cache import cache + +from .sitemap_generator import Sitemap + + +def sitemap(request): + cache_key = 'wagtail-sitemap:' + str(request.site.id) + sitemap_xml = cache.get(cache_key) + + if not sitemap_xml: + # Rerender sitemap + sitemap = Sitemap(request.site) + sitemap_xml = sitemap.render() + + cache.set(cache_key, sitemap_xml, 6000) + + # Build response + response = HttpResponse(sitemap_xml) + response['Content-Type'] = "text/xml; charset=utf-8" + + return response From 8e1b2cf019f0ad6c6b78ce5e8b16a5a1f123c7d9 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Mon, 23 Jun 2014 13:43:21 +0100 Subject: [PATCH 2/9] Sitemap timeout can now be configured with a setting --- wagtail/contrib/wagtailsitemaps/views.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wagtail/contrib/wagtailsitemaps/views.py b/wagtail/contrib/wagtailsitemaps/views.py index aa5fc2e71..4ef02de0b 100644 --- a/wagtail/contrib/wagtailsitemaps/views.py +++ b/wagtail/contrib/wagtailsitemaps/views.py @@ -1,6 +1,7 @@ from django.shortcuts import render from django.http import HttpResponse from django.core.cache import cache +from django.conf import settings from .sitemap_generator import Sitemap @@ -14,7 +15,7 @@ def sitemap(request): sitemap = Sitemap(request.site) sitemap_xml = sitemap.render() - cache.set(cache_key, sitemap_xml, 6000) + cache.set(cache_key, sitemap_xml, getattr(settings, 'WAGTAILSITEMAPS_CACHE_TIMEOUT', 6000)) # Build response response = HttpResponse(sitemap_xml) From ee7aebc9daf4517f2540e80ebe30c53685af3067 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Mon, 23 Jun 2014 13:43:34 +0100 Subject: [PATCH 3/9] Added tests for sitemaps --- runtests.py | 1 + wagtail/contrib/wagtailsitemaps/tests.py | 71 ++++++++++++++++++++++++ wagtail/tests/urls.py | 3 + 3 files changed, 75 insertions(+) create mode 100644 wagtail/contrib/wagtailsitemaps/tests.py diff --git a/runtests.py b/runtests.py index 6fd37ebeb..286a9e6b0 100755 --- a/runtests.py +++ b/runtests.py @@ -84,6 +84,7 @@ if not settings.configured: 'wagtail.wagtailsearch', 'wagtail.wagtailredirects', 'wagtail.wagtailforms', + 'wagtail.contrib.wagtailsitemaps', 'wagtail.tests', ], diff --git a/wagtail/contrib/wagtailsitemaps/tests.py b/wagtail/contrib/wagtailsitemaps/tests.py new file mode 100644 index 000000000..4adf3f98b --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/tests.py @@ -0,0 +1,71 @@ +from django.test import TestCase +from django.core.urlresolvers import reverse + +from wagtail.wagtailcore.models import Page, Site +from wagtail.tests.models import SimplePage + +from .sitemap_generator import Sitemap + + +class TestSitemapGenerator(TestCase): + def setUp(self): + self.home_page = Page.objects.get(id=2) + + self.child_page = self.home_page.add_child(instance=SimplePage( + title="Hello world!", + slug='hello-world', + live=True, + )) + + self.unpublished_child_page = self.home_page.add_child(instance=SimplePage( + title="Unpublished", + slug='unpublished', + live=False, + )) + + self.site = Site.objects.get(is_default_site=True) + + def test_get_pages(self): + sitemap = Sitemap(self.site) + pages = sitemap.get_pages() + + self.assertIn(self.child_page.page_ptr, pages) + self.assertNotIn(self.unpublished_child_page.page_ptr, pages) + + def test_get_urls(self): + sitemap = Sitemap(self.site) + urls = [url['location'] for url in sitemap.get_urls()] + + self.assertIn('/', urls) # Homepage + self.assertIn('/hello-world/', urls) # Child page + + def test_render(self): + sitemap = Sitemap(self.site) + xml = sitemap.render() + + # Check that a URL has made it into the xml + self.assertIn('/hello-world/', xml) + + +class TestSitemapView(TestCase): + def test_sitemap_view(self): + response = self.client.get('/sitemap.xml') + + self.assertEqual(response.status_code, 200) + self.assertTemplateUsed(response, 'wagtailsitemaps/sitemap.xml') + self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8') + + def test_sitemap_view_cache(self): + first_response = self.client.get('/sitemap.xml') + + self.assertEqual(first_response.status_code, 200) + self.assertTemplateUsed(first_response, 'wagtailsitemaps/sitemap.xml') + + # Hit the view again. Should come from the cache this time + second_response = self.client.get('/sitemap.xml') + + self.assertEqual(second_response.status_code, 200) + self.assertTemplateNotUsed(second_response, 'wagtailsitemaps/sitemap.xml') # Sitemap should not be re rendered + + # Check that the content is the same + self.assertEqual(first_response.content, second_response.content) diff --git a/wagtail/tests/urls.py b/wagtail/tests/urls.py index d04c871a3..83e12adfb 100644 --- a/wagtail/tests/urls.py +++ b/wagtail/tests/urls.py @@ -4,6 +4,7 @@ from wagtail.wagtailcore import urls as wagtail_urls from wagtail.wagtailadmin import urls as wagtailadmin_urls from wagtail.wagtaildocs import urls as wagtaildocs_urls from wagtail.wagtailsearch.urls import frontend as wagtailsearch_frontend_urls +from wagtail.contrib.wagtailsitemaps.views import sitemap # Signal handlers from wagtail.wagtailsearch import register_signal_handlers as wagtailsearch_register_signal_handlers @@ -15,6 +16,8 @@ urlpatterns = patterns('', url(r'^search/', include(wagtailsearch_frontend_urls)), url(r'^documents/', include(wagtaildocs_urls)), + url(r'^sitemap\.xml$', sitemap), + # For anything not caught by a more specific rule above, hand over to # Wagtail's serving mechanism url(r'', include(wagtail_urls)), From d76c8613fb77683dfdffd74e2cafd4eda4a242ad Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Mon, 23 Jun 2014 14:08:42 +0100 Subject: [PATCH 4/9] Added get_sitemap_urls method to page and use it in sitemaps --- .../wagtailsitemaps/sitemap_generator.py | 8 ++----- wagtail/wagtailcore/models.py | 21 ++++++++++++++++++- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py index 53b5dd664..eb08d81b7 100644 --- a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py +++ b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py @@ -12,12 +12,8 @@ class Sitemap(object): def get_urls(self): for page in self.get_pages(): - latest_revision = page.get_latest_revision() - - yield { - 'location': page.url, - 'lastmod': latest_revision.created_at if latest_revision else None - } + for url in page.get_sitemap_urls(): + yield url def render(self): return render_to_string(self.template, { diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index 1f0c98d83..c1e48750b 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -623,13 +623,32 @@ class Page(MP_Node, ClusterableModel, Indexed): """ return self.serve(self.dummy_request()) + def get_internal_paths(self): + """ + This returns a list of paths within this page. + This is used for static sites, sitemaps and cache invalidation. + """ + return ['/'] + + def get_sitemap_urls(self): + latest_revision = self.get_latest_revision() + + return [ + { + 'location': self.url + url[1:], + 'lastmod': latest_revision.created_at if latest_revision else None + } + for url in self.get_internal_paths() + ] + def get_static_site_paths(self): """ This is a generator of URL paths to feed into a static site generator Override this if you would like to create static versions of subpages """ # Yield paths for this page - yield '/' + for url in self.get_internal_paths(): + yield url # Yield paths for child pages for child in self.get_children().live(): From 683f1cfa64e19639820a5bf53f48e1bd72b095bc Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 1 Jul 2014 10:14:34 +0100 Subject: [PATCH 5/9] Added docs for sitemaps --- docs/sitemap_generation.rst | 58 +++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 docs/sitemap_generation.rst diff --git a/docs/sitemap_generation.rst b/docs/sitemap_generation.rst new file mode 100644 index 000000000..3308a8f4c --- /dev/null +++ b/docs/sitemap_generation.rst @@ -0,0 +1,58 @@ +Sitemap generation +================== + +This document describes how to create XML sitemaps for your Wagtail website using the ``wagtail.contrib.wagtailsitemaps`` module. + + +Basic configuration +~~~~~~~~~~~~~~~~~~~ + +You firstly need to add ``"wagtail.contrib.wagtailsitemaps"`` to INSTALLED_APPS in your Django settings file: + + .. code-block:: python + + INSTALLED_APPS = [ + ... + + "wagtail.contrib.wagtailsitemaps", + ] + + +Then, in urls.py, you need to add a link to the ``wagtail.contrib.wagtailsitemaps.views.sitemap`` view which generates the sitemap: + +.. code-block:: python + + from wagtail.contrib.wagtailsitemaps.views import sitemap + + urlpatterns = patterns('', + ... + + url('^sitemap\.xml$', sitemap), + ) + + +You should now be able to browse to "/sitemap.xml" and see the sitemap working. + + +Customising +~~~~~~~~~~~ + +URLs +---- + +The Page class defines a ``get_sitemap_urls`` method which you can override to customise sitemaps per page type. This method must return a list of dictionaries, one dictionary per URL entry in the sitemap. You can exclude pages from the sitemap by returning an empty list. + +Each dictionary can contain the following: + + - **location** (required) - This is the full URL path to add into the sitemap. + - **lastmod** - A python date or datetime set to when the page was last modified. + - **changefreq** + - **priority** + +You can add more but you will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. + + +Cache +----- + +By default, sitemaps are cached for 100 minutes. You can change this by setting ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` in your Django settings to the number of seconds you would like to cache to last for. From f642e78f60897cc0ed7147ee79f1de0727fe981c Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 2 Jul 2014 12:52:09 +0100 Subject: [PATCH 6/9] Removed get_internal_paths I've changed my mind about this. --- wagtail/wagtailcore/models.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index c1e48750b..294f5d40b 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -623,22 +623,14 @@ class Page(MP_Node, ClusterableModel, Indexed): """ return self.serve(self.dummy_request()) - def get_internal_paths(self): - """ - This returns a list of paths within this page. - This is used for static sites, sitemaps and cache invalidation. - """ - return ['/'] - def get_sitemap_urls(self): latest_revision = self.get_latest_revision() return [ { - 'location': self.url + url[1:], + 'location': self.url, 'lastmod': latest_revision.created_at if latest_revision else None } - for url in self.get_internal_paths() ] def get_static_site_paths(self): @@ -646,9 +638,8 @@ class Page(MP_Node, ClusterableModel, Indexed): This is a generator of URL paths to feed into a static site generator Override this if you would like to create static versions of subpages """ - # Yield paths for this page - for url in self.get_internal_paths(): - yield url + # Yield path for this page + yield '/' # Yield paths for child pages for child in self.get_children().live(): From 3be8c0374ca0ce3ae3cea3b3f77ab188ce0e8a40 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 2 Jul 2014 12:57:21 +0100 Subject: [PATCH 7/9] Improvements to sitemaps tests --- wagtail/contrib/wagtailsitemaps/tests.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/wagtail/contrib/wagtailsitemaps/tests.py b/wagtail/contrib/wagtailsitemaps/tests.py index 4adf3f98b..fd639e4ec 100644 --- a/wagtail/contrib/wagtailsitemaps/tests.py +++ b/wagtail/contrib/wagtailsitemaps/tests.py @@ -1,5 +1,6 @@ from django.test import TestCase from django.core.urlresolvers import reverse +from django.core.cache import cache from wagtail.wagtailcore.models import Page, Site from wagtail.tests.models import SimplePage @@ -46,6 +47,9 @@ class TestSitemapGenerator(TestCase): # Check that a URL has made it into the xml self.assertIn('/hello-world/', xml) + # Make sure the unpublished page didn't make it into the xml + self.assertNotIn('/unpublished/', xml) + class TestSitemapView(TestCase): def test_sitemap_view(self): @@ -56,11 +60,20 @@ class TestSitemapView(TestCase): self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8') def test_sitemap_view_cache(self): + cache_key = 'wagtail-sitemap:%d' % Site.objects.get(is_default_site=True).id + + # Check that the key is not in the cache + self.assertFalse(cache.has_key(cache_key)) + + # Hit the view first_response = self.client.get('/sitemap.xml') self.assertEqual(first_response.status_code, 200) self.assertTemplateUsed(first_response, 'wagtailsitemaps/sitemap.xml') + # Check that the key is in the cache + self.assertTrue(cache.has_key(cache_key)) + # Hit the view again. Should come from the cache this time second_response = self.client.get('/sitemap.xml') From 61fd67f2e9e4920ea2412f03363f5a54c598c804 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 2 Jul 2014 13:01:49 +0100 Subject: [PATCH 8/9] Tweaks to sitemap generation docs --- docs/sitemap_generation.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/sitemap_generation.rst b/docs/sitemap_generation.rst index 3308a8f4c..a750eadc1 100644 --- a/docs/sitemap_generation.rst +++ b/docs/sitemap_generation.rst @@ -31,7 +31,7 @@ Then, in urls.py, you need to add a link to the ``wagtail.contrib.wagtailsitemap ) -You should now be able to browse to "/sitemap.xml" and see the sitemap working. +You should now be able to browse to "/sitemap.xml" and see the sitemap working. By default, all published pages in your website will be added to the site map. Customising @@ -40,7 +40,7 @@ Customising URLs ---- -The Page class defines a ``get_sitemap_urls`` method which you can override to customise sitemaps per page type. This method must return a list of dictionaries, one dictionary per URL entry in the sitemap. You can exclude pages from the sitemap by returning an empty list. +The Page class defines a ``get_sitemap_urls`` method which you can override to customise sitemaps per page instance. This method must return a list of dictionaries, one dictionary per URL entry in the sitemap. You can exclude pages from the sitemap by returning an empty list. Each dictionary can contain the following: @@ -49,10 +49,10 @@ Each dictionary can contain the following: - **changefreq** - **priority** -You can add more but you will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. +You can add more but yoBy default, all published pages in your website will be added to the site map.u will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. Cache ----- -By default, sitemaps are cached for 100 minutes. You can change this by setting ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` in your Django settings to the number of seconds you would like to cache to last for. +By default, sitemaps are cached for 100 minutes. You can change this by setting ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` in your Django settings to the number of seconds you would like the cache to last for. From 2f74885834b0e84b2d9e37edb93cd78aab2bce7a Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 2 Jul 2014 13:45:43 +0100 Subject: [PATCH 9/9] Fixed bad paste --- docs/sitemap_generation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sitemap_generation.rst b/docs/sitemap_generation.rst index a750eadc1..9c6d50e48 100644 --- a/docs/sitemap_generation.rst +++ b/docs/sitemap_generation.rst @@ -49,7 +49,7 @@ Each dictionary can contain the following: - **changefreq** - **priority** -You can add more but yoBy default, all published pages in your website will be added to the site map.u will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. +You can add more but you will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. Cache