diff --git a/docs/sitemap_generation.rst b/docs/sitemap_generation.rst new file mode 100644 index 000000000..9c6d50e48 --- /dev/null +++ b/docs/sitemap_generation.rst @@ -0,0 +1,58 @@ +Sitemap generation +================== + +This document describes how to create XML sitemaps for your Wagtail website using the ``wagtail.contrib.wagtailsitemaps`` module. + + +Basic configuration +~~~~~~~~~~~~~~~~~~~ + +First, add ``"wagtail.contrib.wagtailsitemaps"`` to ``INSTALLED_APPS`` in your Django settings file: + + .. code-block:: python + + INSTALLED_APPS = [ + ... + + "wagtail.contrib.wagtailsitemaps", + ] + + +Then, in ``urls.py``, add a URL pattern that points to the ``wagtail.contrib.wagtailsitemaps.views.sitemap`` view, which generates the sitemap: + +.. code-block:: python + + from wagtail.contrib.wagtailsitemaps.views import sitemap + + urlpatterns = patterns('', + ... + + url(r'^sitemap\.xml$', sitemap), + ) + + +You should now be able to browse to "/sitemap.xml" and see the sitemap working. By default, all published pages in your website will be added to the sitemap. + + +Customising +~~~~~~~~~~~ + +URLs +---- + +The ``Page`` class defines a ``get_sitemap_urls`` method which you can override to customise sitemaps per page instance. This method must return a list of dictionaries, one dictionary per URL entry in the sitemap. You can exclude pages from the sitemap by returning an empty list. + +Each dictionary can contain the following: + + - **location** (required) - This is the full URL path to add into the sitemap. + - **lastmod** - A Python date or datetime set to when the page was last modified. + - **changefreq** + - **priority** + +You can add more keys, but you will need to override the ``wagtailsitemaps/sitemap.xml`` template in order for them to be displayed in the sitemap. + + +Cache +----- + +By default, sitemaps are cached for 100 minutes. 
You can change this by setting ``WAGTAILSITEMAPS_CACHE_TIMEOUT`` in your Django settings to the number of seconds you would like the cache to last for. diff --git a/runtests.py b/runtests.py index efa66910f..c911377f1 100755 --- a/runtests.py +++ b/runtests.py @@ -85,6 +85,7 @@ if not settings.configured: 'wagtail.wagtailredirects', 'wagtail.wagtailforms', 'wagtail.contrib.wagtailstyleguide', + 'wagtail.contrib.wagtailsitemaps', 'wagtail.tests', ], diff --git a/wagtail/contrib/wagtailsitemaps/__init__.py b/wagtail/contrib/wagtailsitemaps/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/wagtail/contrib/wagtailsitemaps/models.py b/wagtail/contrib/wagtailsitemaps/models.py new file mode 100644 index 000000000..e69de29bb diff --git a/wagtail/contrib/wagtailsitemaps/sitemap_generator.py b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py new file mode 100644 index 000000000..eb08d81b7 --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/sitemap_generator.py @@ -0,0 +1,21 @@ +from django.template.loader import render_to_string + + +class Sitemap(object): + template = 'wagtailsitemaps/sitemap.xml' + + def __init__(self, site): + self.site = site + + def get_pages(self): + return self.site.root_page.get_descendants(inclusive=True).live().order_by('path') + + def get_urls(self): + for page in self.get_pages(): + for url in page.get_sitemap_urls(): + yield url + + def render(self): + return render_to_string(self.template, { + 'urlset': self.get_urls() + }) diff --git a/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml b/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml new file mode 100644 index 000000000..30ca3c024 --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/templates/wagtailsitemaps/sitemap.xml @@ -0,0 +1,13 @@ +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> +{% spaceless %} +{% for url in urlset %} + <url> + <loc>{{ url.location }}</loc> + {% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %} + {% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %} + 
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %} + </url> +{% endfor %} +{% endspaceless %} +</urlset> diff --git a/wagtail/contrib/wagtailsitemaps/tests.py b/wagtail/contrib/wagtailsitemaps/tests.py new file mode 100644 index 000000000..fd639e4ec --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/tests.py @@ -0,0 +1,84 @@ +from django.test import TestCase +from django.core.urlresolvers import reverse +from django.core.cache import cache + +from wagtail.wagtailcore.models import Page, Site +from wagtail.tests.models import SimplePage + +from .sitemap_generator import Sitemap + + +class TestSitemapGenerator(TestCase): + def setUp(self): + self.home_page = Page.objects.get(id=2) + + self.child_page = self.home_page.add_child(instance=SimplePage( + title="Hello world!", + slug='hello-world', + live=True, + )) + + self.unpublished_child_page = self.home_page.add_child(instance=SimplePage( + title="Unpublished", + slug='unpublished', + live=False, + )) + + self.site = Site.objects.get(is_default_site=True) + + def test_get_pages(self): + sitemap = Sitemap(self.site) + pages = sitemap.get_pages() + + self.assertIn(self.child_page.page_ptr, pages) + self.assertNotIn(self.unpublished_child_page.page_ptr, pages) + + def test_get_urls(self): + sitemap = Sitemap(self.site) + urls = [url['location'] for url in sitemap.get_urls()] + + self.assertIn('/', urls) # Homepage + self.assertIn('/hello-world/', urls) # Child page + + def test_render(self): + sitemap = Sitemap(self.site) + xml = sitemap.render() + + # Check that a URL has made it into the xml + self.assertIn('/hello-world/', xml) + + # Make sure the unpublished page didn't make it into the xml + self.assertNotIn('/unpublished/', xml) + + +class TestSitemapView(TestCase): + def test_sitemap_view(self): + response = self.client.get('/sitemap.xml') + + self.assertEqual(response.status_code, 200) + self.assertTemplateUsed(response, 'wagtailsitemaps/sitemap.xml') + self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8') + + 
def test_sitemap_view_cache(self): + cache_key = 'wagtail-sitemap:%d' % Site.objects.get(is_default_site=True).id + + # Check that the key is not in the cache + self.assertFalse(cache.has_key(cache_key)) + + # Hit the view + first_response = self.client.get('/sitemap.xml') + + self.assertEqual(first_response.status_code, 200) + self.assertTemplateUsed(first_response, 'wagtailsitemaps/sitemap.xml') + + # Check that the key is in the cache + self.assertTrue(cache.has_key(cache_key)) + + # Hit the view again. Should come from the cache this time + second_response = self.client.get('/sitemap.xml') + + self.assertEqual(second_response.status_code, 200) + self.assertTemplateNotUsed(second_response, 'wagtailsitemaps/sitemap.xml') # Sitemap should not be re rendered + + # Check that the content is the same + self.assertEqual(first_response.content, second_response.content) diff --git a/wagtail/contrib/wagtailsitemaps/views.py b/wagtail/contrib/wagtailsitemaps/views.py new file mode 100644 index 000000000..4ef02de0b --- /dev/null +++ b/wagtail/contrib/wagtailsitemaps/views.py @@ -0,0 +1,24 @@ +from django.shortcuts import render +from django.http import HttpResponse +from django.core.cache import cache +from django.conf import settings + +from .sitemap_generator import Sitemap + + +def sitemap(request): + cache_key = 'wagtail-sitemap:' + str(request.site.id) + sitemap_xml = cache.get(cache_key) + + if not sitemap_xml: + # Rerender sitemap + sitemap = Sitemap(request.site) + sitemap_xml = sitemap.render() + + cache.set(cache_key, sitemap_xml, getattr(settings, 'WAGTAILSITEMAPS_CACHE_TIMEOUT', 6000)) + + # Build response + response = HttpResponse(sitemap_xml) + response['Content-Type'] = "text/xml; charset=utf-8" + + return response diff --git a/wagtail/tests/urls.py b/wagtail/tests/urls.py index d04c871a3..83e12adfb 100644 --- a/wagtail/tests/urls.py +++ b/wagtail/tests/urls.py @@ -4,6 +4,7 @@ from wagtail.wagtailcore import urls as wagtail_urls from wagtail.wagtailadmin 
import urls as wagtailadmin_urls from wagtail.wagtaildocs import urls as wagtaildocs_urls from wagtail.wagtailsearch.urls import frontend as wagtailsearch_frontend_urls +from wagtail.contrib.wagtailsitemaps.views import sitemap # Signal handlers from wagtail.wagtailsearch import register_signal_handlers as wagtailsearch_register_signal_handlers @@ -15,6 +16,8 @@ urlpatterns = patterns('', url(r'^search/', include(wagtailsearch_frontend_urls)), url(r'^documents/', include(wagtaildocs_urls)), + url(r'^sitemap\.xml$', sitemap), + # For anything not caught by a more specific rule above, hand over to # Wagtail's serving mechanism url(r'', include(wagtail_urls)), diff --git a/wagtail/wagtailcore/models.py b/wagtail/wagtailcore/models.py index 0a490e9c5..29cadff56 100644 --- a/wagtail/wagtailcore/models.py +++ b/wagtail/wagtailcore/models.py @@ -696,12 +696,22 @@ class Page(six.with_metaclass(PageBase, MP_Node, ClusterableModel, Indexed)): """ return ['/'] + def get_sitemap_urls(self): + latest_revision = self.get_latest_revision() + + return [ + { + 'location': self.url, + 'lastmod': latest_revision.created_at if latest_revision else None + } + ] + def get_static_site_paths(self): """ This is a generator of URL paths to feed into a static site generator Override this if you would like to create static versions of subpages """ - # Yield paths for this page + # Yield path for this page yield '/' # Yield paths for child pages