diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 8d94d1f..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1 +0,0 @@
-include README.markdown LICENSE
diff --git a/README.markdown b/README.markdown
deleted file mode 100644
index 3038c61..0000000
--- a/README.markdown
+++ /dev/null
@@ -1,41 +0,0 @@
-# Python Markdown oEmbed
-
-[](https://travis-ci.org/rennat/python-markdown-oembed)
-
-Markdown extension to allow media embedding using the oEmbed standard.
-
-## Installation
-
- pip install python-markdown-oembed
-
-## Usage
-
- >>> import markdown
- >>> md = markdown.Markdown(extensions=['oembed'])
- >>> md.convert('')
- u''
-
-## Links
-
-- [python-markdown-oembed](https://github.com/rennat/python-markdown-oembed)
-- [Markdown](http://daringfireball.net/projects/markdown/)
-- [oEmbed](http://www.oembed.com/)
-- [python-oembed](https://github.com/abarmat/python-oembed)
-
-## License
-
-A Public Domain work. Do as you wish.
-
-## Changelog
-
-### 0.2.1
-
-- add Slideshare endpoint (thanks to [anantshri](https://github.com/anantshri))
-
-### 0.2.0
-
-- backwards incompatible changes
- - allows arbitrary endpoints ([commit](https://github.com/Wenzil/python-markdown-oembed/commit/1e89de9db5e63677e071c36503e2499bbe0792da))
- - works with modern Markdown (>=2.6)
- - dropped support for python 2.6
-- added support python 3.x
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8d0cc32
--- /dev/null
+++ b/README.md
@@ -0,0 +1,98 @@
+# Python Markdown oEmbed
+
+Markdown extension to allow media embedding using the oEmbed standard.
+
+## Requirements
+
+- Python >= 3.9
+- Markdown >= 3.2
+
+## Installation
+
+ pip install python-markdown-oembed
+
+Or with [uv](https://docs.astral.sh/uv/):
+
+ uv add python-markdown-oembed
+
+## Usage
+
+```python
+import markdown
+md = markdown.Markdown(extensions=['oembed'])
+md.convert('![](https://www.youtube.com/watch?v=VIDEO_ID)')
+```
+
+Output is enclosed in a wrapper element carrying the `oembed` CSS class by default:
+
+```html
+
+```
+
+### Configuration
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `allowed_endpoints` | YouTube, Flickr, Vimeo, Slideshare | List of `oembed.OEmbedEndpoint` objects |
+| `wrapper_class` | `"oembed"` | CSS class(es) for the wrapper element. Set to `""` to disable wrapping |
+
+Example with custom configuration:
+
+```python
+from mdx_oembed.endpoints import YOUTUBE, VIMEO
+
+md = markdown.Markdown(
+ extensions=['oembed'],
+ extension_configs={
+ 'oembed': {
+ 'allowed_endpoints': [YOUTUBE, VIMEO],
+ 'wrapper_class': 'embed-responsive',
+ }
+ }
+)
+```
+
+## Security
+
+oEmbed HTML responses are sanitized using [nh3](https://github.com/messense/nh3)
+to prevent XSS from compromised oEmbed providers. Only safe tags (`iframe`,
+`video`, `audio`, `img`, etc.) and attributes are allowed.
+
+## Links
+
+- [python-markdown-oembed](https://github.com/rennat/python-markdown-oembed)
+- [Markdown](http://daringfireball.net/projects/markdown/)
+- [oEmbed](http://www.oembed.com/)
+- [python-oembed](https://github.com/abarmat/python-oembed)
+
+## License
+
+A Public Domain work. Do as you wish.
+
+## Changelog
+
+### 0.3.0
+
+- **Breaking:** requires Python >= 3.9 and Markdown >= 3.2
+- Migrated from deprecated `Pattern` to `InlineProcessor` (Markdown 3.2+ compatible)
+- Added HTML sanitization of oEmbed responses (XSS protection via nh3)
+- Added support for oEmbed `photo` type responses
+- Improved image URL detection (case-insensitive, handles query strings)
+- All oEmbed API endpoints now use HTTPS
+- Slideshare URL patterns now accept both HTTP and HTTPS
+- Configurable wrapper CSS class via the `wrapper_class` option (previously hardcoded Bootstrap classes)
+- Migrated to `pyproject.toml` with hatchling build backend
+- Tests modernized: uses pytest + unittest.mock, all HTTP calls mocked
+- Centralized version management in `mdx_oembed/version.py`
+
+### 0.2.1
+
+- add Slideshare endpoint (thanks to [anantshri](https://github.com/anantshri))
+
+### 0.2.0
+
+- backwards incompatible changes
+ - allows arbitrary endpoints ([commit](https://github.com/Wenzil/python-markdown-oembed/commit/1e89de9db5e63677e071c36503e2499bbe0792da))
+ - works with modern Markdown (>=2.6)
+ - dropped support for python 2.6
+- added support python 3.x
diff --git a/mdx_oembed/__init__.py b/mdx_oembed/__init__.py
index b783ea7..4ed9c02 100644
--- a/mdx_oembed/__init__.py
+++ b/mdx_oembed/__init__.py
@@ -1,8 +1,7 @@
-# -*- coding: utf-8 -*-
from mdx_oembed.extension import OEmbedExtension
+from mdx_oembed.version import __version__
-
-VERSION = '0.2.1'
+VERSION = __version__
def makeExtension(**kwargs):
diff --git a/mdx_oembed/endpoints.py b/mdx_oembed/endpoints.py
index 6946822..1f12c32 100644
--- a/mdx_oembed/endpoints.py
+++ b/mdx_oembed/endpoints.py
@@ -1,20 +1,21 @@
-# -*- coding: utf-8 -*-
import oembed
+# URL patterns use python-oembed's glob-like syntax, not standard regex.
+
YOUTUBE = oembed.OEmbedEndpoint('https://www.youtube.com/oembed', [
'https?://(*.)?youtube.com/*',
'https?://youtu.be/*',
])
-SLIDESHARE = oembed.OEmbedEndpoint('http://www.slideshare.net/api/oembed/2', [
- 'http://www.slideshare.net/*/*',
- 'http://fr.slideshare.net/*/*',
- 'http://de.slideshare.net/*/*',
- 'http://es.slideshare.net/*/*',
- 'http://pt.slideshare.net/*/*',
+SLIDESHARE = oembed.OEmbedEndpoint('https://www.slideshare.net/api/oembed/2', [
+ 'https?://www.slideshare.net/*/*',
+ 'https?://fr.slideshare.net/*/*',
+ 'https?://de.slideshare.net/*/*',
+ 'https?://es.slideshare.net/*/*',
+ 'https?://pt.slideshare.net/*/*',
])
-FLICKR = oembed.OEmbedEndpoint('http://www.flickr.com/services/oembed/', [
+FLICKR = oembed.OEmbedEndpoint('https://www.flickr.com/services/oembed/', [
'https?://*.flickr.com/*',
])
@@ -26,5 +27,5 @@ DEFAULT_ENDPOINTS = [
YOUTUBE,
FLICKR,
VIMEO,
- SLIDESHARE
+ SLIDESHARE,
]
diff --git a/mdx_oembed/extension.py b/mdx_oembed/extension.py
index a55c235..14607dd 100644
--- a/mdx_oembed/extension.py
+++ b/mdx_oembed/extension.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
from markdown import Extension
import oembed
from mdx_oembed.endpoints import DEFAULT_ENDPOINTS
@@ -11,24 +10,29 @@ class OEmbedExtension(Extension):
self.config = {
'allowed_endpoints': [
DEFAULT_ENDPOINTS,
- "A list of oEmbed endpoints to allow. Defaults to "
- "endpoints.DEFAULT_ENDPOINTS"
+ "A list of oEmbed endpoints to allow. "
+ "Defaults to endpoints.DEFAULT_ENDPOINTS",
+ ],
+ 'wrapper_class': [
+ 'oembed',
+ "CSS class(es) for the wrapper element. "
+ "Set to empty string to disable wrapping.",
],
}
- super(OEmbedExtension, self).__init__(**kwargs)
+ super().__init__(**kwargs)
def extendMarkdown(self, md):
- self.oembed_consumer = self.prepare_oembed_consumer()
- link_pattern = OEmbedLinkPattern(OEMBED_LINK_RE, md,
- self.oembed_consumer)
+ consumer = self._prepare_oembed_consumer()
+ wrapper_class = self.getConfig('wrapper_class', 'oembed')
+ link_pattern = OEmbedLinkPattern(
+ OEMBED_LINK_RE, md, consumer, wrapper_class=wrapper_class,
+ )
+ # Priority 175 — run before the default image pattern (priority 150)
md.inlinePatterns.register(link_pattern, 'oembed_link', 175)
- def prepare_oembed_consumer(self):
+ def _prepare_oembed_consumer(self):
allowed_endpoints = self.getConfig('allowed_endpoints', DEFAULT_ENDPOINTS)
consumer = oembed.OEmbedConsumer()
-
- if allowed_endpoints:
- for endpoint in allowed_endpoints:
- consumer.addEndpoint(endpoint)
-
+ for endpoint in (allowed_endpoints or []):
+ consumer.addEndpoint(endpoint)
return consumer
diff --git a/mdx_oembed/inlinepatterns.py b/mdx_oembed/inlinepatterns.py
index a43f44b..257f29b 100644
--- a/mdx_oembed/inlinepatterns.py
+++ b/mdx_oembed/inlinepatterns.py
@@ -1,40 +1,109 @@
-# -*- coding: utf-8 -*-
import logging
-from markdown.inlinepatterns import Pattern
-import oembed
+from posixpath import splitext
+from urllib.parse import urlparse
+import nh3
+import oembed
+from markdown.inlinepatterns import InlineProcessor
+from xml.etree.ElementTree import Element
LOG = logging.getLogger(__name__)
+# Image extensions to exclude from oEmbed processing
+_IMAGE_EXTENSIONS = frozenset({
+ ".png", ".jpg", ".jpeg", ".gif", ".avif", ".webp",
+ ".svg", ".bmp", ".tiff", ".ico",
+})
-OEMBED_LINK_RE = r'\!\[([^\]]*)\]\(((?:https?:)?//[^\)]*)' \
- r'(? bool:
+ """Check if a URL points to an image based on its path extension."""
+ try:
+ path = urlparse(url).path
+ _, ext = splitext(path)
+ return ext.lower() in _IMAGE_EXTENSIONS
+ except Exception:
+ return False
- def __init__(self, pattern, md=None, oembed_consumer=None):
- Pattern.__init__(self, pattern, md=md)
+
+def _sanitize_html(html: str) -> str:
+ """Sanitize oEmbed HTML to prevent XSS."""
+ return nh3.clean(html, tags=_SANITIZE_TAGS, attributes=_SANITIZE_ATTRS)
+
+
+class OEmbedLinkPattern(InlineProcessor):
+ """Inline processor that replaces Markdown image links with oEmbed content."""
+
+ def __init__(self, pattern, md=None, oembed_consumer=None, wrapper_class="oembed"):
+ super().__init__(pattern, md)
self.consumer = oembed_consumer
+ self.wrapper_class = wrapper_class
- def handleMatch(self, match):
- html = self.get_oembed_html_for_match(match)
+ def handleMatch(self, m, data):
+ url = m.group(2).strip()
+ alt = m.group(1)
+
+ # Skip image URLs — let Markdown's default image handler process them
+ if _is_image_url(url):
+ return None, None, None
+
+ html = self._get_oembed_html(url, alt)
if html is None:
- return None
- else:
- html = f'{ html }'
- placeholder = self.md.htmlStash.store(html)
- return placeholder
+ return None, None, None
- def get_oembed_html_for_match(self, match):
- url = match.group(3).strip()
+ html = _sanitize_html(html)
+ if self.wrapper_class:
+ html = f'{html}'
+
+ # Stash raw HTML so it survives Markdown's escaping; place the
+ # placeholder inside an inline element that the tree-processor will
+ # later replace with the real HTML.
+ placeholder = self.md.htmlStash.store(html)
+ el = Element("span")
+ el.text = placeholder
+ return el, m.start(0), m.end(0)
+
+ def _get_oembed_html(self, url: str, alt: str = "") -> str | None:
+ """Fetch oEmbed HTML for a URL, handling different response types."""
try:
response = self.consumer.embed(url)
except oembed.OEmbedNoEndpoint:
- LOG.error("No OEmbed Endpoint")
+ LOG.warning("No oEmbed endpoint for URL: %s", url)
return None
- except Exception as e:
- LOG.error(e)
+ except Exception:
+ LOG.exception("Error fetching oEmbed for URL: %s", url)
return None
- else:
- return response['html']
+
+ # oEmbed 'video' and 'rich' types include an 'html' field
+ html = response.get("html")
+ if html:
+ return html
+
+ # oEmbed 'photo' type — construct an <img> tag
+ photo_url = response.get("url")
+ if photo_url:
+ width = response.get("width", "")
+ height = response.get("height", "")
+ escaped_alt = alt.replace('"', """)
+ return (
+ f'
'
+ )
+
+ LOG.warning("oEmbed response for %s has no 'html' or 'url' field", url)
+ return None
diff --git a/mdx_oembed/version.py b/mdx_oembed/version.py
index e69de29..493f741 100644
--- a/mdx_oembed/version.py
+++ b/mdx_oembed/version.py
@@ -0,0 +1 @@
+__version__ = "0.3.0"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..34695a5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,58 @@
+[project]
+name = "python-markdown-oembed"
+version = "0.3.0"
+description = "Markdown extension to allow media embedding using the oEmbed standard."
+readme = {file = "README.md", content-type = "text/markdown"}
+license = "Unlicense"
+requires-python = ">=3.9"
+authors = [
+ { name = "Tanner Netterville", email = "tannern@gmail.com" },
+]
+keywords = ["markdown", "oembed"]
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "License :: Public Domain",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+]
+dependencies = [
+ "python-oembed>=0.2.1",
+ "Markdown>=3.2",
+ "nh3>=0.2",
+]
+
+[project.urls]
+Homepage = "https://github.com/rennat/python-markdown-oembed"
+
+[project.entry-points."markdown.extensions"]
+oembed = "mdx_oembed:makeExtension"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.hatch.build.targets.sdist]
+include = [
+ "mdx_oembed/",
+ "tests.py",
+ "README.md",
+ "LICENSE",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["mdx_oembed"]
+
+[dependency-groups]
+dev = [
+ "pytest>=7.0",
+ "pytest-mock>=3.0",
+]
+
+[tool.pytest.ini_options]
+testpaths = ["."]
+python_files = ["tests.py"]
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index b8a1655..0000000
--- a/setup.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-[bdist_wheel]
-universal = 1
-
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 5535683..0000000
--- a/setup.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-try:
- from setuptools import setup
-except ImportError:
- from distutils.core import setup
-
-
-try:
- with open('README.markdown', 'r') as readme:
- LONG_DESCRIPTION = readme.read()
-except Exception:
- LONG_DESCRIPTION = None
-
-
-setup(
- name='python-markdown-oembed',
- version='0.2.1',
- description="Markdown extension to allow media embedding using the oEmbed "
- "standard.",
- long_description=LONG_DESCRIPTION,
- author='Tanner Netterville',
- author_email='tannern@gmail.com',
- url='https://github.com/rennat/python-markdown-oembed',
- license='Public Domain',
- classifiers=(
- "Development Status :: 4 - Beta",
- "License :: Public Domain",
- "Programming Language :: Python",
- "Programming Language :: Python :: 2.7",
- "Programming Language :: Python :: 3.2",
- "Programming Language :: Python :: 3.3",
- "Programming Language :: Python :: 3.4",
- "Programming Language :: Python :: 3.5",
- ),
- keywords='markdown oembed',
-
- packages=[
- 'mdx_oembed',
- ],
- install_requires=[
- "python-oembed >= 0.2.1",
- "Markdown >= 2.6.1",
- ],
-
- test_suite='nose.collector',
- tests_require=[
- 'nose',
- 'mock'
- ]
-)
diff --git a/tests.py b/tests.py
index 8cc8aad..f706078 100644
--- a/tests.py
+++ b/tests.py
@@ -1,191 +1,279 @@
-# -*- coding: utf-8 -*-
import re
import unittest
+from unittest.mock import MagicMock, patch
+
import markdown
-from mock import patch
-from nose.plugins.skip import SkipTest
-from mdx_oembed.extension import OEMBED_LINK_RE
+
from mdx_oembed import endpoints
+from mdx_oembed.inlinepatterns import OEMBED_LINK_RE, _is_image_url, _sanitize_html
-class OEmbedPatternRegexTestCase(unittest.TestCase):
+# ---------------------------------------------------------------------------
+# Regex tests
+# ---------------------------------------------------------------------------
+
+class TestOEmbedRegex(unittest.TestCase):
+ """Tests for the raw OEMBED_LINK_RE pattern."""
+
def setUp(self):
self.re = re.compile(OEMBED_LINK_RE)
+ # --- should NOT match (relative URLs) ---
+
def test_ignore_relative_image_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNone(match)
+ assert self.re.search("") is None
- def test_ignore_absolute_image_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNone(match)
+ # --- should match (absolute URLs — image filtering is in Python now) ---
- def test_ignore_png_image_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNone(match)
+ def test_match_absolute_url(self):
+ m = self.re.search("")
+ assert m is not None
- def test_ignore_jpg_image_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNone(match)
+ def test_match_youtube_link(self):
+ m = self.re.search("")
+ assert m is not None
+ assert m.group(2) == "http://www.youtube.com/watch?v=ABC"
- def test_ignore_gif_image_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNone(match)
+ def test_match_youtube_short_link(self):
+ m = self.re.search("")
+ assert m is not None
- def test_find_youtube_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNotNone(match)
+ def test_match_https(self):
+ m = self.re.search("")
+ assert m is not None
- def test_find_youtube_short_link(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNotNone(match)
+ def test_match_protocol_relative(self):
+ m = self.re.search("")
+ assert m is not None
- def test_find_youtube_http(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNotNone(match)
-
- def test_find_youtube_https(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNotNone(match)
-
- def test_find_youtube_auto(self):
- text = ''
- match = self.re.match(text)
- self.assertIsNotNone(match)
+ def test_alt_text_captured(self):
+ m = self.re.search("")
+ assert m is not None
+ assert m.group(1) == "my alt text"
-class OEmbedExtensionTestCase(unittest.TestCase):
- def setUp(self):
- self.markdown = markdown.Markdown(extensions=['oembed'])
+# ---------------------------------------------------------------------------
+# Image URL detection
+# ---------------------------------------------------------------------------
- def assert_convert(self, text, expected):
- with patch('oembed.OEmbedEndpoint') as MockOEmbedEndpoint:
- MockOEmbedEndpoint.get.return_value = expected
- output = self.markdown.convert(text)
- self.assertEqual(output, expected)
+class TestIsImageUrl(unittest.TestCase):
+
+ def test_common_extensions(self):
+ for ext in ("png", "jpg", "jpeg", "gif", "webp", "avif", "svg", "bmp", "tiff", "ico"):
+ assert _is_image_url(f"http://example.com/photo.{ext}") is True, ext
+
+ def test_case_insensitive(self):
+ assert _is_image_url("http://example.com/Photo.PNG") is True
+ assert _is_image_url("http://example.com/photo.JpEg") is True
+
+ def test_query_string_ignored(self):
+ assert _is_image_url("http://example.com/photo.jpg?size=large") is True
+
+ def test_non_image(self):
+ assert _is_image_url("http://www.youtube.com/watch?v=ABC") is False
+
+ def test_no_extension(self):
+ assert _is_image_url("http://example.com/embed") is False
-class IgnoredTestCase(OEmbedExtensionTestCase):
- """
- The OEmbedExtension should ignore these tags allowing markdown's image
- processor to find and handle them.
- """
+# ---------------------------------------------------------------------------
+# HTML sanitization
+# ---------------------------------------------------------------------------
- def test_relative(self):
- text = ''
- expected = '
'
- output = self.markdown.convert(text)
- self.assertEqual(output, expected)
+class TestSanitizeHtml(unittest.TestCase):
- def test_slash_relative(self):
- text = ''
- expected = '
'
- output = self.markdown.convert(text)
- self.assertEqual(output, expected)
+ def test_allows_iframe(self):
+ html = ''
+ result = _sanitize_html(html)
+ assert ""):
+ """Create a mock OEmbedConsumer that returns the given HTML."""
+ consumer = MagicMock()
+ response = MagicMock()
+ response.get = lambda key, default=None: {"html": html_response, "type": "video"}.get(key, default)
+ response.__getitem__ = lambda self_inner, key: {"html": html_response, "type": "video"}[key]
+ consumer.embed.return_value = response
+ return consumer
-class YoutubeTestCase(OEmbedExtensionTestCase):
- """
- The OEmbedExtension should handle embedding for these cases.
- """
-
- def test_youtube_link(self):
- """
- YouTube video link.
- """
- text = ''
- output = self.markdown.convert(text)
- self.assertIn('", "type": "video"}
+ resp.get = lambda key, default=None: data.get(key, default)
+ resp.__getitem__ = lambda self_inner, key: data[key]
+ return resp
+ raise _oembed.OEmbedNoEndpoint("nope")
+
+ consumer = MagicMock()
+ consumer.embed.side_effect = side_effect
+
+ with patch("mdx_oembed.extension.oembed.OEmbedConsumer", return_value=consumer):
+ md = markdown.Markdown(
+ extensions=["oembed"],
+ extension_configs={
+ "oembed": {"allowed_endpoints": [endpoints.YOUTUBE]},
+ },
+ )
+ yt_output = md.convert("")
+ assert "