diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 89de62b..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-language: python
-python:
- - "2.7"
- - "3.2"
- - "3.3"
- - "3.4"
- - "3.5"
-install: "pip install . nose mock"
-script: nosetests
diff --git a/flake.lock b/flake.lock
deleted file mode 100644
index 9b0eab5..0000000
--- a/flake.lock
+++ /dev/null
@@ -1,27 +0,0 @@
-{
- "nodes": {
- "nixpkgs": {
- "locked": {
- "lastModified": 1698611440,
- "narHash": "sha256-jPjHjrerhYDy3q9+s5EAsuhyhuknNfowY6yt6pjn9pc=",
- "owner": "NixOS",
- "repo": "nixpkgs",
- "rev": "0cbe9f69c234a7700596e943bfae7ef27a31b735",
- "type": "github"
- },
- "original": {
- "owner": "NixOS",
- "ref": "nixos-unstable",
- "repo": "nixpkgs",
- "type": "github"
- }
- },
- "root": {
- "inputs": {
- "nixpkgs": "nixpkgs"
- }
- }
- },
- "root": "root",
- "version": 7
-}
diff --git a/flake.nix b/flake.nix
deleted file mode 100644
index ffb8b7d..0000000
--- a/flake.nix
+++ /dev/null
@@ -1,29 +0,0 @@
-{
- description = "Oembed plugin flake";
- inputs = {
- nixpkgs.url = github:NixOS/nixpkgs/nixos-unstable;
- };
- outputs = { self, nixpkgs }:
- let
- pkgs = import nixpkgs {
- inherit system;
- overlays = [];
- };
- pythonPackages = pkgs.python3Packages;
- system = "x86_64-linux";
- in rec {
- devShell.x86_64-linux = pkgs.mkShell {
- buildInputs = [
- pkgs.python3
- pkgs.python3Packages.pip
- ];
- shellHook = ''
- export PS1='\u@md-oembed \$ '
- export PIP_PREFIX=$(pwd)/venv/pip_packages
- export PYTHONPATH="$PIP_PREFIX/${pkgs.python3.sitePackages}:$PYTHONPATH"
- export PATH="$PIP_PREFIX/bin:$PATH"
- unset SOURCE_DATE_EPOCH
- '';
- };
- };
-}
diff --git a/mdx_oembed/__init__.py b/mdx_oembed/__init__.py
index 4ed9c02..0a88f9d 100644
--- a/mdx_oembed/__init__.py
+++ b/mdx_oembed/__init__.py
@@ -1,8 +1,12 @@
+from __future__ import annotations
+
from mdx_oembed.extension import OEmbedExtension
from mdx_oembed.version import __version__
VERSION = __version__
+__all__ = ["OEmbedExtension", "VERSION", "__version__", "makeExtension"]
-def makeExtension(**kwargs):
+
+def makeExtension(**kwargs: object) -> OEmbedExtension: # noqa: N802
return OEmbedExtension(**kwargs)
diff --git a/mdx_oembed/endpoints.py b/mdx_oembed/endpoints.py
new file mode 100644
index 0000000..08a175a
--- /dev/null
+++ b/mdx_oembed/endpoints.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from mdx_oembed.oembed import OEmbedEndpoint
+
+# URL patterns use shell-style globs with an "https?://" shorthand
+# that matches both http and https schemes.
+
+YOUTUBE = OEmbedEndpoint('https://www.youtube.com/oembed', [
+    'https?://youtube.com/*', 'https?://*.youtube.com/*',  # "*." skips the bare host
+    'https?://youtu.be/*',
+])
+
+SLIDESHARE = OEmbedEndpoint('https://www.slideshare.net/api/oembed/2', [
+    'https?://www.slideshare.net/*/*',
+    'https?://fr.slideshare.net/*/*',
+    'https?://de.slideshare.net/*/*',
+    'https?://es.slideshare.net/*/*',
+    'https?://pt.slideshare.net/*/*',
+])
+
+FLICKR = OEmbedEndpoint('https://www.flickr.com/services/oembed/', [
+    'https?://*.flickr.com/*',
+])
+
+VIMEO = OEmbedEndpoint('https://vimeo.com/api/oembed.json', [
+    'https?://vimeo.com/*',
+])
+
+DEFAULT_ENDPOINTS = [  # consulted in this order; the first match wins
+    YOUTUBE,
+    FLICKR,
+    VIMEO,
+    SLIDESHARE,
+]
diff --git a/src/python_markdown_oembed_extension/oembedextension.py b/mdx_oembed/extension.py
similarity index 77%
rename from src/python_markdown_oembed_extension/oembedextension.py
rename to mdx_oembed/extension.py
index bfe3cc7..e236525 100644
--- a/src/python_markdown_oembed_extension/oembedextension.py
+++ b/mdx_oembed/extension.py
@@ -1,7 +1,10 @@
+from __future__ import annotations
+
from markdown import Extension
-import oembed
-from python_markdown_oembed_extension.endpoints import DEFAULT_ENDPOINTS
-from python_markdown_oembed_extension.inlinepatterns import OEmbedLinkPattern, OEMBED_LINK_RE
+
+from mdx_oembed.endpoints import DEFAULT_ENDPOINTS
+from mdx_oembed.inlinepatterns import OEMBED_LINK_RE, OEmbedLinkPattern
+from mdx_oembed.oembed import OEmbedConsumer
class OEmbedExtension(Extension):
@@ -21,7 +24,7 @@ class OEmbedExtension(Extension):
}
super().__init__(**kwargs)
- def extendMarkdown(self, md):
+ def extendMarkdown(self, md): # noqa: N802
consumer = self._prepare_oembed_consumer()
wrapper_class = self.getConfig('wrapper_class', 'oembed')
link_pattern = OEmbedLinkPattern(
@@ -32,8 +35,7 @@ class OEmbedExtension(Extension):
def _prepare_oembed_consumer(self):
allowed_endpoints = self.getConfig('allowed_endpoints', DEFAULT_ENDPOINTS)
- consumer = oembed.OEmbedConsumer()
+ consumer = OEmbedConsumer()
for endpoint in (allowed_endpoints or []):
- consumer.addEndpoint(endpoint)
+ consumer.add_endpoint(endpoint)
return consumer
-
diff --git a/src/python_markdown_oembed_extension/inlinepatterns.py b/mdx_oembed/inlinepatterns.py
similarity index 70%
rename from src/python_markdown_oembed_extension/inlinepatterns.py
rename to mdx_oembed/inlinepatterns.py
index 257f29b..0899223 100644
--- a/src/python_markdown_oembed_extension/inlinepatterns.py
+++ b/mdx_oembed/inlinepatterns.py
@@ -1,12 +1,17 @@
-import logging
-from posixpath import splitext
-from urllib.parse import urlparse
+from __future__ import annotations
-import nh3
-import oembed
-from markdown.inlinepatterns import InlineProcessor
+import html as _html
+import logging
+from os.path import splitext
+from urllib.parse import urlparse
from xml.etree.ElementTree import Element
+import markdown
+import nh3
+from markdown.inlinepatterns import InlineProcessor
+
+from mdx_oembed.oembed import OEmbedConsumer, OEmbedNoEndpoint
+
LOG = logging.getLogger(__name__)
# Image extensions to exclude from oEmbed processing
@@ -19,12 +24,23 @@ _IMAGE_EXTENSIONS = frozenset({
OEMBED_LINK_RE = r"!\[([^\]]*)\]\(((?:https?:)?//[^\)]+)\)"
# Allowed HTML tags and attributes for sanitizing oEmbed responses
-_SANITIZE_TAGS = {"iframe", "video", "audio", "source", "img", "blockquote", "div", "p", "a", "span", "figure"}
+_SANITIZE_TAGS = {
+ "iframe", "video", "audio", "source", "img",
+ "blockquote", "div", "p", "a", "span", "figure",
+}
_SANITIZE_ATTRS = {
"*": {"class", "style", "title"},
- "iframe": {"src", "width", "height", "frameborder", "allowfullscreen", "allow", "referrerpolicy", "sandbox"},
- "video": {"src", "width", "height", "controls", "autoplay", "loop", "muted", "poster", "preload"},
- "audio": {"src", "controls", "autoplay", "loop", "muted", "preload"},
+ "iframe": {
+ "src", "width", "height", "frameborder",
+ "allowfullscreen", "allow", "referrerpolicy", "sandbox",
+ },
+ "video": {
+ "src", "width", "height", "controls",
+ "autoplay", "loop", "muted", "poster", "preload",
+ },
+ "audio": {
+ "src", "controls", "autoplay", "loop", "muted", "preload",
+ },
"source": {"src", "type"},
"img": {"src", "alt", "width", "height", "loading"},
"a": {"href", "target"},
@@ -49,12 +65,18 @@ def _sanitize_html(html: str) -> str:
class OEmbedLinkPattern(InlineProcessor):
"""Inline processor that replaces Markdown image links with oEmbed content."""
- def __init__(self, pattern, md=None, oembed_consumer=None, wrapper_class="oembed"):
+ def __init__(
+ self,
+ pattern: str,
+ md: markdown.Markdown | None = None,
+ oembed_consumer: OEmbedConsumer | None = None,
+ wrapper_class: str = "oembed",
+ ) -> None:
super().__init__(pattern, md)
self.consumer = oembed_consumer
self.wrapper_class = wrapper_class
- def handleMatch(self, m, data):
+ def handleMatch(self, m, data): # noqa: N802
url = m.group(2).strip()
alt = m.group(1)
@@ -80,9 +102,12 @@ class OEmbedLinkPattern(InlineProcessor):
def _get_oembed_html(self, url: str, alt: str = "") -> str | None:
"""Fetch oEmbed HTML for a URL, handling different response types."""
+ if self.consumer is None:
+ LOG.warning("No oEmbed consumer configured")
+ return None
try:
response = self.consumer.embed(url)
- except oembed.OEmbedNoEndpoint:
+ except OEmbedNoEndpoint:
LOG.warning("No oEmbed endpoint for URL: %s", url)
return None
except Exception:
@@ -99,10 +124,11 @@ class OEmbedLinkPattern(InlineProcessor):
if photo_url:
width = response.get("width", "")
height = response.get("height", "")
- escaped_alt = alt.replace('"', "&quot;")
return (
- f''
+ f''
)
LOG.warning("oEmbed response for %s has no 'html' or 'url' field", url)
diff --git a/mdx_oembed/oembed.py b/mdx_oembed/oembed.py
new file mode 100644
index 0000000..75823eb
--- /dev/null
+++ b/mdx_oembed/oembed.py
@@ -0,0 +1,181 @@
+"""Minimal oEmbed consumer — replaces the python-oembed dependency.
+
+Implements just the subset used by this extension:
+ - OEmbedEndpoint: pairs an API URL with URL-glob patterns
+ - OEmbedConsumer: resolves a URL against registered endpoints and
+ fetches the oEmbed JSON response
+ - OEmbedError / OEmbedNoEndpoint: exception hierarchy
+"""
+
+from __future__ import annotations
+
+import fnmatch
+import json
+import logging
+import re
+import warnings
+from typing import Any
+from urllib.parse import urlencode
+from urllib.request import Request, urlopen
+
+from mdx_oembed.version import __version__
+
+__all__ = [
+ "OEmbedEndpoint",
+ "OEmbedConsumer",
+ "OEmbedError",
+ "OEmbedNoEndpoint",
+ "REQUEST_TIMEOUT",
+]
+
+LOG = logging.getLogger(__name__)
+
+# Default timeout (seconds) for outbound oEmbed HTTP requests.
+REQUEST_TIMEOUT = 10
+
+_USER_AGENT = f"python-markdown-oembed/{__version__}"
+
+# Pre-compiled regex for the ``https?://`` scheme shorthand used in oEmbed
+# URL patterns. Kept at module level to avoid re-creation on every call.
+_SCHEME_RE = re.compile(r"https\?://")
+_SCHEME_PLACEHOLDER = "__SCHEME__"
+
+
+# -- Exceptions -------------------------------------------------------------
+
+class OEmbedError(Exception):
+    """Root of the oEmbed exception hierarchy; catch it to handle any failure."""
+
+
+class OEmbedNoEndpoint(OEmbedError):  # noqa: N818
+    """Signals that no registered endpoint claims the requested URL."""
+
+
+# -- Endpoint ---------------------------------------------------------------
+
+class OEmbedEndpoint:
+    """An oEmbed provider endpoint: an API URL plus the content URLs it serves.
+
+    Parameters
+    ----------
+    api_url:
+        The provider's oEmbed API URL (e.g. ``https://www.youtube.com/oembed``).
+    url_patterns:
+        Shell-style globs (``*``, ``?``, ``[...]``) for the content URLs this
+        endpoint handles; a lone ``str`` is treated as one pattern.  The
+        literal ``https?://`` is the only regex-like shorthand: it matches
+        both ``http://`` and ``https://``.
+    """
+
+    def __init__(self, api_url: str, url_patterns: list[str] | str) -> None:
+        self.api_url = api_url
+        # Copy (wrapping a lone str) so the regexes can't drift from the list.
+        is_single = isinstance(url_patterns, str)
+        self.url_patterns = [url_patterns] if is_single else list(url_patterns)
+        self._regexes: list[re.Pattern[str]] = [
+            self._compile(p) for p in self.url_patterns
+        ]
+
+    def __repr__(self) -> str:
+        return f"OEmbedEndpoint({self.api_url!r}, {self.url_patterns!r})"
+
+    @staticmethod
+    def _compile(pattern: str) -> re.Pattern[str]:
+        """Convert a URL-glob pattern to a compiled, case-insensitive regex.
+
+        The ``https?://`` shorthand is swapped for a placeholder so that
+        `fnmatch` does not read its ``?`` as a single-character wildcard,
+        then restored as a real regex once the globs are translated.
+        """
+        converted = _SCHEME_RE.sub(_SCHEME_PLACEHOLDER, pattern)
+        # The translated regex is end-anchored; ``matches`` anchors the
+        # start via ``re.match``, so patterns must cover the whole URL.
+        regex = fnmatch.translate(converted)
+        regex = regex.replace(_SCHEME_PLACEHOLDER, r"https?://")
+        return re.compile(regex, re.IGNORECASE)
+
+    def matches(self, url: str) -> bool:
+        """Return True if *url* matches any of this endpoint's patterns."""
+        return any(r.match(url) for r in self._regexes)
+
+
+# -- Consumer ---------------------------------------------------------------
+
+class OEmbedConsumer:
+    """Registry of `OEmbedEndpoint` objects that can resolve arbitrary URLs.
+
+    Parameters
+    ----------
+    timeout:
+        HTTP request timeout in seconds.  Defaults to :data:`REQUEST_TIMEOUT`.
+    """
+
+    def __init__(self, timeout: float = REQUEST_TIMEOUT) -> None:
+        self._endpoints: list[OEmbedEndpoint] = []
+        self.timeout = timeout
+
+    def __repr__(self) -> str:
+        names = [ep.api_url for ep in self._endpoints]
+        return f"OEmbedConsumer(endpoints={names!r})"
+
+    def add_endpoint(self, endpoint: OEmbedEndpoint) -> None:
+        """Register an oEmbed endpoint; earlier registrations win on overlap."""
+        self._endpoints.append(endpoint)
+
+    def addEndpoint(self, endpoint: OEmbedEndpoint) -> None:  # noqa: N802
+        """Deprecated alias for :meth:`add_endpoint`."""
+        warnings.warn(
+            "addEndpoint() is deprecated, use add_endpoint() instead",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        self.add_endpoint(endpoint)
+
+    def embed(self, url: str) -> dict[str, Any]:
+        """Fetch the oEmbed response for *url*.
+
+        Returns the parsed JSON object as a ``dict``.
+
+        Raises
+        ------
+        OEmbedNoEndpoint
+            If none of the registered endpoints match *url*.
+        OEmbedError
+            On HTTP failures, or if the body is not a JSON object.
+        """
+        endpoint = self._find_endpoint(url)
+        if endpoint is None:
+            raise OEmbedNoEndpoint(f"No oEmbed endpoint registered for {url}")
+        return self._fetch(endpoint, url)
+
+    def _find_endpoint(self, url: str) -> OEmbedEndpoint | None:
+        """Return the first registered endpoint matching *url*, else None."""
+        return next((ep for ep in self._endpoints if ep.matches(url)), None)
+
+    def _fetch(self, endpoint: OEmbedEndpoint, content_url: str) -> dict[str, Any]:
+        """GET *endpoint*'s API for *content_url*; return the decoded JSON object."""
+        params = urlencode({"url": content_url, "format": "json"})
+        api_url = f"{endpoint.api_url}?{params}"
+        request = Request(api_url, headers={  # noqa: S310
+            "Accept": "application/json",
+            "User-Agent": _USER_AGENT,
+        })
+        LOG.debug("Fetching oEmbed: %s", api_url)
+        try:
+            with urlopen(request, timeout=self.timeout) as resp:  # noqa: S310
+                if resp.status is not None and not (200 <= resp.status < 300):
+                    raise OEmbedError(
+                        f"oEmbed request for {content_url} returned HTTP {resp.status}"
+                    )
+                charset = resp.headers.get_content_charset() or "utf-8"
+                data = json.loads(resp.read().decode(charset))
+        except OEmbedError:
+            raise
+        except Exception as exc:
+            raise OEmbedError(
+                f"Failed to fetch oEmbed for {content_url}: {exc}"
+            ) from exc
+        # Valid JSON may still be a list/scalar; callers rely on dict access.
+        if not isinstance(data, dict):
+            raise OEmbedError(f"oEmbed response for {content_url} is not a JSON object")
+        return data
diff --git a/mdx_oembed/py.typed b/mdx_oembed/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/mdx_oembed/version.py b/mdx_oembed/version.py
index 6a9beea..3d18726 100644
--- a/mdx_oembed/version.py
+++ b/mdx_oembed/version.py
@@ -1 +1 @@
-__version__ = "0.4.0"
+__version__ = "0.5.0"
diff --git a/pyproject.toml b/pyproject.toml
index ff29e6b..cc595ce 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,10 +1,10 @@
[project]
name = "python-markdown-oembed-extension"
-version = "0.4.0"
+dynamic = ["version"]
description = "Markdown extension to allow media embedding using the oEmbed standard."
readme = {file = "README.md", content-type = "text/markdown"}
license = "Unlicense"
-requires-python = ">=3.9"
+requires-python = ">=3.12"
authors = [
{ name = "Benedikt Willi", email = "ben.willi@gmail.com" },
{ name = "Tanner Netterville", email = "tannern@gmail.com" },
@@ -15,14 +15,11 @@ classifiers = [
"License :: Public Domain",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
+ "Typing :: Typed",
]
dependencies = [
- "python-oembed>=0.2.1",
"Markdown>=3.2",
"nh3>=0.2",
]
@@ -37,6 +34,9 @@ oembed = "mdx_oembed:makeExtension"
requires = ["hatchling"]
build-backend = "hatchling.build"
+[tool.hatch.version]
+path = "mdx_oembed/version.py"
+
[tool.hatch.build.targets.sdist]
include = [
"mdx_oembed/",
@@ -52,8 +52,26 @@ packages = ["mdx_oembed"]
dev = [
"pytest>=7.0",
"pytest-mock>=3.0",
+ "ruff>=0.4",
+ "pyright>=1.1",
]
[tool.pytest.ini_options]
testpaths = ["."]
python_files = ["tests.py"]
+
+[tool.ruff]
+target-version = "py312"
+
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "UP", "N", "S", "B"]
+ignore = [
+ "S101", # assert used — standard in pytest
+]
+
+[tool.ruff.lint.per-file-ignores]
+"tests.py" = ["S106"]
+
+[tool.pyright]
+pythonVersion = "3.12"
+typeCheckingMode = "standard"
diff --git a/src/python_markdown_oembed_extension/__init__.py b/src/python_markdown_oembed_extension/__init__.py
deleted file mode 100644
index b733a04..0000000
--- a/src/python_markdown_oembed_extension/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# -*- coding: utf-8 -*-
-from python_markdown_oembed_extension.oembedextension import OEmbedExtension
-
-
-VERSION = '0.2.2'
-
-
-def makeExtension(**kwargs):
- return OEmbedExtension(**kwargs)
diff --git a/src/python_markdown_oembed_extension/endpoints.py b/src/python_markdown_oembed_extension/endpoints.py
deleted file mode 100644
index 1f12c32..0000000
--- a/src/python_markdown_oembed_extension/endpoints.py
+++ /dev/null
@@ -1,31 +0,0 @@
-import oembed
-
-# URL patterns use python-oembed's glob-like syntax, not standard regex.
-
-YOUTUBE = oembed.OEmbedEndpoint('https://www.youtube.com/oembed', [
- 'https?://(*.)?youtube.com/*',
- 'https?://youtu.be/*',
-])
-
-SLIDESHARE = oembed.OEmbedEndpoint('https://www.slideshare.net/api/oembed/2', [
- 'https?://www.slideshare.net/*/*',
- 'https?://fr.slideshare.net/*/*',
- 'https?://de.slideshare.net/*/*',
- 'https?://es.slideshare.net/*/*',
- 'https?://pt.slideshare.net/*/*',
-])
-
-FLICKR = oembed.OEmbedEndpoint('https://www.flickr.com/services/oembed/', [
- 'https?://*.flickr.com/*',
-])
-
-VIMEO = oembed.OEmbedEndpoint('https://vimeo.com/api/oembed.json', [
- 'https?://vimeo.com/*',
-])
-
-DEFAULT_ENDPOINTS = [
- YOUTUBE,
- FLICKR,
- VIMEO,
- SLIDESHARE,
-]
diff --git a/src/python_markdown_oembed_extension/tests/test_expectedHtml.html b/src/python_markdown_oembed_extension/tests/test_expectedHtml.html
deleted file mode 100644
index 0740525..0000000
--- a/src/python_markdown_oembed_extension/tests/test_expectedHtml.html
+++ /dev/null
@@ -1,9 +0,0 @@
-
In this video Jakob Zinsstag introduces the topic of the course. You will
-discover that the relationship between humans and animals is manifold.
-{.lead}
-
-
Have a look at the farm of Jakob Zinsstag’s cousin in the Canton of Jura,
-Switzerland. Different animals create different feelings: there are those we
-love, some provoke fears and others will be eaten. Jakob Zinsstag shares the
-personal experiences he has had with animals.
-
How do you categorise your own experience with animals?