refactor(slug): ensure generated slugs are valid Python identifiers

2026-05-08 07:34:50 +00:00 · 2024-08-31 18:12:45 -07:00 · 2024-08-31 18:12:45 -07:00 · 6f141ff4f2
commit 6f141ff4f2
parent ab23ba118d
2 changed files with 96 additions and 7 deletions
--- a/eav/logic/slug.py
+++ b/eav/logic/slug.py
@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import secrets
 import string
 from typing import Final
@ -6,17 +8,53 @@ from django.utils.text import slugify

 SLUGFIELD_MAX_LENGTH: Final = 50

+def non_identifier_chars() -> dict[str, str]:
+    """Generate a mapping of non-identifier characters to their Unicode representations.

-def generate_slug(name: str) -> str:
-    """Generates a valid slug based on ``name``."""
-    slug = slugify(name, allow_unicode=False)
+    Returns:
+        dict[str, str]: A dictionary where keys are special characters and values
+            are "u" followed by the four-digit hex code point (e.g. "#" -> "u0023").
+    """
+    # Start with all printable ASCII characters (string.printable)
+    all_chars = string.printable

+    # Filter out alphanumerics, "_" and the space character; map everything else
+    special_chars = [
+        char for char in all_chars
+        if not char.isalnum() and char not in ["_", " "]
+    ]
+
+    return {char: f"u{ord(char):04x}" for char in special_chars}
+
+def generate_slug(value: str) -> str:
+    """Generate a valid slug based on the given value.
+
+    This function converts the input value into a Python-identifier-friendly slug.
+    It handles special characters, ensures a valid Python identifier, and truncates
+    the result to fit within the maximum allowed length.
+
+    Args:
+        value (str): The input string to generate a slug from.
+
+    Returns:
+        str: A valid Python identifier slug, with a maximum
+            length of SLUGFIELD_MAX_LENGTH.
+    """
+    for char, replacement in non_identifier_chars().items():
+        value = value.replace(char, replacement)
+
+    # Build an ASCII base slug with slugify, then turn hyphens into underscores.
+    slug = slugify(value, allow_unicode=False).replace("-", "_")
+
+    # If slugify returns an empty string, generate a fallback
+    # slug to ensure it's never empty.
    if not slug:
-        # Fallback to ensure a slug is always generated by using a random one
        chars = string.ascii_lowercase + string.digits
-        randstr = ''.join(secrets.choice(chars) for _ in range(8))
-        slug = 'rand-{0}'.format(randstr)
+        randstr = "".join(secrets.choice(chars) for _ in range(8))
+        slug = f"rand_{randstr}"

-    slug = slug.encode('utf-8', 'surrogateescape').decode()
+    # Ensure the slug doesn't start with a digit to make it a valid Python identifier.
+    if slug[0].isdigit():
+        slug = "_" + slug

    return slug[:SLUGFIELD_MAX_LENGTH]
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@ -1,3 +1,4 @@
+import pytest
 from hypothesis import given
 from hypothesis import strategies as st

@ -18,3 +19,53 @@ def test_generate_long_slug_text(name: str) -> None:
    slug = generate_slug(name)

    assert len(slug) <= SLUGFIELD_MAX_LENGTH
+
+
+def test_generate_slug_uniqueness() -> None:
+    """Test that generate_slug() produces unique slugs for different inputs.
+
+    This test ensures that even similar inputs result in unique slugs,
+    and that the number of unique slugs matches the number of inputs.
+    """
+    inputs = ["age #", "age %", "age $", "age @", "age!", "age?", "age 😊"]
+
+    generated_slugs: dict[str, str] = {}
+    for input_str in inputs:
+        slug = generate_slug(input_str)
+        assert slug not in generated_slugs.values(), \
+            f"Duplicate slug '{slug}' generated for input '{input_str}'"
+        generated_slugs[input_str] = slug
+
+    assert len(generated_slugs) == len(inputs), \
+        "Number of unique slugs doesn't match number of inputs"
+
+
+@pytest.mark.parametrize("input_str", [
+    "01 age",
+    "? age",
+    "age 😊",
+    "class",
+    "def function",
+    "2nd place",
+    "@username",
+    "user-name",
+    "first.last",
+    "snake_case",
+    "CamelCase",
+    "  " # Whitespace only (not an empty string)
+])
+def test_generate_slug_valid_identifier(input_str: str) -> None:
+    """Test that generate_slug() produces valid Python identifiers.
+
+    This test ensures that the generated slugs are valid Python identifiers
+    for a variety of input strings, including those with numbers, special
+    characters, emojis, and different naming conventions.
+
+    Args:
+        input_str (str): The input string to test.
+    """
+    slug = generate_slug(input_str)
+    assert slug.isidentifier(), (
+        f"Generated slug '{slug}' for input '{input_str}' "
+        "is not a valid Python identifier"
+    )