diff --git a/eav/logic/slug.py b/eav/logic/slug.py
index 4984566..89f0410 100644
--- a/eav/logic/slug.py
+++ b/eav/logic/slug.py
@@ -1,3 +1,6 @@
+from __future__ import annotations
+
+import keyword
 import secrets
 import string
 from typing import Final
@@ -6,17 +9,69 @@ from django.utils.text import slugify
 
 SLUGFIELD_MAX_LENGTH: Final = 50
 
 
-def generate_slug(name: str) -> str:
-    """Generates a valid slug based on ``name``."""
-    slug = slugify(name, allow_unicode=False)
+def non_identifier_chars() -> dict[str, str]:
+    """Map printable non-identifier characters to ``uXXXX`` placeholders.
+
+    Every character of ``string.printable`` that is neither alphanumeric,
+    an underscore nor a space is mapped to ``u`` followed by its code point
+    as four lowercase hex digits, for example ``"#"`` -> ``"u0023"``.
+    Whitespace other than the plain space (tab, newline, ...) belongs to
+    ``string.printable`` and is therefore mapped as well.
+
+    Returns:
+        dict[str, str]: A freshly built mapping from each special character
+            to its placeholder.
+    """
+    return {
+        char: f"u{ord(char):04x}"
+        for char in string.printable
+        if not char.isalnum() and char not in {"_", " "}
+    }
+
+
+# Built once at import time so that ``generate_slug`` can substitute every
+# special character in a single ``str.translate`` pass instead of rebuilding
+# the mapping and chaining one ``str.replace`` per character on each call.
+_NON_IDENTIFIER_TABLE: Final = str.maketrans(non_identifier_chars())
+
+
+def generate_slug(value: str) -> str:
+    """Generate a slug from ``value`` that is usable as a Python identifier.
+
+    Printable ASCII special characters are replaced by ``uXXXX`` placeholders
+    so that inputs differing only in such characters yield different slugs.
+    The result is slugified to ASCII with ``_`` as separator, prefixed with
+    ``_`` if it starts with a digit, suffixed with ``_`` if it is a reserved
+    keyword (``str.isidentifier`` alone accepts ``class``), and truncated.
+
+    Args:
+        value (str): The input string to generate a slug from.
+
+    Returns:
+        str: A non-empty slug of at most SLUGFIELD_MAX_LENGTH characters. If
+            nothing of ``value`` survives slugification (e.g. an empty or
+            emoji-only input) a random ``rand_xxxxxxxx`` slug is returned.
+    """
+    escaped = value.translate(_NON_IDENTIFIER_TABLE)
+
+    # ``slugify`` separates words with hyphens, which are not allowed in
+    # identifiers, so they are turned into underscores.
+    slug = slugify(escaped, allow_unicode=False).replace("-", "_")
 
+    # Nothing survived slugification: fall back to a random slug so the
+    # result is never empty.
     if not slug:
-        # Fallback to ensure a slug is always generated by using a random one
         chars = string.ascii_lowercase + string.digits
-        randstr = ''.join(secrets.choice(chars) for _ in range(8))
-        slug = 'rand-{0}'.format(randstr)
+        randstr = "".join(secrets.choice(chars) for _ in range(8))
+        slug = f"rand_{randstr}"
 
-    slug = slug.encode('utf-8', 'surrogateescape').decode()
+    # An identifier must not start with a digit.
+    if slug[0].isdigit():
+        slug = "_" + slug
+
+    # Reserved words pass ``str.isidentifier`` but cannot be used as names.
+    if keyword.iskeyword(slug):
+        slug += "_"
 
     return slug[:SLUGFIELD_MAX_LENGTH]
diff --git a/tests/test_logic.py b/tests/test_logic.py
index 6169a66..db26567 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -1,3 +1,6 @@
+import keyword
+
+import pytest
 from hypothesis import given
 from hypothesis import strategies as st
 
@@ -18,3 +21,60 @@ def test_generate_long_slug_text(name: str) -> None:
     slug = generate_slug(name)
 
     assert len(slug) <= SLUGFIELD_MAX_LENGTH
+
+
+def test_generate_slug_uniqueness() -> None:
+    """Test that generate_slug() produces unique slugs for different inputs.
+
+    Inputs that differ only in a trailing special character must not
+    collapse onto the same slug.
+    """
+    inputs = ["age #", "age %", "age $", "age @", "age!", "age?", "age 😊"]
+
+    seen: dict[str, str] = {}
+    for input_str in inputs:
+        slug = generate_slug(input_str)
+        assert slug not in seen, (
+            f"Duplicate slug '{slug}' generated for inputs "
+            f"'{seen[slug]}' and '{input_str}'"
+        )
+        seen[slug] = input_str
+
+
+@pytest.mark.parametrize(
+    "input_str",
+    [
+        "01 age",
+        "? age",
+        "age 😊",
+        "class",
+        "def function",
+        "2nd place",
+        "@username",
+        "user-name",
+        "first.last",
+        "snake_case",
+        "CamelCase",
+        " ",  # Whitespace only
+        "",  # Empty
+    ],
+)
+def test_generate_slug_valid_identifier(input_str: str) -> None:
+    """Test that generate_slug() produces usable Python identifiers.
+
+    The inputs cover leading digits, special characters, emojis, reserved
+    keywords, different naming conventions and blank strings.
+
+    Args:
+        input_str (str): The input string to test.
+    """
+    slug = generate_slug(input_str)
+    assert slug.isidentifier(), (
+        f"Generated slug '{slug}' for input '{input_str}' "
+        "is not a valid Python identifier"
+    )
+    # ``str.isidentifier`` is also true for reserved words such as ``class``.
+    assert not keyword.iskeyword(slug), (
+        f"Generated slug '{slug}' for input '{input_str}' "
+        "is a reserved Python keyword"
+    )