diff --git a/docs/help.md b/docs/help.md index 8dcd88f..75e9371 100644 --- a/docs/help.md +++ b/docs/help.md @@ -129,6 +129,7 @@ Options: --async Run prompt asynchronously -u, --usage Show token usage -x, --extract Extract first fenced code block + --xl, --extract-last Extract last fenced code block --help Show this message and exit. ``` @@ -302,6 +303,7 @@ Options: -u, --usage Include token usage -r, --response Just output the last response -x, --extract Extract first fenced code block + --xl, --extract-last Extract last fenced code block -c, --current Show logs from the current conversation --cid, --conversation TEXT Show logs for this conversation ID --json Output logs as JSON diff --git a/docs/logging.md b/docs/logging.md index 508f22b..3f0926a 100644 --- a/docs/logging.md +++ b/docs/logging.md @@ -64,7 +64,11 @@ llm logs -r Use `-x/--extract` to extract and return the first fenced code block from the selected log entries: ```bash -llm logs -x +llm logs --extract +``` +Or `--xl/--extract-last` for the last fenced code block: +```bash +llm logs --extract-last ``` Add `--json` to get the log messages in JSON instead: diff --git a/docs/usage.md b/docs/usage.md index 3420c27..b2847ae 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -61,6 +61,8 @@ def my_function(): ```` It will extract and returns just the content of that block, excluding the fenced coded delimiters. If there are no fenced code blocks it will return the full response. +Use `--xl/--extract-last` to return the last fenced code block instead of the first. + The entire response including explanatory text is still logged to the database, and can be viewed using `llm logs -c`. 
(usage-attachments)= diff --git a/llm/cli.py b/llm/cli.py index 1bde74b..3f5fffb 100644 --- a/llm/cli.py +++ b/llm/cli.py @@ -37,7 +37,7 @@ from .utils import ( mimetype_from_path, mimetype_from_string, token_usage_string, - extract_first_fenced_code_block, + extract_fenced_code_block, ) import base64 import httpx @@ -210,6 +210,13 @@ def cli(): @click.option("async_", "--async", is_flag=True, help="Run prompt asynchronously") @click.option("-u", "--usage", is_flag=True, help="Show token usage") @click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block") +@click.option( + "extract_last", + "--xl", + "--extract-last", + is_flag=True, + help="Extract last fenced code block", +) def prompt( prompt, system, @@ -229,6 +236,7 @@ def prompt( async_, usage, extract, + extract_last, ): """ Execute a prompt @@ -318,6 +326,8 @@ def prompt( to_save["defaults"] = dict(param) if extract: to_save["extract"] = True + if extract_last: + to_save["extract_last"] = True path.write_text( yaml.dump( to_save, @@ -335,6 +345,7 @@ def prompt( raise click.ClickException("Cannot use -t/--template and --system together") template_obj = load_template(template) extract = template_obj.extract + extract_last = template_obj.extract_last prompt = read_prompt() try: prompt, system = template_obj.evaluate(prompt, params) @@ -343,7 +354,7 @@ def prompt( if model_id is None and template_obj.model: model_id = template_obj.model - if extract: + if extract or extract_last: no_stream = True conversation = None @@ -427,8 +438,10 @@ def prompt( **validated_options, ) text = await response.text() - if extract: - text = extract_first_fenced_code_block(text) or text + if extract or extract_last: + text = ( + extract_fenced_code_block(text, last=extract_last) or text + ) print(text) return response @@ -447,8 +460,8 @@ def prompt( print("") else: text = response.text() - if extract: - text = extract_first_fenced_code_block(text) or text + if extract or extract_last: + text = 
extract_fenced_code_block(text, last=extract_last) or text print(text) # List of exceptions that should never be raised in pytest: except (ValueError, NotImplementedError) as ex: @@ -862,6 +875,13 @@ order by prompt_attachments."order" @click.option("-u", "--usage", is_flag=True, help="Include token usage") @click.option("-r", "--response", is_flag=True, help="Just output the last response") @click.option("-x", "--extract", is_flag=True, help="Extract first fenced code block") +@click.option( + "extract_last", + "--xl", + "--extract-last", + is_flag=True, + help="Extract last fenced code block", +) @click.option( "current_conversation", "-c", @@ -891,6 +911,7 @@ def logs_list( usage, response, extract, + extract_last, current_conversation, conversation_id, json_output, @@ -996,10 +1017,10 @@ def logs_list( for attachment in attachments_by_id.get(row["id"], []) ] output = json.dumps(list(rows), indent=2) - elif extract: + elif extract or extract_last: # Extract and return first code block for row in rows: - output = extract_first_fenced_code_block(row["response"]) + output = extract_fenced_code_block(row["response"], last=extract_last) if output is not None: break elif response: diff --git a/llm/templates.py b/llm/templates.py index b540fad..0cf1616 100644 --- a/llm/templates.py +++ b/llm/templates.py @@ -9,8 +9,9 @@ class Template(BaseModel): system: Optional[str] = None model: Optional[str] = None defaults: Optional[Dict[str, Any]] = None - # Should first fenced code block be extracted? + # Should a fenced code block be extracted? 
extract: Optional[bool] = None + extract_last: Optional[bool] = None class Config: extra = "forbid" diff --git a/llm/utils.py b/llm/utils.py index a4f57a0..a7170dd 100644 --- a/llm/utils.py +++ b/llm/utils.py @@ -156,9 +156,9 @@ def token_usage_string(input_tokens, output_tokens, token_details) -> str: return ", ".join(bits) -def extract_first_fenced_code_block(text: str) -> Optional[str]: +def extract_fenced_code_block(text: str, last: bool = False) -> Optional[str]: """ - Extracts and returns the first Markdown fenced code block found in the given text. + Extracts and returns a Markdown fenced code block found in the given text. The function handles fenced code blocks that: - Use at least three backticks (`). @@ -169,9 +169,10 @@ def extract_first_fenced_code_block(text: str) -> Optional[str]: Args: text (str): The input text to search for a fenced code block. + last (bool): Extract the last code block if True, otherwise the first. Returns: - Optional[str]: The content of the first fenced code block, or None if not found. + Optional[str]: The content of the fenced code block, or None if not found. """ # Regex pattern to match fenced code blocks # - ^ or \n ensures that the fence is at the start of a line @@ -179,13 +180,15 @@ def extract_first_fenced_code_block(text: str) -> Optional[str]: # - (\w+)? optionally captures the language tag # - \n matches the newline after the opening fence # - (.*?) 
non-greedy match for the code block content - # - \1 ensures that the closing fence has the same number of backticks + # - (?P=fence) ensures that the closing fence has the same number of backticks + # - [ ]* allows for optional spaces between the closing fence and newline # - (?=\n|$) ensures that the closing fence is followed by a newline or end of string pattern = re.compile( - r"""(?m)^(?P<fence>`{3,})(?P<lang>\w+)?\n(?P<code>.*?)^(?P=fence)(?=\n|$)""", + r"""(?m)^(?P<fence>`{3,})(?P<lang>\w+)?\n(?P<code>.*?)^(?P=fence)[ ]*(?=\n|$)""", re.DOTALL, ) - match = pattern.search(text) - if match: + matches = list(pattern.finditer(text)) + if matches: + match = matches[-1] if last else matches[0] return match.group("code") return None diff --git a/tests/test_llm.py b/tests/test_llm.py index c122430..53ec90d 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -145,6 +145,25 @@ def test_logs_extract_first_code(args, log_path): assert result.output == 'print("hello word")\n\n' +@pytest.mark.parametrize( + "args", + ( + ["--xl"], + ["--extract-last"], + ["list", "--xl"], + ["list", "--extract-last"], + ["--xl", "-r"], + ["-x", "--xl"], + ), +) +def test_logs_extract_last_code(args, log_path): + "Test that logs --xl/--extract-last returns the last code block" + runner = CliRunner() + result = runner.invoke(cli, ["logs"] + args, catch_exceptions=False) + assert result.exit_code == 0 + assert result.output == 'print("hello word")\n\n' + + @pytest.mark.xfail(sys.platform == "win32", reason="Expected to fail on Windows") @pytest.mark.parametrize("env", ({}, {"LLM_USER_PATH": "/tmp/llm-user-path"})) def test_logs_path(monkeypatch, env, user_path): diff --git a/tests/test_utils.py b/tests/test_utils.py index 783e589..ef4b805 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,5 @@ import pytest -from llm.utils import simplify_usage_dict, extract_first_fenced_code_block +from llm.utils import simplify_usage_dict, extract_fenced_code_block @pytest.mark.parametrize( @@ -43,40 +43,61 @@ def 
test_simplify_usage_dict(input_data, expected_output): @pytest.mark.parametrize( - "input,expected", + "input,last,expected", [ - ["This is a sample text without any code blocks.", None], + ["This is a sample text without any code blocks.", False, None], [ "Here is some text.\n\n```\ndef foo():\n return 'bar'\n```\n\nMore text.", + False, "def foo():\n return 'bar'\n", ], [ "Here is some text.\n\n```python\ndef foo():\n return 'bar'\n```\n\nMore text.", + False, "def foo():\n return 'bar'\n", ], [ "Here is some text.\n\n````\ndef foo():\n return 'bar'\n````\n\nMore text.", + False, "def foo():\n return 'bar'\n", ], [ "Here is some text.\n\n````javascript\nfunction foo() {\n return 'bar';\n}\n````\n\nMore text.", + False, "function foo() {\n return 'bar';\n}\n", ], [ "Here is some text.\n\n```python\ndef foo():\n return 'bar'\n````\n\nMore text.", + False, None, ], [ "First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\n" "Second code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```", + False, "def foo():\n return 'bar'\n", ], + [ + "First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\n" + "Second code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n```", + True, + "function foo() {\n return 'bar';\n}\n", + ], + [ + "First code block:\n\n```python\ndef foo():\n return 'bar'\n```\n\n" + # This one has trailing whitespace after the second code block: + # https://github.com/simonw/llm/pull/718#issuecomment-2613177036 + "Second code block:\n\n```javascript\nfunction foo() {\n return 'bar';\n}\n``` ", + True, + "function foo() {\n return 'bar';\n}\n", + ], [ "Here is some text.\n\n```python\ndef foo():\n return `bar`\n```\n\nMore text.", + False, "def foo():\n return `bar`\n", ], ], ) -def test_extract_first_fenced_code_block(input, expected): - actual = extract_first_fenced_code_block(input) +def test_extract_fenced_code_block(input, last, expected): + actual = extract_fenced_code_block(input, last=last) assert 
actual == expected