feat(llm): enhance structured output prompts

2026-02-24 18:05:11 +00:00 · 2026-02-11 17:48:33 +08:00
parent 29d6d030f8
commit ccf27adaa3
2 changed files with 104 additions and 6 deletions
--- a/api/core/llm_generator/output_parser/structured_output.py
+++ b/api/core/llm_generator/output_parser/structured_output.py
@@ -9,7 +9,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError

 from core.llm_generator.output_parser.errors import OutputParserError
 from core.llm_generator.output_parser.file_ref import detect_file_path_fields
-from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT, STRUCTURED_OUTPUT_TOOL_CALL_PROMPT
+from core.llm_generator.prompts import (
+    STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
+    STRUCTURED_OUTPUT_PROMPT,
+    STRUCTURED_OUTPUT_TOOL_CALL_PROMPT,
+)
 from core.model_manager import ModelInstance
 from core.model_runtime.callbacks.base_callback import Callback
 from core.model_runtime.entities.llm_entities import (
@@ -20,6 +24,7 @@ from core.model_runtime.entities.message_entities import (
    PromptMessage,
    PromptMessageTool,
    SystemPromptMessage,
+    UserPromptMessage,
 )
 from core.model_runtime.entities.model_entities import AIModelEntity, ModelFeature, ParameterRule

@@ -108,6 +113,21 @@ def invoke_llm_with_structured_output(
        use_tool_call=use_tool_call,
    )

+    # Append a "final turn" reminder at the very end of the conversation so the
+    # model sees it right before generating.  This exploits recency bias to
+    # override the in-context bash/tool-call patterns from earlier history.
+    # Merge into the last user message when possible to avoid consecutive
+    # UserPromptMessages (some APIs like Anthropic require user/assistant alternation).
+    if use_tool_call:
+        messages = list(prompt_messages)
+        if messages and isinstance(messages[-1], UserPromptMessage) and isinstance(messages[-1].content, str):
+            messages[-1] = UserPromptMessage(
+                content=messages[-1].content + "\n\n" + STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
+            )
+        else:
+            messages.append(UserPromptMessage(content=STRUCTURED_OUTPUT_FINAL_TURN_REMINDER))
+        prompt_messages = messages
+
    llm_result = model_instance.invoke_llm(
        prompt_messages=list(prompt_messages),
        model_parameters=model_parameters_with_json_schema,
@@ -441,6 +461,11 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
    # Convert boolean types to string types (common requirement)
    convert_boolean_to_string(processed_schema)

+    # Strip Dify-internal custom formats (e.g. "file-path") that external model APIs
+    # do not recognise.  The field type ("string") is sufficient for the model to
+    # produce the expected value; the custom format is only used by Dify post-processing.
+    _strip_custom_formats(processed_schema)
+
    # Apply model-specific transformations
    if SpecialModelType.GEMINI in model_schema.model:
        remove_additional_properties(processed_schema)
@@ -448,7 +473,10 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
    elif SpecialModelType.OLLAMA in provider:
        return processed_schema
    else:
-        # Default format with name field
+        # OpenAI-style native structured output requires every property key to
+        # appear in ``required``.  Ensure this recursively so user schemas that
+        # leave ``required`` empty or partial don't get rejected by the API.
+        _ensure_all_properties_required(processed_schema)
        return {"schema": processed_schema, "name": "llm_response"}


@@ -496,3 +524,57 @@ def convert_boolean_to_string(schema: dict):
            for item in value:
                if isinstance(item, dict):
                    convert_boolean_to_string(item)
+
+
+# Formats that are Dify-internal and not part of the standard JSON Schema spec
+# recognised by model providers (OpenAI, Azure, Google, etc.).
+_CUSTOM_FORMATS = frozenset({"file-path"})  # stored lower-case with hyphens; lookups normalise to this form
+
+
+def _strip_custom_formats(schema: dict) -> None:
+    """Remove Dify-internal ``format`` values from a JSON schema in-place.
+
+    Model APIs (OpenAI, Azure, etc.) reject unknown format values in their
+    structured-output / response_format mode.  This strips only the formats
+    that are Dify-specific (e.g. ``file-path``); standard formats like
+    ``date-time`` or ``email`` are left untouched.
+    """
+    if not isinstance(schema, dict):  # defensive: non-dict input is silently ignored
+        return
+
+    fmt = schema.get("format")  # matched case-insensitively, with "_" treated as "-"
+    if isinstance(fmt, str) and fmt.lower().replace("_", "-") in _CUSTOM_FORMATS:
+        del schema["format"]  # only the keyword is dropped; sibling keys such as "type" stay
+
+    for value in schema.values():  # walks every value, not only schema-bearing keywords
+        if isinstance(value, dict):
+            _strip_custom_formats(value)
+        elif isinstance(value, list):
+            for item in value:
+                if isinstance(item, dict):  # non-dict list items (strings, numbers) are skipped
+                    _strip_custom_formats(item)
+
+
+def _ensure_all_properties_required(schema: dict) -> None:
+    """Ensure ``required`` lists every key from ``properties``, recursively.
+
+    OpenAI's native structured-output mode (response_format with json_schema)
+    mandates that ``required`` contains ALL property names.  Schemas authored
+    in Dify may leave ``required`` empty or partial, so we patch it here
+    before sending to the API.
+    """
+    if not isinstance(schema, dict):  # defensive: non-dict input is silently ignored
+        return
+
+    if schema.get("type") == "object":  # a list-valued "type" (e.g. ["object", "null"]) does not match
+        properties = schema.get("properties")
+        if isinstance(properties, dict) and properties:  # empty/non-dict "properties": "required" untouched
+            schema["required"] = list(properties.keys())  # replaces any existing list, in property order
+
+    for value in schema.values():  # recurse through every nested dict and list of dicts
+        if isinstance(value, dict):
+            _ensure_all_properties_required(value)
+        elif isinstance(value, list):
+            for item in value:
+                if isinstance(item, dict):  # non-dict list items (e.g. names in "required") are skipped
+                    _ensure_all_properties_required(item)
--- a/api/core/llm_generator/prompts.py
+++ b/api/core/llm_generator/prompts.py
@@ -323,12 +323,28 @@ Here is the JSON schema:
 {{schema}}
 """  # noqa: E501

-STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """The ONLY tool available to you is `structured_output`. You MUST call this tool to provide your final answer.
-Do NOT call any other tool. Tools such as `bash`, `python`, or any others that may appear in the conversation history are NOT available to you — they are part of historical context only.
-Do NOT write JSON directly in your message. Instead, always invoke the `structured_output` tool with the appropriate arguments.
-If you respond without calling `structured_output`, or if you call any other tool, your answer will be considered invalid.
+STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """## MANDATORY INSTRUCTION — read before responding
+
+You have EXACTLY ONE tool: `structured_output`.  You MUST call it with the correct arguments to provide your final answer.
+
+### Rules (violation = invalid response)
+1. Call `structured_output` — this is the ONLY action you can take.
+2. Do NOT output raw JSON text — always use the tool call.
+3. Do NOT call any other tool (bash, python, code_interpreter, etc.) — they do NOT exist and will be rejected.
+4. Do NOT ask clarifying questions or say you cannot answer — extract the best answer from the available context and call `structured_output`.
+
+### About conversation history
+The messages above may contain calls to tools like `bash`, `python`, `code_interpreter`, etc.
+Those calls happened in PREVIOUS steps that have already finished. The results are shown for your reference.
+You CANNOT execute those tools — they are no longer available. Read their outputs as context, then summarise your answer into `structured_output`.
 """  # noqa: E501

+STRUCTURED_OUTPUT_FINAL_TURN_REMINDER = (  # final user-turn text added when use_tool_call is set
+    "[SYSTEM] This is the FINAL turn. No further interaction is possible after this. "
+    "You must call `structured_output` NOW with your best answer based on the conversation above. "
+    "Do NOT call bash, python, or any other tool. Do NOT ask questions. Just call `structured_output`."
+)
+
 LLM_MODIFY_PROMPT_SYSTEM = """
 Both your input and output should be in JSON format.