feat(llm): enhance structured output prompts
Some checks failed
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled

This commit is contained in:
Novice
2026-02-11 17:48:33 +08:00
parent 29d6d030f8
commit ccf27adaa3
2 changed files with 104 additions and 6 deletions

View File

@@ -9,7 +9,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError
from core.llm_generator.output_parser.errors import OutputParserError
from core.llm_generator.output_parser.file_ref import detect_file_path_fields
from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT, STRUCTURED_OUTPUT_TOOL_CALL_PROMPT
from core.llm_generator.prompts import (
STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
STRUCTURED_OUTPUT_PROMPT,
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT,
)
from core.model_manager import ModelInstance
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import (
@@ -20,6 +24,7 @@ from core.model_runtime.entities.message_entities import (
PromptMessage,
PromptMessageTool,
SystemPromptMessage,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import AIModelEntity, ModelFeature, ParameterRule
@@ -108,6 +113,21 @@ def invoke_llm_with_structured_output(
use_tool_call=use_tool_call,
)
# Append a "final turn" reminder at the very end of the conversation so the
# model sees it right before generating. This exploits recency bias to
# override the in-context bash/tool-call patterns from earlier history.
# Merge into the last user message when possible to avoid consecutive
# UserPromptMessages (some APIs like Anthropic require user/assistant alternation).
if use_tool_call:
messages = list(prompt_messages)
if messages and isinstance(messages[-1], UserPromptMessage) and isinstance(messages[-1].content, str):
messages[-1] = UserPromptMessage(
content=messages[-1].content + "\n\n" + STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
)
else:
messages.append(UserPromptMessage(content=STRUCTURED_OUTPUT_FINAL_TURN_REMINDER))
prompt_messages = messages
llm_result = model_instance.invoke_llm(
prompt_messages=list(prompt_messages),
model_parameters=model_parameters_with_json_schema,
@@ -441,6 +461,11 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
# Convert boolean types to string types (common requirement)
convert_boolean_to_string(processed_schema)
# Strip Dify-internal custom formats (e.g. "file-path") that external model APIs
# do not recognise. The field type ("string") is sufficient for the model to
# produce the expected value; the custom format is only used by Dify post-processing.
_strip_custom_formats(processed_schema)
# Apply model-specific transformations
if SpecialModelType.GEMINI in model_schema.model:
remove_additional_properties(processed_schema)
@@ -448,7 +473,10 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
elif SpecialModelType.OLLAMA in provider:
return processed_schema
else:
# Default format with name field
# OpenAI-style native structured output requires every property key to
# appear in ``required``. Ensure this recursively so user schemas that
# leave ``required`` empty or partial don't get rejected by the API.
_ensure_all_properties_required(processed_schema)
return {"schema": processed_schema, "name": "llm_response"}
@@ -496,3 +524,57 @@ def convert_boolean_to_string(schema: dict):
for item in value:
if isinstance(item, dict):
convert_boolean_to_string(item)
# ``format`` values that only Dify understands. They are not standard JSON
# Schema formats, so model providers (OpenAI, Azure, Google, etc.) do not
# recognise them.
_CUSTOM_FORMATS = frozenset({"file-path"})


def _strip_custom_formats(schema: dict) -> None:
    """Drop Dify-internal ``format`` entries from a JSON schema, in place.

    Provider structured-output / response_format modes reject ``format``
    values they do not know. Only the Dify-specific ones listed in
    ``_CUSTOM_FORMATS`` (e.g. ``file-path``) are removed; standard formats
    such as ``date-time`` or ``email`` are kept as they are.

    The whole schema is walked: every dict value is visited, as is every dict
    that sits directly inside a list value. Anything that is not a dict is
    ignored, including a non-dict ``schema`` argument.
    """
    if not isinstance(schema, dict):
        return

    declared = schema.get("format")
    # Compare case-insensitively and treat "_" like "-", so spellings such as
    # "File_Path" are recognised as the custom "file-path" format.
    if isinstance(declared, str) and declared.lower().replace("_", "-") in _CUSTOM_FORMATS:
        schema.pop("format")

    for child in schema.values():
        # A list contributes its direct dict members; a scalar or a nested
        # list simply fails the dict check below and is skipped.
        candidates = child if isinstance(child, list) else [child]
        for node in candidates:
            if isinstance(node, dict):
                _strip_custom_formats(node)
def _ensure_all_properties_required(schema: dict) -> None:
    """Make ``required`` name every key in ``properties``, at every nesting level.

    OpenAI's native structured-output mode (response_format with json_schema)
    insists that ``required`` lists ALL property names. Schemas written in
    Dify may leave ``required`` empty or incomplete, so it is overwritten here
    before the schema is sent to the API.

    The schema is modified in place. Only nodes whose ``type`` is exactly
    ``"object"`` and whose ``properties`` is a non-empty dict are patched.
    Every dict value, and every dict directly inside a list value, is visited
    recursively; a non-dict ``schema`` argument is ignored.
    """
    if not isinstance(schema, dict):
        return

    props = schema.get("properties") if schema.get("type") == "object" else None
    if isinstance(props, dict) and props:
        # Replace whatever was declared with the full key list, in the order
        # the properties were defined.
        schema["required"] = [*props]

    for child in schema.values():
        # Treat a lone value as a one-element list so both shapes share the
        # same dict check; strings, numbers and nested lists are skipped.
        candidates = child if isinstance(child, list) else [child]
        for node in candidates:
            if isinstance(node, dict):
                _ensure_all_properties_required(node)

View File

@@ -323,12 +323,28 @@ Here is the JSON schema:
{{schema}}
""" # noqa: E501
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """The ONLY tool available to you is `structured_output`. You MUST call this tool to provide your final answer.
Do NOT call any other tool. Tools such as `bash`, `python`, or any others that may appear in the conversation history are NOT available to you — they are part of historical context only.
Do NOT write JSON directly in your message. Instead, always invoke the `structured_output` tool with the appropriate arguments.
If you respond without calling `structured_output`, or if you call any other tool, your answer will be considered invalid.
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """## MANDATORY INSTRUCTION — read before responding
You have EXACTLY ONE tool: `structured_output`. You MUST call it with the correct arguments to provide your final answer.
### Rules (violation = invalid response)
1. Call `structured_output` — this is the ONLY action you can take.
2. Do NOT output raw JSON text — always use the tool call.
3. Do NOT call any other tool (bash, python, code_interpreter, etc.) — they do NOT exist and will be rejected.
4. Do NOT ask clarifying questions or say you cannot answer — extract the best answer from the available context and call `structured_output`.
### About conversation history
The messages above may contain calls to tools like `bash`, `python`, `code_interpreter`, etc.
Those calls happened in PREVIOUS steps that have already finished. The results are shown for your reference.
You CANNOT execute those tools — they are no longer available. Read their outputs as context, then summarise your answer into `structured_output`.
""" # noqa: E501
# Last-turn nudge for the tool-call structured-output path. The caller places
# it at the very end of the conversation (merged into the last user message
# when that message is plain text, otherwise as a new user message) so the
# model reads it immediately before generating.
# The three adjacent literals below are concatenated into one single-line string.
STRUCTURED_OUTPUT_FINAL_TURN_REMINDER = (
"[SYSTEM] This is the FINAL turn. No further interaction is possible after this. "
"You must call `structured_output` NOW with your best answer based on the conversation above. "
"Do NOT call bash, python, or any other tool. Do NOT ask questions. Just call `structured_output`."
)
LLM_MODIFY_PROMPT_SYSTEM = """
Both your input and output should be in JSON format.