mirror of
https://github.com/langgenius/dify.git
synced 2026-02-24 18:05:11 +00:00
feat(llm): enhance structured output prompts
Some checks failed
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled
Some checks failed
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled
This commit is contained in:
@@ -9,7 +9,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError
|
||||
|
||||
from core.llm_generator.output_parser.errors import OutputParserError
|
||||
from core.llm_generator.output_parser.file_ref import detect_file_path_fields
|
||||
from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT, STRUCTURED_OUTPUT_TOOL_CALL_PROMPT
|
||||
from core.llm_generator.prompts import (
|
||||
STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
|
||||
STRUCTURED_OUTPUT_PROMPT,
|
||||
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT,
|
||||
)
|
||||
from core.model_manager import ModelInstance
|
||||
from core.model_runtime.callbacks.base_callback import Callback
|
||||
from core.model_runtime.entities.llm_entities import (
|
||||
@@ -20,6 +24,7 @@ from core.model_runtime.entities.message_entities import (
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
SystemPromptMessage,
|
||||
UserPromptMessage,
|
||||
)
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity, ModelFeature, ParameterRule
|
||||
|
||||
@@ -108,6 +113,21 @@ def invoke_llm_with_structured_output(
|
||||
use_tool_call=use_tool_call,
|
||||
)
|
||||
|
||||
# Append a "final turn" reminder at the very end of the conversation so the
|
||||
# model sees it right before generating. This exploits recency bias to
|
||||
# override the in-context bash/tool-call patterns from earlier history.
|
||||
# Merge into the last user message when possible to avoid consecutive
|
||||
# UserPromptMessages (some APIs like Anthropic require user/assistant alternation).
|
||||
if use_tool_call:
|
||||
messages = list(prompt_messages)
|
||||
if messages and isinstance(messages[-1], UserPromptMessage) and isinstance(messages[-1].content, str):
|
||||
messages[-1] = UserPromptMessage(
|
||||
content=messages[-1].content + "\n\n" + STRUCTURED_OUTPUT_FINAL_TURN_REMINDER,
|
||||
)
|
||||
else:
|
||||
messages.append(UserPromptMessage(content=STRUCTURED_OUTPUT_FINAL_TURN_REMINDER))
|
||||
prompt_messages = messages
|
||||
|
||||
llm_result = model_instance.invoke_llm(
|
||||
prompt_messages=list(prompt_messages),
|
||||
model_parameters=model_parameters_with_json_schema,
|
||||
@@ -441,6 +461,11 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
|
||||
# Convert boolean types to string types (common requirement)
|
||||
convert_boolean_to_string(processed_schema)
|
||||
|
||||
# Strip Dify-internal custom formats (e.g. "file-path") that external model APIs
|
||||
# do not recognise. The field type ("string") is sufficient for the model to
|
||||
# produce the expected value; the custom format is only used by Dify post-processing.
|
||||
_strip_custom_formats(processed_schema)
|
||||
|
||||
# Apply model-specific transformations
|
||||
if SpecialModelType.GEMINI in model_schema.model:
|
||||
remove_additional_properties(processed_schema)
|
||||
@@ -448,7 +473,10 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema
|
||||
elif SpecialModelType.OLLAMA in provider:
|
||||
return processed_schema
|
||||
else:
|
||||
# Default format with name field
|
||||
# OpenAI-style native structured output requires every property key to
|
||||
# appear in ``required``. Ensure this recursively so user schemas that
|
||||
# leave ``required`` empty or partial don't get rejected by the API.
|
||||
_ensure_all_properties_required(processed_schema)
|
||||
return {"schema": processed_schema, "name": "llm_response"}
|
||||
|
||||
|
||||
@@ -496,3 +524,57 @@ def convert_boolean_to_string(schema: dict):
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
convert_boolean_to_string(item)
|
||||
|
||||
|
||||
# Formats that are Dify-internal and not part of the standard JSON Schema spec
|
||||
# recognised by model providers (OpenAI, Azure, Google, etc.).
|
||||
_CUSTOM_FORMATS = frozenset({"file-path"})
|
||||
|
||||
|
||||
def _strip_custom_formats(schema: dict) -> None:
|
||||
"""Remove Dify-internal ``format`` values from a JSON schema in-place.
|
||||
|
||||
Model APIs (OpenAI, Azure, etc.) reject unknown format values in their
|
||||
structured-output / response_format mode. This strips only the formats
|
||||
that are Dify-specific (e.g. ``file-path``); standard formats like
|
||||
``date-time`` or ``email`` are left untouched.
|
||||
"""
|
||||
if not isinstance(schema, dict):
|
||||
return
|
||||
|
||||
fmt = schema.get("format")
|
||||
if isinstance(fmt, str) and fmt.lower().replace("_", "-") in _CUSTOM_FORMATS:
|
||||
del schema["format"]
|
||||
|
||||
for value in schema.values():
|
||||
if isinstance(value, dict):
|
||||
_strip_custom_formats(value)
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
_strip_custom_formats(item)
|
||||
|
||||
|
||||
def _ensure_all_properties_required(schema: dict) -> None:
|
||||
"""Ensure ``required`` lists every key from ``properties``, recursively.
|
||||
|
||||
OpenAI's native structured-output mode (response_format with json_schema)
|
||||
mandates that ``required`` contains ALL property names. Schemas authored
|
||||
in Dify may leave ``required`` empty or partial, so we patch it here
|
||||
before sending to the API.
|
||||
"""
|
||||
if not isinstance(schema, dict):
|
||||
return
|
||||
|
||||
if schema.get("type") == "object":
|
||||
properties = schema.get("properties")
|
||||
if isinstance(properties, dict) and properties:
|
||||
schema["required"] = list(properties.keys())
|
||||
|
||||
for value in schema.values():
|
||||
if isinstance(value, dict):
|
||||
_ensure_all_properties_required(value)
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
if isinstance(item, dict):
|
||||
_ensure_all_properties_required(item)
|
||||
|
||||
@@ -323,12 +323,28 @@ Here is the JSON schema:
|
||||
{{schema}}
|
||||
""" # noqa: E501
|
||||
|
||||
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """The ONLY tool available to you is `structured_output`. You MUST call this tool to provide your final answer.
|
||||
Do NOT call any other tool. Tools such as `bash`, `python`, or any others that may appear in the conversation history are NOT available to you — they are part of historical context only.
|
||||
Do NOT write JSON directly in your message. Instead, always invoke the `structured_output` tool with the appropriate arguments.
|
||||
If you respond without calling `structured_output`, or if you call any other tool, your answer will be considered invalid.
|
||||
# System prompt used when structured output is enforced via a single
# `structured_output` tool call rather than a response_format JSON schema.
STRUCTURED_OUTPUT_TOOL_CALL_PROMPT = """## MANDATORY INSTRUCTION — read before responding

You have EXACTLY ONE tool: `structured_output`. You MUST call it with the correct arguments to provide your final answer.

### Rules (violation = invalid response)
1. Call `structured_output` — this is the ONLY action you can take.
2. Do NOT output raw JSON text — always use the tool call.
3. Do NOT call any other tool (bash, python, code_interpreter, etc.) — they do NOT exist and will be rejected.
4. Do NOT ask clarifying questions or say you cannot answer — extract the best answer from the available context and call `structured_output`.

### About conversation history
The messages above may contain calls to tools like `bash`, `python`, `code_interpreter`, etc.
Those calls happened in PREVIOUS steps that have already finished. The results are shown for your reference.
You CANNOT execute those tools — they are no longer available. Read their outputs as context, then summarise your answer into `structured_output`.
""" # noqa: E501
|
||||
|
||||
# Single-line reminder appended as the very last user turn so the model sees
# it right before generating (exploiting recency bias to override earlier
# in-context bash/tool-call patterns).
STRUCTURED_OUTPUT_FINAL_TURN_REMINDER = (
    "[SYSTEM] This is the FINAL turn. No further interaction is possible after this. "
    "You must call `structured_output` NOW with your best answer based on the conversation above. "
    "Do NOT call bash, python, or any other tool. Do NOT ask questions. Just call `structured_output`."
)
|
||||
|
||||
LLM_MODIFY_PROMPT_SYSTEM = """
|
||||
Both your input and output should be in JSON format.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user