diff --git a/api/core/llm_generator/output_parser/structured_output.py b/api/core/llm_generator/output_parser/structured_output.py index 3843378f1d..a483775823 100644 --- a/api/core/llm_generator/output_parser/structured_output.py +++ b/api/core/llm_generator/output_parser/structured_output.py @@ -9,7 +9,11 @@ from pydantic import BaseModel, TypeAdapter, ValidationError from core.llm_generator.output_parser.errors import OutputParserError from core.llm_generator.output_parser.file_ref import detect_file_path_fields -from core.llm_generator.prompts import STRUCTURED_OUTPUT_PROMPT, STRUCTURED_OUTPUT_TOOL_CALL_PROMPT +from core.llm_generator.prompts import ( + STRUCTURED_OUTPUT_FINAL_TURN_REMINDER, + STRUCTURED_OUTPUT_PROMPT, + STRUCTURED_OUTPUT_TOOL_CALL_PROMPT, +) from core.model_manager import ModelInstance from core.model_runtime.callbacks.base_callback import Callback from core.model_runtime.entities.llm_entities import ( @@ -20,6 +24,7 @@ from core.model_runtime.entities.message_entities import ( PromptMessage, PromptMessageTool, SystemPromptMessage, + UserPromptMessage, ) from core.model_runtime.entities.model_entities import AIModelEntity, ModelFeature, ParameterRule @@ -108,6 +113,21 @@ def invoke_llm_with_structured_output( use_tool_call=use_tool_call, ) + # Append a "final turn" reminder at the very end of the conversation so the + # model sees it right before generating. This exploits recency bias to + # override the in-context bash/tool-call patterns from earlier history. + # Merge into the last user message when possible to avoid consecutive + # UserPromptMessages (some APIs like Anthropic require user/assistant alternation). 
+ if use_tool_call: + messages = list(prompt_messages) + if messages and isinstance(messages[-1], UserPromptMessage) and isinstance(messages[-1].content, str): + messages[-1] = UserPromptMessage( + content=messages[-1].content + "\n\n" + STRUCTURED_OUTPUT_FINAL_TURN_REMINDER, + ) + else: + messages.append(UserPromptMessage(content=STRUCTURED_OUTPUT_FINAL_TURN_REMINDER)) + prompt_messages = messages + llm_result = model_instance.invoke_llm( prompt_messages=list(prompt_messages), model_parameters=model_parameters_with_json_schema, @@ -441,6 +461,11 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema # Convert boolean types to string types (common requirement) convert_boolean_to_string(processed_schema) + # Strip Dify-internal custom formats (e.g. "file-path") that external model APIs + # do not recognise. The field type ("string") is sufficient for the model to + # produce the expected value; the custom format is only used by Dify post-processing. + _strip_custom_formats(processed_schema) + # Apply model-specific transformations if SpecialModelType.GEMINI in model_schema.model: remove_additional_properties(processed_schema) @@ -448,7 +473,10 @@ def _prepare_schema_for_model(provider: str, model_schema: AIModelEntity, schema elif SpecialModelType.OLLAMA in provider: return processed_schema else: - # Default format with name field + # OpenAI-style native structured output requires every property key to + # appear in ``required``. Ensure this recursively so user schemas that + # leave ``required`` empty or partial don't get rejected by the API. 
# Formats that are Dify-internal and not part of the standard JSON Schema spec
# recognised by model providers (OpenAI, Azure, Google, etc.).
_CUSTOM_FORMATS = frozenset({"file-path"})


def _strip_custom_formats(schema: dict) -> None:
    """Remove Dify-internal ``format`` values from a JSON schema in-place.

    Model APIs (OpenAI, Azure, etc.) reject unknown ``format`` values in their
    structured-output / response_format mode. This strips only the formats
    that are Dify-specific (e.g. ``file-path``); standard formats like
    ``date-time`` or ``email`` are left untouched.

    :param schema: JSON-schema fragment, mutated in place. Non-dict input is
        ignored so the recursion can be called on arbitrary nested values.
    """
    if not isinstance(schema, dict):
        return

    fmt = schema.get("format")
    # Normalise spelling variants ("file_path", "FILE-PATH") before matching.
    if isinstance(fmt, str) and fmt.lower().replace("_", "-") in _CUSTOM_FORMATS:
        del schema["format"]

    # Blind recursion into every nested dict/list reaches all subschema
    # positions (properties, items, anyOf, $defs, ...) without enumerating them.
    for value in schema.values():
        if isinstance(value, dict):
            _strip_custom_formats(value)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    _strip_custom_formats(item)


def _ensure_all_properties_required(schema: dict) -> None:
    """Ensure ``required`` lists every key from ``properties``, recursively.

    OpenAI's native structured-output mode (``response_format`` with
    ``json_schema``) mandates that ``required`` contains ALL property names.
    Schemas authored in Dify may leave ``required`` empty or partial, so we
    patch it here before sending to the API.

    Besides plain ``"type": "object"``, this also handles nullable objects
    written as ``"type": ["object", "null"]`` — the strict-mode idiom for an
    optional object — which must likewise list every property in ``required``.

    :param schema: JSON-schema fragment, mutated in place. Non-dict input is
        ignored so the recursion can be called on arbitrary nested values.
    """
    if not isinstance(schema, dict):
        return

    schema_type = schema.get("type")
    # ``type`` may be a single string or a list of type names. Property
    # subschemas are always dicts (or bools), never lists or the bare string
    # "object", so neither test can fire spuriously when the blind recursion
    # below visits the ``properties`` mapping itself.
    is_object_schema = schema_type == "object" or (
        isinstance(schema_type, list) and "object" in schema_type
    )
    if is_object_schema:
        properties = schema.get("properties")
        if isinstance(properties, dict) and properties:
            schema["required"] = list(properties.keys())

    for value in schema.values():
        if isinstance(value, dict):
            _ensure_all_properties_required(value)
        elif isinstance(value, list):
            for item in value:
                if isinstance(item, dict):
                    _ensure_all_properties_required(item)
# Appended as the very last user message so the model reads it immediately
# before generating; counteracts tool-call patterns seen earlier in history.
STRUCTURED_OUTPUT_FINAL_TURN_REMINDER = (
    "[SYSTEM] This is the FINAL turn. No further interaction is possible "
    "after this. You must call `structured_output` NOW with your best answer "
    "based on the conversation above. Do NOT call bash, python, or any other "
    "tool. Do NOT ask questions. Just call `structured_output`."
)