From c2ea3d5b440495bbc3402619f28f8060c327be24 Mon Sep 17 00:00:00 2001
From: FFXN <lizy@dify.ai>
Date: Sun, 1 Mar 2026 11:37:33 +0800
Subject: [PATCH] fix: Some models only support streaming output (e.g. Qwen3
 open-source edition), so all chunks must be consumed and concatenated into a
 single ``LLMResult`` for compatibility.

---
 .../model_providers/__base/large_language_model.py             | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py
index 170cb48425..a37274842b 100644
--- a/api/core/model_runtime/model_providers/__base/large_language_model.py
+++ b/api/core/model_runtime/model_providers/__base/large_language_model.py
@@ -91,7 +91,8 @@ def _build_llm_result_from_chunks(
     """
     Build a single `LLMResult` by accumulating all returned chunks.
 
-    Some models only support streaming output (e.g. Qwen3 open-source edition),
+    Some models only support streaming output (e.g. Qwen3 open-source edition),
+    and the plugin side may still deliver the response as a chunked stream,
     so all chunks must be consumed and concatenated into a single ``LLMResult``.
 
     The ``usage`` is taken from the last chunk that carries it, which is the