From c2ea3d5b440495bbc3402619f28f8060c327be24 Mon Sep 17 00:00:00 2001 From: FFXN Date: Sun, 1 Mar 2026 11:37:33 +0800 Subject: [PATCH] fix: Some models only support streaming output (e.g. Qwen3 open-source edition), so all chunks must be consumed and concatenated into a single ``LLMResult`` for compatibility. --- .../model_providers/__base/large_language_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/model_runtime/model_providers/__base/large_language_model.py b/api/core/model_runtime/model_providers/__base/large_language_model.py index 170cb48425..a37274842b 100644 --- a/api/core/model_runtime/model_providers/__base/large_language_model.py +++ b/api/core/model_runtime/model_providers/__base/large_language_model.py @@ -91,7 +91,8 @@ def _build_llm_result_from_chunks( """ Build a single `LLMResult` by accumulating all returned chunks. - Some models only support streaming output (e.g. Qwen3 open-source edition), + Some models only support streaming output (e.g. Qwen3 open-source edition) + and the plugin side may still implement the response via a chunked stream, so all chunks must be consumed and concatenated into a single ``LLMResult``. The ``usage`` is taken from the last chunk that carries it, which is the