Mirror of https://github.com/langgenius/dify.git (synced 2026-01-24 08:14:21 +00:00)

Compare commits: provider-g...0.15.0 (10 commits)
Commit SHAs:

- c236f05f4b
- 0eeacdc80c
- 41f39bf3fc
- 9677144015
- 15797c556f
- acacf35a2a
- d3f5b1cbb6
- 196ed8101b
- dc650c5368
- 2bb521b135
```diff
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
     CURRENT_VERSION: str = Field(
         description="Dify version",
-        default="0.14.2",
+        default="0.15.0",
     )
 
     COMMIT_SHA: str = Field(
```
```diff
@@ -1,5 +1,5 @@
 from collections.abc import Callable
-from datetime import UTC, datetime
+from datetime import UTC, datetime, timedelta
 from enum import Enum
 from functools import wraps
 from typing import Optional
@@ -8,6 +8,8 @@ from flask import current_app, request
 from flask_login import user_logged_in  # type: ignore
 from flask_restful import Resource  # type: ignore
 from pydantic import BaseModel
+from sqlalchemy import select, update
+from sqlalchemy.orm import Session
 from werkzeug.exceptions import Forbidden, Unauthorized
 
 from extensions.ext_database import db
@@ -174,7 +176,7 @@ def validate_dataset_token(view=None):
     return decorator
 
 
-def validate_and_get_api_token(scope=None):
+def validate_and_get_api_token(scope: str | None = None):
     """
     Validate and get API token.
     """
@@ -188,20 +190,25 @@ def validate_and_get_api_token(scope=None):
     if auth_scheme != "bearer":
         raise Unauthorized("Authorization scheme must be 'Bearer'")
 
-    api_token = (
-        db.session.query(ApiToken)
-        .filter(
-            ApiToken.token == auth_token,
-            ApiToken.type == scope,
-        )
-        .first()
-    )
-
-    if not api_token:
-        raise Unauthorized("Access token is invalid")
-
-    api_token.last_used_at = datetime.now(UTC).replace(tzinfo=None)
-    db.session.commit()
+    current_time = datetime.now(UTC).replace(tzinfo=None)
+    cutoff_time = current_time - timedelta(minutes=1)
+    with Session(db.engine, expire_on_commit=False) as session:
+        update_stmt = (
+            update(ApiToken)
+            .where(ApiToken.token == auth_token, ApiToken.last_used_at < cutoff_time, ApiToken.type == scope)
+            .values(last_used_at=current_time)
+            .returning(ApiToken)
+        )
+        result = session.execute(update_stmt)
+        api_token = result.scalar_one_or_none()
+
+        if not api_token:
+            stmt = select(ApiToken).where(ApiToken.token == auth_token, ApiToken.type == scope)
+            api_token = session.scalar(stmt)
+            if not api_token:
+                raise Unauthorized("Access token is invalid")
+        else:
+            session.commit()
 
     return api_token
```
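The last hunk above replaces a read-then-write of `last_used_at` with one conditional `UPDATE ... RETURNING`, so a busy token is written at most once per minute. A minimal self-contained sketch of the same pattern — the `ApiToken` model and the `engine` argument here are illustrative stand-ins, not Dify's actual classes:

```python
from datetime import UTC, datetime, timedelta

from sqlalchemy import DateTime, String, select, update
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class ApiToken(Base):  # illustrative stand-in for the real model
    __tablename__ = "api_tokens"
    token: Mapped[str] = mapped_column(String, primary_key=True)
    type: Mapped[str] = mapped_column(String)
    last_used_at: Mapped[datetime] = mapped_column(DateTime)


def touch_token(engine, auth_token: str, scope: str) -> ApiToken | None:
    now = datetime.now(UTC).replace(tzinfo=None)
    cutoff = now - timedelta(minutes=1)
    with Session(engine, expire_on_commit=False) as session:
        # One UPDATE ... RETURNING: only tokens last touched more than a
        # minute ago are written, so hot tokens skip the write entirely.
        row = session.execute(
            update(ApiToken)
            .where(
                ApiToken.token == auth_token,
                ApiToken.type == scope,
                ApiToken.last_used_at < cutoff,
            )
            .values(last_used_at=now)
            .returning(ApiToken)
        ).scalar_one_or_none()
        if row is None:
            # A recently-touched token fails the cutoff filter; re-read
            # before concluding the token is actually invalid.
            row = session.scalar(
                select(ApiToken).where(ApiToken.token == auth_token, ApiToken.type == scope)
            )
        else:
            session.commit()
        return row
```

On backends that support `RETURNING` (PostgreSQL, SQLite 3.35+), the existence check and the timestamp refresh become a single round trip, which is what removes the per-request write contention.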
@@ -1,13 +1,10 @@
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from os.path import abspath, dirname, join
|
||||
from threading import Lock
|
||||
from typing import Any, cast
|
||||
from typing import Any
|
||||
|
||||
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore
|
||||
import tiktoken
|
||||
|
||||
_tokenizer: Any = None
|
||||
_lock = Lock()
|
||||
_executor = ProcessPoolExecutor(max_workers=1)
|
||||
|
||||
|
||||
class GPT2Tokenizer:
|
||||
@@ -17,22 +14,28 @@ class GPT2Tokenizer:
|
||||
use gpt2 tokenizer to get num tokens
|
||||
"""
|
||||
_tokenizer = GPT2Tokenizer.get_encoder()
|
||||
tokens = _tokenizer.encode(text, verbose=False)
|
||||
tokens = _tokenizer.encode(text)
|
||||
return len(tokens)
|
||||
|
||||
@staticmethod
|
||||
def get_num_tokens(text: str) -> int:
|
||||
future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
|
||||
result = future.result()
|
||||
return cast(int, result)
|
||||
# Because this process needs more cpu resource, we turn this back before we find a better way to handle it.
|
||||
#
|
||||
# future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
|
||||
# result = future.result()
|
||||
# return cast(int, result)
|
||||
return GPT2Tokenizer._get_num_tokens_by_gpt2(text)
|
||||
|
||||
@staticmethod
|
||||
def get_encoder() -> Any:
|
||||
global _tokenizer, _lock
|
||||
with _lock:
|
||||
if _tokenizer is None:
|
||||
base_path = abspath(__file__)
|
||||
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
|
||||
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
|
||||
# Try to use tiktoken to get the tokenizer because it is faster
|
||||
#
|
||||
_tokenizer = tiktoken.get_encoding("gpt2")
|
||||
# base_path = abspath(__file__)
|
||||
# gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
|
||||
# _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
|
||||
|
||||
return _tokenizer
|
||||
|
||||
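These hunks swap the `transformers` GPT-2 tokenizer for tiktoken's prebuilt `gpt2` encoding and drop the process-pool indirection. A rough sketch of the resulting behavior, with the same lazy, lock-guarded initialization:

```python
# Minimal sketch of the lazily-initialized tiktoken encoder mirrored above.
from threading import Lock
from typing import Any

import tiktoken

_tokenizer: Any = None
_lock = Lock()


def get_num_tokens(text: str) -> int:
    global _tokenizer
    with _lock:
        if _tokenizer is None:
            # tiktoken ships the GPT-2 BPE vocabulary, so no local
            # tokenizer files need to be bundled or downloaded.
            _tokenizer = tiktoken.get_encoding("gpt2")
    return len(_tokenizer.encode(text))


print(get_num_tokens("hello world"))  # -> 2
```

Since tiktoken bundles the GPT-2 BPE table, the on-disk `gpt2/` vocabulary directory and the `transformers` dependency are no longer needed for token counting.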
```diff
@@ -9,6 +9,8 @@ supported_model_types:
 - llm
 - text-embedding
 - rerank
+- speech2text
+- tts
 configurate_methods:
 - customizable-model
 model_credential_schema:
@@ -118,3 +120,19 @@ model_credential_schema:
       label:
         en_US: Not Support
         zh_Hans: 不支持
+  - variable: voices
+    show_on:
+      - variable: __model_type
+        value: tts
+    label:
+      en_US: Available Voices (comma-separated)
+      zh_Hans: 可用声音(用英文逗号分隔)
+    type: text-input
+    required: false
+    default: "Chinese Female"
+    placeholder:
+      en_US: "Chinese Female, Chinese Male, Japanese Male, Cantonese Female, English Female, English Male, Korean Female"
+      zh_Hans: "Chinese Female, Chinese Male, Japanese Male, Cantonese Female, English Female, English Male, Korean Female"
+    help:
+      en_US: "List voice names separated by commas. First voice will be used as default."
+      zh_Hans: "用英文逗号分隔的声音列表。第一个声音将作为默认值。"
```
```diff
@@ -1,7 +1,5 @@
 from collections.abc import Generator
 
-from yarl import URL
-
 from core.model_runtime.entities.llm_entities import LLMResult
 from core.model_runtime.entities.message_entities import (
     PromptMessage,
@@ -24,9 +22,10 @@ class GPUStackLanguageModel(OAIAPICompatLargeLanguageModel):
         stream: bool = True,
         user: str | None = None,
     ) -> LLMResult | Generator:
+        compatible_credentials = self._get_compatible_credentials(credentials)
         return super()._invoke(
             model,
-            credentials,
+            compatible_credentials,
             prompt_messages,
             model_parameters,
             tools,
@@ -36,10 +35,15 @@ class GPUStackLanguageModel(OAIAPICompatLargeLanguageModel):
         )
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
-        self._add_custom_parameters(credentials)
-        super().validate_credentials(model, credentials)
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        super().validate_credentials(model, compatible_credentials)
+
+    def _get_compatible_credentials(self, credentials: dict) -> dict:
+        credentials = credentials.copy()
+        base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
+        credentials["endpoint_url"] = f"{base_url}/v1-openai"
+        return credentials
 
     @staticmethod
     def _add_custom_parameters(credentials: dict) -> None:
-        credentials["endpoint_url"] = str(URL(credentials["endpoint_url"]) / "v1-openai")
         credentials["mode"] = "chat"
```
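All of the GPUStack model classes in this comparison converge on the same credential normalization: strip any trailing slash or existing `/v1-openai` suffix before re-appending it, so the transformation is idempotent. The earlier yarl-based append could stack the suffix if the stored endpoint already ended in `/v1-openai`, and it also mutated the caller's dict instead of copying it. A small sketch of just the URL rule:

```python
def normalize_endpoint(endpoint_url: str) -> str:
    """Return the endpoint with exactly one /v1-openai suffix."""
    base_url = endpoint_url.rstrip("/").removesuffix("/v1-openai")
    return f"{base_url}/v1-openai"


assert normalize_endpoint("http://gpustack.local") == "http://gpustack.local/v1-openai"
# Idempotent: applying it twice does not stack the suffix.
assert normalize_endpoint("http://gpustack.local/v1-openai/") == "http://gpustack.local/v1-openai"
```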
```diff
@@ -0,0 +1,43 @@
+from typing import IO, Optional
+
+from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel
+
+
+class GPUStackSpeech2TextModel(OAICompatSpeech2TextModel):
+    """
+    Model class for GPUStack Speech to text model.
+    """
+
+    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
+        """
+        Invoke speech2text model
+        :param model: model name
+        :param credentials: model credentials
+        :param file: audio file
+        :param user: unique user id
+        :return: text for given audio file
+        """
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        return super()._invoke(model, compatible_credentials, file)
+
+    def validate_credentials(self, model: str, credentials: dict) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        """
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        super().validate_credentials(model, compatible_credentials)
+
+    def _get_compatible_credentials(self, credentials: dict) -> dict:
+        """
+        Get compatible credentials
+
+        :param credentials: model credentials
+        :return: compatible credentials
+        """
+        compatible_credentials = credentials.copy()
+        base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
+        compatible_credentials["endpoint_url"] = f"{base_url}/v1-openai"
+        return compatible_credentials
```
```diff
@@ -1,7 +1,5 @@
 from typing import Optional
 
-from yarl import URL
-
 from core.entities.embedding_type import EmbeddingInputType
 from core.model_runtime.entities.text_embedding_entities import (
     TextEmbeddingResult,
@@ -24,12 +22,15 @@ class GPUStackTextEmbeddingModel(OAICompatEmbeddingModel):
         user: Optional[str] = None,
         input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
     ) -> TextEmbeddingResult:
-        return super()._invoke(model, credentials, texts, user, input_type)
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        return super()._invoke(model, compatible_credentials, texts, user, input_type)
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
-        self._add_custom_parameters(credentials)
-        super().validate_credentials(model, credentials)
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        super().validate_credentials(model, compatible_credentials)
 
-    @staticmethod
-    def _add_custom_parameters(credentials: dict) -> None:
-        credentials["endpoint_url"] = str(URL(credentials["endpoint_url"]) / "v1-openai")
+    def _get_compatible_credentials(self, credentials: dict) -> dict:
+        credentials = credentials.copy()
+        base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
+        credentials["endpoint_url"] = f"{base_url}/v1-openai"
+        return credentials
```
api/core/model_runtime/model_providers/gpustack/tts/tts.py (new file, 57 lines)
```diff
@@ -0,0 +1,57 @@
+from typing import Any, Optional
+
+from core.model_runtime.model_providers.openai_api_compatible.tts.tts import OAICompatText2SpeechModel
+
+
+class GPUStackText2SpeechModel(OAICompatText2SpeechModel):
+    """
+    Model class for GPUStack Text to Speech model.
+    """
+
+    def _invoke(
+        self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None
+    ) -> Any:
+        """
+        Invoke text2speech model
+
+        :param model: model name
+        :param tenant_id: user tenant id
+        :param credentials: model credentials
+        :param content_text: text content to be translated
+        :param voice: model timbre
+        :param user: unique user id
+        :return: text translated to audio file
+        """
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        return super()._invoke(
+            model=model,
+            tenant_id=tenant_id,
+            credentials=compatible_credentials,
+            content_text=content_text,
+            voice=voice,
+            user=user,
+        )
+
+    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
+        """
+        Validate model credentials
+
+        :param model: model name
+        :param credentials: model credentials
+        :param user: unique user id
+        """
+        compatible_credentials = self._get_compatible_credentials(credentials)
+        super().validate_credentials(model, compatible_credentials)
+
+    def _get_compatible_credentials(self, credentials: dict) -> dict:
+        """
+        Get compatible credentials
+
+        :param credentials: model credentials
+        :return: compatible credentials
+        """
+        compatible_credentials = credentials.copy()
+        base_url = credentials["endpoint_url"].rstrip("/").removesuffix("/v1-openai")
+        compatible_credentials["endpoint_url"] = f"{base_url}/v1-openai"
+
+        return compatible_credentials
```
```diff
@@ -377,7 +377,10 @@ class OAIAPICompatLargeLanguageModel(_CommonOaiApiCompat, LargeLanguageModel):
             for tool in tools:
                 formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))
 
-            data["tools"] = formatted_tools
+            if prompt_messages[-1].role.value == "tool":
+                data["tools"] = None
+            else:
+                data["tools"] = formatted_tools
 
         if stop:
             data["stop"] = stop
```
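This hunk works around OpenAI-compatible backends that reject a `tools` array when the conversation ends with a tool result: in that case the request body carries `tools: None` instead. A small hedged sketch of the payload shaping — plain dicts stand in for Dify's `PromptMessage` objects, which expose the role via `.role.value`:

```python
from typing import Any


def build_tool_fields(prompt_messages: list[dict], formatted_tools: list[dict]) -> dict[str, Any]:
    # Mirrors the branch above: when the final message is a tool result,
    # drop the tools list from the request body rather than re-sending it.
    if prompt_messages and prompt_messages[-1]["role"] == "tool":
        return {"tools": None}
    return {"tools": formatted_tools}
```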
```diff
@@ -7,6 +7,8 @@
 - Qwen/Qwen2.5-Coder-7B-Instruct
 - Qwen/Qwen2-VL-72B-Instruct
 - Qwen/Qwen2-1.5B-Instruct
+- Qwen/Qwen2.5-72B-Instruct-128K
+- Vendor-A/Qwen/Qwen2.5-72B-Instruct
 - Pro/Qwen/Qwen2-VL-7B-Instruct
 - OpenGVLab/InternVL2-26B
 - Pro/OpenGVLab/InternVL2-8B
```
@@ -0,0 +1,51 @@
|
||||
model: Qwen/Qwen2.5-72B-Instruct-128K
|
||||
label:
|
||||
en_US: Qwen/Qwen2.5-72B-Instruct-128K
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 131072
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '4.13'
|
||||
output: '4.13'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,51 @@
|
||||
model: Vendor-A/Qwen/Qwen2.5-72B-Instruct
|
||||
label:
|
||||
en_US: Vendor-A/Qwen/Qwen2.5-72B-Instruct
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4096
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
- name: top_k
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
|
||||
en_US: Only sample from the top K options for each subsequent token.
|
||||
required: false
|
||||
- name: frequency_penalty
|
||||
use_template: frequency_penalty
|
||||
- name: response_format
|
||||
label:
|
||||
zh_Hans: 回复格式
|
||||
en_US: Response Format
|
||||
type: string
|
||||
help:
|
||||
zh_Hans: 指定模型必须输出的格式
|
||||
en_US: specifying the format that the model must output
|
||||
required: false
|
||||
options:
|
||||
- text
|
||||
- json_object
|
||||
pricing:
|
||||
input: '1.00'
|
||||
output: '1.00'
|
||||
unit: '0.000001'
|
||||
currency: RMB
|
||||
```diff
@@ -15,7 +15,7 @@ parameter_rules:
     type: int
     default: 512
     min: 1
-    max: 8192
+    max: 4096
     help:
       zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
       en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
```
@@ -0,0 +1,37 @@
|
||||
model: fishaudio/fish-speech-1.5
|
||||
model_type: tts
|
||||
model_properties:
|
||||
default_voice: 'fishaudio/fish-speech-1.5:alex'
|
||||
voices:
|
||||
- mode: "fishaudio/fish-speech-1.5:alex"
|
||||
name: "Alex(男声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:benjamin"
|
||||
name: "Benjamin(男声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:charles"
|
||||
name: "Charles(男声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:david"
|
||||
name: "David(男声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:anna"
|
||||
name: "Anna(女声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:bella"
|
||||
name: "Bella(女声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:claire"
|
||||
name: "Claire(女声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
- mode: "fishaudio/fish-speech-1.5:diana"
|
||||
name: "Diana(女声)"
|
||||
language: [ "zh-Hans", "en-US" ]
|
||||
audio_type: 'mp3'
|
||||
max_workers: 5
|
||||
# stream: false
|
||||
pricing:
|
||||
input: '0.015'
|
||||
output: '0'
|
||||
unit: '0.001'
|
||||
currency: RMB
|
||||
@@ -2,6 +2,7 @@ import csv
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import tempfile
|
||||
from typing import cast
|
||||
@@ -10,6 +11,8 @@ import docx
|
||||
import pandas as pd
|
||||
import pypdfium2 # type: ignore
|
||||
import yaml # type: ignore
|
||||
from docx.table import Table
|
||||
from docx.text.paragraph import Paragraph
|
||||
|
||||
from configs import dify_config
|
||||
from core.file import File, FileTransferMethod, file_manager
|
||||
@@ -189,35 +192,56 @@ def _extract_text_from_doc(file_content: bytes) -> str:
|
||||
doc_file = io.BytesIO(file_content)
|
||||
doc = docx.Document(doc_file)
|
||||
text = []
|
||||
# Process paragraphs
|
||||
for paragraph in doc.paragraphs:
|
||||
if paragraph.text.strip():
|
||||
text.append(paragraph.text)
|
||||
|
||||
# Process tables
|
||||
for table in doc.tables:
|
||||
# Table header
|
||||
try:
|
||||
# table maybe cause errors so ignore it.
|
||||
if len(table.rows) > 0 and table.rows[0].cells is not None:
|
||||
# Keep track of paragraph and table positions
|
||||
content_items: list[tuple[int, str, Table | Paragraph]] = []
|
||||
|
||||
# Process paragraphs and tables
|
||||
for i, paragraph in enumerate(doc.paragraphs):
|
||||
if paragraph.text.strip():
|
||||
content_items.append((i, "paragraph", paragraph))
|
||||
|
||||
for i, table in enumerate(doc.tables):
|
||||
content_items.append((i, "table", table))
|
||||
|
||||
# Sort content items based on their original position
|
||||
content_items.sort(key=operator.itemgetter(0))
|
||||
|
||||
# Process sorted content
|
||||
for _, item_type, item in content_items:
|
||||
if item_type == "paragraph":
|
||||
if isinstance(item, Table):
|
||||
continue
|
||||
text.append(item.text)
|
||||
elif item_type == "table":
|
||||
# Process tables
|
||||
if not isinstance(item, Table):
|
||||
continue
|
||||
try:
|
||||
# Check if any cell in the table has text
|
||||
has_content = False
|
||||
for row in table.rows:
|
||||
for row in item.rows:
|
||||
if any(cell.text.strip() for cell in row.cells):
|
||||
has_content = True
|
||||
break
|
||||
|
||||
if has_content:
|
||||
markdown_table = "| " + " | ".join(cell.text for cell in table.rows[0].cells) + " |\n"
|
||||
markdown_table += "| " + " | ".join(["---"] * len(table.rows[0].cells)) + " |\n"
|
||||
for row in table.rows[1:]:
|
||||
markdown_table += "| " + " | ".join(cell.text for cell in row.cells) + " |\n"
|
||||
cell_texts = [cell.text.replace("\n", "<br>") for cell in item.rows[0].cells]
|
||||
markdown_table = f"| {' | '.join(cell_texts)} |\n"
|
||||
markdown_table += f"| {' | '.join(['---'] * len(item.rows[0].cells))} |\n"
|
||||
|
||||
for row in item.rows[1:]:
|
||||
# Replace newlines with <br> in each cell
|
||||
row_cells = [cell.text.replace("\n", "<br>") for cell in row.cells]
|
||||
markdown_table += "| " + " | ".join(row_cells) + " |\n"
|
||||
|
||||
text.append(markdown_table)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to extract table from DOC/DOCX: {e}")
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to extract table from DOC/DOCX: {e}")
|
||||
continue
|
||||
|
||||
return "\n".join(text)
|
||||
|
||||
except Exception as e:
|
||||
raise TextExtractionError(f"Failed to extract text from DOC/DOCX: {str(e)}") from e
|
||||
|
||||
|
||||
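The extractor now tags paragraphs and tables with their enumeration index and merges them into one sorted stream, so tables land near their surrounding prose instead of all at the end; note the two enumerations are independent, so the sort approximates body order rather than reproducing it exactly. A condensed, runnable restatement of the new flow (python-docx; the diff's warning/continue error handling is omitted):

```python
import io
import operator

import docx
from docx.table import Table


def extract_text(file_content: bytes) -> str:
    doc = docx.Document(io.BytesIO(file_content))
    # Tag paragraphs and tables with their index, then merge into one stream.
    items = [(i, p) for i, p in enumerate(doc.paragraphs) if p.text.strip()]
    items += [(i, t) for i, t in enumerate(doc.tables)]
    items.sort(key=operator.itemgetter(0))

    text: list[str] = []
    for _, item in items:
        if isinstance(item, Table):
            if not any(cell.text.strip() for row in item.rows for cell in row.cells):
                continue  # skip empty tables
            # Render the table as Markdown, mapping in-cell newlines to <br>.
            header = [c.text.replace("\n", "<br>") for c in item.rows[0].cells]
            md = f"| {' | '.join(header)} |\n"
            md += f"| {' | '.join(['---'] * len(header))} |\n"
            for row in item.rows[1:]:
                cells = [c.text.replace("\n", "<br>") for c in row.cells]
                md += f"| {' | '.join(cells)} |\n"
            text.append(md)
        else:
            text.append(item.text)
    return "\n".join(text)
```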
```diff
@@ -33,6 +33,7 @@ else
     --bind "${DIFY_BIND_ADDRESS:-0.0.0.0}:${DIFY_PORT:-5001}" \
     --workers ${SERVER_WORKER_AMOUNT:-1} \
     --worker-class ${SERVER_WORKER_CLASS:-gevent} \
+    --worker-connections ${SERVER_WORKER_CONNECTIONS:-10} \
     --timeout ${GUNICORN_TIMEOUT:-200} \
     app:app
 fi
```
```diff
@@ -1,5 +1,5 @@
 import os
-from typing import Optional
+from typing import Literal, Optional
 
 import httpx
 from tenacity import retry, retry_if_exception_type, stop_before_delay, wait_fixed
@@ -17,7 +17,6 @@ class BillingService:
         params = {"tenant_id": tenant_id}
 
         billing_info = cls._send_request("GET", "/subscription/info", params=params)
-
         return billing_info
 
     @classmethod
@@ -47,12 +46,13 @@ class BillingService:
         retry=retry_if_exception_type(httpx.RequestError),
         reraise=True,
     )
-    def _send_request(cls, method, endpoint, json=None, params=None):
+    def _send_request(cls, method: Literal["GET", "POST", "DELETE"], endpoint: str, json=None, params=None):
         headers = {"Content-Type": "application/json", "Billing-Api-Secret-Key": cls.secret_key}
 
         url = f"{cls.base_url}{endpoint}"
         response = httpx.request(method, url, json=json, params=params, headers=headers)
-
+        if method == "GET" and response.status_code != httpx.codes.OK:
+            raise ValueError("Unable to retrieve billing information. Please try again later or contact support.")
         return response.json()
 
     @staticmethod
```
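The billing hunk tightens `_send_request` with a `Literal` method type and fails fast when a GET does not return 200 instead of passing an error payload back to the caller. A minimal sketch of the same signature hardening — `BASE_URL` and the missing auth header are illustrative, not Dify's real configuration:

```python
from typing import Literal, Optional

import httpx

BASE_URL = "https://billing.example.com"  # illustrative stand-in


def send_request(
    method: Literal["GET", "POST", "DELETE"],
    endpoint: str,
    json: Optional[dict] = None,
    params: Optional[dict] = None,
) -> dict:
    response = httpx.request(method, f"{BASE_URL}{endpoint}", json=json, params=params)
    # Surface GET failures immediately rather than returning an error payload.
    if method == "GET" and response.status_code != httpx.codes.OK:
        raise ValueError("Unable to retrieve billing information.")
    return response.json()
```

Typing `method` as a `Literal` lets a static checker reject unsupported verbs at call sites instead of at runtime.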
```diff
@@ -0,0 +1,55 @@
+import os
+from pathlib import Path
+
+import pytest
+
+from core.model_runtime.errors.validate import CredentialsValidateFailedError
+from core.model_runtime.model_providers.gpustack.speech2text.speech2text import GPUStackSpeech2TextModel
+
+
+def test_validate_credentials():
+    model = GPUStackSpeech2TextModel()
+
+    with pytest.raises(CredentialsValidateFailedError):
+        model.validate_credentials(
+            model="faster-whisper-medium",
+            credentials={
+                "endpoint_url": "invalid_url",
+                "api_key": "invalid_api_key",
+            },
+        )
+
+    model.validate_credentials(
+        model="faster-whisper-medium",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        },
+    )
+
+
+def test_invoke_model():
+    model = GPUStackSpeech2TextModel()
+
+    # Get the directory of the current file
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # Get assets directory
+    assets_dir = os.path.join(os.path.dirname(current_dir), "assets")
+
+    # Construct the path to the audio file
+    audio_file_path = os.path.join(assets_dir, "audio.mp3")
+
+    file = Path(audio_file_path).read_bytes()
+
+    result = model.invoke(
+        model="faster-whisper-medium",
+        credentials={
+            "endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
+            "api_key": os.environ.get("GPUSTACK_API_KEY"),
+        },
+        file=file,
+    )
+
+    assert isinstance(result, str)
+    assert result == "1, 2, 3, 4, 5, 6, 7, 8, 9, 10"
```
@@ -0,0 +1,24 @@
|
||||
import os
|
||||
|
||||
from core.model_runtime.model_providers.gpustack.tts.tts import GPUStackText2SpeechModel
|
||||
|
||||
|
||||
def test_invoke_model():
|
||||
model = GPUStackText2SpeechModel()
|
||||
|
||||
result = model.invoke(
|
||||
model="cosyvoice-300m-sft",
|
||||
tenant_id="test",
|
||||
credentials={
|
||||
"endpoint_url": os.environ.get("GPUSTACK_SERVER_URL"),
|
||||
"api_key": os.environ.get("GPUSTACK_API_KEY"),
|
||||
},
|
||||
content_text="Hello world",
|
||||
voice="Chinese Female",
|
||||
)
|
||||
|
||||
content = b""
|
||||
for chunk in result:
|
||||
content += chunk
|
||||
|
||||
assert content != b""
|
||||
```diff
@@ -2,7 +2,7 @@ version: '3'
 services:
   # API service
   api:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       # Startup mode, 'api' starts the API server.
@@ -227,7 +227,7 @@ services:
   # worker service
   # The Celery worker for processing the queue.
   worker:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       CONSOLE_WEB_URL: ''
@@ -397,7 +397,7 @@ services:
 
   # Frontend web application.
   web:
-    image: langgenius/dify-web:0.14.2
+    image: langgenius/dify-web:0.15.0
     restart: always
     environment:
       # The base URL of console application api server, refers to the Console base URL of WEB service if console domain is
```
```diff
@@ -126,10 +126,13 @@ DIFY_PORT=5001
 # The number of API server workers, i.e., the number of workers.
 # Formula: number of cpu cores x 2 + 1 for sync, 1 for Gevent
 # Reference: https://docs.gunicorn.org/en/stable/design.html#how-many-workers
-SERVER_WORKER_AMOUNT=
+SERVER_WORKER_AMOUNT=1
 
 # Defaults to gevent. If using windows, it can be switched to sync or solo.
-SERVER_WORKER_CLASS=
+SERVER_WORKER_CLASS=gevent
+
+# Default number of worker connections, the default is 10.
+SERVER_WORKER_CONNECTIONS=10
 
 # Similar to SERVER_WORKER_CLASS.
 # If using windows, it can be switched to sync or solo.
@@ -926,3 +929,5 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false
 # Maximum number of submitted thread count in a ThreadPool for parallel node execution
 MAX_SUBMIT_COUNT=100
+
+# The maximum number of top-k value for RAG.
+TOP_K_MAX_VALUE=10
```
```diff
@@ -2,7 +2,7 @@ x-shared-env: &shared-api-worker-env
 services:
   # API service
   api:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -25,7 +25,7 @@ services:
   # worker service
   # The Celery worker for processing the queue.
   worker:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -47,7 +47,7 @@ services:
 
   # Frontend web application.
   web:
-    image: langgenius/dify-web:0.14.2
+    image: langgenius/dify-web:0.15.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}
@@ -32,8 +32,9 @@ x-shared-env: &shared-api-worker-env
   APP_MAX_EXECUTION_TIME: ${APP_MAX_EXECUTION_TIME:-1200}
   DIFY_BIND_ADDRESS: ${DIFY_BIND_ADDRESS:-0.0.0.0}
   DIFY_PORT: ${DIFY_PORT:-5001}
-  SERVER_WORKER_AMOUNT: ${SERVER_WORKER_AMOUNT:-}
-  SERVER_WORKER_CLASS: ${SERVER_WORKER_CLASS:-}
+  SERVER_WORKER_AMOUNT: ${SERVER_WORKER_AMOUNT:-1}
+  SERVER_WORKER_CLASS: ${SERVER_WORKER_CLASS:-gevent}
+  SERVER_WORKER_CONNECTIONS: ${SERVER_WORKER_CONNECTIONS:-10}
   CELERY_WORKER_CLASS: ${CELERY_WORKER_CLASS:-}
   GUNICORN_TIMEOUT: ${GUNICORN_TIMEOUT:-360}
   CELERY_WORKER_AMOUNT: ${CELERY_WORKER_AMOUNT:-}
@@ -386,11 +387,12 @@ x-shared-env: &shared-api-worker-env
   CSP_WHITELIST: ${CSP_WHITELIST:-}
   CREATE_TIDB_SERVICE_JOB_ENABLED: ${CREATE_TIDB_SERVICE_JOB_ENABLED:-false}
   MAX_SUBMIT_COUNT: ${MAX_SUBMIT_COUNT:-100}
+  TOP_K_MAX_VALUE: ${TOP_K_MAX_VALUE:-10}
 
 services:
   # API service
   api:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -413,7 +415,7 @@ services:
   # worker service
   # The Celery worker for processing the queue.
   worker:
-    image: langgenius/dify-api:0.14.2
+    image: langgenius/dify-api:0.15.0
     restart: always
     environment:
       # Use the shared environment variables.
@@ -435,7 +437,7 @@ services:
 
   # Frontend web application.
   web:
-    image: langgenius/dify-web:0.14.2
+    image: langgenius/dify-web:0.15.0
     restart: always
     environment:
       CONSOLE_API_URL: ${CONSOLE_API_URL:-}
```
```diff
@@ -26,13 +26,15 @@ const PromptEditorHeightResizeWrap: FC<Props> = ({
   const [clientY, setClientY] = useState(0)
   const [isResizing, setIsResizing] = useState(false)
   const [prevUserSelectStyle, setPrevUserSelectStyle] = useState(getComputedStyle(document.body).userSelect)
+  const [oldHeight, setOldHeight] = useState(height)
 
   const handleStartResize = useCallback((e: React.MouseEvent<HTMLElement>) => {
     setClientY(e.clientY)
     setIsResizing(true)
+    setOldHeight(height)
     setPrevUserSelectStyle(getComputedStyle(document.body).userSelect)
     document.body.style.userSelect = 'none'
-  }, [])
+  }, [height])
 
   const handleStopResize = useCallback(() => {
     setIsResizing(false)
@@ -44,8 +46,7 @@ const PromptEditorHeightResizeWrap: FC<Props> = ({
       return
 
     const offset = e.clientY - clientY
-    let newHeight = height + offset
-    setClientY(e.clientY)
+    let newHeight = oldHeight + offset
     if (newHeight < minHeight)
       newHeight = minHeight
     onHeightChange(newHeight)
```
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "dify-web",
|
||||
"version": "0.14.2",
|
||||
"version": "0.15.0",
|
||||
"private": true,
|
||||
"engines": {
|
||||
"node": ">=18.17.0"
|
||||
|
||||