Mirror of https://github.com/langgenius/dify.git (synced 2026-01-20 14:04:17 +00:00)

Compare commits
57 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 11636bc7c7 | |
| | 518c1ceb94 | |
| | 696efe494e | |
| | 4419d357c4 | |
| | fbbba6db92 | |
| | 53d428907b | |
| | 8133ba16b1 | |
| | e9aa0e89d3 | |
| | 7e3c59e53e | |
| | f6314f8e73 | |
| | 3bcfd84fba | |
| | 7c0ae76cd0 | |
| | 2dee8a25d5 | |
| | 507aa6d949 | |
| | 59f173f2e6 | |
| | c3790c239c | |
| | 45e51e7730 | |
| | 4834eae887 | |
| | 01108e6172 | |
| | 95b74c211d | |
| | cb79a90031 | |
| | 4502436c47 | |
| | c3d0cf940c | |
| | e7343cc67c | |
| | 83145486b0 | |
| | 6fd1795d25 | |
| | f770232b63 | |
| | a8e694c235 | |
| | 15a6d94953 | |
| | 056331981e | |
| | cef16862da | |
| | 8a4015722d | |
| | 156345cb4b | |
| | f29280ba5c | |
| | 742be06ea9 | |
| | af98954fc1 | |
| | 4d63770189 | |
| | bbea3a6b84 | |
| | 19d3a56194 | |
| | 5cab2b711f | |
| | 1e5455e266 | |
| | 4fe585acc2 | |
| | e52448b84b | |
| | 1f92b55f58 | |
| | 8b15b742ad | |
| | 849dc0560b | |
| | a026c5fd08 | |
| | fd7aade26b | |
| | 510f8ede10 | |
| | 8f9125b08a | |
| | e5e97c0a0a | |
| | 870ca713df | |
| | 6854a3fd26 | |
| | 620360d41a | |
| | 20bd49285b | |
| | 6bd2730317 | |
| | f734cca337 | |
.github/pull_request_template.md (2 changes, vendored)

@@ -12,6 +12,8 @@ Please delete options that are not relevant.
 - [ ] New feature (non-breaking change which adds functionality)
 - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
 - [ ] This change requires a documentation update, included: [Dify Document](https://github.com/langgenius/dify-docs)
+- [ ] Improvement, including but not limited to code refactoring, performance optimization, and UI/UX improvement
+- [ ] Dependency upgrade

 # How Has This Been Tested?
@@ -1,17 +1,32 @@
-name: Build and Push API Image
+name: Build and Push API & Web

 on:
   push:
     branches:
-      - 'main'
-      - 'deploy/dev'
+      - "main"
+      - "deploy/dev"
   release:
-    types: [ published ]
+    types: [published]
+
+env:
+  DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
+  DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+  DIFY_WEB_IMAGE_NAME: ${{ vars.DIFY_WEB_IMAGE_NAME || 'langgenius/dify-web' }}
+  DIFY_API_IMAGE_NAME: ${{ vars.DIFY_API_IMAGE_NAME || 'langgenius/dify-api' }}

 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     if: github.event.pull_request.draft == false
+    strategy:
+      matrix:
+        include:
+          - service_name: "web"
+            image_name_env: "DIFY_WEB_IMAGE_NAME"
+            context: "web"
+          - service_name: "api"
+            image_name_env: "DIFY_API_IMAGE_NAME"
+            context: "api"
     steps:
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3
@@ -22,14 +37,14 @@ jobs:
       - name: Login to Docker Hub
         uses: docker/login-action@v2
         with:
-          username: ${{ secrets.DOCKERHUB_USER }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          username: ${{ env.DOCKERHUB_USER }}
+          password: ${{ env.DOCKERHUB_TOKEN }}

       - name: Extract metadata (tags, labels) for Docker
         id: meta
         uses: docker/metadata-action@v5
         with:
-          images: langgenius/dify-api
+          images: ${{ env[matrix.image_name_env] }}
           tags: |
             type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
             type=ref,event=branch
@@ -39,22 +54,11 @@ jobs:
       - name: Build and push
         uses: docker/build-push-action@v5
         with:
-          context: "{{defaultContext}}:api"
+          context: "{{defaultContext}}:${{ matrix.context }}"
           platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
-          build-args: |
-            COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
+          build-args: COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
           push: true
           tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
-
-      - name: Deploy to server
-        if: github.ref == 'refs/heads/deploy/dev'
-        uses: appleboy/ssh-action@v0.1.8
-        with:
-          host: ${{ secrets.SSH_HOST }}
-          username: ${{ secrets.SSH_USER }}
-          key: ${{ secrets.SSH_PRIVATE_KEY }}
-          script: |
-            ${{ secrets.SSH_SCRIPT }}
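Note: `${{ env[matrix.image_name_env] }}` indexes the workflow-level `env` map with the key supplied by each matrix entry, so the single `build-and-push` job definition above now builds and pushes both the web and api images. This indirection is what makes the separate web workflow below redundant.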
.github/workflows/build-web-image.yml (60 changes, vendored)

@@ -1,60 +0,0 @@
-name: Build and Push WEB Image
-
-on:
-  push:
-    branches:
-      - 'main'
-      - 'deploy/dev'
-  release:
-    types: [ published ]
-
-jobs:
-  build-and-push:
-    runs-on: ubuntu-latest
-    if: github.event.pull_request.draft == false
-    steps:
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v2
-        with:
-          username: ${{ secrets.DOCKERHUB_USER }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: langgenius/dify-web
-          tags: |
-            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
-            type=ref,event=branch
-            type=sha,enable=true,priority=100,prefix=,suffix=,format=long
-            type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}
-
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: "{{defaultContext}}:web"
-          platforms: ${{ startsWith(github.ref, 'refs/tags/') && 'linux/amd64,linux/arm64' || 'linux/amd64' }}
-          build-args: |
-            COMMIT_SHA=${{ fromJSON(steps.meta.outputs.json).labels['org.opencontainers.image.revision'] }}
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-
-      - name: Deploy to server
-        if: github.ref == 'refs/heads/deploy/dev'
-        uses: appleboy/ssh-action@v0.1.8
-        with:
-          host: ${{ secrets.SSH_HOST }}
-          username: ${{ secrets.SSH_USER }}
-          key: ${{ secrets.SSH_PRIVATE_KEY }}
-          script: |
-            ${{ secrets.SSH_SCRIPT }}
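Note: this deleted web workflow mirrored the API workflow almost line for line. Its build steps are now covered by the matrix in the combined workflow above, and its deploy step moves into the dedicated workflow added below.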
.github/workflows/deploy-dev.yml (24 changes, vendored, new file)

@@ -0,0 +1,24 @@
+name: Deploy Dev
+
+on:
+  workflow_run:
+    workflows: ["Build and Push API & Web"]
+    branches:
+      - "deploy/dev"
+    types:
+      - completed
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: |
+      github.event.workflow_run.conclusion == 'success'
+    steps:
+      - name: Deploy to server
+        uses: appleboy/ssh-action@v0.1.8
+        with:
+          host: ${{ secrets.SSH_HOST }}
+          username: ${{ secrets.SSH_USER }}
+          key: ${{ secrets.SSH_PRIVATE_KEY }}
+          script: |
+            ${{ vars.SSH_SCRIPT || secrets.SSH_SCRIPT }}
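Note: `${{ vars.SSH_SCRIPT || secrets.SSH_SCRIPT }}` resolves to the repository variable when one is configured and falls back to the secret otherwise, and the `workflow_run` trigger combined with the `conclusion == 'success'` guard means the deploy only fires after the build workflow above completes successfully on `deploy/dev`.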
.gitignore (3 changes, vendored)

@@ -154,4 +154,5 @@ sdks/python-client/dist
 sdks/python-client/dify_client.egg-info

 .vscode/*
 !.vscode/launch.json
+pyrightconfig.json
Makefile (43 changes, new file)

@@ -0,0 +1,43 @@
+# Variables
+DOCKER_REGISTRY=langgenius
+WEB_IMAGE=$(DOCKER_REGISTRY)/dify-web
+API_IMAGE=$(DOCKER_REGISTRY)/dify-api
+VERSION=latest
+
+# Build Docker images
+build-web:
+	@echo "Building web Docker image: $(WEB_IMAGE):$(VERSION)..."
+	docker build -t $(WEB_IMAGE):$(VERSION) ./web
+	@echo "Web Docker image built successfully: $(WEB_IMAGE):$(VERSION)"
+
+build-api:
+	@echo "Building API Docker image: $(API_IMAGE):$(VERSION)..."
+	docker build -t $(API_IMAGE):$(VERSION) ./api
+	@echo "API Docker image built successfully: $(API_IMAGE):$(VERSION)"
+
+# Push Docker images
+push-web:
+	@echo "Pushing web Docker image: $(WEB_IMAGE):$(VERSION)..."
+	docker push $(WEB_IMAGE):$(VERSION)
+	@echo "Web Docker image pushed successfully: $(WEB_IMAGE):$(VERSION)"
+
+push-api:
+	@echo "Pushing API Docker image: $(API_IMAGE):$(VERSION)..."
+	docker push $(API_IMAGE):$(VERSION)
+	@echo "API Docker image pushed successfully: $(API_IMAGE):$(VERSION)"
+
+# Build all images
+build-all: build-web build-api
+
+# Push all images
+push-all: push-web push-api
+
+build-push-api: build-api push-api
+build-push-web: build-web push-web
+
+# Build and push all images
+build-push-all: build-all push-all
+	@echo "All Docker images have been built and pushed."
+
+# Phony targets
+.PHONY: build-web build-api push-web push-api build-all push-all build-push-all
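Note: because `VERSION` is an ordinary make variable, it can be overridden per invocation, e.g. `make build-push-all VERSION=0.5.10` (the tag here is illustrative); without an override, every target builds and pushes `:latest`.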
@@ -22,8 +22,8 @@
 </p>

 <p align="center">
-  <a href="https://dify.ai/blog/dify-ai-unveils-ai-agent-creating-gpts-and-assistants-with-various-llms" target="_blank">
-    Dify.AI Unveils AI Agent: Creating GPTs and Assistants with Various LLMs
+  <a href="https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6" target="_blank">
+    📌 Check out Dify Premium on AWS and deploy it to your own AWS VPC with one-click.
   </a>
 </p>

@@ -37,6 +37,9 @@
 You can try out [Dify.AI Cloud](https://dify.ai) now. It provides all the capabilities of the self-deployed version, and includes 200 free requests to OpenAI GPT-3.5.

+### Looking to purchase via AWS?
+Check out [Dify Premium on AWS](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one-click.
+
 ## Dify vs. LangChain vs. Assistants API

 | Feature | Dify.AI | Assistants API | LangChain |
@@ -90,7 +90,7 @@ class Config:
         # ------------------------
         # General Configurations.
         # ------------------------
-        self.CURRENT_VERSION = "0.5.9"
+        self.CURRENT_VERSION = "0.5.10"
         self.COMMIT_SHA = get_env('COMMIT_SHA')
         self.EDITION = "SELF_HOSTED"
         self.DEPLOY_ENV = get_env('DEPLOY_ENV')
@@ -2,7 +2,7 @@ import json

 from models.model import AppModelConfig

-languages = ['en-US', 'zh-Hans', 'pt-BR', 'es-ES', 'fr-FR', 'de-DE', 'ja-JP', 'ko-KR', 'ru-RU', 'it-IT', 'uk-UA']
+languages = ['en-US', 'zh-Hans', 'pt-BR', 'es-ES', 'fr-FR', 'de-DE', 'ja-JP', 'ko-KR', 'ru-RU', 'it-IT', 'uk-UA', 'vi-VN']

 language_timezone_mapping = {
     'en-US': 'America/New_York',
@@ -16,6 +16,7 @@ language_timezone_mapping = {
     'ru-RU': 'Europe/Moscow',
     'it-IT': 'Europe/Rome',
     'uk-UA': 'Europe/Kyiv',
+    'vi-VN': 'Asia/Ho_Chi_Minh',
 }
@@ -79,6 +80,16 @@ user_input_form_template = {
             }
         }
     ],
+    "vi-VN": [
+        {
+            "paragraph": {
+                "label": "Nội dung truy vấn",
+                "variable": "default_input",
+                "required": False,
+                "default": ""
+            }
+        }
+    ],
 }

 demo_model_templates = {
@@ -208,7 +219,6 @@ demo_model_templates = {
             )
         }
     ],
-
     'zh-Hans': [
         {
             'name': '翻译助手',
@@ -335,91 +345,92 @@ demo_model_templates = {
             )
         }
     ],
    'uk-UA': [
        {
            "name": "Помічник перекладу",
            "icon": "",
            "icon_background": "",
            "description": "Багатомовний перекладач, який надає можливості перекладу різними мовами, перекладаючи введені користувачем дані на потрібну мову.",
            "mode": "completion",
            "model_config": AppModelConfig(
                provider="openai",
                model_id="gpt-3.5-turbo-instruct",
                configs={
                    "prompt_template": "Будь ласка, перекладіть наступний текст на {{target_language}}:\n",
                    "prompt_variables": [
                        {
                            "key": "target_language",
                            "name": "Цільова мова",
                            "description": "Мова, на яку ви хочете перекласти.",
                            "type": "select",
                            "default": "Ukrainian",
                            "options": [
                                "Chinese", "English", "Japanese", "French", "Russian", "German", "Spanish", "Korean", "Italian",
                            ],
                        },
                    ],
                    "completion_params": {
                        "max_token": 1000,
                        "temperature": 0,
                        "top_p": 0,
                        "presence_penalty": 0.1,
                        "frequency_penalty": 0.1,
                    },
                },
                opening_statement="",
                suggested_questions=None,
                pre_prompt="Будь ласка, перекладіть наступний текст на {{target_language}}:\n{{query}}\ntranslate:",
                model=json.dumps({
                    "provider": "openai",
                    "name": "gpt-3.5-turbo-instruct",
                    "mode": "completion",
                    "completion_params": {
                        "max_tokens": 1000,
                        "temperature": 0,
                        "top_p": 0,
                        "presence_penalty": 0.1,
                        "frequency_penalty": 0.1,
                    },
                }),
                user_input_form=json.dumps([
                    {
                        "select": {
                            "label": "Цільова мова",
                            "variable": "target_language",
                            "description": "Мова, на яку ви хочете перекласти.",
                            "default": "Chinese",
                            "required": True,
                            'options': [
                                'Chinese', 'English', 'Japanese', 'French', 'Russian', 'German', 'Spanish', 'Korean', 'Italian',
                            ]
                        }
                    }, {
                        "paragraph": {
                            "label": "Запит",
                            "variable": "query",
                            "required": True,
                            "default": ""
                        }
                    }
                ])
            )
        },
        {
            "name": "AI інтерв’юер фронтенду",
            "icon": "",
@@ -460,5 +471,132 @@ demo_model_templates = {
             ),
         }
     ],
+
+    'vi-VN': [
+        {
+            'name': 'Trợ lý dịch thuật',
+            'icon': '',
+            'icon_background': '',
+            'description': 'Trình dịch đa ngôn ngữ cung cấp khả năng dịch bằng nhiều ngôn ngữ, dịch thông tin đầu vào của người dùng sang ngôn ngữ họ cần.',
+            'mode': 'completion',
+            'model_config': AppModelConfig(
+                provider='openai',
+                model_id='gpt-3.5-turbo-instruct',
+                configs={
+                    'prompt_template': "Hãy dịch đoạn văn bản sau sang ngôn ngữ {{target_language}}:\n",
+                    'prompt_variables': [
+                        {
+                            "key": "target_language",
+                            "name": "Ngôn ngữ đích",
+                            "description": "Ngôn ngữ bạn muốn dịch sang.",
+                            "type": "select",
+                            "default": "Vietnamese",
+                            'options': [
+                                'Chinese', 'English', 'Japanese', 'French', 'Russian', 'German', 'Spanish', 'Korean', 'Italian', 'Vietnamese',
+                            ]
+                        }
+                    ],
+                    'completion_params': {
+                        'max_token': 1000,
+                        'temperature': 0,
+                        'top_p': 0,
+                        'presence_penalty': 0.1,
+                        'frequency_penalty': 0.1,
+                    }
+                },
+                opening_statement='',
+                suggested_questions=None,
+                pre_prompt="Hãy dịch đoạn văn bản sau sang {{target_language}}:\n{{query}}\ndịch:",
+                model=json.dumps({
+                    "provider": "openai",
+                    "name": "gpt-3.5-turbo-instruct",
+                    "mode": "completion",
+                    "completion_params": {
+                        "max_tokens": 1000,
+                        "temperature": 0,
+                        "top_p": 0,
+                        "presence_penalty": 0.1,
+                        "frequency_penalty": 0.1
+                    }
+                }),
+                user_input_form=json.dumps([
+                    {
+                        "select": {
+                            "label": "Ngôn ngữ đích",
+                            "variable": "target_language",
+                            "description": "Ngôn ngữ bạn muốn dịch sang.",
+                            "default": "Vietnamese",
+                            "required": True,
+                            'options': [
+                                'Chinese', 'English', 'Japanese', 'French', 'Russian', 'German', 'Spanish', 'Korean', 'Italian', 'Vietnamese',
+                            ]
+                        }
+                    }, {
+                        "paragraph": {
+                            "label": "Query",
+                            "variable": "query",
+                            "required": True,
+                            "default": ""
+                        }
+                    }
+                ])
+            )
+        },
+        {
+            'name': 'Phỏng vấn front-end AI',
+            'icon': '',
+            'icon_background': '',
+            'description': 'Một người phỏng vấn front-end mô phỏng để kiểm tra mức độ kỹ năng phát triển front-end thông qua việc đặt câu hỏi.',
+            'mode': 'chat',
+            'model_config': AppModelConfig(
+                provider='openai',
+                model_id='gpt-3.5-turbo',
+                configs={
+                    'introduction': 'Xin chào, chào mừng đến với cuộc phỏng vấn của chúng tôi. Tôi là người phỏng vấn cho công ty công nghệ này và tôi sẽ kiểm tra kỹ năng phát triển web front-end của bạn. Tiếp theo, tôi sẽ hỏi bạn một số câu hỏi kỹ thuật. Hãy trả lời chúng càng kỹ lưỡng càng tốt. ',
+                    'prompt_template': "Bạn sẽ đóng vai người phỏng vấn cho một công ty công nghệ, kiểm tra kỹ năng phát triển web front-end của người dùng và đặt ra 5-10 câu hỏi kỹ thuật sắc bén.\n\nXin lưu ý:\n- Mỗi lần chỉ hỏi một câu hỏi.\n - Sau khi người dùng trả lời một câu hỏi, hãy hỏi trực tiếp câu hỏi tiếp theo mà không cố gắng sửa bất kỳ lỗi nào mà thí sinh mắc phải.\n- Nếu bạn cho rằng người dùng đã không trả lời đúng cho một số câu hỏi liên tiếp, hãy hỏi ít câu hỏi hơn.\n- Sau đặt câu hỏi cuối cùng, bạn có thể hỏi câu hỏi này: Tại sao bạn lại rời bỏ công việc cuối cùng của mình? Sau khi người dùng trả lời câu hỏi này, vui lòng bày tỏ sự hiểu biết và ủng hộ của bạn.\n",
+                    'prompt_variables': [],
+                    'completion_params': {
+                        'max_token': 300,
+                        'temperature': 0.8,
+                        'top_p': 0.9,
+                        'presence_penalty': 0.1,
+                        'frequency_penalty': 0.1,
+                    }
+                },
+                opening_statement='Xin chào, chào mừng đến với cuộc phỏng vấn của chúng tôi. Tôi là người phỏng vấn cho công ty công nghệ này và tôi sẽ kiểm tra kỹ năng phát triển web front-end của bạn. Tiếp theo, tôi sẽ hỏi bạn một số câu hỏi kỹ thuật. Hãy trả lời chúng càng kỹ lưỡng càng tốt. ',
+                suggested_questions=None,
+                pre_prompt="Bạn sẽ đóng vai người phỏng vấn cho một công ty công nghệ, kiểm tra kỹ năng phát triển web front-end của người dùng và đặt ra 5-10 câu hỏi kỹ thuật sắc bén.\n\nXin lưu ý:\n- Mỗi lần chỉ hỏi một câu hỏi.\n - Sau khi người dùng trả lời một câu hỏi, hãy hỏi trực tiếp câu hỏi tiếp theo mà không cố gắng sửa bất kỳ lỗi nào mà thí sinh mắc phải.\n- Nếu bạn cho rằng người dùng đã không trả lời đúng cho một số câu hỏi liên tiếp, hãy hỏi ít câu hỏi hơn.\n- Sau đặt câu hỏi cuối cùng, bạn có thể hỏi câu hỏi này: Tại sao bạn lại rời bỏ công việc cuối cùng của mình? Sau khi người dùng trả lời câu hỏi này, vui lòng bày tỏ sự hiểu biết và ủng hộ của bạn.\n",
+                model=json.dumps({
+                    "provider": "openai",
+                    "name": "gpt-3.5-turbo",
+                    "mode": "chat",
+                    "completion_params": {
+                        "max_tokens": 300,
+                        "temperature": 0.8,
+                        "top_p": 0.9,
+                        "presence_penalty": 0.1,
+                        "frequency_penalty": 0.1
+                    }
+                }),
+                user_input_form=None
+            )
+        }
+    ],
 }
@@ -245,6 +245,8 @@ class AppApi(Resource):
         agent_mode = model_config.agent_mode_dict
         # decrypt agent tool parameters if it's secret-input
         for tool in agent_mode.get('tools') or []:
+            if not isinstance(tool, dict) or len(tool.keys()) <= 3:
+                continue
             agent_tool_entity = AgentToolEntity(**tool)
             # get tool
             try:
@@ -52,6 +52,9 @@ class ModelConfigResource(Resource):
         masked_parameter_map = {}
         tool_map = {}
         for tool in agent_mode.get('tools') or []:
+            if not isinstance(tool, dict) or len(tool.keys()) <= 3:
+                continue
+
             agent_tool_entity = AgentToolEntity(**tool)
             # get tool
             try:
@@ -44,7 +44,7 @@ class AudioApi(Resource):
         response = AudioService.transcript_asr(
             tenant_id=app_model.tenant_id,
             file=file,
-            end_user=end_user
+            end_user=end_user.get_id()
         )

         return response
@@ -75,7 +75,7 @@ class AudioApi(Resource):


 class TextApi(Resource):
-    @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
+    @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
     def post(self, app_model: App, end_user: EndUser):
         parser = reqparse.RequestParser()
         parser.add_argument('text', type=str, required=True, nullable=False, location='json')
@@ -86,8 +86,8 @@ class TextApi(Resource):
         response = AudioService.transcript_tts(
             tenant_id=app_model.tenant_id,
             text=args['text'],
-            end_user=end_user,
-            voice=args['voice'] if args['voice'] else app_model.app_model_config.text_to_speech_dict.get('voice'),
+            end_user=end_user.get_id(),
+            voice=app_model.app_model_config.text_to_speech_dict.get('voice'),
             streaming=args['streaming']
         )
@@ -35,7 +35,7 @@ from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotIni
 from core.file.file_obj import FileObj
 from core.model_runtime.entities.message_entities import PromptMessageRole
 from core.model_runtime.entities.model_entities import ModelType
-from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
+from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.prompt.prompt_template import PromptTemplateParser
 from core.provider_manager import ProviderManager
@@ -195,8 +195,6 @@ class ApplicationManager:
         except ValidationError as e:
             logger.exception("Validation Error when generating")
             queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
-        except (ValueError, InvokeError) as e:
-            queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
         except Exception as e:
             logger.exception("Unknown Error when generating")
             queue_manager.publish_error(e, PublishFrom.APPLICATION_MANAGER)
@@ -3,11 +3,12 @@ import importlib.util
 import json
 import logging
 import os
-from collections import OrderedDict
 from typing import Any, Optional

 from pydantic import BaseModel

+from core.utils.position_helper import sort_to_dict_by_position_map
+

 class ExtensionModule(enum.Enum):
     MODERATION = 'moderation'
@@ -36,7 +37,8 @@ class Extensible:

     @classmethod
     def scan_extensions(cls):
-        extensions = {}
+        extensions: list[ModuleExtension] = []
+        position_map = {}

         # get the path of the current class
         current_path = os.path.abspath(cls.__module__.replace(".", os.path.sep) + '.py')
@@ -63,6 +65,7 @@ class Extensible:
             if os.path.exists(builtin_file_path):
                 with open(builtin_file_path, encoding='utf-8') as f:
                     position = int(f.read().strip())
+                    position_map[extension_name] = position

             if (extension_name + '.py') not in file_names:
                 logging.warning(f"Missing {extension_name}.py file in {subdir_path}, Skip.")
@@ -96,16 +99,15 @@ class Extensible:
                 with open(json_path, encoding='utf-8') as f:
                     json_data = json.load(f)

-            extensions[extension_name] = ModuleExtension(
+            extensions.append(ModuleExtension(
                 extension_class=extension_class,
                 name=extension_name,
                 label=json_data.get('label'),
                 form_schema=json_data.get('form_schema'),
                 builtin=builtin,
                 position=position
-            )
+            ))

-        sorted_items = sorted(extensions.items(), key=lambda x: (x[1].position is None, x[1].position))
-        sorted_extensions = OrderedDict(sorted_items)
+        sorted_extensions = sort_to_dict_by_position_map(position_map, extensions, lambda x: x.name)

         return sorted_extensions
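This refactor and the `AIModel` change further down both route ordering through a new shared helper module, `core.utils.position_helper`, whose source is not part of this compare view. Below is a minimal sketch of what its three call sites (`get_position_map`, `sort_by_position_map`, `sort_to_dict_by_position_map`) imply; all signatures and behavior here are inferred from usage, not copied from the repository:

```python
# Hypothetical reconstruction inferred from the call sites above; the real
# core/utils/position_helper.py in the dify repository may differ.
import os
from collections import OrderedDict
from collections.abc import Callable
from typing import Any

import yaml  # pip install pyyaml


def get_position_map(folder_path: str, file_name: str = '_position.yaml') -> dict[str, int]:
    """Read a _position.yaml list of names and map each name to its list index."""
    position_file_path = os.path.join(folder_path, file_name)
    if not os.path.exists(position_file_path):
        return {}
    with open(position_file_path, encoding='utf-8') as f:
        positions = yaml.safe_load(f) or []
    return {name: index for index, name in enumerate(positions)}


def sort_by_position_map(position_map: dict[str, int], elements: list[Any],
                         name_func: Callable[[Any], str]) -> list[Any]:
    """Sort elements by their mapped position; names missing from the map sink to the end."""
    if not position_map or not elements:
        return elements
    return sorted(elements, key=lambda e: position_map.get(name_func(e), float('inf')))


def sort_to_dict_by_position_map(position_map: dict[str, int], elements: list[Any],
                                 name_func: Callable[[Any], str]) -> OrderedDict:
    """Same ordering, returned as an OrderedDict keyed by element name."""
    sorted_elements = sort_by_position_map(position_map, elements, name_func)
    return OrderedDict((name_func(e), e) for e in sorted_elements)
```

Centralizing the position logic this way lets `scan_extensions` (which needs a name-keyed dict) and `AIModel.predefined_models` (which needs a sorted list) share one ordering rule instead of each re-reading `_position.yaml`.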
@@ -133,7 +133,7 @@ class ModelPropertyKey(Enum):
     DEFAULT_VOICE = "default_voice"
     VOICES = "voices"
     WORD_LIMIT = "word_limit"
-    AUDOI_TYPE = "audio_type"
+    AUDIO_TYPE = "audio_type"
     MAX_WORKERS = "max_workers"
@@ -18,6 +18,7 @@ from core.model_runtime.entities.model_entities import (
 )
 from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
 from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
+from core.utils.position_helper import get_position_map, sort_by_position_map


 class AIModel(ABC):
@@ -148,15 +149,7 @@ class AIModel(ABC):
         ]

-        # get _position.yaml file path
-        position_file_path = os.path.join(provider_model_type_path, '_position.yaml')
-
-        # read _position.yaml file
-        position_map = {}
-        if os.path.exists(position_file_path):
-            with open(position_file_path, encoding='utf-8') as f:
-                positions = yaml.safe_load(f)
-                # convert list to dict with key as model provider name, value as index
-                position_map = {position: index for index, position in enumerate(positions)}
+        position_map = get_position_map(provider_model_type_path)

         # traverse all model_schema_yaml_paths
         for model_schema_yaml_path in model_schema_yaml_paths:
@@ -206,8 +199,7 @@ class AIModel(ABC):
             model_schemas.append(model_schema)

         # resort model schemas by position
-        if position_map:
-            model_schemas.sort(key=lambda x: position_map.get(x.model, 999))
+        model_schemas = sort_by_position_map(position_map, model_schemas, lambda x: x.model)

         # cache model schemas
         self.model_schemas = model_schemas
@@ -94,8 +94,8 @@ class TTSModel(AIModel):
         """
         model_schema = self.get_model_schema(model, credentials)

-        if model_schema and ModelPropertyKey.AUDOI_TYPE in model_schema.model_properties:
-            return model_schema.model_properties[ModelPropertyKey.AUDOI_TYPE]
+        if model_schema and ModelPropertyKey.AUDIO_TYPE in model_schema.model_properties:
+            return model_schema.model_properties[ModelPropertyKey.AUDIO_TYPE]

     def _get_model_word_limit(self, model: str, credentials: dict) -> int:
         """
@@ -2,6 +2,7 @@
 - anthropic
 - azure_openai
 - google
+- nvidia
 - cohere
 - bedrock
 - togetherai
@@ -20,6 +21,7 @@
 - jina
 - chatglm
 - xinference
+- yi
 - openllm
 - localai
 - openai_api_compatible
@@ -0,0 +1,37 @@
+model: claude-3-haiku-20240307
+label:
+  en_US: claude-3-haiku-20240307
+model_type: llm
+features:
+  - agent-thought
+  - vision
+model_properties:
+  mode: chat
+  context_size: 200000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    default: 4096
+    min: 1
+    max: 4096
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.25'
+  output: '1.25'
+  unit: '0.000001'
+  currency: USD
@@ -342,12 +342,20 @@ class AnthropicLargeLanguageModel(LargeLanguageModel):
         Convert prompt messages to dict list and system
         """
         system = ""
-        prompt_message_dicts = []
-
+        first_loop = True
         for message in prompt_messages:
             if isinstance(message, SystemPromptMessage):
-                system += message.content + ("\n" if not system else "")
-            else:
-                prompt_message_dicts.append(self._convert_prompt_message_to_dict(message))
+                message.content=message.content.strip()
+                if first_loop:
+                    system=message.content
+                    first_loop=False
+                else:
+                    system+="\n"
+                    system+=message.content
+
+        prompt_message_dicts = []
+        for message in prompt_messages:
+            if not isinstance(message, SystemPromptMessage):
+                prompt_message_dicts.append(self._convert_prompt_message_to_dict(message))

         return system, prompt_message_dicts
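The old loop appears to have placed the separator badly: it appended "\n" only while `system` was still empty, so the newline landed after the first system message and later system messages ran together. The new loop strips each system message and newline-joins them. A standalone sketch of the new behavior, using simplified stand-ins for dify's message entities:

```python
# Simplified stand-ins for illustration only; not dify's actual entity classes.
from dataclasses import dataclass


@dataclass
class PromptMessage:
    content: str


class SystemPromptMessage(PromptMessage):
    pass


class UserPromptMessage(PromptMessage):
    pass


def merge_system(prompt_messages: list[PromptMessage]) -> str:
    """Join all system messages with newlines, mirroring the new loop above."""
    system = ""
    first_loop = True
    for message in prompt_messages:
        if isinstance(message, SystemPromptMessage):
            content = message.content.strip()
            if first_loop:
                system = content
                first_loop = False
            else:
                system += "\n" + content
    return system


messages = [
    SystemPromptMessage("You are a translator. "),
    UserPromptMessage("Bonjour"),
    SystemPromptMessage("Always answer in English."),
]
# Both system messages survive, stripped and newline-joined:
print(merge_system(messages))  # You are a translator.\nAlways answer in English.
```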
@@ -15,10 +15,11 @@ from core.model_runtime.model_providers.azure_openai._constant import AZURE_OPEN
 class _CommonAzureOpenAI:
     @staticmethod
     def _to_credential_kwargs(credentials: dict) -> dict:
+        api_version = credentials.get('openai_api_version', AZURE_OPENAI_API_VERSION)
         credentials_kwargs = {
             "api_key": credentials['openai_api_key'],
             "azure_endpoint": credentials['openai_api_base'],
-            "api_version": AZURE_OPENAI_API_VERSION,
+            "api_version": api_version,
             "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
             "max_retries": 1,
         }
@@ -14,8 +14,7 @@ from core.model_runtime.entities.model_entities import (
     PriceConfig,
 )

-AZURE_OPENAI_API_VERSION = '2023-12-01-preview'
-
+AZURE_OPENAI_API_VERSION = '2024-02-15-preview'

 def _get_max_tokens(default: int, min_val: int, max_val: int) -> ParameterRule:
     rule = ParameterRule(
@@ -124,6 +123,65 @@ LLM_BASE_MODELS = [
             )
         )
     ),
+    AzureBaseModel(
+        base_model_name='gpt-35-turbo-0125',
+        entity=AIModelEntity(
+            model='fake-deployment-name',
+            label=I18nObject(
+                en_US='fake-deployment-name-label',
+            ),
+            model_type=ModelType.LLM,
+            features=[
+                ModelFeature.AGENT_THOUGHT,
+                ModelFeature.MULTI_TOOL_CALL,
+                ModelFeature.STREAM_TOOL_CALL,
+            ],
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_properties={
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 16385,
+            },
+            parameter_rules=[
+                ParameterRule(
+                    name='temperature',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name='top_p',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
+                ParameterRule(
+                    name='presence_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
+                ),
+                ParameterRule(
+                    name='frequency_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
+                ),
+                _get_max_tokens(default=512, min_val=1, max_val=4096),
+                ParameterRule(
+                    name='response_format',
+                    label=I18nObject(
+                        zh_Hans='回复格式',
+                        en_US='response_format'
+                    ),
+                    type='string',
+                    help=I18nObject(
+                        zh_Hans='指定模型必须输出的格式',
+                        en_US='specifying the format that the model must output'
+                    ),
+                    required=False,
+                    options=['text', 'json_object']
+                ),
+            ],
+            pricing=PriceConfig(
+                input=0.0005,
+                output=0.0015,
+                unit=0.001,
+                currency='USD',
+            )
+        )
+    ),
     AzureBaseModel(
         base_model_name='gpt-4',
         entity=AIModelEntity(
@@ -274,6 +332,81 @@ LLM_BASE_MODELS = [
             )
         )
     ),
+    AzureBaseModel(
+        base_model_name='gpt-4-0125-preview',
+        entity=AIModelEntity(
+            model='fake-deployment-name',
+            label=I18nObject(
+                en_US='fake-deployment-name-label',
+            ),
+            model_type=ModelType.LLM,
+            features=[
+                ModelFeature.AGENT_THOUGHT,
+                ModelFeature.MULTI_TOOL_CALL,
+                ModelFeature.STREAM_TOOL_CALL,
+            ],
+            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
+            model_properties={
+                ModelPropertyKey.MODE: LLMMode.CHAT.value,
+                ModelPropertyKey.CONTEXT_SIZE: 128000,
+            },
+            parameter_rules=[
+                ParameterRule(
+                    name='temperature',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TEMPERATURE],
+                ),
+                ParameterRule(
+                    name='top_p',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.TOP_P],
+                ),
+                ParameterRule(
+                    name='presence_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.PRESENCE_PENALTY],
+                ),
+                ParameterRule(
+                    name='frequency_penalty',
+                    **PARAMETER_RULE_TEMPLATE[DefaultParameterName.FREQUENCY_PENALTY],
+                ),
+                _get_max_tokens(default=512, min_val=1, max_val=4096),
+                ParameterRule(
+                    name='seed',
+                    label=I18nObject(
+                        zh_Hans='种子',
+                        en_US='Seed'
+                    ),
+                    type='int',
+                    help=I18nObject(
+                        zh_Hans='如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint 响应参数来监视变化。',
+                        en_US='If specified, model will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.'
+                    ),
+                    required=False,
+                    precision=2,
+                    min=0,
+                    max=1,
+                ),
+                ParameterRule(
+                    name='response_format',
+                    label=I18nObject(
+                        zh_Hans='回复格式',
+                        en_US='response_format'
+                    ),
+                    type='string',
+                    help=I18nObject(
+                        zh_Hans='指定模型必须输出的格式',
+                        en_US='specifying the format that the model must output'
+                    ),
+                    required=False,
+                    options=['text', 'json_object']
+                ),
+            ],
+            pricing=PriceConfig(
+                input=0.01,
+                output=0.03,
+                unit=0.001,
+                currency='USD',
+            )
+        )
+    ),
     AzureBaseModel(
         base_model_name='gpt-4-1106-preview',
         entity=AIModelEntity(
@@ -628,7 +761,7 @@ TTS_BASE_MODELS = [
             },
         ],
         ModelPropertyKey.WORD_LIMIT: 120,
-        ModelPropertyKey.AUDOI_TYPE: 'mp3',
+        ModelPropertyKey.AUDIO_TYPE: 'mp3',
         ModelPropertyKey.MAX_WORKERS: 5
     },
     pricing=PriceConfig(
@@ -682,7 +815,7 @@ TTS_BASE_MODELS = [
             },
         ],
         ModelPropertyKey.WORD_LIMIT: 120,
-        ModelPropertyKey.AUDOI_TYPE: 'mp3',
+        ModelPropertyKey.AUDIO_TYPE: 'mp3',
         ModelPropertyKey.MAX_WORKERS: 5
     },
     pricing=PriceConfig(
@@ -46,6 +46,22 @@ model_credential_schema:
       placeholder:
         zh_Hans: 在此输入您的 API Key
         en_US: Enter your API key here
+    - variable: openai_api_version
+      label:
+        zh_Hans: API 版本
+        en_US: API Version
+      type: select
+      required: true
+      options:
+        - label:
+            en_US: 2024-02-15-preview
+          value: 2024-02-15-preview
+        - label:
+            en_US: 2023-12-01-preview
+          value: 2023-12-01-preview
+      placeholder:
+        zh_Hans: 在此选择您的 API 版本
+        en_US: Select your API Version here
     - variable: base_model_name
       label:
         en_US: Base Model
@@ -59,6 +75,12 @@ model_credential_schema:
           show_on:
            - variable: __model_type
              value: llm
+        - label:
+            en_US: gpt-35-turbo-0125
+          value: gpt-35-turbo-0125
+          show_on:
+            - variable: __model_type
+              value: llm
         - label:
             en_US: gpt-35-turbo-16k
           value: gpt-35-turbo-16k
@@ -77,6 +99,12 @@ model_credential_schema:
          show_on:
            - variable: __model_type
              value: llm
+        - label:
+            en_US: gpt-4-0125-preview
+          value: gpt-4-0125-preview
+          show_on:
+            - variable: __model_type
+              value: llm
         - label:
             en_US: gpt-4-1106-preview
           value: gpt-4-1106-preview
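Note: the `openai_api_version` select added to the credential schema here is what feeds the `credentials.get('openai_api_version', AZURE_OPENAI_API_VERSION)` fallback introduced in `_to_credential_kwargs` above, so credentials saved before this change simply inherit the new default, `2024-02-15-preview`.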
@@ -124,7 +124,7 @@ class BaichuanTextEmbeddingModel(TextEmbeddingModel):
         elif err == 'insufficient_quota':
             raise InsufficientAccountBalance(msg)
         elif err == 'invalid_authentication':
-            raise InvalidAuthenticationError(msg)
+            raise InvalidAuthenticationError(msg)
         elif err and 'rate' in err:
         elif err and 'internal' in err:
@@ -17,10 +17,9 @@ class BedrockProvider(ModelProvider):
         """
         try:
             model_instance = self.get_model_instance(ModelType.LLM)
-
-            # Use `gemini-pro` model for validate,
+            bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
             model_instance.validate_credentials(
-                model='amazon.titan-text-lite-v1',
+                model=bedrock_validate_model_name,
                 credentials=credentials
             )
         except CredentialsValidateFailedError as ex:
@@ -48,24 +48,33 @@ provider_credential_schema:
       - value: us-east-1
         label:
           en_US: US East (N. Virginia)
-          zh_Hans: US East (N. Virginia)
+          zh_Hans: 美国东部 (弗吉尼亚北部)
       - value: us-west-2
         label:
           en_US: US West (Oregon)
-          zh_Hans: US West (Oregon)
+          zh_Hans: 美国西部 (俄勒冈州)
       - value: ap-southeast-1
         label:
           en_US: Asia Pacific (Singapore)
-          zh_Hans: Asia Pacific (Singapore)
+          zh_Hans: 亚太地区 (新加坡)
       - value: ap-northeast-1
         label:
           en_US: Asia Pacific (Tokyo)
-          zh_Hans: Asia Pacific (Tokyo)
+          zh_Hans: 亚太地区 (东京)
       - value: eu-central-1
         label:
           en_US: Europe (Frankfurt)
-          zh_Hans: Europe (Frankfurt)
+          zh_Hans: 欧洲 (法兰克福)
       - value: us-gov-west-1
         label:
           en_US: AWS GovCloud (US-West)
           zh_Hans: AWS GovCloud (US-West)
+  - variable: model_for_validation
+    required: false
+    label:
+      en_US: Available Model Name
+      zh_Hans: 可用模型名称
+    type: text-input
+    placeholder:
+      en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
+      zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如:amazon.titan-text-lite-v1)
@@ -4,6 +4,8 @@
 - anthropic.claude-v1
 - anthropic.claude-v2
 - anthropic.claude-v2:1
+- anthropic.claude-3-sonnet-v1:0
+- anthropic.claude-3-haiku-v1:0
 - cohere.command-light-text-v14
 - cohere.command-text-v14
 - meta.llama2-13b-chat-v1
@@ -0,0 +1,57 @@
+model: anthropic.claude-3-haiku-20240307-v1:0
+label:
+  en_US: Claude 3 Haiku
+model_type: llm
+features:
+  - agent-thought
+  - vision
+model_properties:
+  mode: chat
+  context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+  # docs: https://docs.anthropic.com/claude/docs/system-prompts
+  - name: temperature
+    use_template: temperature
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
+  - name: top_p
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
+    required: false
+    type: int
+    default: 0
+    min: 0
+    # tip docs from aws has error, max value is 500
+    max: 500
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+  input: '0.003'
+  output: '0.015'
+  unit: '0.001'
+  currency: USD
@@ -0,0 +1,56 @@
+model: anthropic.claude-3-sonnet-20240229-v1:0
+label:
+  en_US: Claude 3 Sonnet
+model_type: llm
+features:
+  - agent-thought
+  - vision
+model_properties:
+  mode: chat
+  context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    type: int
+    default: 4096
+    min: 1
+    max: 4096
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意,Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+  - name: temperature
+    use_template: temperature
+    required: false
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。
+      en_US: The amount of randomness injected into the response.
+  - name: top_p
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中,Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p,但不能同时更改两者。
+      en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
+  - name: top_k
+    required: false
+    type: int
+    default: 0
+    min: 0
+    # tip docs from aws has error, max value is 500
+    max: 500
+    help:
+      zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+pricing:
+  input: '0.00025'
+  output: '0.00125'
+  unit: '0.001'
+  currency: USD
@@ -1,9 +1,22 @@
+import base64
 import json
 import logging
+import mimetypes
 import time
 from collections.abc import Generator
-from typing import Optional, Union
+from typing import Optional, Union, cast

 import boto3
+import requests
+from anthropic import AnthropicBedrock, Stream
+from anthropic.types import (
+    ContentBlockDeltaEvent,
+    Message,
+    MessageDeltaEvent,
+    MessageStartEvent,
+    MessageStopEvent,
+    MessageStreamEvent,
+)
 from botocore.config import Config
 from botocore.exceptions import (
     ClientError,
@@ -13,14 +26,18 @@ from botocore.exceptions import (
     UnknownServiceError,
 )

-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
+from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from core.model_runtime.entities.message_entities import (
     AssistantPromptMessage,
+    ImagePromptMessageContent,
     PromptMessage,
+    PromptMessageContentType,
     PromptMessageTool,
     SystemPromptMessage,
+    TextPromptMessageContent,
     UserPromptMessage,
 )
+from core.model_runtime.entities.model_entities import PriceType
 from core.model_runtime.errors.invoke import (
     InvokeAuthorizationError,
     InvokeBadRequestError,
@@ -54,9 +71,293 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
|
||||
:param user: unique user id
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
|
||||
# invoke claude 3 models via anthropic official SDK
|
||||
if "anthropic.claude-3" in model:
|
||||
return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user)
|
||||
# invoke model
|
||||
return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
|
||||
|
||||
def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
|
||||
stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
|
||||
"""
|
||||
Invoke Claude3 large language model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_messages: prompt messages
|
||||
:param model_parameters: model parameters
|
||||
:param stop: stop words
|
||||
:param stream: is stream response
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
# use Anthropic official SDK references
|
||||
# - https://docs.anthropic.com/claude/reference/claude-on-amazon-bedrock
|
||||
# - https://github.com/anthropics/anthropic-sdk-python
|
||||
client = AnthropicBedrock(
|
||||
aws_access_key=credentials["aws_access_key_id"],
|
||||
aws_secret_key=credentials["aws_secret_access_key"],
|
||||
aws_region=credentials["aws_region"],
|
||||
)
|
||||
|
||||
extra_model_kwargs = {}
|
||||
if stop:
|
||||
extra_model_kwargs['stop_sequences'] = stop
|
||||
|
||||
# Notice: If you request the current version of the SDK to the bedrock server,
|
||||
# you will get the following error message and you need to wait for the service or SDK to be updated.
|
||||
# Response: Error code: 400
|
||||
# {'message': 'Malformed input request: #: subject must not be valid against schema
|
||||
# {"required":["messages"]}#: extraneous key [metadata] is not permitted, please reformat your input and try again.'}
|
||||
# TODO: Open in the future when the interface is properly supported
|
||||
# if user:
|
||||
# ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465
|
||||
# extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user)
|
||||
|
||||
system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages)
|
||||
|
||||
if system:
|
||||
extra_model_kwargs['system'] = system
|
||||
|
||||
response = client.messages.create(
|
||||
model=model,
|
||||
messages=prompt_message_dicts,
|
||||
stream=stream,
|
||||
**model_parameters,
|
||||
**extra_model_kwargs
|
||||
)
|
||||
|
||||
if stream:
|
||||
return self._handle_claude3_stream_response(model, credentials, response, prompt_messages)
|
||||
|
||||
return self._handle_claude3_response(model, credentials, response, prompt_messages)
|
||||
|
||||
def _handle_claude3_response(self, model: str, credentials: dict, response: Message,
|
||||
prompt_messages: list[PromptMessage]) -> LLMResult:
|
||||
"""
|
||||
Handle llm chat response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response chunk generator result
|
||||
"""
|
||||
|
||||
# transform assistant message to prompt message
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=response.content[0].text
|
||||
)
|
||||
|
||||
# calculate num tokens
|
||||
if response.usage:
|
||||
# transform usage
|
||||
prompt_tokens = response.usage.input_tokens
|
||||
completion_tokens = response.usage.output_tokens
|
||||
else:
|
||||
# calculate num tokens
|
||||
prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages)
|
||||
completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message])
|
||||
|
||||
# transform usage
|
||||
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
|
||||
|
||||
# transform response
|
||||
response = LLMResult(
|
||||
model=response.model,
|
||||
prompt_messages=prompt_messages,
|
||||
message=assistant_prompt_message,
|
||||
usage=usage
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
|
||||
prompt_messages: list[PromptMessage], ) -> Generator:
|
||||
"""
|
||||
Handle llm chat stream response
|
||||
|
||||
:param model: model name
|
||||
:param credentials: credentials
|
||||
:param response: response
|
||||
:param prompt_messages: prompt messages
|
||||
:return: full response or stream response chunk generator result
|
||||
"""
|
||||
|
||||
try:
|
||||
full_assistant_content = ''
|
||||
return_model = None
|
||||
input_tokens = 0
|
||||
output_tokens = 0
|
||||
finish_reason = None
|
||||
index = 0
|
||||
|
||||
for chunk in response:
|
||||
if isinstance(chunk, MessageStartEvent):
|
||||
return_model = chunk.message.model
|
||||
input_tokens = chunk.message.usage.input_tokens
|
||||
elif isinstance(chunk, MessageDeltaEvent):
|
||||
output_tokens = chunk.usage.output_tokens
|
||||
finish_reason = chunk.delta.stop_reason
|
||||
elif isinstance(chunk, MessageStopEvent):
|
||||
usage = self._calc_response_usage(model, credentials, input_tokens, output_tokens)
|
||||
yield LLMResultChunk(
|
||||
model=return_model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index + 1,
|
||||
message=AssistantPromptMessage(
|
||||
content=''
|
||||
),
|
||||
finish_reason=finish_reason,
|
||||
usage=usage
|
||||
)
|
||||
)
|
||||
elif isinstance(chunk, ContentBlockDeltaEvent):
|
||||
chunk_text = chunk.delta.text if chunk.delta.text else ''
|
||||
full_assistant_content += chunk_text
|
||||
assistant_prompt_message = AssistantPromptMessage(
|
||||
content=chunk_text if chunk_text else '',
|
||||
)
|
||||
index = chunk.index
|
||||
yield LLMResultChunk(
|
||||
model=model,
|
||||
prompt_messages=prompt_messages,
|
||||
delta=LLMResultChunkDelta(
|
||||
index=index,
|
||||
message=assistant_prompt_message,
|
||||
)
|
||||
)
|
||||
except Exception as ex:
|
||||
raise InvokeError(str(ex))
|
||||
|
||||
def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param prompt_tokens: prompt tokens
|
||||
:param completion_tokens: completion tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get prompt price info
|
||||
prompt_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=prompt_tokens,
|
||||
)
|
||||
|
||||
# get completion price info
|
||||
completion_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.OUTPUT,
|
||||
tokens=completion_tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = LLMUsage(
|
||||
prompt_tokens=prompt_tokens,
|
||||
prompt_unit_price=prompt_price_info.unit_price,
|
||||
prompt_price_unit=prompt_price_info.unit,
|
||||
prompt_price=prompt_price_info.total_amount,
|
||||
completion_tokens=completion_tokens,
|
||||
completion_unit_price=completion_price_info.unit_price,
|
||||
completion_price_unit=completion_price_info.unit,
|
||||
completion_price=completion_price_info.total_amount,
|
||||
total_tokens=prompt_tokens + completion_tokens,
|
||||
total_price=prompt_price_info.total_amount + completion_price_info.total_amount,
|
||||
currency=prompt_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at
|
||||
)
|
||||
|
||||
return usage
|
||||
|
||||
    def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
        """
        Convert prompt messages to dict list and system
        """

        system = ""
        first_loop = True
        for message in prompt_messages:
            if isinstance(message, SystemPromptMessage):
                message.content = message.content.strip()
                if first_loop:
                    system = message.content
                    first_loop = False
                else:
                    system += "\n"
                    system += message.content

        prompt_message_dicts = []
        for message in prompt_messages:
            if not isinstance(message, SystemPromptMessage):
                prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message))

        return system, prompt_message_dicts

    def _convert_claude3_prompt_message_to_dict(self, message: PromptMessage) -> dict:
        """
        Convert PromptMessage to dict
        """
        if isinstance(message, UserPromptMessage):
            message = cast(UserPromptMessage, message)
            if isinstance(message.content, str):
                message_dict = {"role": "user", "content": message.content}
            else:
                sub_messages = []
                for message_content in message.content:
                    if message_content.type == PromptMessageContentType.TEXT:
                        message_content = cast(TextPromptMessageContent, message_content)
                        sub_message_dict = {
                            "type": "text",
                            "text": message_content.data
                        }
                        sub_messages.append(sub_message_dict)
                    elif message_content.type == PromptMessageContentType.IMAGE:
                        message_content = cast(ImagePromptMessageContent, message_content)
                        if not message_content.data.startswith("data:"):
                            # fetch image data from url
                            try:
                                image_content = requests.get(message_content.data).content
                                mime_type, _ = mimetypes.guess_type(message_content.data)
                                base64_data = base64.b64encode(image_content).decode('utf-8')
                            except Exception as ex:
                                raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
                        else:
                            data_split = message_content.data.split(";base64,")
                            mime_type = data_split[0].replace("data:", "")
                            base64_data = data_split[1]

                        if mime_type not in ["image/jpeg", "image/png", "image/gif", "image/webp"]:
                            raise ValueError(f"Unsupported image type {mime_type}, "
                                             f"only support image/jpeg, image/png, image/gif, and image/webp")

                        sub_message_dict = {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": mime_type,
                                "data": base64_data
                            }
                        }
                        sub_messages.append(sub_message_dict)

                message_dict = {"role": "user", "content": sub_messages}
        elif isinstance(message, AssistantPromptMessage):
            message = cast(AssistantPromptMessage, message)
            message_dict = {"role": "assistant", "content": message.content}
        elif isinstance(message, SystemPromptMessage):
            message = cast(SystemPromptMessage, message)
            message_dict = {"role": "system", "content": message.content}
        else:
            raise ValueError(f"Got unknown type {message}")

        return message_dict
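Note: the inline-image branch above hinges on the ";base64," split; a quick standalone check of what it yields for a typical data URI:

    # Illustration of the data-URI split used in the image branch above.
    data_uri = "data:image/png;base64,iVBORw0KGgo="  # tiny placeholder payload
    head, base64_data = data_uri.split(";base64,")
    mime_type = head.replace("data:", "")
    assert mime_type == "image/png"
    assert base64_data == "iVBORw0KGgo="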
    def get_num_tokens(self, model: str, credentials: dict, messages: list[PromptMessage] | str,
                       tools: Optional[list[PromptMessageTool]] = None) -> int:
        """
@@ -101,7 +402,19 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
        :param credentials: model credentials
        :return:
        """

        if "anthropic.claude-3" in model:
            try:
                self._invoke_claude3(model=model,
                                     credentials=credentials,
                                     prompt_messages=[{"role": "user", "content": "ping"}],
                                     model_parameters={},
                                     stop=None,
                                     stream=False)
            except Exception as ex:
                raise CredentialsValidateFailedError(str(ex))

        try:
            ping_message = UserPromptMessage(content="ping")
            self._generate(model=model,
@@ -1,6 +1,5 @@
from collections.abc import Generator
from typing import cast
from urllib.parse import urljoin

from httpx import Timeout
from openai import (
@@ -19,6 +18,7 @@ from openai import (
from openai.types.chat import ChatCompletion, ChatCompletionChunk
from openai.types.chat.chat_completion_message import FunctionCall
from openai.types.completion import Completion
from yarl import URL

from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta
@@ -181,7 +181,7 @@ class LocalAILarguageModel(LargeLanguageModel):
                UserPromptMessage(content='ping')
            ], model_parameters={
                'max_tokens': 10,
            }, stop=[])
            }, stop=[], stream=False)
        except Exception as ex:
            raise CredentialsValidateFailedError(f'Invalid credentials {str(ex)}')

@@ -227,6 +227,12 @@ class LocalAILarguageModel(LargeLanguageModel):
            )
        ]

        model_properties = {
            ModelPropertyKey.MODE: completion_model,
        } if completion_model else {}

        model_properties[ModelPropertyKey.CONTEXT_SIZE] = int(credentials.get('context_size', '2048'))

        entity = AIModelEntity(
            model=model,
            label=I18nObject(
@@ -234,7 +240,7 @@ class LocalAILarguageModel(LargeLanguageModel):
            ),
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.LLM,
            model_properties={ ModelPropertyKey.MODE: completion_model } if completion_model else {},
            model_properties=model_properties,
            parameter_rules=rules
        )

@@ -319,7 +325,7 @@ class LocalAILarguageModel(LargeLanguageModel):
        client_kwargs = {
            "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
            "api_key": "1",
            "base_url": urljoin(credentials['server_url'], 'v1'),
            "base_url": str(URL(credentials['server_url']) / 'v1'),
        }

        return client_kwargs
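Note: the urljoin-to-yarl switch above is behavioral, not cosmetic. urljoin resolves against the last '/' of the base, so a server_url that carries a path segment would silently lose it, while URL / 'v1' always appends. A short demonstration:

    from urllib.parse import urljoin
    from yarl import URL

    base = "http://192.168.1.100:8080/localai"
    print(urljoin(base, "v1"))    # http://192.168.1.100:8080/v1  (path dropped)
    print(str(URL(base) / "v1"))  # http://192.168.1.100:8080/localai/v1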
@@ -56,3 +56,12 @@ model_credential_schema:
      placeholder:
        zh_Hans: 在此输入LocalAI的服务器地址,如 http://192.168.1.100:8080
        en_US: Enter the url of your LocalAI, e.g. http://192.168.1.100:8080
    - variable: context_size
      label:
        zh_Hans: 上下文大小
        en_US: Context size
      placeholder:
        zh_Hans: 输入上下文大小
        en_US: Enter context size
      required: false
      type: text-input
@@ -1,11 +1,12 @@
import time
from json import JSONDecodeError, dumps
from os.path import join
from typing import Optional

from requests import post
from yarl import URL

from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
@@ -57,7 +58,7 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
        }

        try:
            response = post(join(url, 'embeddings'), headers=headers, data=dumps(data), timeout=10)
            response = post(str(URL(url) / 'embeddings'), headers=headers, data=dumps(data), timeout=10)
        except Exception as e:
            raise InvokeConnectionError(str(e))

@@ -113,6 +114,27 @@ class LocalAITextEmbeddingModel(TextEmbeddingModel):
            # use GPT2Tokenizer to get num tokens
            num_tokens += self._get_num_tokens_by_gpt2(text)
        return num_tokens

    def _get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity | None:
        """
        Get customizable model schema

        :param model: model name
        :param credentials: model credentials
        :return: model schema
        """
        return AIModelEntity(
            model=model,
            label=I18nObject(zh_Hans=model, en_US=model),
            model_type=ModelType.TEXT_EMBEDDING,
            features=[],
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={
                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get('context_size', '512')),
                ModelPropertyKey.MAX_CHUNKS: 1,
            },
            parameter_rules=[]
        )

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
@@ -1,10 +1,8 @@
import importlib
import logging
import os
from collections import OrderedDict
from typing import Optional

import yaml
from pydantic import BaseModel

from core.model_runtime.entities.model_entities import ModelType
@@ -12,6 +10,7 @@ from core.model_runtime.entities.provider_entities import ProviderConfig, Provid
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
from core.model_runtime.schema_validators.model_credential_schema_validator import ModelCredentialSchemaValidator
from core.model_runtime.schema_validators.provider_credential_schema_validator import ProviderCredentialSchemaValidator
from core.utils.position_helper import get_position_map, sort_to_dict_by_position_map

logger = logging.getLogger(__name__)

@@ -200,7 +199,6 @@ class ModelProviderFactory:
        if self.model_provider_extensions:
            return self.model_provider_extensions

        model_providers = {}

        # get the path of current classes
        current_path = os.path.abspath(__file__)
@@ -215,17 +213,10 @@ class ModelProviderFactory:
        ]

        # get _position.yaml file path
        position_file_path = os.path.join(model_providers_path, '_position.yaml')

        # read _position.yaml file
        position_map = {}
        if os.path.exists(position_file_path):
            with open(position_file_path, encoding='utf-8') as f:
                positions = yaml.safe_load(f)
                # convert list to dict with key as model provider name, value as index
                position_map = {position: index for index, position in enumerate(positions)}
        position_map = get_position_map(model_providers_path)

        # traverse all model_provider_dir_paths
        model_providers: list[ModelProviderExtension] = []
        for model_provider_dir_path in model_provider_dir_paths:
            # get model_provider dir name
            model_provider_name = os.path.basename(model_provider_dir_path)
@@ -256,14 +247,13 @@ class ModelProviderFactory:
                logger.warning(f"Missing {model_provider_name}.yaml file in {model_provider_dir_path}, Skip.")
                continue

            model_providers[model_provider_name] = ModelProviderExtension(
            model_providers.append(ModelProviderExtension(
                name=model_provider_name,
                provider_instance=model_provider_class(),
                position=position_map.get(model_provider_name)
            )
            ))

        sorted_items = sorted(model_providers.items(), key=lambda x: (x[1].position is None, x[1].position))
        sorted_extensions = OrderedDict(sorted_items)
        sorted_extensions = sort_to_dict_by_position_map(position_map, model_providers, lambda x: x.name)

        self.model_provider_extensions = sorted_extensions
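Note: the refactor above moves the _position.yaml handling into core.utils.position_helper. Judging purely from these call sites, the helpers behave roughly like the sketch below; this is a reconstruction from usage, not the actual implementation:

    import os
    from collections import OrderedDict

    import yaml

    def get_position_map(folder_path):
        # Assumed behavior, reconstructed from the inline code it replaces:
        # read _position.yaml and map each listed name to its list index.
        position_file_path = os.path.join(folder_path, '_position.yaml')
        if not os.path.exists(position_file_path):
            return {}
        with open(position_file_path, encoding='utf-8') as f:
            positions = yaml.safe_load(f) or []
        return {name: index for index, name in enumerate(positions)}

    def sort_to_dict_by_position_map(position_map, data, name_func):
        # Assumed behavior: items missing from the map sort after mapped ones.
        sorted_items = sorted(
            data,
            key=lambda item: (position_map.get(name_func(item)) is None,
                              position_map.get(name_func(item), 0)),
        )
        return OrderedDict((name_func(item), item) for item in sorted_items)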
Binary file not shown. (After: 110 KiB)
@@ -0,0 +1,3 @@
<svg width="567" height="376" viewBox="0 0 567 376" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M58.0366 161.868C58.0366 161.868 109.261 86.2912 211.538 78.4724V51.053C98.2528 60.1511 0.152344 156.098 0.152344 156.098C0.152344 156.098 55.7148 316.717 211.538 331.426V302.282C97.1876 287.896 58.0366 161.868 58.0366 161.868ZM211.538 244.32V271.013C125.114 255.603 101.125 165.768 101.125 165.768C101.125 165.768 142.621 119.799 211.538 112.345V141.633C211.486 141.633 211.449 141.617 211.406 141.617C175.235 137.276 146.978 171.067 146.978 171.067C146.978 171.067 162.816 227.949 211.538 244.32ZM211.538 0.47998V51.053C214.864 50.7981 218.189 50.5818 221.533 50.468C350.326 46.1273 434.243 156.098 434.243 156.098C434.243 156.098 337.861 273.296 237.448 273.296C228.245 273.296 219.63 272.443 211.538 271.009V302.282C218.695 303.201 225.903 303.667 233.119 303.675C326.56 303.675 394.134 255.954 459.566 199.474C470.415 208.162 514.828 229.299 523.958 238.55C461.745 290.639 316.752 332.626 234.551 332.626C226.627 332.626 219.018 332.148 211.538 331.426V375.369H566.701V0.47998H211.538ZM211.538 112.345V78.4724C214.829 78.2425 218.146 78.0672 221.533 77.9602C314.148 75.0512 374.909 157.548 374.909 157.548C374.909 157.548 309.281 248.693 238.914 248.693C228.787 248.693 219.707 247.065 211.536 244.318V141.631C247.591 145.987 254.848 161.914 276.524 198.049L324.737 157.398C324.737 157.398 289.544 111.243 230.219 111.243C223.768 111.241 217.597 111.696 211.538 112.345Z" fill="#77B900"/>
</svg>
(After: 1.5 KiB)
@@ -0,0 +1,4 @@
- google/gemma-7b
- meta/llama2-70b
- mistralai/mixtral-8x7b-instruct-v0.1
- fuyu-8b
@@ -0,0 +1,27 @@
model: fuyu-8b
label:
  zh_Hans: fuyu-8b
  en_US: fuyu-8b
model_type: llm
features:
  - agent-thought
  - vision
model_properties:
  mode: chat
  context_size: 16000
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 0.2
    min: 0.1
    max: 1
  - name: top_p
    use_template: top_p
    default: 0.7
    min: 0.1
    max: 1
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 1024
@@ -0,0 +1,30 @@
model: google/gemma-7b
label:
  zh_Hans: google/gemma-7b
  en_US: google/gemma-7b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 8192
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
@@ -0,0 +1,30 @@
model: meta/llama2-70b
label:
  zh_Hans: meta/llama2-70b
  en_US: meta/llama2-70b
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
247
api/core/model_runtime/model_providers/nvidia/llm/llm.py
Normal file
@@ -0,0 +1,247 @@
import json
from collections.abc import Generator
from typing import Optional, Union

import requests
from yarl import URL

from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
from core.model_runtime.entities.message_entities import (
    PromptMessage,
    PromptMessageContentType,
    PromptMessageFunction,
    PromptMessageTool,
    UserPromptMessage,
)
from core.model_runtime.errors.invoke import InvokeError
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
from core.model_runtime.utils import helper


class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
    MODEL_SUFFIX_MAP = {
        'fuyu-8b': 'vlm/adept/fuyu-8b',
        'mistralai/mixtral-8x7b-instruct-v0.1': '',
        'google/gemma-7b': '',
        'meta/llama2-70b': ''
    }

    def _invoke(self, model: str, credentials: dict,
                prompt_messages: list[PromptMessage], model_parameters: dict,
                tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                stream: bool = True, user: Optional[str] = None) \
            -> Union[LLMResult, Generator]:

        self._add_custom_parameters(credentials, model)
        prompt_messages = self._transform_prompt_messages(prompt_messages)
        stop = []
        user = None

        return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)

    def _transform_prompt_messages(self, prompt_messages: list[PromptMessage]) -> list[PromptMessage]:
        """
        Handle Image transform
        """
        for i, p in enumerate(prompt_messages):
            if isinstance(p, UserPromptMessage) and isinstance(p.content, list):
                content = p.content
                content_text = ''
                for prompt_content in content:
                    if prompt_content.type == PromptMessageContentType.TEXT:
                        content_text += prompt_content.data
                    else:
                        content_text += f' <img src="{prompt_content.data}" />'

                prompt_message = UserPromptMessage(
                    content=content_text
                )
                prompt_messages[i] = prompt_message
        return prompt_messages
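Note: the transform above flattens structured multimodal content into one plain string with inline <img> tags (presumably the shape the NVIDIA vision endpoint expects). A plain-string demo of the same flattening rule, without the Dify message types:

    # Plain-string demo of the flattening used in _transform_prompt_messages.
    parts = [("text", "Describe this:"), ("image", "data:image/png;base64,AAAA")]
    content_text = ""
    for kind, data in parts:
        if kind == "text":
            content_text += data
        else:
            content_text += f' <img src="{data}" />'
    print(content_text)  # Describe this: <img src="data:image/png;base64,AAAA" />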
    def validate_credentials(self, model: str, credentials: dict) -> None:
        self._add_custom_parameters(credentials, model)
        self._validate_credentials(model, credentials)

    def _add_custom_parameters(self, credentials: dict, model: str) -> None:
        credentials['mode'] = 'chat'

        if self.MODEL_SUFFIX_MAP[model]:
            credentials['server_url'] = f'https://ai.api.nvidia.com/v1/{self.MODEL_SUFFIX_MAP[model]}'
            credentials.pop('endpoint_url')
        else:
            credentials['endpoint_url'] = 'https://integrate.api.nvidia.com/v1'

        credentials['stream_mode_delimiter'] = '\n'
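Note: MODEL_SUFFIX_MAP above routes models with a non-empty suffix to a dedicated ai.api.nvidia.com path and everything else to the shared OpenAI-compatible endpoint. Tracing the two branches:

    # Tracing _add_custom_parameters for the two cases above.
    MODEL_SUFFIX_MAP = {'fuyu-8b': 'vlm/adept/fuyu-8b', 'meta/llama2-70b': ''}

    for model in ('fuyu-8b', 'meta/llama2-70b'):
        if MODEL_SUFFIX_MAP[model]:
            url = f'https://ai.api.nvidia.com/v1/{MODEL_SUFFIX_MAP[model]}'
        else:
            url = 'https://integrate.api.nvidia.com/v1'
        print(model, '->', url)
    # fuyu-8b -> https://ai.api.nvidia.com/v1/vlm/adept/fuyu-8b
    # meta/llama2-70b -> https://integrate.api.nvidia.com/v1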
    def _validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials using requests to ensure compatibility with all providers following OpenAI's API standard.

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            headers = {
                'Content-Type': 'application/json'
            }

            api_key = credentials.get('api_key')
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"

            endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
            if endpoint_url and not endpoint_url.endswith('/'):
                endpoint_url += '/'
            server_url = credentials['server_url'] if 'server_url' in credentials else None

            # prepare the payload for a simple ping to the model
            data = {
                'model': model,
                'max_tokens': 5
            }

            completion_type = LLMMode.value_of(credentials['mode'])

            if completion_type is LLMMode.CHAT:
                data['messages'] = [
                    {
                        "role": "user",
                        "content": "ping"
                    },
                ]
                if 'endpoint_url' in credentials:
                    endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
                elif 'server_url' in credentials:
                    endpoint_url = server_url
            elif completion_type is LLMMode.COMPLETION:
                data['prompt'] = 'ping'
                if 'endpoint_url' in credentials:
                    endpoint_url = str(URL(endpoint_url) / 'completions')
                elif 'server_url' in credentials:
                    endpoint_url = server_url
            else:
                raise ValueError("Unsupported completion type for model configuration.")

            # send a post request to validate the credentials
            response = requests.post(
                endpoint_url,
                headers=headers,
                json=data,
                timeout=(10, 60)
            )

            if response.status_code != 200:
                raise CredentialsValidateFailedError(
                    f'Credentials validation failed with status code {response.status_code}')

            try:
                json_result = response.json()
            except json.JSONDecodeError as e:
                raise CredentialsValidateFailedError('Credentials validation failed: JSON decode error')
        except CredentialsValidateFailedError:
            raise
        except Exception as ex:
            raise CredentialsValidateFailedError(f'An error occurred during credentials validation: {str(ex)}')

    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
                  tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
                  stream: bool = True,
                  user: Optional[str] = None) -> Union[LLMResult, Generator]:
        """
        Invoke llm completion model

        :param model: model name
        :param credentials: credentials
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
        headers = {
            'Content-Type': 'application/json',
            'Accept-Charset': 'utf-8',
        }

        api_key = credentials.get('api_key')
        if api_key:
            headers['Authorization'] = f'Bearer {api_key}'

        if stream:
            headers['Accept'] = 'text/event-stream'

        endpoint_url = credentials['endpoint_url'] if 'endpoint_url' in credentials else None
        if endpoint_url and not endpoint_url.endswith('/'):
            endpoint_url += '/'
        server_url = credentials['server_url'] if 'server_url' in credentials else None

        data = {
            "model": model,
            "stream": stream,
            **model_parameters
        }

        completion_type = LLMMode.value_of(credentials['mode'])

        if completion_type is LLMMode.CHAT:
            if 'endpoint_url' in credentials:
                endpoint_url = str(URL(endpoint_url) / 'chat' / 'completions')
            elif 'server_url' in credentials:
                endpoint_url = server_url
            data['messages'] = [self._convert_prompt_message_to_dict(m) for m in prompt_messages]
        elif completion_type is LLMMode.COMPLETION:
            # use the first prompt message as the completion prompt
            data['prompt'] = prompt_messages[0].content
            if 'endpoint_url' in credentials:
                endpoint_url = str(URL(endpoint_url) / 'completions')
            elif 'server_url' in credentials:
                endpoint_url = server_url
        else:
            raise ValueError("Unsupported completion type for model configuration.")

        # annotate tools with names, descriptions, etc.
        function_calling_type = credentials.get('function_calling_type', 'no_call')
        formatted_tools = []
        if tools:
            if function_calling_type == 'function_call':
                data['functions'] = [{
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": tool.parameters
                } for tool in tools]
            elif function_calling_type == 'tool_call':
                data["tool_choice"] = "auto"

                for tool in tools:
                    formatted_tools.append(helper.dump_model(PromptMessageFunction(function=tool)))

                data["tools"] = formatted_tools

        if stop:
            data["stop"] = stop

        if user:
            data["user"] = user

        response = requests.post(
            endpoint_url,
            headers=headers,
            json=data,
            timeout=(10, 60),
            stream=stream
        )

        if response.encoding is None or response.encoding == 'ISO-8859-1':
            response.encoding = 'utf-8'

        if not response.ok:
            raise InvokeError(f"API request failed with status code {response.status_code}: {response.text}")

        if stream:
            return self._handle_generate_stream_response(model, credentials, response, prompt_messages)

        return self._handle_generate_response(model, credentials, response, prompt_messages)
@@ -0,0 +1,30 @@
model: mistralai/mixtral-8x7b-instruct-v0.1
label:
  zh_Hans: mistralai/mixtral-8x7b-instruct-v0.1
  en_US: mistralai/mixtral-8x7b-instruct-v0.1
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
  - name: top_p
    use_template: top_p
  - name: max_tokens
    use_template: max_tokens
    default: 512
    min: 1
    max: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
30
api/core/model_runtime/model_providers/nvidia/nvidia.py
Normal file
@@ -0,0 +1,30 @@
import logging

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider

logger = logging.getLogger(__name__)


class NVIDIAProvider(ModelProvider):

    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials
        if validate failed, raise exception

        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        """
        try:
            model_instance = self.get_model_instance(ModelType.LLM)

            model_instance.validate_credentials(
                model='mistralai/mixtral-8x7b-instruct-v0.1',
                credentials=credentials
            )
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
            logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
            raise ex
30
api/core/model_runtime/model_providers/nvidia/nvidia.yaml
Normal file
@@ -0,0 +1,30 @@
provider: nvidia
label:
  en_US: NVIDIA
icon_small:
  en_US: icon_s_en.svg
icon_large:
  en_US: icon_l_en.png
background: "#FFFFFF"
help:
  title:
    en_US: Get your API Key from NVIDIA
    zh_Hans: 从 NVIDIA 获取 API Key
  url:
    en_US: https://build.nvidia.com/explore/discover
supported_model_types:
  - llm
  - text-embedding
  - rerank
configurate_methods:
  - predefined-model
provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
@@ -0,0 +1,4 @@
model: nv-rerank-qa-mistral-4b:1
model_type: rerank
model_properties:
  context_size: 8192
112
api/core/model_runtime/model_providers/nvidia/rerank/rerank.py
Normal file
@@ -0,0 +1,112 @@
from math import exp
from typing import Optional

import requests

from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel


class NvidiaRerankModel(RerankModel):
    """
    Model class for NVIDIA rerank model.
    """

    def _sigmoid(self, logit: float) -> float:
        return 1 / (1 + exp(-logit))
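Note: the reranking endpoint returns raw logits, so _sigmoid above squashes them into (0, 1) for the score field; a few sample values:

    from math import exp

    def sigmoid(logit):
        return 1 / (1 + exp(-logit))

    print(round(sigmoid(0.0), 3))   # 0.5   (neutral)
    print(round(sigmoid(2.0), 3))   # 0.881 (strongly relevant)
    print(round(sigmoid(-3.0), 3))  # 0.047 (irrelevant)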
    def _invoke(self, model: str, credentials: dict,
                query: str, docs: list[str], score_threshold: Optional[float] = None, top_n: Optional[int] = None,
                user: Optional[str] = None) -> RerankResult:
        """
        Invoke rerank model

        :param model: model name
        :param credentials: model credentials
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold
        :param top_n: top n documents to return
        :param user: unique user id
        :return: rerank result
        """
        if len(docs) == 0:
            return RerankResult(model=model, docs=[])

        try:
            invoke_url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"

            headers = {
                "Authorization": f"Bearer {credentials.get('api_key')}",
                "Accept": "application/json",
            }
            payload = {
                "model": model,
                "query": {"text": query},
                "passages": [{"text": doc} for doc in docs],
            }

            session = requests.Session()
            response = session.post(invoke_url, headers=headers, json=payload)
            response.raise_for_status()
            results = response.json()

            rerank_documents = []
            for result in results['rankings']:
                index = result['index']
                logit = result['logit']
                rerank_document = RerankDocument(
                    index=index,
                    text=docs[index],
                    score=self._sigmoid(logit),
                )

                rerank_documents.append(rerank_document)

            return RerankResult(model=model, docs=rerank_documents)
        except requests.HTTPError as e:
            raise InvokeServerUnavailableError(str(e))

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the GPU memory bandwidth of H100 SXM?",
                docs=[
                    "Example doc 1",
                    "Example doc 2",
                    "Example doc 3",
                ],
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        """
        return {
            InvokeConnectionError: [requests.ConnectionError],
            InvokeServerUnavailableError: [requests.HTTPError],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [requests.HTTPError],
            InvokeBadRequestError: [requests.RequestException]
        }
@@ -0,0 +1,5 @@
model: NV-Embed-QA
model_type: text-embedding
model_properties:
  context_size: 512
  max_chunks: 1
@@ -0,0 +1,172 @@
import time
from json import JSONDecodeError, dumps
from typing import Optional

from requests import post

from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel


class NvidiaTextEmbeddingModel(TextEmbeddingModel):
    """
    Model class for Nvidia text embedding model.
    """
    api_base: str = 'https://ai.api.nvidia.com/v1/retrieval/nvidia/embeddings'
    models: list[str] = ['NV-Embed-QA']

    def _invoke(self, model: str, credentials: dict,
                texts: list[str], user: Optional[str] = None) \
            -> TextEmbeddingResult:
        """
        Invoke text embedding model

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :param user: unique user id
        :return: embeddings result
        """
        api_key = credentials['api_key']
        if model not in self.models:
            raise InvokeBadRequestError('Invalid model name')
        if not api_key:
            raise CredentialsValidateFailedError('api_key is required')
        url = self.api_base
        headers = {
            'Authorization': 'Bearer ' + api_key,
            'Content-Type': 'application/json'
        }

        data = {
            'model': model,
            'input': texts[0],  # only the first text is embedded per request
            'input_type': 'query'
        }

        try:
            response = post(url, headers=headers, data=dumps(data))
        except Exception as e:
            raise InvokeConnectionError(str(e))

        if response.status_code != 200:
            try:
                resp = response.json()
                msg = resp['detail']
                if response.status_code == 401:
                    raise InvokeAuthorizationError(msg)
                elif response.status_code == 429:
                    raise InvokeRateLimitError(msg)
                elif response.status_code == 500:
                    raise InvokeServerUnavailableError(msg)
                else:
                    raise InvokeError(msg)
            except JSONDecodeError as e:
                raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")

        try:
            resp = response.json()
            embeddings = resp['data']
            usage = resp['usage']
        except Exception as e:
            raise InvokeServerUnavailableError(f"Failed to convert response to json: {e} with text: {response.text}")

        usage = self._calc_response_usage(model=model, credentials=credentials, tokens=usage['total_tokens'])

        result = TextEmbeddingResult(
            model=model,
            embeddings=[[
                float(data) for data in x['embedding']
            ] for x in embeddings],
            usage=usage
        )

        return result

    def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
        """
        Get number of tokens for given prompt messages

        :param model: model name
        :param credentials: model credentials
        :param texts: texts to embed
        :return:
        """
        num_tokens = 0
        for text in texts:
            # use GPT2Tokenizer to get num tokens
            num_tokens += self._get_num_tokens_by_gpt2(text)
        return num_tokens

    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials

        :param model: model name
        :param credentials: model credentials
        :return:
        """
        try:
            self._invoke(model=model, credentials=credentials, texts=['ping'])
        except InvokeAuthorizationError:
            raise CredentialsValidateFailedError('Invalid api key')

    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        return {
            InvokeConnectionError: [
                InvokeConnectionError
            ],
            InvokeServerUnavailableError: [
                InvokeServerUnavailableError
            ],
            InvokeRateLimitError: [
                InvokeRateLimitError
            ],
            InvokeAuthorizationError: [
                InvokeAuthorizationError
            ],
            InvokeBadRequestError: [
                KeyError
            ]
        }

    def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
        """
        Calculate response usage

        :param model: model name
        :param credentials: model credentials
        :param tokens: input tokens
        :return: usage
        """
        # get input price info
        input_price_info = self.get_price(
            model=model,
            credentials=credentials,
            price_type=PriceType.INPUT,
            tokens=tokens
        )

        # transform usage
        usage = EmbeddingUsage(
            tokens=tokens,
            total_tokens=tokens,
            unit_price=input_price_info.unit_price,
            price_unit=input_price_info.unit,
            total_price=input_price_info.total_amount,
            currency=input_price_info.currency,
            latency=time.perf_counter() - self.started_at
        )

        return usage
@@ -449,7 +449,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="The temperature of the model. "
                                      "Increasing the temperature will make the model answer "
                                      "more creatively. (Default: 0.8)"),
                default=0.8,
                default=0.1,
                min=0,
                max=2
            ),
@@ -472,7 +472,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Reduces the probability of generating nonsense. "
                                      "A higher value (e.g. 100) will give more diverse answers, "
                                      "while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
                default=40,
                min=1,
                max=100
            ),
@@ -483,7 +482,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Sets how strongly to penalize repetitions. "
                                      "A higher value (e.g., 1.5) will penalize repetitions more strongly, "
                                      "while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)"),
                default=1.1,
                min=-2,
                max=2
            ),
@@ -494,7 +492,7 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Maximum number of tokens to predict when generating text. "
                                      "(Default: 128, -1 = infinite generation, -2 = fill context)"),
                default=128,
                default=512 if int(credentials.get('max_tokens', 4096)) >= 768 else 128,
                min=-2,
                max=int(credentials.get('max_tokens', 4096)),
            ),
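Note: the new num_predict default above adapts to the declared context window: 512 when the configured max_tokens can accommodate it, otherwise Ollama's stock 128. Evaluated for two settings:

    # The conditional default above, traced for two credential values.
    for max_tokens in ('4096', '512'):
        default = 512 if int(max_tokens) >= 768 else 128
        print(max_tokens, '->', default)  # 4096 -> 512, 512 -> 128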
@@ -504,7 +502,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Enable Mirostat sampling for controlling perplexity. "
                                      "(default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)"),
                default=0,
                min=0,
                max=2
            ),
@@ -516,7 +513,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                                      "the generated text. A lower learning rate will result in slower adjustments, "
                                      "while a higher learning rate will make the algorithm more responsive. "
                                      "(Default: 0.1)"),
                default=0.1,
                precision=1
            ),
            ParameterRule(
@@ -525,7 +521,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.FLOAT,
                help=I18nObject(en_US="Controls the balance between coherence and diversity of the output. "
                                      "A lower value will result in more focused and coherent text. (Default: 5.0)"),
                default=5.0,
                precision=1
            ),
            ParameterRule(
@@ -543,7 +538,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="The number of layers to send to the GPU(s). "
                                      "On macOS it defaults to 1 to enable metal support, 0 to disable."),
                default=1,
                min=0,
                max=1
            ),
@@ -563,7 +557,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                type=ParameterType.INT,
                help=I18nObject(en_US="Sets how far back for the model to look back to prevent repetition. "
                                      "(Default: 64, 0 = disabled, -1 = num_ctx)"),
                default=64,
                min=-1
            ),
            ParameterRule(
@@ -573,7 +566,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Tail free sampling is used to reduce the impact of less probable tokens "
                                      "from the output. A higher value (e.g., 2.0) will reduce the impact more, "
                                      "while a value of 1.0 disables this setting. (default: 1)"),
                default=1,
                precision=1
            ),
            ParameterRule(
@@ -583,7 +575,6 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
                help=I18nObject(en_US="Sets the random number seed to use for generation. Setting this to "
                                      "a specific number will make the model generate the same text for "
                                      "the same prompt. (Default: 0)"),
                default=0
            ),
            ParameterRule(
                name='format',
@@ -656,6 +656,8 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
                if assistant_message_function_call:
                    # start of stream function call
                    delta_assistant_message_function_call_storage = assistant_message_function_call
                    if delta_assistant_message_function_call_storage.arguments is None:
                        delta_assistant_message_function_call_storage.arguments = ''
                    if not has_finish_reason:
                        continue
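Note: the added lines above handle the first chunk of a streamed function call, where the name can arrive while arguments is still None. A sketch of how such deltas accumulate across chunks; the chunk shapes here are simplified stand-ins, not the full OpenAI delta schema:

    # Simplified accumulation of a streamed function call.
    chunks = [
        {"name": "get_weather", "arguments": None},  # first chunk: args may be None
        {"name": None, "arguments": '{"city": '},
        {"name": None, "arguments": '"Berlin"}'},
    ]
    name, arguments = "", ""
    for delta in chunks:
        name = delta["name"] or name
        arguments += delta["arguments"] or ""        # treat None as ''
    print(name, arguments)  # get_weather {"city": "Berlin"}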
@@ -8,54 +8,70 @@ model_properties:
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 1500
    min: 1
    max: 6000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB
@@ -4,58 +4,74 @@ label:
model_type: llm
model_properties:
  mode: chat
  context_size: 30000
  context_size: 32768
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 2000
    min: 1
    max: 28000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB
@@ -8,54 +8,70 @@ model_properties:
parameter_rules:
  - name: temperature
    use_template: temperature
    default: 1.0
    type: float
    default: 0.85
    min: 0.0
    max: 2.0
    help:
      zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
      en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected, and the generated results are more certain.
  - name: max_tokens
    use_template: max_tokens
    type: int
    default: 2000
    min: 1
    max: 2000
    help:
      zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
      en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
  - name: top_p
    use_template: top_p
    type: float
    default: 0.8
    min: 0.1
    max: 0.9
    help:
      zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
      en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
  - name: max_tokens
    use_template: max_tokens
    default: 1500
    min: 1
    max: 6000
    help:
      zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
      en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
  - name: top_k
    type: int
    min: 0
    max: 99
    label:
      zh_Hans: 取样数量
      en_US: Top k
    type: int
    help:
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
    required: false
      zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
      en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
  - name: seed
    required: false
    type: int
    default: 1234
    label:
      zh_Hans: 随机种子
      en_US: Random seed
    type: int
    help:
      zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
      en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
    required: false
      zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
      en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
  - name: repetition_penalty
    label:
      en_US: Repetition penalty
    required: false
    type: float
    default: 1.1
    label:
      en_US: Repetition penalty
    help:
      zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
      en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
    required: false
      en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
  - name: enable_search
    type: boolean
    default: false
    help:
      zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
      en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
  - name: response_format
    use_template: response_format
pricing:
  input: '0.12'
  output: '0.12'
  unit: '0.001'
  currency: RMB
@@ -4,58 +4,70 @@ label:
|
||||
model_type: llm
|
||||
model_properties:
|
||||
mode: completion
|
||||
context_size: 32000
|
||||
context_size: 32768
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1.0
|
||||
type: float
|
||||
default: 0.85
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
|
||||
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 1500
|
||||
min: 1
|
||||
max: 1500
|
||||
help:
|
||||
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
|
||||
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 0.8
|
||||
min: 0.1
|
||||
max: 0.9
|
||||
help:
|
||||
zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
|
||||
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 2000
|
||||
min: 1
|
||||
max: 30000
|
||||
help:
|
||||
zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
|
||||
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
|
||||
- name: top_k
|
||||
type: int
|
||||
min: 0
|
||||
max: 99
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
|
||||
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
|
||||
required: false
|
||||
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
|
||||
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
|
||||
- name: seed
|
||||
required: false
|
||||
type: int
|
||||
default: 1234
|
||||
label:
|
||||
zh_Hans: 随机种子
|
||||
en_US: Random seed
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
|
||||
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
|
||||
required: false
|
||||
zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
|
||||
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
|
||||
- name: repetition_penalty
|
||||
label:
|
||||
en_US: Repetition penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 1.1
|
||||
label:
|
||||
en_US: Repetition penalty
|
||||
help:
|
||||
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
|
||||
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
|
||||
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
|
||||
- name: enable_search
|
||||
type: boolean
|
||||
default: false
|
||||
help:
|
||||
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
|
||||
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
|
||||
@@ -8,55 +8,66 @@ model_properties:
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
default: 1.0
|
||||
type: float
|
||||
default: 0.85
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 用于控制随机性和多样性的程度。具体来说,temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值,使得更多的低概率词被选择,生成结果更加多样化;而较低的temperature值则会增强概率分布的峰值,使得高概率词更容易被选择,生成结果更加确定。
|
||||
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 1500
|
||||
min: 1
|
||||
max: 1500
|
||||
help:
|
||||
zh_Hans: 用于指定模型在生成内容时token的最大数量,它定义了生成的上限,但不保证每次都会生成到这个数量。
|
||||
en_US: It is used to specify the maximum number of tokens when the model generates content. It defines the upper limit of generation, but does not guarantee that this number will be generated every time.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 0.8
|
||||
min: 0.1
|
||||
max: 0.9
|
||||
help:
|
||||
zh_Hans: 生成过程中核采样方法概率阈值,例如,取值为0.8时,仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为(0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
|
||||
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1500
|
||||
min: 1
|
||||
max: 6000
|
||||
help:
|
||||
zh_Hans: 用于限制模型生成token的数量,max_tokens设置的是生成上限,并不表示一定会生成这么多的token数量。
|
||||
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
|
||||
- name: top_k
|
||||
type: int
|
||||
min: 0
|
||||
max: 99
|
||||
label:
|
||||
zh_Hans: 取样数量
|
||||
en_US: Top k
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。默认不传递该参数,取值为None或当top_k大于100时,表示不启用top_k策略,此时,仅有top_p策略生效。
|
||||
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
|
||||
required: false
|
||||
zh_Hans: 生成时,采样候选集的大小。例如,取值为50时,仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大,生成的随机性越高;取值越小,生成的确定性越高。
|
||||
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated.
|
||||
- name: seed
|
||||
required: false
|
||||
type: int
|
||||
default: 1234
|
||||
label:
|
||||
zh_Hans: 随机种子
|
||||
en_US: Random seed
|
||||
type: int
|
||||
help:
|
||||
zh_Hans: 生成时,随机数的种子,用于控制模型生成的随机性。如果使用相同的种子,每次运行生成的结果都将相同;当需要复现模型的生成结果时,可以使用相同的种子。seed参数支持无符号64位整数类型。
|
||||
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
|
||||
required: false
|
||||
zh_Hans: 生成时使用的随机数种子,用户控制模型生成内容的随机性。支持无符号64位整数,默认值为 1234。在使用seed时,模型将尽可能生成相同或相似的结果,但目前不保证每次生成的结果完全相同。
|
||||
en_US: The random number seed used when generating, the user controls the randomness of the content generated by the model. Supports unsigned 64-bit integers, default value is 1234. When using seed, the model will try its best to generate the same or similar results, but there is currently no guarantee that the results will be exactly the same every time.
|
||||
- name: repetition_penalty
|
||||
label:
|
||||
en_US: Repetition penalty
|
||||
required: false
|
||||
type: float
|
||||
default: 1.1
|
||||
label:
|
||||
en_US: Repetition penalty
|
||||
help:
|
||||
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
|
||||
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
|
||||
required: false
|
||||
en_US: Used to control the repeatability when generating models. Increasing repetition_penalty can reduce the duplication of model generation. 1.0 means no punishment.
|
||||
- name: enable_search
|
||||
type: boolean
|
||||
default: false
|
||||
help:
|
||||
zh_Hans: 模型内置了互联网搜索服务,该参数控制模型在生成文本时是否参考使用互联网搜索结果。启用互联网搜索,模型会将搜索结果作为文本生成过程中的参考信息,但模型会基于其内部逻辑“自行判断”是否使用互联网搜索结果。
|
||||
en_US: The model has a built-in Internet search service. This parameter controls whether the model refers to Internet search results when generating text. When Internet search is enabled, the model will use the search results as reference information in the text generation process, but the model will "judge" whether to use Internet search results based on its internal logic.
|
||||
- name: response_format
|
||||
use_template: response_format
|
||||
pricing:
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
model: text-embedding-v1
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 2048
|
||||
@@ -0,0 +1,4 @@
|
||||
model: text-embedding-v2
|
||||
model_type: text-embedding
|
||||
model_properties:
|
||||
context_size: 2048
|
||||
@@ -0,0 +1,132 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import dashscope
|
||||
|
||||
from core.model_runtime.entities.model_entities import PriceType
|
||||
from core.model_runtime.entities.text_embedding_entities import (
|
||||
EmbeddingUsage,
|
||||
TextEmbeddingResult,
|
||||
)
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import (
|
||||
TextEmbeddingModel,
|
||||
)
|
||||
from core.model_runtime.model_providers.tongyi._common import _CommonTongyi
|
||||
|
||||
|
||||
class TongyiTextEmbeddingModel(_CommonTongyi, TextEmbeddingModel):
|
||||
"""
|
||||
Model class for Tongyi text embedding model.
|
||||
"""
|
||||
|
||||
def _invoke(
|
||||
self,
|
||||
model: str,
|
||||
credentials: dict,
|
||||
texts: list[str],
|
||||
user: Optional[str] = None,
|
||||
) -> TextEmbeddingResult:
|
||||
"""
|
||||
Invoke text embedding model
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:param user: unique user id
|
||||
:return: embeddings result
|
||||
"""
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
dashscope.api_key = credentials_kwargs["dashscope_api_key"]
|
||||
embeddings, embedding_used_tokens = self.embed_documents(model, texts)
|
||||
|
||||
return TextEmbeddingResult(
|
||||
embeddings=embeddings,
|
||||
usage=self._calc_response_usage(model, credentials_kwargs, embedding_used_tokens),
|
||||
model=model
|
||||
)
|
||||
|
||||
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
|
||||
"""
|
||||
Get number of tokens for given prompt messages
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:param texts: texts to embed
|
||||
:return:
|
||||
"""
|
||||
if len(texts) == 0:
|
||||
return 0
|
||||
total_num_tokens = 0
|
||||
for text in texts:
|
||||
total_num_tokens += self._get_num_tokens_by_gpt2(text)
|
||||
|
||||
return total_num_tokens
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
"""
|
||||
Validate model credentials
|
||||
|
||||
:param model: model name
|
||||
:param credentials: model credentials
|
||||
:return:
|
||||
"""
|
||||
try:
|
||||
# transform credentials to kwargs for model instance
|
||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||
dashscope.api_key = credentials_kwargs["dashscope_api_key"]
|
||||
# call embedding model
|
||||
self.embed_documents(model=model, texts=["ping"])
|
||||
except Exception as ex:
|
||||
raise CredentialsValidateFailedError(str(ex))
|
||||
|
||||
@staticmethod
|
||||
def embed_documents(model: str, texts: list[str]) -> tuple[list[list[float]], int]:
|
||||
"""Call out to Tongyi's embedding endpoint.
|
||||
|
||||
Args:
|
||||
texts: The list of texts to embed.
|
||||
|
||||
Returns:
|
||||
List of embeddings, one for each text, and tokens usage.
|
||||
"""
|
||||
embeddings = []
|
||||
embedding_used_tokens = 0
|
||||
for text in texts:
|
||||
response = dashscope.TextEmbedding.call(model=model, input=text, text_type="document")
|
||||
data = response.output["embeddings"][0]
|
||||
embeddings.append(data["embedding"])
|
||||
embedding_used_tokens += response.usage["total_tokens"]
|
||||
|
||||
return [list(map(float, e)) for e in embeddings], embedding_used_tokens
|
||||
|
||||
def _calc_response_usage(
|
||||
self, model: str, credentials: dict, tokens: int
|
||||
) -> EmbeddingUsage:
|
||||
"""
|
||||
Calculate response usage
|
||||
|
||||
:param model: model name
|
||||
:param tokens: input tokens
|
||||
:return: usage
|
||||
"""
|
||||
# get input price info
|
||||
input_price_info = self.get_price(
|
||||
model=model,
|
||||
credentials=credentials,
|
||||
price_type=PriceType.INPUT,
|
||||
tokens=tokens
|
||||
)
|
||||
|
||||
# transform usage
|
||||
usage = EmbeddingUsage(
|
||||
tokens=tokens,
|
||||
total_tokens=tokens,
|
||||
unit_price=input_price_info.unit_price,
|
||||
price_unit=input_price_info.unit,
|
||||
total_price=input_price_info.total_amount,
|
||||
currency=input_price_info.currency,
|
||||
latency=time.perf_counter() - self.started_at
|
||||
)
|
||||
|
||||
return usage
|
||||
@@ -17,15 +17,16 @@ help:
|
||||
supported_model_types:
|
||||
- llm
|
||||
- tts
|
||||
- text-embedding
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: dashscope_api_key
|
||||
label:
|
||||
en_US: APIKey
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 APIKey
|
||||
en_US: Enter your APIKey
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
<svg width="64" height="24" viewBox="0 0 64 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M1.28808 1.39558C1.76461 1.00315 2.46905 1.07132 2.86149 1.54785L7.7517 7.48596C8.14414 7.96249 8.07597 8.66693 7.59944 9.05937C7.1229 9.45181 6.41847 9.38363 6.02603 8.9071L1.13582 2.96899C0.743382 2.49246 0.811553 1.78802 1.28808 1.39558Z" fill="#133426"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M10.1689 22.3553C9.55157 22.3553 9.05112 21.8549 9.05109 21.2375L9.05075 10.7193C9.05074 10.4478 9.14951 10.1856 9.32863 9.98168L16.1801 2.17956C16.5875 1.7157 17.2937 1.66989 17.7576 2.07723C18.2214 2.48457 18.2673 3.19081 17.8599 3.65467L11.2863 11.1403L11.2866 21.2375C11.2866 21.8548 10.7862 22.3552 10.1689 22.3553Z" fill="#133426"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M18.2138 13.7077C18.8311 13.7077 19.3315 14.2081 19.3315 14.8255V21.0896C19.3315 21.7069 18.8311 22.2073 18.2138 22.2073C17.5965 22.2073 17.096 21.7069 17.096 21.0896V14.8255C17.096 14.2081 17.5965 13.7077 18.2138 13.7077Z" fill="#133426"/>
|
||||
<circle cx="19.7936" cy="10.3307" r="1.73695" fill="#00FF00"/>
|
||||
<path d="M61.6555 10.3637V22H60.593V10.3637H61.6555Z" fill="black"/>
|
||||
<path d="M50.1101 22H48.9964L53.2294 10.3637H54.3658L58.5987 22H57.4851L53.8374 11.7444H53.7578L50.1101 22ZM50.9112 17.5398H56.6839V18.4944H50.9112V17.5398Z" fill="black"/>
|
||||
<path d="M46.3928 22.0853C46.1693 22.0853 45.9761 22.0057 45.8132 21.8466C45.6541 21.6838 45.5746 21.4906 45.5746 21.2671C45.5746 21.0398 45.6541 20.8466 45.8132 20.6875C45.9761 20.5285 46.1693 20.4489 46.3928 20.4489C46.62 20.4489 46.8132 20.5285 46.9723 20.6875C47.1314 20.8466 47.2109 21.0398 47.2109 21.2671C47.2109 21.4148 47.1731 21.5512 47.0973 21.6762C47.0253 21.8012 46.9268 21.9016 46.8018 21.9773C46.6806 22.0493 46.5443 22.0853 46.3928 22.0853Z" fill="black"/>
|
||||
<path d="M42.6996 10.3637V22H41.6371V11.4773H41.5689L38.8416 13.2898V12.1875L41.5916 10.3637H42.6996Z" fill="black"/>
|
||||
<path d="M32.9098 22.1591C32.0916 22.1591 31.3928 21.9243 30.8132 21.4546C30.2375 20.9811 29.7943 20.2974 29.4837 19.4035C29.1768 18.5095 29.0234 17.4357 29.0234 16.1819C29.0234 14.9319 29.1768 13.8618 29.4837 12.9716C29.7943 12.0777 30.2393 11.394 30.8189 10.9205C31.4022 10.4432 32.0992 10.2046 32.9098 10.2046C33.7204 10.2046 34.4155 10.4432 34.995 10.9205C35.5784 11.394 36.0234 12.0777 36.3303 12.9716C36.6409 13.8618 36.7962 14.9319 36.7962 16.1819C36.7962 17.4357 36.6409 18.5095 36.3303 19.4035C36.0234 20.2974 35.5803 20.9811 35.0007 21.4546C34.425 21.9243 33.728 22.1591 32.9098 22.1591ZM32.9098 21.2046C33.8075 21.2046 34.5083 20.7671 35.0121 19.8921C35.5159 19.0133 35.7678 17.7766 35.7678 16.1819C35.7678 15.1213 35.6522 14.216 35.4212 13.466C35.1939 12.7122 34.8662 12.1364 34.4382 11.7387C34.014 11.341 33.5045 11.1421 32.9098 11.1421C32.0196 11.1421 31.3208 11.5853 30.8132 12.4716C30.3056 13.3542 30.0518 14.591 30.0518 16.1819C30.0518 17.2425 30.1655 18.1478 30.3928 18.8978C30.6238 19.6478 30.9515 20.2197 31.3757 20.6137C31.8037 21.0076 32.3151 21.2046 32.9098 21.2046Z" fill="black"/>
|
||||
</svg>
|
||||
|
||||
|
After Width: | Height: | Size: 3.1 KiB |
@@ -0,0 +1,8 @@
|
||||
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect x="1" y="1" width="22" height="22" rx="5" fill="#133426"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M4.52004 4.43887C4.87945 4.1429 5.41077 4.19431 5.70676 4.55371L9.39515 9.03221C9.69114 9.39161 9.63972 9.92289 9.2803 10.2189C8.92089 10.5148 8.38957 10.4634 8.09358 10.104L4.40519 5.62553C4.1092 5.26613 4.16062 4.73485 4.52004 4.43887Z" fill="white"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M11.2183 20.2466C10.7527 20.2466 10.3752 19.8692 10.3752 19.4036L10.3749 11.4708C10.3749 11.266 10.4494 11.0683 10.5845 10.9145L15.7522 5.03014C16.0594 4.6803 16.5921 4.64575 16.942 4.95297C17.2918 5.26018 17.3264 5.79283 17.0192 6.14266L12.0611 11.7883L12.0613 19.4035C12.0613 19.8691 11.6839 20.2466 11.2183 20.2466Z" fill="white"/>
|
||||
<path fill-rule="evenodd" clip-rule="evenodd" d="M17.2861 13.7246C17.7517 13.7246 18.1291 14.102 18.1291 14.5676V19.292C18.1291 19.7576 17.7517 20.135 17.2861 20.135C16.8205 20.135 16.443 19.7576 16.443 19.292V14.5676C16.443 14.102 16.8205 13.7246 17.2861 13.7246Z" fill="white"/>
|
||||
<ellipse cx="18.4761" cy="11.1782" rx="1.31008" ry="1.31" fill="#00FF00"/>
|
||||
</svg>
|
||||
|
||||
|
After Width: | Height: | Size: 1.2 KiB |
@@ -0,0 +1,3 @@
|
||||
- yi-34b-chat-0205
|
||||
- yi-34b-chat-200k
|
||||
- yi-vl-plus
|
||||
30
api/core/model_runtime/model_providers/yi/llm/llm.py
Normal file
30
api/core/model_runtime/model_providers/yi/llm/llm.py
Normal file
@@ -0,0 +1,30 @@
|
||||
from collections.abc import Generator
|
||||
from typing import Optional, Union
|
||||
|
||||
from core.model_runtime.entities.llm_entities import LLMResult
|
||||
from core.model_runtime.entities.message_entities import (
|
||||
PromptMessage,
|
||||
PromptMessageTool,
|
||||
)
|
||||
from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
|
||||
|
||||
|
||||
class YiLargeLanguageModel(OAIAPICompatLargeLanguageModel):
|
||||
def _invoke(self, model: str, credentials: dict,
|
||||
prompt_messages: list[PromptMessage], model_parameters: dict,
|
||||
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
|
||||
stream: bool = True, user: Optional[str] = None) \
|
||||
-> Union[LLMResult, Generator]:
|
||||
self._add_custom_parameters(credentials)
|
||||
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)
|
||||
|
||||
def validate_credentials(self, model: str, credentials: dict) -> None:
|
||||
self._add_custom_parameters(credentials)
|
||||
super().validate_credentials(model, credentials)
|
||||
|
||||
@staticmethod
|
||||
def _add_custom_parameters(credentials: dict) -> None:
|
||||
credentials['mode'] = 'chat'
|
||||
|
||||
if 'endpoint_url' not in credentials or credentials['endpoint_url'] == "":
|
||||
credentials['endpoint_url'] = 'https://api.lingyiwanwu.com/v1'
|
||||
@@ -0,0 +1,43 @@
|
||||
model: yi-34b-chat-0205
|
||||
label:
|
||||
zh_Hans: yi-34b-chat-0205
|
||||
en_US: yi-34b-chat-0205
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
type: float
|
||||
default: 0.3
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
|
||||
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4000
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 0.8
|
||||
min: 0.01
|
||||
max: 1.00
|
||||
help:
|
||||
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
|
||||
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
|
||||
pricing:
|
||||
input: '0.0025'
|
||||
output: '0.0025'
|
||||
unit: '0.00001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,43 @@
|
||||
model: yi-34b-chat-200k
|
||||
label:
|
||||
zh_Hans: yi-34b-chat-200k
|
||||
en_US: yi-34b-chat-200k
|
||||
model_type: llm
|
||||
features:
|
||||
- agent-thought
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 200000
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
type: float
|
||||
default: 0.6
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
|
||||
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 4096
|
||||
min: 1
|
||||
max: 199950
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 0.9
|
||||
min: 0.01
|
||||
max: 1.00
|
||||
help:
|
||||
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
|
||||
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
|
||||
pricing:
|
||||
input: '0.012'
|
||||
output: '0.012'
|
||||
unit: '0.00001'
|
||||
currency: RMB
|
||||
@@ -0,0 +1,43 @@
|
||||
model: yi-vl-plus
|
||||
label:
|
||||
zh_Hans: yi-vl-plus
|
||||
en_US: yi-vl-plus
|
||||
model_type: llm
|
||||
features:
|
||||
- vision
|
||||
model_properties:
|
||||
mode: chat
|
||||
context_size: 4096
|
||||
parameter_rules:
|
||||
- name: temperature
|
||||
use_template: temperature
|
||||
type: float
|
||||
default: 0.3
|
||||
min: 0.0
|
||||
max: 2.0
|
||||
help:
|
||||
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
|
||||
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
type: int
|
||||
default: 512
|
||||
min: 1
|
||||
max: 4000
|
||||
help:
|
||||
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
|
||||
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
|
||||
- name: top_p
|
||||
use_template: top_p
|
||||
type: float
|
||||
default: 0.8
|
||||
min: 0.01
|
||||
max: 1.00
|
||||
help:
|
||||
zh_Hans: 控制生成结果的随机性。数值越小,随机性越弱;数值越大,随机性越强。一般而言,top_p 和 temperature 两个参数选择一个进行调整即可。
|
||||
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
|
||||
pricing:
|
||||
input: '0.01'
|
||||
output: '0.03'
|
||||
unit: '0.001'
|
||||
currency: USD
|
||||
32
api/core/model_runtime/model_providers/yi/yi.py
Normal file
32
api/core/model_runtime/model_providers/yi/yi.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import logging
|
||||
|
||||
from core.model_runtime.entities.model_entities import ModelType
|
||||
from core.model_runtime.errors.validate import CredentialsValidateFailedError
|
||||
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class YiProvider(ModelProvider):
|
||||
|
||||
def validate_provider_credentials(self, credentials: dict) -> None:
|
||||
"""
|
||||
Validate provider credentials
|
||||
if validate failed, raise exception
|
||||
|
||||
:param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
|
||||
"""
|
||||
try:
|
||||
model_instance = self.get_model_instance(ModelType.LLM)
|
||||
|
||||
# Use `yi-34b-chat-0205` model for validate,
|
||||
# no matter what model you pass in, text completion model or chat model
|
||||
model_instance.validate_credentials(
|
||||
model='yi-34b-chat-0205',
|
||||
credentials=credentials
|
||||
)
|
||||
except CredentialsValidateFailedError as ex:
|
||||
raise ex
|
||||
except Exception as ex:
|
||||
logger.exception(f'{self.get_provider_schema().provider} credentials validate failed')
|
||||
raise ex
|
||||
41
api/core/model_runtime/model_providers/yi/yi.yaml
Normal file
41
api/core/model_runtime/model_providers/yi/yi.yaml
Normal file
@@ -0,0 +1,41 @@
|
||||
provider: yi
|
||||
label:
|
||||
en_US: 01.AI
|
||||
zh_Hans: 零一万物
|
||||
description:
|
||||
en_US: Models provided by 01.AI, such as yi-34b-chat and yi-vl-plus.
|
||||
zh_Hans: 零一万物提供的模型,例如 yi-34b-chat 和 yi-vl-plus。
|
||||
icon_small:
|
||||
en_US: icon_s_en.svg
|
||||
icon_large:
|
||||
en_US: icon_l_en.svg
|
||||
background: "#E9F1EC"
|
||||
help:
|
||||
title:
|
||||
en_US: Get your API Key from 01.ai
|
||||
zh_Hans: 从零一万物获取 API Key
|
||||
url:
|
||||
en_US: https://platform.lingyiwanwu.com/apikeys
|
||||
supported_model_types:
|
||||
- llm
|
||||
configurate_methods:
|
||||
- predefined-model
|
||||
provider_credential_schema:
|
||||
credential_form_schemas:
|
||||
- variable: api_key
|
||||
label:
|
||||
en_US: API Key
|
||||
type: secret-input
|
||||
required: true
|
||||
placeholder:
|
||||
zh_Hans: 在此输入您的 API Key
|
||||
en_US: Enter your API Key
|
||||
- variable: endpoint_url
|
||||
label:
|
||||
zh_Hans: 自定义 API endpoint 地址
|
||||
en_US: CUstom API endpoint URL
|
||||
type: text-input
|
||||
required: false
|
||||
placeholder:
|
||||
zh_Hans: Base URL, e.g. https://api.lingyiwanwu.com/v1
|
||||
en_US: Base URL, e.g. https://api.lingyiwanwu.com/v1
|
||||
@@ -32,3 +32,8 @@ parameter_rules:
|
||||
zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。
|
||||
en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 8192
|
||||
|
||||
@@ -30,3 +30,8 @@ parameter_rules:
|
||||
zh_Hans: SSE接口调用时,用于控制每次返回内容方式是增量还是全量,不提供此参数时默认为增量返回,true 为增量返回,false 为全量返回。
|
||||
en_US: When the SSE interface is called, it is used to control whether the content is returned incrementally or in full. If this parameter is not provided, the default is incremental return. true means incremental return, false means full return.
|
||||
required: false
|
||||
- name: max_tokens
|
||||
use_template: max_tokens
|
||||
default: 1024
|
||||
min: 1
|
||||
max: 8192
|
||||
|
||||
@@ -4,7 +4,6 @@ from typing import Any
|
||||
|
||||
from langchain.schema import BaseOutputParser
|
||||
|
||||
from core.model_runtime.errors.invoke import InvokeError
|
||||
from core.prompt.prompts import SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
|
||||
|
||||
|
||||
@@ -14,11 +13,11 @@ class SuggestedQuestionsAfterAnswerOutputParser(BaseOutputParser):
|
||||
return SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT
|
||||
|
||||
def parse(self, text: str) -> Any:
|
||||
json_string = text.strip()
|
||||
action_match = re.search(r".*(\[\".+\"\]).*", json_string, re.DOTALL)
|
||||
action_match = re.search(r"\[.*?\]", text.strip(), re.DOTALL)
|
||||
if action_match is not None:
|
||||
json_obj = json.loads(action_match.group(1).strip(), strict=False)
|
||||
json_obj = json.loads(action_match.group(0).strip())
|
||||
else:
|
||||
raise InvokeError("Could not parse LLM output: {text}")
|
||||
json_obj= []
|
||||
print(f"Could not parse LLM output: {text}")
|
||||
|
||||
return json_obj
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Written by YORKI MINAKO🤡
|
||||
# Written by YORKI MINAKO🤡, Edited by Xiaoyi
|
||||
CONVERSATION_TITLE_PROMPT = """You need to decompose the user's input into "subject" and "intention" in order to accurately figure out what the user's input language actually is.
|
||||
Notice: the language type user use could be diverse, which can be English, Chinese, Español, Arabic, Japanese, French, and etc.
|
||||
MAKE SURE your output is the SAME language as the user's input!
|
||||
@@ -86,6 +86,21 @@ otherwise, it cannot exist as a variable in the variables.
|
||||
If you believe revising the original input will result in a better response from the language model, you may \
|
||||
suggest revisions.
|
||||
|
||||
<<PRINCIPLES OF GOOD PROMPT>>
|
||||
Integrate the intended audience in the prompt e.g. the audience is an expert in the field.
|
||||
Break down complex tasks into a sequence of simpler prompts in an interactive conversation.
|
||||
Implement example-driven prompting (Use few-shot prompting).
|
||||
When formatting your prompt start with Instruction followed by either Example if relevant. \
|
||||
Subsequently present your content. Use one or more line breaks to separate instructions examples questions context and input data.
|
||||
Incorporate the following phrases: “Your task is” and “You MUST”.
|
||||
Incorporate the following phrases: “You will be penalized”.
|
||||
Use leading words like writing “think step by step”.
|
||||
Add to your prompt the following phrase “Ensure that your answer is unbiased and does not rely on stereotypes”.
|
||||
Assign a role to the large language models.
|
||||
Use Delimiters.
|
||||
To write an essay /text /paragraph /article or any type of text that should be detailed: “Write a detailed [essay/text/paragraph] for me on [topic] in detail by adding all the information necessary”.
|
||||
Clearly state the requirements that the model must follow in order to produce content in the form of the keywords regulations hint or instructions
|
||||
|
||||
<< FORMATTING >>
|
||||
Return a markdown code snippet with a JSON object formatted to look like, \
|
||||
no any other string out of markdown code snippet:
|
||||
@@ -102,27 +117,18 @@ and fill in variables, with a welcome sentence, and keep TLDR.
|
||||
[EXAMPLE A]
|
||||
```json
|
||||
{
|
||||
"prompt": "Write a letter about love",
|
||||
"variables": [],
|
||||
"opening_statement": "Hi! I'm your love letter writer AI."
|
||||
"prompt": "I need your help to translate the following {{Input_language}}paper paragraph into {{Target_language}}, in a style similar to a popular science magazine in {{Target_language}}. #### Rules Ensure accurate conveyance of the original text's facts and context during translation. Maintain the original paragraph format and retain technical terms and company abbreviations ",
|
||||
"variables": ["Input_language", "Target_language"],
|
||||
"opening_statement": " Hi. I am your translation assistant. I can help you with any translation and ensure accurate conveyance of information. "
|
||||
}
|
||||
```
|
||||
|
||||
[EXAMPLE B]
|
||||
```json
|
||||
{
|
||||
"prompt": "Translate from {{lanA}} to {{lanB}}",
|
||||
"variables": ["lanA", "lanB"],
|
||||
"opening_statement": "Welcome to use translate app"
|
||||
}
|
||||
```
|
||||
|
||||
[EXAMPLE C]
|
||||
```json
|
||||
{
|
||||
"prompt": "Write a story about {{topic}}",
|
||||
"variables": ["topic"],
|
||||
"opening_statement": "I'm your story writer"
|
||||
"prompt": "Your task is to review the provided meeting notes and create a concise summary that captures the essential information, focusing on key takeaways and action items assigned to specific individuals or departments during the meeting. Use clear and professional language, and organize the summary in a logical manner using appropriate formatting such as headings, subheadings, and bullet points. Ensure that the summary is easy to understand and provides a comprehensive but succinct overview of the meeting's content, with a particular focus on clearly indicating who is responsible for each action item.",
|
||||
"variables": ["meeting_notes"],
|
||||
"opening_statement": "Hi! I'm your meeting notes summarizer AI. I can help you with any meeting notes and ensure accurate conveyance of information."
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -29,10 +29,10 @@ class ExcelExtractor(BaseExtractor):
|
||||
def extract(self) -> list[Document]:
|
||||
"""Load from file path."""
|
||||
data = []
|
||||
keys = []
|
||||
wb = load_workbook(filename=self._file_path, read_only=True)
|
||||
# loop over all sheets
|
||||
for sheet in wb:
|
||||
keys = []
|
||||
if 'A1:A1' == sheet.calculate_dimension():
|
||||
sheet.reset_dimensions()
|
||||
for row in sheet.iter_rows(values_only=True):
|
||||
|
||||
@@ -45,11 +45,12 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
|
||||
# delete Spliter character
|
||||
page_content = document_node.page_content
|
||||
if page_content.startswith(".") or page_content.startswith("。"):
|
||||
page_content = page_content[1:]
|
||||
page_content = page_content[1:].strip()
|
||||
else:
|
||||
page_content = page_content
|
||||
document_node.page_content = page_content
|
||||
split_documents.append(document_node)
|
||||
if len(page_content) > 0:
|
||||
document_node.page_content = page_content
|
||||
split_documents.append(document_node)
|
||||
all_documents.extend(split_documents)
|
||||
return all_documents
|
||||
|
||||
|
||||
@@ -171,6 +171,7 @@ class ToolProviderCredentials(BaseModel):
|
||||
SECRET_INPUT = "secret-input"
|
||||
TEXT_INPUT = "text-input"
|
||||
SELECT = "select"
|
||||
BOOLEAN = "boolean"
|
||||
|
||||
@classmethod
|
||||
def value_of(cls, value: str) -> "ToolProviderCredentials.CredentialsType":
|
||||
@@ -192,7 +193,7 @@ class ToolProviderCredentials(BaseModel):
|
||||
name: str = Field(..., description="The name of the credentials")
|
||||
type: CredentialsType = Field(..., description="The type of the credentials")
|
||||
required: bool = False
|
||||
default: Optional[str] = None
|
||||
default: Optional[Union[int, str]] = None
|
||||
options: Optional[list[ToolCredentialsOption]] = None
|
||||
label: Optional[I18nObject] = None
|
||||
help: Optional[I18nObject] = None
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
import os.path
|
||||
|
||||
from yaml import FullLoader, load
|
||||
|
||||
from core.tools.entities.user_entities import UserToolProvider
|
||||
from core.utils.position_helper import get_position_map, sort_by_position_map
|
||||
|
||||
|
||||
class BuiltinToolProviderSort:
|
||||
@@ -11,18 +10,14 @@ class BuiltinToolProviderSort:
|
||||
@classmethod
|
||||
def sort(cls, providers: list[UserToolProvider]) -> list[UserToolProvider]:
|
||||
if not cls._position:
|
||||
tmp_position = {}
|
||||
file_path = os.path.join(os.path.dirname(__file__), '..', '_position.yaml')
|
||||
with open(file_path) as f:
|
||||
for pos, val in enumerate(load(f, Loader=FullLoader)):
|
||||
tmp_position[val] = pos
|
||||
cls._position = tmp_position
|
||||
cls._position = get_position_map(os.path.join(os.path.dirname(__file__), '..'))
|
||||
|
||||
def sort_compare(provider: UserToolProvider) -> int:
|
||||
def name_func(provider: UserToolProvider) -> str:
|
||||
if provider.type == UserToolProvider.ProviderType.MODEL:
|
||||
return cls._position.get(f'model.{provider.name}', 10000)
|
||||
return cls._position.get(provider.name, 10000)
|
||||
|
||||
sorted_providers = sorted(providers, key=sort_compare)
|
||||
return f'model.{provider.name}'
|
||||
else:
|
||||
return provider.name
|
||||
|
||||
sorted_providers = sort_by_position_map(cls._position, providers, name_func)
|
||||
|
||||
return sorted_providers
|
||||
@@ -12,12 +12,11 @@ class BingProvider(BuiltinToolProviderController):
|
||||
meta={
|
||||
"credentials": credentials,
|
||||
}
|
||||
).invoke(
|
||||
user_id='',
|
||||
).validate_credentials(
|
||||
credentials=credentials,
|
||||
tool_parameters={
|
||||
"query": "test",
|
||||
"result_type": "link",
|
||||
"enable_webpages": True,
|
||||
},
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -43,3 +43,63 @@ credentials_for_provider:
|
||||
zh_Hans: 例如 "https://api.bing.microsoft.com/v7.0/search"
|
||||
pt_BR: An endpoint is like "https://api.bing.microsoft.com/v7.0/search"
|
||||
default: https://api.bing.microsoft.com/v7.0/search
|
||||
allow_entities:
|
||||
type: boolean
|
||||
required: false
|
||||
label:
|
||||
en_US: Allow Entities Search
|
||||
zh_Hans: 支持实体搜索
|
||||
pt_BR: Allow Entities Search
|
||||
help:
|
||||
en_US: Does your subscription plan allow entity search
|
||||
zh_Hans: 您的订阅计划是否支持实体搜索
|
||||
pt_BR: Does your subscription plan allow entity search
|
||||
default: true
|
||||
allow_web_pages:
|
||||
type: boolean
|
||||
required: false
|
||||
label:
|
||||
en_US: Allow Web Pages Search
|
||||
zh_Hans: 支持网页搜索
|
||||
pt_BR: Allow Web Pages Search
|
||||
help:
|
||||
en_US: Does your subscription plan allow web pages search
|
||||
zh_Hans: 您的订阅计划是否支持网页搜索
|
||||
pt_BR: Does your subscription plan allow web pages search
|
||||
default: true
|
||||
allow_computation:
|
||||
type: boolean
|
||||
required: false
|
||||
label:
|
||||
en_US: Allow Computation Search
|
||||
zh_Hans: 支持计算搜索
|
||||
pt_BR: Allow Computation Search
|
||||
help:
|
||||
en_US: Does your subscription plan allow computation search
|
||||
zh_Hans: 您的订阅计划是否支持计算搜索
|
||||
pt_BR: Does your subscription plan allow computation search
|
||||
default: false
|
||||
allow_news:
|
||||
type: boolean
|
||||
required: false
|
||||
label:
|
||||
en_US: Allow News Search
|
||||
zh_Hans: 支持新闻搜索
|
||||
pt_BR: Allow News Search
|
||||
help:
|
||||
en_US: Does your subscription plan allow news search
|
||||
zh_Hans: 您的订阅计划是否支持新闻搜索
|
||||
pt_BR: Does your subscription plan allow news search
|
||||
default: false
|
||||
allow_related_searches:
|
||||
type: boolean
|
||||
required: false
|
||||
label:
|
||||
en_US: Allow Related Searches
|
||||
zh_Hans: 支持相关搜索
|
||||
pt_BR: Allow Related Searches
|
||||
help:
|
||||
en_US: Does your subscription plan allow related searches
|
||||
zh_Hans: 您的订阅计划是否支持相关搜索
|
||||
pt_BR: Does your subscription plan allow related searches
|
||||
default: false
|
||||
|
||||
@@ -10,53 +10,23 @@ from core.tools.tool.builtin_tool import BuiltinTool
|
||||
class BingSearchTool(BuiltinTool):
|
||||
url = 'https://api.bing.microsoft.com/v7.0/search'
|
||||
|
||||
def _invoke(self,
|
||||
user_id: str,
|
||||
tool_parameters: dict[str, Any],
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
def _invoke_bing(self,
|
||||
user_id: str,
|
||||
subscription_key: str, query: str, limit: int,
|
||||
result_type: str, market: str, lang: str,
|
||||
filters: list[str]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
"""
|
||||
invoke tools
|
||||
invoke bing search
|
||||
"""
|
||||
|
||||
key = self.runtime.credentials.get('subscription_key', None)
|
||||
if not key:
|
||||
raise Exception('subscription_key is required')
|
||||
|
||||
server_url = self.runtime.credentials.get('server_url', None)
|
||||
if not server_url:
|
||||
server_url = self.url
|
||||
|
||||
query = tool_parameters.get('query', None)
|
||||
if not query:
|
||||
raise Exception('query is required')
|
||||
|
||||
limit = min(tool_parameters.get('limit', 5), 10)
|
||||
result_type = tool_parameters.get('result_type', 'text') or 'text'
|
||||
|
||||
market = tool_parameters.get('market', 'US')
|
||||
lang = tool_parameters.get('language', 'en')
|
||||
filter = []
|
||||
|
||||
if tool_parameters.get('enable_computation', False):
|
||||
filter.append('Computation')
|
||||
if tool_parameters.get('enable_entities', False):
|
||||
filter.append('Entities')
|
||||
if tool_parameters.get('enable_news', False):
|
||||
filter.append('News')
|
||||
if tool_parameters.get('enable_related_search', False):
|
||||
filter.append('RelatedSearches')
|
||||
if tool_parameters.get('enable_webpages', False):
|
||||
filter.append('WebPages')
|
||||
|
||||
market_code = f'{lang}-{market}'
|
||||
accept_language = f'{lang},{market_code};q=0.9'
|
||||
headers = {
|
||||
'Ocp-Apim-Subscription-Key': key,
|
||||
'Ocp-Apim-Subscription-Key': subscription_key,
|
||||
'Accept-Language': accept_language
|
||||
}
|
||||
|
||||
query = quote(query)
|
||||
server_url = f'{server_url}?q={query}&mkt={market_code}&count={limit}&responseFilter={",".join(filter)}'
|
||||
server_url = f'{self.url}?q={query}&mkt={market_code}&count={limit}&responseFilter={",".join(filters)}'
|
||||
response = get(server_url, headers=headers)
|
||||
|
||||
if response.status_code != 200:
|
||||
@@ -124,3 +94,105 @@ class BingSearchTool(BuiltinTool):
|
||||
text += f'{related["displayText"]} - {related["webSearchUrl"]}\n'
|
||||
|
||||
return self.create_text_message(text=self.summary(user_id=user_id, content=text))
|
||||
|
||||
|
||||
def validate_credentials(self, credentials: dict[str, Any], tool_parameters: dict[str, Any]) -> None:
|
||||
key = credentials.get('subscription_key', None)
|
||||
if not key:
|
||||
raise Exception('subscription_key is required')
|
||||
|
||||
server_url = credentials.get('server_url', None)
|
||||
if not server_url:
|
||||
server_url = self.url
|
||||
|
||||
query = tool_parameters.get('query', None)
|
||||
if not query:
|
||||
raise Exception('query is required')
|
||||
|
||||
limit = min(tool_parameters.get('limit', 5), 10)
|
||||
result_type = tool_parameters.get('result_type', 'text') or 'text'
|
||||
|
||||
market = tool_parameters.get('market', 'US')
|
||||
lang = tool_parameters.get('language', 'en')
|
||||
filter = []
|
||||
|
||||
if credentials.get('allow_entities', False):
|
||||
filter.append('Entities')
|
||||
|
||||
if credentials.get('allow_computation', False):
|
||||
filter.append('Computation')
|
||||
|
||||
if credentials.get('allow_news', False):
|
||||
filter.append('News')
|
||||
|
||||
if credentials.get('allow_related_searches', False):
|
||||
filter.append('RelatedSearches')
|
||||
|
||||
if credentials.get('allow_web_pages', False):
|
||||
filter.append('WebPages')
|
||||
|
||||
if not filter:
|
||||
raise Exception('At least one filter is required')
|
||||
|
||||
self._invoke_bing(
|
||||
user_id='test',
|
||||
subscription_key=key,
|
||||
query=query,
|
||||
limit=limit,
|
||||
result_type=result_type,
|
||||
market=market,
|
||||
lang=lang,
|
||||
filters=filter
|
||||
)
|
||||
|
||||
def _invoke(self,
|
||||
user_id: str,
|
||||
tool_parameters: dict[str, Any],
|
||||
) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
|
||||
"""
|
||||
invoke tools
|
||||
"""
|
||||
|
||||
key = self.runtime.credentials.get('subscription_key', None)
|
||||
if not key:
|
||||
raise Exception('subscription_key is required')
|
||||
|
||||
server_url = self.runtime.credentials.get('server_url', None)
|
||||
if not server_url:
|
||||
server_url = self.url
|
||||
|
||||
query = tool_parameters.get('query', None)
|
||||
if not query:
|
||||
raise Exception('query is required')
|
||||
|
||||
limit = min(tool_parameters.get('limit', 5), 10)
|
||||
result_type = tool_parameters.get('result_type', 'text') or 'text'
|
||||
|
||||
market = tool_parameters.get('market', 'US')
|
||||
lang = tool_parameters.get('language', 'en')
|
||||
filter = []
|
||||
|
||||
if tool_parameters.get('enable_computation', False):
|
||||
filter.append('Computation')
|
||||
if tool_parameters.get('enable_entities', False):
|
||||
filter.append('Entities')
|
||||
if tool_parameters.get('enable_news', False):
|
||||
filter.append('News')
|
||||
if tool_parameters.get('enable_related_search', False):
|
||||
filter.append('RelatedSearches')
|
||||
if tool_parameters.get('enable_webpages', False):
|
||||
filter.append('WebPages')
|
||||
|
||||
if not filter:
|
||||
raise Exception('At least one filter is required')
|
||||
|
||||
return self._invoke_bing(
|
||||
user_id=user_id,
|
||||
subscription_key=key,
|
||||
query=query,
|
||||
limit=limit,
|
||||
result_type=result_type,
|
||||
market=market,
|
||||
lang=lang,
|
||||
filters=filter
|
||||
)
|
||||
@@ -1,4 +1,6 @@
|
||||
import matplotlib.pyplot as plt
|
||||
from fontTools.ttLib import TTFont
|
||||
from matplotlib.font_manager import findSystemFonts
|
||||
|
||||
from core.tools.errors import ToolProviderCredentialValidationError
|
||||
from core.tools.provider.builtin.chart.tools.line import LinearChartTool
|
||||
@@ -6,6 +8,37 @@ from core.tools.provider.builtin_tool_provider import BuiltinToolProviderControl
|
||||
|
||||
# use a business theme
|
||||
plt.style.use('seaborn-v0_8-darkgrid')
|
||||
plt.rcParams['axes.unicode_minus'] = False
|
||||
|
||||
def init_fonts():
|
||||
fonts = findSystemFonts()
|
||||
|
||||
popular_unicode_fonts = [
|
||||
'Arial Unicode MS', 'DejaVu Sans', 'DejaVu Sans Mono', 'DejaVu Serif', 'FreeMono', 'FreeSans', 'FreeSerif',
|
||||
'Liberation Mono', 'Liberation Sans', 'Liberation Serif', 'Noto Mono', 'Noto Sans', 'Noto Serif', 'Open Sans',
|
||||
'Roboto', 'Source Code Pro', 'Source Sans Pro', 'Source Serif Pro', 'Ubuntu', 'Ubuntu Mono'
|
||||
]
|
||||
|
||||
supported_fonts = []
|
||||
|
||||
for font_path in fonts:
|
||||
try:
|
||||
font = TTFont(font_path)
|
||||
# get family name
|
||||
family_name = font['name'].getName(1, 3, 1).toUnicode()
|
||||
if family_name in popular_unicode_fonts:
|
||||
supported_fonts.append(family_name)
|
||||
except:
|
||||
pass
|
||||
|
||||
plt.rcParams['font.family'] = 'sans-serif'
|
||||
# sort by order of popular_unicode_fonts
|
||||
for font in popular_unicode_fonts:
|
||||
if font in supported_fonts:
|
||||
plt.rcParams['font.sans-serif'] = font
|
||||
break
|
||||
|
||||
init_fonts()
|
||||
|
||||
class ChartProvider(BuiltinToolProviderController):
|
||||
def _validate_credentials(self, credentials: dict) -> None:
|
||||
|
||||
12
api/core/tools/provider/builtin/openweather/_assets/icon.svg
Normal file
12
api/core/tools/provider/builtin/openweather/_assets/icon.svg
Normal file
@@ -0,0 +1,12 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 16 16" fill="none">
|
||||
<g clip-path="url(#clip0_16624_62807)">
|
||||
<path d="M7.11111 0.888889C7.11111 0.888889 7.11111 0 8 0C8.88889 0 8.88889 0.888889 8.88889 0.888889V1.77778C8.88889 1.77778 8.88889 2.66667 8 2.66667C7.11111 2.66667 7.11111 1.77778 7.11111 1.77778V0.888889ZM15.1111 7.11111C15.1111 7.11111 16 7.11111 16 8C16 8.88889 15.1111 8.88889 15.1111 8.88889H14.2222C14.2222 8.88889 13.3333 8.88889 13.3333 8C13.3333 7.11111 14.2222 7.11111 14.2222 7.11111H15.1111ZM1.77778 7.11111C1.77778 7.11111 2.66667 7.11111 2.66667 8C2.66667 8.88889 1.77778 8.88889 1.77778 8.88889H0.888889C0.888889 8.88889 0 8.88889 0 8C0 7.11111 0.888889 7.11111 0.888889 7.11111H1.77778ZM4.05378 3.24133C4.05378 3.24133 4.68222 3.86978 4.05378 4.49822C3.42533 5.12667 2.79689 4.49822 2.79689 4.49822L2.168 3.87022C2.168 3.87022 1.53956 3.24178 2.168 2.61289C2.79689 1.98444 3.42533 2.61289 3.42533 2.61289L4.05378 3.24133ZM13.2036 4.49822C13.2036 4.49822 12.5751 5.12667 11.9467 4.49822C11.3182 3.86978 11.9467 3.24133 11.9467 3.24133L12.5751 2.61289C12.5751 2.61289 13.2036 1.98444 13.832 2.61289C14.4604 3.24133 13.832 3.86978 13.832 3.86978L13.2036 4.49822ZM3.87022 13.8316C3.87022 13.8316 3.24178 14.46 2.61333 13.8316C1.98489 13.2031 2.61333 12.5747 2.61333 12.5747L3.24178 11.9462C3.24178 11.9462 3.87022 11.3178 4.49867 11.9462C5.12711 12.5747 4.49867 13.2031 4.49867 13.2031L3.87022 13.8316Z" fill="#FFCF27"/>
|
||||
<path d="M8.00011 12.4446C10.4547 12.4446 12.4446 10.4547 12.4446 8.00011C12.4446 5.54551 10.4547 3.55566 8.00011 3.55566C5.54551 3.55566 3.55566 5.54551 3.55566 8.00011C3.55566 10.4547 5.54551 12.4446 8.00011 12.4446Z" fill="#FFCB13"/>
|
||||
<path d="M13.2343 10.3111C12.949 10.3111 12.6743 10.3556 12.4152 10.4378C12.1094 9.53647 11.2774 8.88892 10.2966 8.88892C9.24411 8.88892 8.36322 9.63469 8.11922 10.6387C7.85878 10.436 7.53744 10.3116 7.18544 10.3116C6.32633 10.3116 5.62989 11.0276 5.62989 11.9116C5.62989 12.1262 5.67255 12.3298 5.74722 12.5174C5.59878 12.4742 5.44544 12.4445 5.28411 12.4445C4.32944 12.4445 3.55566 13.2405 3.55566 14.2222C3.55566 15.204 4.32944 16 5.28411 16H13.2348C14.7619 16 16.0001 14.7271 16.0001 13.1556C16.0001 11.5845 14.7619 10.3111 13.2343 10.3111Z" fill="#E9F6FF"/>
|
||||
</g>
|
||||
<defs>
|
||||
<clipPath id="clip0_16624_62807">
|
||||
<rect width="16" height="16" fill="white"/>
|
||||
</clipPath>
|
||||
</defs>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 2.4 KiB |
36
api/core/tools/provider/builtin/openweather/openweather.py
Normal file
36
api/core/tools/provider/builtin/openweather/openweather.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import requests
|
||||
|
||||
from core.tools.errors import ToolProviderCredentialValidationError
|
||||
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
|
||||
|
||||
|
||||
def query_weather(city="Beijing", units="metric", language="zh_cn", api_key=None):
|
||||
|
||||
url = "https://api.openweathermap.org/data/2.5/weather"
|
||||
params = {"q": city, "appid": api_key, "units": units, "lang": language}
|
||||
|
||||
return requests.get(url, params=params)
|
||||
|
||||
|
||||
class OpenweatherProvider(BuiltinToolProviderController):
|
||||
def _validate_credentials(self, credentials: dict) -> None:
|
||||
try:
|
||||
if "api_key" not in credentials or not credentials.get("api_key"):
|
||||
raise ToolProviderCredentialValidationError(
|
||||
"Open weather API key is required."
|
||||
)
|
||||
apikey = credentials.get("api_key")
|
||||
try:
|
||||
response = query_weather(api_key=apikey)
|
||||
if response.status_code == 200:
|
||||
pass
|
||||
else:
|
||||
raise ToolProviderCredentialValidationError(
|
||||
(response.json()).get("info")
|
||||
)
|
||||
except Exception as e:
|
||||
raise ToolProviderCredentialValidationError(
|
||||
"Open weather API Key is invalid. {}".format(e)
|
||||
)
|
||||
except Exception as e:
|
||||
raise ToolProviderCredentialValidationError(str(e))
|
||||
29
api/core/tools/provider/builtin/openweather/openweather.yaml
Normal file
29
api/core/tools/provider/builtin/openweather/openweather.yaml
Normal file
@@ -0,0 +1,29 @@
identity:
  author: Onelevenvy
  name: openweather
  label:
    en_US: Open weather query
    zh_Hans: Open Weather
    pt_BR: Consulta de clima open weather
  description:
    en_US: Weather query toolkit based on Open Weather
    zh_Hans: 基于open weather的天气查询工具包
    pt_BR: Kit de consulta de clima baseado no Open Weather
  icon: icon.svg
credentials_for_provider:
  api_key:
    type: secret-input
    required: true
    label:
      en_US: API Key
      zh_Hans: API Key
      pt_BR: Chave de API
    placeholder:
      en_US: Please enter your open weather API Key
      zh_Hans: 请输入你的open weather API Key
      pt_BR: Insira sua chave de API open weather
    help:
      en_US: Get your API Key from open weather
      zh_Hans: 从open weather获取您的 API Key
      pt_BR: Obtenha sua chave de API do open weather
    url: https://openweathermap.org
60
api/core/tools/provider/builtin/openweather/tools/weather.py
Normal file
@@ -0,0 +1,60 @@
import json
from typing import Any, Union

import requests

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool


class OpenweatherTool(BuiltinTool):
    def _invoke(
        self, user_id: str, tool_parameters: dict[str, Any]
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        invoke tools
        """
        city = tool_parameters.get("city", "")
        if not city:
            return self.create_text_message("Please tell me your city")
        if not self.runtime.credentials.get("api_key"):
            return self.create_text_message("OpenWeather API key is required.")

        units = tool_parameters.get("units", "metric")
        lang = tool_parameters.get("lang", "zh_cn")
        try:
            # request URL
            url = "https://api.openweathermap.org/data/2.5/weather"

            # request params
            params = {
                "q": city,
                "appid": self.runtime.credentials.get("api_key"),
                "units": units,
                "lang": lang,
            }
            response = requests.get(url, params=params)

            if response.status_code == 200:
                data = response.json()
                return self.create_text_message(
                    self.summary(
                        user_id=user_id, content=json.dumps(data, ensure_ascii=False)
                    )
                )
            else:
                error_message = {
                    "error": f"failed: {response.status_code}",
                    "data": response.text,
                }
                # return the error as a text message rather than a bare string
                return self.create_text_message(json.dumps(error_message))

        except Exception as e:
            return self.create_text_message(
                "OpenWeather API request failed. {}".format(e)
            )
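For context, the JSON handed to `self.summary` above is OpenWeather's current-weather document; a short sketch of the fields a summary typically draws on (key names follow OpenWeather's schema, sample values are made up):

# Illustrative response fragment with placeholder values.
data = {
    "name": "Beijing",
    "weather": [{"description": "clear sky"}],
    "main": {"temp": 23.5, "humidity": 40},
    "wind": {"speed": 3.1},
}
print(f"{data['name']}: {data['weather'][0]['description']}, "
      f"{data['main']['temp']} degrees, humidity {data['main']['humidity']}%")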
80
api/core/tools/provider/builtin/openweather/tools/weather.yaml
Normal file
@@ -0,0 +1,80 @@
identity:
  name: weather
  author: Onelevenvy
  label:
    en_US: Open Weather Query
    zh_Hans: 天气查询
    pt_BR: Previsão do tempo
  icon: icon.svg
description:
  human:
    en_US: Weather forecast inquiry
    zh_Hans: 天气查询
    pt_BR: Inquérito sobre previsão meteorológica
  llm: A tool to use when you want to ask about the weather or a weather-related question
parameters:
  - name: city
    type: string
    required: true
    label:
      en_US: city
      zh_Hans: 城市
      pt_BR: cidade
    human_description:
      en_US: Target city for weather forecast query
      zh_Hans: 天气预报查询的目标城市
      pt_BR: Cidade de destino para consulta de previsão do tempo
    llm_description: Extract the city name from the question; if no city is given,
      reply "Please tell me your city". If the city name is written in Chinese
      characters, replace it with the corresponding English name, e.g. '北京'
      should be passed as 'Beijing'.
    form: llm
  - name: lang
    type: select
    required: true
    human_description:
      en_US: language
      zh_Hans: 语言
      pt_BR: language
    label:
      en_US: language
      zh_Hans: 语言
      pt_BR: language
    form: form
    options:
      - value: zh_cn
        label:
          en_US: cn
          zh_Hans: 中国
          pt_BR: cn
      - value: en_us
        label:
          en_US: usa
          zh_Hans: 美国
          pt_BR: usa
    default: zh_cn
  - name: units
    type: select
    required: true
    human_description:
      en_US: units for temperature
      zh_Hans: 温度单位
      pt_BR: units for temperature
    label:
      en_US: units
      zh_Hans: 单位
      pt_BR: units
    form: form
    options:
      - value: metric
        label:
          en_US: metric
          zh_Hans: ℃
          pt_BR: metric
      - value: imperial
        label:
          en_US: imperial
          zh_Hans: ℉
          pt_BR: imperial
    default: metric
0
api/core/tools/provider/builtin/spark/__init__.py
Normal file
5
api/core/tools/provider/builtin/spark/_assets/icon.svg
Normal file
@@ -0,0 +1,5 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M21.6547 16.7993C21.3111 18.0034 20.7384 19.0938 20.0054 20.048C18.9058 21.4111 15.1261 21.4111 12.8583 20.8204C10.4072 20.1616 8.6433 18.6395 8.50586 18.5259C9.46797 19.2756 10.6821 19.7072 12.0107 19.7072C15.1948 19.7072 17.7605 17.1174 17.7605 13.9368C17.7605 12.9826 17.5314 12.0966 17.119 11.3015C17.0961 11.2561 17.1419 11.2106 17.1649 11.2333C18.9745 11.5287 22.571 13.2098 21.6547 16.7993Z" fill="#2751D0"/>
<path d="M21.9994 12.7773C21.9994 12.8454 21.9306 12.8682 21.8848 12.8C21.0372 11.0053 19.5483 10.46 17.7615 10.0511C16.4099 9.75577 15.5166 9.3014 15.1271 9.09694C15.0355 9.0515 14.9668 8.98335 14.8751 8.93791C12.0575 7.23404 12.0117 4.30339 12.0117 4.30339V0.0550813C12.0117 0.00964486 12.0804 -0.0130733 12.1034 0.0096449L18.7694 6.50706L19.2734 6.98414C20.7394 8.52898 21.7474 10.5509 21.9994 12.7773Z" fill="#D82F20"/>
<path d="M20.0052 20.0462C18.1726 22.4316 15.2863 23.9992 12.0334 23.9992C6.48985 23.9992 2 19.501 2 13.9577C2 11.2543 3.05374 8.8234 4.7947 7.00594L5.29866 6.50614L9.65107 2.25783C9.69688 2.2124 9.7656 2.25783 9.7427 2.30327C9.67397 2.59861 9.55944 3.28015 9.62816 4.18888C9.71979 5.25664 10.0634 6.68789 11.0713 8.27817C11.6898 9.27777 12.5832 10.3228 13.8202 11.4133C13.9577 11.5496 14.118 11.6632 14.2784 11.7995C14.8281 12.3674 15.1488 13.1171 15.1488 13.9577C15.1488 15.6616 13.7515 17.0474 12.0563 17.0474C11.3233 17.0474 10.659 16.7975 10.1321 16.3659C10.0863 16.3204 10.1321 16.2523 10.1779 16.275C10.2925 16.2977 10.407 16.3204 10.5215 16.3204C11.1171 16.3204 11.6211 15.8433 11.6211 15.2299C11.6211 14.8665 11.4378 14.5257 11.163 14.3439C10.4299 13.7533 9.81142 13.1853 9.28455 12.6173C8.55151 11.8222 8.00174 11.0498 7.61231 10.3001C6.81055 11.2997 6.30659 12.5492 6.30659 13.935C6.30659 15.7979 7.17707 17.4563 8.55152 18.5014C8.68896 18.615 10.4528 20.1371 12.9039 20.7959C15.1259 21.432 18.9057 21.4093 20.0052 20.0462Z" fill="#69C5F4"/>
</svg>

After Width: | Height: | Size: 2.0 KiB
40
api/core/tools/provider/builtin/spark/spark.py
Normal file
@@ -0,0 +1,40 @@
import json

from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.spark.tools.spark_img_generation import spark_response
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController


class SparkProvider(BuiltinToolProviderController):
    def _validate_credentials(self, credentials: dict) -> None:
        for key in ("APPID", "APISecret", "APIKey"):
            if not credentials.get(key):
                raise ToolProviderCredentialValidationError(f"{key} is required.")

        appid = credentials.get("APPID")
        apisecret = credentials.get("APISecret")
        apikey = credentials.get("APIKey")
        prompt = "a cute black dog"

        try:
            response = spark_response(prompt, appid, apikey, apisecret)
            data = json.loads(response)
            code = data["header"]["code"]
        except Exception as e:
            raise ToolProviderCredentialValidationError(
                "APPID, APISecret or APIKey is invalid. {}".format(e)
            )

        # code 0 means the image generation request succeeded
        if code != 0:
            raise ToolProviderCredentialValidationError(
                "image generate error, code: {}".format(code)
            )
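For reference, the response envelope this validator inspects is not shown in the commit; its shape is inferred from the parsing code (`data["header"]["code"]` here, `payload.choices.text[0].content` in the tool below). A sketch of the assumed structure, with placeholder values:

# Assumed shape of the Spark tti response; all values are placeholders.
example = {
    "header": {"code": 0, "message": "Success"},
    "payload": {"choices": {"text": [{"content": "<base64-encoded PNG>"}]}},
}
assert example["header"]["code"] == 0  # code 0 means success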
59
api/core/tools/provider/builtin/spark/spark.yaml
Normal file
@@ -0,0 +1,59 @@
identity:
  author: Onelevenvy
  name: spark
  label:
    en_US: Spark
    zh_Hans: 讯飞星火
    pt_BR: Spark
  description:
    en_US: Spark Platform Toolkit
    zh_Hans: 讯飞星火平台工具
    pt_BR: Pacote de Ferramentas da Plataforma Spark
  icon: icon.svg
credentials_for_provider:
  APPID:
    type: secret-input
    required: true
    label:
      en_US: Spark APPID
      zh_Hans: APPID
      pt_BR: Spark APPID
    help:
      en_US: Please input your APPID
      zh_Hans: 请输入你的 APPID
      pt_BR: Please input your APPID
    placeholder:
      en_US: Please input your APPID
      zh_Hans: 请输入你的 APPID
      pt_BR: Please input your APPID
  APISecret:
    type: secret-input
    required: true
    label:
      en_US: Spark APISecret
      zh_Hans: APISecret
      pt_BR: Spark APISecret
    help:
      en_US: Please input your Spark APISecret
      zh_Hans: 请输入你的 APISecret
      pt_BR: Please input your Spark APISecret
    placeholder:
      en_US: Please input your Spark APISecret
      zh_Hans: 请输入你的 APISecret
      pt_BR: Please input your Spark APISecret
  APIKey:
    type: secret-input
    required: true
    label:
      en_US: Spark APIKey
      zh_Hans: APIKey
      pt_BR: Spark APIKey
    help:
      en_US: Please input your Spark APIKey
      zh_Hans: 请输入你的 APIKey
      pt_BR: Please input your Spark APIKey
    placeholder:
      en_US: Please input your Spark APIKey
      zh_Hans: 请输入你的 APIKey
      pt_BR: Please input your Spark APIKey
    url: https://console.xfyun.cn/services
154
api/core/tools/provider/builtin/spark/tools/spark_img_generation.py
Normal file
@@ -0,0 +1,154 @@
import base64
import hashlib
import hmac
import json
from base64 import b64decode
from datetime import datetime
from time import mktime
from typing import Any, Union
from urllib.parse import urlencode
from wsgiref.handlers import format_date_time

import requests

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool


class AssembleHeaderException(Exception):
    def __init__(self, msg):
        self.message = msg


class Url:
    def __init__(self, host, path, schema):
        self.host = host
        self.path = path
        self.schema = schema


# calculate sha256 and encode to base64
def sha256base64(data):
    sha256 = hashlib.sha256()
    sha256.update(data)
    digest = base64.b64encode(sha256.digest()).decode(encoding="utf-8")
    return digest


def parse_url(request_url):
    stidx = request_url.index("://")
    host = request_url[stidx + 3 :]
    schema = request_url[: stidx + 3]
    edidx = host.index("/")
    if edidx <= 0:
        raise AssembleHeaderException("invalid request url:" + request_url)
    path = host[edidx:]
    host = host[:edidx]
    return Url(host, path, schema)


def assemble_ws_auth_url(request_url, method="GET", api_key="", api_secret=""):
    u = parse_url(request_url)
    host = u.host
    path = u.path
    now = datetime.now()
    date = format_date_time(mktime(now.timetuple()))
    # sign "host", "date" and the request line with HMAC-SHA256
    signature_origin = "host: {}\ndate: {}\n{} {} HTTP/1.1".format(
        host, date, method, path
    )
    signature_sha = hmac.new(
        api_secret.encode("utf-8"),
        signature_origin.encode("utf-8"),
        digestmod=hashlib.sha256,
    ).digest()
    signature_sha = base64.b64encode(signature_sha).decode(encoding="utf-8")
    authorization_origin = f'api_key="{api_key}", algorithm="hmac-sha256", headers="host date request-line", signature="{signature_sha}"'

    authorization = base64.b64encode(authorization_origin.encode("utf-8")).decode(
        encoding="utf-8"
    )
    values = {"host": host, "date": date, "authorization": authorization}

    return request_url + "?" + urlencode(values)


def get_body(appid, text):
    body = {
        "header": {"app_id": appid, "uid": "123456789"},
        "parameter": {
            "chat": {"domain": "general", "temperature": 0.5, "max_tokens": 4096}
        },
        "payload": {"message": {"text": [{"role": "user", "content": text}]}},
    }
    return body


def spark_response(text, appid, apikey, apisecret):
    host = "http://spark-api.cn-huabei-1.xf-yun.com/v2.1/tti"
    url = assemble_ws_auth_url(
        host, method="POST", api_key=apikey, api_secret=apisecret
    )
    content = get_body(appid, text)
    response = requests.post(
        url, json=content, headers={"content-type": "application/json"}
    ).text
    return response


class SparkImgGeneratorTool(BuiltinTool):
    def _invoke(
        self,
        user_id: str,
        tool_parameters: dict[str, Any],
    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        invoke tools
        """
        for key in ("APPID", "APISecret", "APIKey"):
            if not self.runtime.credentials.get(key):
                return self.create_text_message(f"{key} is required.")

        prompt = tool_parameters.get("prompt", "")
        if not prompt:
            return self.create_text_message("Please input prompt")
        res = self.img_generation(prompt)
        if isinstance(res, ToolInvokeMessage):
            # img_generation returned an error message instead of image data
            return res
        result = []
        for image in res:
            result.append(
                self.create_blob_message(
                    blob=b64decode(image["base64_image"]),
                    meta={"mime_type": "image/png"},
                    save_as=self.VARIABLE_KEY.IMAGE.value,
                )
            )
        return result

    def img_generation(self, prompt):
        response = spark_response(
            text=prompt,
            appid=self.runtime.credentials.get("APPID"),
            apikey=self.runtime.credentials.get("APIKey"),
            apisecret=self.runtime.credentials.get("APISecret"),
        )
        data = json.loads(response)
        code = data["header"]["code"]
        if code != 0:
            return self.create_text_message(f"error: {code}, {data}")
        else:
            text = data["payload"]["choices"]["text"]
            image_content = text[0]
            image_base = image_content["content"]
            return [{"base64_image": image_base}]
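To make the signing scheme concrete, here is a sketch of calling `assemble_ws_auth_url` (defined above, assumed in scope) with placeholder credentials; only the query-string shape is the point:

# "demo_key" / "demo_secret" are placeholders, not real credentials.
signed = assemble_ws_auth_url(
    "http://spark-api.cn-huabei-1.xf-yun.com/v2.1/tti",
    method="POST",
    api_key="demo_key",
    api_secret="demo_secret",
)
print(signed)
# -> http://spark-api.cn-huabei-1.xf-yun.com/v2.1/tti?host=spark-api.cn-huabei-1.xf-yun.com&date=...&authorization=...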
36
api/core/tools/provider/builtin/spark/tools/spark_img_generation.yaml
Normal file
@@ -0,0 +1,36 @@
identity:
  name: spark_img_generation
  author: Onelevenvy
  label:
    en_US: Spark Image Generation
    zh_Hans: 图片生成
    pt_BR: Geração de imagens Spark
  icon: icon.svg
  description:
    en_US: Spark Image Generation
    zh_Hans: 图片生成
    pt_BR: Geração de imagens Spark
description:
  human:
    en_US: Generate images based on user input, with image generation API provided by Spark
    zh_Hans: 根据用户的输入生成图片,由讯飞星火提供图片生成api
    pt_BR: Gerar imagens com base na entrada do usuário, com API de geração de imagem fornecida pela Spark
  llm: spark_img_generation is a tool used to generate images from text
parameters:
  - name: prompt
    type: string
    required: true
    label:
      en_US: Prompt
      zh_Hans: 提示词
      pt_BR: Prompt
    human_description:
      en_US: Image prompt
      zh_Hans: 图像提示词
      pt_BR: Image prompt
    llm_description: Image prompt of the spark_img_generation tool; describe the
      image you want to generate as a detailed list of words.
    form: llm
5
api/core/tools/provider/builtin/stablediffusion/stablediffusion.yaml
@@ -33,3 +33,8 @@ credentials_for_provider:
       en_US: Please input your model
       zh_Hans: 请输入你的模型名称
       pt_BR: Please input your model
+    help:
+      en_US: The model name of the StableDiffusion server
+      zh_Hans: StableDiffusion服务器的模型名称
+      pt_BR: The model name of the StableDiffusion server
+    url: https://docs.dify.ai/tutorials/tool-configuration/stable-diffusion
api/core/tools/provider/builtin/stablediffusion/tools/stable_diffusion.py
@@ -131,7 +131,8 @@ class StableDiffusionTool(BuiltinTool):
                           negative_prompt=negative_prompt,
                           width=width,
                           height=height,
-                          steps=steps)
+                          steps=steps,
+                          model=model)
 
         return self.text2img(base_url=base_url,
                              lora=lora,
@@ -139,7 +140,8 @@ class StableDiffusionTool(BuiltinTool):
                              negative_prompt=negative_prompt,
                              width=width,
                              height=height,
-                             steps=steps)
+                             steps=steps,
+                             model=model)
 
     def validate_models(self) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         """
@@ -197,7 +199,7 @@ class StableDiffusionTool(BuiltinTool):
 
     def img2img(self, base_url: str, lora: str, image_binary: bytes,
                 prompt: str, negative_prompt: str,
-                width: int, height: int, steps: int) \
+                width: int, height: int, steps: int, model: str) \
             -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         """
         generate image
@@ -213,7 +215,8 @@ class StableDiffusionTool(BuiltinTool):
             "sampler_name": "Euler a",
             "restore_faces": False,
             "steps": steps,
-            "script_args": ["outpainting mk2"]
+            "script_args": ["outpainting mk2"],
+            "override_settings": {"sd_model_checkpoint": model}
         }
 
         if lora:
@@ -236,7 +239,7 @@ class StableDiffusionTool(BuiltinTool):
         except Exception as e:
             return self.create_text_message('Failed to generate image')
 
-    def text2img(self, base_url: str, lora: str, prompt: str, negative_prompt: str, width: int, height: int, steps: int) \
+    def text2img(self, base_url: str, lora: str, prompt: str, negative_prompt: str, width: int, height: int, steps: int, model: str) \
             -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
         """
         generate image
@@ -253,6 +256,7 @@ class StableDiffusionTool(BuiltinTool):
         draw_options['height'] = height
         draw_options['steps'] = steps
         draw_options['negative_prompt'] = negative_prompt
+        draw_options['override_settings']['sd_model_checkpoint'] = model
 
         try:
             url = str(URL(base_url) / 'sdapi' / 'v1' / 'txt2img')
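For context, `override_settings` with `sd_model_checkpoint` is how the AUTOMATIC1111-style webui API selects a checkpoint per request; a minimal txt2img payload using it might look like this (checkpoint name and prompts are illustrative):

# Minimal sketch of a txt2img payload for an AUTOMATIC1111-compatible server.
payload = {
    "prompt": "a red apple on a wooden table",
    "negative_prompt": "blurry",
    "width": 512,
    "height": 512,
    "steps": 20,
    "override_settings": {"sd_model_checkpoint": "my-model.safetensors"},  # placeholder
}
# POST this to <base_url>/sdapi/v1/txt2img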
42
api/core/tools/provider/builtin/time/tools/weekday.py
Normal file
@@ -0,0 +1,42 @@
import calendar
from datetime import datetime
from typing import Any, Union

from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool


class WeekdayTool(BuiltinTool):
    def _invoke(self,
                user_id: str,
                tool_parameters: dict[str, Any],
                ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """
        Calculate the day of the week for a given date
        """
        year = tool_parameters.get('year')
        month = tool_parameters.get('month')
        day = tool_parameters.get('day')

        date_obj = self.convert_datetime(year, month, day)
        if not date_obj:
            return self.create_text_message(f'Invalid date: Year {year}, Month {month}, Day {day}.')

        weekday_name = calendar.day_name[date_obj.weekday()]
        month_name = calendar.month_name[date_obj.month]
        readable_date = f"{month_name} {date_obj.day}, {date_obj.year}"
        return self.create_text_message(f'{readable_date} is {weekday_name}.')

    @staticmethod
    def convert_datetime(year, month, day) -> datetime | None:
        try:
            # coerce to int first so string parameters are handled too
            year = int(year)
            month = int(month)
            day = int(day)

            # allowed range in datetime module
            if not (year >= 1 and 1 <= month <= 12 and 1 <= day <= 31):
                return None

            return datetime(year, month, day)
        except (TypeError, ValueError):
            return None
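As a quick sanity check of the conversion logic, a small sketch exercising the helper on known dates (assumes WeekdayTool from above is importable):

import calendar
from datetime import datetime

d = datetime(2024, 2, 29)                         # a valid leap-day date
print(calendar.day_name[d.weekday()])             # Thursday
print(WeekdayTool.convert_datetime(2024, 2, 30))  # None: rejected by datetime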
Some files were not shown because too many files have changed in this diff.