Feat/webapp verified sso 260 (#20815 )

fix: remove all app token when logout
Feat/webapp verified sso 260: fetch previous app session in public token exchange (#20740 )
2026-01-07 14:58:32 +00:00 · 2025-06-09 15:11:30 +09:00 · 2025-06-06 15:53:40 +08:00 · 2025-06-06 16:52:15 +09:00 · 2025-06-06 16:09:45 +09:00 · 2025-06-06 15:49:08 +09:00
471 changed files with 19202 additions and 10990 deletions
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -5,6 +5,8 @@ on:
    branches:
      - "main"
      - "deploy/dev"
+      - "deploy/enterprise"
+      - "e-260"
  release:
    types: [published]

--- a/.github/workflows/deploy-enterprise.yml
+++ b/.github/workflows/deploy-enterprise.yml
@@ -0,0 +1,29 @@
+name: Deploy Enterprise
+
+permissions:
+  contents: read
+
+on:
+  workflow_run:
+    workflows: ["Build and Push API & Web"]
+    branches:
+      - "deploy/enterprise"
+    types:
+      - completed
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+    if: |
+      github.event.workflow_run.conclusion == 'success' &&
+      github.event.workflow_run.head_branch == 'deploy/enterprise'
+
+    steps:
+      - name: Deploy to server
+        uses: appleboy/ssh-action@v0.1.8
+        with:
+          host: ${{ secrets.ENTERPRISE_SSH_HOST }}
+          username: ${{ secrets.ENTERPRISE_SSH_USER }}
+          password: ${{ secrets.ENTERPRISE_SSH_PASSWORD }}
+          script: |
+            ${{ vars.ENTERPRISE_SSH_SCRIPT || secrets.ENTERPRISE_SSH_SCRIPT }}
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -0,0 +1,47 @@
+name: Build docker image
+
+on:
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - api/Dockerfile
+      - web/Dockerfile
+
+concurrency:
+  group: docker-build-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build-docker:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        include:
+          - service_name: "api-amd64"
+            platform: linux/amd64
+            context: "api"
+          - service_name: "api-arm64"
+            platform: linux/arm64
+            context: "api"
+          - service_name: "web-amd64"
+            platform: linux/amd64
+            context: "web"
+          - service_name: "web-arm64"
+            platform: linux/arm64
+            context: "web"
+    steps:
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker Image
+        uses: docker/build-push-action@v6
+        with:
+          push: false
+          context: "{{defaultContext}}:${{ matrix.context }}"
+          platforms: ${{ matrix.platform }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
--- a/.markdownlint.json
+++ b/.markdownlint.json
@@ -0,0 +1,4 @@
+{
+    "MD024": false,
+    "MD013": false
+}
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,45 @@
+# Changelog
+
+All notable changes to Dify will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.15.8] - 2025-05-30
+
+### Added
+
+- Added gunicorn keepalive setting (#19537)
+
+### Fixed
+
+- Fixed database configuration to allow DB_EXTRAS to set search_path via options (#16a4f77)
+- Fixed frontend third-party package security issues (#19655)
+- Updated dependencies: huggingface-hub (~0.16.4 to ~0.31.0), transformers (~4.35.0 to ~4.39.0), and resend (~0.7.0 to ~2.9.0) (#19563)
+- Downgrade boto3 from 1.36 to 1.35 (#19736)
+
+## [0.15.7] - 2025-04-27
+
+### Added
+
+- Added support for GPT-4.1 in model providers (#18912)
+- Added support for Amazon Bedrock DeepSeek-R1 model (#18908)
+- Added support for Amazon Bedrock Claude Sonnet 3.7 model (#18788)
+- Refined version compatibility logic in app DSL service
+
+### Fixed
+
+- Fixed issue with creating apps from template categories (#18807, #18868)
+- Fixed DSL version check when creating apps from explore templates (#18872, #18878)
+
+## [0.15.6] - 2025-04-22
+
+### Security
+
+- Fixed clickjacking vulnerability (#18552)
+- Fixed reset password security issue (#18366)
+- Updated reset password token when email code verification succeeds (#18362)
+
+### Fixed
+
+- Fixed Vertex AI Gemini 2.0 Flash 001 schema (#18405)
--- a/README.md
+++ b/README.md
@@ -25,6 +25,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_AR.md
+++ b/README_AR.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_CN.md
+++ b/README_CN.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_ES.md
+++ b/README_ES.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="seguir en X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="seguir en LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Descargas de Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_FR.md
+++ b/README_FR.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="suivre sur X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="suivre sur LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Tirages Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_JA.md
+++ b/README_JA.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="X(Twitter)でフォロー"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="LinkedInでフォロー"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_KL.md
+++ b/README_KL.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_KR.md
+++ b/README_KR.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_PT.md
+++ b/README_PT.md
@@ -25,6 +25,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_SI.md
+++ b/README_SI.md
@@ -22,6 +22,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="follow on X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="follow on LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/README_TR.md
+++ b/README_TR.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="X(Twitter)'da takip et"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="LinkedIn'da takip et"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Çekmeleri" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
@@ -62,8 +65,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
 ![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)


-Özür dilerim, haklısınız. Daha anlamlı ve akıcı bir çeviri yapmaya çalışayım. İşte güncellenmiş çeviri:
-
 **3. Prompt IDE**: 
  Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz.

@@ -150,8 +151,6 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
 ## Dify'ı Kullanma

 - **Cloud </br>**
-İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
- 
 Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir.

 - **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma</br>**
@@ -177,8 +176,6 @@ GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun.
 >- RAM >= 4GB

 </br>
-İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
-
 Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun:

 ```bash
--- a/README_VI.md
+++ b/README_VI.md
@@ -21,6 +21,9 @@
    <a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
        <img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
            alt="theo dõi trên X(Twitter)"></a>
+    <a href="https://www.linkedin.com/company/langgenius/" target="_blank">
+        <img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
+            alt="theo dõi trên LinkedIn"></a>
    <a href="https://hub.docker.com/u/langgenius" target="_blank">
        <img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
    <a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
--- a/api/.env.example
+++ b/api/.env.example
@@ -430,4 +430,7 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false
 # Maximum number of submitted thread count in a ThreadPool for parallel node execution
 MAX_SUBMIT_COUNT=100
 # Lockout duration in seconds
-LOGIN_LOCKOUT_DURATION=86400
+LOGIN_LOCKOUT_DURATION=86400
+
+# Prevent Clickjacking
+ALLOW_EMBED=false
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -48,16 +48,18 @@ ENV TZ=UTC

 WORKDIR /app/api

-RUN apt-get update \
-    && apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
-    # if you located in China, you can use aliyun mirror to speed up
-    # && echo "deb http://mirrors.aliyun.com/debian testing main" > /etc/apt/sources.list \
-    && echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
-    && apt-get update \
-    # For Security
-    && apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.19+dfsg-1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
-    # install a chinese font to support the use of tools like matplotlib
-    && apt-get install -y fonts-noto-cjk \
+RUN \
+    apt-get update \
+    # Install dependencies
+    && apt-get install -y --no-install-recommends \
+        # basic environment
+        curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
+        # For Security
+        expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
+        # install a chinese font to support the use of tools like matplotlib
+        fonts-noto-cjk \
+        # install libmagic to support the use of python-magic guess MIMETYPE
+        libmagic1 \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

@@ -76,7 +78,6 @@ COPY . /app/api/
 COPY docker/entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh

-
 ARG COMMIT_SHA
 ENV COMMIT_SHA=${COMMIT_SHA}

--- a/api/configs/feature/hosted_service/init.py
+++ b/api/configs/feature/hosted_service/init.py
@@ -1,9 +1,40 @@
 from typing import Optional

-from pydantic import Field, NonNegativeInt
+from pydantic import Field, NonNegativeInt, computed_field
 from pydantic_settings import BaseSettings


+class HostedCreditConfig(BaseSettings):
+    HOSTED_MODEL_CREDIT_CONFIG: str = Field(
+        description="Model credit configuration in format 'model:credits,model:credits', e.g., 'gpt-4:20,gpt-4o:10'",
+        default="",
+    )
+
+    def get_model_credits(self, model_name: str) -> int:
+        """
+        Get credit value for a specific model name.
+        Returns 1 if model is not found in configuration (default credit).
+
+        :param model_name: The name of the model to search for
+        :return: The credit value for the model
+        """
+        if not self.HOSTED_MODEL_CREDIT_CONFIG:
+            return 1
+
+        try:
+            credit_map = dict(
+                item.strip().split(":", 1) for item in self.HOSTED_MODEL_CREDIT_CONFIG.split(",") if ":" in item
+            )
+
+            # Search for matching model pattern
+            for pattern, credit in credit_map.items():
+                if pattern.strip() == model_name:
+                    return int(credit)
+            return 1  # Default quota if no match found
+        except (ValueError, AttributeError):
+            return 1  # Return default quota if parsing fails
+
+
 class HostedOpenAiConfig(BaseSettings):
    """
    Configuration for hosted OpenAI service
@@ -202,5 +233,7 @@ class HostedServiceConfig(
    HostedZhipuAIConfig,
    # moderation
    HostedModerationConfig,
+    # credit config
+    HostedCreditConfig,
 ):
    pass
--- a/api/configs/middleware/init.py
+++ b/api/configs/middleware/init.py
@@ -1,5 +1,5 @@
 from typing import Any, Literal, Optional
-from urllib.parse import quote_plus
+from urllib.parse import parse_qsl, quote_plus

 from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
 from pydantic_settings import BaseSettings
@@ -166,14 +166,28 @@ class DatabaseConfig(BaseSettings):
        default=False,
    )

-    @computed_field
+    @computed_field  # type: ignore[misc]
+    @property
    def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]:
+        # Parse DB_EXTRAS for 'options'
+        db_extras_dict = dict(parse_qsl(self.DB_EXTRAS))
+        options = db_extras_dict.get("options", "")
+        # Always include timezone
+        timezone_opt = "-c timezone=UTC"
+        if options:
+            # Merge user options and timezone
+            merged_options = f"{options} {timezone_opt}"
+        else:
+            merged_options = timezone_opt
+
+        connect_args = {"options": merged_options}
+
        return {
            "pool_size": self.SQLALCHEMY_POOL_SIZE,
            "max_overflow": self.SQLALCHEMY_MAX_OVERFLOW,
            "pool_recycle": self.SQLALCHEMY_POOL_RECYCLE,
            "pool_pre_ping": self.SQLALCHEMY_POOL_PRE_PING,
-            "connect_args": {"options": "-c timezone=UTC"},
+            "connect_args": connect_args,
        }


--- a/api/configs/packaging/init.py
+++ b/api/configs/packaging/init.py
@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

    CURRENT_VERSION: str = Field(
        description="Dify version",
-        default="0.15.2",
+        default="0.15.8",
    )

    COMMIT_SHA: str = Field(
--- a/api/controllers/common/helpers.py
+++ b/api/controllers/common/helpers.py
@@ -1,12 +1,32 @@
 import mimetypes
 import os
+import platform
 import re
 import urllib.parse
+import warnings
 from collections.abc import Mapping
 from typing import Any
 from uuid import uuid4

 import httpx
+
+try:
+    import magic
+except ImportError:
+    if platform.system() == "Windows":
+        warnings.warn(
+            "To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`", stacklevel=2
+        )
+    elif platform.system() == "Darwin":
+        warnings.warn("To use python-magic guess MIMETYPE, you need to run `brew install libmagic`", stacklevel=2)
+    elif platform.system() == "Linux":
+        warnings.warn(
+            "To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`", stacklevel=2
+        )
+    else:
+        warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
+    magic = None  # type: ignore
+
 from pydantic import BaseModel

 from configs import dify_config
@@ -47,6 +67,13 @@ def guess_file_info_from_response(response: httpx.Response):
        # If guessing fails, use Content-Type from response headers
        mimetype = response.headers.get("Content-Type", "application/octet-stream")

+    # Use python-magic to guess MIME type if still unknown or generic
+    if mimetype == "application/octet-stream" and magic is not None:
+        try:
+            mimetype = magic.from_buffer(response.content[:1024], mime=True)
+        except magic.MagicException:
+            pass
+
    extension = os.path.splitext(filename)[1]

    # Ensure filename has an extension
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@@ -2,30 +2,28 @@ import uuid
 from typing import cast

 from flask_login import current_user  # type: ignore
-from flask_restful import Resource, inputs, marshal, marshal_with, reqparse  # type: ignore
+from flask_restful import (Resource, inputs, marshal,  # type: ignore
+                           marshal_with, reqparse)
 from sqlalchemy import select
 from sqlalchemy.orm import Session
 from werkzeug.exceptions import BadRequest, Forbidden, abort

 from controllers.console import api
 from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import (
-    account_initialization_required,
-    cloud_edition_billing_resource_check,
-    enterprise_license_required,
-    setup_required,
-)
+from controllers.console.wraps import (account_initialization_required,
+                                       cloud_edition_billing_resource_check,
+                                       enterprise_license_required,
+                                       setup_required)
 from core.ops.ops_trace_manager import OpsTraceManager
 from extensions.ext_database import db
-from fields.app_fields import (
-    app_detail_fields,
-    app_detail_fields_with_site,
-    app_pagination_fields,
-)
+from fields.app_fields import (app_detail_fields, app_detail_fields_with_site,
+                               app_pagination_fields)
 from libs.login import login_required
 from models import Account, App
 from services.app_dsl_service import AppDslService, ImportMode
 from services.app_service import AppService
+from services.enterprise.enterprise_service import EnterpriseService
+from services.feature_service import FeatureService

 ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]

@@ -67,7 +65,17 @@ class AppListApi(Resource):
        if not app_pagination:
            return {"data": [], "total": 0, "page": 1, "limit": 20, "has_more": False}

-        return marshal(app_pagination, app_pagination_fields)
+        if FeatureService.get_system_features().webapp_auth.enabled:
+            app_ids = [str(app.id) for app in app_pagination.items]
+            res = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids=app_ids)
+            if len(res) != len(app_ids):
+                raise BadRequest("Invalid app id in webapp auth")
+
+            for app in app_pagination.items:
+                if str(app.id) in res:
+                    app.access_mode = res[str(app.id)].access_mode
+
+        return marshal(app_pagination, app_pagination_fields), 200

    @setup_required
    @login_required
@@ -111,6 +119,10 @@ class AppApi(Resource):

        app_model = app_service.get_app(app_model)

+        if FeatureService.get_system_features().webapp_auth.enabled:
+            app_setting = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id=str(app_model.id))
+            app_model.access_mode = app_setting.access_mode
+
        return app_model

    @setup_required
--- a/api/controllers/console/auth/error.py
+++ b/api/controllers/console/auth/error.py
@@ -59,3 +59,9 @@ class EmailCodeAccountDeletionRateLimitExceededError(BaseHTTPException):
    error_code = "email_code_account_deletion_rate_limit_exceeded"
    description = "Too many account deletion emails have been sent. Please try again in 5 minutes."
    code = 429
+
+
+class EmailPasswordResetLimitError(BaseHTTPException):
+    error_code = "email_password_reset_limit"
+    description = "Too many failed password reset attempts. Please try again in 24 hours."
+    code = 429
--- a/api/controllers/console/auth/forgot_password.py
+++ b/api/controllers/console/auth/forgot_password.py
@@ -6,9 +6,13 @@ from flask_restful import Resource, reqparse  # type: ignore

 from constants.languages import languages
 from controllers.console import api
-from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError, PasswordMismatchError
-from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
-from controllers.console.wraps import setup_required
+from controllers.console.auth.error import (EmailCodeError, InvalidEmailError,
+                                            InvalidTokenError,
+                                            PasswordMismatchError)
+from controllers.console.error import (AccountInFreezeError, AccountNotFound,
+                                       EmailSendIpLimitError)
+from controllers.console.wraps import (email_password_login_enabled,
+                                       setup_required)
 from events.tenant_event import tenant_was_created
 from extensions.ext_database import db
 from libs.helper import email, extract_remote_ip
@@ -16,12 +20,14 @@ from libs.password import hash_password, valid_password
 from models.account import Account
 from services.account_service import AccountService, TenantService
 from services.errors.account import AccountRegisterError
-from services.errors.workspace import WorkSpaceNotAllowedCreateError
+from services.errors.workspace import (WorkSpaceNotAllowedCreateError,
+                                       WorkspacesLimitExceededError)
 from services.feature_service import FeatureService


 class ForgotPasswordSendEmailApi(Resource):
    @setup_required
+    @email_password_login_enabled
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument("email", type=email, required=True, location="json")
@@ -53,6 +59,7 @@ class ForgotPasswordSendEmailApi(Resource):

 class ForgotPasswordCheckApi(Resource):
    @setup_required
+    @email_password_login_enabled
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument("email", type=str, required=True, location="json")
@@ -72,11 +79,20 @@ class ForgotPasswordCheckApi(Resource):
        if args["code"] != token_data.get("code"):
            raise EmailCodeError()

-        return {"is_valid": True, "email": token_data.get("email")}
+        # Verified, revoke the first token
+        AccountService.revoke_reset_password_token(args["token"])
+
+        # Refresh token data by generating a new token
+        _, new_token = AccountService.generate_reset_password_token(
+            user_email, code=args["code"], additional_data={"phase": "reset"}
+        )
+
+        return {"is_valid": True, "email": token_data.get("email"), "token": new_token}


 class ForgotPasswordResetApi(Resource):
    @setup_required
+    @email_password_login_enabled
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument("token", type=str, required=True, nullable=False, location="json")
@@ -95,6 +111,9 @@ class ForgotPasswordResetApi(Resource):

        if reset_data is None:
            raise InvalidTokenError()
+        # Must use token in reset phase
+        if reset_data.get("phase", "") != "reset":
+            raise InvalidTokenError()

        AccountService.revoke_reset_password_token(token)

@@ -127,6 +146,8 @@ class ForgotPasswordResetApi(Resource):
                pass
            except AccountRegisterError as are:
                raise AccountInFreezeError()
+            except WorkspacesLimitExceededError:
+                pass

        return {"result": "success"}

--- a/api/controllers/console/auth/login.py
+++ b/api/controllers/console/auth/login.py
@@ -21,8 +21,9 @@ from controllers.console.error import (
    AccountNotFound,
    EmailSendIpLimitError,
    NotAllowedCreateWorkspace,
+    WorkspacesLimitExceeded,
 )
-from controllers.console.wraps import setup_required
+from controllers.console.wraps import email_password_login_enabled, setup_required
 from events.tenant_event import tenant_was_created
 from libs.helper import email, extract_remote_ip
 from libs.password import valid_password
@@ -30,7 +31,7 @@ from models.account import Account
 from services.account_service import AccountService, RegisterService, TenantService
 from services.billing_service import BillingService
 from services.errors.account import AccountRegisterError
-from services.errors.workspace import WorkSpaceNotAllowedCreateError
+from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError
 from services.feature_service import FeatureService


@@ -38,6 +39,7 @@ class LoginApi(Resource):
    """Resource for user login."""

    @setup_required
+    @email_password_login_enabled
    def post(self):
        """Authenticate user and login."""
        parser = reqparse.RequestParser()
@@ -87,10 +89,15 @@ class LoginApi(Resource):
        # SELF_HOSTED only have one workspace
        tenants = TenantService.get_join_tenants(account)
        if len(tenants) == 0:
-            return {
-                "result": "fail",
-                "data": "workspace not found, please contact system admin to invite you to join in a workspace",
-            }
+            system_features = FeatureService.get_system_features()
+
+            if system_features.is_allow_create_workspace and not system_features.license.workspaces.is_available():
+                raise WorkspacesLimitExceeded()
+            else:
+                return {
+                    "result": "fail",
+                    "data": "workspace not found, please contact system admin to invite you to join in a workspace",
+                }

        token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
        AccountService.reset_login_error_rate_limit(args["email"])
@@ -110,6 +117,7 @@ class LogoutApi(Resource):

 class ResetPasswordSendEmailApi(Resource):
    @setup_required
+    @email_password_login_enabled
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument("email", type=email, required=True, location="json")
@@ -196,6 +204,9 @@ class EmailCodeLoginApi(Resource):
        if account:
            tenant = TenantService.get_join_tenants(account)
            if not tenant:
+                workspaces = FeatureService.get_system_features().license.workspaces
+                if not workspaces.is_available():
+                    raise WorkspacesLimitExceeded()
                if not FeatureService.get_system_features().is_allow_create_workspace:
                    raise NotAllowedCreateWorkspace()
                else:
@@ -213,6 +224,8 @@ class EmailCodeLoginApi(Resource):
                return NotAllowedCreateWorkspace()
            except AccountRegisterError as are:
                raise AccountInFreezeError()
+            except WorkspacesLimitExceededError:
+                raise WorkspacesLimitExceeded()
        token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
        AccountService.reset_login_error_rate_limit(args["email"])
        return {"result": "success", "data": token_pair.model_dump()}
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -620,7 +620,6 @@ class DatasetRetrievalSettingApi(Resource):
        match vector_type:
            case (
                VectorType.RELYT
-                | VectorType.PGVECTOR
                | VectorType.TIDB_VECTOR
                | VectorType.CHROMA
                | VectorType.TENCENT
--- a/api/controllers/console/error.py
+++ b/api/controllers/console/error.py
@@ -46,6 +46,18 @@ class NotAllowedCreateWorkspace(BaseHTTPException):
    code = 400


+class WorkspaceMembersLimitExceeded(BaseHTTPException):
+    error_code = "limit_exceeded"
+    description = "Unable to add member because the maximum workspace's member limit was exceeded"
+    code = 400
+
+
+class WorkspacesLimitExceeded(BaseHTTPException):
+    error_code = "limit_exceeded"
+    description = "Unable to create workspace because the maximum workspace limit was exceeded"
+    code = 400
+
+
 class AccountBannedError(BaseHTTPException):
    error_code = "account_banned"
    description = "Account is banned."
--- a/api/controllers/console/explore/error.py
+++ b/api/controllers/console/explore/error.py
@@ -23,3 +23,9 @@ class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException):
    error_code = "app_suggested_questions_after_answer_disabled"
    description = "Function Suggested questions after answer disabled."
    code = 403
+
+
+class AppAccessDeniedError(BaseHTTPException):
+    error_code = "access_denied"
+    description = "App access denied."
+    code = 403
--- a/api/controllers/console/explore/installed_app.py
+++ b/api/controllers/console/explore/installed_app.py
@@ -1,20 +1,26 @@
+import logging
 from datetime import UTC, datetime
 from typing import Any

 from flask import request
 from flask_login import current_user  # type: ignore
-from flask_restful import Resource, inputs, marshal_with, reqparse  # type: ignore
+from flask_restful import (Resource, inputs, marshal_with,  # type: ignore
+                           reqparse)
 from sqlalchemy import and_
 from werkzeug.exceptions import BadRequest, Forbidden, NotFound

 from controllers.console import api
 from controllers.console.explore.wraps import InstalledAppResource
-from controllers.console.wraps import account_initialization_required, cloud_edition_billing_resource_check
+from controllers.console.wraps import (account_initialization_required,
+                                       cloud_edition_billing_resource_check)
 from extensions.ext_database import db
 from fields.installed_app_fields import installed_app_list_fields
 from libs.login import login_required
 from models import App, InstalledApp, RecommendedApp
 from services.account_service import TenantService
+from services.app_service import AppService
+from services.enterprise.enterprise_service import EnterpriseService
+from services.feature_service import FeatureService


 class InstalledAppsListApi(Resource):
@@ -48,6 +54,30 @@ class InstalledAppsListApi(Resource):
            for installed_app in installed_apps
            if installed_app.app is not None
        ]
+
+        # filter out apps that user doesn't have access to
+        if FeatureService.get_system_features().webapp_auth.enabled:
+            user_id = current_user.id
+            res = []
+            app_ids = [installed_app["app"].id for installed_app in installed_app_list]
+            webapp_settings = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids)
+            for installed_app in installed_app_list:
+                webapp_setting = webapp_settings.get(installed_app["app"].id)
+                if not webapp_setting:
+                    continue
+                if webapp_setting.access_mode == "sso_verified":
+                    continue
+                app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
+                if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
+                    user_id=user_id,
+                    app_code=app_code,
+                ):
+                    res.append(installed_app)
+            installed_app_list = res
+            logging.info(
+                f"installed_app_list: {installed_app_list}, user_id: {user_id}"
+            )
+
        installed_app_list.sort(
            key=lambda app: (
                -app["is_pinned"],
--- a/api/controllers/console/explore/message.py
+++ b/api/controllers/console/explore/message.py
@@ -50,7 +50,7 @@ class MessageListApi(InstalledAppResource):

        try:
            return MessageService.pagination_by_first_id(
-                app_model, current_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
+                app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
            )
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
--- a/api/controllers/console/explore/wraps.py
+++ b/api/controllers/console/explore/wraps.py
@@ -4,10 +4,14 @@ from flask_login import current_user  # type: ignore
 from flask_restful import Resource  # type: ignore
 from werkzeug.exceptions import NotFound

+from controllers.console.explore.error import AppAccessDeniedError
 from controllers.console.wraps import account_initialization_required
 from extensions.ext_database import db
 from libs.login import login_required
 from models import InstalledApp
+from services.app_service import AppService
+from services.enterprise.enterprise_service import EnterpriseService
+from services.feature_service import FeatureService


 def installed_app_required(view=None):
@@ -48,6 +52,30 @@ def installed_app_required(view=None):
    return decorator


+def user_allowed_to_access_app(view=None):
+    def decorator(view):
+        @wraps(view)
+        def decorated(installed_app: InstalledApp, *args, **kwargs):
+            feature = FeatureService.get_system_features()
+            if feature.webapp_auth.enabled:
+                app_id = installed_app.app_id
+                app_code = AppService.get_app_code_by_id(app_id)
+                res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
+                    user_id=str(current_user.id),
+                    app_code=app_code,
+                )
+                if not res:
+                    raise AppAccessDeniedError()
+
+            return view(installed_app, *args, **kwargs)
+
+        return decorated
+    if view:
+        return decorator(view)
+    return decorator
+
+
 class InstalledAppResource(Resource):
    # must be reversed if there are multiple decorators
-    method_decorators = [installed_app_required, account_initialization_required, login_required]
+
+    method_decorators = [user_allowed_to_access_app, installed_app_required, account_initialization_required, login_required]
--- a/api/controllers/console/workspace/members.py
+++ b/api/controllers/console/workspace/members.py
@@ -6,6 +6,7 @@ from flask_restful import Resource, abort, marshal_with, reqparse  # type: ignor
 import services
 from configs import dify_config
 from controllers.console import api
+from controllers.console.error import WorkspaceMembersLimitExceeded
 from controllers.console.wraps import (
    account_initialization_required,
    cloud_edition_billing_resource_check,
@@ -17,6 +18,7 @@ from libs.login import login_required
 from models.account import Account, TenantAccountRole
 from services.account_service import RegisterService, TenantService
 from services.errors.account import AccountAlreadyInTenantError
+from services.feature_service import FeatureService


 class MemberListApi(Resource):
@@ -54,6 +56,12 @@ class MemberInviteEmailApi(Resource):
        inviter = current_user
        invitation_results = []
        console_web_url = dify_config.CONSOLE_WEB_URL
+
+        workspace_members = FeatureService.get_features(tenant_id=inviter.current_tenant.id).workspace_members
+
+        if not workspace_members.is_available(len(invitee_emails)):
+            raise WorkspaceMembersLimitExceeded()
+
        for invitee_email in invitee_emails:
            try:
                token = RegisterService.invite_new_member(
@@ -71,7 +79,6 @@ class MemberInviteEmailApi(Resource):
                invitation_results.append(
                    {"status": "success", "email": invitee_email, "url": f"{console_web_url}/signin"}
                )
-                break
            except Exception as e:
                invitation_results.append({"status": "failed", "email": invitee_email, "message": str(e)})

--- a/api/controllers/console/wraps.py
+++ b/api/controllers/console/wraps.py
@@ -11,7 +11,8 @@ from models.model import DifySetup
 from services.feature_service import FeatureService, LicenseStatus
 from services.operation_service import OperationService

-from .error import NotInitValidateError, NotSetupError, UnauthorizedAndForceLogout
+from .error import (NotInitValidateError, NotSetupError,
+                    UnauthorizedAndForceLogout)


 def account_initialization_required(view):
@@ -39,6 +40,28 @@ def only_edition_cloud(view):
    return decorated


+def only_edition_enterprise(view):
+    @wraps(view)
+    def decorated(*args, **kwargs):
+        if not dify_config.ENTERPRISE_ENABLED:
+            abort(404)
+
+        return view(*args, **kwargs)
+
+    return decorated
+
+
+def only_edition_self_hosted(view):
+    @wraps(view)
+    def decorated(*args, **kwargs):
+        if not dify_config.ENTERPRISE_ENABLED:
+            abort(404)
+
+        return view(*args, **kwargs)
+
+    return decorated
+
+
 def only_edition_self_hosted(view):
    @wraps(view)
    def decorated(*args, **kwargs):
@@ -154,3 +177,16 @@ def enterprise_license_required(view):
        return view(*args, **kwargs)

    return decorated
+
+
+def email_password_login_enabled(view):
+    @wraps(view)
+    def decorated(*args, **kwargs):
+        features = FeatureService.get_system_features()
+        if features.enable_email_password_login:
+            return view(*args, **kwargs)
+
+        # otherwise, return 403
+        abort(403)
+
+    return decorated
--- a/api/controllers/inner_api/init.py
+++ b/api/controllers/inner_api/init.py
@@ -5,4 +5,5 @@ from libs.external_api import ExternalApi
 bp = Blueprint("inner_api", __name__, url_prefix="/inner/api")
 api = ExternalApi(bp)

+from . import mail
 from .workspace import workspace
--- a/api/controllers/inner_api/mail.py
+++ b/api/controllers/inner_api/mail.py
@@ -0,0 +1,27 @@
+from flask_restful import (
+    Resource,  # type: ignore
+    reqparse,
+)
+
+from controllers.console.wraps import setup_required
+from controllers.inner_api import api
+from controllers.inner_api.wraps import inner_api_only
+from services.enterprise.mail_service import DifyMail, EnterpriseMailService
+
+
+class EnterpriseMail(Resource):
+    @setup_required
+    @inner_api_only
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("to", type=str, action="append", required=True)
+        parser.add_argument("subject", type=str, required=True)
+        parser.add_argument("body", type=str, required=True)
+        parser.add_argument("substitutions", type=dict, required=False)
+        args = parser.parse_args()
+
+        EnterpriseMailService.send_mail(DifyMail(**args))
+        return {"message": "success"}, 200
+
+
+api.add_resource(EnterpriseMail, "/enterprise/mail")
--- a/api/controllers/inner_api/workspace/workspace.py
+++ b/api/controllers/inner_api/workspace/workspace.py
@@ -1,3 +1,5 @@
+import json
+
 from flask_restful import Resource, reqparse  # type: ignore

 from controllers.console.wraps import setup_required
@@ -29,4 +31,34 @@ class EnterpriseWorkspace(Resource):
        return {"message": "enterprise workspace created."}


+class EnterpriseWorkspaceNoOwnerEmail(Resource):
+    @setup_required
+    @inner_api_only
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("name", type=str, required=True, location="json")
+        args = parser.parse_args()
+
+        tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True)
+
+        tenant_was_created.send(tenant)
+
+        resp = {
+            "id": tenant.id,
+            "name": tenant.name,
+            "encrypt_public_key": tenant.encrypt_public_key,
+            "plan": tenant.plan,
+            "status": tenant.status,
+            "custom_config": json.loads(tenant.custom_config) if tenant.custom_config else {},
+            "created_at": tenant.created_at.isoformat() if tenant.created_at else None,
+            "updated_at": tenant.updated_at.isoformat() if tenant.updated_at else None,
+        }
+
+        return {
+            "message": "enterprise workspace created.",
+            "tenant": resp,
+        }
+
+
 api.add_resource(EnterpriseWorkspace, "/enterprise/workspace")
+api.add_resource(EnterpriseWorkspaceNoOwnerEmail, "/enterprise/workspace/ownerless")
--- a/api/controllers/service_api/dataset/document.py
+++ b/api/controllers/service_api/dataset/document.py
@@ -18,6 +18,7 @@ from controllers.service_api.app.error import (
 from controllers.service_api.dataset.error import (
    ArchivedDocumentImmutableError,
    DocumentIndexingError,
+    InvalidMetadataError,
 )
 from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check
 from core.errors.error import ProviderTokenNotInitError
@@ -50,6 +51,9 @@ class DocumentAddByTextApi(DatasetApiResource):
            "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
        )
        parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
+        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
+
        args = parser.parse_args()
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
@@ -61,6 +65,28 @@ class DocumentAddByTextApi(DatasetApiResource):
        if not dataset.indexing_technique and not args["indexing_technique"]:
            raise ValueError("indexing_technique is required.")

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
        text = args.get("text")
        name = args.get("name")
        if text is None or name is None:
@@ -107,6 +133,8 @@ class DocumentUpdateByTextApi(DatasetApiResource):
            "doc_language", type=str, default="English", required=False, nullable=False, location="json"
        )
        parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
+        parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
+        parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
        args = parser.parse_args()
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
@@ -115,6 +143,32 @@ class DocumentUpdateByTextApi(DatasetApiResource):
        if not dataset:
            raise ValueError("Dataset is not exist.")

+        # indexing_technique is already set in dataset since this is an update
+        args["indexing_technique"] = dataset.indexing_technique
+
+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
        if args["text"]:
            text = args.get("text")
            name = args.get("name")
@@ -161,6 +215,30 @@ class DocumentAddByFileApi(DatasetApiResource):
            args["doc_form"] = "text_model"
        if "doc_language" not in args:
            args["doc_language"] = "English"
+
+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
        # get dataset info
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
@@ -228,6 +306,29 @@ class DocumentUpdateByFileApi(DatasetApiResource):
        if "doc_language" not in args:
            args["doc_language"] = "English"

+        # Validate metadata if provided
+        if args.get("doc_type") or args.get("doc_metadata"):
+            if not args.get("doc_type") or not args.get("doc_metadata"):
+                raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
+
+            if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
+                raise InvalidMetadataError(
+                    "Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
+                )
+
+            if not isinstance(args["doc_metadata"], dict):
+                raise InvalidMetadataError("doc_metadata must be a dictionary")
+
+            # Validate metadata schema based on doc_type
+            if args["doc_type"] != "others":
+                metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
+                for key, value in args["doc_metadata"].items():
+                    if key in metadata_schema and not isinstance(value, metadata_schema[key]):
+                        raise InvalidMetadataError(f"Invalid type for metadata field {key}")
+
+            # set to MetaDataConfig
+            args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
+
        # get dataset info
        dataset_id = str(dataset_id)
        tenant_id = str(tenant_id)
--- a/api/controllers/web/init.py
+++ b/api/controllers/web/init.py
@@ -15,4 +15,17 @@ api.add_resource(FileApi, "/files/upload")
 api.add_resource(RemoteFileInfoApi, "/remote-files/<path:url>")
 api.add_resource(RemoteFileUploadApi, "/remote-files/upload")

-from . import app, audio, completion, conversation, feature, message, passport, saved_message, site, workflow
+from . import (
+    app,
+    audio,
+    completion,
+    conversation,
+    feature,
+    forgot_password,
+    login,
+    message,
+    passport,
+    saved_message,
+    site,
+    workflow,
+)
--- a/api/controllers/web/app.py
+++ b/api/controllers/web/app.py
@@ -1,12 +1,18 @@
-from flask_restful import marshal_with  # type: ignore
+
+from flask import request
+from flask_restful import Resource, marshal_with, reqparse  # type: ignore

 from controllers.common import fields
 from controllers.common import helpers as controller_helpers
 from controllers.web import api
 from controllers.web.error import AppUnavailableError
 from controllers.web.wraps import WebApiResource
+from libs.passport import PassportService
 from models.model import App, AppMode
 from services.app_service import AppService
+from services.enterprise.enterprise_service import EnterpriseService
+from services.feature_service import FeatureService
+from services.webapp_auth_service import WebAppAuthService


 class AppParameterApi(WebApiResource):
@@ -42,5 +48,65 @@ class AppMeta(WebApiResource):
        return AppService().get_app_meta(app_model)


+class AppAccessMode(Resource):
+    def get(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("appId", type=str, required=False, location="args")
+        parser.add_argument("appCode", type=str, required=False, location="args")
+        args = parser.parse_args()
+
+        features = FeatureService.get_system_features()
+        if not features.webapp_auth.enabled:
+            return {"accessMode": "public"}
+
+        app_id = args.get("appId")
+        if args.get("appCode"):
+            app_code = args["appCode"]
+            app_id = AppService.get_app_id_by_code(app_code)
+
+        if not app_id:
+            raise ValueError("appId or appCode must be provided")
+
+        res = EnterpriseService.WebAppAuth.get_app_access_mode_by_id(app_id)
+
+        return {"accessMode": res.access_mode}
+
+
+class AppWebAuthPermission(Resource):
+    def get(self):
+        user_id = "visitor"
+        try:
+            auth_header = request.headers.get("Authorization")
+            if auth_header is None:
+                raise
+            if " " not in auth_header:
+                raise
+
+            auth_scheme, tk = auth_header.split(None, 1)
+            auth_scheme = auth_scheme.lower()
+            if auth_scheme != "bearer":
+                raise
+
+            decoded = PassportService().verify(tk)
+            user_id = decoded.get("user_id", "visitor")
+        except Exception as e:
+            pass
+
+        parser = reqparse.RequestParser()
+        parser.add_argument("appId", type=str, required=True, location="args")
+        args = parser.parse_args()
+
+        app_id = args["appId"]
+        app_code = AppService.get_app_code_by_id(app_id)
+
+        res = True
+        if WebAppAuthService.is_app_require_permission_check(app_id=app_id):
+            res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(str(user_id), app_code)
+        return {"result": res}
+
+
 api.add_resource(AppParameterApi, "/parameters")
 api.add_resource(AppMeta, "/meta")
+# webapp auth apis
+api.add_resource(AppAccessMode, "/webapp/access-mode")
+api.add_resource(AppWebAuthPermission, "/webapp/permission")
--- a/api/controllers/web/error.py
+++ b/api/controllers/web/error.py
@@ -121,9 +121,15 @@ class UnsupportedFileTypeError(BaseHTTPException):
    code = 415


-class WebSSOAuthRequiredError(BaseHTTPException):
+class WebAppAuthRequiredError(BaseHTTPException):
    error_code = "web_sso_auth_required"
-    description = "Web SSO authentication required."
+    description = "Web app authentication required."
+    code = 401
+
+
+class WebAppAuthAccessDeniedError(BaseHTTPException):
+    error_code = "web_app_access_denied"
+    description = "You do not have permission to access this web app."
    code = 401


--- a/api/controllers/web/forgot_password.py
+++ b/api/controllers/web/forgot_password.py
@@ -0,0 +1,147 @@
+import base64
+import secrets
+
+from flask import request
+from flask_restful import Resource, reqparse
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from controllers.console.auth.error import (
+    EmailCodeError,
+    EmailPasswordResetLimitError,
+    InvalidEmailError,
+    InvalidTokenError,
+    PasswordMismatchError,
+)
+from controllers.console.error import AccountNotFound, EmailSendIpLimitError
+from controllers.console.wraps import email_password_login_enabled, only_edition_enterprise, setup_required
+from controllers.web import api
+from extensions.ext_database import db
+from libs.helper import email, extract_remote_ip
+from libs.password import hash_password, valid_password
+from models.account import Account
+from services.account_service import AccountService
+
+
+class ForgotPasswordSendEmailApi(Resource):
+    @only_edition_enterprise
+    @setup_required
+    @email_password_login_enabled
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=email, required=True, location="json")
+        parser.add_argument("language", type=str, required=False, location="json")
+        args = parser.parse_args()
+
+        ip_address = extract_remote_ip(request)
+        if AccountService.is_email_send_ip_limit(ip_address):
+            raise EmailSendIpLimitError()
+
+        if args["language"] is not None and args["language"] == "zh-Hans":
+            language = "zh-Hans"
+        else:
+            language = "en-US"
+
+        with Session(db.engine) as session:
+            account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
+        token = None
+        if account is None:
+            raise AccountNotFound()
+        else:
+            token = AccountService.send_reset_password_email(account=account, email=args["email"], language=language)
+
+        return {"result": "success", "data": token}
+
+
+class ForgotPasswordCheckApi(Resource):
+    @only_edition_enterprise
+    @setup_required
+    @email_password_login_enabled
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=str, required=True, location="json")
+        parser.add_argument("code", type=str, required=True, location="json")
+        parser.add_argument("token", type=str, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+
+        user_email = args["email"]
+
+        is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args["email"])
+        if is_forgot_password_error_rate_limit:
+            raise EmailPasswordResetLimitError()
+
+        token_data = AccountService.get_reset_password_data(args["token"])
+        if token_data is None:
+            raise InvalidTokenError()
+
+        if user_email != token_data.get("email"):
+            raise InvalidEmailError()
+
+        if args["code"] != token_data.get("code"):
+            AccountService.add_forgot_password_error_rate_limit(args["email"])
+            raise EmailCodeError()
+
+        # Verified, revoke the first token
+        AccountService.revoke_reset_password_token(args["token"])
+
+        # Refresh token data by generating a new token
+        _, new_token = AccountService.generate_reset_password_token(
+            user_email, code=args["code"], additional_data={"phase": "reset"}
+        )
+
+        AccountService.reset_forgot_password_error_rate_limit(args["email"])
+        return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
+
+
+class ForgotPasswordResetApi(Resource):
+    @only_edition_enterprise
+    @setup_required
+    @email_password_login_enabled
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("token", type=str, required=True, nullable=False, location="json")
+        parser.add_argument("new_password", type=valid_password, required=True, nullable=False, location="json")
+        parser.add_argument("password_confirm", type=valid_password, required=True, nullable=False, location="json")
+        args = parser.parse_args()
+
+        # Validate passwords match
+        if args["new_password"] != args["password_confirm"]:
+            raise PasswordMismatchError()
+
+        # Validate token and get reset data
+        reset_data = AccountService.get_reset_password_data(args["token"])
+        if not reset_data:
+            raise InvalidTokenError()
+        # Must use token in reset phase
+        if reset_data.get("phase", "") != "reset":
+            raise InvalidTokenError()
+
+        # Revoke token to prevent reuse
+        AccountService.revoke_reset_password_token(args["token"])
+
+        # Generate secure salt and hash password
+        salt = secrets.token_bytes(16)
+        password_hashed = hash_password(args["new_password"], salt)
+
+        email = reset_data.get("email", "")
+
+        with Session(db.engine) as session:
+            account = session.execute(select(Account).filter_by(email=email)).scalar_one_or_none()
+
+            if account:
+                self._update_existing_account(account, password_hashed, salt, session)
+            else:
+                raise AccountNotFound()
+
+        return {"result": "success"}
+
+    def _update_existing_account(self, account, password_hashed, salt, session):
+        # Update existing account credentials
+        account.password = base64.b64encode(password_hashed).decode()
+        account.password_salt = base64.b64encode(salt).decode()
+        session.commit()
+
+
+api.add_resource(ForgotPasswordSendEmailApi, "/forgot-password")
+api.add_resource(ForgotPasswordCheckApi, "/forgot-password/validity")
+api.add_resource(ForgotPasswordResetApi, "/forgot-password/resets")
--- a/api/controllers/web/login.py
+++ b/api/controllers/web/login.py
@@ -0,0 +1,109 @@
+import services
+from controllers.console.auth.error import (EmailCodeError,
+                                            EmailOrPasswordMismatchError,
+                                            InvalidEmailError)
+from controllers.console.error import AccountBannedError, AccountNotFound
+from controllers.console.wraps import only_edition_enterprise, setup_required
+from controllers.web import api
+from flask_restful import Resource, reqparse
+from jwt import InvalidTokenError  # type: ignore
+from libs.helper import email
+from libs.password import valid_password
+from services.account_service import AccountService
+from services.webapp_auth_service import WebAppAuthService
+
+
+class LoginApi(Resource):
+    """Resource for web app email/password login."""
+
+    @setup_required
+    @only_edition_enterprise
+    def post(self):
+        """Authenticate user and login."""
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=email, required=True, location="json")
+        parser.add_argument("password", type=valid_password, required=True, location="json")
+        args = parser.parse_args()
+
+        try:
+            account = WebAppAuthService.authenticate(args["email"], args["password"])
+        except services.errors.account.AccountLoginError:
+            raise AccountBannedError()
+        except services.errors.account.AccountPasswordError:
+            raise EmailOrPasswordMismatchError()
+        except services.errors.account.AccountNotFoundError:
+            raise AccountNotFound()
+
+        token = WebAppAuthService.login(account=account)
+        return {"result": "success", "data": {"access_token": token}}
+
+
+# class LogoutApi(Resource):
+#     @setup_required
+#     def get(self):
+#         account = cast(Account, flask_login.current_user)
+#         if isinstance(account, flask_login.AnonymousUserMixin):
+#             return {"result": "success"}
+#         flask_login.logout_user()
+#         return {"result": "success"}
+
+
+class EmailCodeLoginSendEmailApi(Resource):
+    @setup_required
+    @only_edition_enterprise
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=email, required=True, location="json")
+        parser.add_argument("language", type=str, required=False, location="json")
+        args = parser.parse_args()
+
+        if args["language"] is not None and args["language"] == "zh-Hans":
+            language = "zh-Hans"
+        else:
+            language = "en-US"
+
+        account = WebAppAuthService.get_user_through_email(args["email"])
+        if account is None:
+            raise AccountNotFound()
+        else:
+            token = WebAppAuthService.send_email_code_login_email(account=account, language=language)
+
+        return {"result": "success", "data": token}
+
+
+class EmailCodeLoginApi(Resource):
+    @setup_required
+    @only_edition_enterprise
+    def post(self):
+        parser = reqparse.RequestParser()
+        parser.add_argument("email", type=str, required=True, location="json")
+        parser.add_argument("code", type=str, required=True, location="json")
+        parser.add_argument("token", type=str, required=True, location="json")
+        args = parser.parse_args()
+
+        user_email = args["email"]
+
+        token_data = WebAppAuthService.get_email_code_login_data(args["token"])
+        if token_data is None:
+            raise InvalidTokenError()
+
+        if token_data["email"] != args["email"]:
+            raise InvalidEmailError()
+
+        if token_data["code"] != args["code"]:
+            raise EmailCodeError()
+
+        WebAppAuthService.revoke_email_code_login_token(args["token"])
+        account = WebAppAuthService.get_user_through_email(user_email)
+        if not account:
+            raise AccountNotFound()
+
+        token = WebAppAuthService.login(account=account)
+        AccountService.reset_login_error_rate_limit(args["email"])
+        return {"result": "success", "data": {"access_token": token}}
+
+
+api.add_resource(LoginApi, "/login")
+# api.add_resource(LogoutApi, "/logout")
+api.add_resource(EmailCodeLoginSendEmailApi, "/email-code-login")
+api.add_resource(EmailCodeLoginApi, "/email-code-login/validity")
--- a/api/controllers/web/message.py
+++ b/api/controllers/web/message.py
@@ -91,7 +91,7 @@ class MessageListApi(WebApiResource):

        try:
            return MessageService.pagination_by_first_id(
-                app_model, end_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
+                app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
            )
        except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation Not Exists.")
--- a/api/controllers/web/passport.py
+++ b/api/controllers/web/passport.py
@@ -1,16 +1,18 @@
 import uuid
+from datetime import UTC, datetime, timedelta

-from flask import request
-from flask_restful import Resource  # type: ignore
-from werkzeug.exceptions import NotFound, Unauthorized
-
+from configs import dify_config
 from controllers.web import api
-from controllers.web.error import WebSSOAuthRequiredError
+from controllers.web.error import WebAppAuthRequiredError
 from extensions.ext_database import db
+from flask import request
+from flask_restful import Resource
 from libs.passport import PassportService
 from models.model import App, EndUser, Site
 from services.enterprise.enterprise_service import EnterpriseService
 from services.feature_service import FeatureService
+from services.webapp_auth_service import WebAppAuthService, WebAppAuthType
+from werkzeug.exceptions import NotFound, Unauthorized


 class PassportResource(Resource):
@@ -19,13 +21,23 @@ class PassportResource(Resource):
    def get(self):
        system_features = FeatureService.get_system_features()
        app_code = request.headers.get("X-App-Code")
+        web_app_access_token = request.args.get("web_app_access_token")
+
        if app_code is None:
            raise Unauthorized("X-App-Code header is missing.")

-        if system_features.sso_enforced_for_web:
-            app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
-            if app_web_sso_enabled:
-                raise WebSSOAuthRequiredError()
+        # exchange token for enterprise logined web user
+        enterprise_user_decoded = decode_enterprise_webapp_user_id(web_app_access_token)
+        if enterprise_user_decoded:
+            # a web user has already logged in, exchange a token for this app without redirecting to the login page
+            return exchange_token_for_existing_web_user(
+                app_code=app_code, enterprise_user_decoded=enterprise_user_decoded
+            )
+
+        if system_features.webapp_auth.enabled:
+            app_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code)
+            if not app_settings or not app_settings.access_mode == "public":
+                raise WebAppAuthRequiredError()

        # get site from db and check if it is normal
        site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first()
@@ -65,6 +77,128 @@ class PassportResource(Resource):
 api.add_resource(PassportResource, "/passport")


+def decode_enterprise_webapp_user_id(jwt_token: str | None):
+    """
+    Decode the enterprise user session from the Authorization header.
+    """
+    if not jwt_token:
+        return None
+
+    decoded = PassportService().verify(jwt_token)
+    source = decoded.get("token_source")
+    if not source or source != "webapp_login_token":
+        raise Unauthorized("Invalid token source. Expected 'webapp_login_token'.")
+    return decoded
+
+
+def exchange_token_for_existing_web_user(app_code: str, enterprise_user_decoded: dict):
+    """
+    Exchange a token for an existing web user session.
+    """
+    user_id = enterprise_user_decoded.get("user_id")
+    end_user_id = enterprise_user_decoded.get("end_user_id")
+    session_id = enterprise_user_decoded.get("session_id")
+    user_auth_type = enterprise_user_decoded.get("auth_type")
+    if not user_auth_type:
+        raise Unauthorized("Missing auth_type in the token.")
+
+    site = db.session.query(Site).filter(Site.code == app_code, Site.status == "normal").first()
+    if not site:
+        raise NotFound()
+
+    app_model = db.session.query(App).filter(App.id == site.app_id).first()
+    if not app_model or app_model.status != "normal" or not app_model.enable_site:
+        raise NotFound()
+
+    app_auth_type = WebAppAuthService.get_app_auth_type(app_code=app_code)
+
+    if app_auth_type == WebAppAuthType.PUBLIC:
+        return _exchange_for_public_app_token(app_model, site, enterprise_user_decoded)
+    elif app_auth_type == WebAppAuthType.EXTERNAL and user_auth_type != "external":
+        raise WebAppAuthRequiredError("Please login as external user.")
+    elif app_auth_type == WebAppAuthType.INTERNAL and user_auth_type != "internal":
+        raise WebAppAuthRequiredError("Please login as internal user.")
+
+    end_user = None
+    if end_user_id:
+        end_user = db.session.query(EndUser).filter(EndUser.id == end_user_id).first()
+    if session_id:
+        end_user = (
+            db.session.query(EndUser)
+            .filter(
+                EndUser.session_id == session_id,
+                EndUser.tenant_id == app_model.tenant_id,
+                EndUser.app_id == app_model.id,
+            )
+            .first()
+        )
+    if not end_user:
+        if not session_id:
+            raise NotFound("Missing session_id for existing web user.")
+        end_user = EndUser(
+            tenant_id=app_model.tenant_id,
+            app_id=app_model.id,
+            type="browser",
+            is_anonymous=True,
+            session_id=session_id,
+        )
+        db.session.add(end_user)
+        db.session.commit()
+    exp_dt = datetime.now(UTC) + timedelta(hours=dify_config.ACCESS_TOKEN_EXPIRE_MINUTES * 24)
+    exp = int(exp_dt.timestamp())
+    payload = {
+        "iss": site.id,
+        "sub": "Web API Passport",
+        "app_id": site.app_id,
+        "app_code": site.code,
+        "user_id": user_id,
+        "end_user_id": end_user.id,
+        "auth_type": user_auth_type,
+        "granted_at": int(datetime.now(UTC).timestamp()),
+        "token_source": "webapp",
+        "exp": exp,
+    }
+    token: str = PassportService().issue(payload)
+    return {
+        "access_token": token,
+    }
+
+
+def _exchange_for_public_app_token(app_model, site, token_decoded):
+    user_id = token_decoded.get("user_id")
+    end_user = None
+    if user_id:
+        end_user = db.session.query(EndUser).filter(
+            EndUser.app_id == app_model.id, EndUser.session_id == user_id
+        ).first()
+
+    if not end_user:
+        end_user = EndUser(
+            tenant_id=app_model.tenant_id,
+            app_id=app_model.id,
+            type="browser",
+            is_anonymous=True,
+            session_id=generate_session_id(),
+        )
+
+        db.session.add(end_user)
+        db.session.commit()
+
+    payload = {
+        "iss": site.app_id,
+        "sub": "Web API Passport",
+        "app_id": site.app_id,
+        "app_code": site.code,
+        "end_user_id": end_user.id,
+    }
+
+    tk = PassportService().issue(payload)
+
+    return {
+        "access_token": tk,
+    }
+
+
 def generate_session_id():
    """
    Generate a unique session ID.
--- a/api/controllers/web/wraps.py
+++ b/api/controllers/web/wraps.py
@@ -1,15 +1,18 @@
+from datetime import UTC, datetime
 from functools import wraps

+from controllers.web.error import (WebAppAuthAccessDeniedError,
+                                   WebAppAuthRequiredError)
+from extensions.ext_database import db
 from flask import request
 from flask_restful import Resource  # type: ignore
-from werkzeug.exceptions import BadRequest, NotFound, Unauthorized
-
-from controllers.web.error import WebSSOAuthRequiredError
-from extensions.ext_database import db
 from libs.passport import PassportService
 from models.model import App, EndUser, Site
-from services.enterprise.enterprise_service import EnterpriseService
+from services.enterprise.enterprise_service import (EnterpriseService,
+                                                    WebAppSettings)
 from services.feature_service import FeatureService
+from services.webapp_auth_service import WebAppAuthService
+from werkzeug.exceptions import BadRequest, NotFound, Unauthorized


 def validate_jwt_token(view=None):
@@ -45,7 +48,8 @@ def decode_jwt_token():
            raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
        decoded = PassportService().verify(tk)
        app_code = decoded.get("app_code")
-        app_model = db.session.query(App).filter(App.id == decoded["app_id"]).first()
+        app_id = decoded.get("app_id")
+        app_model = db.session.query(App).filter(App.id == app_id).first()
        site = db.session.query(Site).filter(Site.code == app_code).first()
        if not app_model:
            raise NotFound()
@@ -53,39 +57,90 @@ def decode_jwt_token():
            raise BadRequest("Site URL is no longer valid.")
        if app_model.enable_site is False:
            raise BadRequest("Site is disabled.")
-        end_user = db.session.query(EndUser).filter(EndUser.id == decoded["end_user_id"]).first()
+        end_user_id = decoded.get("end_user_id")
+        end_user = db.session.query(EndUser).filter(EndUser.id == end_user_id).first()
        if not end_user:
            raise NotFound()

-        _validate_web_sso_token(decoded, system_features, app_code)
+        # for enterprise webapp auth
+        app_web_auth_enabled = False
+        webapp_settings = None
+        if system_features.webapp_auth.enabled:
+            webapp_settings = EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code)
+            if not webapp_settings:
+                raise NotFound("Web app settings not found.")
+            app_web_auth_enabled = webapp_settings.access_mode != "public"
+
+        _validate_webapp_token(decoded, app_web_auth_enabled, system_features.webapp_auth.enabled)
+        _validate_user_accessibility(
+            decoded, app_code, app_web_auth_enabled, system_features.webapp_auth.enabled, webapp_settings
+        )

        return app_model, end_user
    except Unauthorized as e:
-        if system_features.sso_enforced_for_web:
-            app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
-            if app_web_sso_enabled:
-                raise WebSSOAuthRequiredError()
+        if system_features.webapp_auth.enabled:
+            if not app_code:
+                raise Unauthorized("Please re-login to access the web app.")
+            app_web_auth_enabled = (
+                EnterpriseService.WebAppAuth.get_app_access_mode_by_code(app_code=app_code).access_mode != "public"
+            )
+            if app_web_auth_enabled:
+                raise WebAppAuthRequiredError()

        raise Unauthorized(e.description)


-def _validate_web_sso_token(decoded, system_features, app_code):
-    app_web_sso_enabled = False
-
-    # Check if SSO is enforced for web, and if the token source is not SSO, raise an error and redirect to SSO login
-    if system_features.sso_enforced_for_web:
-        app_web_sso_enabled = EnterpriseService.get_app_web_sso_enabled(app_code).get("enabled", False)
-        if app_web_sso_enabled:
-            source = decoded.get("token_source")
-            if not source or source != "sso":
-                raise WebSSOAuthRequiredError()
-
-    # Check if SSO is not enforced for web, and if the token source is SSO,
-    # raise an error and redirect to normal passport login
-    if not system_features.sso_enforced_for_web or not app_web_sso_enabled:
+def _validate_webapp_token(decoded, app_web_auth_enabled: bool, system_webapp_auth_enabled: bool):
+    # Check if authentication is enforced for web app, and if the token source is not webapp,
+    # raise an error and redirect to login
+    if system_webapp_auth_enabled and app_web_auth_enabled:
        source = decoded.get("token_source")
-        if source and source == "sso":
-            raise Unauthorized("sso token expired.")
+        if not source or source != "webapp":
+            raise WebAppAuthRequiredError()
+
+    # Check if authentication is not enforced for web, and if the token source is webapp,
+    # raise an error and redirect to normal passport login
+    if not system_webapp_auth_enabled or not app_web_auth_enabled:
+        source = decoded.get("token_source")
+        if source and source == "webapp":
+            raise Unauthorized("webapp token expired.")
+
+
+def _validate_user_accessibility(
+    decoded,
+    app_code,
+    app_web_auth_enabled: bool,
+    system_webapp_auth_enabled: bool,
+    webapp_settings: WebAppSettings | None,
+):
+    if system_webapp_auth_enabled and app_web_auth_enabled:
+        # Check if the user is allowed to access the web app
+        user_id = decoded.get("user_id")
+        if not user_id:
+            raise WebAppAuthRequiredError()
+
+        if not webapp_settings:
+            raise WebAppAuthRequiredError("Web app settings not found.")
+
+        if WebAppAuthService.is_app_require_permission_check(access_mode=webapp_settings.access_mode):
+            if not EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(user_id, app_code=app_code):
+                raise WebAppAuthAccessDeniedError()
+
+        auth_type = decoded.get("auth_type")
+        granted_at = decoded.get("granted_at")
+        if not auth_type:
+            raise WebAppAuthAccessDeniedError("Missing auth_type in the token.")
+        if not granted_at:
+            raise WebAppAuthAccessDeniedError("Missing granted_at in the token.")
+        # check if sso has been updated
+        if auth_type == "external":
+            last_update_time = EnterpriseService.get_app_sso_settings_last_update_time()
+            if granted_at and datetime.fromtimestamp(granted_at, tz=UTC) < last_update_time:
+                raise WebAppAuthAccessDeniedError("SSO settings have been updated. Please re-login.")
+        elif auth_type == "internal":
+            last_update_time = EnterpriseService.get_workspace_sso_settings_last_update_time()
+            if granted_at and datetime.fromtimestamp(granted_at, tz=UTC) < last_update_time:
+                raise WebAppAuthAccessDeniedError("SSO settings have been updated. Please re-login.")


 class WebApiResource(Resource):
--- a/api/core/agent/cot_agent_runner.py
+++ b/api/core/agent/cot_agent_runner.py
@@ -104,7 +104,6 @@ class CotAgentRunner(BaseAgentRunner, ABC):

            # recalc llm max tokens
            prompt_messages = self._organize_prompt_messages()
-            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
            # invoke model
            chunks = model_instance.invoke_llm(
                prompt_messages=prompt_messages,
--- a/api/core/agent/fc_agent_runner.py
+++ b/api/core/agent/fc_agent_runner.py
@@ -84,7 +84,6 @@ class FunctionCallAgentRunner(BaseAgentRunner):

            # recalc llm max tokens
            prompt_messages = self._organize_prompt_messages()
-            self.recalc_llm_max_tokens(self.model_config, prompt_messages)
            # invoke model
            chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
                prompt_messages=prompt_messages,
--- a/api/core/app/apps/agent_chat/app_runner.py
+++ b/api/core/app/apps/agent_chat/app_runner.py
@@ -55,20 +55,6 @@ class AgentChatAppRunner(AppRunner):
        query = application_generate_entity.query
        files = application_generate_entity.files

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
        memory = None
        if application_generate_entity.conversation_id:
            # get memory of conversation (read-only)
@@ -202,7 +188,7 @@ class AgentChatAppRunner(AppRunner):
        # change function call strategy based on LLM model
        llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
        model_schema = llm_model.get_model_schema(model_instance.model, model_instance.credentials)
-        if not model_schema or not model_schema.features:
+        if not model_schema:
            raise ValueError("Model schema not found")

        if {ModelFeature.MULTI_TOOL_CALL, ModelFeature.TOOL_CALL}.intersection(model_schema.features or []):
--- a/api/core/app/apps/base_app_runner.py
+++ b/api/core/app/apps/base_app_runner.py
@@ -15,10 +15,8 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF
 from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
 from core.external_data_tool.external_data_fetch import ExternalDataFetch
 from core.memory.token_buffer_memory import TokenBufferMemory
-from core.model_manager import ModelInstance
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
-from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.moderation.input_moderation import InputModeration
 from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
@@ -31,106 +29,6 @@ if TYPE_CHECKING:


 class AppRunner:
-    def get_pre_calculate_rest_tokens(
-        self,
-        app_record: App,
-        model_config: ModelConfigWithCredentialsEntity,
-        prompt_template_entity: PromptTemplateEntity,
-        inputs: Mapping[str, str],
-        files: Sequence["File"],
-        query: Optional[str] = None,
-    ) -> int:
-        """
-        Get pre calculate rest tokens
-        :param app_record: app record
-        :param model_config: model config entity
-        :param prompt_template_entity: prompt template entity
-        :param inputs: inputs
-        :param files: files
-        :param query: query
-        :return:
-        """
-        # Invoke model
-        model_instance = ModelInstance(
-            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
-        )
-
-        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
-
-        max_tokens = 0
-        for parameter_rule in model_config.model_schema.parameter_rules:
-            if parameter_rule.name == "max_tokens" or (
-                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
-            ):
-                max_tokens = (
-                    model_config.parameters.get(parameter_rule.name)
-                    or model_config.parameters.get(parameter_rule.use_template or "")
-                ) or 0
-
-        if model_context_tokens is None:
-            return -1
-
-        if max_tokens is None:
-            max_tokens = 0
-
-        # get prompt messages without memory and context
-        prompt_messages, stop = self.organize_prompt_messages(
-            app_record=app_record,
-            model_config=model_config,
-            prompt_template_entity=prompt_template_entity,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
-        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
-
-        rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
-        if rest_tokens < 0:
-            raise InvokeBadRequestError(
-                "Query or prefix prompt is too long, you can reduce the prefix prompt, "
-                "or shrink the max token, or switch to a llm with a larger token limit size."
-            )
-
-        return rest_tokens
-
-    def recalc_llm_max_tokens(
-        self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
-    ):
-        # recalc max_tokens if sum(prompt_token +  max_tokens) over model token limit
-        model_instance = ModelInstance(
-            provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
-        )
-
-        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
-
-        max_tokens = 0
-        for parameter_rule in model_config.model_schema.parameter_rules:
-            if parameter_rule.name == "max_tokens" or (
-                parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
-            ):
-                max_tokens = (
-                    model_config.parameters.get(parameter_rule.name)
-                    or model_config.parameters.get(parameter_rule.use_template or "")
-                ) or 0
-
-        if model_context_tokens is None:
-            return -1
-
-        if max_tokens is None:
-            max_tokens = 0
-
-        prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
-
-        if prompt_tokens + max_tokens > model_context_tokens:
-            max_tokens = max(model_context_tokens - prompt_tokens, 16)
-
-            for parameter_rule in model_config.model_schema.parameter_rules:
-                if parameter_rule.name == "max_tokens" or (
-                    parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
-                ):
-                    model_config.parameters[parameter_rule.name] = max_tokens
-
    def organize_prompt_messages(
        self,
        app_record: App,
--- a/api/core/app/apps/chat/app_runner.py
+++ b/api/core/app/apps/chat/app_runner.py
@@ -50,20 +50,6 @@ class ChatAppRunner(AppRunner):
        query = application_generate_entity.query
        files = application_generate_entity.files

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
        memory = None
        if application_generate_entity.conversation_id:
            # get memory of conversation (read-only)
@@ -194,9 +180,6 @@ class ChatAppRunner(AppRunner):
        if hosting_moderation_result:
            return

-        # Re-calculate the max tokens if sum(prompt_token +  max_tokens) over model token limit
-        self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
-
        # Invoke model
        model_instance = ModelInstance(
            provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,
--- a/api/core/app/apps/completion/app_runner.py
+++ b/api/core/app/apps/completion/app_runner.py
@@ -43,20 +43,6 @@ class CompletionAppRunner(AppRunner):
        query = application_generate_entity.query
        files = application_generate_entity.files

-        # Pre-calculate the number of tokens of the prompt messages,
-        # and return the rest number of tokens by model context token size limit and max token size limit.
-        # If the rest number of tokens is not enough, raise exception.
-        # Include: prompt template, inputs, query(optional), files(optional)
-        # Not Include: memory, external data, dataset context
-        self.get_pre_calculate_rest_tokens(
-            app_record=app_record,
-            model_config=application_generate_entity.model_conf,
-            prompt_template_entity=app_config.prompt_template,
-            inputs=inputs,
-            files=files,
-            query=query,
-        )
-
        # organize all inputs and template to prompt messages
        # Include: prompt template, inputs, query(optional), files(optional)
        prompt_messages, stop = self.organize_prompt_messages(
@@ -152,9 +138,6 @@ class CompletionAppRunner(AppRunner):
        if hosting_moderation_result:
            return

-        # Re-calculate the max tokens if sum(prompt_token +  max_tokens) over model token limit
-        self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
-
        # Invoke model
        model_instance = ModelInstance(
            provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,
--- a/api/core/helper/ssrf_proxy.py
+++ b/api/core/helper/ssrf_proxy.py
@@ -11,15 +11,6 @@ from configs import dify_config

 SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES

-proxy_mounts = (
-    {
-        "http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
-        "https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
-    }
-    if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL
-    else None
-)
-
 BACKOFF_FACTOR = 0.5
 STATUS_FORCELIST = [429, 500, 502, 503, 504]

@@ -51,7 +42,11 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
            if dify_config.SSRF_PROXY_ALL_URL:
                with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL) as client:
                    response = client.request(method=method, url=url, **kwargs)
-            elif proxy_mounts:
+            elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
+                proxy_mounts = {
+                    "http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
+                    "https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
+                }
                with httpx.Client(mounts=proxy_mounts) as client:
                    response = client.request(method=method, url=url, **kwargs)
            else:
--- a/api/core/memory/token_buffer_memory.py
+++ b/api/core/memory/token_buffer_memory.py
@@ -26,7 +26,7 @@ class TokenBufferMemory:
        self.model_instance = model_instance

    def get_history_prompt_messages(
-        self, max_token_limit: int = 2000, message_limit: Optional[int] = None
+        self, max_token_limit: int = 100000, message_limit: Optional[int] = None
    ) -> Sequence[PromptMessage]:
        """
        Get history prompt messages.
--- a/api/core/model_runtime/entities/init.py
+++ b/api/core/model_runtime/entities/init.py
@@ -1,4 +1,4 @@
-from .llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
+from .llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from .message_entities import (
    AssistantPromptMessage,
    AudioPromptMessageContent,
@@ -23,6 +23,7 @@ __all__ = [
    "AudioPromptMessageContent",
    "DocumentPromptMessageContent",
    "ImagePromptMessageContent",
+    "LLMMode",
    "LLMResult",
    "LLMResultChunk",
    "LLMResultChunkDelta",
--- a/api/core/model_runtime/entities/llm_entities.py
+++ b/api/core/model_runtime/entities/llm_entities.py
@@ -1,5 +1,5 @@
 from decimal import Decimal
-from enum import Enum
+from enum import StrEnum
 from typing import Optional

 from pydantic import BaseModel
@@ -8,7 +8,7 @@ from core.model_runtime.entities.message_entities import AssistantPromptMessage,
 from core.model_runtime.entities.model_entities import ModelUsage, PriceInfo


-class LLMMode(Enum):
+class LLMMode(StrEnum):
    """
    Enum class for large language model mode.
    """
--- a/api/core/model_runtime/model_providers/__base/ai_model.py
+++ b/api/core/model_runtime/model_providers/__base/ai_model.py
@@ -221,13 +221,12 @@ class AIModel(ABC):
        :param credentials: model credentials
        :return: model schema
        """
-        # get predefined models (predefined_models)
-        models = self.predefined_models()
-
-        model_map = {model.model: model for model in models}
-        if model in model_map:
-            return model_map[model]
+        # Try to get model schema from predefined models
+        for predefined_model in self.predefined_models():
+            if model == predefined_model.model:
+                return predefined_model

+        # Try to get model schema from credentials
        if credentials:
            model_schema = self.get_customizable_model_schema_from_credentials(model, credentials)
            if model_schema:
--- a/api/core/model_runtime/model_providers/__base/large_language_model.py
+++ b/api/core/model_runtime/model_providers/__base/large_language_model.py
@@ -30,6 +30,11 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel

 logger = logging.getLogger(__name__)

+HTML_THINKING_TAG = (
+    '<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
+    "<summary> Thinking... </summary>"
+)
+

 class LargeLanguageModel(AIModel):
    """
@@ -400,6 +405,40 @@ if you are not sure about the structure.
                    ),
                )

+    def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
+        """
+        If the reasoning response is from delta.get("reasoning_content"), we wrap
+        it with HTML details tag.
+
+        :param delta: delta dictionary from LLM streaming response
+        :param is_reasoning: is reasoning
+        :return: tuple of (processed_content, is_reasoning)
+        """
+
+        content = delta.get("content") or ""
+        reasoning_content = delta.get("reasoning_content")
+
+        if reasoning_content:
+            if not is_reasoning:
+                content = HTML_THINKING_TAG + reasoning_content
+                is_reasoning = True
+            else:
+                content = reasoning_content
+        elif is_reasoning:
+            content = "</details>" + content
+            is_reasoning = False
+        return content, is_reasoning
+
+    def _wrap_thinking_by_tag(self, content: str) -> str:
+        """
+        if the reasoning response is a <think>...</think> block from delta.get("content"),
+        we replace <think> to <detail>.
+
+        :param content: delta.get("content")
+        :return: processed_content
+        """
+        return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
+
    def _invoke_result_generator(
        self,
        model: str,
--- a/api/core/model_runtime/model_providers/_position.yaml
+++ b/api/core/model_runtime/model_providers/_position.yaml
@@ -1,4 +1,5 @@
 - openai
+- deepseek
 - anthropic
 - azure_openai
 - google
@@ -32,7 +33,6 @@
 - localai
 - volcengine_maas
 - openai_api_compatible
- deepseek
 - hunyuan
 - siliconflow
 - perfxcloud
--- a/api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.yaml
+++ b/api/core/model_runtime/model_providers/azure_ai_studio/azure_ai_studio.yaml
@@ -51,6 +51,40 @@ model_credential_schema:
      show_on:
        - variable: __model_type
          value: llm
+    - variable: mode
+      show_on:
+        - variable: __model_type
+          value: llm
+      label:
+        en_US: Completion mode
+      type: select
+      required: false
+      default: chat
+      placeholder:
+        zh_Hans: 选择对话类型
+        en_US: Select completion mode
+      options:
+        - value: completion
+          label:
+            en_US: Completion
+            zh_Hans: 补全
+        - value: chat
+          label:
+            en_US: Chat
+            zh_Hans: 对话
+    - variable: context_size
+      label:
+        zh_Hans: 模型上下文长度
+        en_US: Model context size
+      required: true
+      show_on:
+        - variable: __model_type
+          value: llm
+      type: text-input
+      default: "4096"
+      placeholder:
+        zh_Hans: 在此输入您的模型上下文长度
+        en_US: Enter your Model context size
    - variable: jwt_token
      required: true
      label:
--- a/api/core/model_runtime/model_providers/azure_ai_studio/llm/llm.py
+++ b/api/core/model_runtime/model_providers/azure_ai_studio/llm/llm.py
@@ -1,9 +1,9 @@
 import logging
-from collections.abc import Generator
+from collections.abc import Generator, Sequence
 from typing import Any, Optional, Union

 from azure.ai.inference import ChatCompletionsClient
-from azure.ai.inference.models import StreamingChatCompletionsUpdate
+from azure.ai.inference.models import StreamingChatCompletionsUpdate, SystemMessage, UserMessage
 from azure.core.credentials import AzureKeyCredential
 from azure.core.exceptions import (
    ClientAuthenticationError,
@@ -20,7 +20,7 @@ from azure.core.exceptions import (
 )

 from core.model_runtime.callbacks.base_callback import Callback
-from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
+from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
 from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessage,
@@ -30,6 +30,7 @@ from core.model_runtime.entities.model_entities import (
    AIModelEntity,
    FetchFrom,
    I18nObject,
+    ModelPropertyKey,
    ModelType,
    ParameterRule,
    ParameterType,
@@ -60,10 +61,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
        self,
        model: str,
        credentials: dict,
-        prompt_messages: list[PromptMessage],
+        prompt_messages: Sequence[PromptMessage],
        model_parameters: dict,
-        tools: Optional[list[PromptMessageTool]] = None,
-        stop: Optional[list[str]] = None,
+        tools: Optional[Sequence[PromptMessageTool]] = None,
+        stop: Optional[Sequence[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
@@ -82,8 +83,8 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
        """

        if not self.client:
-            endpoint = credentials.get("endpoint")
-            api_key = credentials.get("api_key")
+            endpoint = str(credentials.get("endpoint"))
+            api_key = str(credentials.get("api_key"))
            self.client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))

        messages = [{"role": msg.role.value, "content": msg.content} for msg in prompt_messages]
@@ -94,6 +95,7 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
            "temperature": model_parameters.get("temperature", 0),
            "top_p": model_parameters.get("top_p", 1),
            "stream": stream,
+            "model": model,
        }

        if stop:
@@ -255,10 +257,16 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
        :return:
        """
        try:
-            endpoint = credentials.get("endpoint")
-            api_key = credentials.get("api_key")
+            endpoint = str(credentials.get("endpoint"))
+            api_key = str(credentials.get("api_key"))
            client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
-            client.get_model_info()
+            client.complete(
+                messages=[
+                    SystemMessage(content="I say 'ping', you say 'pong'"),
+                    UserMessage(content="ping"),
+                ],
+                model=model,
+            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))

@@ -327,7 +335,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_type=ModelType.LLM,
            features=[],
-            model_properties={},
+            model_properties={
+                ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "4096")),
+                ModelPropertyKey.MODE: credentials.get("mode", LLMMode.CHAT),
+            },
            parameter_rules=rules,
        )

--- a/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml
+++ b/api/core/model_runtime/model_providers/azure_openai/azure_openai.yaml
@@ -53,6 +53,9 @@ model_credential_schema:
      type: select
      required: true
      options:
+        - label:
+            en_US: 2024-12-01-preview
+          value: 2024-12-01-preview
        - label:
            en_US: 2024-10-01-preview
          value: 2024-10-01-preview
@@ -135,6 +138,18 @@ model_credential_schema:
          show_on:
            - variable: __model_type
              value: llm
+        - label:
+            en_US: o3-mini
+          value: o3-mini
+          show_on:
+            - variable: __model_type
+              value: llm
+        - label:
+            en_US: o3-mini-2025-01-31
+          value: o3-mini-2025-01-31
+          show_on:
+            - variable: __model_type
+              value: llm
        - label:
            en_US: o1-preview
          value: o1-preview
--- a/api/core/model_runtime/model_providers/bedrock/bedrock.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/bedrock.yaml
@@ -44,6 +44,7 @@ provider_credential_schema:
      label:
        en_US: AWS Region
        zh_Hans: AWS 地区
+        ja_JP: AWS リージョン
      type: select
      default: us-east-1
      options:
@@ -51,62 +52,86 @@ provider_credential_schema:
          label:
            en_US: US East (N. Virginia)
            zh_Hans: 美国东部 (弗吉尼亚北部)
+            ja_JP: 米国 (バージニア北部)
        - value: us-east-2
          label:
            en_US: US East (Ohio)
-            zh_Hans: 美国东部 (弗吉尼亚北部)
+            zh_Hans: 美国东部 (俄亥俄)
+            ja_JP: 米国 (オハイオ)
        - value: us-west-2
          label:
            en_US: US West (Oregon)
            zh_Hans: 美国西部 (俄勒冈州)
+            ja_JP: 米国 (オレゴン)
        - value: ap-south-1
          label:
            en_US: Asia Pacific (Mumbai)
            zh_Hans: 亚太地区（孟买）
+            ja_JP: アジアパシフィック (ムンバイ)
        - value: ap-southeast-1
          label:
            en_US: Asia Pacific (Singapore)
            zh_Hans: 亚太地区 (新加坡)
+            ja_JP: アジアパシフィック (シンガポール)
        - value: ap-southeast-2
          label:
            en_US: Asia Pacific (Sydney)
            zh_Hans: 亚太地区 (悉尼)
+            ja_JP: アジアパシフィック (シドニー)
        - value: ap-northeast-1
          label:
            en_US: Asia Pacific (Tokyo)
            zh_Hans: 亚太地区 (东京)
+            ja_JP: アジアパシフィック (東京)
        - value: ap-northeast-2
          label:
            en_US: Asia Pacific (Seoul)
            zh_Hans: 亚太地区（首尔）
+            ja_JP: アジアパシフィック (ソウル)
        - value: ca-central-1
          label:
            en_US: Canada (Central)
            zh_Hans: 加拿大（中部）
+            ja_JP: カナダ (中部)
        - value: eu-central-1
          label:
            en_US: Europe (Frankfurt)
            zh_Hans: 欧洲 (法兰克福)
+            ja_JP: 欧州 (フランクフルト)
        - value: eu-west-1
          label:
            en_US: Europe (Ireland)
            zh_Hans: 欧洲（爱尔兰）
+            ja_JP: 欧州 (アイルランド)
        - value: eu-west-2
          label:
            en_US: Europe (London)
            zh_Hans: 欧洲西部 (伦敦)
+            ja_JP: 欧州 (ロンドン)
        - value: eu-west-3
          label:
            en_US: Europe (Paris)
            zh_Hans: 欧洲（巴黎）
+            ja_JP: 欧州 (パリ)
        - value: sa-east-1
          label:
            en_US: South America (São Paulo)
            zh_Hans: 南美洲（圣保罗）
+            ja_JP: 南米 (サンパウロ)
        - value: us-gov-west-1
          label:
            en_US: AWS GovCloud (US-West)
            zh_Hans: AWS GovCloud (US-West)
+            ja_JP: AWS GovCloud (米国西部)
+    - variable: bedrock_endpoint_url
+      label:
+        zh_Hans: Bedrock Endpoint URL
+        en_US: Bedrock Endpoint URL
+      type: text-input
+      required: false
+      placeholder:
+        zh_Hans: 在此输入您的 Bedrock Endpoint URL, 如：https://123456.cloudfront.net
+        en_US: Enter your Bedrock Endpoint URL, e.g. https://123456.cloudfront.net
    - variable: model_for_validation
      required: false
      label:
--- a/api/core/model_runtime/model_providers/bedrock/get_bedrock_client.py
+++ b/api/core/model_runtime/model_providers/bedrock/get_bedrock_client.py
@@ -13,6 +13,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
    client_config = Config(region_name=region_name)
    aws_access_key_id = credentials.get("aws_access_key_id")
    aws_secret_access_key = credentials.get("aws_secret_access_key")
+    bedrock_endpoint_url = credentials.get("bedrock_endpoint_url")

    if aws_access_key_id and aws_secret_access_key:
        # use aksk to call bedrock
@@ -21,6 +22,7 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
            config=client_config,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
+            **({"endpoint_url": bedrock_endpoint_url} if bedrock_endpoint_url else {}),
        )
    else:
        # use iam without aksk to call
--- a/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3.7-sonnet-v1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/eu.anthropic.claude-3.7-sonnet-v1.yaml
@@ -0,0 +1,115 @@
+model: us.anthropic.claude-3-7-sonnet-20250219-v1:0
+label:
+  en_US: Claude 3.7 Sonnet(US.Cross Region Inference)
+icon: icon_s_en.svg
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 200000
+# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
+parameter_rules:
+  - name: enable_cache
+    label:
+      zh_Hans: 启用提示缓存
+      en_US: Enable Prompt Cache
+    type: boolean
+    required: false
+    default: true
+    help:
+      zh_Hans: 启用提示缓存可以提高性能并降低成本。Claude 3.7 Sonnet支持在system、messages和tools字段中使用缓存检查点。
+      en_US: Enable prompt caching to improve performance and reduce costs. Claude 3.7 Sonnet supports cache checkpoints in system, messages, and tools fields.
+  - name: reasoning_type
+    label:
+      zh_Hans: 推理配置
+      en_US: Reasoning Type
+    type: boolean
+    required: false
+    default: false
+    placeholder:
+      zh_Hans: 设置推理配置
+      en_US: Set reasoning configuration
+    help:
+      zh_Hans: 控制模型的推理能力。启用时，temperature将固定为1且top_p将被禁用。
+      en_US: Controls the model's reasoning capability. When enabled, temperature will be fixed to 1 and top_p will be disabled.
+  - name: reasoning_budget
+    show_on:
+      - variable: reasoning_type
+        value: true
+    label:
+      zh_Hans: 推理预算
+      en_US: Reasoning Budget
+    type: int
+    default: 1024
+    min: 0
+    max: 128000
+    help:
+      zh_Hans: 推理的预算限制（最小1024），必须小于max_tokens。仅在推理类型为enabled时可用。
+      en_US: Budget limit for reasoning (minimum 1024), must be less than max_tokens. Only available when reasoning type is enabled.
+
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    label:
+      zh_Hans: 最大token数
+      en_US: Max Tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 128000
+    help:
+      zh_Hans: 停止前生成的最大令牌数。请注意，Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
+      en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
+  - name: temperature
+    use_template: temperature
+    required: false
+    label:
+      zh_Hans: 模型温度
+      en_US: Model Temperature
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。当推理功能启用时，该值将被固定为1。
+      en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
+  - name: top_p
+    show_on:
+      - variable: reasoning_type
+        value: disabled
+    use_template: top_p
+    label:
+      zh_Hans: Top P
+      en_US: Top P
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中的概率阈值。当推理功能启用时，该参数将被禁用。
+      en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    required: false
+    type: int
+    default: 0
+    min: 0
+    # tip docs from aws has error, max value is 500
+    max: 500
+    help:
+      zh_Hans: 对于每个后续标记，仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
+      en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.003'
+  output: '0.015'
+  unit: '0.001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/bedrock/llm/llm.py
+++ b/api/core/model_runtime/model_providers/bedrock/llm/llm.py
@@ -58,6 +58,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
    # TODO There is invoke issue: context limit on Cohere Model, will add them after fixed.
    CONVERSE_API_ENABLED_MODEL_INFO = [
        {"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False},
+        {"prefix": "us.deepseek", "support_system_prompts": True, "support_tool_use": False},
        {"prefix": "anthropic.claude-v1", "support_system_prompts": True, "support_tool_use": False},
        {"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
        {"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
--- a/api/core/model_runtime/model_providers/bedrock/llm/us.deepseek-r1.yaml
+++ b/api/core/model_runtime/model_providers/bedrock/llm/us.deepseek-r1.yaml
@@ -0,0 +1,63 @@
+model: us.deepseek.r1-v1:0
+label:
+  en_US: DeepSeek-R1(US.Cross Region Inference)
+icon: icon_s_en.svg
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: max_tokens
+    use_template: max_tokens
+    required: true
+    label:
+      zh_Hans: 最大token数
+      en_US: Max Tokens
+    type: int
+    default: 8192
+    min: 1
+    max: 128000
+    help:
+      zh_Hans: 停止前生成的最大令牌数。
+      en_US: The maximum number of tokens to generate before stopping.
+  - name: temperature
+    use_template: temperature
+    required: false
+    label:
+      zh_Hans: 模型温度
+      en_US: Model Temperature
+    type: float
+    default: 1
+    min: 0.0
+    max: 1.0
+    help:
+      zh_Hans: 生成内容的随机性。当推理功能启用时，该值将被固定为1。
+      en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
+  - name: top_p
+    show_on:
+      - variable: reasoning_type
+        value: disabled
+    use_template: top_p
+    label:
+      zh_Hans: Top P
+      en_US: Top P
+    required: false
+    type: float
+    default: 0.999
+    min: 0.000
+    max: 1.000
+    help:
+      zh_Hans: 在核采样中的概率阈值。当推理功能启用时，该参数将被禁用。
+      en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
+  - name: response_format
+    use_template: response_format
+pricing:
+  input: '0.001'
+  output: '0.005'
+  unit: '0.001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/cohere/llm/llm.py
+++ b/api/core/model_runtime/model_providers/cohere/llm/llm.py
@@ -677,16 +677,17 @@ class CohereLargeLanguageModel(LargeLanguageModel):

        :return: model schema
        """
-        # get model schema
-        models = self.predefined_models()
-        model_map = {model.model: model for model in models}
-
        mode = credentials.get("mode")
+        base_model_schema = None
+        for predefined_model in self.predefined_models():
+            if (
+                mode == "chat" and predefined_model.model == "command-light-chat"
+            ) or predefined_model.model == "command-light":
+                base_model_schema = predefined_model
+                break

-        if mode == "chat":
-            base_model_schema = model_map["command-light-chat"]
-        else:
-            base_model_schema = model_map["command-light"]
+        if not base_model_schema:
+            raise ValueError("Model not found")

        base_model_schema = cast(AIModelEntity, base_model_schema)

--- a/api/core/model_runtime/model_providers/google/google.py
+++ b/api/core/model_runtime/model_providers/google/google.py
@@ -19,8 +19,8 @@ class GoogleProvider(ModelProvider):
        try:
            model_instance = self.get_model_instance(ModelType.LLM)

-            # Use `gemini-pro` model for validate,
-            model_instance.validate_credentials(model="gemini-pro", credentials=credentials)
+            # Use `gemini-2.0-flash` model for validate,
+            model_instance.validate_credentials(model="gemini-2.0-flash", credentials=credentials)
        except CredentialsValidateFailedError as ex:
            raise ex
        except Exception as ex:
--- a/api/core/model_runtime/model_providers/google/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/_position.yaml
@@ -1,4 +1,6 @@
+- gemini-2.0-flash-001
 - gemini-2.0-flash-exp
+- gemini-2.0-pro-exp-02-05
 - gemini-2.0-flash-thinking-exp-1219
 - gemini-2.0-flash-thinking-exp-01-21
 - gemini-1.5-pro
@@ -17,5 +19,3 @@
 - gemini-exp-1206
 - gemini-exp-1121
 - gemini-exp-1114
- gemini-pro
- gemini-pro-vision
--- a/api/core/model_runtime/model_providers/google/llm/gemini-2.0-flash-001.yml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-2.0-flash-001.yml
@@ -0,0 +1,41 @@
+model: gemini-2.0-flash-001
+label:
+  en_US: Gemini 2.0 Flash 001
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+  - document
+  - video
+  - audio
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_output_tokens
+    use_template: max_tokens
+    default: 8192
+    min: 1
+    max: 8192
+  - name: json_schema
+    use_template: json_schema
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/google/llm/gemini-2.0-pro-exp-02-05.yaml
+++ b/api/core/model_runtime/model_providers/google/llm/gemini-2.0-pro-exp-02-05.yaml
@@ -0,0 +1,41 @@
+model: gemini-2.0-pro-exp-02-05
+label:
+  en_US: Gemini 2.0 pro exp 02-05
+model_type: llm
+features:
+  - agent-thought
+  - vision
+  - tool-call
+  - stream-tool-call
+  - document
+  - video
+  - audio
+model_properties:
+  mode: chat
+  context_size: 1048576
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: top_k
+    label:
+      zh_Hans: 取样数量
+      en_US: Top k
+    type: int
+    help:
+      zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
+      en_US: Only sample from the top K options for each subsequent token.
+    required: false
+  - name: max_output_tokens
+    use_template: max_tokens
+    default: 8192
+    min: 1
+    max: 8192
+  - name: json_schema
+    use_template: json_schema
+pricing:
+  input: '0.00'
+  output: '0.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/groq/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/_position.yaml
@@ -1,3 +1,4 @@
+- deepseek-r1-distill-llama-70b
 - llama-3.1-405b-reasoning
 - llama-3.3-70b-versatile
 - llama-3.1-70b-versatile
--- a/api/core/model_runtime/model_providers/groq/llm/deepseek-r1-distill-llama-70b.yaml
+++ b/api/core/model_runtime/model_providers/groq/llm/deepseek-r1-distill-llama-70b.yaml
@@ -0,0 +1,36 @@
+model: deepseek-r1-distill-llama-70b
+label:
+  en_US: DeepSeek R1 Distill Llama 70b
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 128000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+  - name: top_p
+    use_template: top_p
+  - name: max_tokens
+    use_template: max_tokens
+    default: 512
+    min: 1
+    max: 8192
+  - name: response_format
+    label:
+      zh_Hans: 回复格式
+      en_US: Response Format
+    type: string
+    help:
+      zh_Hans: 指定模型必须输出的格式
+      en_US: specifying the format that the model must output
+    required: false
+    options:
+      - text
+      - json_object
+pricing:
+  input: '3.00'
+  output: '3.00'
+  unit: '0.000001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/_assets/icon_l_en.svg
+++ b/api/core/model_runtime/model_providers/novita/_assets/icon_l_en.svg
@@ -1,19 +1,11 @@
-<svg width="162" height="36" viewBox="0 0 162 36" fill="none" xmlns="http://www.w3.org/2000/svg">
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="white"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="url(#paint0_linear_1473_71)"/>
-<path d="M55.9397 27.8804H59.0566V19.0803C59.0566 14.9105 56.381 12.7172 52.8228 12.7172C51.0023 12.7172 49.3197 13.4483 48.2991 14.6668V12.9609H45.1546V27.8804H48.2991V19.5406C48.2991 16.8059 49.8162 15.3978 52.1332 15.3978C54.4226 15.3978 55.9397 16.8059 55.9397 19.5406V27.8804Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M69.7881 12.7172C74.1187 12.7172 77.539 15.7228 77.539 20.4071C77.539 25.0915 74.0083 28.1241 69.6502 28.1241C65.3196 28.1241 62.0372 25.0915 62.0372 20.4071C62.0372 15.7228 65.4575 12.7172 69.7881 12.7172ZM69.7342 15.3979C67.362 15.3979 65.2381 17.0225 65.2381 20.4071C65.2381 23.7918 67.2793 25.4435 69.6514 25.4435C71.996 25.4435 74.313 23.7918 74.313 20.4071C74.313 17.0225 72.0788 15.3979 69.7342 15.3979Z" fill="#11101A"/>
-<path d="M78.861 12.9609L84.6259 27.8804H88.3772L94.1697 12.9609H90.8321L86.5291 25.1185L82.2261 12.9609H78.861Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M100.13 9.00761C100.13 10.1178 99.2477 10.9842 98.1443 10.9842C97.0134 10.9842 96.1308 10.1178 96.1308 9.00761C96.1308 7.89745 97.0134 7.03098 98.1443 7.03098C99.2477 7.03098 100.13 7.89745 100.13 9.00761ZM99.6882 27.8804H96.5437V12.9609H99.6882V27.8804Z" fill="#11101A"/>
-<path d="M104.322 23.7376C104.322 26.7702 106.004 27.8804 108.708 27.8804H111.19V25.308H109.259C107.935 25.308 107.494 24.8477 107.494 23.7376V15.479H111.19V12.9609H107.494V9.25128H104.322V12.9609H102.529V15.479H104.322V23.7376Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M120.154 28.1241C116.209 28.1241 113.037 24.9561 113.037 20.353C113.037 15.7498 116.209 12.7172 120.209 12.7172C122.774 12.7172 124.539 13.9086 125.477 15.1271V12.9609H128.649V27.8804H125.477V25.6601C124.512 26.9327 122.691 28.1241 120.154 28.1241ZM120.87 25.4435C123.242 25.4435 125.476 23.6293 125.476 20.4071C125.476 17.212 123.242 15.3979 120.87 15.3979C118.526 15.3979 116.264 17.1308 116.264 20.353C116.264 23.5752 118.526 25.4435 120.87 25.4435Z" fill="#11101A"/>
-<path d="M136.043 26.0933C136.043 24.9832 135.16 24.1167 134.057 24.1167C132.926 24.1167 132.043 24.9832 132.043 26.0933C132.043 27.2035 132.926 28.07 134.057 28.07C135.16 28.07 136.043 27.2035 136.043 26.0933Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M145.502 28.1241C141.558 28.1241 138.386 24.9561 138.386 20.353C138.386 15.7498 141.558 12.7172 145.557 12.7172C148.123 12.7172 149.888 13.9086 150.826 15.1271V12.9609H153.998V27.8804H150.826V25.6601C149.86 26.9327 148.04 28.1241 145.502 28.1241ZM146.219 25.4435C148.591 25.4435 150.825 23.6293 150.825 20.4071C150.825 17.212 148.591 15.3979 146.219 15.3979C143.874 15.3979 141.612 17.1308 141.612 20.353C141.612 23.5752 143.874 25.4435 146.219 25.4435Z" fill="#11101A"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M161.722 9.00761C161.722 10.1178 160.84 10.9842 159.736 10.9842C158.605 10.9842 157.723 10.1178 157.723 9.00761C157.723 7.89745 158.605 7.03098 159.736 7.03098C160.84 7.03098 161.722 7.89745 161.722 9.00761ZM161.28 27.8804H158.136V12.9609H161.28V27.8804Z" fill="#11101A"/>
+<svg width="88" height="24" viewBox="0 0 88 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+<g clip-path="url(#clip0_1923_1287)">
+<path d="M24 18.8323V18.8326H14.3246L9.16716 13.6751V18.8326H0V18.8314L9.16716 9.66422V4H9.16774L24 18.8323Z" fill="black"/>
+</g>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M73.2505 16.8061H76.5869V18.9145H73.9391C72.0857 18.9145 70.9202 17.8952 70.9202 15.9977V10.3921H69.0316V8.26609H70.9202L71.4677 5.47209H73.2329V8.26609H76.5869V10.3921H73.2505V16.8061ZM33.8133 4.85699L38.6679 15.681H38.809V4.85699H41.3333V18.9145H37.52L32.6654 8.09046H32.5243V18.9145H30V4.85699H33.8133ZM47.812 19.1254C44.7225 19.1254 42.7457 16.9641 42.7457 13.6079C42.7457 10.2517 44.6873 8.05518 47.812 8.05518C50.9367 8.05518 52.8429 10.1635 52.8429 13.6079C52.8429 17.0523 50.9014 19.1254 47.812 19.1254ZM47.812 17.017C49.1891 17.017 50.3363 16.5423 50.3715 15.1894V12.0265C50.3715 10.6383 49.2068 10.1635 47.812 10.1635C46.4172 10.1635 45.2171 10.6383 45.2171 12.0265V15.1894C45.2524 16.5599 46.4348 17.017 47.812 17.017ZM55.5444 8.24846L58.2979 16.6826H58.439L61.1926 8.24846H63.7346L59.9389 18.8968H56.7966L53.0186 8.24846H55.5429H55.5444ZM65.0419 8.26609H67.3722V18.9145H65.0419V8.26609ZM64.9001 4.85699H67.5126V6.86027H64.9001V4.85699ZM82.3064 19.143C79.4639 19.143 77.6458 16.9817 77.6458 13.6079C77.6458 10.2341 79.4286 8.07282 82.3064 8.07282C83.6483 8.07282 84.7425 8.59973 85.3958 9.58373H85.5369L85.9962 8.26609H87.7614V18.9145H85.9962L85.5369 17.6314H85.3958C84.6896 18.5625 83.5072 19.1423 82.3064 19.1423V19.143ZM82.7826 17.017C84.1774 17.017 85.3951 16.5776 85.4304 15.1894V12.0265C85.4304 10.603 84.159 10.1988 82.7297 10.1988C81.3004 10.1988 80.1172 10.6383 80.1172 12.0265V15.1894C80.1525 16.5952 81.3709 17.017 82.7826 17.017Z" fill="black"/>
 <defs>
-<linearGradient id="paint0_linear_1473_71" x1="31" y1="-2" x2="0.975591" y2="14.2625" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2622FF"/>
-<stop offset="1" stop-color="#A717FF"/>
-</linearGradient>
+<clipPath id="clip0_1923_1287">
+<rect width="24" height="14.8326" fill="white" transform="translate(0 4)"/>
+</clipPath>
 </defs>
 </svg>
--- a/api/core/model_runtime/model_providers/novita/_assets/icon_s_en.svg
+++ b/api/core/model_runtime/model_providers/novita/_assets/icon_s_en.svg
@@ -1,10 +1,3 @@
-<svg width="32" height="36" viewBox="0 0 32 36" fill="none" xmlns="http://www.w3.org/2000/svg">
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="white"/>
-<path fill-rule="evenodd" clip-rule="evenodd" d="M2 0C0.895431 0 0 0.895432 0 2V29.1891C0 30.2937 0.895433 31.1891 2 31.1891H5.51171L16.0608 35.1377C16.7145 35.3824 17.4114 34.8991 17.4114 34.2012V11.3669C17.4114 10.533 16.894 9.78665 16.1131 9.49405L5.51171 5.52152H25.58V31.1891H29.0917C30.1963 31.1891 31.0917 30.2937 31.0917 29.1891V2C31.0917 0.895431 30.1963 0 29.0917 0H2ZM14.6022 23.7351C15.0558 23.956 15.4239 23.6812 15.4239 23.1185C15.4239 22.5557 15.0558 21.9204 14.6022 21.6995C14.1486 21.4775 13.7804 21.7545 13.7804 22.3161C13.7804 22.8777 14.1486 23.513 14.6022 23.7351Z" fill="url(#paint0_linear_1473_97)"/>
-<defs>
-<linearGradient id="paint0_linear_1473_97" x1="31" y1="-2" x2="0.975591" y2="14.2625" gradientUnits="userSpaceOnUse">
-<stop stop-color="#2622FF"/>
-<stop offset="1" stop-color="#A717FF"/>
-</linearGradient>
-</defs>
+<svg width="24" height="15" viewBox="0 0 24 15" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M24 14.8323V14.8326H14.3246L9.16716 9.67507V14.8326H0V14.8314L9.16716 5.66422V0H9.16774L24 14.8323Z" fill="black"/>
 </svg>
--- a/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/L3-8B-Stheno-v3.2.yaml
@@ -0,0 +1,41 @@
+model: Sao10K/L3-8B-Stheno-v3.2
+label:
+  zh_Hans: L3 8B Stheno V3.2
+  en_US: L3 8B Stheno V3.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/_position.yaml
@@ -0,0 +1,41 @@
+# Deepseek Models
+- deepseek/deepseek-r1
+- deepseek/deepseek_v3
+
+# LLaMA Models
+- meta-llama/llama-3.3-70b-instruct
+- meta-llama/llama-3.2-11b-vision-instruct
+- meta-llama/llama-3.2-3b-instruct
+- meta-llama/llama-3.2-1b-instruct
+- meta-llama/llama-3.1-70b-instruct
+- meta-llama/llama-3.1-8b-instruct
+- meta-llama/llama-3.1-8b-instruct-max
+- meta-llama/llama-3.1-8b-instruct-bf16
+- meta-llama/llama-3-70b-instruct
+- meta-llama/llama-3-8b-instruct
+
+# Mistral Models
+- mistralai/mistral-nemo
+- mistralai/mistral-7b-instruct
+
+# Qwen Models
+- qwen/qwen-2.5-72b-instruct
+- qwen/qwen-2-72b-instruct
+- qwen/qwen-2-vl-72b-instruct
+- qwen/qwen-2-7b-instruct
+
+# Other Models
+- sao10k/L3-8B-Stheno-v3.2
+- sao10k/l3-70b-euryale-v2.1
+- sao10k/l31-70b-euryale-v2.2
+- sao10k/l3-8b-lunaris
+- jondurbin/airoboros-l2-70b
+- cognitivecomputations/dolphin-mixtral-8x22b
+- google/gemma-2-9b-it
+- nousresearch/hermes-2-pro-llama-3-8b
+- sophosympatheia/midnight-rose-70b
+- gryphe/mythomax-l2-13b
+- nousresearch/nous-hermes-llama2-13b
+- openchat/openchat-7b
+- teknium/openhermes-2.5-mistral-7b
+- microsoft/wizardlm-2-8x22b
--- a/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/airoboros-l2-70b.yaml
@@ -1,7 +1,7 @@
 model: jondurbin/airoboros-l2-70b
 label:
-  zh_Hans: jondurbin/airoboros-l2-70b
-  en_US: jondurbin/airoboros-l2-70b
+  zh_Hans: Airoboros L2 70B
+  en_US: Airoboros L2 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek-r1.yaml
@@ -0,0 +1,41 @@
+model: deepseek/deepseek-r1
+label:
+  zh_Hans: DeepSeek R1
+  en_US: DeepSeek R1
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.04'
+  output: '0.04'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/deepseek_v3.yaml
@@ -0,0 +1,41 @@
+model: deepseek/deepseek_v3
+label:
+  zh_Hans: DeepSeek V3
+  en_US: DeepSeek V3
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 64000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0089'
+  output: '0.0089'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/dolphin-mixtral-8x22b.yaml
@@ -1,7 +1,7 @@
 model: cognitivecomputations/dolphin-mixtral-8x22b
 label:
-  zh_Hans: cognitivecomputations/dolphin-mixtral-8x22b
-  en_US: cognitivecomputations/dolphin-mixtral-8x22b
+  zh_Hans: Dolphin Mixtral 8x22B
+  en_US: Dolphin Mixtral 8x22B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/gemma-2-9b-it.yaml
@@ -1,7 +1,7 @@
 model: google/gemma-2-9b-it
 label:
-  zh_Hans: google/gemma-2-9b-it
-  en_US: google/gemma-2-9b-it
+  zh_Hans: Gemma 2 9B
+  en_US: Gemma 2 9B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/hermes-2-pro-llama-3-8b.yaml
@@ -1,7 +1,7 @@
 model: nousresearch/hermes-2-pro-llama-3-8b
 label:
-  zh_Hans: nousresearch/hermes-2-pro-llama-3-8b
-  en_US: nousresearch/hermes-2-pro-llama-3-8b
+  zh_Hans: Hermes 2 Pro Llama 3 8B
+  en_US: Hermes 2 Pro Llama 3 8B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-70b-euryale-v2.1.yaml
@@ -1,7 +1,7 @@
 model: sao10k/l3-70b-euryale-v2.1
 label:
-  zh_Hans: sao10k/l3-70b-euryale-v2.1
-  en_US: sao10k/l3-70b-euryale-v2.1
+  zh_Hans: "L3 70B Euryale V2.1\t"
+  en_US: "L3 70B Euryale V2.1\t"
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l3-8b-lunaris.yaml
@@ -0,0 +1,41 @@
+model: sao10k/l3-8b-lunaris
+label:
+  zh_Hans: "Sao10k L3 8B Lunaris"
+  en_US: "Sao10k L3 8B Lunaris"
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/l31-70b-euryale-v2.2.yaml
@@ -0,0 +1,41 @@
+model: sao10k/l31-70b-euryale-v2.2
+label:
+  zh_Hans: L31 70B Euryale V2.2
+  en_US: L31 70B Euryale V2.2
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 16000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0148'
+  output: '0.0148'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-70b-instruct.yaml
@@ -1,7 +1,7 @@
 model: meta-llama/llama-3-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-70b-instruct
-  en_US: meta-llama/llama-3-70b-instruct
+  zh_Hans: Llama3 70b Instruct
+  en_US: Llama3 70b Instruct
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3-8b-instruct.yaml
@@ -1,7 +1,7 @@
 model: meta-llama/llama-3-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3-8b-instruct
-  en_US: meta-llama/llama-3-8b-instruct
+  zh_Hans: Llama 3 8B Instruct
+  en_US: Llama 3 8B Instruct
 model_type: llm
 features:
  - agent-thought
@@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.00063'
-  output: '0.00063'
+  input: '0.0004'
+  output: '0.0004'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-70b-instruct.yaml
@@ -1,13 +1,13 @@
 model: meta-llama/llama-3.1-70b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-70b-instruct
-  en_US: meta-llama/llama-3.1-70b-instruct
+  zh_Hans: Llama 3.1 70B Instruct
+  en_US: Llama 3.1 70B Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
-  context_size: 8192
+  context_size: 32768
 parameter_rules:
  - name: temperature
    use_template: temperature
@@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0055'
-  output: '0.0076'
+  input: '0.0034'
+  output: '0.0039'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-bf16.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-8b-instruct-bf16
+label:
+  zh_Hans: Llama 3.1 8B Instruct BF16
+  en_US: Llama 3.1 8B Instruct BF16
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 8192
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0006'
+  output: '0.0006'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct-max.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.1-8b-instruct-max
+label:
+  zh_Hans: "Llama3.1 8B Instruct Max\t"
+  en_US: "Llama3.1 8B Instruct Max\t"
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 16384
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0005'
+  output: '0.0005'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.1-8b-instruct.yaml
@@ -1,13 +1,13 @@
 model: meta-llama/llama-3.1-8b-instruct
 label:
-  zh_Hans: meta-llama/llama-3.1-8b-instruct
-  en_US: meta-llama/llama-3.1-8b-instruct
+  zh_Hans: Llama 3.1 8B Instruct
+  en_US: Llama 3.1 8B Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
-  context_size: 8192
+  context_size: 16384
 parameter_rules:
  - name: temperature
    use_template: temperature
@@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.001'
-  output: '0.001'
+  input: '0.0005'
+  output: '0.0005'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-11b-vision-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.2-11b-vision-instruct
+label:
+  zh_Hans: "Llama 3.2 11B Vision Instruct\t"
+  en_US: "Llama 3.2 11B Vision Instruct\t"
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 32768
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0006'
+  output: '0.0006'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.2-1b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.2-1b-instruct
+label:
+  zh_Hans: "Llama 3.2 1B Instruct\t"
+  en_US: "Llama 3.2 1B Instruct\t"
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131000
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0002'
+  output: '0.0002'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/Nous-Hermes-2-Mixtral-8x7B-DPO.yaml
@@ -1,7 +1,7 @@
-model: Nous-Hermes-2-Mixtral-8x7B-DPO
+model: meta-llama/llama-3.2-3b-instruct
 label:
-  zh_Hans: Nous-Hermes-2-Mixtral-8x7B-DPO
-  en_US: Nous-Hermes-2-Mixtral-8x7B-DPO
+  zh_Hans: Llama 3.2 3B Instruct
+  en_US: Llama 3.2 3B Instruct
 model_type: llm
 features:
  - agent-thought
@@ -35,7 +35,7 @@ parameter_rules:
    max: 2
    default: 0
 pricing:
-  input: '0.0027'
-  output: '0.0027'
+  input: '0.0003'
+  output: '0.0005'
  unit: '0.0001'
  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/llama-3.3-70b-instruct.yaml
@@ -0,0 +1,41 @@
+model: meta-llama/llama-3.3-70b-instruct
+label:
+  zh_Hans: Llama 3.3 70B Instruct
+  en_US: Llama 3.3 70B Instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 2
+    default: 1
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 2048
+    default: 512
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
+pricing:
+  input: '0.0039'
+  output: '0.0039'
+  unit: '0.0001'
+  currency: USD
--- a/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/midnight-rose-70b.yaml
@@ -1,7 +1,7 @@
 model: sophosympatheia/midnight-rose-70b
 label:
-  zh_Hans: sophosympatheia/midnight-rose-70b
-  en_US: sophosympatheia/midnight-rose-70b
+  zh_Hans: Midnight Rose 70B
+  en_US: Midnight Rose 70B
 model_type: llm
 features:
  - agent-thought
--- a/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/novita/llm/mistral-7b-instruct.yaml
@@ -1,7 +1,7 @@
 model: mistralai/mistral-7b-instruct
 label:
-  zh_Hans: mistralai/mistral-7b-instruct
-  en_US: mistralai/mistral-7b-instruct
+  zh_Hans: Mistral 7B Instruct
+  en_US: Mistral 7B Instruct
 model_type: llm
 features:
  - agent-thought
--- a/Show More
+++ b/Show More