Compare commits

..

2 Commits

Author SHA1 Message Date
alexcodelf
df82bccfdf feat: support speech2text and tts for gpustack 2025-01-07 09:19:39 +08:00
alexcodelf
5ba875f96b fix: gpustack llm and text_embedding model url path wrong after edited 2025-01-07 09:19:39 +08:00
551 changed files with 10957 additions and 18958 deletions

View File

@@ -8,7 +8,7 @@ inputs:
poetry-version:
description: Poetry version to set up
required: true
default: '2.0.1'
default: '1.8.4'
poetry-lockfile:
description: Path to the Poetry lockfile to restore cache from
required: true

View File

@@ -42,23 +42,25 @@ jobs:
run: poetry install -C api --with dev
- name: Check dependencies in pyproject.toml
run: poetry run -P api bash dev/pytest/pytest_artifacts.sh
run: poetry run -C api bash dev/pytest/pytest_artifacts.sh
- name: Run Unit tests
run: poetry run -P api bash dev/pytest/pytest_unit_tests.sh
run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh
- name: Run ModelRuntime
run: poetry run -P api bash dev/pytest/pytest_model_runtime.sh
run: poetry run -C api bash dev/pytest/pytest_model_runtime.sh
- name: Run dify config tests
run: poetry run -P api python dev/pytest/pytest_config_tests.py
run: poetry run -C api python dev/pytest/pytest_config_tests.py
- name: Run Tool
run: poetry run -P api bash dev/pytest/pytest_tools.sh
run: poetry run -C api bash dev/pytest/pytest_tools.sh
- name: Run mypy
run: |
poetry run -C api python -m mypy --install-types --non-interactive .
pushd api
poetry run python -m mypy --install-types --non-interactive .
popd
- name: Set up dotenvs
run: |
@@ -78,4 +80,4 @@ jobs:
ssrf_proxy
- name: Run Workflow
run: poetry run -P api bash dev/pytest/pytest_workflow.sh
run: poetry run -C api bash dev/pytest/pytest_workflow.sh

View File

@@ -5,8 +5,8 @@ on:
branches:
- "main"
- "deploy/dev"
tags:
- "*"
release:
types: [published]
concurrency:
group: build-push-${{ github.head_ref || github.run_id }}

View File

@@ -1,47 +0,0 @@
name: Build docker image
on:
pull_request:
branches:
- "main"
paths:
- api/Dockerfile
- web/Dockerfile
concurrency:
group: docker-build-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
build-docker:
runs-on: ubuntu-latest
strategy:
matrix:
include:
- service_name: "api-amd64"
platform: linux/amd64
context: "api"
- service_name: "api-arm64"
platform: linux/arm64
context: "api"
- service_name: "web-amd64"
platform: linux/amd64
context: "web"
- service_name: "web-arm64"
platform: linux/arm64
context: "web"
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build Docker Image
uses: docker/build-push-action@v6
with:
push: false
context: "{{defaultContext}}:${{ matrix.context }}"
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -38,12 +38,12 @@ jobs:
if: steps.changed-files.outputs.any_changed == 'true'
run: |
poetry run -C api ruff --version
poetry run -C api ruff check ./
poetry run -C api ruff format --check ./
poetry run -C api ruff check ./api
poetry run -C api ruff format --check ./api
- name: Dotenv check
if: steps.changed-files.outputs.any_changed == 'true'
run: poetry run -P api dotenv-linter ./api/.env.example ./web/.env.example
run: poetry run -C api dotenv-linter ./api/.env.example ./web/.env.example
- name: Lint hints
if: failure()
@@ -82,33 +82,6 @@ jobs:
if: steps.changed-files.outputs.any_changed == 'true'
run: yarn run lint
docker-compose-template:
name: Docker Compose Template
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v45
with:
files: |
docker/generate_docker_compose
docker/.env.example
docker/docker-compose-template.yaml
docker/docker-compose.yaml
- name: Generate Docker Compose
if: steps.changed-files.outputs.any_changed == 'true'
run: |
cd docker
./generate_docker_compose
- name: Check for changes
if: steps.changed-files.outputs.any_changed == 'true'
run: git diff --exit-code
superlinter:
name: SuperLinter

View File

@@ -70,4 +70,4 @@ jobs:
tidb
- name: Test Vector Stores
run: poetry run -P api bash dev/pytest/pytest_vdb.sh
run: poetry run -C api bash dev/pytest/pytest_vdb.sh

View File

@@ -1,4 +0,0 @@
{
"MD024": false,
"MD013": false
}

View File

@@ -1,45 +0,0 @@
# Changelog
All notable changes to Dify will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.15.8] - 2025-05-30
### Added
- Added gunicorn keepalive setting (#19537)
### Fixed
- Fixed database configuration to allow DB_EXTRAS to set search_path via options (#16a4f77)
- Fixed frontend third-party package security issues (#19655)
- Updated dependencies: huggingface-hub (~0.16.4 to ~0.31.0), transformers (~4.35.0 to ~4.39.0), and resend (~0.7.0 to ~2.9.0) (#19563)
- Downgrade boto3 from 1.36 to 1.35 (#19736)
## [0.15.7] - 2025-04-27
### Added
- Added support for GPT-4.1 in model providers (#18912)
- Added support for Amazon Bedrock DeepSeek-R1 model (#18908)
- Added support for Amazon Bedrock Claude Sonnet 3.7 model (#18788)
- Refined version compatibility logic in app DSL service
### Fixed
- Fixed issue with creating apps from template categories (#18807, #18868)
- Fixed DSL version check when creating apps from explore templates (#18872, #18878)
## [0.15.6] - 2025-04-22
### Security
- Fixed clickjacking vulnerability (#18552)
- Fixed reset password security issue (#18366)
- Updated reset password token when email code verification succeeds (#18362)
### Fixed
- Fixed Vertex AI Gemini 2.0 Flash 001 schema (#18405)

View File

@@ -25,9 +25,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="seguir en X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="seguir en LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Descargas de Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="suivre sur X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="suivre sur LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Tirages Docker" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="X(Twitter)でフォロー"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="LinkedInでフォロー"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -25,9 +25,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -22,9 +22,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="follow on X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="follow on LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="X(Twitter)'da takip et"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="LinkedIn'da takip et"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Çekmeleri" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">
@@ -65,6 +62,8 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
![providers-v5](https://github.com/langgenius/dify/assets/13230914/5a17bdbe-097a-4100-8363-40255b70f6e3)
Özür dilerim, haklısınız. Daha anlamlı ve akıcı bir çeviri yapmaya çalışayım. İşte güncellenmiş çeviri:
**3. Prompt IDE**:
Komut istemlerini oluşturmak, model performansını karşılaştırmak ve sohbet tabanlı uygulamalara metin-konuşma gibi ek özellikler eklemek için kullanıcı dostu bir arayüz.
@@ -151,6 +150,8 @@ Görsel bir arayüz üzerinde güçlü AI iş akışları oluşturun ve test edi
## Dify'ı Kullanma
- **Cloud </br>**
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
-
Herkesin sıfır kurulumla denemesi için bir [Dify Cloud](https://dify.ai) hizmeti sunuyoruz. Bu hizmet, kendi kendine dağıtılan versiyonun tüm yeteneklerini sağlar ve sandbox planında 200 ücretsiz GPT-4 çağrısı içerir.
- **Dify Topluluk Sürümünü Kendi Sunucunuzda Barındırma</br>**
@@ -176,6 +177,8 @@ GitHub'da Dify'a yıldız verin ve yeni sürümlerden anında haberdar olun.
>- RAM >= 4GB
</br>
İşte verdiğiniz metnin Türkçe çevirisi, kod bloğu içinde:
Dify sunucusunu başlatmanın en kolay yolu, [docker-compose.yml](docker/docker-compose.yaml) dosyamızı çalıştırmaktır. Kurulum komutunu çalıştırmadan önce, makinenizde [Docker](https://docs.docker.com/get-docker/) ve [Docker Compose](https://docs.docker.com/compose/install/)'un kurulu olduğundan emin olun:
```bash

View File

@@ -21,9 +21,6 @@
<a href="https://twitter.com/intent/follow?screen_name=dify_ai" target="_blank">
<img src="https://img.shields.io/twitter/follow/dify_ai?logo=X&color=%20%23f5f5f5"
alt="theo dõi trên X(Twitter)"></a>
<a href="https://www.linkedin.com/company/langgenius/" target="_blank">
<img src="https://custom-icon-badges.demolab.com/badge/LinkedIn-0A66C2?logo=linkedin-white&logoColor=fff"
alt="theo dõi trên LinkedIn"></a>
<a href="https://hub.docker.com/u/langgenius" target="_blank">
<img alt="Docker Pulls" src="https://img.shields.io/docker/pulls/langgenius/dify-web?labelColor=%20%23FDB062&color=%20%23f79009"></a>
<a href="https://github.com/langgenius/dify/graphs/commit-activity" target="_blank">

View File

@@ -430,7 +430,4 @@ CREATE_TIDB_SERVICE_JOB_ENABLED=false
# Maximum number of submitted thread count in a ThreadPool for parallel node execution
MAX_SUBMIT_COUNT=100
# Lockout duration in seconds
LOGIN_LOCKOUT_DURATION=86400
# Prevent Clickjacking
ALLOW_EMBED=false
LOGIN_LOCKOUT_DURATION=86400

View File

@@ -53,12 +53,10 @@ ignore = [
"FURB152", # math-constant
"UP007", # non-pep604-annotation
"UP032", # f-string
"UP045", # non-pep604-annotation-optional
"B005", # strip-with-multi-characters
"B006", # mutable-argument-default
"B007", # unused-loop-control-variable
"B026", # star-arg-unpacking-after-keyword-arg
"B903", # class-as-data-structure
"B904", # raise-without-from-inside-except
"B905", # zip-without-explicit-strict
"N806", # non-lowercase-variable-in-function

View File

@@ -4,7 +4,7 @@ FROM python:3.12-slim-bookworm AS base
WORKDIR /app/api
# Install Poetry
ENV POETRY_VERSION=2.0.1
ENV POETRY_VERSION=1.8.4
# if you located in China, you can use aliyun mirror to speed up
# RUN pip install --no-cache-dir poetry==${POETRY_VERSION} -i https://mirrors.aliyun.com/pypi/simple/
@@ -48,18 +48,16 @@ ENV TZ=UTC
WORKDIR /app/api
RUN \
apt-get update \
# Install dependencies
&& apt-get install -y --no-install-recommends \
# basic environment
curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# For Security
expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
# install a chinese font to support the use of tools like matplotlib
fonts-noto-cjk \
# install libmagic to support the use of python-magic guess MIMETYPE
libmagic1 \
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# if you located in China, you can use aliyun mirror to speed up
# && echo "deb http://mirrors.aliyun.com/debian testing main" > /etc/apt/sources.list \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends expat=2.6.4-1 libldap-2.5-0=2.5.19+dfsg-1 perl=5.40.0-8 libsqlite3-0=3.46.1-1 zlib1g=1:1.3.dfsg+really1.3.1-1+b1 \
# install a chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
@@ -78,6 +76,7 @@ COPY . /app/api/
COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ARG COMMIT_SHA
ENV COMMIT_SHA=${COMMIT_SHA}

View File

@@ -79,5 +79,5 @@
2. Run the tests locally with mocked system environment variables in `tool.pytest_env` section in `pyproject.toml`
```bash
poetry run -P api bash dev/pytest/pytest_all_tests.sh
poetry run -C api bash dev/pytest/pytest_all_tests.sh
```

View File

@@ -146,7 +146,7 @@ class EndpointConfig(BaseSettings):
)
CONSOLE_WEB_URL: str = Field(
description="Base URL for the console web interface,used for frontend references and CORS configuration",
description="Base URL for the console web interface," "used for frontend references and CORS configuration",
default="",
)

View File

@@ -1,40 +1,9 @@
from typing import Optional
from pydantic import Field, NonNegativeInt, computed_field
from pydantic import Field, NonNegativeInt
from pydantic_settings import BaseSettings
class HostedCreditConfig(BaseSettings):
HOSTED_MODEL_CREDIT_CONFIG: str = Field(
description="Model credit configuration in format 'model:credits,model:credits', e.g., 'gpt-4:20,gpt-4o:10'",
default="",
)
def get_model_credits(self, model_name: str) -> int:
"""
Get credit value for a specific model name.
Returns 1 if model is not found in configuration (default credit).
:param model_name: The name of the model to search for
:return: The credit value for the model
"""
if not self.HOSTED_MODEL_CREDIT_CONFIG:
return 1
try:
credit_map = dict(
item.strip().split(":", 1) for item in self.HOSTED_MODEL_CREDIT_CONFIG.split(",") if ":" in item
)
# Search for matching model pattern
for pattern, credit in credit_map.items():
if pattern.strip() == model_name:
return int(credit)
return 1 # Default quota if no match found
except (ValueError, AttributeError):
return 1 # Return default quota if parsing fails
class HostedOpenAiConfig(BaseSettings):
"""
Configuration for hosted OpenAI service
@@ -212,7 +181,7 @@ class HostedFetchAppTemplateConfig(BaseSettings):
"""
HOSTED_FETCH_APP_TEMPLATES_MODE: str = Field(
description="Mode for fetching app templates: remote, db, or builtin default to remote,",
description="Mode for fetching app templates: remote, db, or builtin" " default to remote,",
default="remote",
)
@@ -233,7 +202,5 @@ class HostedServiceConfig(
HostedZhipuAIConfig,
# moderation
HostedModerationConfig,
# credit config
HostedCreditConfig,
):
pass

View File

@@ -1,5 +1,5 @@
from typing import Any, Literal, Optional
from urllib.parse import parse_qsl, quote_plus
from urllib.parse import quote_plus
from pydantic import Field, NonNegativeInt, PositiveFloat, PositiveInt, computed_field
from pydantic_settings import BaseSettings
@@ -166,28 +166,14 @@ class DatabaseConfig(BaseSettings):
default=False,
)
@computed_field # type: ignore[misc]
@property
@computed_field
def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]:
# Parse DB_EXTRAS for 'options'
db_extras_dict = dict(parse_qsl(self.DB_EXTRAS))
options = db_extras_dict.get("options", "")
# Always include timezone
timezone_opt = "-c timezone=UTC"
if options:
# Merge user options and timezone
merged_options = f"{options} {timezone_opt}"
else:
merged_options = timezone_opt
connect_args = {"options": merged_options}
return {
"pool_size": self.SQLALCHEMY_POOL_SIZE,
"max_overflow": self.SQLALCHEMY_MAX_OVERFLOW,
"pool_recycle": self.SQLALCHEMY_POOL_RECYCLE,
"pool_pre_ping": self.SQLALCHEMY_POOL_PRE_PING,
"connect_args": connect_args,
"connect_args": {"options": "-c timezone=UTC"},
}

View File

@@ -33,9 +33,3 @@ class MilvusConfig(BaseSettings):
description="Name of the Milvus database to connect to (default is 'default')",
default="default",
)
MILVUS_ENABLE_HYBRID_SEARCH: bool = Field(
description="Enable hybrid search features (requires Milvus >= 2.5.0). Set to false for compatibility with "
"older versions",
default=True,
)

View File

@@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):
CURRENT_VERSION: str = Field(
description="Dify version",
default="0.15.8",
default="0.14.2",
)
COMMIT_SHA: str = Field(

View File

@@ -1,32 +1,12 @@
import mimetypes
import os
import platform
import re
import urllib.parse
import warnings
from collections.abc import Mapping
from typing import Any
from uuid import uuid4
import httpx
try:
import magic
except ImportError:
if platform.system() == "Windows":
warnings.warn(
"To use python-magic guess MIMETYPE, you need to run `pip install python-magic-bin`", stacklevel=2
)
elif platform.system() == "Darwin":
warnings.warn("To use python-magic guess MIMETYPE, you need to run `brew install libmagic`", stacklevel=2)
elif platform.system() == "Linux":
warnings.warn(
"To use python-magic guess MIMETYPE, you need to run `sudo apt-get install libmagic1`", stacklevel=2
)
else:
warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
magic = None # type: ignore
from pydantic import BaseModel
from configs import dify_config
@@ -67,13 +47,6 @@ def guess_file_info_from_response(response: httpx.Response):
# If guessing fails, use Content-Type from response headers
mimetype = response.headers.get("Content-Type", "application/octet-stream")
# Use python-magic to guess MIME type if still unknown or generic
if mimetype == "application/octet-stream" and magic is not None:
try:
mimetype = magic.from_buffer(response.content[:1024], mime=True)
except magic.MagicException:
pass
extension = os.path.splitext(filename)[1]
# Ensure filename has an extension

View File

@@ -56,7 +56,7 @@ class InsertExploreAppListApi(Resource):
app = App.query.filter(App.id == args["app_id"]).first()
if not app:
raise NotFound(f"App '{args['app_id']}' is not found")
raise NotFound(f'App \'{args["app_id"]}\' is not found')
site = app.site
if not site:

View File

@@ -22,7 +22,7 @@ from controllers.console.wraps import account_initialization_required, setup_req
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from libs.login import login_required
from models import App, AppMode
from models.model import AppMode
from services.audio_service import AudioService
from services.errors.audio import (
AudioTooLargeServiceError,
@@ -79,7 +79,7 @@ class ChatMessageTextApi(Resource):
@login_required
@account_initialization_required
@get_app_model
def post(self, app_model: App):
def post(self, app_model):
from werkzeug.exceptions import InternalServerError
try:
@@ -98,13 +98,9 @@ class ChatMessageTextApi(Resource):
and app_model.workflow.features_dict
):
text_to_speech = app_model.workflow.features_dict.get("text_to_speech")
if text_to_speech is None:
raise ValueError("TTS is not enabled")
voice = args.get("voice") or text_to_speech.get("voice")
else:
try:
if app_model.app_model_config is None:
raise ValueError("AppModelConfig not found")
voice = args.get("voice") or app_model.app_model_config.text_to_speech_dict.get("voice")
except Exception:
voice = None

View File

@@ -8,7 +8,7 @@ from constants.languages import languages
from controllers.console import api
from controllers.console.auth.error import EmailCodeError, InvalidEmailError, InvalidTokenError, PasswordMismatchError
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
from controllers.console.wraps import email_password_login_enabled, setup_required
from controllers.console.wraps import setup_required
from events.tenant_event import tenant_was_created
from extensions.ext_database import db
from libs.helper import email, extract_remote_ip
@@ -22,7 +22,6 @@ from services.feature_service import FeatureService
class ForgotPasswordSendEmailApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")
@@ -54,7 +53,6 @@ class ForgotPasswordSendEmailApi(Resource):
class ForgotPasswordCheckApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=str, required=True, location="json")
@@ -74,20 +72,11 @@ class ForgotPasswordCheckApi(Resource):
if args["code"] != token_data.get("code"):
raise EmailCodeError()
# Verified, revoke the first token
AccountService.revoke_reset_password_token(args["token"])
# Refresh token data by generating a new token
_, new_token = AccountService.generate_reset_password_token(
user_email, code=args["code"], additional_data={"phase": "reset"}
)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
return {"is_valid": True, "email": token_data.get("email")}
class ForgotPasswordResetApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("token", type=str, required=True, nullable=False, location="json")
@@ -106,9 +95,6 @@ class ForgotPasswordResetApi(Resource):
if reset_data is None:
raise InvalidTokenError()
# Must use token in reset phase
if reset_data.get("phase", "") != "reset":
raise InvalidTokenError()
AccountService.revoke_reset_password_token(token)

View File

@@ -22,7 +22,7 @@ from controllers.console.error import (
EmailSendIpLimitError,
NotAllowedCreateWorkspace,
)
from controllers.console.wraps import email_password_login_enabled, setup_required
from controllers.console.wraps import setup_required
from events.tenant_event import tenant_was_created
from libs.helper import email, extract_remote_ip
from libs.password import valid_password
@@ -38,7 +38,6 @@ class LoginApi(Resource):
"""Resource for user login."""
@setup_required
@email_password_login_enabled
def post(self):
"""Authenticate user and login."""
parser = reqparse.RequestParser()
@@ -111,7 +110,6 @@ class LogoutApi(Resource):
class ResetPasswordSendEmailApi(Resource):
@setup_required
@email_password_login_enabled
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("email", type=email, required=True, location="json")

View File

@@ -52,12 +52,12 @@ class DatasetListApi(Resource):
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
include_all = request.args.get("include_all", default="false").lower() == "true"
if ids:
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(
page, limit, current_user.current_tenant_id, current_user, search, tag_ids, include_all
page, limit, current_user.current_tenant_id, current_user, search, tag_ids
)
# check embedding setting
@@ -457,7 +457,7 @@ class DatasetIndexingEstimateApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider " "in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -619,7 +619,9 @@ class DatasetRetrievalSettingApi(Resource):
vector_type = dify_config.VECTOR_STORE
match vector_type:
case (
VectorType.RELYT
VectorType.MILVUS
| VectorType.RELYT
| VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
| VectorType.TENCENT
@@ -638,12 +640,10 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.ELASTICSEARCH
| VectorType.ELASTICSEARCH_JA
| VectorType.PGVECTOR
| VectorType.TIDB_ON_QDRANT
| VectorType.LINDORM
| VectorType.COUCHBASE
| VectorType.MILVUS
):
return {
"retrieval_method": [
@@ -683,7 +683,6 @@ class DatasetRetrievalSettingMockApi(Resource):
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.ELASTICSEARCH
| VectorType.ELASTICSEARCH_JA
| VectorType.COUCHBASE
| VectorType.PGVECTOR
| VectorType.LINDORM

View File

@@ -257,8 +257,7 @@ class DatasetDocumentListApi(Resource):
parser.add_argument("original_document_id", type=str, required=False, location="json")
parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
parser.add_argument(
"doc_language", type=str, default="English", required=False, nullable=False, location="json"
)
@@ -350,7 +349,8 @@ class DatasetInitApi(Resource):
)
except InvokeAuthorizationError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -525,7 +525,8 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
return response.model_dump(), 200
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)

View File

@@ -168,7 +168,8 @@ class DatasetDocumentSegmentApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -216,7 +217,8 @@ class DatasetDocumentSegmentAddApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -265,7 +267,8 @@ class DatasetDocumentSegmentUpdateApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -365,9 +368,9 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
result = []
for index, row in df.iterrows():
if document.doc_form == "qa_model":
data = {"content": row.iloc[0], "answer": row.iloc[1]}
data = {"content": row[0], "answer": row[1]}
else:
data = {"content": row.iloc[0]}
data = {"content": row[0]}
result.append(data)
if len(result) == 0:
raise ValueError("The CSV file is empty.")
@@ -434,7 +437,8 @@ class ChildChunkAddApi(Resource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)

View File

@@ -32,7 +32,7 @@ class ConversationListApi(InstalledAppResource):
pinned = None
if "pinned" in args and args["pinned"] is not None:
pinned = args["pinned"] == "true"
pinned = True if args["pinned"] == "true" else False
try:
with Session(db.engine) as session:

View File

@@ -50,7 +50,7 @@ class MessageListApi(InstalledAppResource):
try:
return MessageService.pagination_by_first_id(
app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
app_model, current_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")

View File

@@ -154,16 +154,3 @@ def enterprise_license_required(view):
return view(*args, **kwargs)
return decorated
def email_password_login_enabled(view):
@wraps(view)
def decorated(*args, **kwargs):
features = FeatureService.get_system_features()
if features.enable_email_password_login:
return view(*args, **kwargs)
# otherwise, return 403
abort(403)
return decorated

View File

@@ -1,5 +1,3 @@
import json
from flask_restful import Resource, reqparse # type: ignore
from controllers.console.wraps import setup_required
@@ -31,34 +29,4 @@ class EnterpriseWorkspace(Resource):
return {"message": "enterprise workspace created."}
class EnterpriseWorkspaceNoOwnerEmail(Resource):
@setup_required
@inner_api_only
def post(self):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, location="json")
args = parser.parse_args()
tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True)
tenant_was_created.send(tenant)
resp = {
"id": tenant.id,
"name": tenant.name,
"encrypt_public_key": tenant.encrypt_public_key,
"plan": tenant.plan,
"status": tenant.status,
"custom_config": json.loads(tenant.custom_config) if tenant.custom_config else {},
"created_at": tenant.created_at.isoformat() if tenant.created_at else None,
"updated_at": tenant.updated_at.isoformat() if tenant.updated_at else None,
}
return {
"message": "enterprise workspace created.",
"tenant": resp,
}
api.add_resource(EnterpriseWorkspace, "/enterprise/workspace")
api.add_resource(EnterpriseWorkspaceNoOwnerEmail, "/enterprise/workspace/ownerless")

View File

@@ -7,4 +7,4 @@ api = ExternalApi(bp)
from . import index
from .app import app, audio, completion, conversation, file, message, workflow
from .dataset import dataset, document, hit_testing, segment, upload_file
from .dataset import dataset, document, hit_testing, segment

View File

@@ -31,11 +31,8 @@ class DatasetListApi(DatasetApiResource):
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
include_all = request.args.get("include_all", default="false").lower() == "true"
datasets, total = DatasetService.get_datasets(
page, limit, tenant_id, current_user, search, tag_ids, include_all
)
datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
# check embedding setting
provider_manager = ProviderManager()
configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)

View File

@@ -18,7 +18,6 @@ from controllers.service_api.app.error import (
from controllers.service_api.dataset.error import (
ArchivedDocumentImmutableError,
DocumentIndexingError,
InvalidMetadataError,
)
from controllers.service_api.wraps import DatasetApiResource, cloud_edition_billing_resource_check
from core.errors.error import ProviderTokenNotInitError
@@ -51,9 +50,6 @@ class DocumentAddByTextApi(DatasetApiResource):
"indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
)
parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
args = parser.parse_args()
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
@@ -65,28 +61,6 @@ class DocumentAddByTextApi(DatasetApiResource):
if not dataset.indexing_technique and not args["indexing_technique"]:
raise ValueError("indexing_technique is required.")
# Validate metadata if provided
if args.get("doc_type") or args.get("doc_metadata"):
if not args.get("doc_type") or not args.get("doc_metadata"):
raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise InvalidMetadataError(
"Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
)
if not isinstance(args["doc_metadata"], dict):
raise InvalidMetadataError("doc_metadata must be a dictionary")
# Validate metadata schema based on doc_type
if args["doc_type"] != "others":
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
for key, value in args["doc_metadata"].items():
if key in metadata_schema and not isinstance(value, metadata_schema[key]):
raise InvalidMetadataError(f"Invalid type for metadata field {key}")
# set to MetaDataConfig
args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
text = args.get("text")
name = args.get("name")
if text is None or name is None:
@@ -133,8 +107,6 @@ class DocumentUpdateByTextApi(DatasetApiResource):
"doc_language", type=str, default="English", required=False, nullable=False, location="json"
)
parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
parser.add_argument("doc_type", type=str, required=False, nullable=True, location="json")
parser.add_argument("doc_metadata", type=dict, required=False, nullable=True, location="json")
args = parser.parse_args()
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
@@ -143,32 +115,6 @@ class DocumentUpdateByTextApi(DatasetApiResource):
if not dataset:
raise ValueError("Dataset is not exist.")
# indexing_technique is already set in dataset since this is an update
args["indexing_technique"] = dataset.indexing_technique
# Validate metadata if provided
if args.get("doc_type") or args.get("doc_metadata"):
if not args.get("doc_type") or not args.get("doc_metadata"):
raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise InvalidMetadataError(
"Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
)
if not isinstance(args["doc_metadata"], dict):
raise InvalidMetadataError("doc_metadata must be a dictionary")
# Validate metadata schema based on doc_type
if args["doc_type"] != "others":
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
for key, value in args["doc_metadata"].items():
if key in metadata_schema and not isinstance(value, metadata_schema[key]):
raise InvalidMetadataError(f"Invalid type for metadata field {key}")
# set to MetaDataConfig
args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
if args["text"]:
text = args.get("text")
name = args.get("name")
@@ -215,30 +161,6 @@ class DocumentAddByFileApi(DatasetApiResource):
args["doc_form"] = "text_model"
if "doc_language" not in args:
args["doc_language"] = "English"
# Validate metadata if provided
if args.get("doc_type") or args.get("doc_metadata"):
if not args.get("doc_type") or not args.get("doc_metadata"):
raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise InvalidMetadataError(
"Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
)
if not isinstance(args["doc_metadata"], dict):
raise InvalidMetadataError("doc_metadata must be a dictionary")
# Validate metadata schema based on doc_type
if args["doc_type"] != "others":
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
for key, value in args["doc_metadata"].items():
if key in metadata_schema and not isinstance(value, metadata_schema[key]):
raise InvalidMetadataError(f"Invalid type for metadata field {key}")
# set to MetaDataConfig
args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
# get dataset info
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
@@ -306,29 +228,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
if "doc_language" not in args:
args["doc_language"] = "English"
# Validate metadata if provided
if args.get("doc_type") or args.get("doc_metadata"):
if not args.get("doc_type") or not args.get("doc_metadata"):
raise InvalidMetadataError("Both doc_type and doc_metadata must be provided when adding metadata")
if args["doc_type"] not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise InvalidMetadataError(
"Invalid doc_type. Must be one of: " + ", ".join(DocumentService.DOCUMENT_METADATA_SCHEMA.keys())
)
if not isinstance(args["doc_metadata"], dict):
raise InvalidMetadataError("doc_metadata must be a dictionary")
# Validate metadata schema based on doc_type
if args["doc_type"] != "others":
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[args["doc_type"]]
for key, value in args["doc_metadata"].items():
if key in metadata_schema and not isinstance(value, metadata_schema[key]):
raise InvalidMetadataError(f"Invalid type for metadata field {key}")
# set to MetaDataConfig
args["metadata"] = {"doc_type": args["doc_type"], "doc_metadata": args["doc_metadata"]}
# get dataset info
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)

View File

@@ -53,7 +53,8 @@ class SegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -94,7 +95,8 @@ class SegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -173,7 +175,8 @@ class DatasetSegmentApi(DatasetApiResource):
)
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)

View File

@@ -1,54 +0,0 @@
from werkzeug.exceptions import NotFound
from controllers.service_api import api
from controllers.service_api.wraps import (
DatasetApiResource,
)
from core.file import helpers as file_helpers
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import UploadFile
from services.dataset_service import DocumentService
class UploadFileApi(DatasetApiResource):
def get(self, tenant_id, dataset_id, document_id):
"""Get upload file."""
# check dataset
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
dataset = db.session.query(Dataset).filter(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise NotFound("Dataset not found.")
# check document
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
# check upload file
if document.data_source_type != "upload_file":
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
data_source_info = document.data_source_info_dict
if data_source_info and "upload_file_id" in data_source_info:
file_id = data_source_info["upload_file_id"]
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("UploadFile not found.")
else:
raise ValueError("Upload file id not found in document data source info.")
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": url,
"download_url": f"{url}&as_attachment=true",
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at.timestamp(),
}, 200
api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")

View File

@@ -1,5 +1,5 @@
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from datetime import UTC, datetime
from enum import Enum
from functools import wraps
from typing import Optional
@@ -8,8 +8,6 @@ from flask import current_app, request
from flask_login import user_logged_in # type: ignore
from flask_restful import Resource # type: ignore
from pydantic import BaseModel
from sqlalchemy import select, update
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden, Unauthorized
from extensions.ext_database import db
@@ -176,7 +174,7 @@ def validate_dataset_token(view=None):
return decorator
def validate_and_get_api_token(scope: str | None = None):
def validate_and_get_api_token(scope=None):
"""
Validate and get API token.
"""
@@ -190,29 +188,20 @@ def validate_and_get_api_token(scope: str | None = None):
if auth_scheme != "bearer":
raise Unauthorized("Authorization scheme must be 'Bearer'")
current_time = datetime.now(UTC).replace(tzinfo=None)
cutoff_time = current_time - timedelta(minutes=1)
with Session(db.engine, expire_on_commit=False) as session:
update_stmt = (
update(ApiToken)
.where(
ApiToken.token == auth_token,
(ApiToken.last_used_at.is_(None) | (ApiToken.last_used_at < cutoff_time)),
ApiToken.type == scope,
)
.values(last_used_at=current_time)
.returning(ApiToken)
api_token = (
db.session.query(ApiToken)
.filter(
ApiToken.token == auth_token,
ApiToken.type == scope,
)
result = session.execute(update_stmt)
api_token = result.scalar_one_or_none()
.first()
)
if not api_token:
stmt = select(ApiToken).where(ApiToken.token == auth_token, ApiToken.type == scope)
api_token = session.scalar(stmt)
if not api_token:
raise Unauthorized("Access token is invalid")
else:
session.commit()
if not api_token:
raise Unauthorized("Access token is invalid")
api_token.last_used_at = datetime.now(UTC).replace(tzinfo=None)
db.session.commit()
return api_token
@@ -240,7 +229,7 @@ def create_or_update_end_user_for_user_id(app_model: App, user_id: Optional[str]
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type="service_api",
is_anonymous=user_id == "DEFAULT-USER",
is_anonymous=True if user_id == "DEFAULT-USER" else False,
session_id=user_id,
)
db.session.add(end_user)

View File

@@ -39,7 +39,7 @@ class ConversationListApi(WebApiResource):
pinned = None
if "pinned" in args and args["pinned"] is not None:
pinned = args["pinned"] == "true"
pinned = True if args["pinned"] == "true" else False
try:
with Session(db.engine) as session:

View File

@@ -91,7 +91,7 @@ class MessageListApi(WebApiResource):
try:
return MessageService.pagination_by_first_id(
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"], "desc"
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")

View File

@@ -104,6 +104,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
# recalc llm max tokens
prompt_messages = self._organize_prompt_messages()
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
# invoke model
chunks = model_instance.invoke_llm(
prompt_messages=prompt_messages,
@@ -171,7 +172,7 @@ class CotAgentRunner(BaseAgentRunner, ABC):
self.save_agent_thought(
agent_thought=agent_thought,
tool_name=(scratchpad.action.action_name if scratchpad.action and not scratchpad.is_final() else ""),
tool_name=scratchpad.action.action_name if scratchpad.action else "",
tool_input={scratchpad.action.action_name: scratchpad.action.action_input} if scratchpad.action else {},
tool_invoke_meta={},
thought=scratchpad.thought or "",

View File

@@ -84,6 +84,7 @@ class FunctionCallAgentRunner(BaseAgentRunner):
# recalc llm max tokens
prompt_messages = self._organize_prompt_messages()
self.recalc_llm_max_tokens(self.model_config, prompt_messages)
# invoke model
chunks: Union[Generator[LLMResultChunk, None, None], LLMResult] = model_instance.invoke_llm(
prompt_messages=prompt_messages,

View File

@@ -55,6 +55,20 @@ class AgentChatAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
memory = None
if application_generate_entity.conversation_id:
# get memory of conversation (read-only)
@@ -188,7 +202,7 @@ class AgentChatAppRunner(AppRunner):
# change function call strategy based on LLM model
llm_model = cast(LargeLanguageModel, model_instance.model_type_instance)
model_schema = llm_model.get_model_schema(model_instance.model, model_instance.credentials)
if not model_schema:
if not model_schema or not model_schema.features:
raise ValueError("Model schema not found")
if {ModelFeature.MULTI_TOOL_CALL, ModelFeature.TOOL_CALL}.intersection(model_schema.features or []):

View File

@@ -167,7 +167,8 @@ class AppQueueManager:
else:
if isinstance(data, DeclarativeMeta) or hasattr(data, "_sa_instance_state"):
raise TypeError(
"Critical Error: Passing SQLAlchemy Model instances that cause thread safety issues is not allowed."
"Critical Error: Passing SQLAlchemy Model instances "
"that cause thread safety issues is not allowed."
)

View File

@@ -15,8 +15,10 @@ from core.app.features.annotation_reply.annotation_reply import AnnotationReplyF
from core.app.features.hosting_moderation.hosting_moderation import HostingModerationFeature
from core.external_data_tool.external_data_fetch import ExternalDataFetch
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_manager import ModelInstance
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.moderation.input_moderation import InputModeration
from core.prompt.advanced_prompt_transform import AdvancedPromptTransform
@@ -29,6 +31,106 @@ if TYPE_CHECKING:
class AppRunner:
def get_pre_calculate_rest_tokens(
self,
app_record: App,
model_config: ModelConfigWithCredentialsEntity,
prompt_template_entity: PromptTemplateEntity,
inputs: Mapping[str, str],
files: Sequence["File"],
query: Optional[str] = None,
) -> int:
"""
Get pre calculate rest tokens
:param app_record: app record
:param model_config: model config entity
:param prompt_template_entity: prompt template entity
:param inputs: inputs
:param files: files
:param query: query
:return:
"""
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
max_tokens = 0
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
max_tokens = (
model_config.parameters.get(parameter_rule.name)
or model_config.parameters.get(parameter_rule.use_template or "")
) or 0
if model_context_tokens is None:
return -1
if max_tokens is None:
max_tokens = 0
# get prompt messages without memory and context
prompt_messages, stop = self.organize_prompt_messages(
app_record=app_record,
model_config=model_config,
prompt_template_entity=prompt_template_entity,
inputs=inputs,
files=files,
query=query,
)
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
rest_tokens: int = model_context_tokens - max_tokens - prompt_tokens
if rest_tokens < 0:
raise InvokeBadRequestError(
"Query or prefix prompt is too long, you can reduce the prefix prompt, "
"or shrink the max token, or switch to a llm with a larger token limit size."
)
return rest_tokens
def recalc_llm_max_tokens(
self, model_config: ModelConfigWithCredentialsEntity, prompt_messages: list[PromptMessage]
):
# recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
model_instance = ModelInstance(
provider_model_bundle=model_config.provider_model_bundle, model=model_config.model
)
model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
max_tokens = 0
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
max_tokens = (
model_config.parameters.get(parameter_rule.name)
or model_config.parameters.get(parameter_rule.use_template or "")
) or 0
if model_context_tokens is None:
return -1
if max_tokens is None:
max_tokens = 0
prompt_tokens = model_instance.get_llm_num_tokens(prompt_messages)
if prompt_tokens + max_tokens > model_context_tokens:
max_tokens = max(model_context_tokens - prompt_tokens, 16)
for parameter_rule in model_config.model_schema.parameter_rules:
if parameter_rule.name == "max_tokens" or (
parameter_rule.use_template and parameter_rule.use_template == "max_tokens"
):
model_config.parameters[parameter_rule.name] = max_tokens
def organize_prompt_messages(
self,
app_record: App,

View File

@@ -50,6 +50,20 @@ class ChatAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
memory = None
if application_generate_entity.conversation_id:
# get memory of conversation (read-only)
@@ -180,6 +194,9 @@ class ChatAppRunner(AppRunner):
if hosting_moderation_result:
return
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,

View File

@@ -43,6 +43,20 @@ class CompletionAppRunner(AppRunner):
query = application_generate_entity.query
files = application_generate_entity.files
# Pre-calculate the number of tokens of the prompt messages,
# and return the rest number of tokens by model context token size limit and max token size limit.
# If the rest number of tokens is not enough, raise exception.
# Include: prompt template, inputs, query(optional), files(optional)
# Not Include: memory, external data, dataset context
self.get_pre_calculate_rest_tokens(
app_record=app_record,
model_config=application_generate_entity.model_conf,
prompt_template_entity=app_config.prompt_template,
inputs=inputs,
files=files,
query=query,
)
# organize all inputs and template to prompt messages
# Include: prompt template, inputs, query(optional), files(optional)
prompt_messages, stop = self.organize_prompt_messages(
@@ -138,6 +152,9 @@ class CompletionAppRunner(AppRunner):
if hosting_moderation_result:
return
# Re-calculate the max tokens if sum(prompt_token + max_tokens) over model token limit
self.recalc_llm_max_tokens(model_config=application_generate_entity.model_conf, prompt_messages=prompt_messages)
# Invoke model
model_instance = ModelInstance(
provider_model_bundle=application_generate_entity.model_conf.provider_model_bundle,

View File

@@ -89,7 +89,6 @@ class MessageBasedAppGenerator(BaseAppGenerator):
Conversation.id == conversation_id,
Conversation.app_id == app_model.id,
Conversation.status == "normal",
Conversation.is_deleted.is_(False),
]
if isinstance(user, Account):

View File

@@ -145,7 +145,7 @@ class MessageCycleManage:
# get extension
if "." in message_file.url:
extension = f".{message_file.url.split('.')[-1]}"
extension = f'.{message_file.url.split(".")[-1]}'
if len(extension) > 10:
extension = ".bin"
else:

View File

@@ -62,9 +62,8 @@ class ApiExternalDataTool(ExternalDataTool):
if not api_based_extension:
raise ValueError(
"[External data tool] API query failed, variable: {}, error: api_based_extension_id is invalid".format(
self.variable
)
"[External data tool] API query failed, variable: {}, "
"error: api_based_extension_id is invalid".format(self.variable)
)
# decrypt api_key

View File

@@ -90,7 +90,7 @@ class File(BaseModel):
def markdown(self) -> str:
url = self.generate_url()
if self.type == FileType.IMAGE:
text = f"![{self.filename or ''}]({url})"
text = f'![{self.filename or ""}]({url})'
else:
text = f"[{self.filename or url}]({url})"

View File

@@ -11,6 +11,15 @@ from configs import dify_config
SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES
proxy_mounts = (
{
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
}
if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL
else None
)
BACKOFF_FACTOR = 0.5
STATUS_FORCELIST = [429, 500, 502, 503, 504]
@@ -42,11 +51,7 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if dify_config.SSRF_PROXY_ALL_URL:
with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL) as client:
response = client.request(method=method, url=url, **kwargs)
elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
proxy_mounts = {
"http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL),
"https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL),
}
elif proxy_mounts:
with httpx.Client(mounts=proxy_mounts) as client:
response = client.request(method=method, url=url, **kwargs)
else:

View File

@@ -530,6 +530,7 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
chunk_size = 10
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX:
# create keyword index
create_keyword_thread = threading.Thread(
@@ -538,22 +539,11 @@ class IndexingRunner:
)
create_keyword_thread.start()
max_workers = 10
if dataset.indexing_technique == "high_quality":
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = []
# Distribute documents into multiple groups based on the hash values of page_content
# This is done to prevent multiple threads from processing the same document,
# Thereby avoiding potential database insertion deadlocks
document_groups: list[list[Document]] = [[] for _ in range(max_workers)]
for document in documents:
hash = helper.generate_text_hash(document.page_content)
group_index = int(hash, 16) % max_workers
document_groups[group_index].append(document)
for chunk_documents in document_groups:
if len(chunk_documents) == 0:
continue
for i in range(0, len(documents), chunk_size):
chunk_documents = documents[i : i + chunk_size]
futures.append(
executor.submit(
self._process_chunk,

View File

@@ -131,7 +131,7 @@ JAVASCRIPT_CODE_GENERATOR_PROMPT_TEMPLATE = (
SUGGESTED_QUESTIONS_AFTER_ANSWER_INSTRUCTION_PROMPT = (
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response. "
"MAKE SURE your output is the SAME language as the Assistant's latest response"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)

View File

@@ -26,7 +26,7 @@ class TokenBufferMemory:
self.model_instance = model_instance
def get_history_prompt_messages(
self, max_token_limit: int = 100000, message_limit: Optional[int] = None
self, max_token_limit: int = 2000, message_limit: Optional[int] = None
) -> Sequence[PromptMessage]:
"""
Get history prompt messages.

View File

@@ -1,4 +1,4 @@
from .llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from .llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from .message_entities import (
AssistantPromptMessage,
AudioPromptMessageContent,
@@ -23,7 +23,6 @@ __all__ = [
"AudioPromptMessageContent",
"DocumentPromptMessageContent",
"ImagePromptMessageContent",
"LLMMode",
"LLMResult",
"LLMResultChunk",
"LLMResultChunkDelta",

View File

@@ -1,5 +1,5 @@
from decimal import Decimal
from enum import StrEnum
from enum import Enum
from typing import Optional
from pydantic import BaseModel
@@ -8,7 +8,7 @@ from core.model_runtime.entities.message_entities import AssistantPromptMessage,
from core.model_runtime.entities.model_entities import ModelUsage, PriceInfo
class LLMMode(StrEnum):
class LLMMode(Enum):
"""
Enum class for large language model mode.
"""

View File

@@ -221,12 +221,13 @@ class AIModel(ABC):
:param credentials: model credentials
:return: model schema
"""
# Try to get model schema from predefined models
for predefined_model in self.predefined_models():
if model == predefined_model.model:
return predefined_model
# get predefined models (predefined_models)
models = self.predefined_models()
model_map = {model.model: model for model in models}
if model in model_map:
return model_map[model]
# Try to get model schema from credentials
if credentials:
model_schema = self.get_customizable_model_schema_from_credentials(model, credentials)
if model_schema:

View File

@@ -30,11 +30,6 @@ from core.model_runtime.model_providers.__base.ai_model import AIModel
logger = logging.getLogger(__name__)
HTML_THINKING_TAG = (
'<details style="color:gray;background-color: #f8f8f8;padding: 8px;border-radius: 4px;" open> '
"<summary> Thinking... </summary>"
)
class LargeLanguageModel(AIModel):
"""
@@ -405,40 +400,6 @@ if you are not sure about the structure.
),
)
def _wrap_thinking_by_reasoning_content(self, delta: dict, is_reasoning: bool) -> tuple[str, bool]:
"""
If the reasoning response is from delta.get("reasoning_content"), we wrap
it with HTML details tag.
:param delta: delta dictionary from LLM streaming response
:param is_reasoning: is reasoning
:return: tuple of (processed_content, is_reasoning)
"""
content = delta.get("content") or ""
reasoning_content = delta.get("reasoning_content")
if reasoning_content:
if not is_reasoning:
content = HTML_THINKING_TAG + reasoning_content
is_reasoning = True
else:
content = reasoning_content
elif is_reasoning:
content = "</details>" + content
is_reasoning = False
return content, is_reasoning
def _wrap_thinking_by_tag(self, content: str) -> str:
"""
if the reasoning response is a <think>...</think> block from delta.get("content"),
we replace <think> to <detail>.
:param content: delta.get("content")
:return: processed_content
"""
return content.replace("<think>", HTML_THINKING_TAG).replace("</think>", "</details>")
def _invoke_result_generator(
self,
model: str,

View File

@@ -1,11 +1,13 @@
import logging
from concurrent.futures import ProcessPoolExecutor
from os.path import abspath, dirname, join
from threading import Lock
from typing import Any
from typing import Any, cast
logger = logging.getLogger(__name__)
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore
_tokenizer: Any = None
_lock = Lock()
_executor = ProcessPoolExecutor(max_workers=1)
class GPT2Tokenizer:
@@ -15,37 +17,22 @@ class GPT2Tokenizer:
use gpt2 tokenizer to get num tokens
"""
_tokenizer = GPT2Tokenizer.get_encoder()
tokens = _tokenizer.encode(text)
tokens = _tokenizer.encode(text, verbose=False)
return len(tokens)
@staticmethod
def get_num_tokens(text: str) -> int:
# Because this process needs more cpu resource, we turn this back before we find a better way to handle it.
#
# future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
# result = future.result()
# return cast(int, result)
return GPT2Tokenizer._get_num_tokens_by_gpt2(text)
future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text)
result = future.result()
return cast(int, result)
@staticmethod
def get_encoder() -> Any:
global _tokenizer, _lock
with _lock:
if _tokenizer is None:
# Try to use tiktoken to get the tokenizer because it is faster
#
try:
import tiktoken
_tokenizer = tiktoken.get_encoding("gpt2")
except Exception:
from os.path import abspath, dirname, join
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore
base_path = abspath(__file__)
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
logger.info("Fallback to Transformers' GPT-2 tokenizer from tiktoken")
base_path = abspath(__file__)
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
return _tokenizer

View File

@@ -1,5 +1,4 @@
- openai
- deepseek
- anthropic
- azure_openai
- google
@@ -33,6 +32,7 @@
- localai
- volcengine_maas
- openai_api_compatible
- deepseek
- hunyuan
- siliconflow
- perfxcloud

View File

@@ -51,40 +51,6 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- variable: mode
show_on:
- variable: __model_type
value: llm
label:
en_US: Completion mode
type: select
required: false
default: chat
placeholder:
zh_Hans: 选择对话类型
en_US: Select completion mode
options:
- value: completion
label:
en_US: Completion
zh_Hans: 补全
- value: chat
label:
en_US: Chat
zh_Hans: 对话
- variable: context_size
label:
zh_Hans: 模型上下文长度
en_US: Model context size
required: true
show_on:
- variable: __model_type
value: llm
type: text-input
default: "4096"
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: jwt_token
required: true
label:

View File

@@ -1,9 +1,9 @@
import logging
from collections.abc import Generator, Sequence
from collections.abc import Generator
from typing import Any, Optional, Union
from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import StreamingChatCompletionsUpdate, SystemMessage, UserMessage
from azure.ai.inference.models import StreamingChatCompletionsUpdate
from azure.core.credentials import AzureKeyCredential
from azure.core.exceptions import (
ClientAuthenticationError,
@@ -20,7 +20,7 @@ from azure.core.exceptions import (
)
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessage,
@@ -30,7 +30,6 @@ from core.model_runtime.entities.model_entities import (
AIModelEntity,
FetchFrom,
I18nObject,
ModelPropertyKey,
ModelType,
ParameterRule,
ParameterType,
@@ -61,10 +60,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
self,
model: str,
credentials: dict,
prompt_messages: Sequence[PromptMessage],
prompt_messages: list[PromptMessage],
model_parameters: dict,
tools: Optional[Sequence[PromptMessageTool]] = None,
stop: Optional[Sequence[str]] = None,
tools: Optional[list[PromptMessageTool]] = None,
stop: Optional[list[str]] = None,
stream: bool = True,
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
@@ -83,8 +82,8 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
"""
if not self.client:
endpoint = str(credentials.get("endpoint"))
api_key = str(credentials.get("api_key"))
endpoint = credentials.get("endpoint")
api_key = credentials.get("api_key")
self.client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
messages = [{"role": msg.role.value, "content": msg.content} for msg in prompt_messages]
@@ -95,7 +94,6 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
"temperature": model_parameters.get("temperature", 0),
"top_p": model_parameters.get("top_p", 1),
"stream": stream,
"model": model,
}
if stop:
@@ -257,16 +255,10 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
:return:
"""
try:
endpoint = str(credentials.get("endpoint"))
api_key = str(credentials.get("api_key"))
endpoint = credentials.get("endpoint")
api_key = credentials.get("api_key")
client = ChatCompletionsClient(endpoint=endpoint, credential=AzureKeyCredential(api_key))
client.complete(
messages=[
SystemMessage(content="I say 'ping', you say 'pong'"),
UserMessage(content="ping"),
],
model=model,
)
client.get_model_info()
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@@ -335,10 +327,7 @@ class AzureAIStudioLargeLanguageModel(LargeLanguageModel):
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_type=ModelType.LLM,
features=[],
model_properties={
ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size", "4096")),
ModelPropertyKey.MODE: credentials.get("mode", LLMMode.CHAT),
},
model_properties={},
parameter_rules=rules,
)

View File

@@ -53,9 +53,6 @@ model_credential_schema:
type: select
required: true
options:
- label:
en_US: 2024-12-01-preview
value: 2024-12-01-preview
- label:
en_US: 2024-10-01-preview
value: 2024-10-01-preview
@@ -138,18 +135,6 @@ model_credential_schema:
show_on:
- variable: __model_type
value: llm
- label:
en_US: o3-mini
value: o3-mini
show_on:
- variable: __model_type
value: llm
- label:
en_US: o3-mini-2025-01-31
value: o3-mini-2025-01-31
show_on:
- variable: __model_type
value: llm
- label:
en_US: o1-preview
value: o1-preview

View File

@@ -108,7 +108,7 @@ class AzureOpenAILargeLanguageModel(_CommonAzureOpenAI, LargeLanguageModel):
ai_model_entity = self._get_ai_model_entity(base_model_name=base_model_name, model=model)
if not ai_model_entity:
raise CredentialsValidateFailedError(f"Base Model Name {credentials['base_model_name']} is invalid")
raise CredentialsValidateFailedError(f'Base Model Name {credentials["base_model_name"]} is invalid')
try:
client = AzureOpenAI(**self._to_credential_kwargs(credentials))

View File

@@ -130,7 +130,7 @@ class AzureOpenAITextEmbeddingModel(_CommonAzureOpenAI, TextEmbeddingModel):
raise CredentialsValidateFailedError("Base Model Name is required")
if not self._get_ai_model_entity(credentials["base_model_name"], model):
raise CredentialsValidateFailedError(f"Base Model Name {credentials['base_model_name']} is invalid")
raise CredentialsValidateFailedError(f'Base Model Name {credentials["base_model_name"]} is invalid')
try:
credentials_kwargs = self._to_credential_kwargs(credentials)

View File

@@ -44,7 +44,6 @@ provider_credential_schema:
label:
en_US: AWS Region
zh_Hans: AWS 地区
ja_JP: AWS リージョン
type: select
default: us-east-1
options:
@@ -52,86 +51,62 @@ provider_credential_schema:
label:
en_US: US East (N. Virginia)
zh_Hans: 美国东部 (弗吉尼亚北部)
ja_JP: 米国 (バージニア北部)
- value: us-east-2
label:
en_US: US East (Ohio)
zh_Hans: 美国东部 (俄亥俄)
ja_JP: 米国 (オハイオ)
zh_Hans: 美国东部 (弗吉尼亚北部)
- value: us-west-2
label:
en_US: US West (Oregon)
zh_Hans: 美国西部 (俄勒冈州)
ja_JP: 米国 (オレゴン)
- value: ap-south-1
label:
en_US: Asia Pacific (Mumbai)
zh_Hans: 亚太地区(孟买)
ja_JP: アジアパシフィック (ムンバイ)
- value: ap-southeast-1
label:
en_US: Asia Pacific (Singapore)
zh_Hans: 亚太地区 (新加坡)
ja_JP: アジアパシフィック (シンガポール)
- value: ap-southeast-2
label:
en_US: Asia Pacific (Sydney)
zh_Hans: 亚太地区 (悉尼)
ja_JP: アジアパシフィック (シドニー)
- value: ap-northeast-1
label:
en_US: Asia Pacific (Tokyo)
zh_Hans: 亚太地区 (东京)
ja_JP: アジアパシフィック (東京)
- value: ap-northeast-2
label:
en_US: Asia Pacific (Seoul)
zh_Hans: 亚太地区(首尔)
ja_JP: アジアパシフィック (ソウル)
- value: ca-central-1
label:
en_US: Canada (Central)
zh_Hans: 加拿大(中部)
ja_JP: カナダ (中部)
- value: eu-central-1
label:
en_US: Europe (Frankfurt)
zh_Hans: 欧洲 (法兰克福)
ja_JP: 欧州 (フランクフルト)
- value: eu-west-1
label:
en_US: Europe (Ireland)
zh_Hans: 欧洲(爱尔兰)
ja_JP: 欧州 (アイルランド)
- value: eu-west-2
label:
en_US: Europe (London)
zh_Hans: 欧洲西部 (伦敦)
ja_JP: 欧州 (ロンドン)
- value: eu-west-3
label:
en_US: Europe (Paris)
zh_Hans: 欧洲(巴黎)
ja_JP: 欧州 (パリ)
- value: sa-east-1
label:
en_US: South America (São Paulo)
zh_Hans: 南美洲(圣保罗)
ja_JP: 南米 (サンパウロ)
- value: us-gov-west-1
label:
en_US: AWS GovCloud (US-West)
zh_Hans: AWS GovCloud (US-West)
ja_JP: AWS GovCloud (米国西部)
- variable: bedrock_endpoint_url
label:
zh_Hans: Bedrock Endpoint URL
en_US: Bedrock Endpoint URL
type: text-input
required: false
placeholder:
zh_Hans: 在此输入您的 Bedrock Endpoint URL, 如https://123456.cloudfront.net
en_US: Enter your Bedrock Endpoint URL, e.g. https://123456.cloudfront.net
- variable: model_for_validation
required: false
label:

View File

@@ -13,7 +13,6 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
client_config = Config(region_name=region_name)
aws_access_key_id = credentials.get("aws_access_key_id")
aws_secret_access_key = credentials.get("aws_secret_access_key")
bedrock_endpoint_url = credentials.get("bedrock_endpoint_url")
if aws_access_key_id and aws_secret_access_key:
# use aksk to call bedrock
@@ -22,7 +21,6 @@ def get_bedrock_client(service_name: str, credentials: Mapping[str, str]):
config=client_config,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key,
**({"endpoint_url": bedrock_endpoint_url} if bedrock_endpoint_url else {}),
)
else:
# use iam without aksk to call

View File

@@ -1,115 +0,0 @@
model: us.anthropic.claude-3-7-sonnet-20250219-v1:0
label:
en_US: Claude 3.7 Sonnet(US.Cross Region Inference)
icon: icon_s_en.svg
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 200000
# docs: https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html
parameter_rules:
- name: enable_cache
label:
zh_Hans: 启用提示缓存
en_US: Enable Prompt Cache
type: boolean
required: false
default: true
help:
zh_Hans: 启用提示缓存可以提高性能并降低成本。Claude 3.7 Sonnet支持在system、messages和tools字段中使用缓存检查点。
en_US: Enable prompt caching to improve performance and reduce costs. Claude 3.7 Sonnet supports cache checkpoints in system, messages, and tools fields.
- name: reasoning_type
label:
zh_Hans: 推理配置
en_US: Reasoning Type
type: boolean
required: false
default: false
placeholder:
zh_Hans: 设置推理配置
en_US: Set reasoning configuration
help:
zh_Hans: 控制模型的推理能力。启用时temperature将固定为1且top_p将被禁用。
en_US: Controls the model's reasoning capability. When enabled, temperature will be fixed to 1 and top_p will be disabled.
- name: reasoning_budget
show_on:
- variable: reasoning_type
value: true
label:
zh_Hans: 推理预算
en_US: Reasoning Budget
type: int
default: 1024
min: 0
max: 128000
help:
zh_Hans: 推理的预算限制最小1024必须小于max_tokens。仅在推理类型为enabled时可用。
en_US: Budget limit for reasoning (minimum 1024), must be less than max_tokens. Only available when reasoning type is enabled.
- name: max_tokens
use_template: max_tokens
required: true
label:
zh_Hans: 最大token数
en_US: Max Tokens
type: int
default: 8192
min: 1
max: 128000
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
label:
zh_Hans: 模型温度
en_US: Model Temperature
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。当推理功能启用时该值将被固定为1。
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
- name: top_p
show_on:
- variable: reasoning_type
value: disabled
use_template: top_p
label:
zh_Hans: Top P
en_US: Top P
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'
unit: '0.001'
currency: USD

View File

@@ -58,7 +58,6 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
# TODO There is invoke issue: context limit on Cohere Model, will add them after fixed.
CONVERSE_API_ENABLED_MODEL_INFO = [
{"prefix": "anthropic.claude-v2", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "us.deepseek", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "anthropic.claude-v1", "support_system_prompts": True, "support_tool_use": False},
{"prefix": "us.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},
{"prefix": "eu.anthropic.claude-3", "support_system_prompts": True, "support_tool_use": True},

View File

@@ -1,63 +0,0 @@
model: us.deepseek.r1-v1:0
label:
en_US: DeepSeek-R1(US.Cross Region Inference)
icon: icon_s_en.svg
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: max_tokens
use_template: max_tokens
required: true
label:
zh_Hans: 最大token数
en_US: Max Tokens
type: int
default: 8192
min: 1
max: 128000
help:
zh_Hans: 停止前生成的最大令牌数。
en_US: The maximum number of tokens to generate before stopping.
- name: temperature
use_template: temperature
required: false
label:
zh_Hans: 模型温度
en_US: Model Temperature
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。当推理功能启用时该值将被固定为1。
en_US: The amount of randomness injected into the response. When reasoning is enabled, this value will be fixed to 1.
- name: top_p
show_on:
- variable: reasoning_type
value: disabled
use_template: top_p
label:
zh_Hans: Top P
en_US: Top P
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中的概率阈值。当推理功能启用时,该参数将被禁用。
en_US: The probability threshold in nucleus sampling. When reasoning is enabled, this parameter will be disabled.
- name: response_format
use_template: response_format
pricing:
input: '0.001'
output: '0.005'
unit: '0.001'
currency: USD

View File

@@ -70,7 +70,7 @@ class BedrockRerankModel(RerankModel):
rerankingConfiguration = {
"type": "BEDROCK_RERANKING_MODEL",
"bedrockRerankingConfiguration": {
"numberOfResults": min(top_n, len(text_sources)),
"numberOfResults": top_n,
"modelConfiguration": {
"modelArn": model_package_arn,
},

View File

@@ -677,17 +677,16 @@ class CohereLargeLanguageModel(LargeLanguageModel):
:return: model schema
"""
mode = credentials.get("mode")
base_model_schema = None
for predefined_model in self.predefined_models():
if (
mode == "chat" and predefined_model.model == "command-light-chat"
) or predefined_model.model == "command-light":
base_model_schema = predefined_model
break
# get model schema
models = self.predefined_models()
model_map = {model.model: model for model in models}
if not base_model_schema:
raise ValueError("Model not found")
mode = credentials.get("mode")
if mode == "chat":
base_model_schema = model_map["command-light-chat"]
else:
base_model_schema = model_map["command-light"]
base_model_schema = cast(AIModelEntity, base_model_schema)

View File

@@ -1,3 +1,2 @@
- deepseek-chat
- deepseek-coder
- deepseek-reasoner

View File

@@ -10,7 +10,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
context_size: 64000
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature

View File

@@ -10,7 +10,7 @@ features:
- stream-tool-call
model_properties:
mode: chat
context_size: 64000
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature

View File

@@ -1,21 +0,0 @@
model: deepseek-reasoner
label:
zh_Hans: deepseek-reasoner
en_US: deepseek-reasoner
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 64000
parameter_rules:
- name: max_tokens
use_template: max_tokens
min: 1
max: 8192
default: 4096
pricing:
input: "4"
output: "16"
unit: "0.000001"
currency: RMB

View File

@@ -24,6 +24,9 @@ class DeepseekLargeLanguageModel(OAIAPICompatLargeLanguageModel):
user: Optional[str] = None,
) -> Union[LLMResult, Generator]:
self._add_custom_parameters(credentials)
# {"response_format": "xx"} need convert to {"response_format": {"type": "xx"}}
if "response_format" in model_parameters:
model_parameters["response_format"] = {"type": model_parameters.get("response_format")}
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream)
def validate_credentials(self, model: str, credentials: dict) -> None:

View File

@@ -19,8 +19,8 @@ class GoogleProvider(ModelProvider):
try:
model_instance = self.get_model_instance(ModelType.LLM)
# Use `gemini-2.0-flash` model for validate,
model_instance.validate_credentials(model="gemini-2.0-flash", credentials=credentials)
# Use `gemini-pro` model for validate,
model_instance.validate_credentials(model="gemini-pro", credentials=credentials)
except CredentialsValidateFailedError as ex:
raise ex
except Exception as ex:

View File

@@ -1,8 +1,5 @@
- gemini-2.0-flash-001
- gemini-2.0-flash-exp
- gemini-2.0-pro-exp-02-05
- gemini-2.0-flash-thinking-exp-1219
- gemini-2.0-flash-thinking-exp-01-21
- gemini-1.5-pro
- gemini-1.5-pro-latest
- gemini-1.5-pro-001
@@ -19,3 +16,5 @@
- gemini-exp-1206
- gemini-exp-1121
- gemini-exp-1114
- gemini-pro
- gemini-pro-vision

View File

@@ -1,41 +0,0 @@
model: gemini-2.0-flash-001
label:
en_US: Gemini 2.0 Flash 001
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -1,39 +0,0 @@
model: gemini-2.0-flash-thinking-exp-01-21
label:
en_US: Gemini 2.0 Flash Thinking Exp 01-21
model_type: llm
features:
- agent-thought
- vision
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -1,41 +0,0 @@
model: gemini-2.0-pro-exp-02-05
label:
en_US: Gemini 2.0 pro exp 02-05
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 1048576
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
use_template: max_tokens
default: 8192
min: 1
max: 8192
- name: json_schema
use_template: json_schema
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD

View File

@@ -1,18 +1,12 @@
model: gemini-exp-1114
model: gemini-pro-vision
label:
en_US: Gemini exp 1114
en_US: Gemini Pro Vision
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
context_size: 12288
parameter_rules:
- name: temperature
use_template: temperature
@@ -27,15 +21,15 @@ parameter_rules:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
- name: max_tokens_to_sample
use_template: max_tokens
default: 8192
required: true
default: 4096
min: 1
max: 8192
- name: json_schema
use_template: json_schema
max: 4096
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
deprecated: true

View File

@@ -1,18 +1,14 @@
model: gemini-exp-1121
model: gemini-pro
label:
en_US: Gemini exp 1121
en_US: Gemini Pro
model_type: llm
features:
- agent-thought
- vision
- tool-call
- stream-tool-call
- document
- video
- audio
model_properties:
mode: chat
context_size: 32767
context_size: 30720
parameter_rules:
- name: temperature
use_template: temperature
@@ -27,15 +23,17 @@ parameter_rules:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_output_tokens
- name: max_tokens_to_sample
use_template: max_tokens
default: 8192
required: true
default: 2048
min: 1
max: 8192
- name: json_schema
use_template: json_schema
max: 2048
- name: response_format
use_template: response_format
pricing:
input: '0.00'
output: '0.00'
unit: '0.000001'
currency: USD
deprecated: true

View File

@@ -1,4 +1,3 @@
- deepseek-r1-distill-llama-70b
- llama-3.1-405b-reasoning
- llama-3.3-70b-versatile
- llama-3.1-70b-versatile

View File

@@ -1,36 +0,0 @@
model: deepseek-r1-distill-llama-70b
label:
en_US: DeepSeek R1 Distill Llama 70b
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 128000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
- name: response_format
label:
zh_Hans: 回复格式
en_US: Response Format
type: string
help:
zh_Hans: 指定模型必须输出的格式
en_US: specifying the format that the model must output
required: false
options:
- text
- json_object
pricing:
input: '3.00'
output: '3.00'
unit: '0.000001'
currency: USD

View File

@@ -162,9 +162,9 @@ class HuggingfaceHubTextEmbeddingModel(_CommonHuggingfaceHub, TextEmbeddingModel
@staticmethod
def _check_endpoint_url_model_repository_name(credentials: dict, model_name: str):
try:
url = f"{HUGGINGFACE_ENDPOINT_API}{credentials['huggingface_namespace']}"
url = f'{HUGGINGFACE_ENDPOINT_API}{credentials["huggingface_namespace"]}'
headers = {
"Authorization": f"Bearer {credentials['huggingfacehub_api_token']}",
"Authorization": f'Bearer {credentials["huggingfacehub_api_token"]}',
"Content-Type": "application/json",
}

View File

@@ -34,7 +34,6 @@ from core.model_runtime.model_providers.minimax.llm.types import MinimaxMessage
class MinimaxLargeLanguageModel(LargeLanguageModel):
model_apis = {
"minimax-text-01": MinimaxChatCompletionPro,
"abab7-chat-preview": MinimaxChatCompletionPro,
"abab6.5t-chat": MinimaxChatCompletionPro,
"abab6.5s-chat": MinimaxChatCompletionPro,

View File

@@ -1,46 +0,0 @@
model: minimax-text-01
label:
en_US: Minimax-Text-01
model_type: llm
features:
- agent-thought
- tool-call
- stream-tool-call
model_properties:
mode: chat
context_size: 1000192
parameter_rules:
- name: temperature
use_template: temperature
min: 0.01
max: 1
default: 0.1
- name: top_p
use_template: top_p
min: 0.01
max: 1
default: 0.95
- name: max_tokens
use_template: max_tokens
required: true
default: 2048
min: 1
max: 1000192
- name: mask_sensitive_info
type: boolean
default: true
label:
zh_Hans: 隐私保护
en_US: Moderate
help:
zh_Hans: 对输出中易涉及隐私问题的文本信息进行打码目前包括但不限于邮箱、域名、链接、证件号、家庭住址等默认true即开启打码
en_US: Mask the sensitive info of the generated content, such as email/domain/link/address/phone/id..
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty
use_template: frequency_penalty
pricing:
input: '0.001'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -44,6 +44,9 @@ class MoonshotLargeLanguageModel(OAIAPICompatLargeLanguageModel):
self._add_custom_parameters(credentials)
self._add_function_call(model, credentials)
user = user[:32] if user else None
# {"response_format": "json_object"} need convert to {"response_format": {"type": "json_object"}}
if "response_format" in model_parameters:
model_parameters["response_format"] = {"type": model_parameters.get("response_format")}
return super()._invoke(model, credentials, prompt_messages, model_parameters, tools, stop, stream, user)
def validate_credentials(self, model: str, credentials: dict) -> None:

Some files were not shown because too many files have changed in this diff Show More