Compare commits

..

1 Commits

Author SHA1 Message Date
Jyong
72064372b5 Revert "add clean 7 days datasets (#9424)"
This reverts commit bd678f9ca1.
2024-10-16 22:45:15 +08:00
346 changed files with 15250 additions and 25670 deletions

View File

@@ -1,12 +1,11 @@
#!/bin/bash
npm add -g pnpm@9.12.2
cd web && pnpm install
cd web && npm install
pipx install poetry
echo 'alias start-api="cd /workspaces/dify/api && poetry run python -m flask run --host 0.0.0.0 --port=5001 --debug"' >> ~/.bashrc
echo 'alias start-worker="cd /workspaces/dify/api && poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
echo 'alias start-web="cd /workspaces/dify/web && pnpm dev"' >> ~/.bashrc
echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc
echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc
source /home/vscode/.bashrc
source /home/vscode/.bashrc

View File

@@ -42,7 +42,7 @@ jobs:
- name: Run npm script
if: env.FILES_CHANGED == 'true'
run: pnpm run auto-gen-i18n
run: npm run auto-gen-i18n
- name: Create Pull Request
if: env.FILES_CHANGED == 'true'

7
.gitignore vendored
View File

@@ -175,8 +175,6 @@ docker/volumes/pgvector/data/*
docker/volumes/pgvecto_rs/data/*
docker/nginx/conf.d/default.conf
docker/nginx/ssl/*
!docker/nginx/ssl/.gitkeep
docker/middleware.env
sdks/python-client/build
@@ -189,7 +187,4 @@ pyrightconfig.json
api/.vscode
.idea/
.vscode
# pnpm
/.pnpm-store
.vscode

View File

@@ -6,9 +6,8 @@ Dify is licensed under the Apache License 2.0, with the following additional con
a. Multi-tenant service: Unless explicitly authorized by Dify in writing, you may not use the Dify source code to operate a multi-tenant environment.
- Tenant Definition: Within the context of Dify, one tenant corresponds to one workspace. The workspace provides a separated area for each tenant's data and configurations.
b. LOGO and copyright information: In the process of using Dify's frontend, you may not remove or modify the LOGO or copyright information in the Dify console or applications. This restriction is inapplicable to uses of Dify that do not involve its frontend.
- Frontend Definition: For the purposes of this license, the "frontend" of Dify includes all components located in the `web/` directory when running Dify from the raw source code, or the "web" image when running Dify with Docker.
b. LOGO and copyright information: In the process of using Dify's frontend components, you may not remove or modify the LOGO or copyright information in the Dify console or applications. This restriction is inapplicable to uses of Dify that do not involve its frontend components.
Please contact business@dify.ai by email to inquire about licensing matters.

View File

@@ -42,7 +42,7 @@ DB_DATABASE=dify
# Storage configuration
# use for store upload files, private keys...
# storage type: local, s3, aliyun-oss, azure-blob, baidu-obs, google-storage, huawei-obs, oci-storage, tencent-cos, volcengine-tos, supabase
# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos, baidu-obs, supabase
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_USE_AWS_MANAGED_IAM=false

View File

@@ -10,20 +10,44 @@ if os.environ.get("DEBUG", "false").lower() != "true":
grpc.experimental.gevent.init_gevent()
import json
import logging
import sys
import threading
import time
import warnings
from logging.handlers import RotatingFileHandler
from flask import Response
from flask import Flask, Response, request
from flask_cors import CORS
from werkzeug.exceptions import Unauthorized
from app_factory import create_app
import contexts
from commands import register_commands
from configs import dify_config
# DO NOT REMOVE BELOW
from events import event_handlers # noqa: F401
from extensions import (
ext_celery,
ext_code_based_extension,
ext_compress,
ext_database,
ext_hosting_provider,
ext_login,
ext_mail,
ext_migrate,
ext_proxy_fix,
ext_redis,
ext_sentry,
ext_storage,
)
from extensions.ext_database import db
from extensions.ext_login import login_manager
from libs.passport import PassportService
# TODO: Find a way to avoid importing models here
from models import account, dataset, model, source, task, tool, tools, web # noqa: F401
from services.account_service import AccountService
# DO NOT REMOVE ABOVE
@@ -36,12 +60,188 @@ if hasattr(time, "tzset"):
time.tzset()
class DifyApp(Flask):
pass
# -------------
# Configuration
# -------------
config_type = os.getenv("EDITION", default="SELF_HOSTED") # ce edition first
# ----------------------------
# Application Factory Function
# ----------------------------
def create_flask_app_with_configs() -> Flask:
"""
create a raw flask app
with configs loaded from .env file
"""
dify_app = DifyApp(__name__)
dify_app.config.from_mapping(dify_config.model_dump())
# populate configs into system environment variables
for key, value in dify_app.config.items():
if isinstance(value, str):
os.environ[key] = value
elif isinstance(value, int | float | bool):
os.environ[key] = str(value)
elif value is None:
os.environ[key] = ""
return dify_app
def create_app() -> Flask:
app = create_flask_app_with_configs()
app.secret_key = app.config["SECRET_KEY"]
log_handlers = None
log_file = app.config.get("LOG_FILE")
if log_file:
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)
log_handlers = [
RotatingFileHandler(
filename=log_file,
maxBytes=1024 * 1024 * 1024,
backupCount=5,
),
logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
level=app.config.get("LOG_LEVEL"),
format=app.config.get("LOG_FORMAT"),
datefmt=app.config.get("LOG_DATEFORMAT"),
handlers=log_handlers,
force=True,
)
log_tz = app.config.get("LOG_TZ")
if log_tz:
from datetime import datetime
import pytz
timezone = pytz.timezone(log_tz)
def time_converter(seconds):
return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
for handler in logging.root.handlers:
handler.formatter.converter = time_converter
initialize_extensions(app)
register_blueprints(app)
register_commands(app)
return app
def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app)
ext_compress.init_app(app)
ext_code_based_extension.init()
ext_database.init_app(app)
ext_migrate.init(app, db)
ext_redis.init_app(app)
ext_storage.init_app(app)
ext_celery.init_app(app)
ext_login.init_app(app)
ext_mail.init_app(app)
ext_hosting_provider.init_app(app)
ext_sentry.init_app(app)
ext_proxy_fix.init_app(app)
# Flask-Login configuration
@login_manager.request_loader
def load_user_from_request(request_from_flask_login):
"""Load user based on the request."""
if request.blueprint not in {"console", "inner_api"}:
return None
# Check if the user_id contains a dot, indicating the old format
auth_header = request.headers.get("Authorization", "")
if not auth_header:
auth_token = request.args.get("_token")
if not auth_token:
raise Unauthorized("Invalid Authorization token.")
else:
if " " not in auth_header:
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(auth_token)
user_id = decoded.get("user_id")
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
if logged_in_account:
contexts.tenant_id.set(logged_in_account.current_tenant_id)
return logged_in_account
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(
json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
status=401,
content_type="application/json",
)
# register blueprint routers
def register_blueprints(app):
from controllers.console import bp as console_app_bp
from controllers.files import bp as files_bp
from controllers.inner_api import bp as inner_api_bp
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
CORS(
service_api_bp,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
)
app.register_blueprint(service_api_bp)
CORS(
web_bp,
resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(web_bp)
CORS(
console_app_bp,
resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(console_app_bp)
CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
app.register_blueprint(files_bp)
app.register_blueprint(inner_api_bp)
# create app
app = create_app()
celery = app.extensions["celery"]

View File

@@ -1,213 +0,0 @@
import os
if os.environ.get("DEBUG", "false").lower() != "true":
from gevent import monkey
monkey.patch_all()
import grpc.experimental.gevent
grpc.experimental.gevent.init_gevent()
import json
import logging
import sys
from logging.handlers import RotatingFileHandler
from flask import Flask, Response, request
from flask_cors import CORS
from werkzeug.exceptions import Unauthorized
import contexts
from commands import register_commands
from configs import dify_config
from extensions import (
ext_celery,
ext_code_based_extension,
ext_compress,
ext_database,
ext_hosting_provider,
ext_login,
ext_mail,
ext_migrate,
ext_proxy_fix,
ext_redis,
ext_sentry,
ext_storage,
)
from extensions.ext_database import db
from extensions.ext_login import login_manager
from libs.passport import PassportService
from services.account_service import AccountService
class DifyApp(Flask):
pass
# ----------------------------
# Application Factory Function
# ----------------------------
def create_flask_app_with_configs() -> Flask:
"""
create a raw flask app
with configs loaded from .env file
"""
dify_app = DifyApp(__name__)
dify_app.config.from_mapping(dify_config.model_dump())
# populate configs into system environment variables
for key, value in dify_app.config.items():
if isinstance(value, str):
os.environ[key] = value
elif isinstance(value, int | float | bool):
os.environ[key] = str(value)
elif value is None:
os.environ[key] = ""
return dify_app
def create_app() -> Flask:
app = create_flask_app_with_configs()
app.secret_key = app.config["SECRET_KEY"]
log_handlers = None
log_file = app.config.get("LOG_FILE")
if log_file:
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)
log_handlers = [
RotatingFileHandler(
filename=log_file,
maxBytes=1024 * 1024 * 1024,
backupCount=5,
),
logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
level=app.config.get("LOG_LEVEL"),
format=app.config.get("LOG_FORMAT"),
datefmt=app.config.get("LOG_DATEFORMAT"),
handlers=log_handlers,
force=True,
)
log_tz = app.config.get("LOG_TZ")
if log_tz:
from datetime import datetime
import pytz
timezone = pytz.timezone(log_tz)
def time_converter(seconds):
return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
for handler in logging.root.handlers:
handler.formatter.converter = time_converter
initialize_extensions(app)
register_blueprints(app)
register_commands(app)
return app
def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app)
ext_compress.init_app(app)
ext_code_based_extension.init()
ext_database.init_app(app)
ext_migrate.init(app, db)
ext_redis.init_app(app)
ext_storage.init_app(app)
ext_celery.init_app(app)
ext_login.init_app(app)
ext_mail.init_app(app)
ext_hosting_provider.init_app(app)
ext_sentry.init_app(app)
ext_proxy_fix.init_app(app)
# Flask-Login configuration
@login_manager.request_loader
def load_user_from_request(request_from_flask_login):
"""Load user based on the request."""
if request.blueprint not in {"console", "inner_api"}:
return None
# Check if the user_id contains a dot, indicating the old format
auth_header = request.headers.get("Authorization", "")
if not auth_header:
auth_token = request.args.get("_token")
if not auth_token:
raise Unauthorized("Invalid Authorization token.")
else:
if " " not in auth_header:
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(auth_token)
user_id = decoded.get("user_id")
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
if logged_in_account:
contexts.tenant_id.set(logged_in_account.current_tenant_id)
return logged_in_account
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(
json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
status=401,
content_type="application/json",
)
# register blueprint routers
def register_blueprints(app):
from controllers.console import bp as console_app_bp
from controllers.files import bp as files_bp
from controllers.inner_api import bp as inner_api_bp
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
CORS(
service_api_bp,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
)
app.register_blueprint(service_api_bp)
CORS(
web_bp,
resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(web_bp)
CORS(
console_app_bp,
resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(console_app_bp)
CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
app.register_blueprint(files_bp)
app.register_blueprint(inner_api_bp)

View File

@@ -259,25 +259,6 @@ def migrate_knowledge_vector_database():
skipped_count = 0
total_count = 0
vector_type = dify_config.VECTOR_STORE
upper_colletion_vector_types = {
VectorType.MILVUS,
VectorType.PGVECTOR,
VectorType.RELYT,
VectorType.WEAVIATE,
VectorType.ORACLE,
VectorType.ELASTICSEARCH,
}
lower_colletion_vector_types = {
VectorType.ANALYTICDB,
VectorType.CHROMA,
VectorType.MYSCALE,
VectorType.PGVECTO_RS,
VectorType.TIDB_VECTOR,
VectorType.OPENSEARCH,
VectorType.TENCENT,
VectorType.BAIDU,
VectorType.VIKINGDB,
}
page = 1
while True:
try:
@@ -303,9 +284,11 @@ def migrate_knowledge_vector_database():
skipped_count = skipped_count + 1
continue
collection_name = ""
dataset_id = dataset.id
if vector_type in upper_colletion_vector_types:
if vector_type == VectorType.WEAVIATE:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.WEAVIATE, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.QDRANT:
if dataset.collection_binding_id:
dataset_collection_binding = (
@@ -318,15 +301,63 @@ def migrate_knowledge_vector_database():
else:
raise ValueError("Dataset Collection Binding not found")
else:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.QDRANT, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type in lower_colletion_vector_types:
collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
elif vector_type == VectorType.MILVUS:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.MILVUS, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.RELYT:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": "relyt", "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.TENCENT:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.TENCENT, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.PGVECTOR:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.PGVECTOR, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.OPENSEARCH:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.OPENSEARCH,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.ANALYTICDB:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.ANALYTICDB,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.ELASTICSEARCH:
dataset_id = dataset.id
index_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": "elasticsearch", "vector_store": {"class_prefix": index_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.BAIDU:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.BAIDU,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
else:
raise ValueError(f"Vector store {vector_type} is not supported.")
index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
vector = Vector(dataset)
click.echo(f"Migrating dataset {dataset.id}.")

View File

@@ -506,16 +506,11 @@ class DataSetConfig(BaseSettings):
Configuration for dataset management
"""
PLAN_SANDBOX_CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations - plan: sandbox",
CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations",
default=30,
)
PLAN_PRO_CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations - plan: pro and team",
default=7,
)
DATASET_OPERATOR_ENABLED: bool = Field(
description="Enable or disable dataset operator functionality",
default=False,

View File

@@ -35,8 +35,7 @@ from configs.middleware.vdb.weaviate_config import WeaviateConfig
class StorageConfig(BaseSettings):
STORAGE_TYPE: str = Field(
description="Type of storage to use."
" Options: 'local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', 'huawei-obs', "
"'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'local'.",
" Options: 'local', 's3', 'azure-blob', 'aliyun-oss', 'google-storage'. Default is 'local'.",
default="local",
)

View File

@@ -6,11 +6,11 @@ import numpy as np
from sqlalchemy.exc import IntegrityError
from configs import dify_config
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.rag.embedding.embedding_base import Embeddings
from core.rag.datasource.entity.embedding import Embeddings
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from libs import helper

View File

@@ -3,7 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError

View File

@@ -4,7 +4,7 @@ from typing import Optional
from pydantic import ConfigDict
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel

View File

@@ -7,7 +7,7 @@ import numpy as np
import tiktoken
from openai import AzureOpenAI
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@@ -4,7 +4,7 @@ from typing import Optional
from requests import post
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -52,8 +52,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.00025'
output: '0.00125'

View File

@@ -51,8 +51,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'

View File

@@ -51,8 +51,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'

View File

@@ -52,8 +52,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.00025'
output: '0.00125'

View File

@@ -52,8 +52,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.015'
output: '0.075'

View File

@@ -51,8 +51,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'

View File

@@ -51,8 +51,6 @@ parameter_rules:
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
- name: response_format
use_template: response_format
pricing:
input: '0.003'
output: '0.015'

View File

@@ -13,7 +13,7 @@ from botocore.exceptions import (
UnknownServiceError,
)
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -5,7 +5,7 @@ import cohere
import numpy as np
from cohere.core import RequestOptions
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -5,7 +5,7 @@ from typing import Optional, Union
import numpy as np
from openai import OpenAI
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@@ -18,7 +18,6 @@ help:
en_US: https://console.groq.com/
supported_model_types:
- llm
- speech2text
configurate_methods:
- predefined-model
provider_credential_schema:

View File

@@ -1,26 +0,0 @@
model: llama-3.2-11b-vision-preview
label:
zh_Hans: Llama 3.2 11B Vision (Preview)
en_US: Llama 3.2 11B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -1,26 +0,0 @@
model: llama-3.2-90b-vision-preview
label:
zh_Hans: Llama 3.2 90B Vision (Preview)
en_US: Llama 3.2 90B Vision (Preview)
model_type: llm
features:
- agent-thought
- vision
model_properties:
mode: chat
context_size: 131072
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 8192
pricing:
input: '0.05'
output: '0.1'
unit: '0.000001'
currency: USD

View File

@@ -1,5 +0,0 @@
model: distil-whisper-large-v3-en
model_type: speech2text
model_properties:
file_upload_limit: 1
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

View File

@@ -1,30 +0,0 @@
from typing import IO, Optional
from core.model_runtime.model_providers.openai_api_compatible.speech2text.speech2text import OAICompatSpeech2TextModel
class GroqSpeech2TextModel(OAICompatSpeech2TextModel):
"""
Model class for Groq Speech to text model.
"""
def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
"""
Invoke speech2text model
:param model: model name
:param credentials: model credentials
:param file: audio file
:param user: unique user id
:return: text for given audio file
"""
self._add_custom_parameters(credentials)
return super()._invoke(model, credentials, file)
def validate_credentials(self, model: str, credentials: dict) -> None:
self._add_custom_parameters(credentials)
return super().validate_credentials(model, credentials)
@classmethod
def _add_custom_parameters(cls, credentials: dict) -> None:
credentials["endpoint_url"] = "https://api.groq.com/openai/v1"

View File

@@ -1,5 +0,0 @@
model: whisper-large-v3-turbo
model_type: speech2text
model_properties:
file_upload_limit: 1
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

View File

@@ -1,5 +0,0 @@
model: whisper-large-v3
model_type: speech2text
model_properties:
file_upload_limit: 1
supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm

View File

@@ -6,7 +6,7 @@ import numpy as np
import requests
from huggingface_hub import HfApi, InferenceClient
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -1,7 +1,7 @@
import time
from typing import Optional
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -9,7 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -4,7 +4,7 @@ from typing import Optional
from requests import post
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -5,7 +5,7 @@ from typing import Optional
from requests import post
from yarl import URL
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -4,7 +4,7 @@ from typing import Optional
from requests import post
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -4,7 +4,7 @@ from typing import Optional
import requests
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -5,7 +5,7 @@ from typing import Optional
from nomic import embed
from nomic import login as nomic_login
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage,

View File

@@ -4,7 +4,7 @@ from typing import Optional
from requests import post
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -6,7 +6,7 @@ from typing import Optional
import numpy as np
import oci
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -8,7 +8,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,

View File

@@ -6,7 +6,7 @@ import numpy as np
import tiktoken
from openai import OpenAI
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@@ -8,7 +8,6 @@ supported_model_types:
- llm
- text-embedding
- speech2text
- rerank
configurate_methods:
- customizable-model
model_credential_schema:
@@ -84,19 +83,6 @@ model_credential_schema:
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: context_size
label:
zh_Hans: 模型上下文长度
en_US: Model context size
required: true
show_on:
- variable: __model_type
value: rerank
type: text-input
default: '4096'
placeholder:
zh_Hans: 在此输入您的模型上下文长度
en_US: Enter your Model context size
- variable: max_tokens_to_sample
label:
zh_Hans: 最大 token 上限

View File

@@ -1,159 +0,0 @@
from json import dumps
from typing import Optional
import httpx
from requests import post
from yarl import URL
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
class OAICompatRerankModel(RerankModel):
"""
rerank model API is compatible with Jina rerank model API. So copy the JinaRerankModel class code here.
we need enhance for llama.cpp , which return raw score, not normalize score 0~1. It seems Dify need it
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n documents to return
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=[])
server_url = credentials["endpoint_url"]
model_name = model
if not server_url:
raise CredentialsValidateFailedError("server_url is required")
if not model_name:
raise CredentialsValidateFailedError("model_name is required")
url = server_url
headers = {"Authorization": f"Bearer {credentials.get('api_key')}", "Content-Type": "application/json"}
# TODO: Do we need truncate docs to avoid llama.cpp return error?
data = {"model": model_name, "query": query, "documents": docs, "top_n": top_n}
try:
response = post(str(URL(url) / "rerank"), headers=headers, data=dumps(data), timeout=60)
response.raise_for_status()
results = response.json()
rerank_documents = []
scores = [result["relevance_score"] for result in results["results"]]
# Min-Max Normalization: Normalize scores to 0 ~ 1.0 range
min_score = min(scores)
max_score = max(scores)
score_range = max_score - min_score if max_score != min_score else 1.0 # Avoid division by zero
for result in results["results"]:
index = result["index"]
# Retrieve document text (fallback if llama.cpp rerank doesn't return it)
text = result.get("document", {}).get("text", docs[index])
# Normalize the score
normalized_score = (result["relevance_score"] - min_score) / score_range
# Create RerankDocument object with normalized score
rerank_document = RerankDocument(
index=index,
text=text,
score=normalized_score,
)
# Apply threshold (if defined)
if score_threshold is None or normalized_score >= score_threshold:
rerank_documents.append(rerank_document)
# Sort rerank_documents by normalized score in descending order
rerank_documents.sort(key=lambda doc: doc.score, reverse=True)
return RerankResult(model=model, docs=rerank_documents)
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(
model=model,
credentials=credentials,
query="What is the capital of the United States?",
docs=[
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
"Census, Carson City had a population of 55,274.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
"are a political division controlled by the United States. Its capital is Saipan.",
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
"""
return {
InvokeConnectionError: [httpx.ConnectError],
InvokeServerUnavailableError: [httpx.RemoteProtocolError],
InvokeRateLimitError: [],
InvokeAuthorizationError: [httpx.HTTPStatusError],
InvokeBadRequestError: [httpx.RequestError],
}
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.RERANK,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={},
)
return entity

View File

@@ -7,7 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,

View File

@@ -5,7 +5,7 @@ from typing import Optional
from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import (

View File

@@ -35,15 +35,6 @@ parameter_rules:
help:
zh_Hans: 控制生成结果的随机性。数值越小随机性越弱数值越大随机性越强。一般而言top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
default: 0

View File

@@ -18,15 +18,6 @@ parameter_rules:
min: 0
max: 1
default: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
min: 1

View File

@@ -14,15 +14,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -14,15 +14,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -14,15 +14,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -16,15 +16,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -15,15 +15,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -15,15 +15,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -10,15 +10,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true

View File

@@ -10,15 +10,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true

View File

@@ -10,15 +10,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true

View File

@@ -10,15 +10,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true

View File

@@ -10,15 +10,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
required: true

View File

@@ -18,15 +18,6 @@ parameter_rules:
default: 1
min: 0
max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
default: 1024

View File

@@ -18,15 +18,6 @@ parameter_rules:
default: 1
min: 0
max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
default: 1024

View File

@@ -19,15 +19,6 @@ parameter_rules:
default: 1
min: 0
max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens
use_template: max_tokens
default: 1024

View File

@@ -12,15 +12,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -12,15 +12,6 @@ parameter_rules:
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty
use_template: presence_penalty
- name: frequency_penalty

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -7,7 +7,7 @@ from urllib.parse import urljoin
import numpy as np
import requests
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import (
AIModelEntity,

View File

@@ -4,7 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -14,7 +14,6 @@ from core.model_runtime.errors.invoke import (
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
from core.model_runtime.model_providers.sagemaker.sagemaker import generate_presigned_url
@@ -78,8 +77,7 @@ class SageMakerSpeech2TextModel(Speech2TextModel):
json_obj = json.loads(json_str)
asr_text = json_obj["text"]
except Exception as e:
logger.exception(f"failed to invoke speech2text model, {e}")
raise CredentialsValidateFailedError(str(e))
logger.exception(f"Exception {e}, line : {line}")
return asr_text

View File

@@ -6,7 +6,7 @@ from typing import Any, Optional
import boto3
from core.entities.embedding_type import EmbeddingInputType
from core.embedding.embedding_constant import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

View File

@@ -21,15 +21,6 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty
use_template: frequency_penalty
pricing:

Some files were not shown because too many files have changed in this diff Show More