Compare commits


18 Commits
0.4.6 ... 0.4.7

Author SHA1 Message Date
takatost
c51e179db8 bump version to 0.4.7 (#2045) 2024-01-16 01:13:10 +08:00
takatost
b582fc13c3 fix: qwen top_p min/max wrong (#2044) 2024-01-16 01:12:55 +08:00
Jyong
add33cb5e6 fix SQL slow query (#2043)
Co-authored-by: jyong <jyong@dify.ai>
2024-01-16 00:59:28 +08:00
Garfield Dai
83105d0d8f fix: dataset and moderation. (#2042) 2024-01-15 21:53:31 +08:00
Joel
7b0818b8e5 feat: fix debug rerank params error (#2041) 2024-01-15 20:27:22 +08:00
takatost
28cd3a8c9f fix: dependencies security problems (#2040) 2024-01-15 19:26:08 +08:00
crazywoola
0355645a0e doc: replace readme images (#2039) 2024-01-15 17:38:22 +08:00
Jyong
cb7a608d75 ascii filter Unicode U+FFFE (#2038)
Co-authored-by: jyong <jyong@dify.ai>
2024-01-15 16:52:18 +08:00
crazywoola
bdb0d77227 doc: replace readme images (#2030) 2024-01-15 12:23:30 +08:00
Yeuoly
149102927b fix: openai tool tokens (#2026) 2024-01-14 15:51:05 +08:00
Vikey Chen
d8c0d722d2 fix: datasets indexing-status api document (#2019) 2024-01-14 09:43:52 +08:00
Garfield Dai
cb7be3767c feat: huggingface llm add new params. (#2014) 2024-01-12 21:15:07 +08:00
takatost
34bf2877c8 fix: tongyi stream generate not incremental and add qwen max models (#2013) 2024-01-12 19:19:12 +08:00
killpanda
3ebec8fa41 fixup /stop api (#2012)
Co-authored-by: mayue <mayue05@qiyi.com>
2024-01-12 19:10:42 +08:00
Mark Sun
f877d19c6a Update CONTRIBUTING.md (#2010) 2024-01-12 19:01:29 +08:00
Jyong
a63a9c7d45 text spliter length method use default embedding model tokenizer (#2011)
Co-authored-by: jyong <jyong@dify.ai>
2024-01-12 18:45:34 +08:00
takatost
1779cea6e3 fix: model provider credentials null value validate failed (#2009) 2024-01-12 16:48:38 +08:00
Ricky
26eff330f9 fix: chat log wont show up (#2007) 2024-01-12 14:36:56 +08:00
30 changed files with 442 additions and 103 deletions

View File

@@ -63,4 +63,4 @@ If you see a model provider not yet supported by Dify that you'd like to use, fo
### i18n (Internationalization) Support
We are looking for contributors to help with translations in other languages. If you are interested in helping, please join the [Discord Community Server](https://discord.gg/AhzKf7dNgk) and let us know.
Also check out the [Frontend i18n README]((web/i18n/README_EN.md)) for more information.
Also check out the [Frontend i18n README](web/i18n/README_EN.md) for more information.

View File

@@ -87,7 +87,7 @@ class Config:
# ------------------------
# General Configurations.
# ------------------------
self.CURRENT_VERSION = "0.4.6"
self.CURRENT_VERSION = "0.4.7"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')

View File

@@ -13,7 +13,7 @@ from core.application_queue_manager import ApplicationQueueManager
from core.entities.application_entities import InvokeFrom
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from flask import Response, stream_with_context
from flask import Response, stream_with_context, request
from flask_restful import reqparse
from libs.helper import uuid_value
from services.completion_service import CompletionService
@@ -75,11 +75,13 @@ class CompletionApi(AppApiResource):
class CompletionStopApi(AppApiResource):
def post(self, app_model, end_user, task_id):
def post(self, app_model, _, task_id):
if app_model.mode != 'completion':
raise AppUnavailableError()
ApplicationQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user.id)
end_user_id = request.get_json().get('user')
ApplicationQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user_id)
return {'result': 'success'}, 200
@@ -139,11 +141,13 @@ class ChatApi(AppApiResource):
class ChatStopApi(AppApiResource):
def post(self, app_model, end_user, task_id):
def post(self, app_model, _, task_id):
if app_model.mode != 'chat':
raise NotChatAppError()
ApplicationQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user.id)
end_user_id = request.get_json().get('user')
ApplicationQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user_id)
return {'result': 'success'}, 200
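
The hunk above changes both service-API stop endpoints to read the end-user id from the request body rather than from the injected `end_user` object. As a rough, hypothetical client-side sketch (host and route are placeholders, not confirmed by this diff), the `user` field is what `request.get_json().get('user')` picks up on the server:

```python
# Hypothetical client call; base URL and route are placeholders.
import requests

resp = requests.post(
    "https://<your-dify-host>/v1/chat-messages/<task_id>/stop",  # assumed route
    headers={"Authorization": "Bearer <service-api-key>"},
    json={"user": "end-user-123"},  # read server-side via request.get_json().get('user')
)
print(resp.json())  # expected: {'result': 'success'}
```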

View File

@@ -146,7 +146,7 @@ class BasicApplicationRunner(AppRunner):
# get context from datasets
context = None
if app_orchestration_config.dataset:
if app_orchestration_config.dataset and app_orchestration_config.dataset.dataset_ids:
context = self.retrieve_dataset_context(
tenant_id=app_record.tenant_id,
app_record=app_record,

View File

@@ -165,7 +165,7 @@ class ProviderConfiguration(BaseModel):
if value == '[__HIDDEN__]' and key in original_credentials:
credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])
model_provider_factory.provider_credentials_validate(
credentials = model_provider_factory.provider_credentials_validate(
self.provider.provider,
credentials
)
@@ -308,24 +308,13 @@ class ProviderConfiguration(BaseModel):
if value == '[__HIDDEN__]' and key in original_credentials:
credentials[key] = encrypter.decrypt_token(self.tenant_id, original_credentials[key])
model_provider_factory.model_credentials_validate(
credentials = model_provider_factory.model_credentials_validate(
provider=self.provider.provider,
model_type=model_type,
model=model,
credentials=credentials
)
model_schema = (
model_provider_factory.get_provider_instance(self.provider.provider)
.get_model_instance(model_type)._get_customizable_model_schema(
model=model,
credentials=credentials
)
)
if model_schema:
credentials['schema'] = json.dumps(encoders.jsonable_encoder(model_schema))
for key, value in credentials.items():
if key in provider_credential_secret_variables:
credentials[key] = encrypter.encrypt_token(self.tenant_id, value)

View File

@@ -13,7 +13,7 @@ from core.docstore.dataset_docstore import DatasetDocumentStore
from core.errors.error import ProviderTokenNotInitError
from core.generator.llm_generator import LLMGenerator
from core.index.index import IndexBuilder
from core.model_manager import ModelManager
from core.model_manager import ModelManager, ModelInstance
from core.model_runtime.entities.model_entities import ModelType, PriceType
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -61,8 +61,24 @@ class IndexingRunner:
# load file
text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')
# get embedding model instance
embedding_model_instance = None
if dataset.indexing_technique == 'high_quality':
if dataset.embedding_model_provider:
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=dataset.tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
else:
embedding_model_instance = self.model_manager.get_default_model_instance(
tenant_id=dataset.tenant_id,
model_type=ModelType.TEXT_EMBEDDING,
)
# get splitter
splitter = self._get_splitter(processing_rule)
splitter = self._get_splitter(processing_rule, embedding_model_instance)
# split to documents
documents = self._step_split(
@@ -121,8 +137,24 @@ class IndexingRunner:
# load file
text_docs = self._load_data(dataset_document, processing_rule.mode == 'automatic')
# get embedding model instance
embedding_model_instance = None
if dataset.indexing_technique == 'high_quality':
if dataset.embedding_model_provider:
embedding_model_instance = self.model_manager.get_model_instance(
tenant_id=dataset.tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model
)
else:
embedding_model_instance = self.model_manager.get_default_model_instance(
tenant_id=dataset.tenant_id,
model_type=ModelType.TEXT_EMBEDDING,
)
# get splitter
splitter = self._get_splitter(processing_rule)
splitter = self._get_splitter(processing_rule, embedding_model_instance)
# split to documents
documents = self._step_split(
@@ -253,7 +285,7 @@ class IndexingRunner:
text_docs = FileExtractor.load(file_detail, is_automatic=processing_rule.mode == 'automatic')
# get splitter
splitter = self._get_splitter(processing_rule)
splitter = self._get_splitter(processing_rule, embedding_model_instance)
# split to documents
documents = self._split_to_documents_for_estimate(
@@ -384,7 +416,7 @@ class IndexingRunner:
)
# get splitter
splitter = self._get_splitter(processing_rule)
splitter = self._get_splitter(processing_rule, embedding_model_instance)
# split to documents
documents = self._split_to_documents_for_estimate(
@@ -499,10 +531,13 @@ class IndexingRunner:
def filter_string(self, text):
text = re.sub(r'<\|', '<', text)
text = re.sub(r'\|>', '>', text)
text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\x80-\xFF]', '', text)
text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\xEF\xBF\xBE]', '', text)
# Unicode U+FFFE
text = re.sub(u'\uFFFE', '', text)
return text
def _get_splitter(self, processing_rule: DatasetProcessRule) -> TextSplitter:
def _get_splitter(self, processing_rule: DatasetProcessRule,
embedding_model_instance: Optional[ModelInstance]) -> TextSplitter:
"""
Get the NodeParser object according to the processing rule.
"""
@@ -517,19 +552,20 @@ class IndexingRunner:
if separator:
separator = separator.replace('\\n', '\n')
character_splitter = FixedRecursiveCharacterTextSplitter.from_gpt2_encoder(
character_splitter = FixedRecursiveCharacterTextSplitter.from_encoder(
chunk_size=segmentation["max_tokens"],
chunk_overlap=0,
fixed_separator=separator,
separators=["\n\n", "", ".", " ", ""]
separators=["\n\n", "", ".", " ", ""],
embedding_model_instance=embedding_model_instance
)
else:
# Automatic segmentation
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_gpt2_encoder(
character_splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
chunk_size=DatasetProcessRule.AUTOMATIC_RULES['segmentation']['max_tokens'],
chunk_overlap=0,
separators=["\n\n", "", ".", " ", ""]
separators=["\n\n", "", ".", " ", ""],
embedding_model_instance=embedding_model_instance
)
return character_splitter
@@ -714,7 +750,7 @@ class IndexingRunner:
return text
def format_split_text(self, text):
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
regex = r"Q\d+:\s*(.*?)\s*A\d+:\s*([\s\S]*?)(?=Q\d+:|$)"
matches = re.findall(regex, text, re.UNICODE)
return [

View File

@@ -149,8 +149,8 @@ class ParameterRule(BaseModel):
help: Optional[I18nObject] = None
required: bool = False
default: Optional[Any] = None
min: Optional[float | int] = None
max: Optional[float | int] = None
min: Optional[float] = None
max: Optional[float] = None
precision: Optional[int] = None
options: list[str] = []

View File

@@ -1,6 +1,4 @@
import decimal
import json
import logging
import os
from abc import ABC, abstractmethod
from typing import Optional
@@ -12,7 +10,6 @@ from core.model_runtime.entities.model_entities import (AIModelEntity, DefaultPa
PriceConfig, PriceInfo, PriceType)
from core.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from pydantic import ValidationError
class AIModel(ABC):
@@ -54,14 +51,16 @@ class AIModel(ABC):
:param error: model invoke error
:return: unified error
"""
provider_name = self.__class__.__module__.split('.')[-3]
for invoke_error, model_errors in self._invoke_error_mapping.items():
if isinstance(error, tuple(model_errors)):
if invoke_error == InvokeAuthorizationError:
return invoke_error(description="Incorrect model credentials provided, please check and try again. ")
return invoke_error(description=f"[{provider_name}] Incorrect model credentials provided, please check and try again. ")
return invoke_error(description=f"{invoke_error.description}: {str(error)}")
return invoke_error(description=f"[{provider_name}] {invoke_error.description}, {str(error)}")
return InvokeError(description=f"Error: {str(error)}")
return InvokeError(description=f"[{provider_name}] Error: {str(error)}")
def get_price(self, model: str, credentials: dict, price_type: PriceType, tokens: int) -> PriceInfo:
"""

View File

@@ -134,7 +134,55 @@ class HuggingfaceHubLargeLanguageModel(_CommonHuggingfaceHub, LargeLanguageModel
precision=0,
)
return [temperature_rule, top_k_rule, top_p_rule]
max_new_tokens = ParameterRule(
name='max_new_tokens',
label={
'en_US': 'Max New Tokens',
'zh_Hans': '最大新标记',
},
type='int',
help={
'en_US': 'Maximum number of generated tokens.',
'zh_Hans': '生成的标记的最大数量。',
},
required=False,
default=20,
min=1,
max=4096,
precision=0,
)
seed = ParameterRule(
name='seed',
label={
'en_US': 'Random sampling seed',
'zh_Hans': '随机采样种子',
},
type='int',
help={
'en_US': 'Random sampling seed.',
'zh_Hans': '随机采样种子。',
},
required=False,
precision=0,
)
repetition_penalty = ParameterRule(
name='repetition_penalty',
label={
'en_US': 'Repetition Penalty',
'zh_Hans': '重复惩罚',
},
type='float',
help={
'en_US': 'The parameter for repetition penalty. 1.0 means no penalty.',
'zh_Hans': '重复惩罚的参数。1.0 表示没有惩罚。',
},
required=False,
precision=1,
)
return [temperature_rule, top_k_rule, top_p_rule, max_new_tokens, seed, repetition_penalty]
def _handle_generate_stream_response(self,
model: str,

View File

@@ -61,7 +61,7 @@ class ModelProviderFactory:
# return providers
return providers
def provider_credentials_validate(self, provider: str, credentials: dict) -> None:
def provider_credentials_validate(self, provider: str, credentials: dict) -> dict:
"""
Validate provider credentials
@@ -80,13 +80,15 @@ class ModelProviderFactory:
# validate provider credential schema
validator = ProviderCredentialSchemaValidator(provider_credential_schema)
validator.validate_and_filter(credentials)
filtered_credentials = validator.validate_and_filter(credentials)
# validate the credentials, raise exception if validation failed
model_provider_instance.validate_provider_credentials(credentials)
model_provider_instance.validate_provider_credentials(filtered_credentials)
return filtered_credentials
def model_credentials_validate(self, provider: str, model_type: ModelType,
model: str, credentials: dict) -> None:
model: str, credentials: dict) -> dict:
"""
Validate model credentials
@@ -107,13 +109,15 @@ class ModelProviderFactory:
# validate model credential schema
validator = ModelCredentialSchemaValidator(model_type, model_credential_schema)
validator.validate_and_filter(credentials)
filtered_credentials = validator.validate_and_filter(credentials)
# get model instance of the model type
model_instance = model_provider_instance.get_model_instance(model_type)
# call validate_credentials method of model type to validate credentials, raise exception if validation failed
model_instance.validate_credentials(model, credentials)
model_instance.validate_credentials(model, filtered_credentials)
return filtered_credentials
def get_models(self,
provider: Optional[str] = None,

View File

@@ -765,7 +765,6 @@ class OpenAILargeLanguageModel(_CommonOpenAI, LargeLanguageModel):
num_tokens = 0
for tool in tools:
num_tokens += len(encoding.encode('type'))
num_tokens += len(encoding.encode(tool.get("type")))
num_tokens += len(encoding.encode('function'))
# calculate num tokens for function object

View File

@@ -1,8 +1,8 @@
from http import HTTPStatus
from typing import Generator, List, Optional, Union
import dashscope
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from dashscope import get_tokenizer
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMMode
from core.model_runtime.entities.message_entities import (AssistantPromptMessage, PromptMessage, PromptMessageTool,
SystemPromptMessage, UserPromptMessage)
from core.model_runtime.errors.invoke import (InvokeAuthorizationError, InvokeBadRequestError, InvokeConnectionError,
@@ -51,19 +51,12 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
:param tools: tools for tool calling
:return:
"""
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
tokenizer = get_tokenizer(model)
response = dashscope.Tokenization.call(
model=model,
prompt=self._convert_messages_to_prompt(prompt_messages),
**credentials_kwargs
)
if response.status_code == HTTPStatus.OK:
return response['usage']['input_tokens']
else:
raise self._invoke_error_mapping[InvokeBadRequestError][0](response['message'])
# convert string to token ids
tokens = tokenizer.encode(self._convert_messages_to_prompt(prompt_messages))
return len(tokens)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
@@ -119,14 +112,22 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
params = {
'model': model,
'prompt': self._convert_messages_to_prompt(prompt_messages),
**model_parameters,
**credentials_kwargs
}
mode = self.get_model_mode(model, credentials)
if mode == LLMMode.CHAT:
params['messages'] = self._convert_prompt_messages_to_tongyi_messages(prompt_messages)
else:
params['prompt'] = self._convert_messages_to_prompt(prompt_messages)
if stream:
responses = stream_generate_with_retry(
client,
stream=True,
incremental_output=True,
**params
)
@@ -267,6 +268,35 @@ class TongyiLargeLanguageModel(LargeLanguageModel):
# trim off the trailing ' ' that might come from the "Assistant: "
return text.rstrip()
def _convert_prompt_messages_to_tongyi_messages(self, prompt_messages: list[PromptMessage]) -> list[dict]:
"""
Convert prompt messages to tongyi messages
:param prompt_messages: prompt messages
:return: tongyi messages
"""
tongyi_messages = []
for prompt_message in prompt_messages:
if isinstance(prompt_message, SystemPromptMessage):
tongyi_messages.append({
'role': 'system',
'content': prompt_message.content,
})
elif isinstance(prompt_message, UserPromptMessage):
tongyi_messages.append({
'role': 'user',
'content': prompt_message.content,
})
elif isinstance(prompt_message, AssistantPromptMessage):
tongyi_messages.append({
'role': 'assistant',
'content': prompt_message.content,
})
else:
raise ValueError(f"Got unknown type {prompt_message}")
return tongyi_messages
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""

View File

@@ -0,0 +1,59 @@
model: qwen-max-1201
label:
en_US: qwen-max-1201
model_type: llm
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
default: 1.0
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: top_p
use_template: top_p
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: max_tokens
use_template: max_tokens
default: 1500
min: 1
max: 6000
help:
zh_Hans: 用于限制模型生成token的数量max_tokens设置的是生成上限并不表示一定会生成这么多的token数量。
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。默认不传递该参数取值为None或当top_k大于100时表示不启用top_k策略此时仅有top_p策略生效。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
required: false
- name: seed
label:
zh_Hans: 随机种子
en_US: Random seed
type: int
help:
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
required: false
- name: repetition_penalty
label:
en_US: Repetition penalty
type: float
default: 1.1
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
required: false

View File

@@ -0,0 +1,59 @@
model: qwen-max-longcontext
label:
en_US: qwen-max-longcontext
model_type: llm
model_properties:
mode: chat
context_size: 30000
parameter_rules:
- name: temperature
use_template: temperature
default: 1.0
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: top_p
use_template: top_p
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: max_tokens
use_template: max_tokens
default: 2000
min: 1
max: 28000
help:
zh_Hans: 用于限制模型生成token的数量max_tokens设置的是生成上限并不表示一定会生成这么多的token数量。
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。默认不传递该参数取值为None或当top_k大于100时表示不启用top_k策略此时仅有top_p策略生效。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
required: false
- name: seed
label:
zh_Hans: 随机种子
en_US: Random seed
type: int
help:
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
required: false
- name: repetition_penalty
label:
en_US: Repetition penalty
type: float
default: 1.1
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
required: false

View File

@@ -0,0 +1,59 @@
model: qwen-max
label:
en_US: qwen-max
model_type: llm
model_properties:
mode: chat
context_size: 8192
parameter_rules:
- name: temperature
use_template: temperature
default: 1.0
min: 0.0
max: 2.0
help:
zh_Hans: 用于控制随机性和多样性的程度。具体来说temperature值控制了生成文本时对每个候选词的概率分布进行平滑的程度。较高的temperature值会降低概率分布的峰值使得更多的低概率词被选择生成结果更加多样化而较低的temperature值则会增强概率分布的峰值使得高概率词更容易被选择生成结果更加确定。
en_US: Used to control the degree of randomness and diversity. Specifically, the temperature value controls the degree to which the probability distribution of each candidate word is smoothed when generating text. A higher temperature value will reduce the peak value of the probability distribution, allowing more low-probability words to be selected, and the generated results will be more diverse; while a lower temperature value will enhance the peak value of the probability distribution, making it easier for high-probability words to be selected. , the generated results are more certain.
- name: top_p
use_template: top_p
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
- name: max_tokens
use_template: max_tokens
default: 1500
min: 1
max: 6000
help:
zh_Hans: 用于限制模型生成token的数量max_tokens设置的是生成上限并不表示一定会生成这么多的token数量。
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 生成时采样候选集的大小。例如取值为50时仅将单次生成中得分最高的50个token组成随机采样的候选集。取值越大生成的随机性越高取值越小生成的确定性越高。默认不传递该参数取值为None或当top_k大于100时表示不启用top_k策略此时仅有top_p策略生效。
en_US: The size of the sample candidate set when generated. For example, when the value is 50, only the 50 highest-scoring tokens in a single generation form a randomly sampled candidate set. The larger the value, the higher the randomness generated; the smaller the value, the higher the certainty generated. This parameter is not passed by default. The value is None or when top_k is greater than 100, it means that the top_k policy is not enabled. At this time, only the top_p policy takes effect.
required: false
- name: seed
label:
zh_Hans: 随机种子
en_US: Random seed
type: int
help:
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
required: false
- name: repetition_penalty
label:
en_US: Repetition penalty
type: float
default: 1.1
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
required: false

View File

@@ -17,6 +17,8 @@ parameter_rules:
- name: top_p
use_template: top_p
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
@@ -24,7 +26,7 @@ parameter_rules:
use_template: max_tokens
default: 2000
min: 1
max: 2000
max: 30000
help:
zh_Hans: 用于限制模型生成token的数量max_tokens设置的是生成上限并不表示一定会生成这么多的token数量。
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +44,9 @@ parameter_rules:
zh_Hans: 随机种子
en_US: Random seed
type: int
default: 1234
help:
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
required: false
- name: repetition_penalty
label:
@@ -55,3 +56,8 @@ parameter_rules:
help:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
pricing:
input: '0.02'
output: '0.02'
unit: '0.001'
currency: RMB

View File

@@ -17,6 +17,8 @@ parameter_rules:
- name: top_p
use_template: top_p
default: 0.8
min: 0.1
max: 0.9
help:
zh_Hans: 生成过程中核采样方法概率阈值例如取值为0.8时仅保留概率加起来大于等于0.8的最可能token的最小集合作为候选集。取值范围为0,1.0),取值越大,生成的随机性越高;取值越低,生成的确定性越高。
en_US: The probability threshold of the kernel sampling method during the generation process. For example, when the value is 0.8, only the smallest set of the most likely tokens with a sum of probabilities greater than or equal to 0.8 is retained as the candidate set. The value range is (0,1.0). The larger the value, the higher the randomness generated; the lower the value, the higher the certainty generated.
@@ -24,7 +26,7 @@ parameter_rules:
use_template: max_tokens
default: 1500
min: 1
max: 1500
max: 6000
help:
zh_Hans: 用于限制模型生成token的数量max_tokens设置的是生成上限并不表示一定会生成这么多的token数量。
en_US: It is used to limit the number of tokens generated by the model. max_tokens sets the upper limit of generation, which does not mean that so many tokens will be generated.
@@ -42,10 +44,9 @@ parameter_rules:
zh_Hans: 随机种子
en_US: Random seed
type: int
default: 1234
help:
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。默认值 1234。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types. Default value 1234.
zh_Hans: 生成时随机数的种子用于控制模型生成的随机性。如果使用相同的种子每次运行生成的结果都将相同当需要复现模型的生成结果时可以使用相同的种子。seed参数支持无符号64位整数类型。
en_US: When generating, the random number seed is used to control the randomness of model generation. If you use the same seed, the results generated by each run will be the same; when you need to reproduce the results of the model, you can use the same seed. The seed parameter supports unsigned 64-bit integer types.
required: false
- name: repetition_penalty
label:
@@ -56,3 +57,8 @@ parameter_rules:
zh_Hans: 用于控制模型生成时的重复度。提高repetition_penalty时可以降低模型生成的重复度。1.0表示不做惩罚。
en_US: Used to control the repetition of model generation. Increasing the repetition_penalty can reduce the repetition of model generation. 1.0 means no punishment.
required: false
pricing:
input: '0.008'
output: '0.008'
unit: '0.001'
currency: RMB

View File

@@ -46,7 +46,7 @@ class CommonValidator:
:return: validated credential form schema value
"""
# If the variable does not exist in credentials
if credential_form_schema.variable not in credentials:
if credential_form_schema.variable not in credentials or not credentials[credential_form_schema.variable]:
# If required is True, an exception is thrown
if credential_form_schema.required:
raise ValueError(f'Variable {credential_form_schema.variable} is required')
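
The stricter check above also rejects credentials whose value is present but empty. A standalone illustration of the new condition (not the actual validator class):

```python
# Standalone illustration: an empty-string value is now treated as missing.
credentials = {"api_key": ""}
variable = "api_key"
missing = variable not in credentials or not credentials[variable]
assert missing  # previously only an absent key triggered the required-field error
```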

View File

@@ -30,7 +30,7 @@ class KeywordsModeration(Moderation):
if query:
inputs['query__'] = query
keywords_list = self.config['keywords'].split('\n')
keywords_list = [keyword for keyword in self.config['keywords'].split('\n') if keyword]
flagged = self._is_violated(inputs, keywords_list)
return ModerationInputsResult(flagged=flagged, action=ModerationAction.DIRECT_OUTPUT, preset_response=preset_response)
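
The one-line change above drops empty entries produced by blank lines in the configured keyword text, which would otherwise match every input. A standalone illustration:

```python
# Standalone illustration of the filtered split; not the actual Moderation class.
configured_keywords = "spam\n\nphishing\n"
keywords_list = [keyword for keyword in configured_keywords.split('\n') if keyword]
assert keywords_list == ["spam", "phishing"]  # no empty-string keyword slips through
```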

View File

@@ -1,8 +1,10 @@
"""Functionality for splitting text."""
from __future__ import annotations
from typing import Any, List, Optional
from typing import Any, List, Optional, cast
from core.model_manager import ModelInstance
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from langchain.text_splitter import (TS, AbstractSet, Collection, Literal, RecursiveCharacterTextSplitter,
TokenTextSplitter, Type, Union)
@@ -12,22 +14,30 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):
"""
This class is used to implement from_gpt2_encoder, to prevent using of tiktoken
"""
@classmethod
def from_gpt2_encoder(
cls: Type[TS],
encoding_name: str = "gpt2",
model_name: Optional[str] = None,
allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
disallowed_special: Union[Literal["all"], Collection[str]] = "all",
**kwargs: Any,
def from_encoder(
cls: Type[TS],
embedding_model_instance: Optional[ModelInstance],
allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
disallowed_special: Union[Literal["all"], Collection[str]] = "all",
**kwargs: Any,
):
def _token_encoder(text: str) -> int:
return GPT2Tokenizer.get_num_tokens(text)
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
return embedding_model_type_instance.get_num_tokens(
model=embedding_model_instance.model,
credentials=embedding_model_instance.credentials,
texts=[text]
)
else:
return GPT2Tokenizer.get_num_tokens(text)
if issubclass(cls, TokenTextSplitter):
extra_kwargs = {
"encoding_name": encoding_name,
"model_name": model_name,
"model_name": embedding_model_instance.model if embedding_model_instance else 'gpt2',
"allowed_special": allowed_special,
"disallowed_special": disallowed_special,
}
@@ -35,6 +45,7 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):
return cls(length_function=_token_encoder, **kwargs)
class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter):
def __init__(self, fixed_separator: str = "\n\n", separators: Optional[List[str]] = None, **kwargs: Any):
"""Create a new TextSplitter."""
@@ -90,4 +101,4 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter)
if _good_splits:
merged_text = self._merge_splits(_good_splits, separator)
final_chunks.extend(merged_text)
return final_chunks
return final_chunks
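
With `from_gpt2_encoder` replaced by `from_encoder`, the splitter's length function is driven by the selected embedding model's tokenizer when an instance is supplied, and falls back to the GPT-2 tokenizer otherwise. A usage sketch under that assumption (the import path is illustrative, not taken from this diff):

```python
# Illustrative usage of the new factory; the module path is assumed.
from core.splitter.fixed_text_splitter import EnhanceRecursiveCharacterTextSplitter

splitter = EnhanceRecursiveCharacterTextSplitter.from_encoder(
    embedding_model_instance=None,  # None -> length is measured with GPT2Tokenizer
    chunk_size=500,
    chunk_overlap=0,
    separators=["\n\n", "。", ".", " ", ""],
)
chunks = splitter.split_text("Some long document text ...")
```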

View File

@@ -94,6 +94,7 @@ class DatasetMultiRetrieverTool(BaseTool):
document_context_list = []
index_node_ids = [document.metadata['doc_id'] for document in all_documents]
segments = DocumentSegment.query.filter(
DocumentSegment.dataset_id.in_(self.dataset_ids),
DocumentSegment.completed_at.isnot(None),
DocumentSegment.status == 'completed',
DocumentSegment.enabled == True,

View File

@@ -9,12 +9,12 @@ flask-restful==0.3.9
flask-session2==1.3.1
flask-cors==3.0.10
gunicorn~=21.2.0
gevent~=22.10.2
gevent~=23.9.1
langchain==0.0.250
openai~=1.3.6
tiktoken~=0.5.2
psycopg2-binary~=2.9.6
pycryptodome==3.17
pycryptodome==3.19.1
python-dotenv==1.0.0
pytest~=7.3.1
pytest-mock~=3.11.1
@@ -44,14 +44,14 @@ readabilipy==0.2.0
google-search-results==2.4.2
replicate~=0.22.0
websocket-client~=1.7.0
dashscope~=1.13.5
dashscope[tokenizer]~=1.14.0
huggingface_hub~=0.16.4
transformers~=4.31.0
pandas==1.5.3
xinference-client~=0.6.4
safetensors==0.3.2
zhipuai==1.0.7
werkzeug==2.3.7
werkzeug==2.3.8
pymilvus==2.3.0
qdrant-client==1.6.4
cohere~=4.32

View File

@@ -327,10 +327,35 @@ def test_get_num_tokens():
UserPromptMessage(
content='Hello World!'
)
],
tools=[
PromptMessageTool(
name='get_weather',
description='Determine weather in my location',
parameters={
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state e.g. San Francisco, CA"
},
"unit": {
"type": "string",
"enum": [
"c",
"f"
]
}
},
"required": [
"location"
]
}
),
]
)
assert num_tokens == 21
assert num_tokens == 72
@pytest.mark.parametrize('setup_openai_mock', [['chat', 'remote']], indirect=True)
def test_fine_tuned_models(setup_openai_mock):

View File

@@ -2,7 +2,7 @@ version: '3.1'
services:
# API service
api:
image: langgenius/dify-api:0.4.6
image: langgenius/dify-api:0.4.7
restart: always
environment:
# Startup mode, 'api' starts the API server.
@@ -131,7 +131,7 @@ services:
# worker service
# The Celery worker for processing the queue.
worker:
image: langgenius/dify-api:0.4.6
image: langgenius/dify-api:0.4.7
restart: always
environment:
# Startup mode, 'worker' starts the Celery worker for processing the queue.
@@ -202,7 +202,7 @@ services:
# Frontend web application.
web:
image: langgenius/dify-web:0.4.6
image: langgenius/dify-web:0.4.7
restart: always
environment:
EDITION: SELF_HOSTED

Binary file not shown (readme image replaced). Before: 80 KiB, After: 82 KiB.

View File

@@ -539,7 +539,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/batch/{batch}/indexing-status'
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
method='GET'
title='Get document embedding status (progress)'
name='#indexing_status'
@@ -560,7 +560,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
>
```bash {{ title: 'cURL' }}

View File

@@ -539,7 +539,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
---
<Heading
url='/datasets/{dataset_id}/batch/{batch}/indexing-status'
url='/datasets/{dataset_id}/documents/{batch}/indexing-status'
method='GET'
title='获取文档嵌入状态(进度)'
name='#indexing_status'
@@ -560,7 +560,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
<CodeGroup
title="Request"
tag="GET"
label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
label="/datasets/{dataset_id}/documents/{batch}/indexing-status"
targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
>
```bash {{ title: 'cURL' }}

View File

@@ -46,7 +46,11 @@ const Log: FC<LogProps> = ({
`}>
<div
className='flex items-center justify-center rounded-md w-full h-full hover:bg-gray-100'
onClick={() => setShowModal(true)}
onClick={(e) => {
e.stopPropagation()
setShowModal(true)
}
}
>
<File02 className='w-4 h-4 text-gray-500' />
</div>

View File

@@ -103,7 +103,7 @@ const ParamsConfig: FC = () => {
const config = { ...tempDataSetConfigs }
if (config.retrieval_model === RETRIEVE_TYPE.multiWay && !config.reranking_model) {
config.reranking_model = {
reranking_provider_name: rerankDefaultModel?.provider,
reranking_provider_name: rerankDefaultModel?.provider?.provider,
reranking_model_name: rerankDefaultModel?.model,
} as any
}

View File

@@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "0.4.6",
"version": "0.4.7",
"private": true,
"scripts": {
"dev": "next dev",