Merge branch 'main' into feat/workflow

# Conflicts:
#	docker/docker-compose.yaml
This commit is contained in:
takatost
2024-03-29 21:18:16 +08:00
40 changed files with 876 additions and 141 deletions

View File

@@ -155,4 +155,4 @@ And that's it! Once your PR is merged, you will be featured as a contributor in
## Getting Help
If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat.

View File

@@ -152,4 +152,4 @@ Dify的后端使用Python编写使用[Flask](https://flask.palletsprojects.co
## 获取帮助
如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/8Tpq4AcN9c) 进行快速交流。

View File

@@ -131,7 +131,7 @@ At the same time, please consider supporting Dify by sharing it on social media
### Translations
We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
## Community & Support

View File

@@ -109,19 +109,20 @@ def reset_encrypt_key_pair():
click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
return
tenant = db.session.query(Tenant).first()
if not tenant:
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
return
tenants = db.session.query(Tenant).all()
for tenant in tenants:
if not tenant:
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
return
tenant.encrypt_public_key = generate_key_pair(tenant.id)
tenant.encrypt_public_key = generate_key_pair(tenant.id)
db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
db.session.query(ProviderModel).delete()
db.session.commit()
db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
db.session.commit()
click.echo(click.style('Congratulations! '
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
click.echo(click.style('Congratulations! '
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
@click.command('vdb-migrate', help='migrate vector db.')

View File

@@ -97,7 +97,7 @@ class Config:
# ------------------------
# General Configurations.
# ------------------------
self.CURRENT_VERSION = "0.5.10"
self.CURRENT_VERSION = "0.5.11"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')

View File

@@ -1,4 +1,5 @@
import enum
import importlib
import json
import logging
import os
@@ -6,7 +7,6 @@ from typing import Any, Optional
from pydantic import BaseModel
from core.utils.module_import_helper import load_single_subclass_from_source
from core.utils.position_helper import sort_to_dict_by_position_map
@@ -73,9 +73,17 @@ class Extensible:
# Dynamic loading {subdir_name}.py file and find the subclass of Extensible
py_path = os.path.join(subdir_path, extension_name + '.py')
try:
extension_class = load_single_subclass_from_source(extension_name, py_path, cls)
except Exception:
spec = importlib.util.spec_from_file_location(extension_name, py_path)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
extension_class = None
for name, obj in vars(mod).items():
if isinstance(obj, type) and issubclass(obj, cls) and obj != cls:
extension_class = obj
break
if not extension_class:
logging.warning(f"Missing subclass of {cls.__name__} in {py_path}, Skip.")
continue

View File

@@ -1,3 +1,4 @@
import concurrent.futures
import datetime
import json
import logging
@@ -650,17 +651,44 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
chunk_size = 100
chunk_size = 10
embedding_model_type_instance = None
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = []
for i in range(0, len(documents), chunk_size):
chunk_documents = documents[i:i + chunk_size]
futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
chunk_documents, dataset,
dataset_document, embedding_model_instance,
embedding_model_type_instance))
for i in range(0, len(documents), chunk_size):
for future in futures:
tokens += future.result()
indexing_end_at = time.perf_counter()
# update document status to completed
self._update_document_index_status(
document_id=dataset_document.id,
after_indexing_status="completed",
extra_update_params={
DatasetDocument.tokens: tokens,
DatasetDocument.completed_at: datetime.datetime.utcnow(),
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
}
)
def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
embedding_model_instance, embedding_model_type_instance):
with flask_app.app_context():
# check document is paused
self._check_document_paused_status(dataset_document.id)
chunk_documents = documents[i:i + chunk_size]
tokens = 0
if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance:
tokens += sum(
embedding_model_type_instance.get_num_tokens(
@@ -670,9 +698,9 @@ class IndexingRunner:
)
for document in chunk_documents
)
# load index
index_processor.load(dataset, chunk_documents)
db.session.add(dataset)
document_ids = [document.metadata['doc_id'] for document in chunk_documents]
db.session.query(DocumentSegment).filter(
@@ -687,18 +715,7 @@ class IndexingRunner:
db.session.commit()
indexing_end_at = time.perf_counter()
# update document status to completed
self._update_document_index_status(
document_id=dataset_document.id,
after_indexing_status="completed",
extra_update_params={
DatasetDocument.tokens: tokens,
DatasetDocument.completed_at: datetime.datetime.utcnow(),
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
}
)
return tokens
def _check_document_paused_status(self, document_id: str):
indexing_cache_key = 'document_{}_is_paused'.format(document_id)

View File

@@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider):
"""
try:
model_instance = self.get_model_instance(ModelType.LLM)
bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
# Use `amazon.titan-text-lite-v1` model by default for validating credentials
model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
model_instance.validate_credentials(
model=bedrock_validate_model_name,
model=model_for_validation,
credentials=credentials
)
except CredentialsValidateFailedError as ex:

View File

@@ -74,7 +74,7 @@ provider_credential_schema:
label:
en_US: Available Model Name
zh_Hans: 可用模型名称
type: text-input
type: secret-input
placeholder:
en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如amazon.titan-text-lite-v1)

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-instant-v1
label:
en_US: Claude Instant V1
en_US: Claude Instant 1
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: topP
use_template: top_p
- name: topK
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v1
label:
en_US: Claude V1
en_US: Claude 1
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v2:1
label:
en_US: Claude V2.1
en_US: Claude 2.1
model_type: llm
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v2
label:
en_US: Claude V2
en_US: Claude 2
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -72,16 +72,16 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
:return: full response or stream response chunk generator result
"""
# invoke claude 3 models via anthropic official SDK
if "anthropic.claude-3" in model:
return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user)
# invoke model
# invoke anthropic models via anthropic official SDK
if "anthropic" in model:
return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user)
# invoke other models via boto3 client
return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
def _generate_anthropic(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
"""
Invoke Claude3 large language model
Invoke Anthropic large language model
:param model: model name
:param credentials: model credentials
@@ -114,7 +114,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
# ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465
# extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user)
system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages)
system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages)
if system:
extra_model_kwargs['system'] = system
@@ -128,11 +128,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
)
if stream:
return self._handle_claude3_stream_response(model, credentials, response, prompt_messages)
return self._handle_claude_stream_response(model, credentials, response, prompt_messages)
return self._handle_claude3_response(model, credentials, response, prompt_messages)
return self._handle_claude_response(model, credentials, response, prompt_messages)
def _handle_claude3_response(self, model: str, credentials: dict, response: Message,
def _handle_claude_response(self, model: str, credentials: dict, response: Message,
prompt_messages: list[PromptMessage]) -> LLMResult:
"""
Handle llm chat response
@@ -172,7 +172,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
return response
def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
def _handle_claude_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
prompt_messages: list[PromptMessage], ) -> Generator:
"""
Handle llm chat stream response
@@ -231,7 +231,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
except Exception as ex:
raise InvokeError(str(ex))
def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
def _calc_claude_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
"""
Calculate response usage
@@ -275,7 +275,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
return usage
def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
"""
Convert prompt messages to dict list and system
"""
@@ -295,11 +295,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
prompt_message_dicts = []
for message in prompt_messages:
if not isinstance(message, SystemPromptMessage):
prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message))
prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message))
return system, prompt_message_dicts
def _convert_claude3_prompt_message_to_dict(self, message: PromptMessage) -> dict:
def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict:
"""
Convert PromptMessage to dict
"""
@@ -405,7 +405,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if "anthropic.claude-3" in model:
try:
self._invoke_claude3(model=model,
self._invoke_claude(model=model,
credentials=credentials,
prompt_messages=[{"role": "user", "content": "ping"}],
model_parameters={},

View File

@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
utility.drop_collection(self._collection_name, None, using=alias)
def text_exists(self, id: str) -> bool:
alias = uuid4().hex
if self._client_config.secure:
uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
else:
uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
from pymilvus import utility
if not utility.has_collection(self._collection_name, using=alias):
return False
result = self._client.query(collection_name=self._collection_name,
filter=f'metadata["doc_id"] == "{id}"',

View File

@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
)
def text_exists(self, id: str) -> bool:
all_collection_name = []
collections_response = self._client.get_collections()
collection_list = collections_response.collections
for collection in collection_list:
all_collection_name.append(collection.name)
if self._collection_name not in all_collection_name:
return False
response = self._client.retrieve(
collection_name=self._collection_name,
ids=[id]

View File

@@ -128,8 +128,8 @@ class Vector:
if kwargs.get('duplicate_check', False):
documents = self._filter_duplicate_texts(documents)
embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
self._vector_processor.add_texts(
documents=documents,
self._vector_processor.create(
texts=documents,
embeddings=embeddings,
**kwargs
)

View File

@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
def text_exists(self, id: str) -> bool:
collection_name = self._collection_name
schema = self._default_schema(self._collection_name)
# check whether the index already exists
if not self._client.schema.contains(schema):
return False
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
"path": ["doc_id"],
"operator": "Equal",

View File

@@ -53,7 +53,7 @@ class UnstructuredWordExtractor(BaseExtractor):
elements = partition_docx(filename=self._file_path)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -43,7 +43,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
pass
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -38,7 +38,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
elements = partition_md(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
elements = partition_msg(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredTextExtractor(BaseExtractor):
elements = partition_text(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
elements = partition_xml(filename=self._file_path, xml_keep_tags=True, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M15.6111 1.5837C17.2678 1.34703 18.75 2.63255 18.75 4.30606V5.68256C19.9395 6.31131 20.75 7.56102 20.75 9.00004V19C20.75 21.0711 19.0711 22.75 17 22.75H7C4.92893 22.75 3.25 21.0711 3.25 19V5.00004C3.25 4.99074 3.25017 4.98148 3.2505 4.97227C3.25017 4.95788 3.25 4.94344 3.25 4.92897C3.25 4.02272 3.91638 3.25437 4.81353 3.12621L15.6111 1.5837ZM4.75 6.75004V19C4.75 20.2427 5.75736 21.25 7 21.25H17C18.2426 21.25 19.25 20.2427 19.25 19V9.00004C19.25 7.7574 18.2426 6.75004 17 6.75004H4.75ZM5.07107 5.25004H17.25V4.30606C17.25 3.54537 16.5763 2.96104 15.8232 3.06862L5.02566 4.61113C4.86749 4.63373 4.75 4.76919 4.75 4.92897C4.75 5.10629 4.89375 5.25004 5.07107 5.25004ZM7.25 12C7.25 11.5858 7.58579 11.25 8 11.25H16C16.4142 11.25 16.75 11.5858 16.75 12C16.75 12.4143 16.4142 12.75 16 12.75H8C7.58579 12.75 7.25 12.4143 7.25 12ZM7.25 15.5C7.25 15.0858 7.58579 14.75 8 14.75H13.5C13.9142 14.75 14.25 15.0858 14.25 15.5C14.25 15.9143 13.9142 16.25 13.5 16.25H8C7.58579 16.25 7.25 15.9143 7.25 15.5Z" fill="#1C274D"/>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

View File

@@ -0,0 +1,21 @@
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.devdocs.tools.searchDevDocs import SearchDevDocsTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class DevDocsProvider(BuiltinToolProviderController):
    """Builtin tool provider for searching developer documentation on DevDocs."""

    def _validate_credentials(self, credentials: dict) -> None:
        """Validate the provider credentials with a probe lookup.

        Performs a sample DevDocs query (Python 3.12 `library/code` page) using
        the supplied credentials; any failure is surfaced as a
        ToolProviderCredentialValidationError.
        """
        try:
            probe_tool = SearchDevDocsTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                },
            )
            probe_tool.invoke(
                user_id='',
                tool_parameters={
                    "doc": "python~3.12",
                    "topic": "library/code",
                },
            )
        except Exception as exc:
            raise ToolProviderCredentialValidationError(str(exc))

View File

@@ -0,0 +1,10 @@
# Provider manifest for the DevDocs builtin tool
# (mapping nesting restored; the rendered diff had flattened the indentation,
# which made the file invalid YAML with duplicate top-level keys).
identity:
  author: Richards Tu
  name: devdocs
  label:
    en_US: DevDocs
    zh_Hans: DevDocs
  description:
    en_US: Get official developer documentations on DevDocs.
    zh_Hans: 从DevDocs获取官方开发者文档。
  icon: icon.svg

View File

@@ -0,0 +1,42 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class SearchDevDocsInput(BaseModel):
    """Declared input schema for the DevDocs search tool.

    NOTE(review): this model is defined but never used to validate
    `tool_parameters` in `_invoke` — consider wiring it in or removing it.
    """
    doc: str = Field(..., description="The name of the documentation.")
    topic: str = Field(..., description="The path of the section/topic.")


class SearchDevDocsTool(BuiltinTool):
    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Fetch a documentation page from DevDocs for the given doc name and topic.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (dict[str, Any]): The parameters for the tool,
                including 'doc' (e.g. "python~3.12") and 'topic'
                (e.g. "library/code").

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A summarized text
            message with the page content, or an error message when an
            argument is missing or the HTTP request fails.
        """
        doc = tool_parameters.get('doc', '')
        topic = tool_parameters.get('topic', '')

        if not doc:
            return self.create_text_message('Please provide the documentation name.')
        if not topic:
            return self.create_text_message('Please provide the topic path.')

        url = f"https://documents.devdocs.io/{doc}/{topic}.html"
        # Bounded timeout: the original call had none, so a stalled server
        # would hang the tool invocation indefinitely.
        response = requests.get(url, timeout=10)

        if response.status_code == 200:
            content = response.text
            return self.create_text_message(self.summary(user_id=user_id, content=content))
        else:
            return self.create_text_message(f"Failed to retrieve the documentation. Status code: {response.status_code}")

View File

@@ -0,0 +1,34 @@
# Tool manifest for the DevDocs search tool.
# Fixed during review: restored the mapping indentation lost in rendering,
# removed literal backslash-escaped quotes (invalid in plain YAML scalars),
# and corrected typos ("Don't for get" -> "Don't forget", "A tools" -> "A tool").
identity:
  name: searchDevDocs
  author: Richards Tu
  label:
    en_US: Search Developer Docs
    zh_Hans: 搜索开发者文档
description:
  human:
    en_US: A tool for searching for a specific topic and path in DevDocs based on the provided documentation name and topic. Don't forget to add some shots in the system prompt; for example, the documentation name should be like "vuex~4", "css", or "python~3.12", while the topic should be like "guide/actions" for Vuex 4, "display-box" for CSS, or "library/code" for Python 3.12.
    zh_Hans: 一个用于根据提供的文档名称和主题在DevDocs中搜索特定主题和路径的工具。不要忘记在系统提示词中添加一些示例例如文档名称应该是"vuex~4"、"css"或"python~3.12",而主题应该是"guide/actions"用于Vuex 4"display-box"用于CSS或"library/code"用于Python 3.12。
  llm: A tool for searching for specific developer documentation in DevDocs based on the provided documentation name and topic.
parameters:
  - name: doc
    type: string
    required: true
    label:
      en_US: Documentation name
      zh_Hans: 文档名称
    human_description:
      en_US: The name of the documentation.
      zh_Hans: 文档名称。
    llm_description: The name of the documentation, such as "vuex~4", "css", or "python~3.12". The exact value should be identified by the user.
    form: llm
  - name: topic
    type: string
    required: true
    label:
      en_US: Topic name
      zh_Hans: 主题名称
    human_description:
      en_US: The path of the section/topic.
      zh_Hans: 文档主题的路径。
    llm_description: The path of the section/topic, such as "guide/actions" for Vuex 4, "display-box" for CSS, or "library/code" for Python 3.12.
    form: llm

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 120 120"><style>.st0{fill:#376db6}.st1{fill:#4ca2da}.st2{fill:#91d8f4}.st3{fill:#1e5397}</style><path class="st0" d="M22.4 57.5h74.8v15.4H22.4z"/><path class="st1" d="M22.4 37.6h74.8V53H22.4z"/><path class="st2" d="M85.5 17H34.4c-6.6 0-12 5.5-12 12.3v4h74.8v-4C97.2 22.5 92 17 85.5 17z"/><path class="st3" d="M22.4 77.3v4c0 6.8 5.4 12.3 12 12.3h32v16.3l15.8-16.3h3.5c6.6 0 12-5.5 12-12.3v-4H22.4z"/></svg>

After

Width:  |  Height:  |  Size: 458 B

View File

@@ -0,0 +1,25 @@
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.stackexchange.tools.searchStackExQuestions import SearchStackExQuestionsTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class StackExchangeProvider(BuiltinToolProviderController):
    """Builtin tool provider for the Stack Exchange network."""

    def _validate_credentials(self, credentials: dict) -> None:
        """Validate the provider credentials with a probe search.

        Runs a minimal Stack Overflow title search using the supplied
        credentials; any failure is surfaced as a
        ToolProviderCredentialValidationError.
        """
        probe_parameters = {
            "intitle": "Test",
            "sort": "relevance",
            "order": "desc",
            "site": "stackoverflow",
            "accepted": True,
            "pagesize": 1,
        }
        try:
            probe_tool = SearchStackExQuestionsTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                },
            )
            probe_tool.invoke(user_id='', tool_parameters=probe_parameters)
        except Exception as exc:
            raise ToolProviderCredentialValidationError(str(exc))

View File

@@ -0,0 +1,10 @@
# Provider manifest for the Stack Exchange builtin tool
# (mapping nesting restored; the rendered diff had flattened the indentation,
# which made the file invalid YAML with duplicate top-level keys).
identity:
  author: Richards Tu
  name: stackexchange
  label:
    en_US: Stack Exchange
    zh_Hans: Stack Exchange
  description:
    en_US: Access questions and answers from the Stack Exchange and its sub-sites.
    zh_Hans: 从Stack Exchange和其子论坛获取问题和答案。
  icon: icon.svg

View File

@@ -0,0 +1,37 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class FetchAnsByStackExQuesIDInput(BaseModel):
    """Validated input parameters for the fetchAnsByStackExQuesID tool."""
    # Numeric ID of the Stack Exchange question whose answers are fetched.
    id: int = Field(..., description="The question ID")
    # Site identifier, e.g. "stackoverflow", "unix" (API `site` parameter).
    site: str = Field(..., description="The Stack Exchange site")
    # Sort direction accepted by the API: "asc" or "desc".
    order: str = Field(..., description="asc or desc")
    # Sort key accepted by the API: "activity", "votes", or "creation".
    sort: str = Field(..., description="activity, votes, creation")
    # Number of answers returned per page.
    pagesize: int = Field(..., description="Number of answers per page")
    # 1-based page number to retrieve.
    page: int = Field(..., description="Page number")
class FetchAnsByStackExQuesIDTool(BuiltinTool):
    """Fetch the answers for a specific Stack Exchange question ID."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Call the Stack Exchange /questions/{id}/answers endpoint.

        :param user_id: invoking user, forwarded to the summarizer
        :param tool_parameters: raw parameters, validated via
            FetchAnsByStackExQuesIDInput
        :return: a text message with the summarized API response, or an
            error message containing the HTTP status code on failure
        """
        # Renamed from `input` to avoid shadowing the builtin of that name.
        args = FetchAnsByStackExQuesIDInput(**tool_parameters)

        params = {
            "site": args.site,
            # Custom API filter token; required so the response actually
            # includes the answer bodies (default filters omit them).
            "filter": "!nNPvSNdWme",
            "order": args.order,
            "sort": args.sort,
            "pagesize": args.pagesize,
            "page": args.page,
        }

        response = requests.get(
            f"https://api.stackexchange.com/2.3/questions/{args.id}/answers",
            params=params,
            timeout=10,  # fail fast instead of hanging on a stalled connection
        )

        if response.status_code == 200:
            return self.create_text_message(self.summary(user_id=user_id, content=response.text))
        return self.create_text_message(f"API request failed with status code {response.status_code}")

View File

@@ -0,0 +1,189 @@
identity:
name: fetchAnsByStackExQuesID
author: Richards Tu
label:
en_US: Fetch Stack Exchange Answers
zh_Hans: 获取 Stack Exchange 答案
description:
human:
en_US: A tool for retrieving answers for a specific Stack Exchange question ID. Specify the question ID, Stack Exchange site, sorting order, number of results per page, and page number. Must be used with the searchStackExQuesID tool.
zh_Hans: 用于检索特定Stack Exchange问题ID的答案的工具。指定问题ID、Stack Exchange站点、排序顺序、每页结果数和页码。 必须与searchStackExQuesID工具一起使用。
llm: A tool for retrieving answers for a specific Stack Exchange question ID based on the provided parameters.
parameters:
- name: id
type: string
required: true
label:
en_US: Question ID
zh_Hans: 问题ID
human_description:
en_US: The ID of the Stack Exchange question to fetch answers for.
zh_Hans: 要获取答案的Stack Exchange问题的ID。
llm_description: The ID of the Stack Exchange question.
form: llm
- name: site
type: string
required: true
label:
en_US: Stack Exchange site
zh_Hans: Stack Exchange站点
human_description:
en_US: The Stack Exchange site the question is from, e.g. stackoverflow, unix, etc.
zh_Hans: 问题所在的Stack Exchange站点例如stackoverflow、unix等。
llm_description: The Stack Exchange site identifier.
options:
- value: stackoverflow
label:
en_US: stackoverflow
- value: serverfault
label:
en_US: serverfault
- value: superuser
label:
en_US: superuser
- value: askubuntu
label:
en_US: askubuntu
- value: unix
label:
en_US: unix
- value: cs
label:
en_US: cs
- value: softwareengineering
label:
en_US: softwareengineering
- value: codegolf
label:
en_US: codegolf
- value: codereview
label:
en_US: codereview
- value: cstheory
label:
en_US: cstheory
- value: security
label:
en_US: security
- value: cryptography
label:
en_US: cryptography
- value: reverseengineering
label:
en_US: reverseengineering
- value: datascience
label:
en_US: datascience
- value: devops
label:
en_US: devops
- value: ux
label:
en_US: ux
- value: dba
label:
en_US: dba
- value: gis
label:
en_US: gis
- value: webmasters
label:
en_US: webmasters
- value: arduino
label:
en_US: arduino
- value: raspberrypi
label:
en_US: raspberrypi
- value: networkengineering
label:
en_US: networkengineering
- value: iot
label:
en_US: iot
- value: tor
label:
en_US: tor
- value: sqa
label:
en_US: sqa
- value: mathoverflow
label:
en_US: mathoverflow
- value: math
label:
en_US: math
- value: mathematica
label:
en_US: mathematica
- value: dsp
label:
en_US: dsp
- value: gamedev
label:
en_US: gamedev
- value: robotics
label:
en_US: robotics
- value: genai
label:
en_US: genai
- value: computergraphics
label:
en_US: computergraphics
form: form
- name: filter
type: string
required: true
label:
en_US: Filter
zh_Hans: 过滤器
human_description:
en_US: This is required in order to actually get the body of the answer.
zh_Hans: 为了实际获取答案的正文,这是必需的。
llm_description: Required in order to actually get the body of the answer. Must be "!nNPvSNdWme".
form: llm
- name: order
type: string
required: true
label:
en_US: Sort direction
zh_Hans: 排序方向
human_description:
en_US: The direction to sort the answers - ascending or descending.
zh_Hans: 答案的排序方向 - 升序或降序。
llm_description: asc for ascending, desc for descending.
form: llm
- name: sort
type: string
required: true
label:
en_US: Sort order
zh_Hans: 排序
human_description:
en_US: The sort order for the answers - activity, votes, or creation date.
zh_Hans: 答案的排序顺序 - 活动、投票或创建日期。
llm_description: activity, votes, or creation.
form: llm
- name: pagesize
type: number
required: true
label:
en_US: Results per page
zh_Hans: 每页结果数
human_description:
en_US: The number of answers to return per page.
zh_Hans: 每页返回的答案数。
llm_description: The number of answers per page.
form: llm
- name: page
type: number
required: true
label:
en_US: Page number
zh_Hans: 页码
human_description:
en_US: The page number of answers to retrieve.
zh_Hans: 要检索的答案的页码。
llm_description: The page number to retrieve.
form: llm

View File

@@ -0,0 +1,43 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class SearchStackExQuestionsInput(BaseModel):
    """Validated input parameters for the searchStackExQuestions tool."""
    # Search query matched against question titles (API `intitle` parameter).
    intitle: str = Field(..., description="The search query.")
    # Sort key accepted by the API: relevance, activity, votes, or creation.
    sort: str = Field(..., description="The sort order - relevance, activity, votes, creation.")
    # Sort direction accepted by the API: "asc" or "desc".
    order: str = Field(..., description="asc or desc")
    # Site identifier, e.g. "stackoverflow", "unix".
    site: str = Field(..., description="The Stack Exchange site.")
    # NOTE(review): declared `str` but defaults to None — should be
    # Optional[str]; pydantic tolerates this, a type checker will not.
    tagged: str = Field(None, description="Semicolon-separated tags to include.")
    # NOTE(review): same Optional[str] concern as `tagged` above.
    nottagged: str = Field(None, description="Semicolon-separated tags to exclude.")
    # True limits results to questions that have an accepted answer.
    accepted: bool = Field(..., description="true for only accepted answers, false otherwise")
    # Number of results returned per page.
    pagesize: int = Field(..., description="Number of results per page")
class SearchStackExQuestionsTool(BuiltinTool):
    """Search questions on a Stack Exchange site."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Call the Stack Exchange /search endpoint.

        :param user_id: invoking user, forwarded to the summarizer
        :param tool_parameters: raw parameters, validated via
            SearchStackExQuestionsInput
        :return: a text message with the summarized API response, or an
            error message containing the HTTP status code on failure
        """
        # Renamed from `input` to avoid shadowing the builtin of that name.
        args = SearchStackExQuestionsInput(**tool_parameters)

        params = {
            "intitle": args.intitle,
            "sort": args.sort,
            "order": args.order,
            "site": args.site,
            "accepted": args.accepted,
            "pagesize": args.pagesize,
        }
        # Tag filters are optional; only forward them when provided so the
        # API does not receive empty parameters.
        if args.tagged:
            params["tagged"] = args.tagged
        if args.nottagged:
            params["nottagged"] = args.nottagged

        response = requests.get(
            "https://api.stackexchange.com/2.3/search",
            params=params,
            timeout=10,  # fail fast instead of hanging on a stalled connection
        )

        if response.status_code == 200:
            return self.create_text_message(self.summary(user_id=user_id, content=response.text))
        return self.create_text_message(f"API request failed with status code {response.status_code}")

View File

@@ -0,0 +1,200 @@
identity:
name: searchStackExQuestions
author: Richards Tu
label:
en_US: Search Stack Exchange Questions
zh_Hans: 搜索Stack Exchange问题
description:
human:
en_US: A tool for searching questions on a Stack Exchange site. Specify the search query, sorting order, tags to include or exclude, whether to search only for questions with accepted answers, the Stack Exchange site, and number of results per page.
zh_Hans: 在Stack Exchange站点上搜索问题的工具。指定搜索查询、排序顺序、要包含或排除的标签、是否仅搜索有已接受答案的问题、Stack Exchange站点以及每页结果数。
llm: A tool for searching questions on a Stack Exchange site based on the provided parameters.
parameters:
- name: intitle
type: string
required: true
label:
en_US: Search query
zh_Hans: 搜索查询
human_description:
en_US: The search query to use for finding questions.
zh_Hans: 用于查找问题的搜索查询。
llm_description: The search query to use.
form: llm
- name: sort
type: string
required: true
label:
en_US: Sort order
zh_Hans: 排序
human_description:
en_US: The sort order for the search results - relevance, activity, votes, or creation date.
zh_Hans: 搜索结果的排序顺序 - 相关性、活动、投票或创建日期。
llm_description: The sort order - relevance, activity, votes, or creation.
form: llm
- name: order
type: string
required: true
label:
en_US: Sort direction
zh_Hans: 排序方向
human_description:
en_US: The direction to sort - ascending or descending.
zh_Hans: 排序方向 - 升序或降序。
llm_description: asc for ascending, desc for descending.
form: llm
- name: site
type: string
required: true
label:
en_US: Stack Exchange site
zh_Hans: Stack Exchange 站点
human_description:
en_US: The Stack Exchange site to search, e.g. stackoverflow, unix, etc.
zh_Hans: 要搜索的Stack Exchange站点例如stackoverflow、unix等。
llm_description: The Stack Exchange site identifier.
options:
- value: stackoverflow
label:
en_US: stackoverflow
- value: serverfault
label:
en_US: serverfault
- value: superuser
label:
en_US: superuser
- value: askubuntu
label:
en_US: askubuntu
- value: unix
label:
en_US: unix
- value: cs
label:
en_US: cs
- value: softwareengineering
label:
en_US: softwareengineering
- value: codegolf
label:
en_US: codegolf
- value: codereview
label:
en_US: codereview
- value: cstheory
label:
en_US: cstheory
- value: security
label:
en_US: security
- value: cryptography
label:
en_US: cryptography
- value: reverseengineering
label:
en_US: reverseengineering
- value: datascience
label:
en_US: datascience
- value: devops
label:
en_US: devops
- value: ux
label:
en_US: ux
- value: dba
label:
en_US: dba
- value: gis
label:
en_US: gis
- value: webmasters
label:
en_US: webmasters
- value: arduino
label:
en_US: arduino
- value: raspberrypi
label:
en_US: raspberrypi
- value: networkengineering
label:
en_US: networkengineering
- value: iot
label:
en_US: iot
- value: tor
label:
en_US: tor
- value: sqa
label:
en_US: sqa
- value: mathoverflow
label:
en_US: mathoverflow
- value: math
label:
en_US: math
- value: mathematica
label:
en_US: mathematica
- value: dsp
label:
en_US: dsp
- value: gamedev
label:
en_US: gamedev
- value: robotics
label:
en_US: robotics
- value: genai
label:
en_US: genai
- value: computergraphics
label:
en_US: computergraphics
form: form
- name: tagged
type: string
required: false
label:
en_US: Include tags
zh_Hans: 包含标签
human_description:
en_US: A semicolon-separated list of tags that questions must have.
zh_Hans: 问题必须具有的标签的分号分隔列表。
llm_description: Semicolon-separated tags to include. Leave blank if not needed.
form: llm
- name: nottagged
type: string
required: false
label:
en_US: Exclude tags
zh_Hans: 排除标签
human_description:
en_US: A semicolon-separated list of tags to exclude from the search.
zh_Hans: 从搜索中排除的标签的分号分隔列表。
llm_description: Semicolon-separated tags to exclude. Leave blank if not needed.
form: llm
- name: accepted
type: boolean
required: true
label:
en_US: Has accepted answer
zh_Hans: 有已接受的答案
human_description:
en_US: Whether to limit to only questions that have an accepted answer.
zh_Hans: 是否限制为只有已接受答案的问题。
llm_description: true to limit to only questions with accepted answers, false otherwise.
form: llm
- name: pagesize
type: number
required: true
label:
en_US: Results per page
zh_Hans: 每页结果数
human_description:
en_US: The number of results to return per page.
zh_Hans: 每页返回的结果数。
llm_description: The number of results per page.
form: llm

View File

@@ -53,7 +53,7 @@ def login_required(func):
def decorated_view(*args, **kwargs):
auth_header = request.headers.get('Authorization')
admin_api_key_enable = os.getenv('ADMIN_API_KEY_ENABLE', default='False')
if admin_api_key_enable:
if admin_api_key_enable.lower() == 'true':
if auth_header:
if ' ' not in auth_header:
raise Unauthorized('Invalid Authorization header format. Expected \'Bearer <api-key>\' format.')

View File

@@ -435,11 +435,13 @@ class RegisterService:
if open_id is not None or provider is not None:
AccountService.link_account_integrate(provider, open_id, account)
if current_app.config['EDITION'] != 'SELF_HOSTED':
tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
TenantService.create_tenant_member(tenant, account, role='owner')
account.current_tenant = tenant
TenantService.create_tenant_member(tenant, account, role='owner')
account.current_tenant = tenant
tenant_was_created.send(tenant)
db.session.commit()
except Exception as e:
@@ -447,8 +449,6 @@ class RegisterService:
logging.error(f'Register failed: {e}')
raise AccountRegisterError(f'Registration failed: {e}') from e
tenant_was_created.send(tenant)
return account
@classmethod
@@ -461,7 +461,6 @@ class RegisterService:
name = email.split('@')[0]
account = cls.register(email=email, name=name, language=language, status=AccountStatus.PENDING)
# Create new tenant member for invited tenant
TenantService.create_tenant_member(tenant, account, role)
TenantService.switch_tenant(account, tenant.id)

View File

@@ -9,6 +9,7 @@ import {
$getRoot,
TextNode,
} from 'lexical'
import { CodeNode } from '@lexical/code'
import { LexicalComposer } from '@lexical/react/LexicalComposer'
import { RichTextPlugin } from '@lexical/react/LexicalRichTextPlugin'
import { ContentEditable } from '@lexical/react/LexicalContentEditable'
@@ -97,6 +98,7 @@ const PromptEditor: FC<PromptEditorProps> = ({
const initialConfig = {
namespace: 'prompt-editor',
nodes: [
CodeNode,
CustomTextNode,
{
replace: TextNode,

View File

@@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "0.5.10",
"version": "0.5.11",
"private": true,
"scripts": {
"dev": "next dev",