Merge branch 'main' into feat/workflow

# Conflicts:
#	docker/docker-compose.yaml
This commit is contained in:
takatost
2024-03-29 21:18:16 +08:00
40 changed files with 876 additions and 141 deletions

View File

@@ -155,4 +155,4 @@ And that's it! Once your PR is merged, you will be featured as a contributor in
## Getting Help
If you ever get stuck or got a burning question while contributing, simply shoot your queries our way via the related GitHub issue, or hop onto our [Discord](https://discord.gg/8Tpq4AcN9c) for a quick chat.

View File

@@ -152,4 +152,4 @@ Dify的后端使用Python编写使用[Flask](https://flask.palletsprojects.co
## 获取帮助
如果你在贡献过程中遇到困难或者有任何问题,可以通过相关的 GitHub 问题提出你的疑问,或者加入我们的 [Discord](https://discord.gg/8Tpq4AcN9c) 进行快速交流。

View File

@@ -131,7 +131,7 @@ At the same time, please consider supporting Dify by sharing it on social media
### Translations
We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
## Community & Support

View File

@@ -109,19 +109,20 @@ def reset_encrypt_key_pair():
click.echo(click.style('Sorry, only support SELF_HOSTED mode.', fg='red'))
return
tenant = db.session.query(Tenant).first()
if not tenant:
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
return
tenants = db.session.query(Tenant).all()
for tenant in tenants:
if not tenant:
click.echo(click.style('Sorry, no workspace found. Please enter /install to initialize.', fg='red'))
return
tenant.encrypt_public_key = generate_key_pair(tenant.id)
tenant.encrypt_public_key = generate_key_pair(tenant.id)
db.session.query(Provider).filter(Provider.provider_type == 'custom').delete()
db.session.query(ProviderModel).delete()
db.session.commit()
db.session.query(Provider).filter(Provider.provider_type == 'custom', Provider.tenant_id == tenant.id).delete()
db.session.query(ProviderModel).filter(ProviderModel.tenant_id == tenant.id).delete()
db.session.commit()
click.echo(click.style('Congratulations! '
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
click.echo(click.style('Congratulations! '
'the asymmetric key pair of workspace {} has been reset.'.format(tenant.id), fg='green'))
@click.command('vdb-migrate', help='migrate vector db.')

View File

@@ -97,7 +97,7 @@ class Config:
# ------------------------
# General Configurations.
# ------------------------
self.CURRENT_VERSION = "0.5.10"
self.CURRENT_VERSION = "0.5.11"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')

View File

@@ -1,4 +1,5 @@
import enum
import importlib
import json
import logging
import os
@@ -6,7 +7,6 @@ from typing import Any, Optional
from pydantic import BaseModel
from core.utils.module_import_helper import load_single_subclass_from_source
from core.utils.position_helper import sort_to_dict_by_position_map
@@ -73,9 +73,17 @@ class Extensible:
# Dynamic loading {subdir_name}.py file and find the subclass of Extensible
py_path = os.path.join(subdir_path, extension_name + '.py')
try:
extension_class = load_single_subclass_from_source(extension_name, py_path, cls)
except Exception:
spec = importlib.util.spec_from_file_location(extension_name, py_path)
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
extension_class = None
for name, obj in vars(mod).items():
if isinstance(obj, type) and issubclass(obj, cls) and obj != cls:
extension_class = obj
break
if not extension_class:
logging.warning(f"Missing subclass of {cls.__name__} in {py_path}, Skip.")
continue

View File

@@ -1,3 +1,4 @@
import concurrent.futures
import datetime
import json
import logging
@@ -650,17 +651,44 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
chunk_size = 100
chunk_size = 10
embedding_model_type_instance = None
if embedding_model_instance:
embedding_model_type_instance = embedding_model_instance.model_type_instance
embedding_model_type_instance = cast(TextEmbeddingModel, embedding_model_type_instance)
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
futures = []
for i in range(0, len(documents), chunk_size):
chunk_documents = documents[i:i + chunk_size]
futures.append(executor.submit(self._process_chunk, current_app._get_current_object(), index_processor,
chunk_documents, dataset,
dataset_document, embedding_model_instance,
embedding_model_type_instance))
for i in range(0, len(documents), chunk_size):
for future in futures:
tokens += future.result()
indexing_end_at = time.perf_counter()
# update document status to completed
self._update_document_index_status(
document_id=dataset_document.id,
after_indexing_status="completed",
extra_update_params={
DatasetDocument.tokens: tokens,
DatasetDocument.completed_at: datetime.datetime.utcnow(),
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
}
)
def _process_chunk(self, flask_app, index_processor, chunk_documents, dataset, dataset_document,
embedding_model_instance, embedding_model_type_instance):
with flask_app.app_context():
# check document is paused
self._check_document_paused_status(dataset_document.id)
chunk_documents = documents[i:i + chunk_size]
tokens = 0
if dataset.indexing_technique == 'high_quality' or embedding_model_type_instance:
tokens += sum(
embedding_model_type_instance.get_num_tokens(
@@ -670,9 +698,9 @@ class IndexingRunner:
)
for document in chunk_documents
)
# load index
index_processor.load(dataset, chunk_documents)
db.session.add(dataset)
document_ids = [document.metadata['doc_id'] for document in chunk_documents]
db.session.query(DocumentSegment).filter(
@@ -687,18 +715,7 @@ class IndexingRunner:
db.session.commit()
indexing_end_at = time.perf_counter()
# update document status to completed
self._update_document_index_status(
document_id=dataset_document.id,
after_indexing_status="completed",
extra_update_params={
DatasetDocument.tokens: tokens,
DatasetDocument.completed_at: datetime.datetime.utcnow(),
DatasetDocument.indexing_latency: indexing_end_at - indexing_start_at,
}
)
return tokens
def _check_document_paused_status(self, document_id: str):
indexing_cache_key = 'document_{}_is_paused'.format(document_id)

View File

@@ -17,9 +17,11 @@ class BedrockProvider(ModelProvider):
"""
try:
model_instance = self.get_model_instance(ModelType.LLM)
bedrock_validate_model_name = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
# Use `amazon.titan-text-lite-v1` model by default for validating credentials
model_for_validation = credentials.get('model_for_validation', 'amazon.titan-text-lite-v1')
model_instance.validate_credentials(
model=bedrock_validate_model_name,
model=model_for_validation,
credentials=credentials
)
except CredentialsValidateFailedError as ex:

View File

@@ -74,7 +74,7 @@ provider_credential_schema:
label:
en_US: Available Model Name
zh_Hans: 可用模型名称
type: text-input
type: secret-input
placeholder:
en_US: A model you have access to (e.g. amazon.titan-text-lite-v1) for validation.
zh_Hans: 为了进行验证,请输入一个您可用的模型名称 (例如amazon.titan-text-lite-v1)

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-instant-v1
label:
en_US: Claude Instant V1
en_US: Claude Instant 1
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: topP
use_template: top_p
- name: topK
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.0008'
output: '0.0024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v1
label:
en_US: Claude V1
en_US: Claude 1
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v2:1
label:
en_US: Claude V2.1
en_US: Claude 2.1
model_type: llm
model_properties:
mode: chat
context_size: 200000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -1,33 +1,50 @@
model: anthropic.claude-v2
label:
en_US: Claude V2
en_US: Claude 2
model_type: llm
model_properties:
mode: chat
context_size: 100000
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top K
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
default: 250
min: 0
max: 500
- name: max_tokens_to_sample
- name: max_tokens
use_template: max_tokens
required: true
type: int
default: 4096
min: 1
max: 4096
help:
zh_Hans: 停止前生成的最大令牌数。请注意Anthropic Claude 模型可能会在达到 max_tokens 的值之前停止生成令牌。不同的 Anthropic Claude 模型对此参数具有不同的最大值。
en_US: The maximum number of tokens to generate before stopping. Note that Anthropic Claude models might stop generating tokens before reaching the value of max_tokens. Different Anthropic Claude models have different maximum values for this parameter.
- name: temperature
use_template: temperature
required: false
type: float
default: 1
min: 0.0
max: 1.0
help:
zh_Hans: 生成内容的随机性。
en_US: The amount of randomness injected into the response.
- name: top_p
required: false
type: float
default: 0.999
min: 0.000
max: 1.000
help:
zh_Hans: 在核采样中Anthropic Claude 按概率递减顺序计算每个后续标记的所有选项的累积分布,并在达到 top_p 指定的特定概率时将其切断。您应该更改温度或top_p但不能同时更改两者。
en_US: In nucleus sampling, Anthropic Claude computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cuts it off once it reaches a particular probability specified by top_p. You should alter either temperature or top_p, but not both.
- name: top_k
required: false
type: int
default: 0
min: 0
# tip docs from aws has error, max value is 500
max: 500
help:
zh_Hans: 对于每个后续标记,仅从前 K 个选项中进行采样。使用 top_k 删除长尾低概率响应。
en_US: Only sample from the top K options for each subsequent token. Use top_k to remove long tail low probability responses.
pricing:
input: '0.008'
output: '0.024'

View File

@@ -72,16 +72,16 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
:return: full response or stream response chunk generator result
"""
# invoke claude 3 models via anthropic official SDK
if "anthropic.claude-3" in model:
return self._invoke_claude3(model, credentials, prompt_messages, model_parameters, stop, stream, user)
# invoke model
# invoke anthropic models via anthropic official SDK
if "anthropic" in model:
return self._generate_anthropic(model, credentials, prompt_messages, model_parameters, stop, stream, user)
# invoke other models via boto3 client
return self._generate(model, credentials, prompt_messages, model_parameters, stop, stream, user)
def _invoke_claude3(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
def _generate_anthropic(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict,
stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
"""
Invoke Claude3 large language model
Invoke Anthropic large language model
:param model: model name
:param credentials: model credentials
@@ -114,7 +114,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
# ref: https://github.com/anthropics/anthropic-sdk-python/blob/e84645b07ca5267066700a104b4d8d6a8da1383d/src/anthropic/resources/messages.py#L465
# extra_model_kwargs['metadata'] = message_create_params.Metadata(user_id=user)
system, prompt_message_dicts = self._convert_claude3_prompt_messages(prompt_messages)
system, prompt_message_dicts = self._convert_claude_prompt_messages(prompt_messages)
if system:
extra_model_kwargs['system'] = system
@@ -128,11 +128,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
)
if stream:
return self._handle_claude3_stream_response(model, credentials, response, prompt_messages)
return self._handle_claude_stream_response(model, credentials, response, prompt_messages)
return self._handle_claude3_response(model, credentials, response, prompt_messages)
return self._handle_claude_response(model, credentials, response, prompt_messages)
def _handle_claude3_response(self, model: str, credentials: dict, response: Message,
def _handle_claude_response(self, model: str, credentials: dict, response: Message,
prompt_messages: list[PromptMessage]) -> LLMResult:
"""
Handle llm chat response
@@ -172,7 +172,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
return response
def _handle_claude3_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
def _handle_claude_stream_response(self, model: str, credentials: dict, response: Stream[MessageStreamEvent],
prompt_messages: list[PromptMessage], ) -> Generator:
"""
Handle llm chat stream response
@@ -231,7 +231,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
except Exception as ex:
raise InvokeError(str(ex))
def _calc_claude3_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
def _calc_claude_response_usage(self, model: str, credentials: dict, prompt_tokens: int, completion_tokens: int) -> LLMUsage:
"""
Calculate response usage
@@ -275,7 +275,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
return usage
def _convert_claude3_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
def _convert_claude_prompt_messages(self, prompt_messages: list[PromptMessage]) -> tuple[str, list[dict]]:
"""
Convert prompt messages to dict list and system
"""
@@ -295,11 +295,11 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
prompt_message_dicts = []
for message in prompt_messages:
if not isinstance(message, SystemPromptMessage):
prompt_message_dicts.append(self._convert_claude3_prompt_message_to_dict(message))
prompt_message_dicts.append(self._convert_claude_prompt_message_to_dict(message))
return system, prompt_message_dicts
def _convert_claude3_prompt_message_to_dict(self, message: PromptMessage) -> dict:
def _convert_claude_prompt_message_to_dict(self, message: PromptMessage) -> dict:
"""
Convert PromptMessage to dict
"""
@@ -405,7 +405,7 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
if "anthropic.claude-3" in model:
try:
self._invoke_claude3(model=model,
self._invoke_claude(model=model,
credentials=credentials,
prompt_messages=[{"role": "user", "content": "ping"}],
model_parameters={},

View File

@@ -144,6 +144,16 @@ class MilvusVector(BaseVector):
utility.drop_collection(self._collection_name, None, using=alias)
def text_exists(self, id: str) -> bool:
alias = uuid4().hex
if self._client_config.secure:
uri = "https://" + str(self._client_config.host) + ":" + str(self._client_config.port)
else:
uri = "http://" + str(self._client_config.host) + ":" + str(self._client_config.port)
connections.connect(alias=alias, uri=uri, user=self._client_config.user, password=self._client_config.password)
from pymilvus import utility
if not utility.has_collection(self._collection_name, using=alias):
return False
result = self._client.query(collection_name=self._collection_name,
filter=f'metadata["doc_id"] == "{id}"',

View File

@@ -275,6 +275,13 @@ class QdrantVector(BaseVector):
)
def text_exists(self, id: str) -> bool:
all_collection_name = []
collections_response = self._client.get_collections()
collection_list = collections_response.collections
for collection in collection_list:
all_collection_name.append(collection.name)
if self._collection_name not in all_collection_name:
return False
response = self._client.retrieve(
collection_name=self._collection_name,
ids=[id]

View File

@@ -128,8 +128,8 @@ class Vector:
if kwargs.get('duplicate_check', False):
documents = self._filter_duplicate_texts(documents)
embeddings = self._embeddings.embed_documents([document.page_content for document in documents])
self._vector_processor.add_texts(
documents=documents,
self._vector_processor.create(
texts=documents,
embeddings=embeddings,
**kwargs
)

View File

@@ -134,6 +134,11 @@ class WeaviateVector(BaseVector):
def text_exists(self, id: str) -> bool:
collection_name = self._collection_name
schema = self._default_schema(self._collection_name)
# check whether the index already exists
if not self._client.schema.contains(schema):
return False
result = self._client.query.get(collection_name).with_additional(["id"]).with_where({
"path": ["doc_id"],
"operator": "Equal",

View File

@@ -53,7 +53,7 @@ class UnstructuredWordExtractor(BaseExtractor):
elements = partition_docx(filename=self._file_path)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -43,7 +43,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
pass
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -38,7 +38,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
elements = partition_md(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
elements = partition_msg(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredTextExtractor(BaseExtractor):
elements = partition_text(filename=self._file_path, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -28,7 +28,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
elements = partition_xml(filename=self._file_path, xml_keep_tags=True, api_url=self._api_url)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=0)
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
for chunk in chunks:
text = chunk.text.strip()

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M15.6111 1.5837C17.2678 1.34703 18.75 2.63255 18.75 4.30606V5.68256C19.9395 6.31131 20.75 7.56102 20.75 9.00004V19C20.75 21.0711 19.0711 22.75 17 22.75H7C4.92893 22.75 3.25 21.0711 3.25 19V5.00004C3.25 4.99074 3.25017 4.98148 3.2505 4.97227C3.25017 4.95788 3.25 4.94344 3.25 4.92897C3.25 4.02272 3.91638 3.25437 4.81353 3.12621L15.6111 1.5837ZM4.75 6.75004V19C4.75 20.2427 5.75736 21.25 7 21.25H17C18.2426 21.25 19.25 20.2427 19.25 19V9.00004C19.25 7.7574 18.2426 6.75004 17 6.75004H4.75ZM5.07107 5.25004H17.25V4.30606C17.25 3.54537 16.5763 2.96104 15.8232 3.06862L5.02566 4.61113C4.86749 4.63373 4.75 4.76919 4.75 4.92897C4.75 5.10629 4.89375 5.25004 5.07107 5.25004ZM7.25 12C7.25 11.5858 7.58579 11.25 8 11.25H16C16.4142 11.25 16.75 11.5858 16.75 12C16.75 12.4143 16.4142 12.75 16 12.75H8C7.58579 12.75 7.25 12.4143 7.25 12ZM7.25 15.5C7.25 15.0858 7.58579 14.75 8 14.75H13.5C13.9142 14.75 14.25 15.0858 14.25 15.5C14.25 15.9143 13.9142 16.25 13.5 16.25H8C7.58579 16.25 7.25 15.9143 7.25 15.5Z" fill="#1C274D"/>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

View File

@@ -0,0 +1,21 @@
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.devdocs.tools.searchDevDocs import SearchDevDocsTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class DevDocsProvider(BuiltinToolProviderController):
    """Builtin tool provider for searching developer documentation on DevDocs."""

    def _validate_credentials(self, credentials: dict) -> None:
        """Validate the provider credentials with a probe lookup.

        Performs a sample DevDocs query (Python 3.12 `library/code` page) using
        the supplied credentials; any failure is surfaced as a
        ToolProviderCredentialValidationError.
        """
        try:
            probe_tool = SearchDevDocsTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                },
            )
            probe_tool.invoke(
                user_id='',
                tool_parameters={
                    "doc": "python~3.12",
                    "topic": "library/code",
                },
            )
        except Exception as exc:
            raise ToolProviderCredentialValidationError(str(exc))

View File

@@ -0,0 +1,10 @@
# Provider manifest for the DevDocs builtin tool
# (mapping nesting restored; the rendered diff had flattened the indentation,
# which made the file invalid YAML with duplicate top-level keys).
identity:
  author: Richards Tu
  name: devdocs
  label:
    en_US: DevDocs
    zh_Hans: DevDocs
  description:
    en_US: Get official developer documentations on DevDocs.
    zh_Hans: 从DevDocs获取官方开发者文档。
  icon: icon.svg

View File

@@ -0,0 +1,42 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class SearchDevDocsInput(BaseModel):
    """Declared input schema for the DevDocs search tool.

    NOTE(review): this model is defined but never used to validate
    `tool_parameters` in `_invoke` — consider wiring it in or removing it.
    """
    doc: str = Field(..., description="The name of the documentation.")
    topic: str = Field(..., description="The path of the section/topic.")


class SearchDevDocsTool(BuiltinTool):
    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Fetch a documentation page from DevDocs for the given doc name and topic.

        Args:
            user_id (str): The ID of the user invoking the tool.
            tool_parameters (dict[str, Any]): The parameters for the tool,
                including 'doc' (e.g. "python~3.12") and 'topic'
                (e.g. "library/code").

        Returns:
            ToolInvokeMessage | list[ToolInvokeMessage]: A summarized text
            message with the page content, or an error message when an
            argument is missing or the HTTP request fails.
        """
        doc = tool_parameters.get('doc', '')
        topic = tool_parameters.get('topic', '')

        if not doc:
            return self.create_text_message('Please provide the documentation name.')
        if not topic:
            return self.create_text_message('Please provide the topic path.')

        url = f"https://documents.devdocs.io/{doc}/{topic}.html"
        # Bounded timeout: the original call had none, so a stalled server
        # would hang the tool invocation indefinitely.
        response = requests.get(url, timeout=10)

        if response.status_code == 200:
            content = response.text
            return self.create_text_message(self.summary(user_id=user_id, content=content))
        else:
            return self.create_text_message(f"Failed to retrieve the documentation. Status code: {response.status_code}")

View File

@@ -0,0 +1,34 @@
# Tool manifest for the DevDocs search tool.
# Fixed during review: restored the mapping indentation lost in rendering,
# removed literal backslash-escaped quotes (invalid in plain YAML scalars),
# and corrected typos ("Don't for get" -> "Don't forget", "A tools" -> "A tool").
identity:
  name: searchDevDocs
  author: Richards Tu
  label:
    en_US: Search Developer Docs
    zh_Hans: 搜索开发者文档
description:
  human:
    en_US: A tool for searching for a specific topic and path in DevDocs based on the provided documentation name and topic. Don't forget to add some shots in the system prompt; for example, the documentation name should be like "vuex~4", "css", or "python~3.12", while the topic should be like "guide/actions" for Vuex 4, "display-box" for CSS, or "library/code" for Python 3.12.
    zh_Hans: 一个用于根据提供的文档名称和主题在DevDocs中搜索特定主题和路径的工具。不要忘记在系统提示词中添加一些示例例如文档名称应该是"vuex~4"、"css"或"python~3.12",而主题应该是"guide/actions"用于Vuex 4"display-box"用于CSS或"library/code"用于Python 3.12。
  llm: A tool for searching for specific developer documentation in DevDocs based on the provided documentation name and topic.
parameters:
  - name: doc
    type: string
    required: true
    label:
      en_US: Documentation name
      zh_Hans: 文档名称
    human_description:
      en_US: The name of the documentation.
      zh_Hans: 文档名称。
    llm_description: The name of the documentation, such as "vuex~4", "css", or "python~3.12". The exact value should be identified by the user.
    form: llm
  - name: topic
    type: string
    required: true
    label:
      en_US: Topic name
      zh_Hans: 主题名称
    human_description:
      en_US: The path of the section/topic.
      zh_Hans: 文档主题的路径。
    llm_description: The path of the section/topic, such as "guide/actions" for Vuex 4, "display-box" for CSS, or "library/code" for Python 3.12.
    form: llm

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 120 120"><style>.st0{fill:#376db6}.st1{fill:#4ca2da}.st2{fill:#91d8f4}.st3{fill:#1e5397}</style><path class="st0" d="M22.4 57.5h74.8v15.4H22.4z"/><path class="st1" d="M22.4 37.6h74.8V53H22.4z"/><path class="st2" d="M85.5 17H34.4c-6.6 0-12 5.5-12 12.3v4h74.8v-4C97.2 22.5 92 17 85.5 17z"/><path class="st3" d="M22.4 77.3v4c0 6.8 5.4 12.3 12 12.3h32v16.3l15.8-16.3h3.5c6.6 0 12-5.5 12-12.3v-4H22.4z"/></svg>

After

Width:  |  Height:  |  Size: 458 B

View File

@@ -0,0 +1,25 @@
from core.tools.errors import ToolProviderCredentialValidationError
from core.tools.provider.builtin.stackexchange.tools.searchStackExQuestions import SearchStackExQuestionsTool
from core.tools.provider.builtin_tool_provider import BuiltinToolProviderController
class StackExchangeProvider(BuiltinToolProviderController):
    """Builtin tool provider for the Stack Exchange network."""

    def _validate_credentials(self, credentials: dict) -> None:
        """Validate the provider credentials with a probe search.

        Runs a minimal Stack Overflow title search using the supplied
        credentials; any failure is surfaced as a
        ToolProviderCredentialValidationError.
        """
        probe_parameters = {
            "intitle": "Test",
            "sort": "relevance",
            "order": "desc",
            "site": "stackoverflow",
            "accepted": True,
            "pagesize": 1,
        }
        try:
            probe_tool = SearchStackExQuestionsTool().fork_tool_runtime(
                meta={
                    "credentials": credentials,
                },
            )
            probe_tool.invoke(user_id='', tool_parameters=probe_parameters)
        except Exception as exc:
            raise ToolProviderCredentialValidationError(str(exc))

View File

@@ -0,0 +1,10 @@
# Provider manifest for the Stack Exchange builtin tool
# (mapping nesting restored; the rendered diff had flattened the indentation,
# which made the file invalid YAML with duplicate top-level keys).
identity:
  author: Richards Tu
  name: stackexchange
  label:
    en_US: Stack Exchange
    zh_Hans: Stack Exchange
  description:
    en_US: Access questions and answers from the Stack Exchange and its sub-sites.
    zh_Hans: 从Stack Exchange和其子论坛获取问题和答案。
  icon: icon.svg

View File

@@ -0,0 +1,37 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class FetchAnsByStackExQuesIDInput(BaseModel):
    """Validated input parameters for the fetchAnsByStackExQuesID tool."""
    # Numeric ID of the Stack Exchange question whose answers are fetched.
    id: int = Field(..., description="The question ID")
    # Site identifier, e.g. "stackoverflow", "unix" (API `site` parameter).
    site: str = Field(..., description="The Stack Exchange site")
    # Sort direction accepted by the API: "asc" or "desc".
    order: str = Field(..., description="asc or desc")
    # Sort key accepted by the API: "activity", "votes", or "creation".
    sort: str = Field(..., description="activity, votes, creation")
    # Number of answers returned per page.
    pagesize: int = Field(..., description="Number of answers per page")
    # 1-based page number to retrieve.
    page: int = Field(..., description="Page number")
class FetchAnsByStackExQuesIDTool(BuiltinTool):
    """Fetch the answers for a specific Stack Exchange question ID."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Call the Stack Exchange /questions/{id}/answers endpoint.

        :param user_id: invoking user, forwarded to the summarizer
        :param tool_parameters: raw parameters, validated via
            FetchAnsByStackExQuesIDInput
        :return: a text message with the summarized API response, or an
            error message containing the HTTP status code on failure
        """
        # Renamed from `input` to avoid shadowing the builtin of that name.
        args = FetchAnsByStackExQuesIDInput(**tool_parameters)

        params = {
            "site": args.site,
            # Custom API filter token; required so the response actually
            # includes the answer bodies (default filters omit them).
            "filter": "!nNPvSNdWme",
            "order": args.order,
            "sort": args.sort,
            "pagesize": args.pagesize,
            "page": args.page,
        }

        response = requests.get(
            f"https://api.stackexchange.com/2.3/questions/{args.id}/answers",
            params=params,
            timeout=10,  # fail fast instead of hanging on a stalled connection
        )

        if response.status_code == 200:
            return self.create_text_message(self.summary(user_id=user_id, content=response.text))
        return self.create_text_message(f"API request failed with status code {response.status_code}")

View File

@@ -0,0 +1,189 @@
identity:
name: fetchAnsByStackExQuesID
author: Richards Tu
label:
en_US: Fetch Stack Exchange Answers
zh_Hans: 获取 Stack Exchange 答案
description:
human:
en_US: A tool for retrieving answers for a specific Stack Exchange question ID. Specify the question ID, Stack Exchange site, sorting order, number of results per page, and page number. Must be used with the searchStackExQuesID tool.
zh_Hans: 用于检索特定Stack Exchange问题ID的答案的工具。指定问题ID、Stack Exchange站点、排序顺序、每页结果数和页码。 必须与searchStackExQuesID工具一起使用。
llm: A tool for retrieving answers for a specific Stack Exchange question ID based on the provided parameters.
parameters:
- name: id
type: string
required: true
label:
en_US: Question ID
zh_Hans: 问题ID
human_description:
en_US: The ID of the Stack Exchange question to fetch answers for.
zh_Hans: 要获取答案的Stack Exchange问题的ID。
llm_description: The ID of the Stack Exchange question.
form: llm
- name: site
type: string
required: true
label:
en_US: Stack Exchange site
zh_Hans: Stack Exchange站点
human_description:
en_US: The Stack Exchange site the question is from, e.g. stackoverflow, unix, etc.
zh_Hans: 问题所在的Stack Exchange站点例如stackoverflow、unix等。
llm_description: The Stack Exchange site identifier.
options:
- value: stackoverflow
label:
en_US: stackoverflow
- value: serverfault
label:
en_US: serverfault
- value: superuser
label:
en_US: superuser
- value: askubuntu
label:
en_US: askubuntu
- value: unix
label:
en_US: unix
- value: cs
label:
en_US: cs
- value: softwareengineering
label:
en_US: softwareengineering
- value: codegolf
label:
en_US: codegolf
- value: codereview
label:
en_US: codereview
- value: cstheory
label:
en_US: cstheory
- value: security
label:
en_US: security
- value: cryptography
label:
en_US: cryptography
- value: reverseengineering
label:
en_US: reverseengineering
- value: datascience
label:
en_US: datascience
- value: devops
label:
en_US: devops
- value: ux
label:
en_US: ux
- value: dba
label:
en_US: dba
- value: gis
label:
en_US: gis
- value: webmasters
label:
en_US: webmasters
- value: arduino
label:
en_US: arduino
- value: raspberrypi
label:
en_US: raspberrypi
- value: networkengineering
label:
en_US: networkengineering
- value: iot
label:
en_US: iot
- value: tor
label:
en_US: tor
- value: sqa
label:
en_US: sqa
- value: mathoverflow
label:
en_US: mathoverflow
- value: math
label:
en_US: math
- value: mathematica
label:
en_US: mathematica
- value: dsp
label:
en_US: dsp
- value: gamedev
label:
en_US: gamedev
- value: robotics
label:
en_US: robotics
- value: genai
label:
en_US: genai
- value: computergraphics
label:
en_US: computergraphics
form: form
- name: filter
type: string
required: true
label:
en_US: Filter
zh_Hans: 过滤器
human_description:
en_US: This is required in order to actually get the body of the answer.
zh_Hans: 为了实际获取答案的正文,这是必需的。
llm_description: Required in order to actually get the body of the answer. Must be "!nNPvSNdWme".
form: llm
- name: order
type: string
required: true
label:
en_US: Sort direction
zh_Hans: 排序方向
human_description:
en_US: The direction to sort the answers - ascending or descending.
zh_Hans: 答案的排序方向 - 升序或降序。
llm_description: asc for ascending, desc for descending.
form: llm
- name: sort
type: string
required: true
label:
en_US: Sort order
zh_Hans: 排序
human_description:
en_US: The sort order for the answers - activity, votes, or creation date.
zh_Hans: 答案的排序顺序 - 活动、投票或创建日期。
llm_description: activity, votes, or creation.
form: llm
- name: pagesize
type: number
required: true
label:
en_US: Results per page
zh_Hans: 每页结果数
human_description:
en_US: The number of answers to return per page.
zh_Hans: 每页返回的答案数。
llm_description: The number of answers per page.
form: llm
- name: page
type: number
required: true
label:
en_US: Page number
zh_Hans: 页码
human_description:
en_US: The page number of answers to retrieve.
zh_Hans: 要检索的答案的页码。
llm_description: The page number to retrieve.
form: llm

View File

@@ -0,0 +1,43 @@
from typing import Any, Union
import requests
from pydantic import BaseModel, Field
from core.tools.entities.tool_entities import ToolInvokeMessage
from core.tools.tool.builtin_tool import BuiltinTool
class SearchStackExQuestionsInput(BaseModel):
    """Validated input parameters for the searchStackExQuestions tool."""
    # Search query matched against question titles (API `intitle` parameter).
    intitle: str = Field(..., description="The search query.")
    # Sort key accepted by the API: relevance, activity, votes, or creation.
    sort: str = Field(..., description="The sort order - relevance, activity, votes, creation.")
    # Sort direction accepted by the API: "asc" or "desc".
    order: str = Field(..., description="asc or desc")
    # Site identifier, e.g. "stackoverflow", "unix".
    site: str = Field(..., description="The Stack Exchange site.")
    # NOTE(review): declared `str` but defaults to None — should be
    # Optional[str]; pydantic tolerates this, a type checker will not.
    tagged: str = Field(None, description="Semicolon-separated tags to include.")
    # NOTE(review): same Optional[str] concern as `tagged` above.
    nottagged: str = Field(None, description="Semicolon-separated tags to exclude.")
    # True limits results to questions that have an accepted answer.
    accepted: bool = Field(..., description="true for only accepted answers, false otherwise")
    # Number of results returned per page.
    pagesize: int = Field(..., description="Number of results per page")
class SearchStackExQuestionsTool(BuiltinTool):
    """Search questions on a Stack Exchange site."""

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
        """Call the Stack Exchange /search endpoint.

        :param user_id: invoking user, forwarded to the summarizer
        :param tool_parameters: raw parameters, validated via
            SearchStackExQuestionsInput
        :return: a text message with the summarized API response, or an
            error message containing the HTTP status code on failure
        """
        # Renamed from `input` to avoid shadowing the builtin of that name.
        args = SearchStackExQuestionsInput(**tool_parameters)

        params = {
            "intitle": args.intitle,
            "sort": args.sort,
            "order": args.order,
            "site": args.site,
            "accepted": args.accepted,
            "pagesize": args.pagesize,
        }
        # Tag filters are optional; only forward them when provided so the
        # API does not receive empty parameters.
        if args.tagged:
            params["tagged"] = args.tagged
        if args.nottagged:
            params["nottagged"] = args.nottagged

        response = requests.get(
            "https://api.stackexchange.com/2.3/search",
            params=params,
            timeout=10,  # fail fast instead of hanging on a stalled connection
        )

        if response.status_code == 200:
            return self.create_text_message(self.summary(user_id=user_id, content=response.text))
        return self.create_text_message(f"API request failed with status code {response.status_code}")

View File

@@ -0,0 +1,200 @@
identity:
name: searchStackExQuestions
author: Richards Tu
label:
en_US: Search Stack Exchange Questions
zh_Hans: 搜索Stack Exchange问题
description:
human:
en_US: A tool for searching questions on a Stack Exchange site. Specify the search query, sorting order, tags to include or exclude, whether to search only for questions with accepted answers, the Stack Exchange site, and number of results per page.
zh_Hans: 在Stack Exchange站点上搜索问题的工具。指定搜索查询、排序顺序、要包含或排除的标签、是否仅搜索有已接受答案的问题、Stack Exchange站点以及每页结果数。
llm: A tool for searching questions on a Stack Exchange site based on the provided parameters.
parameters:
- name: intitle
type: string
required: true
label:
en_US: Search query
zh_Hans: 搜索查询
human_description:
en_US: The search query to use for finding questions.
zh_Hans: 用于查找问题的搜索查询。
llm_description: The search query to use.
form: llm
- name: sort
type: string
required: true
label:
en_US: Sort order
zh_Hans: 排序
human_description:
en_US: The sort order for the search results - relevance, activity, votes, or creation date.
zh_Hans: 搜索结果的排序顺序 - 相关性、活动、投票或创建日期。
llm_description: The sort order - relevance, activity, votes, or creation.
form: llm
- name: order
type: string
required: true
label:
en_US: Sort direction
zh_Hans: 排序方向
human_description:
en_US: The direction to sort - ascending or descending.
zh_Hans: 排序方向 - 升序或降序。
llm_description: asc for ascending, desc for descending.
form: llm
- name: site
type: string
required: true
label:
en_US: Stack Exchange site
zh_Hans: Stack Exchange 站点
human_description:
en_US: The Stack Exchange site to search, e.g. stackoverflow, unix, etc.
zh_Hans: 要搜索的Stack Exchange站点例如stackoverflow、unix等。
llm_description: The Stack Exchange site identifier.
options:
- value: stackoverflow
label:
en_US: stackoverflow
- value: serverfault
label:
en_US: serverfault
- value: superuser
label:
en_US: superuser
- value: askubuntu
label:
en_US: askubuntu
- value: unix
label:
en_US: unix
- value: cs
label:
en_US: cs
- value: softwareengineering
label:
en_US: softwareengineering
- value: codegolf
label:
en_US: codegolf
- value: codereview
label:
en_US: codereview
- value: cstheory
label:
en_US: cstheory
- value: security
label:
en_US: security
- value: cryptography
label:
en_US: cryptography
- value: reverseengineering
label:
en_US: reverseengineering
- value: datascience
label:
en_US: datascience
- value: devops
label:
en_US: devops
- value: ux
label:
en_US: ux
- value: dba
label:
en_US: dba
- value: gis
label:
en_US: gis
- value: webmasters
label:
en_US: webmasters
- value: arduino
label:
en_US: arduino
- value: raspberrypi
label:
en_US: raspberrypi
- value: networkengineering
label:
en_US: networkengineering
- value: iot
label:
en_US: iot
- value: tor
label:
en_US: tor
- value: sqa
label:
en_US: sqa
- value: mathoverflow
label:
en_US: mathoverflow
- value: math
label:
en_US: math
- value: mathematica
label:
en_US: mathematica
- value: dsp
label:
en_US: dsp
- value: gamedev
label:
en_US: gamedev
- value: robotics
label:
en_US: robotics
- value: genai
label:
en_US: genai
- value: computergraphics
label:
en_US: computergraphics
form: form
- name: tagged
type: string
required: false
label:
en_US: Include tags
zh_Hans: 包含标签
human_description:
en_US: A semicolon-separated list of tags that questions must have.
zh_Hans: 问题必须具有的标签的分号分隔列表。
llm_description: Semicolon-separated tags to include. Leave blank if not needed.
form: llm
- name: nottagged
type: string
required: false
label:
en_US: Exclude tags
zh_Hans: 排除标签
human_description:
en_US: A semicolon-separated list of tags to exclude from the search.
zh_Hans: 从搜索中排除的标签的分号分隔列表。
llm_description: Semicolon-separated tags to exclude. Leave blank if not needed.
form: llm
- name: accepted
type: boolean
required: true
label:
en_US: Has accepted answer
zh_Hans: 有已接受的答案
human_description:
en_US: Whether to limit to only questions that have an accepted answer.
zh_Hans: 是否限制为只有已接受答案的问题。
llm_description: true to limit to only questions with accepted answers, false otherwise.
form: llm
- name: pagesize
type: number
required: true
label:
en_US: Results per page
zh_Hans: 每页结果数
human_description:
en_US: The number of results to return per page.
zh_Hans: 每页返回的结果数。
llm_description: The number of results per page.
form: llm

View File

@@ -53,7 +53,7 @@ def login_required(func):
def decorated_view(*args, **kwargs):
auth_header = request.headers.get('Authorization')
admin_api_key_enable = os.getenv('ADMIN_API_KEY_ENABLE', default='False')
if admin_api_key_enable:
if admin_api_key_enable.lower() == 'true':
if auth_header:
if ' ' not in auth_header:
raise Unauthorized('Invalid Authorization header format. Expected \'Bearer <api-key>\' format.')

View File

@@ -435,11 +435,13 @@ class RegisterService:
if open_id is not None or provider is not None:
AccountService.link_account_integrate(provider, open_id, account)
if current_app.config['EDITION'] != 'SELF_HOSTED':
tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
tenant = TenantService.create_tenant(f"{account.name}'s Workspace")
TenantService.create_tenant_member(tenant, account, role='owner')
account.current_tenant = tenant
TenantService.create_tenant_member(tenant, account, role='owner')
account.current_tenant = tenant
tenant_was_created.send(tenant)
db.session.commit()
except Exception as e:
@@ -447,8 +449,6 @@ class RegisterService:
logging.error(f'Register failed: {e}')
raise AccountRegisterError(f'Registration failed: {e}') from e
tenant_was_created.send(tenant)
return account
@classmethod
@@ -461,7 +461,6 @@ class RegisterService:
name = email.split('@')[0]
account = cls.register(email=email, name=name, language=language, status=AccountStatus.PENDING)
# Create new tenant member for invited tenant
TenantService.create_tenant_member(tenant, account, role)
TenantService.switch_tenant(account, tenant.id)

View File

@@ -9,6 +9,7 @@ import {
$getRoot,
TextNode,
} from 'lexical'
import { CodeNode } from '@lexical/code'
import { LexicalComposer } from '@lexical/react/LexicalComposer'
import { RichTextPlugin } from '@lexical/react/LexicalRichTextPlugin'
import { ContentEditable } from '@lexical/react/LexicalContentEditable'
@@ -97,6 +98,7 @@ const PromptEditor: FC<PromptEditorProps> = ({
const initialConfig = {
namespace: 'prompt-editor',
nodes: [
CodeNode,
CustomTextNode,
{
replace: TextNode,

View File

@@ -1,6 +1,6 @@
{
"name": "dify-web",
"version": "0.5.10",
"version": "0.5.11",
"private": true,
"scripts": {
"dev": "next dev",