bump version to 0.3.26 (#1307 )

Co-authored-by: jyong <jyong@dify.ai>
milvus docker compose env (#1306 )
2026-01-16 12:00:30 +00:00 · 2023-10-11 16:11:24 +08:00 · 2023-10-11 16:05:37 +08:00 · 2023-10-11 13:11:06 +08:00 · 2023-10-10 21:56:24 +08:00 · 2023-10-10 04:44:19 -05:00
15 changed files with 1724 additions and 155 deletions
--- a/api/.env.example
+++ b/api/.env.example
@@ -50,7 +50,7 @@ S3_REGION=your-region
 WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*

-# Vector database configuration, support: weaviate, qdrant
+# Vector database configuration, support: weaviate, qdrant, milvus
 VECTOR_STORE=weaviate

 # Weaviate configuration
@@ -63,6 +63,13 @@ WEAVIATE_BATCH_SIZE=100
 QDRANT_URL=http://localhost:6333
 QDRANT_API_KEY=difyai123456

+# Milvus configuration
+MILVUS_HOST=127.0.0.1
+MILVUS_PORT=19530
+MILVUS_USER=root
+MILVUS_PASSWORD=Milvus
+MILVUS_SECURE=false
+
 # Mail configuration, support: resend
 MAIL_TYPE=
 MAIL_DEFAULT_SEND_FROM=no-reply <no-reply@dify.ai>
--- a/api/config.py
+++ b/api/config.py
@@ -92,7 +92,7 @@ class Config:
        self.CONSOLE_URL = get_env('CONSOLE_URL')
        self.API_URL = get_env('API_URL')
        self.APP_URL = get_env('APP_URL')
-        self.CURRENT_VERSION = "0.3.25"
+        self.CURRENT_VERSION = "0.3.26"
        self.COMMIT_SHA = get_env('COMMIT_SHA')
        self.EDITION = "SELF_HOSTED"
        self.DEPLOY_ENV = get_env('DEPLOY_ENV')
@@ -135,6 +135,14 @@ class Config:
        self.QDRANT_URL = get_env('QDRANT_URL')
        self.QDRANT_API_KEY = get_env('QDRANT_API_KEY')

+        # milvus setting
+        self.MILVUS_HOST = get_env('MILVUS_HOST')
+        self.MILVUS_PORT = get_env('MILVUS_PORT')
+        self.MILVUS_USER = get_env('MILVUS_USER')
+        self.MILVUS_PASSWORD = get_env('MILVUS_PASSWORD')
+        self.MILVUS_SECURE = get_env('MILVUS_SECURE')
+
+
        # cors settings
        self.CONSOLE_CORS_ALLOW_ORIGINS = get_cors_allow_origins(
            'CONSOLE_CORS_ALLOW_ORIGINS', self.CONSOLE_WEB_URL)
--- a/api/controllers/service_api/dataset/segment.py
+++ b/api/controllers/service_api/dataset/segment.py
@@ -1,7 +1,6 @@
 from flask_login import current_user
 from flask_restful import reqparse, marshal
 from werkzeug.exceptions import NotFound
-
 from controllers.service_api import api
 from controllers.service_api.app.error import ProviderNotInitializeError
 from controllers.service_api.wraps import DatasetApiResource
@@ -9,8 +8,8 @@ from core.model_providers.error import ProviderTokenNotInitError, LLMBadRequestE
 from core.model_providers.model_factory import ModelFactory
 from extensions.ext_database import db
 from fields.segment_fields import segment_fields
-from models.dataset import Dataset
-from services.dataset_service import DocumentService, SegmentService
+from models.dataset import Dataset, DocumentSegment
+from services.dataset_service import DatasetService, DocumentService, SegmentService


 class SegmentApi(DatasetApiResource):
@@ -24,6 +23,8 @@ class SegmentApi(DatasetApiResource):
            Dataset.tenant_id == tenant_id,
            Dataset.id == dataset_id
        ).first()
+        if not dataset:
+            raise NotFound('Dataset not found.')
        # check document
        document_id = str(document_id)
        document = DocumentService.get_document(dataset.id, document_id)
@@ -55,5 +56,146 @@ class SegmentApi(DatasetApiResource):
            'doc_form': document.doc_form
        }, 200

+    def get(self, tenant_id, dataset_id, document_id):
+        """Create single segment."""
+        # check dataset
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(
+            Dataset.tenant_id == tenant_id,
+            Dataset.id == dataset_id
+        ).first()
+        if not dataset:
+            raise NotFound('Dataset not found.')
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset.id, document_id)
+        if not document:
+            raise NotFound('Document not found.')
+        # check embedding model setting
+        if dataset.indexing_technique == 'high_quality':
+            try:
+                ModelFactory.get_embedding_model(
+                    tenant_id=current_user.current_tenant_id,
+                    model_provider_name=dataset.embedding_model_provider,
+                    model_name=dataset.embedding_model
+                )
+            except LLMBadRequestError:
+                raise ProviderNotInitializeError(
+                    f"No Embedding Model available. Please configure a valid provider "
+                    f"in the Settings -> Model Provider.")
+            except ProviderTokenNotInitError as ex:
+                raise ProviderNotInitializeError(ex.description)
+
+        parser = reqparse.RequestParser()
+        parser.add_argument('status', type=str,
+                            action='append', default=[], location='args')
+        parser.add_argument('keyword', type=str, default=None, location='args')
+        args = parser.parse_args()
+
+        status_list = args['status']
+        keyword = args['keyword']
+
+        query = DocumentSegment.query.filter(
+            DocumentSegment.document_id == str(document_id),
+            DocumentSegment.tenant_id == current_user.current_tenant_id
+        )
+
+        if status_list:
+            query = query.filter(DocumentSegment.status.in_(status_list))
+
+        if keyword:
+            query = query.where(DocumentSegment.content.ilike(f'%{keyword}%'))
+
+        total = query.count()
+        segments = query.order_by(DocumentSegment.position).all()
+        return {
+            'data': marshal(segments, segment_fields),
+            'doc_form': document.doc_form,
+            'total': total
+        }, 200
+
+
+class DatasetSegmentApi(DatasetApiResource):
+    def delete(self, tenant_id, dataset_id, document_id, segment_id):
+        # check dataset
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(
+            Dataset.tenant_id == tenant_id,
+            Dataset.id == dataset_id
+        ).first()
+        if not dataset:
+            raise NotFound('Dataset not found.')
+        # check user's model setting
+        DatasetService.check_dataset_model_setting(dataset)
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset_id, document_id)
+        if not document:
+            raise NotFound('Document not found.')
+        # check segment
+        segment = DocumentSegment.query.filter(
+            DocumentSegment.id == str(segment_id),
+            DocumentSegment.tenant_id == current_user.current_tenant_id
+        ).first()
+        if not segment:
+            raise NotFound('Segment not found.')
+        SegmentService.delete_segment(segment, document, dataset)
+        return {'result': 'success'}, 200
+
+    def post(self, tenant_id, dataset_id, document_id, segment_id):
+        # check dataset
+        dataset_id = str(dataset_id)
+        tenant_id = str(tenant_id)
+        dataset = db.session.query(Dataset).filter(
+            Dataset.tenant_id == tenant_id,
+            Dataset.id == dataset_id
+        ).first()
+        if not dataset:
+            raise NotFound('Dataset not found.')
+        # check user's model setting
+        DatasetService.check_dataset_model_setting(dataset)
+        # check document
+        document_id = str(document_id)
+        document = DocumentService.get_document(dataset_id, document_id)
+        if not document:
+            raise NotFound('Document not found.')
+        if dataset.indexing_technique == 'high_quality':
+            # check embedding model setting
+            try:
+                ModelFactory.get_embedding_model(
+                    tenant_id=current_user.current_tenant_id,
+                    model_provider_name=dataset.embedding_model_provider,
+                    model_name=dataset.embedding_model
+                )
+            except LLMBadRequestError:
+                raise ProviderNotInitializeError(
+                    f"No Embedding Model available. Please configure a valid provider "
+                    f"in the Settings -> Model Provider.")
+            except ProviderTokenNotInitError as ex:
+                raise ProviderNotInitializeError(ex.description)
+            # check segment
+        segment_id = str(segment_id)
+        segment = DocumentSegment.query.filter(
+            DocumentSegment.id == str(segment_id),
+            DocumentSegment.tenant_id == current_user.current_tenant_id
+        ).first()
+        if not segment:
+            raise NotFound('Segment not found.')
+
+        # validate args
+        parser = reqparse.RequestParser()
+        parser.add_argument('segments', type=dict, required=False, nullable=True, location='json')
+        args = parser.parse_args()
+
+        SegmentService.segment_create_args_validate(args['segments'], document)
+        segment = SegmentService.update_segment(args['segments'], segment, document, dataset)
+        return {
+            'data': marshal(segment, segment_fields),
+            'doc_form': document.doc_form
+        }, 200
+

 api.add_resource(SegmentApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments')
+api.add_resource(DatasetSegmentApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>')
--- a/api/core/index/vector_index/milvus.py
+++ b/api/core/index/vector_index/milvus.py
@@ -0,0 +1,860 @@
+"""Wrapper around the Milvus vector database."""
+from __future__ import annotations
+
+import logging
+from typing import Any, Iterable, List, Optional, Tuple, Union, Sequence
+from uuid import uuid4
+
+import numpy as np
+
+from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
+from langchain.vectorstores.base import VectorStore
+from langchain.vectorstores.utils import maximal_marginal_relevance
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MILVUS_CONNECTION = {
+    "host": "localhost",
+    "port": "19530",
+    "user": "",
+    "password": "",
+    "secure": False,
+}
+
+
+class Milvus(VectorStore):
+    """Initialize wrapper around the milvus vector database.
+
+    In order to use this you need to have `pymilvus` installed and a
+    running Milvus
+
+    See the following documentation for how to run a Milvus instance:
+    https://milvus.io/docs/install_standalone-docker.md
+
+    If looking for a hosted Milvus, take a look at this documentation:
+    https://zilliz.com/cloud and make use of the Zilliz vectorstore found in
+    this project,
+
+    IF USING L2/IP metric IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.
+
+    Args:
+        embedding_function (Embeddings): Function used to embed the text.
+        collection_name (str): Which Milvus collection to use. Defaults to
+            "LangChainCollection".
+        connection_args (Optional[dict[str, any]]): The connection args used for
+            this class comes in the form of a dict.
+        consistency_level (str): The consistency level to use for a collection.
+            Defaults to "Session".
+        index_params (Optional[dict]): Which index params to use. Defaults to
+            HNSW/AUTOINDEX depending on service.
+        search_params (Optional[dict]): Which search params to use. Defaults to
+            default of index.
+        drop_old (Optional[bool]): Whether to drop the current collection. Defaults
+            to False.
+
+    The connection args used for this class comes in the form of a dict,
+    here are a few of the options:
+        address (str): The actual address of Milvus
+            instance. Example address: "localhost:19530"
+        uri (str): The uri of Milvus instance. Example uri:
+            "http://randomwebsite:19530",
+            "tcp:foobarsite:19530",
+            "https://ok.s3.south.com:19530".
+        host (str): The host of Milvus instance. Default at "localhost",
+            PyMilvus will fill in the default host if only port is provided.
+        port (str/int): The port of Milvus instance. Default at 19530, PyMilvus
+            will fill in the default port if only host is provided.
+        user (str): Use which user to connect to Milvus instance. If user and
+            password are provided, we will add related header in every RPC call.
+        password (str): Required when user is provided. The password
+            corresponding to the user.
+        secure (bool): Default is false. If set to true, tls will be enabled.
+        client_key_path (str): If use tls two-way authentication, need to
+            write the client.key path.
+        client_pem_path (str): If use tls two-way authentication, need to
+            write the client.pem path.
+        ca_pem_path (str): If use tls two-way authentication, need to write
+            the ca.pem path.
+        server_pem_path (str): If use tls one-way authentication, need to
+            write the server.pem path.
+        server_name (str): If use tls, need to write the common name.
+
+    Example:
+        .. code-block:: python
+
+        from langchain import Milvus
+        from langchain.embeddings import OpenAIEmbeddings
+
+        embedding = OpenAIEmbeddings()
+        # Connect to a milvus instance on localhost
+        milvus_store = Milvus(
+            embedding_function = Embeddings,
+            collection_name = "LangChainCollection",
+            drop_old = True,
+        )
+
+    Raises:
+        ValueError: If the pymilvus python package is not installed.
+    """
+
+    def __init__(
+        self,
+        embedding_function: Embeddings,
+        collection_name: str = "LangChainCollection",
+        connection_args: Optional[dict[str, Any]] = None,
+        consistency_level: str = "Session",
+        index_params: Optional[dict] = None,
+        search_params: Optional[dict] = None,
+        drop_old: Optional[bool] = False,
+    ):
+        """Initialize the Milvus vector store."""
+        try:
+            from pymilvus import Collection, utility
+        except ImportError:
+            raise ValueError(
+                "Could not import pymilvus python package. "
+                "Please install it with `pip install pymilvus`."
+            )
+
+        # Default search params when one is not provided.
+        self.default_search_params = {
+            "IVF_FLAT": {"metric_type": "L2", "params": {"nprobe": 10}},
+            "IVF_SQ8": {"metric_type": "L2", "params": {"nprobe": 10}},
+            "IVF_PQ": {"metric_type": "L2", "params": {"nprobe": 10}},
+            "HNSW": {"metric_type": "L2", "params": {"ef": 10}},
+            "RHNSW_FLAT": {"metric_type": "L2", "params": {"ef": 10}},
+            "RHNSW_SQ": {"metric_type": "L2", "params": {"ef": 10}},
+            "RHNSW_PQ": {"metric_type": "L2", "params": {"ef": 10}},
+            "IVF_HNSW": {"metric_type": "L2", "params": {"nprobe": 10, "ef": 10}},
+            "ANNOY": {"metric_type": "L2", "params": {"search_k": 10}},
+            "AUTOINDEX": {"metric_type": "L2", "params": {}},
+        }
+
+        self.embedding_func = embedding_function
+        self.collection_name = collection_name
+        self.index_params = index_params
+        self.search_params = search_params
+        self.consistency_level = consistency_level
+
+        # In order for a collection to be compatible, pk needs to be auto'id and int
+        self._primary_field = "id"
+        # In order for compatibility, the text field will need to be called "text"
+        self._text_field = "page_content"
+        # In order for compatibility, the vector field needs to be called "vector"
+        self._vector_field = "vectors"
+        # In order for compatibility, the metadata field will need to be called "metadata"
+        self._metadata_field = "metadata"
+        self.fields: list[str] = []
+        # Create the connection to the server
+        if connection_args is None:
+            connection_args = DEFAULT_MILVUS_CONNECTION
+        self.alias = self._create_connection_alias(connection_args)
+        self.col: Optional[Collection] = None
+
+        # Grab the existing collection if it exists
+        if utility.has_collection(self.collection_name, using=self.alias):
+            self.col = Collection(
+                self.collection_name,
+                using=self.alias,
+            )
+        # If need to drop old, drop it
+        if drop_old and isinstance(self.col, Collection):
+            self.col.drop()
+            self.col = None
+
+        # Initialize the vector store
+        self._init()
+
+    @property
+
+
+    def embeddings(self) -> Embeddings:
+        return self.embedding_func
+
+    def _create_connection_alias(self, connection_args: dict) -> str:
+        """Create the connection to the Milvus server."""
+        from pymilvus import MilvusException, connections
+
+        # Grab the connection arguments that are used for checking existing connection
+        host: str = connection_args.get("host", None)
+        port: Union[str, int] = connection_args.get("port", None)
+        address: str = connection_args.get("address", None)
+        uri: str = connection_args.get("uri", None)
+        user = connection_args.get("user", None)
+
+        # Order of use is host/port, uri, address
+        if host is not None and port is not None:
+            given_address = str(host) + ":" + str(port)
+        elif uri is not None:
+            given_address = uri.split("https://")[1]
+        elif address is not None:
+            given_address = address
+        else:
+            given_address = None
+            logger.debug("Missing standard address type for reuse atttempt")
+
+        # User defaults to empty string when getting connection info
+        if user is not None:
+            tmp_user = user
+        else:
+            tmp_user = ""
+
+        # If a valid address was given, then check if a connection exists
+        if given_address is not None:
+            for con in connections.list_connections():
+                addr = connections.get_connection_addr(con[0])
+                if (
+                    con[1]
+                    and ("address" in addr)
+                    and (addr["address"] == given_address)
+                    and ("user" in addr)
+                    and (addr["user"] == tmp_user)
+                ):
+                    logger.debug("Using previous connection: %s", con[0])
+                    return con[0]
+
+        # Generate a new connection if one doesn't exist
+        alias = uuid4().hex
+        try:
+            connections.connect(alias=alias, **connection_args)
+            logger.debug("Created new connection using: %s", alias)
+            return alias
+        except MilvusException as e:
+            logger.error("Failed to create new connection using: %s", alias)
+            raise e
+
+    def _init(
+        self, embeddings: Optional[list] = None, metadatas: Optional[list[dict]] = None
+    ) -> None:
+        if embeddings is not None:
+            self._create_collection(embeddings, metadatas)
+        self._extract_fields()
+        self._create_index()
+        self._create_search_params()
+        self._load()
+
+    def _create_collection(
+        self, embeddings: list, metadatas: Optional[list[dict]] = None
+    ) -> None:
+        from pymilvus import (
+            Collection,
+            CollectionSchema,
+            DataType,
+            FieldSchema,
+            MilvusException,
+        )
+        from pymilvus.orm.types import infer_dtype_bydata
+
+        # Determine embedding dim
+        dim = len(embeddings[0])
+        fields = []
+        # Determine metadata schema
+        # if metadatas:
+        #     # Create FieldSchema for each entry in metadata.
+        #     for key, value in metadatas[0].items():
+        #         # Infer the corresponding datatype of the metadata
+        #         dtype = infer_dtype_bydata(value)
+        #         # Datatype isn't compatible
+        #         if dtype == DataType.UNKNOWN or dtype == DataType.NONE:
+        #             logger.error(
+        #                 "Failure to create collection, unrecognized dtype for key: %s",
+        #                 key,
+        #             )
+        #             raise ValueError(f"Unrecognized datatype for {key}.")
+        #         # Dataype is a string/varchar equivalent
+        #         elif dtype == DataType.VARCHAR:
+        #             fields.append(FieldSchema(key, DataType.VARCHAR, max_length=65_535))
+        #         else:
+        #             fields.append(FieldSchema(key, dtype))
+        if metadatas:
+            fields.append(FieldSchema(self._metadata_field, DataType.JSON, max_length=65_535))
+
+        # Create the text field
+        fields.append(
+            FieldSchema(self._text_field, DataType.VARCHAR, max_length=65_535)
+        )
+        # Create the primary key field
+        fields.append(
+            FieldSchema(
+                self._primary_field, DataType.INT64, is_primary=True, auto_id=True
+            )
+        )
+        # Create the vector field, supports binary or float vectors
+        fields.append(
+            FieldSchema(self._vector_field, infer_dtype_bydata(embeddings[0]), dim=dim)
+        )
+
+        # Create the schema for the collection
+        schema = CollectionSchema(fields)
+
+        # Create the collection
+        try:
+            self.col = Collection(
+                name=self.collection_name,
+                schema=schema,
+                consistency_level=self.consistency_level,
+                using=self.alias,
+            )
+        except MilvusException as e:
+            logger.error(
+                "Failed to create collection: %s error: %s", self.collection_name, e
+            )
+            raise e
+
+    def _extract_fields(self) -> None:
+        """Grab the existing fields from the Collection"""
+        from pymilvus import Collection
+
+        if isinstance(self.col, Collection):
+            schema = self.col.schema
+            for x in schema.fields:
+                self.fields.append(x.name)
+            # Since primary field is auto-id, no need to track it
+            self.fields.remove(self._primary_field)
+
+    def _get_index(self) -> Optional[dict[str, Any]]:
+        """Return the vector index information if it exists"""
+        from pymilvus import Collection
+
+        if isinstance(self.col, Collection):
+            for x in self.col.indexes:
+                if x.field_name == self._vector_field:
+                    return x.to_dict()
+        return None
+
+    def _create_index(self) -> None:
+        """Create a index on the collection"""
+        from pymilvus import Collection, MilvusException
+
+        if isinstance(self.col, Collection) and self._get_index() is None:
+            try:
+                # If no index params, use a default HNSW based one
+                if self.index_params is None:
+                    self.index_params = {
+                        "metric_type": "IP",
+                        "index_type": "HNSW",
+                        "params": {"M": 8, "efConstruction": 64},
+                    }
+
+                try:
+                    self.col.create_index(
+                        self._vector_field,
+                        index_params=self.index_params,
+                        using=self.alias,
+                    )
+
+                # If default did not work, most likely on Zilliz Cloud
+                except MilvusException:
+                    # Use AUTOINDEX based index
+                    self.index_params = {
+                        "metric_type": "L2",
+                        "index_type": "AUTOINDEX",
+                        "params": {},
+                    }
+                    self.col.create_index(
+                        self._vector_field,
+                        index_params=self.index_params,
+                        using=self.alias,
+                    )
+                logger.debug(
+                    "Successfully created an index on collection: %s",
+                    self.collection_name,
+                )
+
+            except MilvusException as e:
+                logger.error(
+                    "Failed to create an index on collection: %s", self.collection_name
+                )
+                raise e
+
+    def _create_search_params(self) -> None:
+        """Generate search params based on the current index type"""
+        from pymilvus import Collection
+
+        if isinstance(self.col, Collection) and self.search_params is None:
+            index = self._get_index()
+            if index is not None:
+                index_type: str = index["index_param"]["index_type"]
+                metric_type: str = index["index_param"]["metric_type"]
+                self.search_params = self.default_search_params[index_type]
+                self.search_params["metric_type"] = metric_type
+
+    def _load(self) -> None:
+        """Load the collection if available."""
+        from pymilvus import Collection
+
+        if isinstance(self.col, Collection) and self._get_index() is not None:
+            self.col.load()
+
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        timeout: Optional[int] = None,
+        batch_size: int = 1000,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Insert text data into Milvus.
+
+        Inserting data when the collection has not be made yet will result
+        in creating a new Collection. The data of the first entity decides
+        the schema of the new collection, the dim is extracted from the first
+        embedding and the columns are decided by the first metadata dict.
+        Metada keys will need to be present for all inserted values. At
+        the moment there is no None equivalent in Milvus.
+
+        Args:
+            texts (Iterable[str]): The texts to embed, it is assumed
+                that they all fit in memory.
+            metadatas (Optional[List[dict]]): Metadata dicts attached to each of
+                the texts. Defaults to None.
+            timeout (Optional[int]): Timeout for each batch insert. Defaults
+                to None.
+            batch_size (int, optional): Batch size to use for insertion.
+                Defaults to 1000.
+
+        Raises:
+            MilvusException: Failure to add texts
+
+        Returns:
+            List[str]: The resulting keys for each inserted element.
+        """
+        from pymilvus import Collection, MilvusException
+
+        texts = list(texts)
+
+        try:
+            embeddings = self.embedding_func.embed_documents(texts)
+        except NotImplementedError:
+            embeddings = [self.embedding_func.embed_query(x) for x in texts]
+
+        if len(embeddings) == 0:
+            logger.debug("Nothing to insert, skipping.")
+            return []
+
+        # If the collection hasn't been initialized yet, perform all steps to do so
+        if not isinstance(self.col, Collection):
+            self._init(embeddings, metadatas)
+
+        # Dict to hold all insert columns
+        insert_dict: dict[str, list] = {
+            self._text_field: texts,
+            self._vector_field: embeddings,
+        }
+
+        # Collect the metadata into the insert dict.
+        # if metadatas is not None:
+        #     for d in metadatas:
+        #         for key, value in d.items():
+        #             if key in self.fields:
+        #                 insert_dict.setdefault(key, []).append(value)
+        if metadatas is not None:
+            for d in metadatas:
+                insert_dict.setdefault(self._metadata_field, []).append(d)
+
+        # Total insert count
+        vectors: list = insert_dict[self._vector_field]
+        total_count = len(vectors)
+
+        pks: list[str] = []
+
+        assert isinstance(self.col, Collection)
+        for i in range(0, total_count, batch_size):
+            # Grab end index
+            end = min(i + batch_size, total_count)
+            # Convert dict to list of lists batch for insertion
+            insert_list = [insert_dict[x][i:end] for x in self.fields]
+            # Insert into the collection.
+            try:
+                res: Collection
+                res = self.col.insert(insert_list, timeout=timeout, **kwargs)
+                pks.extend(res.primary_keys)
+            except MilvusException as e:
+                logger.error(
+                    "Failed to insert batch starting at entity: %s/%s", i, total_count
+                )
+                raise e
+        return pks
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Perform a similarity search against the query string.
+
+        Args:
+            query (str): The text to search.
+            k (int, optional): How many results to return. Defaults to 4.
+            param (dict, optional): The search params for the index type.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+        Returns:
+            List[Document]: Document results for search.
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+        res = self.similarity_search_with_score(
+            query=query, k=k, param=param, expr=expr, timeout=timeout, **kwargs
+        )
+        return [doc for doc, _ in res]
+
+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Perform a similarity search against the query string.
+
+        Args:
+            embedding (List[float]): The embedding vector to search.
+            k (int, optional): How many results to return. Defaults to 4.
+            param (dict, optional): The search params for the index type.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+        Returns:
+            List[Document]: Document results for search.
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+        res = self.similarity_search_with_score_by_vector(
+            embedding=embedding, k=k, param=param, expr=expr, timeout=timeout, **kwargs
+        )
+        return [doc for doc, _ in res]
+
+    def similarity_search_with_score(
+        self,
+        query: str,
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Perform a search on a query string and return results with score.
+
+        For more information about the search parameters, take a look at the pymilvus
+        documentation found here:
+        https://milvus.io/api-reference/pymilvus/v2.2.6/Collection/search().md
+
+        Args:
+            query (str): The text being searched.
+            k (int, optional): The amount of results to return. Defaults to 4.
+            param (dict): The search params for the specified index.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+        Returns:
+            List[float], List[Tuple[Document, any, any]]:
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+
+        # Embed the query text.
+        embedding = self.embedding_func.embed_query(query)
+
+        res = self.similarity_search_with_score_by_vector(
+            embedding=embedding, k=k, param=param, expr=expr, timeout=timeout, **kwargs
+        )
+        return res
+
+    def _similarity_search_with_relevance_scores(
+        self,
+        query: str,
+        k: int = 4,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Return docs and relevance scores in the range [0, 1].
+
+        0 is dissimilar, 1 is most similar.
+
+        Args:
+            query: input text
+            k: Number of Documents to return. Defaults to 4.
+            **kwargs: kwargs to be passed to similarity search. Should include:
+                score_threshold: Optional, a floating point value between 0 to 1 to
+                    filter the resulting set of retrieved docs
+
+        Returns:
+            List of Tuples of (doc, similarity_score)
+        """
+        return self.similarity_search_with_score(query, k, **kwargs)
+
+    def similarity_search_with_score_by_vector(
+        self,
+        embedding: List[float],
+        k: int = 4,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Tuple[Document, float]]:
+        """Perform a search on a query string and return results with score.
+
+        For more information about the search parameters, take a look at the pymilvus
+        documentation found here:
+        https://milvus.io/api-reference/pymilvus/v2.2.6/Collection/search().md
+
+        Args:
+            embedding (List[float]): The embedding vector being searched.
+            k (int, optional): The amount of results to return. Defaults to 4.
+            param (dict): The search params for the specified index.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+        Returns:
+            List[Tuple[Document, float]]: Result doc and score.
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+
+        if param is None:
+            param = self.search_params
+
+        # Determine result metadata fields.
+        output_fields = self.fields[:]
+        output_fields.remove(self._vector_field)
+
+        # Perform the search.
+        res = self.col.search(
+            data=[embedding],
+            anns_field=self._vector_field,
+            param=param,
+            limit=k,
+            expr=expr,
+            output_fields=output_fields,
+            timeout=timeout,
+            **kwargs,
+        )
+        # Organize results.
+        ret = []
+        for result in res[0]:
+            meta = {x: result.entity.get(x) for x in output_fields}
+            doc = Document(page_content=meta.pop(self._text_field), metadata=meta.get('metadata'))
+            pair = (doc, result.score)
+            ret.append(pair)
+
+        return ret
+
+    def max_marginal_relevance_search(
+        self,
+        query: str,
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Perform a search and return results that are reordered by MMR.
+
+        Args:
+            query (str): The text being searched.
+            k (int, optional): How many results to give. Defaults to 4.
+            fetch_k (int, optional): Total results to select k from.
+                Defaults to 20.
+            lambda_mult: Number between 0 and 1 that determines the degree
+                        of diversity among the results with 0 corresponding
+                        to maximum diversity and 1 to minimum diversity.
+                        Defaults to 0.5
+            param (dict, optional): The search params for the specified index.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+
+        Returns:
+            List[Document]: Document results for search.
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+
+        embedding = self.embedding_func.embed_query(query)
+
+        return self.max_marginal_relevance_search_by_vector(
+            embedding=embedding,
+            k=k,
+            fetch_k=fetch_k,
+            lambda_mult=lambda_mult,
+            param=param,
+            expr=expr,
+            timeout=timeout,
+            **kwargs,
+        )
+
+    def max_marginal_relevance_search_by_vector(
+        self,
+        embedding: list[float],
+        k: int = 4,
+        fetch_k: int = 20,
+        lambda_mult: float = 0.5,
+        param: Optional[dict] = None,
+        expr: Optional[str] = None,
+        timeout: Optional[int] = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Perform a search and return results that are reordered by MMR.
+
+        Args:
+            embedding (str): The embedding vector being searched.
+            k (int, optional): How many results to give. Defaults to 4.
+            fetch_k (int, optional): Total results to select k from.
+                Defaults to 20.
+            lambda_mult: Number between 0 and 1 that determines the degree
+                        of diversity among the results with 0 corresponding
+                        to maximum diversity and 1 to minimum diversity.
+                        Defaults to 0.5
+            param (dict, optional): The search params for the specified index.
+                Defaults to None.
+            expr (str, optional): Filtering expression. Defaults to None.
+            timeout (int, optional): How long to wait before timeout error.
+                Defaults to None.
+            kwargs: Collection.search() keyword arguments.
+
+        Returns:
+            List[Document]: Document results for search.
+        """
+        if self.col is None:
+            logger.debug("No existing collection to search.")
+            return []
+
+        if param is None:
+            param = self.search_params
+
+        # Determine result metadata fields.
+        output_fields = self.fields[:]
+        output_fields.remove(self._vector_field)
+
+        # Perform the search.
+        res = self.col.search(
+            data=[embedding],
+            anns_field=self._vector_field,
+            param=param,
+            limit=fetch_k,
+            expr=expr,
+            output_fields=output_fields,
+            timeout=timeout,
+            **kwargs,
+        )
+        # Organize results.
+        ids = []
+        documents = []
+        scores = []
+        for result in res[0]:
+            meta = {x: result.entity.get(x) for x in output_fields}
+            doc = Document(page_content=meta.pop(self._text_field), metadata=meta)
+            documents.append(doc)
+            scores.append(result.score)
+            ids.append(result.id)
+
+        vectors = self.col.query(
+            expr=f"{self._primary_field} in {ids}",
+            output_fields=[self._primary_field, self._vector_field],
+            timeout=timeout,
+        )
+        # Reorganize the results from query to match search order.
+        vectors = {x[self._primary_field]: x[self._vector_field] for x in vectors}
+
+        ordered_result_embeddings = [vectors[x] for x in ids]
+
+        # Get the new order of results.
+        new_ordering = maximal_marginal_relevance(
+            np.array(embedding), ordered_result_embeddings, k=k, lambda_mult=lambda_mult
+        )
+
+        # Reorder the values and return.
+        ret = []
+        for x in new_ordering:
+            # Function can return -1 index
+            if x == -1:
+                break
+            else:
+                ret.append(documents[x])
+        return ret
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection_name: str = "LangChainCollection",
+        connection_args: dict[str, Any] = DEFAULT_MILVUS_CONNECTION,
+        consistency_level: str = "Session",
+        index_params: Optional[dict] = None,
+        search_params: Optional[dict] = None,
+        drop_old: bool = False,
+        batch_size: int = 100,
+        ids: Optional[Sequence[str]] = None,
+        **kwargs: Any,
+    ) -> Milvus:
+        """Create a Milvus collection, indexes it with HNSW, and insert data.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            collection_name (str, optional): Collection name to use. Defaults to
+                "LangChainCollection".
+            connection_args (dict[str, Any], optional): Connection args to use. Defaults
+                to DEFAULT_MILVUS_CONNECTION.
+            consistency_level (str, optional): Which consistency level to use. Defaults
+                to "Session".
+            index_params (Optional[dict], optional): Which index_params to use. Defaults
+                to None.
+            search_params (Optional[dict], optional): Which search params to use.
+                Defaults to None.
+            drop_old (Optional[bool], optional): Whether to drop the collection with
+                that name if it exists. Defaults to False.
+            batch_size:
+                How many vectors upload per-request.
+                Default: 100
+            ids: Optional[Sequence[str]] = None,
+
+        Returns:
+            Milvus: Milvus Vector Store
+        """
+        vector_db = cls(
+            embedding_function=embedding,
+            collection_name=collection_name,
+            connection_args=connection_args,
+            consistency_level=consistency_level,
+            index_params=index_params,
+            search_params=search_params,
+            drop_old=drop_old,
+            **kwargs,
+        )
+        vector_db.add_texts(texts=texts, metadatas=metadatas, batch_size=batch_size)
+        return vector_db
--- a/api/core/index/vector_index/milvus_vector_index.py
+++ b/api/core/index/vector_index/milvus_vector_index.py
@@ -9,30 +9,44 @@ from core.index.base import BaseIndex
 from core.index.vector_index.base import BaseVectorIndex
 from core.vector_store.milvus_vector_store import MilvusVectorStore
 from core.vector_store.weaviate_vector_store import WeaviateVectorStore
-from models.dataset import Dataset
+from extensions.ext_database import db
+from models.dataset import Dataset, DatasetCollectionBinding


 class MilvusConfig(BaseModel):
-    endpoint: str
+    host: str
+    port: int
    user: str
    password: str
+    secure: bool = False
    batch_size: int = 100

    @root_validator()
    def validate_config(cls, values: dict) -> dict:
-        if not values['endpoint']:
-            raise ValueError("config MILVUS_ENDPOINT is required")
+        if not values['host']:
+            raise ValueError("config MILVUS_HOST is required")
+        if not values['port']:
+            raise ValueError("config MILVUS_PORT is required")
        if not values['user']:
            raise ValueError("config MILVUS_USER is required")
        if not values['password']:
            raise ValueError("config MILVUS_PASSWORD is required")
        return values

+    def to_milvus_params(self):
+        return {
+            'host': self.host,
+            'port': self.port,
+            'user': self.user,
+            'password': self.password,
+            'secure': self.secure
+        }
+

 class MilvusVectorIndex(BaseVectorIndex):
    def __init__(self, dataset: Dataset, config: MilvusConfig, embeddings: Embeddings):
        super().__init__(dataset, embeddings)
-        self._client = self._init_client(config)
+        self._client_config = config

    def get_type(self) -> str:
        return 'milvus'
@@ -49,7 +63,6 @@ class MilvusVectorIndex(BaseVectorIndex):
        dataset_id = dataset.id
        return "Vector_index_" + dataset_id.replace("-", "_") + '_Node'

-
    def to_index_struct(self) -> dict:
        return {
            "type": self.get_type(),
@@ -58,26 +71,29 @@ class MilvusVectorIndex(BaseVectorIndex):

    def create(self, texts: list[Document], **kwargs) -> BaseIndex:
        uuids = self._get_uuids(texts)
-        self._vector_store = WeaviateVectorStore.from_documents(
+        index_params = {
+            'metric_type': 'IP',
+            'index_type': "HNSW",
+            'params':  {"M": 8, "efConstruction": 64}
+        }
+        self._vector_store = MilvusVectorStore.from_documents(
            texts,
            self._embeddings,
-            client=self._client,
-            index_name=self.get_index_name(self.dataset),
-            uuids=uuids,
-            by_text=False
+            collection_name=self.get_index_name(self.dataset),
+            connection_args=self._client_config.to_milvus_params(),
+            index_params=index_params
        )

        return self

    def create_with_collection_name(self, texts: list[Document], collection_name: str, **kwargs) -> BaseIndex:
        uuids = self._get_uuids(texts)
-        self._vector_store = WeaviateVectorStore.from_documents(
+        self._vector_store = MilvusVectorStore.from_documents(
            texts,
            self._embeddings,
-            client=self._client,
-            index_name=collection_name,
-            uuids=uuids,
-            by_text=False
+            collection_name=collection_name,
+            ids=uuids,
+            content_payload_key='page_content'
        )

        return self
@@ -86,42 +102,53 @@ class MilvusVectorIndex(BaseVectorIndex):
        """Only for created index."""
        if self._vector_store:
            return self._vector_store
-
        attributes = ['doc_id', 'dataset_id', 'document_id']
-        if self._is_origin():
-            attributes = ['doc_id']

-        return WeaviateVectorStore(
-            client=self._client,
-            index_name=self.get_index_name(self.dataset),
-            text_key='text',
-            embedding=self._embeddings,
-            attributes=attributes,
-            by_text=False
+        return MilvusVectorStore(
+            collection_name=self.get_index_name(self.dataset),
+            embedding_function=self._embeddings,
+            connection_args=self._client_config.to_milvus_params()
        )

    def _get_vector_store_class(self) -> type:
        return MilvusVectorStore

    def delete_by_document_id(self, document_id: str):
-        if self._is_origin():
-            self.recreate_dataset(self.dataset)
-            return
+
+        vector_store = self._get_vector_store()
+        vector_store = cast(self._get_vector_store_class(), vector_store)
+        ids = vector_store.get_ids_by_document_id(document_id)
+        if ids:
+            vector_store.del_texts({
+                'filter': f'id in {ids}'
+            })
+
+    def delete_by_ids(self, doc_ids: list[str]) -> None:
+
+        vector_store = self._get_vector_store()
+        vector_store = cast(self._get_vector_store_class(), vector_store)
+        ids = vector_store.get_ids_by_doc_ids(doc_ids)
+        vector_store.del_texts({
+            'filter': f' id in {ids}'
+        })
+
+    def delete_by_group_id(self, group_id: str) -> None:

        vector_store = self._get_vector_store()
        vector_store = cast(self._get_vector_store_class(), vector_store)

-        vector_store.del_texts({
-            "operator": "Equal",
-            "path": ["document_id"],
-            "valueText": document_id
-        })
+        vector_store.delete()

-    def _is_origin(self):
-        if self.dataset.index_struct_dict:
-            class_prefix: str = self.dataset.index_struct_dict['vector_store']['class_prefix']
-            if not class_prefix.endswith('_Node'):
-                # original class_prefix
-                return True
+    def delete(self) -> None:
+        vector_store = self._get_vector_store()
+        vector_store = cast(self._get_vector_store_class(), vector_store)

-        return False
+        from qdrant_client.http import models
+        vector_store.del_texts(models.Filter(
+            must=[
+                models.FieldCondition(
+                    key="group_id",
+                    match=models.MatchValue(value=self.dataset.id),
+                ),
+            ],
+        ))
--- a/api/core/index/vector_index/vector_index.py
+++ b/api/core/index/vector_index/vector_index.py
@@ -47,6 +47,20 @@ class VectorIndex:
                ),
                embeddings=embeddings
            )
+        elif vector_type == "milvus":
+            from core.index.vector_index.milvus_vector_index import MilvusVectorIndex, MilvusConfig
+
+            return MilvusVectorIndex(
+                dataset=dataset,
+                config=MilvusConfig(
+                    host=config.get('MILVUS_HOST'),
+                    port=config.get('MILVUS_PORT'),
+                    user=config.get('MILVUS_USER'),
+                    password=config.get('MILVUS_PASSWORD'),
+                    secure=config.get('MILVUS_SECURE'),
+                ),
+                embeddings=embeddings
+            )
        else:
            raise ValueError(f"Vector store {config.get('VECTOR_STORE')} is not supported.")

--- a/api/core/third_party/langchain/llms/openllm.py
+++ b/api/core/third_party/langchain/llms/openllm.py
@@ -66,6 +66,7 @@ class OpenLLM(LLM):

        json_response = response.json()
        completion = json_response["responses"][0]
+        completion = completion.lstrip(prompt)

        if stop is not None:
            completion = enforce_stop_tokens(completion, stop)
--- a/api/core/vector_store/milvus_vector_store.py
+++ b/api/core/vector_store/milvus_vector_store.py
@@ -1,4 +1,4 @@
-from langchain.vectorstores import  Milvus
+from core.index.vector_index.milvus import Milvus


 class MilvusVectorStore(Milvus):
@@ -6,33 +6,41 @@ class MilvusVectorStore(Milvus):
        if not where_filter:
            raise ValueError('where_filter must not be empty')

-        self._client.batch.delete_objects(
-            class_name=self._index_name,
-            where=where_filter,
-            output='minimal'
-        )
+        self.col.delete(where_filter.get('filter'))

    def del_text(self, uuid: str) -> None:
-        self._client.data_object.delete(
-            uuid,
-            class_name=self._index_name
-        )
+        expr = f"id == {uuid}"
+        self.col.delete(expr)

    def text_exists(self, uuid: str) -> bool:
-        result = self._client.query.get(self._index_name).with_additional(["id"]).with_where({
-            "path": ["doc_id"],
-            "operator": "Equal",
-            "valueText": uuid,
-        }).with_limit(1).do()
+        result = self.col.query(
+            expr=f'metadata["doc_id"] == "{uuid}"',
+            output_fields=["id"]
+        )

-        if "errors" in result:
-            raise ValueError(f"Error during query: {result['errors']}")
+        return len(result) > 0

-        entries = result["data"]["Get"][self._index_name]
-        if len(entries) == 0:
-            return False
+    def get_ids_by_document_id(self, document_id: str):
+        result = self.col.query(
+            expr=f'metadata["document_id"] == "{document_id}"',
+            output_fields=["id"]
+        )
+        if result:
+            return [item["id"] for item in result]
+        else:
+            return None

-        return True
+    def get_ids_by_doc_ids(self, doc_ids: list):
+        result = self.col.query(
+            expr=f'metadata["doc_id"] in {doc_ids}',
+            output_fields=["id"]
+        )
+        if result:
+            return [item["id"] for item in result]
+        else:
+            return None

    def delete(self):
-        self._client.schema.delete_class(self._index_name)
+        from pymilvus import utility
+        utility.drop_collection(self.collection_name, None, self.alias)
+
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -52,4 +52,5 @@ pandas==1.5.3
 xinference==0.5.2
 safetensors==0.3.2
 zhipuai==1.0.7
-werkzeug==2.3.7
+werkzeug==2.3.7
+pymilvus==2.3.0
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -1091,6 +1091,8 @@ class SegmentService:
                    segment.answer = args['answer']
                if args['keywords']:
                    segment.keywords = args['keywords']
+                if args['enabled'] is not None:
+                    segment.enabled = args['enabled']
                db.session.add(segment)
                db.session.commit()
                # update segment index task
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -2,7 +2,7 @@ version: '3.1'
 services:
  # API service
  api:
-    image: langgenius/dify-api:0.3.25
+    image: langgenius/dify-api:0.3.26
    restart: always
    environment:
      # Startup mode, 'api' starts the API server.
@@ -78,7 +78,7 @@ services:
      S3_ACCESS_KEY: 'ak-difyai'
      S3_SECRET_KEY: 'sk-difyai'
      S3_REGION: 'us-east-1'
-      # The type of vector store to use. Supported values are `weaviate`, `qdrant`.
+      # The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
      VECTOR_STORE: weaviate
      # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
      WEAVIATE_ENDPOINT: http://weaviate:8080
@@ -88,6 +88,17 @@ services:
      QDRANT_URL: http://qdrant:6333
      # The Qdrant API key.
      QDRANT_API_KEY: difyai123456
+      # Milvus configuration Only available when VECTOR_STORE is `milvus`.
+      # The milvus host.
+      MILVUS_HOST: 127.0.0.1
+      # The milvus host.
+      MILVUS_PORT: 19530
+      # The milvus username.
+      MILVUS_USER: root
+      # The milvus password.
+      MILVUS_PASSWORD: Milvus
+      # The milvus tls switch.
+      MILVUS_SECURE: false
      # Mail configuration, support: resend
      MAIL_TYPE: ''
      # default send from email address, if not specified
@@ -113,7 +124,7 @@ services:
  # worker service
  # The Celery worker for processing the queue.
  worker:
-    image: langgenius/dify-api:0.3.25
+    image: langgenius/dify-api:0.3.26
    restart: always
    environment:
      # Startup mode, 'worker' starts the Celery worker for processing the queue.
@@ -145,7 +156,7 @@ services:
      # The type of storage to use for storing user files. Supported values are `local` and `s3`, Default: `local`
      STORAGE_TYPE: local
      STORAGE_LOCAL_PATH: storage
-      # The type of vector store to use. Supported values are `weaviate`, `qdrant`.
+      # The type of vector store to use. Supported values are `weaviate`, `qdrant`, `milvus`.
      VECTOR_STORE: weaviate
      # The Weaviate endpoint URL. Only available when VECTOR_STORE is `weaviate`.
      WEAVIATE_ENDPOINT: http://weaviate:8080
@@ -155,6 +166,17 @@ services:
      QDRANT_URL: http://qdrant:6333
      # The Qdrant API key.
      QDRANT_API_KEY: difyai123456
+      # Milvus configuration Only available when VECTOR_STORE is `milvus`.
+      # The milvus host.
+      MILVUS_HOST: 127.0.0.1
+      # The milvus host.
+      MILVUS_PORT: 19530
+      # The milvus username.
+      MILVUS_USER: root
+      # The milvus password.
+      MILVUS_PASSWORD: Milvus
+      # The milvus tls switch.
+      MILVUS_SECURE: false
      # Mail configuration, support: resend
      MAIL_TYPE: ''
      # default send from email address, if not specified
@@ -170,7 +192,7 @@ services:

  # Frontend web application.
  web:
-    image: langgenius/dify-web:0.3.25
+    image: langgenius/dify-web:0.3.26
    restart: always
    environment:
      EDITION: SELF_HOSTED
--- a/docker/milvus-standalone-docker-compose.yml
+++ b/docker/milvus-standalone-docker-compose.yml
@@ -0,0 +1,64 @@
+version: '3.5'
+
+services:
+  etcd:
+    container_name: milvus-etcd
+    image: quay.io/coreos/etcd:v3.5.5
+    environment:
+      - ETCD_AUTO_COMPACTION_MODE=revision
+      - ETCD_AUTO_COMPACTION_RETENTION=1000
+      - ETCD_QUOTA_BACKEND_BYTES=4294967296
+      - ETCD_SNAPSHOT_COUNT=50000
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
+    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+    healthcheck:
+      test: ["CMD", "etcdctl", "endpoint", "health"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  minio:
+    container_name: milvus-minio
+    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
+    environment:
+      MINIO_ACCESS_KEY: minioadmin
+      MINIO_SECRET_KEY: minioadmin
+    ports:
+      - "9001:9001"
+      - "9000:9000"
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
+    command: minio server /minio_data --console-address ":9001"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  standalone:
+    container_name: milvus-standalone
+    image: milvusdb/milvus:v2.3.1
+    command: ["milvus", "run", "standalone"]
+    environment:
+      ETCD_ENDPOINTS: etcd:2379
+      MINIO_ADDRESS: minio:9000
+      common.security.authorizationEnabled: true
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
+      interval: 30s
+      start_period: 90s
+      timeout: 20s
+      retries: 3
+    ports:
+      - "19530:19530"
+      - "9091:9091"
+    depends_on:
+      - "etcd"
+      - "minio"
+
+networks:
+  default:
+    name: milvus
--- a/web/app/(commonLayout)/datasets/template/template.en.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.en.mdx
@@ -89,7 +89,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
    --header 'Authorization: Bearer {api_key}'
    ```
    </CodeGroup>
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
            - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
              - <code>id</code> (string) Unique identifier for the preprocessing rule
-                - enumerate 
+                - enumerate
                  - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
                  - <code>remove_urls_emails</code> Delete URL, email address
              - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/document/create_by_text"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
            - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
              - <code>id</code> (string) Unique identifier for the preprocessing rule
-                - enumerate 
+                - enumerate
                  - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
                  - <code>remove_urls_emails</code> Delete URL, email address
              - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/document/create_by_file"
      targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \
+    curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
    --header 'Authorization: Bearer {api_key}' \
    --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
    --form 'file=@"/path/to/file"'
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
            - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
              - <code>id</code> (string) Unique identifier for the preprocessing rule
-                - enumerate 
+                - enumerate
                  - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
                  - <code>remove_urls_emails</code> Delete URL, email address
              - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) Custom rules (in automatic mode, this field is empty)
            - <code>pre_processing_rules</code> (array[object]) Preprocessing rules
              - <code>id</code> (string) Unique identifier for the preprocessing rule
-                - enumerate 
+                - enumerate
                  - <code>remove_extra_spaces</code> Replace consecutive spaces, newlines, tabs
                  - <code>remove_urls_emails</code> Delete URL, email address
              - <code>enabled</code> (bool) Whether to select this rule or not. If no document ID is passed in, it represents the default value.
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
      targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/{document_id}/create_by_file' \
+    curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \
    --header 'Authorization: Bearer {api_key}' \
    --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
    --form 'file=@"/path/to/file"'
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="GET" 
+    <CodeGroup
+      title="Request"
+      tag="GET"
      label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    {
      "data":[{
        "id": "",
-        "indexing_status": "indexing", 
+        "indexing_status": "indexing",
        "processing_started_at": 1681623462.0,
        "parsing_completed_at": 1681623462.0,
        "cleaning_completed_at": 1681623462.0,
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="DELETE" 
+    <CodeGroup
+      title="Request"
+      tag="DELETE"
      label="/datasets/{dataset_id}/documents/{document_id}"
      targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \
+    curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="GET" 
+    <CodeGroup
+      title="Request"
+      tag="GET"
      label="/datasets/{dataset_id}/documents"
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/segments"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -778,6 +778,212 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from

 ---

+<Heading
+  url='/datasets/{dataset_id}/documents/{document_id}/segments'
+  method='GET'
+  title='get documents segments'
+  name='#get_segment'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        Dataset ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        Document ID
+      </Property>
+    </Properties>
+
+     ### Query
+    <Properties>
+      <Property name='keyword' type='string' key='keyword'>
+        keyword，choosable
+      </Property>
+      <Property name='status' type='string' key='status'>
+        Search status，completed
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/segments"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": [{
+        "id": "",
+        "position": 1,
+        "document_id": "",
+        "content": "1",
+        "answer": "1",
+        "word_count": 25,
+        "tokens": 0,
+        "keywords": [
+            "a"
+        ],
+        "index_node_id": "",
+        "index_node_hash": "",
+        "hit_count": 0,
+        "enabled": true,
+        "disabled_at": null,
+        "disabled_by": null,
+        "status": "completed",
+        "created_by": "",
+        "created_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "stopped_at": null
+      }],
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
+<Heading
+  url='/datasets/{dataset_id}/segments/{segment_id}'
+  method='DELETE'
+  title='delete document segment'
+  name='#delete_segment'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        Dataset ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        Document Segment ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="DELETE"
+      label="/datasets/{dataset_id}/segments/{segment_id}"
+      targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "result": "success"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
+<Heading
+  url='/datasets/{dataset_id}/segments/{segment_id}'
+  method='POST'
+  title='update document segment'
+  name='#update_segment'
+/>
+<Row>
+  <Col>
+    ### POST
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        Dataset ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        Document Segment ID
+      </Property>
+    </Properties>
+
+    ### Request Body
+    <Properties>
+      <Property name='segments' type='object list' key='segments'>
+        - <code>content</code> (text) text content/question content，required
+        - <code>answer</code> (text) Answer content, not required, passed if the data set is in qa mode
+        - <code>keywords</code> (list) keyword, not required
+        - <code>enabled</code> (bool) false/true, not required
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="POST"
+      label="/datasets/{dataset_id}/segments/{segment_id}"
+      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Content-Type: application/json' \
+    --data-raw '{
+      "segments": {
+          "content": "1",
+          "answer": "1",
+          "keywords": ["a"],
+          "enabled": false
+      }
+    }'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": [{
+        "id": "",
+        "position": 1,
+        "document_id": "",
+        "content": "1",
+        "answer": "1",
+        "word_count": 25,
+        "tokens": 0,
+        "keywords": [
+            "a"
+        ],
+        "index_node_id": "",
+        "index_node_hash": "",
+        "hit_count": 0,
+        "enabled": true,
+        "disabled_at": null,
+        "disabled_by": null,
+        "status": "completed",
+        "created_by": "",
+        "created_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "stopped_at": null
+      }],
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
 <Row>
  <Col>
    ### Error message
--- a/web/app/(commonLayout)/datasets/template/template.zh.mdx
+++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx
@@ -27,7 +27,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name"}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -82,14 +82,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets"
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets?page=1&limit=20' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets?page=1&limit=20' \
    --header 'Authorization: Bearer {api_key}'
    ```
    </CodeGroup>
@@ -162,7 +162,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) 自定义规则（自动模式下，该字段为空）
            - <code>pre_processing_rules</code> (array[object]) 预处理规则
              - <code>id</code> (string) 预处理规则的唯一标识符
-                - 枚举： 
+                - 枚举：
                  - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
                  - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
              - <code>enabled</code> (bool) 是否选中该规则，不传入文档 ID 时代表默认值
@@ -173,14 +173,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/document/create_by_text"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "text","text": "text","indexing_technique": "high_quality","process_rule": {"mode": "automatic"}}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_text' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_text' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -269,7 +269,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) 自定义规则（自动模式下，该字段为空）
            - <code>pre_processing_rules</code> (array[object]) 预处理规则
              - <code>id</code> (string) 预处理规则的唯一标识符
-                - 枚举： 
+                - 枚举：
                  - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
                  - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
              - <code>enabled</code> (bool) 是否选中该规则，不传入文档 ID 时代表默认值
@@ -280,14 +280,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/document/create_by_file"
      targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/create_by_file' \
+    curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \
    --header 'Authorization: Bearer {api_key}' \
    --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
    --form 'file=@"/path/to/file"'
@@ -363,7 +363,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) 自定义规则（自动模式下，该字段为空）
            - <code>pre_processing_rules</code> (array[object]) 预处理规则
              - <code>id</code> (string) 预处理规则的唯一标识符
-                - 枚举： 
+                - 枚举：
                  - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
                  - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
              - <code>enabled</code> (bool) 是否选中该规则，不传入文档 ID 时代表默认值
@@ -374,14 +374,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/update_by_text"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"name": "name","text": "text"}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/update_by_text' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -460,7 +460,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
          - <code>rules</code> (object) 自定义规则（自动模式下，该字段为空）
            - <code>pre_processing_rules</code> (array[object]) 预处理规则
              - <code>id</code> (string) 预处理规则的唯一标识符
-                - 枚举： 
+                - 枚举：
                  - <code>remove_extra_spaces</code> 替换连续空格、换行符、制表符
                  - <code>remove_urls_emails</code> 删除 URL、电子邮件地址
              - <code>enabled</code> (bool) 是否选中该规则，不传入文档 ID 时代表默认值
@@ -471,14 +471,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/update_by_file"
      targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location POST 'https://api.dify.ai/v1/datasets/{dataset_id}/document/{document_id}/create_by_file' \
+    curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/{document_id}/create_by_file' \
    --header 'Authorization: Bearer {api_key}' \
    --form 'data="{\"name\":\"Dify\",\"indexing_technique\":\"high_quality\",\"process_rule\":{\"rules\":{\"pre_processing_rules\":[{\"id\":\"remove_extra_spaces\",\"enabled\":true},{\"id\":\"remove_urls_emails\",\"enabled\":true}],\"segmentation\":{\"separator\":\"###\",\"max_tokens\":500}},\"mode\":\"custom\"}}";type=text/plain' \
    --form 'file=@"/path/to/file"'
@@ -539,14 +539,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="GET" 
+    <CodeGroup
+      title="Request"
+      tag="GET"
      label="/datasets/{dataset_id}/batch/{batch}/indexing-status"
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{batch}/indexing-status' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{batch}/indexing-status' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -555,7 +555,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    {
      "data":[{
        "id": "",
-        "indexing_status": "indexing", 
+        "indexing_status": "indexing",
        "processing_started_at": 1681623462.0,
        "parsing_completed_at": 1681623462.0,
        "cleaning_completed_at": 1681623462.0,
@@ -594,14 +594,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="DELETE" 
+    <CodeGroup
+      title="Request"
+      tag="DELETE"
      label="/datasets/{dataset_id}/documents/{document_id}"
      targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request DELETE 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}' \
+    curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -646,14 +646,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="GET" 
+    <CodeGroup
+      title="Request"
+      tag="GET"
      label="/datasets/{dataset_id}/documents"
      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \\\n--header 'Authorization: Bearer {api_key}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request GET 'https://api.dify.ai/v1/datasets/{dataset_id}/documents' \
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents' \
    --header 'Authorization: Bearer {api_key}' \
    ```
    </CodeGroup>
@@ -720,14 +720,14 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from
    </Properties>
  </Col>
  <Col sticky>
-    <CodeGroup 
-      title="Request" 
-      tag="POST" 
+    <CodeGroup
+      title="Request"
+      tag="POST"
      label="/datasets/{dataset_id}/documents/{document_id}/segments"
      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{"segments": [{"content": "1","answer": "1","keywords": ["a"]}]}'`}
    >
    ```bash {{ title: 'cURL' }}
-    curl --location --request POST 'https://api.dify.ai/v1/datasets/{dataset_id}/documents/{document_id}/segments' \
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
    --header 'Authorization: Bearer {api_key}' \
    --header 'Content-Type: application/json' \
    --data-raw '{
@@ -778,6 +778,213 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from

 ---

+<Heading
+  url='/datasets/{dataset_id}/documents/{document_id}/segments'
+  method='GET'
+  title='查询文档分段'
+  name='#get_segment'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        数据集 ID
+      </Property>
+      <Property name='document_id' type='string' key='document_id'>
+        文档 ID
+      </Property>
+    </Properties>
+
+     ### Query
+    <Properties>
+      <Property name='keyword' type='string' key='keyword'>
+        搜索关键词，可选
+      </Property>
+      <Property name='status' type='string' key='status'>
+        搜索状态，completed
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="GET"
+      label="/datasets/{dataset_id}/documents/{document_id}/segments"
+      targetCode={`curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request GET '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": [{
+        "id": "",
+        "position": 1,
+        "document_id": "",
+        "content": "1",
+        "answer": "1",
+        "word_count": 25,
+        "tokens": 0,
+        "keywords": [
+            "a"
+        ],
+        "index_node_id": "",
+        "index_node_hash": "",
+        "hit_count": 0,
+        "enabled": true,
+        "disabled_at": null,
+        "disabled_by": null,
+        "status": "completed",
+        "created_by": "",
+        "created_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "stopped_at": null
+      }],
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
+<Heading
+  url='/datasets/{dataset_id}/segments/{segment_id}'
+  method='DELETE'
+  title='删除文档分段'
+  name='#delete_segment'
+/>
+<Row>
+  <Col>
+    ### Path
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        数据集 ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        文档分段ID
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="DELETE"
+      label="/datasets/{dataset_id}/segments/{segment_id}"
+      targetCode={`curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request DELETE '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "result": "success"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
+<Heading
+  url='/datasets/{dataset_id}/segments/{segment_id}'
+  method='POST'
+  title='更新文档分段'
+  name='#update_segment'
+/>
+<Row>
+  <Col>
+    ### POST
+    <Properties>
+      <Property name='dataset_id' type='string' key='dataset_id'>
+        数据集 ID
+      </Property>
+      <Property name='segment_id' type='string' key='segment_id'>
+        文档分段ID
+      </Property>
+    </Properties>
+
+    ### Request Body
+    <Properties>
+      <Property name='segments' type='object list' key='segments'>
+        - <code>content</code> (text) 文本内容/问题内容，必填
+        - <code>answer</code> (text) 答案内容，非必填，如果数据集的模式为qa模式则传值
+        - <code>keywords</code> (list) 关键字，非必填
+        - <code>enabled</code> (bool) false/true，非必填
+      </Property>
+    </Properties>
+  </Col>
+  <Col sticky>
+    <CodeGroup
+      title="Request"
+      tag="POST"
+      label="/datasets/{dataset_id}/segments/{segment_id}"
+      targetCode={`curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json'\\\n--data-raw '{\"segments\": {\"content\": \"1\",\"answer\": \"1\", \"keywords\": [\"a\"], \"enabled\": false}}'`}
+    >
+    ```bash {{ title: 'cURL' }}
+    curl --location --request POST '${props.apiBaseUrl}/datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}' \
+    --header 'Authorization: Bearer {api_key}' \
+    --header 'Content-Type: application/json' \
+    --data-raw '{
+      "segments": {
+          "content": "1",
+          "answer": "1",
+          "keywords": ["a"],
+          "enabled": false
+      }
+    }'
+    ```
+    </CodeGroup>
+    <CodeGroup title="Response">
+    ```json {{ title: 'Response' }}
+    {
+      "data": [{
+        "id": "",
+        "position": 1,
+        "document_id": "",
+        "content": "1",
+        "answer": "1",
+        "word_count": 25,
+        "tokens": 0,
+        "keywords": [
+            "a"
+        ],
+        "index_node_id": "",
+        "index_node_hash": "",
+        "hit_count": 0,
+        "enabled": true,
+        "disabled_at": null,
+        "disabled_by": null,
+        "status": "completed",
+        "created_by": "",
+        "created_at": 1695312007,
+        "indexing_at": 1695312007,
+        "completed_at": 1695312007,
+        "error": null,
+        "stopped_at": null
+      }],
+      "doc_form": "text_model"
+    }
+    ```
+    </CodeGroup>
+  </Col>
+</Row>
+
+---
+
 <Row>
  <Col>
    ### 错误信息
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
 {
  "name": "dify-web",
-  "version": "0.3.25",
+  "version": "0.3.26",
  "private": true,
  "scripts": {
    "dev": "next dev",
Author	SHA1	Message	Date
Jyong	cc63c8499f	bump version to 0.3.26 (#1307 ) Co-authored-by: jyong <jyong@dify.ai>	2023-10-11 16:11:24 +08:00
Jyong	f191b8b8d1	milvus docker compose env (#1306 ) Co-authored-by: jyong <jyong@dify.ai>	2023-10-11 16:05:37 +08:00
Jyong	5003db987d	milvus secure check fix (#1305 ) Co-authored-by: jyong <jyong@dify.ai>	2023-10-11 13:11:06 +08:00
Jyong	07aab5e868	Feat/add milvus vector db (#1302 ) Co-authored-by: jyong <jyong@dify.ai>	2023-10-10 21:56:24 +08:00
takatost	875dfbbf0e	fix: openllm completion start with prompt, remove it (#1303 )	2023-10-10 04:44:19 -05:00
Charlie.Wei	9e7efa45d4	document segmentApi Add get&update&delete operate (#1285 ) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM>	2023-10-10 13:27:06 +08:00