Mirror of https://github.com/langgenius/dify.git, synced 2026-03-14 19:56:59 +00:00

Compare commits: deploy/age...pinecone (10 commits)
- 594906c1ff
- 80f8245f2e
- a12b437c16
- 12de554313
- 1f36c0c1c5
- 8b9297563c
- 1cbe9eedb6
- 90fc5a1f12
- 41dfdf1ac0
- dd7de74aa6
@@ -156,7 +156,7 @@ WEB_API_CORS_ALLOW_ORIGINS=http://localhost:3000,*
 CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*
 
 # Vector database configuration
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`.
+# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `pinecone`.
 VECTOR_STORE=weaviate
 # Prefix used to create collection name in vector database
 VECTOR_INDEX_NAME_PREFIX=Vector_index

@@ -361,6 +361,17 @@ PROMPT_GENERATION_MAX_TOKENS=512
 CODE_GENERATION_MAX_TOKENS=1024
 PLUGIN_BASED_TOKEN_COUNTING_ENABLED=false
 
+
+# Pinecone configuration, only available when VECTOR_STORE is `pinecone`
+PINECONE_API_KEY=your-pinecone-api-key
+PINECONE_ENVIRONMENT=your-pinecone-environment
+PINECONE_INDEX_NAME=dify-index
+PINECONE_CLIENT_TIMEOUT=30
+PINECONE_BATCH_SIZE=100
+PINECONE_METRIC=cosine
+PINECONE_PODS=1
+PINECONE_POD_TYPE=s1
+
 # Mail configuration, support: resend, smtp, sendgrid
 MAIL_TYPE=
 # If using SendGrid, use the 'from' field for authentication if necessary.
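Note that the new `PINECONE_*` keys only take effect together with `VECTOR_STORE=pinecone`; per the comment in the hunk above, they are inert under any other `VECTOR_STORE` value.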
@@ -35,6 +35,7 @@ from .vdb.opensearch_config import OpenSearchConfig
 from .vdb.oracle_config import OracleConfig
 from .vdb.pgvector_config import PGVectorConfig
 from .vdb.pgvectors_config import PGVectoRSConfig
+from .vdb.pinecone_config import PineconeConfig
 from .vdb.qdrant_config import QdrantConfig
 from .vdb.relyt_config import RelytConfig
 from .vdb.tablestore_config import TableStoreConfig

@@ -331,6 +332,7 @@ class MiddlewareConfig(
     PGVectorConfig,
     VastbaseVectorConfig,
     PGVectoRSConfig,
+    PineconeConfig,
     QdrantConfig,
     RelytConfig,
     TencentVectorDBConfig,
api/configs/middleware/vdb/pinecone_config.py (new file, 41 lines)
@@ -0,0 +1,41 @@
from typing import Optional

from pydantic import Field, PositiveInt
from pydantic_settings import BaseSettings


class PineconeConfig(BaseSettings):
    """
    Configuration settings for Pinecone vector database
    """

    PINECONE_API_KEY: Optional[str] = Field(
        description="API key for authenticating with Pinecone service",
        default=None,
    )

    PINECONE_ENVIRONMENT: Optional[str] = Field(
        description="Pinecone environment (e.g., 'us-west1-gcp', 'us-east-1-aws')",
        default=None,
    )

    PINECONE_INDEX_NAME: Optional[str] = Field(
        description="Default Pinecone index name",
        default=None,
    )

    PINECONE_CLIENT_TIMEOUT: PositiveInt = Field(
        description="Timeout in seconds for Pinecone client operations (default is 30 seconds)",
        default=30,
    )

    PINECONE_BATCH_SIZE: PositiveInt = Field(
        description="Batch size for Pinecone operations (default is 100)",
        default=100,
    )

    PINECONE_METRIC: str = Field(
        description="Distance metric for Pinecone index (cosine, euclidean, dotproduct)",
        default="cosine",
    )
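A quick illustration of how these settings resolve at runtime. This is a minimal sketch, assuming the standard pydantic-settings behavior of populating fields from the process environment; the import path is inferred from the relative import in the previous hunk and is not itself part of the diff:

import os

from configs.middleware.vdb.pinecone_config import PineconeConfig  # path inferred from the diff

os.environ["PINECONE_API_KEY"] = "pc-example"    # normally supplied via .env
os.environ["PINECONE_CLIENT_TIMEOUT"] = "60"     # env values are strings; pydantic coerces to PositiveInt

cfg = PineconeConfig()
assert cfg.PINECONE_API_KEY == "pc-example"
assert cfg.PINECONE_CLIENT_TIMEOUT == 60         # coerced from the string "60"
assert cfg.PINECONE_METRIC == "cosine"           # unset fields fall back to their declared defaults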
@@ -660,6 +660,7 @@ class DatasetRetrievalSettingApi(Resource):
                 | VectorType.BAIDU
                 | VectorType.VIKINGDB
                 | VectorType.UPSTASH
+                | VectorType.PINECONE
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (

@@ -711,6 +712,7 @@ class DatasetRetrievalSettingMockApi(Resource):
                 | VectorType.BAIDU
                 | VectorType.VIKINGDB
                 | VectorType.UPSTASH
+                | VectorType.PINECONE
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (
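Both match arms fold Pinecone into the group of vector types that report semantic search as their only retrieval method, which is consistent with the new `PineconeVector.search_by_full_text` (later in this compare) returning an empty list.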
@@ -24,7 +24,7 @@ default_retrieval_model = {
     "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
     "reranking_enable": False,
     "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
-    "top_k": 2,
+    "top_k": 4,
     "score_threshold_enabled": False,
 }
 
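This `top_k` bump from 2 to 4 in the default retrieval model recurs throughout the compare: the same change appears below in `CouchbaseVector.search_by_full_text`, `DatasetRetrieval`, `DatasetMultiRetrieverTool`, `DatasetRetrieverBaseTool`, `DocumentService`, and `HitTestingService`.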
@@ -256,7 +256,7 @@ class AnalyticdbVectorOpenAPI:
         response = self._client.query_collection_data(request)
         documents = []
         for match in response.body.matches.match:
-            if match.score > score_threshold:
+            if match.score >= score_threshold:
                 metadata = json.loads(match.metadata.get("metadata_"))
                 metadata["score"] = match.score
                 doc = Document(

@@ -293,7 +293,7 @@ class AnalyticdbVectorOpenAPI:
         response = self._client.query_collection_data(request)
         documents = []
         for match in response.body.matches.match:
-            if match.score > score_threshold:
+            if match.score >= score_threshold:
                 metadata = json.loads(match.metadata.get("metadata_"))
                 metadata["score"] = match.score
                 doc = Document(
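The `>` to `>=` flip looks cosmetic but changes which documents survive filtering at the boundary. A minimal illustration, not taken from the diff:

score, score_threshold = 0.8, 0.8

# Old behavior: a document scoring exactly at the threshold was dropped.
assert not (score > score_threshold)

# New behavior: it is retained. With the default threshold of 0.0 this also
# keeps zero-scored matches that the strict comparison silently discarded.
assert score >= score_threshold

The same one-character change is applied to every vector store touched below, keeping the boundary semantics uniform across backends.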
@@ -229,7 +229,7 @@ class AnalyticdbVectorBySql:
         documents = []
         for record in cur:
             id, vector, score, page_content, metadata = record
-            if score > score_threshold:
+            if score >= score_threshold:
                 metadata["score"] = score
                 doc = Document(
                     page_content=page_content,

@@ -157,7 +157,7 @@ class BaiduVector(BaseVector):
             if meta is not None:
                 meta = json.loads(meta)
             score = row.get("score", 0.0)
-            if score > score_threshold:
+            if score >= score_threshold:
                 meta["score"] = score
                 doc = Document(page_content=row_data.get(self.field_text), metadata=meta)
                 docs.append(doc)

@@ -120,7 +120,7 @@ class ChromaVector(BaseVector):
                 distance = distances[index]
                 metadata = dict(metadatas[index])
                 score = 1 - distance
-                if score > score_threshold:
+                if score >= score_threshold:
                     metadata["score"] = score
                     doc = Document(
                         page_content=documents[index],

@@ -304,7 +304,7 @@ class CouchbaseVector(BaseVector):
         return docs
 
     def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
-        top_k = kwargs.get("top_k", 2)
+        top_k = kwargs.get("top_k", 4)
         try:
             CBrequest = search.SearchRequest.create(search.QueryStringQuery("text:" + query))
             search_iter = self._scope.search(

@@ -216,7 +216,7 @@ class ElasticSearchVector(BaseVector):
         docs = []
         for doc, score in docs_and_scores:
             score_threshold = float(kwargs.get("score_threshold") or 0.0)
-            if score > score_threshold:
+            if score >= score_threshold:
                 if doc.metadata is not None:
                     doc.metadata["score"] = score
                     docs.append(doc)

@@ -127,7 +127,7 @@ class HuaweiCloudVector(BaseVector):
         docs = []
         for doc, score in docs_and_scores:
             score_threshold = float(kwargs.get("score_threshold") or 0.0)
-            if score > score_threshold:
+            if score >= score_threshold:
                 if doc.metadata is not None:
                     doc.metadata["score"] = score
                     docs.append(doc)

@@ -275,7 +275,7 @@ class LindormVectorStore(BaseVector):
         docs = []
         for doc, score in docs_and_scores:
             score_threshold = kwargs.get("score_threshold", 0.0) or 0.0
-            if score > score_threshold:
+            if score >= score_threshold:
                 if doc.metadata is not None:
                     doc.metadata["score"] = score
                     docs.append(doc)

@@ -194,7 +194,7 @@ class OpenGauss(BaseVector):
                 metadata, text, distance = record
                 score = 1 - distance
                 metadata["score"] = score
-                if score > score_threshold:
+                if score >= score_threshold:
                     docs.append(Document(page_content=text, metadata=metadata))
         return docs
 

@@ -211,7 +211,7 @@ class OpenSearchVector(BaseVector):
 
             metadata["score"] = hit["_score"]
             score_threshold = float(kwargs.get("score_threshold") or 0.0)
-            if hit["_score"] > score_threshold:
+            if hit["_score"] >= score_threshold:
                 doc = Document(page_content=hit["_source"].get(Field.CONTENT_KEY.value), metadata=metadata)
                 docs.append(doc)
 

@@ -261,7 +261,7 @@ class OracleVector(BaseVector):
                 metadata, text, distance = record
                 score = 1 - distance
                 metadata["score"] = score
-                if score > score_threshold:
+                if score >= score_threshold:
                     docs.append(Document(page_content=text, metadata=metadata))
         conn.close()
         return docs

@@ -202,7 +202,7 @@ class PGVectoRS(BaseVector):
                 score = 1 - dis
                 metadata["score"] = score
                 score_threshold = float(kwargs.get("score_threshold") or 0.0)
-                if score > score_threshold:
+                if score >= score_threshold:
                     doc = Document(page_content=record.text, metadata=metadata)
                     docs.append(doc)
         return docs

@@ -195,7 +195,7 @@ class PGVector(BaseVector):
                 metadata, text, distance = record
                 score = 1 - distance
                 metadata["score"] = score
-                if score > score_threshold:
+                if score >= score_threshold:
                     docs.append(Document(page_content=text, metadata=metadata))
         return docs
 
api/core/rag/datasource/vdb/pinecone/__init__.py (new empty file)

api/core/rag/datasource/vdb/pinecone/pinecone_vector.py (new file, 341 lines)
@@ -0,0 +1,341 @@
import json
import time
from typing import Any, Optional

from pinecone import Pinecone, ServerlessSpec
from pydantic import BaseModel

from configs import dify_config
from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.dataset import Dataset, DatasetCollectionBinding


class PineconeConfig(BaseModel):
    """Pinecone configuration class"""

    api_key: str
    environment: str
    index_name: Optional[str] = None
    timeout: float = 30
    batch_size: int = 100
    metric: str = "cosine"


class PineconeVector(BaseVector):
    """Pinecone vector database concrete implementation class"""

    def __init__(self, collection_name: str, group_id: str, config: PineconeConfig):
        super().__init__(collection_name)
        self._client_config = config
        self._group_id = group_id

        # Initialize Pinecone client with SSL configuration
        try:
            self._pc = Pinecone(
                api_key=config.api_key,
                # Configure SSL to handle connection issues
                ssl_ca_certs=None,  # Use system default CA certificates
            )
        except Exception as e:
            # Fallback to basic initialization if SSL config fails
            self._pc = Pinecone(api_key=config.api_key)

        # Normalize index name: lowercase, only a-z0-9- and <=45 chars
        import re, hashlib
        base_name = collection_name.lower()
        base_name = re.sub(r'[^a-z0-9-]+', '-', base_name)  # replace invalid chars with '-'
        base_name = re.sub(r'-+', '-', base_name).strip('-')
        # Use longer secure suffix to reduce collision risk
        suffix_len = 24  # 24 hex digits (96-bit entropy)
        if len(base_name) > 45:
            hash_suffix = hashlib.sha256(base_name.encode()).hexdigest()[:suffix_len]
            truncated_name = base_name[:45-(suffix_len+1)].rstrip('-')
            self._index_name = f"{truncated_name}-{hash_suffix}"
        else:
            self._index_name = base_name
        # Guard empty name
        if not self._index_name:
            self._index_name = f"index-{hashlib.sha256(collection_name.encode()).hexdigest()[:suffix_len]}"
        self._index = None

    def get_type(self) -> str:
        """Return vector database type identifier"""
        return "pinecone"

    def _ensure_index_initialized(self) -> None:
        """Ensure that self._index is attached to an existing Pinecone index."""
        if self._index is not None:
            return
        try:
            existing_indexes = self._pc.list_indexes().names()
            if self._index_name in existing_indexes:
                self._index = self._pc.Index(self._index_name)
            else:
                raise ValueError("Index not initialized. Please ingest documents to create index.")
        except Exception:
            raise

    def to_index_struct(self) -> dict:
        """Generate index structure dictionary"""
        return {
            "type": self.get_type(),
            "vector_store": {"class_prefix": self._collection_name}
        }

    def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs):
        """Create vector index"""
        if texts:
            # Get vector dimension
            vector_size = len(embeddings[0])

            # Create Pinecone index
            self.create_index(vector_size)

            # Add vector data
            self.add_texts(texts, embeddings, **kwargs)

    def create_index(self, dimension: int):
        """Create Pinecone index"""
        lock_name = f"vector_indexing_lock_{self._index_name}"

        with redis_client.lock(lock_name, timeout=30):
            # Check Redis cache
            index_exist_cache_key = f"vector_indexing_{self._index_name}"
            if redis_client.get(index_exist_cache_key):
                self._index = self._pc.Index(self._index_name)
                return

            # Check if index already exists
            existing_indexes = self._pc.list_indexes().names()

            if self._index_name not in existing_indexes:
                # Create new index using ServerlessSpec
                self._pc.create_index(
                    name=self._index_name,
                    dimension=dimension,
                    metric=self._client_config.metric,
                    spec=ServerlessSpec(
                        cloud='aws',
                        region=self._client_config.environment
                    )
                )

                # Wait for index creation to complete
                while not self._pc.describe_index(self._index_name).status['ready']:
                    time.sleep(1)
            else:
                # Get index instance
                self._index = self._pc.Index(self._index_name)

            # Set cache
            redis_client.set(index_exist_cache_key, 1, ex=3600)

    def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs):
        """Batch add document vectors"""
        if not self._index:
            raise ValueError("Index not initialized. Call create() first.")

        total_docs = len(documents)

        uuids = self._get_uuids(documents)
        batch_size = self._client_config.batch_size
        added_ids = []

        # Batch processing
        total_batches = (total_docs + batch_size - 1) // batch_size  # Ceiling division
        for batch_idx, i in enumerate(range(0, len(documents), batch_size), 1):
            batch_documents = documents[i:i + batch_size]
            batch_embeddings = embeddings[i:i + batch_size]
            batch_uuids = uuids[i:i + batch_size]
            batch_size_actual = len(batch_documents)

            # Build Pinecone vector data (metadata must be primitives or list[str])
            vectors_to_upsert = []
            for doc, embedding, doc_id in zip(batch_documents, batch_embeddings, batch_uuids):
                raw_meta = doc.metadata or {}
                safe_meta: dict[str, Any] = {}
                # lift common identifiers to top-level fields for filtering
                for k, v in raw_meta.items():
                    if isinstance(v, (str, int, float, bool)):
                        safe_meta[k] = v
                    elif isinstance(v, list) and all(isinstance(x, str) for x in v):
                        safe_meta[k] = v
                    else:
                        safe_meta[k] = json.dumps(v, ensure_ascii=False)

                # keep content as string metadata if needed
                safe_meta[Field.CONTENT_KEY.value] = doc.page_content
                # group id as string
                safe_meta[Field.GROUP_KEY.value] = str(self._group_id)

                vectors_to_upsert.append({
                    "id": doc_id,
                    "values": embedding,
                    "metadata": safe_meta
                })

            # Batch insert to Pinecone
            try:
                self._index.upsert(vectors=vectors_to_upsert)
                added_ids.extend(batch_uuids)
            except Exception as e:
                raise

        return added_ids

    def search_by_vector(self, query_vector: list[float], **kwargs) -> list[Document]:
        """Vector similarity search"""
        # Lazily attach to an existing index if needed
        self._ensure_index_initialized()

        top_k = kwargs.get("top_k", 4)
        score_threshold = float(kwargs.get("score_threshold", 0.0))

        # Build filter conditions
        filter_dict = {Field.GROUP_KEY.value: {"$eq": str(self._group_id)}}

        # Document scope filtering
        document_ids_filter = kwargs.get("document_ids_filter")
        if document_ids_filter:
            filter_dict["document_id"] = {"$in": document_ids_filter}

        # Execute search
        try:
            response = self._index.query(
                vector=query_vector,
                top_k=top_k,
                include_metadata=True,
                filter=filter_dict
            )
        except Exception as e:
            raise

        # Convert results
        docs = []
        filtered_count = 0
        for match in response.matches:
            if match.score >= score_threshold:
                page_content = match.metadata.get(Field.CONTENT_KEY.value, "")
                metadata = dict(match.metadata or {})
                metadata.pop(Field.CONTENT_KEY.value, None)
                metadata.pop(Field.GROUP_KEY.value, None)
                metadata["score"] = match.score

                doc = Document(page_content=page_content, metadata=metadata)
                docs.append(doc)
            else:
                filtered_count += 1

        # Sort by similarity score in descending order
        docs.sort(key=lambda x: x.metadata.get("score", 0), reverse=True)

        return docs

    def search_by_full_text(self, query: str, **kwargs) -> list[Document]:
        """Full-text search - Pinecone does not natively support it, returns empty list"""
        return []

    def delete_by_metadata_field(self, key: str, value: str):
        """Delete by metadata field"""
        self._ensure_index_initialized()

        try:
            # Build filter conditions
            filter_dict = {
                Field.GROUP_KEY.value: {"$eq": self._group_id},
                f"{Field.METADATA_KEY.value}.{key}": {"$eq": value}
            }

            # Pinecone delete operation
            self._index.delete(filter=filter_dict)
        except Exception as e:
            # Ignore delete errors
            pass

    def delete_by_ids(self, ids: list[str]) -> None:
        """Batch delete by ID list"""
        self._ensure_index_initialized()

        try:
            # Pinecone delete by ID
            self._index.delete(ids=ids)
        except Exception as e:
            raise

    def delete(self) -> None:
        """Delete all vector data for the entire dataset"""
        self._ensure_index_initialized()

        try:
            # Delete all vectors by group_id
            filter_dict = {Field.GROUP_KEY.value: {"$eq": self._group_id}}
            self._index.delete(filter=filter_dict)
        except Exception as e:
            raise

    def text_exists(self, id: str) -> bool:
        """Check if document exists"""
        try:
            self._ensure_index_initialized()
        except Exception:
            return False

        try:
            # Check if vector exists through query
            response = self._index.fetch(ids=[id])
            exists = id in response.vectors
            return exists
        except Exception as e:
            return False


class PineconeVectorFactory(AbstractVectorFactory):
    """Pinecone vector database factory class"""

    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings) -> PineconeVector:
        """Create PineconeVector instance"""

        # Determine index name
        if dataset.collection_binding_id:
            dataset_collection_binding = (
                db.session.query(DatasetCollectionBinding)
                .where(DatasetCollectionBinding.id == dataset.collection_binding_id)
                .one_or_none()
            )
            if dataset_collection_binding:
                collection_name = dataset_collection_binding.collection_name
            else:
                raise ValueError("Dataset Collection Bindings does not exist!")
        else:
            if dataset.index_struct_dict:
                class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
                collection_name = class_prefix
            else:
                dataset_id = dataset.id
                collection_name = Dataset.gen_collection_name_by_id(dataset_id)

        # Set index structure
        if not dataset.index_struct_dict:
            dataset.index_struct = json.dumps(
                self.gen_index_struct_dict("pinecone", collection_name)
            )

        # Create PineconeVector instance
        return PineconeVector(
            collection_name=collection_name,
            group_id=dataset.id,
            config=PineconeConfig(
                api_key=dify_config.PINECONE_API_KEY or "",
                environment=dify_config.PINECONE_ENVIRONMENT or "",
                index_name=dify_config.PINECONE_INDEX_NAME,
                timeout=dify_config.PINECONE_CLIENT_TIMEOUT,
                batch_size=dify_config.PINECONE_BATCH_SIZE,
                metric=dify_config.PINECONE_METRIC,
            ),
        )
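Pinecone index names must be lowercase, restricted to `a-z0-9-`, and capped at 45 characters, which is what the constructor's normalization block enforces. Pulled out as a standalone helper for illustration (a sketch restating the logic above, not part of the PR):

import hashlib
import re


def normalize_index_name(collection_name: str, max_len: int = 45, suffix_len: int = 24) -> str:
    # Lowercase, replace runs of invalid characters with '-', collapse repeats.
    name = re.sub(r"[^a-z0-9-]+", "-", collection_name.lower())
    name = re.sub(r"-+", "-", name).strip("-")
    if len(name) > max_len:
        # Deterministic sha256-derived suffix keeps distinct long names from colliding.
        suffix = hashlib.sha256(name.encode()).hexdigest()[:suffix_len]
        name = name[: max_len - (suffix_len + 1)].rstrip("-") + "-" + suffix
    if not name:
        # Inputs made only of invalid characters normalize to "", so fall back to a hash.
        name = "index-" + hashlib.sha256(collection_name.encode()).hexdigest()[:suffix_len]
    return name


# A typical Dify collection name exceeds 45 chars, so it is truncated and
# suffixed; the result is always a valid Pinecone index name.
print(normalize_index_name("Vector_index_6c953e32_8d41_4e76_8b6e_1aa2e2bd4d71_Node"))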
@@ -170,7 +170,7 @@ class VastbaseVector(BaseVector):
                 metadata, text, distance = record
                 score = 1 - distance
                 metadata["score"] = score
-                if score > score_threshold:
+                if score >= score_threshold:
                     docs.append(Document(page_content=text, metadata=metadata))
         return docs
 

@@ -369,7 +369,7 @@ class QdrantVector(BaseVector):
                 continue
             metadata = result.payload.get(Field.METADATA_KEY.value) or {}
             # duplicate check score threshold
-            if result.score > score_threshold:
+            if result.score >= score_threshold:
                 metadata["score"] = result.score
                 doc = Document(
                     page_content=result.payload.get(Field.CONTENT_KEY.value, ""),

@@ -233,7 +233,7 @@ class RelytVector(BaseVector):
         docs = []
         for document, score in results:
             score_threshold = float(kwargs.get("score_threshold") or 0.0)
-            if 1 - score > score_threshold:
+            if 1 - score >= score_threshold:
                 docs.append(document)
         return docs
 

@@ -300,7 +300,7 @@ class TableStoreVector(BaseVector):
         )
         documents = []
         for search_hit in search_response.search_hits:
-            if search_hit.score > score_threshold:
+            if search_hit.score >= score_threshold:
                 ots_column_map = {}
                 for col in search_hit.row[1]:
                     ots_column_map[col[0]] = col[1]

@@ -291,7 +291,7 @@ class TencentVector(BaseVector):
                 score = 1 - result.get("score", 0.0)
             else:
                 score = result.get("score", 0.0)
-            if score > score_threshold:
+            if score >= score_threshold:
                 meta["score"] = score
                 doc = Document(page_content=result.get(self.field_text), metadata=meta)
                 docs.append(doc)

@@ -351,7 +351,7 @@ class TidbOnQdrantVector(BaseVector):
             metadata = result.payload.get(Field.METADATA_KEY.value) or {}
             # duplicate check score threshold
             score_threshold = kwargs.get("score_threshold") or 0.0
-            if result.score > score_threshold:
+            if result.score >= score_threshold:
                 metadata["score"] = result.score
                 doc = Document(
                     page_content=result.payload.get(Field.CONTENT_KEY.value, ""),

@@ -110,7 +110,7 @@ class UpstashVector(BaseVector):
             score = record.score
             if metadata is not None and text is not None:
                 metadata["score"] = score
-                if score > score_threshold:
+                if score >= score_threshold:
                     docs.append(Document(page_content=text, metadata=metadata))
         return docs
 

@@ -86,6 +86,10 @@ class Vector:
                 from core.rag.datasource.vdb.pgvecto_rs.pgvecto_rs import PGVectoRSFactory
 
                 return PGVectoRSFactory
+            case VectorType.PINECONE:
+                from core.rag.datasource.vdb.pinecone.pinecone_vector import PineconeVectorFactory
+
+                return PineconeVectorFactory
             case VectorType.QDRANT:
                 from core.rag.datasource.vdb.qdrant.qdrant_vector import QdrantVectorFactory
 
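Like every other backend in this `match`, the Pinecone factory is imported lazily inside its own `case` arm, so the `pinecone` package (added to `pyproject.toml` later in this compare) is only loaded when `VECTOR_STORE` actually selects it.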
@@ -31,3 +31,4 @@ class VectorType(StrEnum):
     HUAWEI_CLOUD = "huawei_cloud"
     MATRIXONE = "matrixone"
     CLICKZETTA = "clickzetta"
+    PINECONE = "pinecone"

@@ -192,7 +192,7 @@ class VikingDBVector(BaseVector):
             metadata = result.fields.get(vdb_Field.METADATA_KEY.value)
             if metadata is not None:
                 metadata = json.loads(metadata)
-            if result.score > score_threshold:
+            if result.score >= score_threshold:
                 metadata["score"] = result.score
                 doc = Document(page_content=result.fields.get(vdb_Field.CONTENT_KEY.value), metadata=metadata)
                 docs.append(doc)

@@ -220,7 +220,7 @@ class WeaviateVector(BaseVector):
         for doc, score in docs_and_scores:
             score_threshold = float(kwargs.get("score_threshold") or 0.0)
             # check score threshold
-            if score > score_threshold:
+            if score >= score_threshold:
                 if doc.metadata is not None:
                     doc.metadata["score"] = score
                     docs.append(doc)
@@ -10,6 +10,23 @@ from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
 
 
+def _format_cell_value(value) -> str:
+    if pd.isna(value):
+        return ""
+
+    if isinstance(value, (int, float)):
+        if isinstance(value, float):
+            if value.is_integer():
+                return str(int(value))
+            else:
+                formatted = f"{value:f}"
+                return formatted.rstrip('0').rstrip('.')
+        else:
+            return str(value)
+
+    return str(value)
+
+
 class ExcelExtractor(BaseExtractor):
     """Load Excel files.
 

@@ -49,10 +66,12 @@ class ExcelExtractor(BaseExtractor):
                             row=cast(int, index) + 2, column=col_index + 1
                         )  # +2 to account for header and 1-based index
                         if cell.hyperlink:
-                            value = f"[{v}]({cell.hyperlink.target})"
+                            formatted_v = _format_cell_value(v)
+                            value = f"[{formatted_v}]({cell.hyperlink.target})"
                             page_content.append(f'"{k}":"{value}"')
                         else:
-                            page_content.append(f'"{k}":"{v}"')
+                            formatted_v = _format_cell_value(v)
+                            page_content.append(f'"{k}":"{formatted_v}"')
                 documents.append(
                     Document(page_content=";".join(page_content), metadata={"source": self._file_path})
                 )

@@ -67,7 +86,8 @@ class ExcelExtractor(BaseExtractor):
                 page_content = []
                 for k, v in row.items():
                     if pd.notna(v):
-                        page_content.append(f'"{k}":"{v}"')
+                        formatted_v = _format_cell_value(v)
+                        page_content.append(f'"{k}":"{formatted_v}"')
                 documents.append(
                     Document(page_content=";".join(page_content), metadata={"source": self._file_path})
                 )
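The point of routing cell values through `_format_cell_value` is to keep large numbers out of scientific notation and to strip float artifacts. A few illustrative cases, expected behavior per the code above rather than tests from the PR:

assert _format_cell_value(1e16) == "10000000000000000"  # str(1e16) would yield "1e+16"
assert _format_cell_value(3.50) == "3.5"                # fixed-point, trailing zeros stripped
assert _format_cell_value(7) == "7"                     # ints pass through str()
assert _format_cell_value("text") == "text"             # non-numerics pass through str()
assert _format_cell_value(float("nan")) == ""           # pd.isna -> empty cell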
@@ -123,7 +123,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor):
             for result in results:
                 metadata = result.metadata
                 metadata["score"] = result.score
-                if result.score > score_threshold:
+                if result.score >= score_threshold:
                     doc = Document(page_content=result.page_content, metadata=metadata)
                     docs.append(doc)
         return docs

@@ -162,7 +162,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor):
             for result in results:
                 metadata = result.metadata
                 metadata["score"] = result.score
-                if result.score > score_threshold:
+                if result.score >= score_threshold:
                     doc = Document(page_content=result.page_content, metadata=metadata)
                     docs.append(doc)
         return docs

@@ -158,7 +158,7 @@ class QAIndexProcessor(BaseIndexProcessor):
             for result in results:
                 metadata = result.metadata
                 metadata["score"] = result.score
-                if result.score > score_threshold:
+                if result.score >= score_threshold:
                     doc = Document(page_content=result.page_content, metadata=metadata)
                     docs.append(doc)
         return docs
@@ -65,7 +65,7 @@ default_retrieval_model: dict[str, Any] = {
     "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
     "reranking_enable": False,
     "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
-    "top_k": 2,
+    "top_k": 4,
     "score_threshold_enabled": False,
 }
 

@@ -647,7 +647,7 @@ class DatasetRetrieval:
                 retrieval_method=retrieval_model["search_method"],
                 dataset_id=dataset.id,
                 query=query,
-                top_k=retrieval_model.get("top_k") or 2,
+                top_k=retrieval_model.get("top_k") or 4,
                 score_threshold=retrieval_model.get("score_threshold", 0.0)
                 if retrieval_model["score_threshold_enabled"]
                 else 0.0,

@@ -743,7 +743,7 @@ class DatasetRetrieval:
             tool = DatasetMultiRetrieverTool.from_dataset(
                 dataset_ids=[dataset.id for dataset in available_datasets],
                 tenant_id=tenant_id,
-                top_k=retrieve_config.top_k or 2,
+                top_k=retrieve_config.top_k or 4,
                 score_threshold=retrieve_config.score_threshold,
                 hit_callbacks=[hit_callback],
                 return_resource=return_resource,

@@ -181,7 +181,7 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
                 retrieval_method="keyword_search",
                 dataset_id=dataset.id,
                 query=query,
-                top_k=retrieval_model.get("top_k") or 2,
+                top_k=retrieval_model.get("top_k") or 4,
             )
             if documents:
                 all_documents.extend(documents)

@@ -192,7 +192,7 @@ class DatasetMultiRetrieverTool(DatasetRetrieverBaseTool):
                 retrieval_method=retrieval_model["search_method"],
                 dataset_id=dataset.id,
                 query=query,
-                top_k=retrieval_model.get("top_k") or 2,
+                top_k=retrieval_model.get("top_k") or 4,
                 score_threshold=retrieval_model.get("score_threshold", 0.0)
                 if retrieval_model["score_threshold_enabled"]
                 else 0.0,

@@ -13,7 +13,7 @@ class DatasetRetrieverBaseTool(BaseModel, ABC):
     name: str = "dataset"
     description: str = "use this to retrieve a dataset. "
     tenant_id: str
-    top_k: int = 2
+    top_k: int = 4
     score_threshold: Optional[float] = None
     hit_callbacks: list[DatasetIndexToolCallbackHandler] = []
     return_resource: bool
@@ -485,6 +485,24 @@ def _extract_text_from_csv(file_content: bytes) -> str:
         raise TextExtractionError(f"Failed to extract text from CSV: {str(e)}") from e
 
 
+def _format_cell_value_for_markdown(value) -> str:
+    """Format the cell value, avoiding scientific notation"""
+    if pd.isna(value):
+        return ""
+
+    if isinstance(value, (int, float)):
+        if isinstance(value, float):
+            if value.is_integer():
+                return str(int(value))
+            else:
+                formatted = f"{value:f}"
+                return formatted.rstrip('0').rstrip('.')
+        else:
+            return str(value)
+
+    return str(value)
+
+
 def _extract_text_from_excel(file_content: bytes) -> str:
     """Extract text from an Excel file using pandas."""
 

@@ -499,7 +517,8 @@ def _extract_text_from_excel(file_content: bytes) -> str:
         # Construct the data rows
         data_rows = []
         for _, row in df.iterrows():
-            data_row = "| " + " | ".join(map(str, row)) + " |"
+            formatted_row = [_format_cell_value_for_markdown(cell) for cell in row]
+            data_row = "| " + " | ".join(formatted_row) + " |"
             data_rows.append(data_row)
 
         # Combine all rows into a single string
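The downstream effect on the markdown rows built in the second hunk: each cell is formatted before joining. Illustrative only, assuming `_format_cell_value_for_markdown` as defined above:

row = [1e16, 2.50, "name"]
formatted_row = [_format_cell_value_for_markdown(cell) for cell in row]
print("| " + " | ".join(formatted_row) + " |")
# old code, map(str, row): "| 1e+16 | 2.5 | name |"
# new code:                "| 10000000000000000 | 2.5 | name |"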
@@ -78,7 +78,7 @@ default_retrieval_model = {
     "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
     "reranking_enable": False,
     "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
-    "top_k": 2,
+    "top_k": 4,
     "score_threshold_enabled": False,
 }
 

@@ -88,6 +88,7 @@ dependencies = [
     "httpx-sse>=0.4.0",
     "sendgrid~=6.12.3",
     "flask-restx>=1.3.0",
+    "pinecone>=7.3.0",
 ]
 # Before adding new dependency, consider place it in
 # alphabet order (a-z) and suitable group.
@@ -1149,7 +1149,7 @@ class DocumentService:
                 "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
                 "reranking_enable": False,
                 "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
-                "top_k": 2,
+                "top_k": 4,
                 "score_threshold_enabled": False,
             }
 

@@ -1612,7 +1612,7 @@ class DocumentService:
                 search_method=RetrievalMethod.SEMANTIC_SEARCH.value,
                 reranking_enable=False,
                 reranking_model=RerankingModel(reranking_provider_name="", reranking_model_name=""),
-                top_k=2,
+                top_k=4,
                 score_threshold_enabled=False,
             )
         # save dataset

@@ -18,7 +18,7 @@ default_retrieval_model = {
     "search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
     "reranking_enable": False,
     "reranking_model": {"reranking_provider_name": "", "reranking_model_name": ""},
-    "top_k": 2,
+    "top_k": 4,
     "score_threshold_enabled": False,
 }
 

@@ -66,7 +66,7 @@ class HitTestingService:
             retrieval_method=retrieval_model.get("search_method", "semantic_search"),
             dataset_id=dataset.id,
             query=query,
-            top_k=retrieval_model.get("top_k", 2),
+            top_k=retrieval_model.get("top_k", 4),
             score_threshold=retrieval_model.get("score_threshold", 0.0)
             if retrieval_model["score_threshold_enabled"]
             else 0.0,
api/tests/integration_tests/vdb/pinecone/test_pinecone.py (new file, 30 lines)
@@ -0,0 +1,30 @@
from core.rag.datasource.vdb.pinecone.pinecone_vector import PineconeConfig, PineconeVector
from core.rag.models.document import Document
from tests.integration_tests.vdb.test_vector_store import (
    AbstractVectorTest,
    setup_mock_redis,
)


class PineconeVectorTest(AbstractVectorTest):
    def __init__(self):
        super().__init__()
        self.attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"]
        self.vector = PineconeVector(
            collection_name=self.collection_name,
            group_id=self.dataset_id,
            config=PineconeConfig(
                api_key="test_api_key",
                environment="test_environment",
                index_name="test_index",
            ),
        )

    def search_by_vector(self):
        super().search_by_vector()


def test_pinecone_vector(setup_mock_redis):
    PineconeVectorTest().run_all_tests()
api/uv.lock (generated, 4849 lines changed)
File diff suppressed because it is too large.
@@ -20,7 +20,17 @@ services:
     ports:
       - "${EXPOSE_POSTGRES_PORT:-5432}:5432"
     healthcheck:
-      test: [ 'CMD', 'pg_isready', '-h', 'db', '-U', '${PGUSER:-postgres}', '-d', '${POSTGRES_DB:-dify}' ]
+      test:
+        [
+          "CMD",
+          "pg_isready",
+          "-h",
+          "db",
+          "-U",
+          "${PGUSER:-postgres}",
+          "-d",
+          "${POSTGRES_DB:-dify}",
+        ]
       interval: 1s
       timeout: 3s
       retries: 30

@@ -41,7 +51,11 @@ services:
     ports:
       - "${EXPOSE_REDIS_PORT:-6379}:6379"
     healthcheck:
-      test: [ 'CMD-SHELL', 'redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG' ]
+      test:
+        [
+          "CMD-SHELL",
+          "redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG",
+        ]
 
   # The DifySandbox
   sandbox:

@@ -65,13 +79,13 @@ services:
       - ./volumes/sandbox/dependencies:/dependencies
       - ./volumes/sandbox/conf:/conf
     healthcheck:
-      test: [ "CMD", "curl", "-f", "http://localhost:8194/health" ]
+      test: ["CMD", "curl", "-f", "http://localhost:8194/health"]
     networks:
       - ssrf_proxy_network
 
   # plugin daemon
   plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.2.0-local
+    image: langgenius/dify-plugin-daemon:0.3.0-local
     restart: always
     env_file:
       - ./middleware.env

@@ -94,7 +108,6 @@ services:
       PLUGIN_REMOTE_INSTALLING_HOST: ${PLUGIN_DEBUGGING_HOST:-0.0.0.0}
       PLUGIN_REMOTE_INSTALLING_PORT: ${PLUGIN_DEBUGGING_PORT:-5003}
       PLUGIN_WORKING_PATH: ${PLUGIN_WORKING_PATH:-/app/storage/cwd}
-      FORCE_VERIFYING_SIGNATURE: ${FORCE_VERIFYING_SIGNATURE:-true}
       PYTHON_ENV_INIT_TIMEOUT: ${PLUGIN_PYTHON_ENV_INIT_TIMEOUT:-120}
       PLUGIN_MAX_EXECUTION_TIMEOUT: ${PLUGIN_MAX_EXECUTION_TIMEOUT:-600}
       PIP_MIRROR_URL: ${PIP_MIRROR_URL:-}

@@ -126,6 +139,9 @@ services:
       VOLCENGINE_TOS_ACCESS_KEY: ${PLUGIN_VOLCENGINE_TOS_ACCESS_KEY:-}
       VOLCENGINE_TOS_SECRET_KEY: ${PLUGIN_VOLCENGINE_TOS_SECRET_KEY:-}
       VOLCENGINE_TOS_REGION: ${PLUGIN_VOLCENGINE_TOS_REGION:-}
+      THIRD_PARTY_SIGNATURE_VERIFICATION_ENABLED: true
+      THIRD_PARTY_SIGNATURE_VERIFICATION_PUBLIC_KEYS: /app/keys/publickey.pem
+      FORCE_VERIFYING_SIGNATURE: false
     ports:
       - "${EXPOSE_PLUGIN_DAEMON_PORT:-5002}:${PLUGIN_DAEMON_PORT:-5002}"
       - "${EXPOSE_PLUGIN_DEBUGGING_PORT:-5003}:${PLUGIN_DEBUGGING_PORT:-5003}"

@@ -141,7 +157,12 @@ services:
     volumes:
       - ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template
       - ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint-mount.sh
-    entrypoint: [ "sh", "-c", "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh" ]
+    entrypoint:
+      [
+        "sh",
+        "-c",
+        "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh",
+      ]
     env_file:
       - ./middleware.env
     environment:
@@ -10,7 +10,7 @@ x-shared-env: &shared-api-worker-env
   SERVICE_API_URL: ${SERVICE_API_URL:-}
   APP_API_URL: ${APP_API_URL:-}
   APP_WEB_URL: ${APP_WEB_URL:-}
-  FILES_URL: ${FILES_URL:-}
+  FILES_URL: ${FILES_URL:-http://api:5001}
   INTERNAL_FILES_URL: ${INTERNAL_FILES_URL:-}
   LANG: ${LANG:-en_US.UTF-8}
   LC_ALL: ${LC_ALL:-en_US.UTF-8}

@@ -62,6 +62,7 @@ x-shared-env: &shared-api-worker-env
   SQLALCHEMY_ECHO: ${SQLALCHEMY_ECHO:-false}
   SQLALCHEMY_POOL_PRE_PING: ${SQLALCHEMY_POOL_PRE_PING:-false}
   SQLALCHEMY_POOL_USE_LIFO: ${SQLALCHEMY_POOL_USE_LIFO:-false}
+  SQLALCHEMY_POOL_TIMEOUT: ${SQLALCHEMY_POOL_TIMEOUT:-30}
   POSTGRES_MAX_CONNECTIONS: ${POSTGRES_MAX_CONNECTIONS:-100}
   POSTGRES_SHARED_BUFFERS: ${POSTGRES_SHARED_BUFFERS:-128MB}
   POSTGRES_WORK_MEM: ${POSTGRES_WORK_MEM:-4MB}

@@ -285,6 +286,8 @@ x-shared-env: &shared-api-worker-env
   BAIDU_VECTOR_DB_DATABASE: ${BAIDU_VECTOR_DB_DATABASE:-dify}
   BAIDU_VECTOR_DB_SHARD: ${BAIDU_VECTOR_DB_SHARD:-1}
   BAIDU_VECTOR_DB_REPLICAS: ${BAIDU_VECTOR_DB_REPLICAS:-3}
+  BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER: ${BAIDU_VECTOR_DB_INVERTED_INDEX_ANALYZER:-DEFAULT_ANALYZER}
+  BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE: ${BAIDU_VECTOR_DB_INVERTED_INDEX_PARSER_MODE:-COARSE_MODE}
   VIKINGDB_ACCESS_KEY: ${VIKINGDB_ACCESS_KEY:-your-ak}
   VIKINGDB_SECRET_KEY: ${VIKINGDB_SECRET_KEY:-your-sk}
   VIKINGDB_REGION: ${VIKINGDB_REGION:-cn-shanghai}

@@ -292,9 +295,10 @@ x-shared-env: &shared-api-worker-env
   VIKINGDB_SCHEMA: ${VIKINGDB_SCHEMA:-http}
   VIKINGDB_CONNECTION_TIMEOUT: ${VIKINGDB_CONNECTION_TIMEOUT:-30}
   VIKINGDB_SOCKET_TIMEOUT: ${VIKINGDB_SOCKET_TIMEOUT:-30}
-  LINDORM_URL: ${LINDORM_URL:-http://lindorm:30070}
-  LINDORM_USERNAME: ${LINDORM_USERNAME:-lindorm}
-  LINDORM_PASSWORD: ${LINDORM_PASSWORD:-lindorm}
+  LINDORM_URL: ${LINDORM_URL:-http://localhost:30070}
+  LINDORM_USERNAME: ${LINDORM_USERNAME:-admin}
+  LINDORM_PASSWORD: ${LINDORM_PASSWORD:-admin}
+  LINDORM_USING_UGC: ${LINDORM_USING_UGC:-True}
   LINDORM_QUERY_TIMEOUT: ${LINDORM_QUERY_TIMEOUT:-1}
   OCEANBASE_VECTOR_HOST: ${OCEANBASE_VECTOR_HOST:-oceanbase}
   OCEANBASE_VECTOR_PORT: ${OCEANBASE_VECTOR_PORT:-2881}

@@ -304,6 +308,7 @@ x-shared-env: &shared-api-worker-env
   OCEANBASE_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
   OCEANBASE_MEMORY_LIMIT: ${OCEANBASE_MEMORY_LIMIT:-6G}
   OCEANBASE_ENABLE_HYBRID_SEARCH: ${OCEANBASE_ENABLE_HYBRID_SEARCH:-false}
+  OCEANBASE_FULLTEXT_PARSER: ${OCEANBASE_FULLTEXT_PARSER:-ik}
   OPENGAUSS_HOST: ${OPENGAUSS_HOST:-opengauss}
   OPENGAUSS_PORT: ${OPENGAUSS_PORT:-6600}
   OPENGAUSS_USER: ${OPENGAUSS_USER:-postgres}

@@ -372,6 +377,7 @@ x-shared-env: &shared-api-worker-env
   INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-4000}
   INVITE_EXPIRY_HOURS: ${INVITE_EXPIRY_HOURS:-72}
   RESET_PASSWORD_TOKEN_EXPIRY_MINUTES: ${RESET_PASSWORD_TOKEN_EXPIRY_MINUTES:-5}
+  EMAIL_REGISTER_TOKEN_EXPIRY_MINUTES: ${EMAIL_REGISTER_TOKEN_EXPIRY_MINUTES:-5}
   CHANGE_EMAIL_TOKEN_EXPIRY_MINUTES: ${CHANGE_EMAIL_TOKEN_EXPIRY_MINUTES:-5}
   OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES: ${OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES:-5}
   CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194}

@@ -394,6 +400,10 @@ x-shared-env: &shared-api-worker-env
   MAX_VARIABLE_SIZE: ${MAX_VARIABLE_SIZE:-204800}
   WORKFLOW_PARALLEL_DEPTH_LIMIT: ${WORKFLOW_PARALLEL_DEPTH_LIMIT:-3}
   WORKFLOW_FILE_UPLOAD_LIMIT: ${WORKFLOW_FILE_UPLOAD_LIMIT:-10}
+  GRAPH_ENGINE_MIN_WORKERS: ${GRAPH_ENGINE_MIN_WORKERS:-1}
+  GRAPH_ENGINE_MAX_WORKERS: ${GRAPH_ENGINE_MAX_WORKERS:-10}
+  GRAPH_ENGINE_SCALE_UP_THRESHOLD: ${GRAPH_ENGINE_SCALE_UP_THRESHOLD:-3}
+  GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME: ${GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME:-5.0}
   WORKFLOW_NODE_EXECUTION_STORAGE: ${WORKFLOW_NODE_EXECUTION_STORAGE:-rdbms}
WORKFLOW_NODE_EXECUTION_STORAGE: ${WORKFLOW_NODE_EXECUTION_STORAGE:-rdbms}
|
||||||
CORE_WORKFLOW_EXECUTION_REPOSITORY: ${CORE_WORKFLOW_EXECUTION_REPOSITORY:-core.repositories.sqlalchemy_workflow_execution_repository.SQLAlchemyWorkflowExecutionRepository}
|
CORE_WORKFLOW_EXECUTION_REPOSITORY: ${CORE_WORKFLOW_EXECUTION_REPOSITORY:-core.repositories.sqlalchemy_workflow_execution_repository.SQLAlchemyWorkflowExecutionRepository}
|
||||||
CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY: ${CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY:-core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository}
|
CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY: ${CORE_WORKFLOW_NODE_EXECUTION_REPOSITORY:-core.repositories.sqlalchemy_workflow_node_execution_repository.SQLAlchemyWorkflowNodeExecutionRepository}
|
||||||
@@ -570,6 +580,7 @@ x-shared-env: &shared-api-worker-env
|
|||||||
QUEUE_MONITOR_INTERVAL: ${QUEUE_MONITOR_INTERVAL:-30}
|
QUEUE_MONITOR_INTERVAL: ${QUEUE_MONITOR_INTERVAL:-30}
|
||||||
SWAGGER_UI_ENABLED: ${SWAGGER_UI_ENABLED:-true}
|
SWAGGER_UI_ENABLED: ${SWAGGER_UI_ENABLED:-true}
|
||||||
SWAGGER_UI_PATH: ${SWAGGER_UI_PATH:-/swagger-ui.html}
|
SWAGGER_UI_PATH: ${SWAGGER_UI_PATH:-/swagger-ui.html}
|
||||||
|
DSL_EXPORT_ENCRYPT_DATASET_ID: ${DSL_EXPORT_ENCRYPT_DATASET_ID:-true}
|
||||||
ENABLE_CLEAN_EMBEDDING_CACHE_TASK: ${ENABLE_CLEAN_EMBEDDING_CACHE_TASK:-false}
|
ENABLE_CLEAN_EMBEDDING_CACHE_TASK: ${ENABLE_CLEAN_EMBEDDING_CACHE_TASK:-false}
|
||||||
ENABLE_CLEAN_UNUSED_DATASETS_TASK: ${ENABLE_CLEAN_UNUSED_DATASETS_TASK:-false}
|
ENABLE_CLEAN_UNUSED_DATASETS_TASK: ${ENABLE_CLEAN_UNUSED_DATASETS_TASK:-false}
|
||||||
ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false}
|
ENABLE_CREATE_TIDB_SERVERLESS_TASK: ${ENABLE_CREATE_TIDB_SERVERLESS_TASK:-false}
|
||||||
@@ -582,7 +593,7 @@ x-shared-env: &shared-api-worker-env
|
|||||||
services:
|
services:
|
||||||
# API service
|
# API service
|
||||||
api:
|
api:
|
||||||
image: langgenius/dify-api:1.8.0
|
image: langgenius/dify-api:1.9.0
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
# Use the shared environment variables.
|
# Use the shared environment variables.
|
||||||
@@ -611,7 +622,7 @@ services:
|
|||||||
# worker service
|
# worker service
|
||||||
# The Celery worker for processing the queue.
|
# The Celery worker for processing the queue.
|
||||||
worker:
|
worker:
|
||||||
image: langgenius/dify-api:1.8.0
|
image: langgenius/dify-api:1.9.0
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
# Use the shared environment variables.
|
# Use the shared environment variables.
|
||||||
@@ -638,7 +649,7 @@ services:
|
|||||||
# worker_beat service
|
# worker_beat service
|
||||||
# Celery beat for scheduling periodic tasks.
|
# Celery beat for scheduling periodic tasks.
|
||||||
worker_beat:
|
worker_beat:
|
||||||
image: langgenius/dify-api:1.8.0
|
image: langgenius/dify-api:1.9.0
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
# Use the shared environment variables.
|
# Use the shared environment variables.
|
||||||
@@ -656,7 +667,7 @@ services:
|
|||||||
|
|
||||||
# Frontend web application.
|
# Frontend web application.
|
||||||
web:
|
web:
|
||||||
image: langgenius/dify-web:1.8.0
|
image: langgenius/dify-web:1.9.0
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
|
CONSOLE_API_URL: ${CONSOLE_API_URL:-}
|
||||||
@@ -698,7 +709,17 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./volumes/db/data:/var/lib/postgresql/data
|
- ./volumes/db/data:/var/lib/postgresql/data
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'pg_isready', '-h', 'db', '-U', '${PGUSER:-postgres}', '-d', '${POSTGRES_DB:-dify}' ]
|
test:
|
||||||
|
[
|
||||||
|
"CMD",
|
||||||
|
"pg_isready",
|
||||||
|
"-h",
|
||||||
|
"db",
|
||||||
|
"-U",
|
||||||
|
"${PGUSER:-postgres}",
|
||||||
|
"-d",
|
||||||
|
"${POSTGRES_DB:-dify}",
|
||||||
|
]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 60
|
retries: 60
|
||||||
@@ -715,7 +736,11 @@ services:
|
|||||||
# Set the redis password when startup redis server.
|
# Set the redis password when startup redis server.
|
||||||
command: redis-server --requirepass ${REDIS_PASSWORD:-difyai123456}
|
command: redis-server --requirepass ${REDIS_PASSWORD:-difyai123456}
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD-SHELL', 'redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG' ]
|
test:
|
||||||
|
[
|
||||||
|
"CMD-SHELL",
|
||||||
|
"redis-cli -a ${REDIS_PASSWORD:-difyai123456} ping | grep -q PONG",
|
||||||
|
]
|
||||||
|
|
||||||
# The DifySandbox
|
# The DifySandbox
|
||||||
sandbox:
|
sandbox:
|
||||||
@@ -737,13 +762,13 @@ services:
|
|||||||
- ./volumes/sandbox/dependencies:/dependencies
|
- ./volumes/sandbox/dependencies:/dependencies
|
||||||
- ./volumes/sandbox/conf:/conf
|
- ./volumes/sandbox/conf:/conf
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'curl', '-f', 'http://localhost:8194/health' ]
|
test: ["CMD", "curl", "-f", "http://localhost:8194/health"]
|
||||||
networks:
|
networks:
|
||||||
- ssrf_proxy_network
|
- ssrf_proxy_network
|
||||||
|
|
||||||
# plugin daemon
|
# plugin daemon
|
||||||
plugin_daemon:
|
plugin_daemon:
|
||||||
image: langgenius/dify-plugin-daemon:0.2.0-local
|
image: langgenius/dify-plugin-daemon:0.3.0-local
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
# Use the shared environment variables.
|
# Use the shared environment variables.
|
||||||
@@ -811,7 +836,12 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template
|
- ./ssrf_proxy/squid.conf.template:/etc/squid/squid.conf.template
|
||||||
- ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint-mount.sh
|
- ./ssrf_proxy/docker-entrypoint.sh:/docker-entrypoint-mount.sh
|
||||||
entrypoint: [ 'sh', '-c', "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh" ]
|
entrypoint:
|
||||||
|
[
|
||||||
|
"sh",
|
||||||
|
"-c",
|
||||||
|
"cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh",
|
||||||
|
]
|
||||||
environment:
|
environment:
|
||||||
# pls clearly modify the squid env vars to fit your network environment.
|
# pls clearly modify the squid env vars to fit your network environment.
|
||||||
HTTP_PORT: ${SSRF_HTTP_PORT:-3128}
|
HTTP_PORT: ${SSRF_HTTP_PORT:-3128}
|
||||||
@@ -840,8 +870,8 @@ services:
|
|||||||
- CERTBOT_EMAIL=${CERTBOT_EMAIL}
|
- CERTBOT_EMAIL=${CERTBOT_EMAIL}
|
||||||
- CERTBOT_DOMAIN=${CERTBOT_DOMAIN}
|
- CERTBOT_DOMAIN=${CERTBOT_DOMAIN}
|
||||||
- CERTBOT_OPTIONS=${CERTBOT_OPTIONS:-}
|
- CERTBOT_OPTIONS=${CERTBOT_OPTIONS:-}
|
||||||
entrypoint: [ '/docker-entrypoint.sh' ]
|
entrypoint: ["/docker-entrypoint.sh"]
|
||||||
command: [ 'tail', '-f', '/dev/null' ]
|
command: ["tail", "-f", "/dev/null"]
|
||||||
|
|
||||||
# The nginx reverse proxy.
|
# The nginx reverse proxy.
|
||||||
# used for reverse proxying the API service and Web service.
|
# used for reverse proxying the API service and Web service.
|
||||||
@@ -858,7 +888,12 @@ services:
|
|||||||
- ./volumes/certbot/conf/live:/etc/letsencrypt/live # cert dir (with certbot container)
|
- ./volumes/certbot/conf/live:/etc/letsencrypt/live # cert dir (with certbot container)
|
||||||
- ./volumes/certbot/conf:/etc/letsencrypt
|
- ./volumes/certbot/conf:/etc/letsencrypt
|
||||||
- ./volumes/certbot/www:/var/www/html
|
- ./volumes/certbot/www:/var/www/html
|
||||||
entrypoint: [ 'sh', '-c', "cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh" ]
|
entrypoint:
|
||||||
|
[
|
||||||
|
"sh",
|
||||||
|
"-c",
|
||||||
|
"cp /docker-entrypoint-mount.sh /docker-entrypoint.sh && sed -i 's/\r$$//' /docker-entrypoint.sh && chmod +x /docker-entrypoint.sh && /docker-entrypoint.sh",
|
||||||
|
]
|
||||||
environment:
|
environment:
|
||||||
NGINX_SERVER_NAME: ${NGINX_SERVER_NAME:-_}
|
NGINX_SERVER_NAME: ${NGINX_SERVER_NAME:-_}
|
||||||
NGINX_HTTPS_ENABLED: ${NGINX_HTTPS_ENABLED:-false}
|
NGINX_HTTPS_ENABLED: ${NGINX_HTTPS_ENABLED:-false}
|
||||||
@@ -880,14 +915,14 @@ services:
|
|||||||
- api
|
- api
|
||||||
- web
|
- web
|
||||||
ports:
|
ports:
|
||||||
- '${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}'
|
- "${EXPOSE_NGINX_PORT:-80}:${NGINX_PORT:-80}"
|
||||||
- '${EXPOSE_NGINX_SSL_PORT:-443}:${NGINX_SSL_PORT:-443}'
|
- "${EXPOSE_NGINX_SSL_PORT:-443}:${NGINX_SSL_PORT:-443}"
|
||||||
|
|
||||||
# The Weaviate vector store.
|
# The Weaviate vector store.
|
||||||
weaviate:
|
weaviate:
|
||||||
image: semitechnologies/weaviate:1.19.0
|
image: semitechnologies/weaviate:1.19.0
|
||||||
profiles:
|
profiles:
|
||||||
- ''
|
- ""
|
||||||
- weaviate
|
- weaviate
|
||||||
restart: always
|
restart: always
|
||||||
volumes:
|
volumes:
|
||||||
@@ -940,13 +975,17 @@ services:
|
|||||||
working_dir: /opt/couchbase
|
working_dir: /opt/couchbase
|
||||||
stdin_open: true
|
stdin_open: true
|
||||||
tty: true
|
tty: true
|
||||||
entrypoint: [ "" ]
|
entrypoint: [""]
|
||||||
command: sh -c "/opt/couchbase/init/init-cbserver.sh"
|
command: sh -c "/opt/couchbase/init/init-cbserver.sh"
|
||||||
volumes:
|
volumes:
|
||||||
- ./volumes/couchbase/data:/opt/couchbase/var/lib/couchbase/data
|
- ./volumes/couchbase/data:/opt/couchbase/var/lib/couchbase/data
|
||||||
healthcheck:
|
healthcheck:
|
||||||
# ensure bucket was created before proceeding
|
# ensure bucket was created before proceeding
|
||||||
test: [ "CMD-SHELL", "curl -s -f -u Administrator:password http://localhost:8091/pools/default/buckets | grep -q '\\[{' || exit 1" ]
|
test:
|
||||||
|
[
|
||||||
|
"CMD-SHELL",
|
||||||
|
"curl -s -f -u Administrator:password http://localhost:8091/pools/default/buckets | grep -q '\\[{' || exit 1",
|
||||||
|
]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
retries: 10
|
retries: 10
|
||||||
start_period: 30s
|
start_period: 30s
|
||||||
@@ -972,9 +1011,9 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./volumes/pgvector/data:/var/lib/postgresql/data
|
- ./volumes/pgvector/data:/var/lib/postgresql/data
|
||||||
- ./pgvector/docker-entrypoint.sh:/docker-entrypoint.sh
|
- ./pgvector/docker-entrypoint.sh:/docker-entrypoint.sh
|
||||||
entrypoint: [ '/docker-entrypoint.sh' ]
|
entrypoint: ["/docker-entrypoint.sh"]
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'pg_isready' ]
|
test: ["CMD", "pg_isready"]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 30
|
retries: 30
|
||||||
@@ -991,14 +1030,14 @@ services:
|
|||||||
- VB_USERNAME=dify
|
- VB_USERNAME=dify
|
||||||
- VB_PASSWORD=Difyai123456
|
- VB_PASSWORD=Difyai123456
|
||||||
ports:
|
ports:
|
||||||
- '5434:5432'
|
- "5434:5432"
|
||||||
volumes:
|
volumes:
|
||||||
- ./vastbase/lic:/home/vastbase/vastbase/lic
|
- ./vastbase/lic:/home/vastbase/vastbase/lic
|
||||||
- ./vastbase/data:/home/vastbase/data
|
- ./vastbase/data:/home/vastbase/data
|
||||||
- ./vastbase/backup:/home/vastbase/backup
|
- ./vastbase/backup:/home/vastbase/backup
|
||||||
- ./vastbase/backup_log:/home/vastbase/backup_log
|
- ./vastbase/backup_log:/home/vastbase/backup_log
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'pg_isready' ]
|
test: ["CMD", "pg_isready"]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 30
|
retries: 30
|
||||||
@@ -1020,7 +1059,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./volumes/pgvecto_rs/data:/var/lib/postgresql/data
|
- ./volumes/pgvecto_rs/data:/var/lib/postgresql/data
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'pg_isready' ]
|
test: ["CMD", "pg_isready"]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
timeout: 3s
|
timeout: 3s
|
||||||
retries: 30
|
retries: 30
|
||||||
@@ -1056,10 +1095,15 @@ services:
|
|||||||
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai}
|
||||||
OB_SERVER_IP: 127.0.0.1
|
OB_SERVER_IP: 127.0.0.1
|
||||||
MODE: mini
|
MODE: mini
|
||||||
|
LANG: en_US.UTF-8
|
||||||
ports:
|
ports:
|
||||||
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
- "${OCEANBASE_VECTOR_PORT:-2881}:2881"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD-SHELL', 'obclient -h127.0.0.1 -P2881 -uroot@test -p$${OB_TENANT_PASSWORD} -e "SELECT 1;"' ]
|
test:
|
||||||
|
[
|
||||||
|
"CMD-SHELL",
|
||||||
|
'obclient -h127.0.0.1 -P2881 -uroot@test -p$${OB_TENANT_PASSWORD} -e "SELECT 1;"',
|
||||||
|
]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
retries: 30
|
retries: 30
|
||||||
start_period: 30s
|
start_period: 30s
|
||||||
@@ -1095,7 +1139,7 @@ services:
|
|||||||
- ./volumes/milvus/etcd:/etcd
|
- ./volumes/milvus/etcd:/etcd
|
||||||
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
|
command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'etcdctl', 'endpoint', 'health' ]
|
test: ["CMD", "etcdctl", "endpoint", "health"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 20s
|
timeout: 20s
|
||||||
retries: 3
|
retries: 3
|
||||||
@@ -1114,7 +1158,7 @@ services:
|
|||||||
- ./volumes/milvus/minio:/minio_data
|
- ./volumes/milvus/minio:/minio_data
|
||||||
command: minio server /minio_data --console-address ":9001"
|
command: minio server /minio_data --console-address ":9001"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'curl', '-f', 'http://localhost:9000/minio/health/live' ]
|
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 20s
|
timeout: 20s
|
||||||
retries: 3
|
retries: 3
|
||||||
@@ -1126,7 +1170,7 @@ services:
|
|||||||
image: milvusdb/milvus:v2.5.15
|
image: milvusdb/milvus:v2.5.15
|
||||||
profiles:
|
profiles:
|
||||||
- milvus
|
- milvus
|
||||||
command: [ 'milvus', 'run', 'standalone' ]
|
command: ["milvus", "run", "standalone"]
|
||||||
environment:
|
environment:
|
||||||
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-etcd:2379}
|
ETCD_ENDPOINTS: ${ETCD_ENDPOINTS:-etcd:2379}
|
||||||
MINIO_ADDRESS: ${MINIO_ADDRESS:-minio:9000}
|
MINIO_ADDRESS: ${MINIO_ADDRESS:-minio:9000}
|
||||||
@@ -1134,7 +1178,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./volumes/milvus/milvus:/var/lib/milvus
|
- ./volumes/milvus/milvus:/var/lib/milvus
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'curl', '-f', 'http://localhost:9091/healthz' ]
|
test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
start_period: 90s
|
start_period: 90s
|
||||||
timeout: 20s
|
timeout: 20s
|
||||||
@@ -1200,7 +1244,7 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
- ./volumes/opengauss/data:/var/lib/opengauss/data
|
- ./volumes/opengauss/data:/var/lib/opengauss/data
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ "CMD-SHELL", "netstat -lntp | grep tcp6 > /dev/null 2>&1" ]
|
test: ["CMD-SHELL", "netstat -lntp | grep tcp6 > /dev/null 2>&1"]
|
||||||
interval: 10s
|
interval: 10s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 10
|
retries: 10
|
||||||
@@ -1253,18 +1297,19 @@ services:
|
|||||||
node.name: dify-es0
|
node.name: dify-es0
|
||||||
discovery.type: single-node
|
discovery.type: single-node
|
||||||
xpack.license.self_generated.type: basic
|
xpack.license.self_generated.type: basic
|
||||||
xpack.security.enabled: 'true'
|
xpack.security.enabled: "true"
|
||||||
xpack.security.enrollment.enabled: 'false'
|
xpack.security.enrollment.enabled: "false"
|
||||||
xpack.security.http.ssl.enabled: 'false'
|
xpack.security.http.ssl.enabled: "false"
|
||||||
ports:
|
ports:
|
||||||
- ${ELASTICSEARCH_PORT:-9200}:9200
|
- ${ELASTICSEARCH_PORT:-9200}:9200
|
||||||
deploy:
|
deploy:
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 2g
|
memory: 2g
|
||||||
entrypoint: [ 'sh', '-c', "sh /docker-entrypoint-mount.sh" ]
|
entrypoint: ["sh", "-c", "sh /docker-entrypoint-mount.sh"]
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD', 'curl', '-s', 'http://localhost:9200/_cluster/health?pretty' ]
|
test:
|
||||||
|
["CMD", "curl", "-s", "http://localhost:9200/_cluster/health?pretty"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 50
|
retries: 50
|
||||||
@@ -1282,17 +1327,17 @@ services:
|
|||||||
environment:
|
environment:
|
||||||
XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY: d1a66dfd-c4d3-4a0a-8290-2abcb83ab3aa
|
XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY: d1a66dfd-c4d3-4a0a-8290-2abcb83ab3aa
|
||||||
NO_PROXY: localhost,127.0.0.1,elasticsearch,kibana
|
NO_PROXY: localhost,127.0.0.1,elasticsearch,kibana
|
||||||
XPACK_SECURITY_ENABLED: 'true'
|
XPACK_SECURITY_ENABLED: "true"
|
||||||
XPACK_SECURITY_ENROLLMENT_ENABLED: 'false'
|
XPACK_SECURITY_ENROLLMENT_ENABLED: "false"
|
||||||
XPACK_SECURITY_HTTP_SSL_ENABLED: 'false'
|
XPACK_SECURITY_HTTP_SSL_ENABLED: "false"
|
||||||
XPACK_FLEET_ISAIRGAPPED: 'true'
|
XPACK_FLEET_ISAIRGAPPED: "true"
|
||||||
I18N_LOCALE: zh-CN
|
I18N_LOCALE: zh-CN
|
||||||
SERVER_PORT: '5601'
|
SERVER_PORT: "5601"
|
||||||
ELASTICSEARCH_HOSTS: http://elasticsearch:9200
|
ELASTICSEARCH_HOSTS: http://elasticsearch:9200
|
||||||
ports:
|
ports:
|
||||||
- ${KIBANA_PORT:-5601}:5601
|
- ${KIBANA_PORT:-5601}:5601
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: [ 'CMD-SHELL', 'curl -s http://localhost:5601 >/dev/null || exit 1' ]
|
test: ["CMD-SHELL", "curl -s http://localhost:5601 >/dev/null || exit 1"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
@@ -79,6 +79,17 @@ WEAVIATE_AUTHORIZATION_ADMINLIST_ENABLED=true
 WEAVIATE_AUTHORIZATION_ADMINLIST_USERS=hello@dify.ai
 WEAVIATE_HOST_VOLUME=./volumes/weaviate
 
+# ------------------------------
+# Environment Variables for Pinecone Vector Database
+# ------------------------------
+# Get your API key from: https://app.pinecone.io/
+# PINECONE_API_KEY=your-pinecone-api-key
+# PINECONE_ENVIRONMENT=us-west1-gcp
+# PINECONE_INDEX_NAME=dify-pinecone-index
+# PINECONE_CLIENT_TIMEOUT=30
+# PINECONE_BATCH_SIZE=100
+# PINECONE_METRIC=cosine
+
 # ------------------------------
 # Docker Compose Service Expose Host Port Configurations
 # ------------------------------
@@ -28,7 +28,7 @@ const ExternalKnowledgeBaseCreate: React.FC<ExternalKnowledgeBaseCreateProps> =
     external_knowledge_api_id: '',
     external_knowledge_id: '',
     external_retrieval_model: {
-      top_k: 2,
+      top_k: 4,
       score_threshold: 0.5,
       score_threshold_enabled: false,
     },
@@ -49,7 +49,7 @@ const TextAreaWithButton = ({
   const { t } = useTranslation()
   const [isSettingsOpen, setIsSettingsOpen] = useState(false)
   const [externalRetrievalSettings, setExternalRetrievalSettings] = useState({
-    top_k: 2,
+    top_k: 4,
     score_threshold: 0.5,
     score_threshold_enabled: false,
   })
@@ -233,7 +233,7 @@ const DebugConfigurationContext = createContext<IDebugConfiguration>({
      reranking_provider_name: '',
      reranking_model_name: '',
    },
-  top_k: 2,
+  top_k: 4,
   score_threshold_enabled: false,
   score_threshold: 0.7,
   datasets: {