mirror of
https://github.com/langgenius/dify.git
synced 2026-01-08 07:14:14 +00:00
FEAT: Tencent Vector optimize BM25 initialization to reduce loading time (#24915)
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions
Main CI Pipeline / Check Changed Files (push) Waiting to run
Main CI Pipeline / API Tests (push) Blocked by required conditions
Main CI Pipeline / Web Tests (push) Blocked by required conditions
Main CI Pipeline / Style Check (push) Waiting to run
Main CI Pipeline / VDB Tests (push) Blocked by required conditions
Main CI Pipeline / DB Migration Test (push) Blocked by required conditions
Co-authored-by: wlleiiwang <wlleiiwang@tencent.com>
This commit is contained in:
@@ -39,6 +39,9 @@ class TencentConfig(BaseModel):
|
||||
return {"url": self.url, "username": self.username, "key": self.api_key, "timeout": self.timeout}
|
||||
|
||||
|
||||
+ bm25 = BM25Encoder.default("zh")
|
||||
|
||||
|
||||
class TencentVector(BaseVector):
|
||||
field_id: str = "id"
|
||||
field_vector: str = "vector"
|
||||
@@ -53,7 +56,6 @@ class TencentVector(BaseVector):
|
||||
self._dimension = 1024
|
||||
self._init_database()
|
||||
self._load_collection()
|
||||
- self._bm25 = BM25Encoder.default("zh")
|
||||
|
||||
def _load_collection(self):
|
||||
"""
|
||||
@@ -186,7 +188,7 @@ class TencentVector(BaseVector):
|
||||
metadata=metadata,
|
||||
)
|
||||
if self._enable_hybrid_search:
|
||||
- doc.__dict__["sparse_vector"] = self._bm25.encode_texts(texts[i])
|
||||
+ doc.__dict__["sparse_vector"] = bm25.encode_texts(texts[i])
|
||||
docs.append(doc)
|
||||
self._client.upsert(
|
||||
database_name=self._client_config.database,
|
||||
@@ -264,7 +266,7 @@ class TencentVector(BaseVector):
|
||||
match=[
|
||||
KeywordSearch(
|
||||
field_name="sparse_vector",
|
||||
- data=self._bm25.encode_queries(query),
|
||||
+ data=bm25.encode_queries(query),
|
||||
),
|
||||
],
|
||||
rerank=WeightedRerank(
|
||||
|
||||
Reference in New Issue
Block a user