mirror of
https://github.com/langgenius/dify.git
synced 2025-12-20 06:32:45 +00:00
Some checks failed
autofix.ci / autofix (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled
Main CI Pipeline / Check Changed Files (push) Has been cancelled
Main CI Pipeline / API Tests (push) Has been cancelled
Main CI Pipeline / Web Tests (push) Has been cancelled
Main CI Pipeline / Style Check (push) Has been cancelled
Main CI Pipeline / VDB Tests (push) Has been cancelled
Main CI Pipeline / DB Migration Test (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
77 lines
2.9 KiB
Python
77 lines
2.9 KiB
Python
import logging
|
|
from collections.abc import Callable, Sequence
|
|
from dataclasses import asdict
|
|
from typing import Any
|
|
|
|
from core.entities.document_task import DocumentTask
|
|
from core.rag.pipeline.queue import TenantIsolatedTaskQueue
|
|
|
|
from .base import DocumentTaskProxyBase
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class BatchDocumentIndexingProxy(DocumentTaskProxyBase):
    """
    Proxy base for indexing tasks that operate on several documents at once.

    On top of ``DocumentTaskProxyBase`` this class:
    - keeps the batch of document IDs that will be dispatched
    - owns a ``TenantIsolatedTaskQueue`` bound to the tenant and ``QUEUE_NAME``
    - implements dispatch to the direct queue and to the tenant-isolated queue
    """

    def __init__(self, tenant_id: str, dataset_id: str, document_ids: Sequence[str]):
        """
        Store the batch and create the tenant-isolated queue handle.

        Args:
            tenant_id: Tenant identifier
            dataset_id: Dataset identifier
            document_ids: IDs of the documents handled by this batch
        """
        super().__init__(tenant_id, dataset_id)
        self._document_ids = document_ids
        # QUEUE_NAME is not defined in this class; it has to be supplied by the
        # base class or by a concrete subclass.
        queue_name = self.QUEUE_NAME
        self._tenant_isolated_task_queue = TenantIsolatedTaskQueue(tenant_id, queue_name)

    def _send_to_direct_queue(self, task_func: Callable[[str, str, Sequence[str]], Any]):
        """
        Dispatch the whole batch immediately through ``task_func.delay``.

        Args:
            task_func: Task object whose ``delay`` accepts ``tenant_id``, ``dataset_id``
                and ``document_ids`` as keyword arguments
        """
        logger.info("tenant %s send documents %s to direct queue", self._tenant_id, self._document_ids)
        enqueue = task_func.delay  # type: ignore
        enqueue(
            tenant_id=self._tenant_id,
            dataset_id=self._dataset_id,
            document_ids=self._document_ids,
        )

    def _send_to_tenant_queue(self, task_func: Callable[[str, str, Sequence[str]], Any]):
        """
        Dispatch the batch through the tenant-isolated queue.

        When the queue reports no task key, the waiting time is recorded and the
        task is started right away via ``task_func.delay``. Otherwise the batch is
        serialized as a ``DocumentTask`` dict and pushed onto the tenant queue.

        Args:
            task_func: Task object whose ``delay`` accepts ``tenant_id``, ``dataset_id``
                and ``document_ids`` as keyword arguments
        """
        logger.info(
            "tenant %s send documents %s to tenant queue %s", self._tenant_id, self._document_ids, self.QUEUE_NAME
        )
        tenant_queue = self._tenant_isolated_task_queue

        if not tenant_queue.get_task_key():
            # No task key yet: set the flag/waiting time first, then launch the task.
            # NOTE(review): presumably the key marks "a task for this tenant is
            # already in flight" - confirm against TenantIsolatedTaskQueue.
            tenant_queue.set_task_waiting_time()
            enqueue = task_func.delay  # type: ignore
            enqueue(
                tenant_id=self._tenant_id,
                dataset_id=self._dataset_id,
                document_ids=self._document_ids,
            )
            logger.info("tenant %s init tasks: %s - %s", self._tenant_id, self._dataset_id, self._document_ids)
            return

        # A task key is present: park this batch on the tenant's waiting queue
        # instead of starting it now.
        pending_task = DocumentTask(
            tenant_id=self._tenant_id,
            dataset_id=self._dataset_id,
            document_ids=self._document_ids,
        )
        tenant_queue.push_tasks([asdict(pending_task)])
        logger.info("tenant %s push tasks: %s - %s", self._tenant_id, self._dataset_id, self._document_ids)
|