r2 transform

This commit is contained in:
jyong
2025-07-16 18:05:40 +08:00
parent 3bdb40f37b
commit aaa5b0e295
20 changed files with 138 additions and 33 deletions

View File

@@ -22,7 +22,9 @@ from sqlalchemy.orm import Session
from core.helper import ssrf_proxy
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.entities.plugin import PluginDependency
from core.workflow.nodes.datasource.entities import DatasourceNodeData
from core.workflow.nodes.enums import NodeType
from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData
from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData
from core.workflow.nodes.llm.entities import LLMNodeData
from core.workflow.nodes.parameter_extractor.entities import ParameterExtractorNodeData
@@ -725,6 +727,10 @@ class RagPipelineDslService:
dependencies.append(
DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id),
)
case NodeType.DATASOURCE.value:
datasource_entity = DatasourceNodeData(**node["data"])
if datasource_entity.provider_type != "local_file":
dependencies.append(datasource_entity.plugin_id)
case NodeType.LLM.value:
llm_entity = LLMNodeData(**node["data"])
dependencies.append(
@@ -744,6 +750,24 @@ class RagPipelineDslService:
parameter_extractor_entity.model.provider
),
)
case NodeType.KNOWLEDGE_INDEX.value:
knowledge_index_entity = KnowledgeConfiguration(**node["data"])
if knowledge_index_entity.indexing_technique == "high_quality":
if knowledge_index_entity.embedding_model_provider:
dependencies.append(
DependenciesAnalysisService.analyze_model_provider_dependency(
knowledge_index_entity.embedding_model_provider
),
)
if knowledge_index_entity.retrieval_model.reranking_mode == "reranking_model":
if knowledge_index_entity.retrieval_model.reranking_enable:
if knowledge_index_entity.retrieval_model.reranking_model and knowledge_index_entity.retrieval_model.reranking_mode == "reranking_model":
if knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name:
dependencies.append(
DependenciesAnalysisService.analyze_model_provider_dependency(
knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name
),
)
case NodeType.KNOWLEDGE_RETRIEVAL.value:
knowledge_retrieval_entity = KnowledgeRetrievalNodeData(**node["data"])
if knowledge_retrieval_entity.retrieval_mode == "multiple":

View File

@@ -7,6 +7,10 @@ import yaml
from flask_login import current_user
from constants import DOCUMENT_EXTENSIONS
from core.plugin.entities.plugin import PluginInstallationSource
from core.plugin.impl.datasource import PluginDatasourceManager
from core.plugin.impl.plugin import PluginInstaller
from core.tools.tool_manager import ToolManager
from extensions.ext_database import db
from factories import variable_factory
from models.dataset import Dataset, Pipeline
@@ -33,6 +37,8 @@ class RagPipelineTransformService:
return
retrieval_model = dataset.retrieval_model
pipeline_yaml = self._get_transform_yaml(doc_form, datasource_type, indexing_technique)
# deal dependencies
self._deal_dependencies(pipeline_yaml, dataset.tenant_id)
# Extract app data
workflow_data = pipeline_yaml.get("workflow")
graph = workflow_data.get("graph", {})
@@ -221,3 +227,51 @@ class RagPipelineTransformService:
pipeline.workflow_id = published_workflow.id
db.session.add(pipeline)
return pipeline
def _deal_dependencies(self, pipeline_yaml: dict, tenant_id: str) -> None:
    """Install the marketplace plugins a pipeline template depends on.

    Walks ``pipeline_yaml["dependencies"]``, and for every entry of type
    ``"marketplace"`` whose plugin id (the part of ``plugin_unique_identifier``
    before the first ``":"``) is not among the tenant's installed plugins,
    resolves the identifier to install through the datasource or tool
    manager. All resolved identifiers are then installed in one
    ``install_from_identifiers`` call with the Marketplace source.

    Args:
        pipeline_yaml: Parsed pipeline template. Entries under
            ``"dependencies"`` may be mutated: ``value["current_identifier"]``
            is set for marketplace plugins that were not installed.
        tenant_id: Tenant whose installed plugins are checked and for whom
            missing plugins are installed.
    """
    # Provider name appended to the plugin id when asking the datasource
    # manager for the provider of a datasource plugin.
    datasource_provider_names = {
        "langgenius/notion_datasource": "notion",
        "langgenius/firecrawl_datasource": "firecrawl",
        "langgenius/jina_datasource": "jina",
    }
    # Same mapping for plugins that are resolved through the tool manager.
    # NOTE(review): plugin ids absent from both tables are never queued for
    # installation; confirm the tables cover every id the templates reference.
    tool_provider_names = {
        "langgenius/dify_extractor": "dify_extractor",
        "langgenius/general_chunk": "general_chunk",
        "langgenius/parent_child_chunk": "parent_child_chunk",
    }

    installer_manager = PluginInstaller()
    installed_plugins = installer_manager.list_plugins(tenant_id)
    datasource_manager = PluginDatasourceManager()
    tool_manager = ToolManager()
    # A set gives constant-time membership checks inside the loop.
    installed_plugins_ids = {plugin.plugin_id for plugin in installed_plugins}

    need_install_plugin_unique_identifiers: list[str] = []
    # ``or []`` also covers a template whose ``dependencies`` key is null.
    for dependency in pipeline_yaml.get("dependencies") or []:
        if dependency.get("type") != "marketplace":
            continue

        # ``value`` may be missing or null in a malformed template; skip such
        # entries instead of failing on ``None.split``.
        plugin_unique_identifier = (dependency.get("value") or {}).get("plugin_unique_identifier")
        if not plugin_unique_identifier:
            continue

        plugin_id = plugin_unique_identifier.split(":")[0]
        if plugin_id in installed_plugins_ids:
            continue

        if plugin_id in datasource_provider_names:
            datasource = datasource_manager.fetch_datasource_provider(
                tenant_id, f"{plugin_id}/{datasource_provider_names[plugin_id]}"
            )
            resolved_identifier = datasource.plugin_unique_identifier
        elif plugin_id in tool_provider_names:
            tool = tool_manager.get_plugin_provider(f"{plugin_id}/{tool_provider_names[plugin_id]}", tenant_id)
            resolved_identifier = tool.plugin_unique_identifier
        else:
            resolved_identifier = None

        # Queue each identifier once, even if several dependency entries
        # point at the same plugin.
        if resolved_identifier and resolved_identifier not in need_install_plugin_unique_identifiers:
            need_install_plugin_unique_identifiers.append(resolved_identifier)

        # NOTE(review): the original nesting level of this assignment could
        # not be recovered from the flattened source; it is applied here to
        # every marketplace dependency that was not installed — confirm.
        dependency["value"]["current_identifier"] = plugin_unique_identifier

    if need_install_plugin_unique_identifiers:
        installer_manager.install_from_identifiers(
            tenant_id,
            need_install_plugin_unique_identifiers,
            PluginInstallationSource.Marketplace,
            metas=[
                {
                    "plugin_unique_identifier": identifier,
                }
                for identifier in need_install_plugin_unique_identifiers
            ],
        )

View File

@@ -1,12 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,12 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/qa_chunk:0.0.1@ef14ad7edce1d293ef52f14429a9acb39fa146a7b91d63a31cda905539908453
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,12 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61
plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c
kind: rag_pipeline
rag_pipeline:
description: ''

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,8 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,8 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,8 +1,12 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039
kind: rag_pipeline
rag_pipeline:
description: ''

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,8 +1,16 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,8 +1,16 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298
plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
kind: rag_pipeline
rag_pipeline:
description: ''

View File

@@ -1,8 +1,16 @@
dependencies:
- current_identifier: null
type: package
type: marketplace
value:
plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510
plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a
- current_identifier: null
type: marketplace
value:
plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608
kind: rag_pipeline
rag_pipeline:
description: ''