From 6363ecef97ddc164b984b533e93597602a2ddc56 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Jul 2025 11:49:59 +0800 Subject: [PATCH] r2 transform --- .../rag_pipeline_transform_service.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/api/services/rag_pipeline/rag_pipeline_transform_service.py b/api/services/rag_pipeline/rag_pipeline_transform_service.py index cde1e9f182..f87f2e994a 100644 --- a/api/services/rag_pipeline/rag_pipeline_transform_service.py +++ b/api/services/rag_pipeline/rag_pipeline_transform_service.py @@ -108,16 +108,16 @@ class RagPipelineTransformService: elif doc_form == "hierarchical_model": match datasource_type: case "upload_file": - # get graph from transform.file-parent-child.yml - with open(f"{Path(__file__).parent}/transform/file-parent-child.yml") as f: + # get graph from transform.file-parentchild.yml + with open(f"{Path(__file__).parent}/transform/file-parentchild.yml") as f: pipeline_yaml = yaml.safe_load(f) case "notion_import": - # get graph from transform.notion-parent-child.yml - with open(f"{Path(__file__).parent}/transform/notion-parent-child.yml") as f: + # get graph from transform.notion-parentchild.yml + with open(f"{Path(__file__).parent}/transform/notion-parentchild.yml") as f: pipeline_yaml = yaml.safe_load(f) case "website_crawl": - # get graph from transform.website-crawl-parent-child.yml - with open(f"{Path(__file__).parent}/transform/website-crawl-parent-child.yml") as f: + # get graph from transform.website-crawl-parentchild.yml + with open(f"{Path(__file__).parent}/transform/website-crawl-parentchild.yml") as f: pipeline_yaml = yaml.safe_load(f) case _: raise ValueError("Unsupported datasource type") @@ -142,10 +142,11 @@ class RagPipelineTransformService: if indexing_technique == "high_quality": knowledge_configuration.embedding_model = dataset.embedding_model knowledge_configuration.embedding_model_provider = dataset.embedding_model_provider - retrieval_setting = RetrievalSetting(**retrieval_model) - if indexing_technique == "economy": - retrieval_setting.search_method = "keyword_search" - knowledge_configuration.retrieval_model = retrieval_setting + if retrieval_model: + retrieval_setting = RetrievalSetting(**retrieval_model) + if indexing_technique == "economy": + retrieval_setting.search_method = "keyword_search" + knowledge_configuration.retrieval_model = retrieval_setting return knowledge_configuration.model_dump()