Files
GenAIExamples/EdgeCraftRAG/edgecraftrag/components/node_parser.py
Zhu Yongbo c9088eb824 Add EdgeCraftRag as a GenAIExample (#1072)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com>
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: xiguiw <111278656+xiguiw@users.noreply.github.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-11-08 21:07:24 +08:00

86 lines
2.4 KiB
Python

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
from typing import Any
from edgecraftrag.base import BaseComponent, CompType, NodeParserType
from llama_index.core.node_parser import HierarchicalNodeParser, SentenceSplitter, SentenceWindowNodeParser
from pydantic import model_serializer
class SimpleNodeParser(BaseComponent, SentenceSplitter):
    """Node parser component that chunks documents via ``SentenceSplitter``."""

    # Use super for SentenceSplitter since its __init__ will clean up
    # BaseComponent fields
    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the parent initializers, then tag the component.

        The component type/subtype are assigned after ``super().__init__`` so
        they are not lost during parent initialization.
        """
        super().__init__(**kwargs)
        self.comp_type = CompType.NODEPARSER
        self.comp_subtype = NodeParserType.SIMPLE

    def run(self, **kwargs) -> Any:
        """Parse the documents supplied as the ``docs`` keyword argument.

        Returns:
            The result of ``get_nodes_from_documents`` for ``docs`` (progress
            display disabled), or ``None`` when no ``docs`` keyword is given.
        """
        # Membership test (not .get) so an explicit docs=None is still
        # forwarded to the parser exactly as before.
        if "docs" not in kwargs:
            return None
        return self.get_nodes_from_documents(kwargs["docs"], show_progress=False)

    @model_serializer
    def ser_model(self):
        """Serialize the parser as a dict of id, subtype and chunk settings."""
        return {
            "idx": self.idx,
            "parser_type": self.comp_subtype,
            "chunk_size": self.chunk_size,
            "chunk_overlap": self.chunk_overlap,
        }
class HierarchyNodeParser(BaseComponent, HierarchicalNodeParser):
    """Node parser component built on ``HierarchicalNodeParser``."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the parent initializers, then tag the component.

        The component type/subtype are assigned after ``super().__init__``.
        """
        super().__init__(**kwargs)
        self.comp_type = CompType.NODEPARSER
        self.comp_subtype = NodeParserType.HIERARCHY

    def run(self, **kwargs) -> Any:
        """Parse the documents supplied as the ``docs`` keyword argument.

        Returns:
            The result of ``get_nodes_from_documents`` for ``docs`` (progress
            display disabled), or ``None`` when no ``docs`` keyword is given.
        """
        # Membership test (not .get) so an explicit docs=None is still
        # forwarded to the parser exactly as before.
        if "docs" not in kwargs:
            return None
        return self.get_nodes_from_documents(kwargs["docs"], show_progress=False)

    @model_serializer
    def ser_model(self):
        """Serialize the parser as a dict of id, subtype and chunk settings.

        The ``chunk_size`` key carries this parser's ``chunk_sizes`` attribute;
        ``chunk_overlap`` is always reported as ``None``.
        """
        return {
            "idx": self.idx,
            "parser_type": self.comp_subtype,
            "chunk_size": self.chunk_sizes,
            "chunk_overlap": None,
        }
class SWindowNodeParser(BaseComponent, SentenceWindowNodeParser):
    """Node parser component built on ``SentenceWindowNodeParser``."""

    def __init__(self, **kwargs):
        """Forward ``kwargs`` to the parent initializers, then tag the component.

        The component type/subtype are assigned after ``super().__init__``.
        """
        super().__init__(**kwargs)
        self.comp_type = CompType.NODEPARSER
        self.comp_subtype = NodeParserType.SENTENCEWINDOW

    def run(self, **kwargs) -> Any:
        """Parse the documents supplied as the ``docs`` keyword argument.

        Returns:
            The result of ``get_nodes_from_documents`` for ``docs`` (progress
            display disabled), or ``None`` when no ``docs`` keyword is given.
        """
        # Membership test (not .get) so an explicit docs=None is still
        # forwarded to the parser exactly as before.
        if "docs" not in kwargs:
            return None
        return self.get_nodes_from_documents(kwargs["docs"], show_progress=False)

    @model_serializer
    def ser_model(self):
        """Serialize the parser as a dict of id and subtype.

        ``chunk_size`` and ``chunk_overlap`` are always reported as ``None``
        for this parser type.
        """
        return {
            "idx": self.idx,
            "parser_type": self.comp_subtype,
            "chunk_size": None,
            "chunk_overlap": None,
        }