Signed-off-by: ZePan110 <ze.pan@intel.com> Signed-off-by: chensuyue <suyue.chen@intel.com> Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com> Signed-off-by: Wang, Xigui <xigui.wang@intel.com> Co-authored-by: ZePan110 <ze.pan@intel.com> Co-authored-by: chen, suyue <suyue.chen@intel.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: xiguiw <111278656+xiguiw@users.noreply.github.com> Co-authored-by: lvliang-intel <liang1.lv@intel.com>
84 lines
2.2 KiB
Python
84 lines
2.2 KiB
Python
# Copyright (C) 2024 Intel Corporation
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import asyncio
|
|
import os
|
|
from typing import Any, Callable, List, Optional
|
|
|
|
from edgecraftrag.base import BaseMgr
|
|
from edgecraftrag.components.data import File
|
|
from llama_index.core.schema import Document
|
|
|
|
|
|
class FilelMgr(BaseMgr):
    """Manager that registers ``File`` components and exposes their documents.

    Storage is delegated to ``BaseMgr``: files are registered with
    ``self.add``, dropped with ``self.remove`` and enumerated through
    ``self.components``.
    """

    def __init__(self):
        super().__init__()

    def add_text(self, text: str) -> List[Document]:
        """Register a raw text snippet as a file.

        Args:
            text: Content passed to ``File`` under the fixed file name ``"text"``.

        Returns:
            The ``documents`` of the newly registered ``File``.
        """
        file = File(file_name="text", content=text)
        self.add(file)
        return file.documents

    def add_files(self, docs: Any) -> List[Document]:
        """Register every file found under the given path(s).

        Args:
            docs: A single path or a list of paths. A path to a regular file
                is registered as is; a directory is walked recursively with
                ``os.walk`` and every file below it is registered. Paths that
                do not exist, or that are neither a file nor a directory, are
                skipped silently.

        Returns:
            The concatenated ``documents`` of every ``File`` registered by
            this call, in registration order (empty when nothing was added).
        """
        if not isinstance(docs, list):
            docs = [docs]

        input_docs = []
        for doc in docs:
            if not os.path.exists(doc):
                continue

            if os.path.isfile(doc):
                paths = [doc]
            elif os.path.isdir(doc):
                # Distinct names inside the comprehension so that nothing
                # shadows the list being built.
                paths = [os.path.join(root, fname) for root, _, fnames in os.walk(doc) for fname in fnames]
            else:
                continue

            # An empty directory yields an empty list and the loop is a no-op.
            for file_path in paths:
                file = File(file_path=file_path)
                self.add(file)
                input_docs.extend(file.documents)

        return input_docs

    def get_file_by_name_or_id(self, name: str) -> Optional[File]:
        """Return the first registered file whose ``name`` or ``idx`` equals ``name``.

        Returns:
            The matching file, or ``None`` when there is no match.
        """
        for file in self.components.values():
            if file.name == name or file.idx == name:
                return file
        return None

    def get_files(self) -> list:
        """Return all registered files as a new list."""
        return list(self.components.values())

    def get_all_docs(self) -> List[Document]:
        """Return the documents of every registered file, concatenated."""
        return [document for file in self.components.values() for document in file.documents]

    def get_docs_by_file(self, name) -> List[Document]:
        """Return the documents of the file matching ``name`` (name or id).

        Returns:
            The file's ``documents``, or an empty list when no file matches.
        """
        file = self.get_file_by_name_or_id(name)
        return file.documents if file else []

    def del_file(self, name) -> bool:
        """Remove the file matching ``name`` (name or id).

        Returns:
            ``True`` when a file was found and removed, ``False`` otherwise.
        """
        file = self.get_file_by_name_or_id(name)
        if not file:
            return False
        self.remove(file.idx)
        return True

    def update_file(self, name) -> bool:
        """Re-ingest the file matching ``name`` (name or id).

        The existing entry is removed and the file is added again through
        ``add_files``.

        Returns:
            ``True`` when a matching file was found, ``False`` otherwise.
        """
        file = self.get_file_by_name_or_id(name)
        if not file:
            return False

        # The lookup also matches on ``idx``, so ``name`` is not necessarily a
        # path that ``add_files`` can open; passing it through unchanged would
        # drop the entry without re-adding anything. ``File`` is constructed
        # with a ``file_path`` keyword in ``add_files``; presumably it keeps
        # that value on a same-named attribute -- TODO confirm. When the
        # attribute is missing or empty this falls back to ``name``.
        source = getattr(file, "file_path", None) or name

        self.remove(file.idx)
        self.add_files(docs=source)
        return True
|