Adapt code for dataprep microservice refactor (#1408)

https://github.com/opea-project/GenAIComps/pull/1153

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Author: Liang Lv
Date: 2025-01-20 20:37:03 +08:00
Committed by: GitHub
Parent: 2d5898244c
Commit: 0f7e5a37ac

91 changed files with 400 additions and 354 deletions

View File

@@ -40,8 +40,8 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip}
 export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export RERANK_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
-export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
 docker compose -f compose.yaml up -d
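
The three renamed routes keep their old request shapes; only the paths change. A minimal smoke test against a running deployment might look like the following; `sample.txt` is a hypothetical placeholder file, and the request forms mirror the curl commands that appear in the READMEs further down in this commit:

```bash
# Ingest a local file (this route was POST /v1/dataprep before the refactor)
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
    -H "Content-Type: multipart/form-data" \
    -F "files=@./sample.txt"

# List ingested files/links (was /v1/dataprep/get_file)
curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
    -H "Content-Type: application/json"

# Delete everything that was ingested (was /v1/dataprep/delete_file)
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
    -d '{"file_path": "all"}' \
    -H "Content-Type: application/json"
```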

View File

@@ -41,6 +41,6 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip}
 export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export RERANK_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
-export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"

View File

@@ -53,7 +53,7 @@ def main():
 host_ip = args.host_ip
 port = args.port
 proxies = {"http": ""}
-url = "http://{host_ip}:{port}/v1/dataprep".format(host_ip=host_ip, port=port)
+url = "http://{host_ip}:{port}/v1/dataprep/ingest".format(host_ip=host_ip, port=port)
 # Split jsonl file into json files
 files = split_jsonl_into_txts(os.path.join(args.filedir, args.filename))

View File

@@ -19,8 +19,8 @@ export EMBEDDING_SERVICE_HOST_IP=${host_ip}
 export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export RERANK_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8889/v1/retrievaltool"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get_file"
-export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6008/v1/dataprep/get"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete"
 docker compose -f $WORKDIR/GenAIExamples/DocIndexRetriever/docker_compose/intel/cpu/xeon/compose.yaml up -d

View File

@@ -21,7 +21,7 @@ function build_docker_images_for_retrieval_tool(){
 # git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
 get_genai_comps
 echo "Build all the images with --no-cache..."
-service_list="doc-index-retriever dataprep-redis embedding retriever reranking"
+service_list="doc-index-retriever dataprep embedding retriever reranking"
 docker compose -f build.yaml build ${service_list} --no-cache
 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

View File

@@ -202,8 +202,8 @@ Gaudi default compose.yaml
 | Embedding | Langchain | Xeon | 6000 | /v1/embeddings |
 | Retriever | Langchain, Redis | Xeon | 7000 | /v1/retrieval |
 | Reranking | Langchain, TEI | Gaudi | 8000 | /v1/reranking |
-| LLM | Langchain, vLLM | Gaudi | 9000 | /v1/chat/completions |
-| Dataprep | Redis, Langchain | Xeon | 6007 | /v1/dataprep |
+| LLM | Langchain, TGI | Gaudi | 9000 | /v1/chat/completions |
+| Dataprep | Redis, Langchain | Xeon | 6007 | /v1/dataprep/ingest |
 ### Required Models
@@ -294,7 +294,7 @@ Here is an example of `Nike 2023` pdf.
 # download pdf file
 wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
 # upload pdf file with dataprep
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```

View File

@@ -72,14 +72,14 @@ python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_
 If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
 ```bash
-python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
+python eval_multihop.py --docs_path MultiHop-RAG/dataset/corpus.json --dataset_path MultiHop-RAG/dataset/MultiHopRAG.json --ingest_docs --retrieval_metrics --ragas_metrics --llm_endpoint http://{llm_as_judge_ip}:{llm_as_judge_port}/generate --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep/ingest --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --tei_embedding_endpoint http://{your_tei_embedding_ip}:{your_tei_embedding_port} --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
 ```
 The default values for arguments are:
 |Argument|Default value|
 |--------|-------------|
 |service_url|http://localhost:8888/v1/chatqna|
-|database_endpoint|http://localhost:6007/v1/dataprep|
+|database_endpoint|http://localhost:6007/v1/dataprep/ingest|
 |embedding_endpoint|http://localhost:6000/v1/embeddings|
 |tei_embedding_endpoint|http://localhost:8090|
 |retrieval_endpoint|http://localhost:7000/v1/retrieval|
@@ -139,14 +139,14 @@ python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/8
 If you are using Kubernetes manifest/helm to deploy `ChatQnA` system, you must specify more arguments as following:
 ```bash
-python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
+python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs --ingest_docs --database_endpoint http://{your_dataprep_ip}:{your_dataprep_port}/v1/dataprep/ingest --embedding_endpoint http://{your_embedding_ip}:{your_embedding_port}/v1/embeddings --retrieval_endpoint http://{your_retrieval_ip}:{your_retrieval_port}/v1/retrieval --service_url http://{your_chatqna_ip}:{your_chatqna_port}/v1/chatqna
 ```
 The default values for arguments are:
 |Argument|Default value|
 |--------|-------------|
 |service_url|http://localhost:8888/v1/chatqna|
-|database_endpoint|http://localhost:6007/v1/dataprep|
+|database_endpoint|http://localhost:6007/v1/dataprep/ingest|
 |embedding_endpoint|http://localhost:6000/v1/embeddings|
 |retrieval_endpoint|http://localhost:7000/v1/retrieval|
 |reranking_endpoint|http://localhost:8000/v1/reranking|
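
When dataprep runs on a non-default host or port, the new ingest route still has to be spelled out explicitly. A hypothetical override, modeled on the eval_crud command above (`${dataprep_host}` is a placeholder):

```bash
# Point the evaluation at a remote dataprep service; /v1/dataprep/ingest is
# the route introduced by this commit.
python eval_crud.py --dataset_path ./data/split_merged.json --docs_path ./data/80000_docs \
    --ingest_docs \
    --database_endpoint "http://${dataprep_host}:6007/v1/dataprep/ingest"
```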

View File

@@ -149,7 +149,7 @@ def args_parser():
parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform") parser.add_argument("--tasks", default=["question_answering"], nargs="+", help="Task to perform")
parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database") parser.add_argument("--ingest_docs", action="store_true", help="Whether to ingest documents to vector database")
parser.add_argument( parser.add_argument(
"--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address." "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep/ingest", help="Service URL address."
) )
parser.add_argument( parser.add_argument(
"--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address." "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."

View File

@@ -211,7 +211,7 @@ def args_parser():
parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.") parser.add_argument("--ragas_metrics", action="store_true", help="Whether to compute ragas metrics.")
parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge") parser.add_argument("--limits", type=int, default=100, help="Number of examples to be evaluated by llm-as-judge")
parser.add_argument( parser.add_argument(
"--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep", help="Service URL address." "--database_endpoint", type=str, default="http://localhost:6007/v1/dataprep/ingest", help="Service URL address."
) )
parser.add_argument( parser.add_argument(
"--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address." "--embedding_endpoint", type=str, default="http://localhost:6000/v1/embeddings", help="Service URL address."

View File

@@ -164,7 +164,7 @@ Use the following `cURL` command to upload file:
 ```bash
 cd GenAIEval/evals/benchmark/data
-curl -X POST "http://${cluster_ip}:6007/v1/dataprep" \
+curl -X POST "http://${cluster_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "chunk_size=3800" \
     -F "files=@./upload_file.txt"

View File

@@ -65,7 +65,7 @@ Prepare and upload test document
 # download pdf file
 wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
 # upload pdf file with dataprep
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -100,7 +100,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 3. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 ```
 ### 4. Build MegaService Docker Image
@@ -144,7 +144,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
 1. `opea/retriever:latest`
-2. `opea/dataprep-redis:latest`
+2. `opea/dataprep:latest`
 3. `opea/chatqna:latest`
 4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
 5. `opea/nginx:latest`
@@ -192,9 +192,9 @@ Change the `xxx_MODEL_ID` below for your needs.
 export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
 export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
 export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
-export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep"
-export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file"
-export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file"
+export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
+export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
+export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
 export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
 export CHATQNA_FRONTEND_SERVICE_PORT=5173
 export CHATQNA_BACKEND_SERVICE_NAME=chatqna
@@ -331,7 +331,7 @@ If you want to update the default knowledge base, you can use the following comm
 Update Knowledge Base via Local File Upload:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -341,7 +341,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -351,7 +351,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro
 Also, you are able to get the file list that you uploaded:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
     -H "Content-Type: application/json"
 ```
@@ -359,17 +359,17 @@ To delete the file/link you uploaded:
 ```bash
 # delete link
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "https://opea.dev"}' \
     -H "Content-Type: application/json"
 # delete file
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "nke-10k-2023.pdf"}' \
     -H "Content-Type: application/json"
 # delete all uploaded files and links
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
 ```

View File

@@ -9,13 +9,13 @@ services:
- "${CHATQNA_REDIS_VECTOR_PORT}:6379" - "${CHATQNA_REDIS_VECTOR_PORT}:6379"
- "${CHATQNA_REDIS_VECTOR_INSIGHT_PORT}:8001" - "${CHATQNA_REDIS_VECTOR_INSIGHT_PORT}:8001"
chatqna-dataprep-redis-service: chatqna-dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- chatqna-redis-vector-db - chatqna-redis-vector-db
- chatqna-tei-embedding-service - chatqna-tei-embedding-service
ports: ports:
- "${CHATQNA_REDIS_DATAPREP_PORT}:6007" - "${CHATQNA_REDIS_DATAPREP_PORT}:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
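
Note what the ports line changes: the refactored service now listens on container port 5000, while the published host port (`${CHATQNA_REDIS_DATAPREP_PORT}` here) stays the same, so external clients are unaffected. A hypothetical spot check on a running host:

```bash
# Show how container port 5000 is published on the host
docker port dataprep-redis-server

# The service should still answer on the unchanged host port
curl -sf -X POST "http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get" \
    -H "Content-Type: application/json" && echo "dataprep reachable"
```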

View File

@@ -19,9 +19,9 @@ export CHATQNA_INDEX_NAME="rag-redis"
 export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
 export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
 export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
-export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep"
-export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file"
-export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file"
+export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
+export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
+export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
 export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
 export CHATQNA_FRONTEND_SERVICE_PORT=15173
 export CHATQNA_BACKEND_SERVICE_NAME=chatqna

View File

@@ -27,7 +27,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 2. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 cd ..
 ```
@@ -60,7 +60,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following Docker Images:
-1. `opea/dataprep-redis:latest`
+1. `opea/dataprep:latest`
 2. `opea/retriever:latest`
 3. `opea/chatqna:latest`
 4. `opea/chatqna-ui:latest`
@@ -191,7 +191,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
 wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
 # upload pdf file with dataprep
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -201,7 +201,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Alternatively, you can add knowledge base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -211,7 +211,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro
 To check the uploaded files, you are able to get the file list that uploaded:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
     -H "Content-Type: application/json"
 ```

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -146,7 +146,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-aipc-backend-server - BACKEND_SERVICE_IP=chatqna-aipc-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -113,7 +113,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 2. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 cd ..
 ```
@@ -168,7 +168,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
-1. `opea/dataprep-redis:latest`
+1. `opea/dataprep:latest`
 2. `opea/retriever:latest`
 3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
 4. `opea/chatqna-ui:latest`
@@ -372,7 +372,7 @@ wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrie
 Upload:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -382,7 +382,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -392,7 +392,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro
 Also, you are able to get the file list that you uploaded:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
     -H "Content-Type: application/json"
 ```
@@ -417,21 +417,21 @@ Then you will get the response JSON like this. Notice that the returned `name`/`
 To delete the file/link you uploaded:
-The `file_path` here should be the `id` get from `/v1/dataprep/get_file` API.
+The `file_path` here should be the `id` get from `/v1/dataprep/get` API.
 ```bash
 # delete link
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "https://opea.dev.txt"}' \
     -H "Content-Type: application/json"
 # delete file
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "nke-10k-2023.pdf"}' \
     -H "Content-Type: application/json"
 # delete all uploaded files and links
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
 ```

View File

@@ -116,7 +116,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 2. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-pinecone:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/pinecone/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 cd ..
 ```
@@ -171,7 +171,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
-1. `opea/dataprep-pinecone:latest`
+1. `opea/dataprep:latest`
 2. `opea/retriever:latest`
 3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
 4. `opea/chatqna-ui:latest`
@@ -360,7 +360,7 @@ wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrie
 Upload:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -370,7 +370,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -381,7 +381,7 @@ To delete the files/link you uploaded:
 ```bash
 # delete all uploaded files and links
-curl -X POST "http://${host_ip}:6009/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6009/v1/dataprep/delete" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
 ```

View File

@@ -81,7 +81,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 2. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 cd ..
 ```
@@ -115,7 +115,7 @@ Build frontend Docker image that enables Conversational experience with ChatQnA
 ```bash
 cd GenAIExamples/ChatQnA/ui
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep/ingest"
 docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
 cd ../../../..
 ```
@@ -129,7 +129,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
-1. `opea/dataprep-qdrant:latest`
+1. `opea/dataprep:latest`
 2. `opea/retriever:latest`
 3. `opea/chatqna:latest`
 4. `opea/chatqna-ui:latest`
@@ -275,7 +275,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
 Update Knowledge Base via Local File Upload:
 ```bash
-curl -X POST "http://${host_ip}:6043/v1/dataprep" \
+curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./your_file.pdf"
 ```
@@ -285,7 +285,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6043/v1/dataprep" \
+curl -X POST "http://${host_ip}:6043/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -153,7 +153,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -6,12 +6,12 @@ version: "3.8"
 services:
   dataprep-pinecone-service:
-    image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
     container_name: dataprep-pinecone-server
     depends_on:
       - tei-embedding-service
     ports:
-      - "6007:6007"
+      - "6007:5000"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
@@ -21,6 +21,7 @@ services:
       TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
       LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_PINECONE"
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-server
@@ -142,7 +143,7 @@ services:
       - BACKEND_SERVICE_IP=chatqna-xeon-backend-server
       - BACKEND_SERVICE_PORT=8888
       - DATAPREP_SERVICE_IP=dataprep-pinecone-service
-      - DATAPREP_SERVICE_PORT=6007
+      - DATAPREP_SERVICE_PORT=5000
     ipc: host
     restart: always
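
The added `DATAPREP_COMPONENT_NAME` is how the single `opea/dataprep` image now selects its vector-store backend (`OPEA_DATAPREP_PINECONE` here, `OPEA_DATAPREP_QDRANT` in the Qdrant compose file below). A rough standalone-run sketch that reuses the environment values from the hunk above; any setting not shown in the diff is an assumption:

```bash
# Hypothetical standalone run of the unified dataprep image with the Pinecone
# backend selected; the 6007:5000 port mapping follows the compose change above.
docker run -d --name dataprep-pinecone-server -p 6007:5000 \
    -e DATAPREP_COMPONENT_NAME="OPEA_DATAPREP_PINECONE" \
    -e TEI_EMBEDDING_ENDPOINT=http://tei-embedding-service:80 \
    -e HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} \
    ${REGISTRY:-opea}/dataprep:${TAG:-latest}
```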

View File

@@ -9,13 +9,13 @@ services:
- "6333:6333" - "6333:6333"
- "6334:6334" - "6334:6334"
dataprep-qdrant-service: dataprep-qdrant-service:
image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-qdrant-server container_name: dataprep-qdrant-server
depends_on: depends_on:
- qdrant-vector-db - qdrant-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6043:6007" - "6043:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -25,6 +25,7 @@ services:
QDRANT_INDEX_NAME: ${INDEX_NAME} QDRANT_INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_QDRANT"
tei-embedding-service: tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server container_name: tei-embedding-server
@@ -148,7 +149,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-qdrant-service - DATAPREP_SERVICE_IP=dataprep-qdrant-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -149,7 +149,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -130,7 +130,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server - BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -86,7 +86,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 2. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 ```
 ### 3. Build Guardrails Docker Image (Optional)
@@ -159,7 +159,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
 - `opea/retriever:latest`
-- `opea/dataprep-redis:latest`
+- `opea/dataprep:latest`
 - `opea/chatqna:latest`
 - `opea/chatqna-ui:latest`
 - `opea/nginx:latest`
@@ -376,7 +376,7 @@ If you want to update the default knowledge base, you can use the following comm
 Update Knowledge Base via Local File Upload:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -386,7 +386,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -396,7 +396,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro
 Also, you are able to get the file/link list that you uploaded:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
     -H "Content-Type: application/json"
 ```
@@ -423,17 +423,17 @@ To delete the file/link you uploaded:
 ```bash
 # delete link
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "https://opea.dev.txt"}' \
     -H "Content-Type: application/json"
 # delete file
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "nke-10k-2023.pdf"}' \
     -H "Content-Type: application/json"
 # delete all uploaded files and links
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
 ```

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -161,7 +161,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -201,7 +201,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -180,7 +180,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -134,7 +134,7 @@ services:
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server - BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service - DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always

View File

@@ -45,7 +45,7 @@ CONTAINER ID IMAGE COMMAND
 bee1132464cd opea/chatqna:latest "python chatqna.py" 2 minutes ago Up 2 minutes 0.0.0.0:8888->8888/tcp, :::8888->8888/tcp chatqna-gaudi-backend-server
 f810f3b4d329 opea/embedding:latest "python embedding_te…" 2 minutes ago Up 2 minutes 0.0.0.0:6000->6000/tcp, :::6000->6000/tcp embedding-server
 325236a01f9b opea/llm-textgen:latest "python llm.py" 2 minutes ago Up 2 minutes 0.0.0.0:9000->9000/tcp, :::9000->9000/tcp llm-textgen-gaudi-server
-2fa17d84605f opea/dataprep-redis:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->6007/tcp dataprep-redis-server
+2fa17d84605f opea/dataprep:latest "python prepare_doc_…" 2 minutes ago Up 2 minutes 0.0.0.0:6007->6007/tcp, :::6007->5000/tcp dataprep-redis-server
 69e1fb59e92c opea/retriever:latest "/home/user/comps/re…" 2 minutes ago Up 2 minutes 0.0.0.0:7000->7000/tcp, :::7000->7000/tcp retriever-redis-server
 313b9d14928a opea/reranking-tei:latest "python reranking_te…" 2 minutes ago Up 2 minutes 0.0.0.0:8000->8000/tcp, :::8000->8000/tcp reranking-tei-gaudi-server
 174bd43fa6b5 ghcr.io/huggingface/tei-gaudi:1.5.0 "text-embeddings-rou…" 2 minutes ago Up 2 minutes 0.0.0.0:8090->80/tcp, :::8090->80/tcp tei-embedding-gaudi-server
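
A hypothetical one-liner to confirm that the renamed image (and its new container port) is what actually came up:

```bash
# Print just the dataprep container's image and port mapping
docker ps --filter "name=dataprep-redis-server" --format "{{.Image}}\t{{.Ports}}"
```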

View File

@@ -110,7 +110,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
 ### 3. Build Dataprep Image
 ```bash
-docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
 ```
 ### 4. Build MegaService Docker Image
@@ -154,7 +154,7 @@ docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-a
 Then run the command `docker images`, you will have the following 5 Docker Images:
 1. `opea/retriever:latest`
-2. `opea/dataprep-redis:latest`
+2. `opea/dataprep:latest`
 3. `opea/chatqna:latest`
 4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
 5. `opea/nginx:latest`
@@ -314,7 +314,7 @@ If you want to update the default knowledge base, you can use the following comm
 Update Knowledge Base via Local File Upload:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"
 ```
@@ -324,7 +324,7 @@ This command updates a knowledge base by uploading a local file for processing.
 Add Knowledge Base via HTTP Links:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
     -H "Content-Type: multipart/form-data" \
     -F 'link_list=["https://opea.dev"]'
 ```
@@ -334,7 +334,7 @@ This command updates a knowledge base by submitting a list of HTTP links for pro
 Also, you are able to get the file list that you uploaded:
 ```bash
-curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
     -H "Content-Type: application/json"
 ```
@@ -342,17 +342,17 @@ To delete the file/link you uploaded:
 ```bash
 # delete link
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "https://opea.dev"}' \
     -H "Content-Type: application/json"
 # delete file
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "nke-10k-2023.pdf"}' \
     -H "Content-Type: application/json"
 # delete all uploaded files and links
-curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
     -d '{"file_path": "all"}' \
     -H "Content-Type: application/json"
 ```

View File

@@ -9,13 +9,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}

View File

@@ -12,9 +12,9 @@ export INDEX_NAME="rag-redis"
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export RETRIEVER_SERVICE_HOST_IP=${host_ip}
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
-export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
-export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
-export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
 export FRONTEND_SERVICE_IP=${host_ip}
 export FRONTEND_SERVICE_PORT=5173
 export BACKEND_SERVICE_NAME=chatqna

View File

@@ -65,24 +65,12 @@ services:
     dockerfile: comps/llms/src/text-generation/Dockerfile
     extends: chatqna
     image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
-  dataprep-redis:
+  dataprep:
     build:
       context: GenAIComps
-      dockerfile: comps/dataprep/redis/langchain/Dockerfile
+      dockerfile: comps/dataprep/src/Dockerfile
     extends: chatqna
-    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-  dataprep-qdrant:
-    build:
-      context: GenAIComps
-      dockerfile: comps/dataprep/qdrant/langchain/Dockerfile
-    extends: chatqna
-    image: ${REGISTRY:-opea}/dataprep-qdrant:${TAG:-latest}
-  dataprep-pinecone:
-    build:
-      context: GenAIComps
-      dockerfile: comps/dataprep/pinecone/langchain/Dockerfile
-    extends: chatqna
-    image: ${REGISTRY:-opea}/dataprep-pinecone:${TAG:-latest}
   guardrails:
     build:
       context: GenAIComps
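
With the qdrant and pinecone targets deleted and `dataprep-redis` renamed, one `dataprep` service now builds the image for every backend. The corresponding build invocation, following the `docker compose -f build.yaml build` pattern used by the test scripts in this commit:

```bash
# Build the single consolidated dataprep image from build.yaml
docker compose -f build.yaml build dataprep --no-cache
```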

View File

@@ -70,7 +70,7 @@ spec:
 internalService:
   serviceName: data-prep-svc
   config:
-    endpoint: /v1/dataprep
+    endpoint: /v1/dataprep/ingest
     REDIS_URL: redis-vector-db
     TEI_ENDPOINT: tei-embedding-gaudi-svc
   isDownstreamService: true

View File

@@ -70,7 +70,7 @@ spec:
 internalService:
   serviceName: data-prep-svc
   config:
-    endpoint: /v1/dataprep
+    endpoint: /v1/dataprep/ingest
     REDIS_URL: redis-vector-db
     TEI_ENDPOINT: tei-embedding-svc
   isDownstreamService: true

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
 git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../
 echo "Build all the images with --no-cache, check docker_image_build.log for details..."
-service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever vllm-gaudi guardrails nginx"
+service_list="chatqna-guardrails chatqna-ui dataprep retriever vllm-gaudi guardrails nginx"
 docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
 docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../ git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever vllm-gaudi nginx" service_list="chatqna chatqna-ui dataprep retriever vllm-gaudi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

View File

@@ -31,9 +31,9 @@ export CHATQNA_INDEX_NAME="rag-redis"
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP} export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna" export CHATQNA_BACKEND_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_BACKEND_SERVICE_PORT}/v1/chatqna"
export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep" export CHATQNA_DATAPREP_SERVICE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest"
export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get_file" export CHATQNA_DATAPREP_GET_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/get"
export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete_file" export CHATQNA_DATAPREP_DELETE_FILE_ENDPOINT="http://127.0.0.1:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/delete"
export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP} export CHATQNA_FRONTEND_SERVICE_IP=${HOST_IP}
export CHATQNA_FRONTEND_SERVICE_PORT=15173 export CHATQNA_FRONTEND_SERVICE_PORT=15173
export CHATQNA_BACKEND_SERVICE_NAME=chatqna export CHATQNA_BACKEND_SERVICE_NAME=chatqna
@@ -64,7 +64,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever nginx" service_list="chatqna chatqna-ui dataprep retriever nginx"
docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > "${LOG_PATH}"/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm docker pull ghcr.io/huggingface/text-generation-inference:2.3.1-rocm
@@ -147,31 +147,31 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > "$LOG_PATH"/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > "$LOG_PATH"/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep/ingest upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/delete_file # test /v1/dataprep/delete
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git git clone https://github.com/vllm-project/vllm.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever vllm nginx" service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git git clone https://github.com/vllm-project/vllm.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-pinecone retriever vllm nginx" service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -110,18 +110,18 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep/delete_file # test /v1/dataprep/delete
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-pinecone-server" "dataprep-pinecone-server"
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-pinecone-server" "dataprep-pinecone-server"

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git git clone https://github.com/vllm-project/vllm.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-qdrant retriever vllm nginx" service_list="chatqna chatqna-ui dataprep retriever vllm nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker images && sleep 1s docker images && sleep 1s
@@ -99,17 +99,17 @@ function validate_microservices() {
"tei-embedding-server" \ "tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}' '{"inputs":"What is Deep Learning?"}'
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"${ip_address}:6043/v1/dataprep" \ "${ip_address}:6043/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-qdrant-server" "dataprep-qdrant-server"
# test upload link # test upload link
validate_service \ validate_service \
"${ip_address}:6043/v1/dataprep" \ "${ip_address}:6043/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-qdrant-server" "dataprep-qdrant-server"

View File

@@ -31,7 +31,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever nginx" service_list="chatqna chatqna-ui dataprep retriever nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
@@ -124,28 +124,28 @@ function validate_microservices() {
# test /v1/dataprep upload file # test /v1/dataprep upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get_file
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/delete_file # test /v1/dataprep/delete_file
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"

View File

@@ -31,7 +31,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever nginx" service_list="chatqna chatqna-ui dataprep retriever nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
@@ -121,28 +121,28 @@ function validate_microservices() {
# test /v1/dataprep upload file # test /v1/dataprep upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get_file
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/delete_file # test /v1/dataprep/delete_file
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../ git clone https://github.com/HabanaAI/vllm-fork.git && cd vllm-fork && git checkout v0.6.4.post2+Gaudi-1.19.0 && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever vllm-gaudi nginx" service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm-gaudi nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -105,31 +105,31 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep/ingest upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/delete_file # test /v1/dataprep/delete
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git git clone https://github.com/vllm-project/vllm.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever vllm nginx" service_list="chatqna-without-rerank chatqna-ui dataprep retriever vllm nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -104,31 +104,31 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep/ingest upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/delete_file # test /v1/dataprep/delete
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"

View File

@@ -94,7 +94,7 @@ function validate_chatqna_dataprep() {
export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD" echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep/ingest" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log
exit_code=$? exit_code=$?
if [ $exit_code -ne 0 ]; then if [ $exit_code -ne 0 ]; then
echo "chatqna failed, please check the logs in ${LOG_PATH}!" echo "chatqna failed, please check the logs in ${LOG_PATH}!"

View File

@@ -96,7 +96,7 @@ function validate_chatqna_dataprep() {
export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) export CLIENT_POD=$(kubectl get pod -n $CHATQNA_DATAPREP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
echo "$CLIENT_POD" echo "$CLIENT_POD"
accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") accessUrl=$(kubectl get gmc -n $CHATQNA_DATAPREP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}")
kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log kubectl exec "$CLIENT_POD" -n $CHATQNA_DATAPREP_NAMESPACE -- curl "$accessUrl/dataprep/ingest" -X POST -F 'link_list=["https://raw.githubusercontent.com/opea-project/GenAIInfra/main/microservices-connector/test/data/gaudi.txt"]' -H "Content-Type: multipart/form-data" > $LOG_PATH/curl_dataprep.log
exit_code=$? exit_code=$?
if [ $exit_code -ne 0 ]; then if [ $exit_code -ne 0 ]; then
echo "chatqna failed, please check the logs in ${LOG_PATH}!" echo "chatqna failed, please check the logs in ${LOG_PATH}!"

View File

@@ -1,2 +1,2 @@
VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna
VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep/ingest

View File

@@ -1,7 +1,7 @@
CHAT_BASE_URL = '/v1/chatqna' CHAT_BASE_URL = '/v1/chatqna'
UPLOAD_FILE_BASE_URL = '/v1/dataprep' UPLOAD_FILE_BASE_URL = '/v1/dataprep/ingest'
GET_FILE = '/v1/dataprep/get_file' GET_FILE = '/v1/dataprep/get'
DELETE_FILE = '/v1/dataprep/delete_file' DELETE_FILE = '/v1/dataprep/delete'
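These are relative paths resolved through the nginx proxy in front of the megaservice, so the same rename can be verified from outside the UI — a sketch, assuming nginx forwards the dataprep routes on its default port:

```bash
# UPLOAD_FILE_BASE_URL maps to the ingest route behind nginx
curl -X POST "http://${host_ip}:80/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F 'link_list=["https://opea.dev"]'

# GET_FILE maps to the listing route
curl -X POST "http://${host_ip}:80/v1/dataprep/get"
```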

View File

@@ -27,7 +27,7 @@ DocRetriever are the most widely adopted use case for leveraging the different m
- Dataprep Image - Dataprep Image
```bash ```bash
docker build -t opea/dataprep-on-ray-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
## 2. Build Images for MegaService ## 2. Build Images for MegaService
@@ -57,7 +57,7 @@ export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
cd GenAIExamples/DocIndexRetriever/intel/cpu/xeon/ cd GenAIExamples/DocIndexRetriever/intel/cpu/xeon/
docker compose up -d docker compose up -d
``` ```
@@ -78,7 +78,7 @@ docker compose -f compose_without_rerank.yaml up -d
Add Knowledge Base via HTTP Links: Add Knowledge Base via HTTP Links:
```bash ```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]' -F 'link_list=["https://opea.dev"]'
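Local documents go through the same renamed route by switching the form field — for example (`your_document.pdf` is a placeholder):

```bash
# Upload a local file instead of a link list
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./your_document.pdf"
```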

View File

@@ -11,12 +11,12 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
ports: ports:
- "6007:6007" - "6007:5000"
- "6008:6008" - "6008:6008"
- "6009:6009" - "6009:6009"
environment: environment:

View File

@@ -11,12 +11,12 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
ports: ports:
- "6007:6007" - "6007:5000"
- "6008:6008" - "6008:6008"
- "6009:6009" - "6009:6009"
environment: environment:
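The remapped `6007:5000` ports reflect that the refactored service now listens on 5000 inside the container while the host keeps publishing 6007, so external clients are unaffected but in-network consumers must target 5000. A quick check of both sides:

```bash
# From the host: unchanged port, new route
curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
  -H "Content-Type: multipart/form-data" \
  -F 'link_list=["https://opea.dev"]'

# From inside the compose network (if curl exists in the image)
docker exec dataprep-redis-server curl -s http://localhost:5000/v1/dataprep/ingest || true
```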

View File

@@ -27,7 +27,7 @@ DocRetriever are the most widely adopted use case for leveraging the different m
- Dataprep Image - Dataprep Image
```bash ```bash
docker build -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain_ray/Dockerfile . docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
## 2. Build Images for MegaService ## 2. Build Images for MegaService
@@ -57,7 +57,7 @@ export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip} export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8000/v1/retrievaltool"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export llm_hardware='cpu/xeon' #cpu/xeon, xpu, hpu/gaudi export llm_hardware='cpu/xeon' #cpu/xeon, xpu, hpu/gaudi
cd GenAIExamples/DocIndexRetriever/intel/hpu/gaudi/ cd GenAIExamples/DocIndexRetriever/intel/hpu/gaudi/
docker compose up -d docker compose up -d
@@ -68,7 +68,7 @@ docker compose up -d
Add Knowledge Base via HTTP Links: Add Knowledge Base via HTTP Links:
```bash ```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]' -F 'link_list=["https://opea.dev"]'

View File

@@ -12,13 +12,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}

View File

@@ -29,9 +29,9 @@ services:
dockerfile: comps/rerankings/src/Dockerfile dockerfile: comps/rerankings/src/Dockerfile
extends: doc-index-retriever extends: doc-index-retriever
image: ${REGISTRY:-opea}/reranking:${TAG:-latest} image: ${REGISTRY:-opea}/reranking:${TAG:-latest}
dataprep-redis: dataprep:
build: build:
context: GenAIComps context: GenAIComps
dockerfile: comps/dataprep/redis/langchain/Dockerfile dockerfile: comps/dataprep/src/Dockerfile
extends: doc-index-retriever extends: doc-index-retriever
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}

View File

@@ -72,7 +72,7 @@ function validate() {
function validate_megaservice() { function validate_megaservice() {
echo "=========Ingest data==================" echo "=========Ingest data=================="
local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep" \ local CONTENT=$(curl -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]') -F 'link_list=["https://opea.dev"]')
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-gaudi") local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-gaudi")

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
if [ ! -d "GenAIComps" ] ; then if [ ! -d "GenAIComps" ] ; then
git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git
fi fi
service_list="dataprep-redis embedding retriever reranking doc-index-retriever" service_list="dataprep embedding retriever reranking doc-index-retriever"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -72,7 +72,7 @@ function validate() {
function validate_megaservice() { function validate_megaservice() {
echo "===========Ingest data==================" echo "===========Ingest data=================="
local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev/"]') -F 'link_list=["https://opea.dev/"]')
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")

View File

@@ -20,7 +20,7 @@ function build_docker_images() {
if [ ! -d "GenAIComps" ] ; then if [ ! -d "GenAIComps" ] ; then
git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git git clone --single-branch --branch "${opea_branch:-"main"}" https://github.com/opea-project/GenAIComps.git
fi fi
service_list="dataprep-redis embedding retriever doc-index-retriever" service_list="dataprep embedding retriever doc-index-retriever"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -66,7 +66,7 @@ function validate() {
function validate_megaservice() { function validate_megaservice() {
echo "===========Ingest data==================" echo "===========Ingest data=================="
local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep" \ local CONTENT=$(http_proxy="" curl -X POST "http://${ip_address}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev/"]') -F 'link_list=["https://opea.dev/"]')
local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon") local EXIT_CODE=$(validate "$CONTENT" "Data preparation succeeded" "dataprep-redis-service-xeon")

View File

@@ -58,10 +58,28 @@ def read_text_from_file(file, save_file_name):
return file_content return file_content
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
    if self.services[cur_node].service_type == ServiceType.LLM:
        for key_to_replace in ["text"]:
            if key_to_replace in inputs:
                inputs["messages"] = inputs[key_to_replace]
                del inputs[key_to_replace]
        if "id" in inputs:
            del inputs["id"]
        if "max_new_tokens" in inputs:
            del inputs["max_new_tokens"]
        if "input" in inputs:
            del inputs["input"]
    return inputs
class FaqGenService: class FaqGenService:
def __init__(self, host="0.0.0.0", port=8000): def __init__(self, host="0.0.0.0", port=8000):
self.host = host self.host = host
self.port = port self.port = port
ServiceOrchestrator.align_inputs = align_inputs
self.megaservice = ServiceOrchestrator() self.megaservice = ServiceOrchestrator()
self.endpoint = str(MegaServiceEndpoint.FAQ_GEN) self.endpoint = str(MegaServiceEndpoint.FAQ_GEN)
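The monkey-patched `align_inputs` above rewrites each request just before the orchestrator forwards it to an LLM node: `text` is renamed to `messages`, and fields the refactored llm-textgen service no longer accepts (`id`, `max_new_tokens`, `input`) are dropped. The shape it produces can be checked against the LLM microservice directly — an illustrative call, assuming the default port 9000:

```bash
# The refactored service expects the OpenAI-style "messages" field
curl -X POST "http://${host_ip}:9000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{"messages": "What is OPEA? Generate FAQs for it.", "max_tokens": 128}'
```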

View File

@@ -72,7 +72,7 @@ Here is an example of `Nike 2023` pdf.
# download pdf file # download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
# upload pdf file with dataprep # upload pdf file with dataprep
curl -X POST "http://${host_ip}:6004/v1/dataprep" \ curl -X POST "http://${host_ip}:6004/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf" -F "files=@./nke-10k-2023.pdf"
``` ```
@@ -172,7 +172,7 @@ Gaudi default compose.yaml
| Embedding | Llama-index | Xeon | 6006 | /v1/embeddings | | Embedding | Llama-index | Xeon | 6006 | /v1/embeddings |
| Retriever | Llama-index, Neo4j | Xeon | 6009 | /v1/retrieval | | Retriever | Llama-index, Neo4j | Xeon | 6009 | /v1/retrieval |
| LLM | Llama-index, TGI | Gaudi | 6005 | /v1/chat/completions | | LLM | Llama-index, TGI | Gaudi | 6005 | /v1/chat/completions |
| Dataprep | Neo4j, LlamaIndex | Xeon | 6004 | /v1/dataprep | | Dataprep | Neo4j, LlamaIndex | Xeon | 6004 | /v1/dataprep/ingest |
### Models Selection ### Models Selection
@@ -207,7 +207,7 @@ Here is an example of `Nike 2023` pdf.
# download pdf file # download pdf file
wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf wget https://raw.githubusercontent.com/opea-project/GenAIComps/v1.1/comps/retrievers/redis/data/nke-10k-2023.pdf
# upload pdf file with dataprep # upload pdf file with dataprep
curl -X POST "http://${host_ip}:6007/v1/dataprep" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf" -F "files=@./nke-10k-2023.pdf"
``` ```

View File

@@ -66,14 +66,14 @@ services:
ipc: host ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 6000 --max-total-tokens 8192 command: --model-id ${LLM_MODEL_ID} --max-input-length 6000 --max-total-tokens 8192
dataprep-neo4j-llamaindex: dataprep-neo4j-llamaindex:
image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-neo4j-server container_name: dataprep-neo4j-server
depends_on: depends_on:
- neo4j-apoc - neo4j-apoc
- tgi-gaudi-service - tgi-gaudi-service
- tei-embedding-service - tei-embedding-service
ports: ports:
- "6004:6004" - "6004:5000"
ipc: host ipc: host
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
@@ -85,6 +85,7 @@ services:
NEO4J_URL: ${NEO4J_URL} NEO4J_URL: ${NEO4J_URL}
NEO4J_USERNAME: ${NEO4J_USERNAME} NEO4J_USERNAME: ${NEO4J_USERNAME}
NEO4J_PASSWORD: ${NEO4J_PASSWORD} NEO4J_PASSWORD: ${NEO4J_PASSWORD}
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_NEO4J_LLAMAINDEX"
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT} TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
OPENAI_API_KEY: ${OPENAI_API_KEY} OPENAI_API_KEY: ${OPENAI_API_KEY}
@@ -177,7 +178,7 @@ services:
- BACKEND_SERVICE_IP=graphrag-gaudi-backend-server - BACKEND_SERVICE_IP=graphrag-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888 - BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-neo4j-llamaindex - DATAPREP_SERVICE_IP=dataprep-neo4j-llamaindex
- DATAPREP_SERVICE_PORT=6004 - DATAPREP_SERVICE_PORT=5000
ipc: host ipc: host
restart: always restart: always
networks: networks:

View File

@@ -18,6 +18,6 @@ export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TGI_LLM_ENDPOINT="http://${host_ip}:6005" export TGI_LLM_ENDPOINT="http://${host_ip}:6005"
export NEO4J_URL="bolt://${host_ip}:7687" export NEO4J_URL="bolt://${host_ip}:7687"
export NEO4J_USERNAME=neo4j export NEO4J_USERNAME=neo4j
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6004/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export LOGFLAG=True export LOGFLAG=True
export RETRIEVER_SERVICE_PORT=80 export RETRIEVER_SERVICE_PORT=80

View File

@@ -20,15 +20,15 @@ services:
context: GenAIComps context: GenAIComps
dockerfile: comps/retrievers/src/Dockerfile dockerfile: comps/retrievers/src/Dockerfile
image: ${REGISTRY:-opea}/retriever:${TAG:-latest} image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
dataprep-neo4j-llamaindex: dataprep:
build: build:
args: args:
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
https_proxy: ${https_proxy} https_proxy: ${https_proxy}
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
context: GenAIComps context: GenAIComps
dockerfile: comps/dataprep/neo4j/llama_index/Dockerfile dockerfile: comps/dataprep/src/Dockerfile
image: ${REGISTRY:-opea}/dataprep-neo4j-llamaindex:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
nginx: nginx:
build: build:
args: args:

View File

@@ -129,10 +129,10 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep graph extraction # test /v1/dataprep/ingest graph extraction
echo "Like many companies in the O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. FirstEnergy (NYSE:FE Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh OBrien during a debate on retained firefighters. Mr OBrien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Irelands larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, OBrien says. Martin withdraws comment after saying People Before Profit would put the jackboot on people Taoiseach propagated fears farmers forced to rewet land due to nature restoration law Cairns An intervention is required now. Im asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.Im also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr OBrien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr OBrien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr OBrien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the States industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt echo "Like many companies in the O&G sector, the stock of Chevron (NYSE:CVX) has declined about 10% over the past 90-days despite the fact that Q2 consensus earnings estimates have risen sharply (~25%) during that same time frame. Over the years, Chevron has kept a very strong balance sheet. 
FirstEnergy (NYSE:FE Get Rating) posted its earnings results on Tuesday. The utilities provider reported $0.53 earnings per share for the quarter, topping the consensus estimate of $0.52 by $0.01, RTT News reports. FirstEnergy had a net margin of 10.85% and a return on equity of 17.17%. The Dáil was almost suspended on Thursday afternoon after Sinn Féin TD John Brady walked across the chamber and placed an on-call pager in front of the Minister for Housing Darragh OBrien during a debate on retained firefighters. Mr OBrien said Mr Brady had taken part in an act of theatre that was obviously choreographed.Around 2,000 retained firefighters around the country staged a second day of industrial action on Tuesday and are due to start all out-strike action from next Tuesday. The mostly part-time workers, who keep the services going outside of Irelands larger urban centres, are taking industrial action in a dispute over pay and working conditions. Speaking in the Dáil, Sinn Féin deputy leader Pearse Doherty said firefighters had marched on Leinster House today and were very angry at the fact the Government will not intervene. Reintroduction of tax relief on mortgages needs to be considered, OBrien says. Martin withdraws comment after saying People Before Profit would put the jackboot on people Taoiseach propagated fears farmers forced to rewet land due to nature restoration law Cairns An intervention is required now. Im asking you to make an improved offer in relation to pay for retained firefighters, Mr Doherty told the housing minister.Im also asking you, and challenging you, to go outside after this Order of Business and meet with the firefighters because they are just fed up to the hilt in relation to what you said.Some of them have handed in their pagers to members of the Opposition and have challenged you to wear the pager for the next number of weeks, put up with an €8,600 retainer and not leave your community for the two and a half kilometres and see how you can stand over those type of pay and conditions. At this point, Mr Brady got up from his seat, walked across the chamber and placed the pager on the desk in front of Mr OBrien. Ceann Comhairle Seán Ó Fearghaíl said the Sinn Féin TD was completely out of order and told him not to carry out a charade in this House, adding it was absolutely outrageous behaviour and not to be encouraged.Mr OBrien said Mr Brady had engaged in an act of theatre here today which was obviously choreographed and was then interrupted with shouts from the Opposition benches. Mr Ó Fearghaíl said he would suspend the House if this racket continues.Mr OBrien later said he said he was confident the dispute could be resolved and he had immense regard for firefighters. The minister said he would encourage the unions to re-engage with the States industrial relations process while also accusing Sinn Féin of using the issue for their own political gain." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6004/v1/dataprep" \ "http://${ip_address}:6004/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"extract_graph_neo4j" \ "extract_graph_neo4j" \
"dataprep-neo4j-server" "dataprep-neo4j-server"

View File

@@ -1,2 +1,2 @@
VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna VITE_BACKEND_SERVICE_ENDPOINT=http://backend_address:8888/v1/chatqna
VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep VITE_DATA_PREP_SERVICE_URL=http://backend_address:6007/v1/dataprep/ingest

View File

@@ -1,7 +1,7 @@
CHAT_BASE_URL = '/v1/graphrag' CHAT_BASE_URL = '/v1/graphrag'
UPLOAD_FILE_BASE_URL = '/v1/dataprep' UPLOAD_FILE_BASE_URL = '/v1/dataprep/ingest'
GET_FILE = '/v1/dataprep/get_file' GET_FILE = '/v1/dataprep/get'
DELETE_FILE = '/v1/dataprep/delete_file' DELETE_FILE = '/v1/dataprep/delete'

View File

@@ -51,7 +51,7 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
### 4. Build dataprep-multimodal-redis Image ### 4. Build dataprep-multimodal-redis Image
```bash ```bash
docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
### 5. Build MegaService Docker Image ### 5. Build MegaService Docker Image
@@ -83,7 +83,7 @@ docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
Then run the command `docker images`; you should see the following 8 Docker images: Then run the command `docker images`; you should see the following 8 Docker images:
1. `opea/dataprep-multimodal-redis:latest` 1. `opea/dataprep:latest`
2. `ghcr.io/huggingface/text-generation-inference:2.4.1-rocm` 2. `ghcr.io/huggingface/text-generation-inference:2.4.1-rocm`
3. `opea/lvm:latest` 3. `opea/lvm:latest`
4. `opea/retriever:latest` 4. `opea/retriever:latest`

View File

@@ -20,13 +20,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-multimodal-redis: dataprep-multimodal-redis:
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-multimodal-redis container_name: dataprep-multimodal-redis
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- lvm - lvm
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -36,6 +36,8 @@ services:
INDEX_NAME: ${INDEX_NAME} INDEX_NAME: ${INDEX_NAME}
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${MULTIMODAL_HUGGINGFACEHUB_API_TOKEN}
MULTIMODAL_DATAPREP: true
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
restart: unless-stopped restart: unless-stopped
embedding-multimodal-bridgetower: embedding-multimodal-bridgetower:
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}

View File

@@ -26,8 +26,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
export LVM_SERVICE_HOST_IP=${HOST_IP} export LVM_SERVICE_HOST_IP=${HOST_IP}
export MEGA_SERVICE_HOST_IP=${HOST_IP} export MEGA_SERVICE_HOST_IP=${HOST_IP}
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/delete"
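The multimodal helper routes are likewise folded under `/v1/dataprep/*` on the service's internal port 5000. For instance, transcript generation moves from `/v1/generate_transcripts` to the relocated route — a sketch with a placeholder file name:

```bash
# Generate transcripts for an uploaded video via the new route
curl -X POST "${DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT}" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./example_video.mp4"
```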

View File

@@ -94,11 +94,11 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
``` ```
Note: Please replace `host_ip` with your external IP address; do not use localhost. Note: Please replace `host_ip` with your external IP address; do not use localhost.
@@ -144,7 +144,7 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy
### 4. Build dataprep-multimodal-redis Image ### 4. Build dataprep-multimodal-redis Image
```bash ```bash
docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
### 5. Build asr images ### 5. Build asr images
@@ -178,7 +178,7 @@ cd ../../../
Then run the command `docker images`; you should see the following 11 Docker images: Then run the command `docker images`; you should see the following 11 Docker images:
1. `opea/dataprep-multimodal-redis:latest` 1. `opea/dataprep:latest`
2. `opea/lvm:latest` 2. `opea/lvm:latest`
3. `opea/lvm-llava:latest` 3. `opea/lvm-llava:latest`
4. `opea/retriever:latest` 4. `opea/retriever:latest`

View File

@@ -20,13 +20,13 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-multimodal-redis: dataprep-multimodal-redis:
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-multimodal-redis container_name: dataprep-multimodal-redis
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- lvm-llava - lvm-llava
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -36,6 +36,8 @@ services:
INDEX_NAME: ${INDEX_NAME} INDEX_NAME: ${INDEX_NAME}
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MULTIMODAL_DATAPREP: true
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
restart: unless-stopped restart: unless-stopped
embedding-multimodal-bridgetower: embedding-multimodal-bridgetower:
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}


@@ -28,8 +28,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"


@@ -43,11 +43,11 @@ export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_SERVER_PORT}/v1/asr"
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
``` ```
Note: Please replace `host_ip` with your external IP address; do not use localhost.
@@ -95,7 +95,7 @@ docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy
### 4. Build dataprep-multimodal-redis Image ### 4. Build dataprep-multimodal-redis Image
```bash ```bash
docker build --no-cache -t opea/dataprep-multimodal-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/multimodal/redis/langchain/Dockerfile . docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
### 5. Build asr images ### 5. Build asr images
@@ -127,7 +127,7 @@ docker build --no-cache -t opea/multimodalqna-ui:latest --build-arg https_proxy=
Then run the command `docker images`; you should see the following 11 Docker images:
1. `opea/dataprep-multimodal-redis:latest` 1. `opea/dataprep:latest`
2. `opea/lvm:latest` 2. `opea/lvm:latest`
3. `ghcr.io/huggingface/tgi-gaudi:2.0.6` 3. `ghcr.io/huggingface/tgi-gaudi:2.0.6`
4. `opea/retriever:latest` 4. `opea/retriever:latest`


@@ -20,13 +20,13 @@ services:
https_proxy: ${https_proxy} https_proxy: ${https_proxy}
restart: unless-stopped restart: unless-stopped
dataprep-multimodal-redis: dataprep-multimodal-redis:
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-multimodal-redis container_name: dataprep-multimodal-redis
depends_on: depends_on:
- redis-vector-db - redis-vector-db
- lvm - lvm
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -36,6 +36,8 @@ services:
INDEX_NAME: ${INDEX_NAME} INDEX_NAME: ${INDEX_NAME}
LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm" LVM_ENDPOINT: "http://${LVM_SERVICE_HOST_IP}:9399/v1/lvm"
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
MULTIMODAL_DATAPREP: true
DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALREDIS"
restart: unless-stopped restart: unless-stopped
embedding-multimodal-bridgetower: embedding-multimodal-bridgetower:
image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest} image: ${REGISTRY:-opea}/embedding-multimodal-bridgetower:${TAG:-latest}


@@ -28,8 +28,8 @@ export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip}
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"


@@ -47,12 +47,12 @@ services:
dockerfile: comps/lvms/src/Dockerfile dockerfile: comps/lvms/src/Dockerfile
extends: multimodalqna extends: multimodalqna
image: ${REGISTRY:-opea}/lvm:${TAG:-latest} image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
dataprep-multimodal-redis: dataprep:
build: build:
context: GenAIComps context: GenAIComps
dockerfile: comps/dataprep/multimodal/redis/langchain/Dockerfile dockerfile: comps/dataprep/src/Dockerfile
extends: multimodalqna extends: multimodalqna
image: ${REGISTRY:-opea}/dataprep-multimodal-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
whisper: whisper:
build: build:
context: GenAIComps context: GenAIComps


@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation # Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
set -e set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"} IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -35,7 +35,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep-multimodal-redis whisper" service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep whisper"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
@@ -64,11 +64,11 @@ function setup_env() {
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
} }
function start_services() { function start_services() {
@@ -109,7 +109,7 @@ function validate_service() {
elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL")
else else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi fi
@@ -173,6 +173,11 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
# test data prep # test data prep
echo "Data Prep with Generating Transcript for Video" echo "Data Prep with Generating Transcript for Video"
validate_service \ validate_service \
@@ -283,6 +288,7 @@ function validate_megaservice() {
function validate_delete { function validate_delete {
echo "Validate data prep delete files" echo "Validate data prep delete files"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
validate_service \ validate_service \
"${DATAPREP_DELETE_FILE_ENDPOINT}" \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \
'{"status":true}' \ '{"status":true}' \


@@ -35,7 +35,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep-multimodal-redis whisper" service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm dataprep whisper"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker images && sleep 1m docker images && sleep 1m
@@ -68,11 +68,11 @@ function setup_env() {
export LVM_SERVICE_HOST_IP=${HOST_IP} export LVM_SERVICE_HOST_IP=${HOST_IP}
export MEGA_SERVICE_HOST_IP=${HOST_IP} export MEGA_SERVICE_HOST_IP=${HOST_IP}
export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:5000/v1/dataprep/delete"
} }
function start_services() { function start_services() {
@@ -111,7 +111,7 @@ function validate_service() {
elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL")
else else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi fi
@@ -175,6 +175,11 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/get"
# test data prep # test data prep
echo "Data Prep with Generating Transcript for Video" echo "Data Prep with Generating Transcript for Video"
validate_service \ validate_service \
@@ -284,6 +289,7 @@ function validate_megaservice() {
function validate_delete { function validate_delete {
echo "Validate data prep delete files" echo "Validate data prep delete files"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${HOST_IP}:6007/v1/dataprep/delete"
validate_service \ validate_service \
"${DATAPREP_DELETE_FILE_ENDPOINT}" \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \
'{"status":true}' \ '{"status":true}' \


@@ -2,7 +2,7 @@
# Copyright (C) 2024 Intel Corporation # Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
set -e set -x
IMAGE_REPO=${IMAGE_REPO:-"opea"} IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"} IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
@@ -35,7 +35,7 @@ function build_docker_images() {
git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..." echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm-llava lvm dataprep-multimodal-redis whisper" service_list="multimodalqna multimodalqna-ui embedding-multimodal-bridgetower embedding retriever lvm-llava lvm dataprep whisper"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker images && sleep 1s docker images && sleep 1s
} }
@@ -61,11 +61,11 @@ function setup_env() {
export LVM_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip} export MEGA_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/multimodalqna"
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/ingest_with_text" export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_transcripts" export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/generate_captions" export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_files" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_files" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
} }
function start_services() { function start_services() {
@@ -103,7 +103,7 @@ function validate_service() {
elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "apple.txt"}' -H 'Content-Type: application/json' "$URL")
else else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi fi
@@ -167,6 +167,11 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_transcripts"
export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/generate_captions"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
# test data prep # test data prep
echo "Data Prep with Generating Transcript for Video" echo "Data Prep with Generating Transcript for Video"
validate_service \ validate_service \
@@ -276,6 +281,7 @@ function validate_megaservice() {
function validate_delete { function validate_delete {
echo "Validate data prep delete files" echo "Validate data prep delete files"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete"
validate_service \ validate_service \
"${DATAPREP_DELETE_FILE_ENDPOINT}" \ "${DATAPREP_DELETE_FILE_ENDPOINT}" \
'{"status":true}' \ '{"status":true}' \


@@ -39,7 +39,7 @@ docker build --no-cache -t opea/llm-textgen:latest --build-arg https_proxy=$http
### 5. Build Dataprep Image ### 5. Build Dataprep Image
```bash ```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
### 6. Build Prompt Registry Image ### 6. Build Prompt Registry Image
@@ -158,12 +158,12 @@ export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028"
export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009"
export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009"
export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
@@ -347,7 +347,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
Update Knowledge Base via Local File Upload: Update Knowledge Base via Local File Upload:
```bash ```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./nke-10k-2023.pdf" -F "files=@./nke-10k-2023.pdf"
``` ```
@@ -357,7 +357,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
Add Knowledge Base via HTTP Links: Add Knowledge Base via HTTP Links:
```bash ```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/ingest" \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F 'link_list=["https://opea.dev"]' -F 'link_list=["https://opea.dev"]'
``` ```
@@ -367,7 +367,7 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
You can also retrieve the list of files you have uploaded:
```bash ```bash
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
-H "Content-Type: application/json" -H "Content-Type: application/json"
``` ```
@@ -375,17 +375,17 @@ Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more det
```bash ```bash
# delete link # delete link
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
-d '{"file_path": "https://opea.dev.txt"}' \ -d '{"file_path": "https://opea.dev.txt"}' \
-H "Content-Type: application/json" -H "Content-Type: application/json"
# delete file # delete file
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
-d '{"file_path": "nke-10k-2023.pdf"}' \ -d '{"file_path": "nke-10k-2023.pdf"}' \
-H "Content-Type: application/json" -H "Content-Type: application/json"
# delete all uploaded files and links # delete all uploaded files and links
curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete" \
-d '{"file_path": "all"}' \ -d '{"file_path": "all"}' \
-H "Content-Type: application/json" -H "Content-Type: application/json"
``` ```
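As a sanity check after the `all` deletion, the get endpoint above should come back empty (assuming every file and link was removed):

```bash
# List uploaded files; expect an empty JSON array after deleting "all".
curl -X POST "http://${host_ip}:6007/v1/dataprep/get" \
    -H "Content-Type: application/json"
```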


@@ -15,12 +15,12 @@ services:
- "6379:6379" - "6379:6379"
- "8001:8001" - "8001:8001"
dataprep-redis-service: dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-redis-server container_name: dataprep-redis-server
depends_on: depends_on:
- redis-vector-db - redis-vector-db
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}


@@ -32,12 +32,12 @@ export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028"
export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009"
export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009"
export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/delete"
export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:5000/v1/dataprep/get"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"


@@ -35,12 +35,12 @@ services:
dockerfile: comps/llms/src/text-generation/Dockerfile dockerfile: comps/llms/src/text-generation/Dockerfile
extends: chatqna extends: chatqna
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest} image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
dataprep-redis: dataprep:
build: build:
context: GenAIComps context: GenAIComps
dockerfile: comps/dataprep/redis/langchain/Dockerfile dockerfile: comps/dataprep/src/Dockerfile
extends: chatqna extends: chatqna
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
promptregistry-mongo-server: promptregistry-mongo-server:
build: build:
context: GenAIComps context: GenAIComps


@@ -499,11 +499,11 @@ spec:
runAsUser: 1000 runAsUser: 1000
seccompProfile: seccompProfile:
type: RuntimeDefault type: RuntimeDefault
image: "opea/dataprep-redis:latest" image: "opea/dataprep:latest"
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
ports: ports:
- name: data-prep - name: data-prep
containerPort: 6007 containerPort: 5000
protocol: TCP protocol: TCP
volumeMounts: volumeMounts:
- mountPath: /tmp - mountPath: /tmp


@@ -58,10 +58,10 @@ function start_services() {
export TGI_LLM_ENDPOINT_DOCSUM="http://${ip_address}:9009" export TGI_LLM_ENDPOINT_DOCSUM="http://${ip_address}:9009"
export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${ip_address}:8888/v1/chatqna" export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${ip_address}:8888/v1/chatqna"
export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${ip_address}:8889/v1/faqgen" export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${ip_address}:8889/v1/faqgen"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/delete"
export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${ip_address}:7778/v1/codegen" export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${ip_address}:7778/v1/codegen"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/ingest" export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get" export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:5000/v1/dataprep/get"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create" export CHAT_HISTORY_CREATE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/create"
export CHAT_HISTORY_DELETE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/delete" export CHAT_HISTORY_DELETE_ENDPOINT="http://${ip_address}:6012/v1/chathistory/delete"
@@ -146,6 +146,34 @@ function validate_service() {
sleep 1s sleep 1s
} }
function validate_faqgen() {
local URL="$1"
local SERVICE_NAME="$2"
local DOCKER_NAME="$3"
local EXPECTED_RESULT="Embeddings"
local INPUT_DATA="messages=Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -F "$INPUT_DATA" -F "max_tokens=32" -F "stream=False" -H 'Content-Type: multipart/form-data' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -F "$INPUT_DATA" -F "max_tokens=32" -F "stream=False" -H 'Content-Type: multipart/form-data' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
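With the FAQGen payload and multipart form fields built inside the new helper, call sites shrink to a URL, a service name, and a container name, as in the invocations further down:

```bash
# Example invocation of the new helper (mirrors the call sites below).
validate_faqgen \
    "${ip_address}:9002/v1/faqgen" \
    "llm_faqgen" \
    "llm-faqgen-server"
```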
function validate_microservices() { function validate_microservices() {
# Check if the microservices are running correctly. # Check if the microservices are running correctly.
@@ -167,31 +195,31 @@ function validate_microservices() {
sleep 1m # retrieval can't curl as expected, try to wait for more time sleep 1m # retrieval can't curl as expected, try to wait for more time
# test /v1/dataprep/delete_file # test /v1/dataprep/delete
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \ "http://${ip_address}:6007/v1/dataprep/delete" \
'{"status":true}' \ '{"status":true}' \
"dataprep_del" \ "dataprep_del" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload file # test /v1/dataprep/ingest upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_file" \ "dataprep_upload_file" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep upload link # test /v1/dataprep upload link
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep" \ "http://${ip_address}:6007/v1/dataprep/ingest" \
"Data preparation succeeded" \ "Data preparation succeeded" \
"dataprep_upload_link" \ "dataprep_upload_link" \
"dataprep-redis-server" "dataprep-redis-server"
# test /v1/dataprep/get_file # test /v1/dataprep/get
validate_service \ validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \ "http://${ip_address}:6007/v1/dataprep/get" \
'{"name":' \ '{"name":' \
"dataprep_get" \ "dataprep_get" \
"dataprep-redis-server" "dataprep-redis-server"
@@ -238,12 +266,10 @@ function validate_microservices() {
'{"query":"What is Deep Learning?"}' '{"query":"What is Deep Learning?"}'
# FAQGen llm microservice # FAQGen llm microservice
validate_service \ validate_faqgen \
"${ip_address}:9002/v1/faqgen" \ "${ip_address}:9002/v1/faqgen" \
"data: " \
"llm_faqgen" \ "llm_faqgen" \
"llm-faqgen-server" \ "llm-faqgen-server"
'{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
# CodeGen llm microservice # CodeGen llm microservice
validate_service \ validate_service \
@@ -287,10 +313,8 @@ function validate_microservices() {
} }
function validate_megaservice() { function validate_megaservice() {
# Curl the ChatQnAMega Service # Curl the ChatQnAMega Service
validate_service \ validate_service \
"${ip_address}:8888/v1/chatqna" \ "${ip_address}:8888/v1/chatqna" \
@@ -300,12 +324,10 @@ function validate_megaservice() {
'{"messages": "What is the revenue of Nike in 2023?"}'\ '{"messages": "What is the revenue of Nike in 2023?"}'\
# Curl the FAQGen Service # Curl the FAQGen Service
validate_service \ validate_faqgen \
"${ip_address}:8889/v1/faqgen" \ "${ip_address}:8889/v1/faqgen" \
"Text Embeddings Inference" \
"faqgen-xeon-backend-server" \ "faqgen-xeon-backend-server" \
"faqgen-xeon-backend-server" \ "faqgen-xeon-backend-server"
'{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'\
# Curl the CodeGen Mega Service # Curl the CodeGen Mega Service
validate_service \ validate_service \


@@ -80,7 +80,7 @@ docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg
### 5. Build Dataprep Image ### 5. Build Dataprep Image
```bash ```bash
docker build -t opea/dataprep-multimodal-vdms:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/vdms/multimodal_langchain/Dockerfile . docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
``` ```
### 6. Build MegaService Docker Image ### 6. Build MegaService Docker Image
@@ -106,7 +106,7 @@ docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --b
Then run the command `docker images`; you should see the following 8 Docker images:
1. `opea/dataprep-multimodal-vdms:latest` 1. `opea/dataprep:latest`
2. `opea/embedding-multimodal-clip:latest` 2. `opea/embedding-multimodal-clip:latest`
3. `opea/retriever:latest` 3. `opea/retriever:latest`
4. `opea/reranking:latest` 4. `opea/reranking:latest`
@@ -161,8 +161,8 @@ export LVM_SERVICE_HOST_IP=${host_ip}
export LVM_ENDPOINT="http://${host_ip}:9009" export LVM_ENDPOINT="http://${host_ip}:9009"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check" export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos" export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
export VDMS_HOST=${host_ip} export VDMS_HOST=${host_ip}
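Before ingesting any data, the health-check endpoint exported above can confirm the backend is reachable (a sketch; this assumes the route answers a plain GET):

```bash
curl "${BACKEND_HEALTH_CHECK_ENDPOINT}"
```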
@@ -195,7 +195,7 @@ docker compose up vdms-vector-db dataprep -d
sleep 1m # wait for the services ready sleep 1m # wait for the services ready
# Insert some sample data to the DB # Insert some sample data to the DB
curl -X POST http://${host_ip}:6007/v1/dataprep \ curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./data/op_1_0320241830.mp4" -F "files=@./data/op_1_0320241830.mp4"


@@ -10,12 +10,12 @@ services:
ports: ports:
- "8001:55555" - "8001:55555"
dataprep: dataprep:
image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
container_name: dataprep-vdms-server container_name: dataprep-vdms-server
depends_on: depends_on:
- vdms-vector-db - vdms-vector-db
ports: ports:
- "6007:6007" - "6007:5000"
environment: environment:
no_proxy: ${no_proxy} no_proxy: ${no_proxy}
http_proxy: ${http_proxy} http_proxy: ${http_proxy}
@@ -23,6 +23,7 @@ services:
VDMS_HOST: ${VDMS_HOST} VDMS_HOST: ${VDMS_HOST}
VDMS_PORT: ${VDMS_PORT} VDMS_PORT: ${VDMS_PORT}
INDEX_NAME: ${INDEX_NAME} INDEX_NAME: ${INDEX_NAME}
MULTIMODAL_DATAPREP: true
entrypoint: sh -c 'sleep 15 && python ingest_videos.py' entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
volumes: volumes:
- /home/$USER/.cache/clip:/home/user/.cache/clip - /home/$USER/.cache/clip:/home/user/.cache/clip


@@ -17,8 +17,8 @@ export LVM_SERVICE_HOST_IP=${host_ip}
export LVM_ENDPOINT="http://${host_ip}:9009" export LVM_ENDPOINT="http://${host_ip}:9009"
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna" export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna"
export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check" export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get"
export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos" export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos"
export VDMS_HOST=${host_ip} export VDMS_HOST=${host_ip}


@@ -17,12 +17,12 @@ services:
dockerfile: ./docker/Dockerfile dockerfile: ./docker/Dockerfile
extends: videoqna extends: videoqna
image: ${REGISTRY:-opea}/videoqna-ui:${TAG:-latest} image: ${REGISTRY:-opea}/videoqna-ui:${TAG:-latest}
dataprep-multimodal-vdms: dataprep:
build: build:
context: GenAIComps context: GenAIComps
dockerfile: comps/dataprep/vdms/multimodal_langchain/Dockerfile dockerfile: comps/dataprep/src/Dockerfile
extends: videoqna extends: videoqna
image: ${REGISTRY:-opea}/dataprep-multimodal-vdms:${TAG:-latest} image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
embedding-multimodal-clip: embedding-multimodal-clip:
build: build:
context: GenAIComps context: GenAIComps


@@ -35,7 +35,7 @@ function start_services() {
sleep 30s sleep 30s
# Insert some sample data to the DB # Insert some sample data to the DB
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \ HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./data/op_1_0320241830.mp4") -F "files=@./data/op_1_0320241830.mp4")
@@ -142,7 +142,7 @@ function validate_microservices() {
cd $WORKPATH/docker_compose/intel/cpu/xeon//data cd $WORKPATH/docker_compose/intel/cpu/xeon//data
# dataprep microservice # dataprep microservice
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep \ HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
-H "Content-Type: multipart/form-data" \ -H "Content-Type: multipart/form-data" \
-F "files=@./op_1_0320241830.mp4") -F "files=@./op_1_0320241830.mp4")


@@ -2,7 +2,7 @@
A list of released OPEA Docker images is available at https://hub.docker.com/; it contains all relevant images from the GenAIExamples, GenAIComps and GenAIInfra projects. Expect more publicly available images in future releases.
Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep-redis](https://hub.docker.com/r/opea/dataprep-redis), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details. Take ChatQnA for example. ChatQnA is a chatbot application service based on the Retrieval Augmented Generation (RAG) architecture. It consists of [opea/embedding](https://hub.docker.com/r/opea/embedding), [opea/retriever](https://hub.docker.com/r/opea/retriever-redis), [opea/reranking-tei](https://hub.docker.com/r/opea/reranking-tei), [opea/llm-textgen](https://hub.docker.com/r/opea/llm-textgen), [opea/dataprep](), [opea/chatqna](https://hub.docker.com/r/opea/chatqna), [opea/chatqna-ui](https://hub.docker.com/r/opea/chatqna-ui) and [opea/chatqna-conversation-ui](https://hub.docker.com/r/opea/chatqna-conversation-ui) (Optional) multiple microservices. Other services are similar, see the corresponding README for details.
## Example images ## Example images
@@ -45,17 +45,7 @@ Take ChatQnA for example. ChatQnA is a chatbot application service based on the
| [opea/agent]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use | | [opea/agent]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/agent/src/Dockerfile) | The docker image exposed the OPEA agent microservice for GenAI application use |
| [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use | | [opea/asr](https://hub.docker.com/r/opea/asr) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/asr/src/Dockerfile) | The docker image exposed the OPEA Audio-Speech-Recognition microservice for GenAI application use |
| [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations | | [opea/chathistory-mongo-server](https://hub.docker.com/r/opea/chathistory-mongo-server) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/chathistory/src/Dockerfile) | The docker image exposes OPEA Chat History microservice which based on MongoDB database, designed to allow user to store, retrieve and manage chat conversations |
| [opea/dataprep-milvus](https://hub.docker.com/r/opea/dataprep-milvus) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/milvus/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on milvus vectordb for GenAI application use | | [opea/dataprep]() | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/src/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on many vectordbs for GenAI application use |
| [opea/dataprep-multimodal-vdms](https://hub.docker.com/r/opea/dataprep-multimodal-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/vdms/multimodal_langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on a multi-modal VDMS for use by GenAI applications. |
| [opea/dataprep-multimodal-redis](https://hub.docker.com/r/opea/dataprep-multimodal-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on a multi-modal redis for use by GenAI applications. |
| [opea/dataprep-on-ray-redis](https://hub.docker.com/r/opea/dataprep-on-ray-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain_ray/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb and optimized ray for GenAI application use |
| [opea/dataprep-pgvector](https://hub.docker.com/r/opea/dataprep-pgvector) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/pgvector/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on pgvector vectordb for GenAI application use |
| [opea/dataprep-pinecone](https://hub.docker.com/r/opea/dataprep-pinecone) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/pinecone/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on pincone vectordb for GenAI application use |
| [opea/dataprep-qdrant](https://hub.docker.com/r/opea/dataprep-qdrant) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/qdrant/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on qdrant vectordb for GenAI application use |
| [opea/dataprep-redis](https://hub.docker.com/r/opea/dataprep-redis) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/langchain/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb Langchain framework for GenAI application use |
| [opea/dataprep-redis-llama-index](https://hub.docker.com/r/opea/dataprep-redis-llama-index) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/redis/llama_index/Dockerfile) | The docker image exposed the OPEA dataprep microservice based on redis vectordb LlamaIndex framework for GenAI application use |
| [opea/dataprep-vdms](https://hub.docker.com/r/opea/dataprep-vdms) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/dataprep/vdms/langchain/Dockerfile) | This docker image exposes an OPEA dataprep microservice based on VDMS vectordb for use by GenAI applications. |
| [opea/embedding-langchain-mosec](https://hub.docker.com/r/opea/embedding-langchain-mosec) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/nginx/src/Dockerfile) | The docker image exposed the OPEA mosec embedding microservice base on Langchain framework for GenAI application use |
| [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposes OPEA multimodal CLIP-based embedded microservices for use by GenAI applications | | [opea/embedding-multimodal-clip](https://hub.docker.com/r/opea/embedding-multimodal-clip) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/clip/src/Dockerfile) | The docker image exposes OPEA multimodal CLIP-based embedded microservices for use by GenAI applications |
| [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices for use by GenAI applications | | [opea/embedding](https://hub.docker.com/r/opea/embedding) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/embeddings/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices for use by GenAI applications |
| [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications | | [opea/embedding-multimodal-bridgetower](https://hub.docker.com/r/opea/embedding-multimodal-bridgetower) | [Link](https://github.com/opea-project/GenAIComps/blob/main/comps/third_parties/bridgetower/src/Dockerfile) | The docker image exposes OPEA multimodal embedded microservices based on bridgetower for use by GenAI applications |