Set no wrapper ChatQnA as default (#891)

Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
lvliang-intel authored 2024-10-11 13:30:45 +08:00, committed by GitHub
parent b71a12d424 · commit 619d941047
66 changed files with 649 additions and 4796 deletions

View File

@@ -19,7 +19,8 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna.py /home/user/chatqna.py

View File

@@ -19,9 +19,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_guardrails.py /home/user/chatqna_guardrails.py
COPY ./chatqna.py /home/user/chatqna.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -31,4 +32,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna_guardrails.py"]
ENTRYPOINT ["python", "chatqna.py", "--with-guardrails"]

View File

@@ -1,31 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_no_wrapper.py"]

View File

@@ -1,31 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]

View File

@@ -6,9 +6,9 @@
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
git \
libgl1-mesa-glx \
libjemalloc-dev \
git
libjemalloc-dev
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
@@ -19,9 +19,10 @@ RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
pip install --no-cache-dir langchain_core
COPY ./chatqna_without_rerank.py /home/user/chatqna_without_rerank.py
COPY ./chatqna.py /home/user/chatqna.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
@@ -31,4 +32,4 @@ WORKDIR /home/user
RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
ENTRYPOINT ["python", "chatqna_without_rerank.py"]
ENTRYPOINT ["python", "chatqna.py", "--without-rerank"]

View File

@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -327,7 +327,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -345,7 +345,7 @@ spec:
envFrom:
- configMapRef:
name: qna-config
image: opea/tei-gaudi:latest
image: ghcr.io/huggingface/tei-gaudi:latest
imagePullPolicy: IfNotPresent
name: reranking-dependency-deploy
ports:

View File

@@ -1,34 +1,276 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import re
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from langchain_core.prompts import PromptTemplate
class ChatTemplate:
@staticmethod
def generate_rag_prompt(question, documents):
context_str = "\n".join(documents)
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
# chinese context
template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
else:
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
return template.format(context=context_str, question=question)
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 6006))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 8808))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 9009))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
inputs["inputs"] = inputs["text"]
del inputs["text"]
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
# prepare the retriever params
retriever_parameters = kwargs.get("retriever_parameters", None)
if retriever_parameters:
inputs.update(retriever_parameters.dict())
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
next_inputs["stream"] = inputs["streaming"]
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
next_inputs["presence_penalty"] = inputs["presence_penalty"]
next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
next_data = {}
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
assert isinstance(data, list)
next_data = {"text": inputs["inputs"], "embedding": data[0]}
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
docs = [doc["text"] for doc in data["retrieved_docs"]]
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
if with_rerank and docs:
# forward to rerank
# prepare inputs for rerank
next_data["query"] = data["initial_query"]
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
else:
# forward to llm
if not docs and with_rerank:
# delete the rerank from retriever -> rerank -> llm
for ds in reversed(runtime_graph.downstream(cur_node)):
for nds in runtime_graph.downstream(ds):
runtime_graph.add_edge(cur_node, nds)
runtime_graph.delete_node_if_exists(ds)
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = data["initial_query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=data["initial_query"])
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
else:
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
next_data["inputs"] = prompt
elif self.services[cur_node].service_type == ServiceType.RERANK:
# rerank the inputs with the scores
reranker_parameters = kwargs.get("reranker_parameters", None)
top_n = reranker_parameters.top_n if reranker_parameters else 1
docs = inputs["texts"]
reranked_docs = []
for best_response in data[:top_n]:
reranked_docs.append(docs[best_response["index"]])
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = inputs["query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=prompt, context="\n".join(docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=prompt)
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
else:
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
next_data["inputs"] = prompt
else:
next_data = data
return next_data
def align_generator(self, gen, **kwargs):
# OpenAI response format
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
for line in gen:
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1
json_str = line[start:end]
try:
# the stream sometimes yields an empty or non-JSON chunk; fall back to passing the raw chunk through
json_data = json.loads(json_str)
if json_data["choices"][0]["finish_reason"] != "eos_token":
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
except Exception as e:
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
yield "data: [DONE]\n\n"
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
ServiceOrchestrator.align_inputs = align_inputs
ServiceOrchestrator.align_outputs = align_outputs
ServiceOrchestrator.align_generator = align_generator
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
def add_remote_service_without_rerank(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
def add_remote_service_with_guardrails(self):
guardrail_in = MicroService(
name="guardrail_in",
host=GUARDRAIL_SERVICE_HOST_IP,
port=GUARDRAIL_SERVICE_PORT,
endpoint="/v1/guardrails",
use_remote_service=True,
service_type=ServiceType.GUARDRAIL,
)
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
@@ -42,27 +284,49 @@ class ChatQnAService:
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVICE_HOST_IP,
port=RERANK_SERVICE_PORT,
endpoint="/v1/reranking",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
# guardrail_out = MicroService(
# name="guardrail_out",
# host=GUARDRAIL_SERVICE_HOST_IP,
# port=GUARDRAIL_SERVICE_PORT,
# endpoint="/v1/guardrails",
# use_remote_service=True,
# service_type=ServiceType.GUARDRAIL,
# )
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(guardrail_in, embedding)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
# self.megaservice.flow_to(llm, guardrail_out)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--without-rerank", action="store_true")
parser.add_argument("--with-guardrails", action="store_true")
args = parser.parse_args()
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
if args.without_rerank:
chatqna.add_remote_service_without_rerank()
elif args.with_guardrails:
chatqna.add_remote_service_with_guardrails()
else:
chatqna.add_remote_service()
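
With the wrapper microservices gone, `chatqna.py` above calls the backing model servers directly (TEI `/embed`, TEI `/rerank`, TGI/vLLM `/v1/chat/completions`) while the gateway keeps exposing the unified `/v1/chatqna` route. A minimal client sketch, assuming the megaservice runs locally on the default port 8888 and using the `messages` payload field from the curl examples further down this page:

```python
# Minimal streaming client sketch for the ChatQnA gateway.
# Assumes the megaservice listens on localhost:8888 (MEGA_SERVICE_PORT) and
# serves /v1/chatqna, per BACKEND_SERVICE_ENDPOINT in the READMEs below.
import requests

def ask_chatqna(question: str, host: str = "localhost", port: int = 8888) -> None:
    url = f"http://{host}:{port}/v1/chatqna"
    payload = {"messages": question}  # field name as in the Nginx curl example below
    with requests.post(url, json=payload, stream=True, timeout=300) as resp:
        resp.raise_for_status()
        for raw in resp.iter_lines():
            if not raw:
                continue
            chunk = raw.decode("utf-8")
            if chunk == "data: [DONE]":  # emitted by align_generator at end of stream
                break
            print(chunk)

if __name__ == "__main__":
    ask_chatqna("What is the revenue of Nike in 2023?")
```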

View File

@@ -30,21 +30,11 @@ opea_micro_services:
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
model-id: ${EMBEDDING_MODEL_ID}
embedding:
host: ${EMBEDDING_SERVICE_HOST_IP}
ports: ${EMBEDDING_SERVICE_PORT}
image: opea/embedding-tei:latest
endpoint: /v1/embeddings
retrieval:
host: ${RETRIEVER_SERVICE_HOST_IP}
ports: ${RETRIEVER_SERVICE_PORT}
image: opea/retriever-redis:latest
endpoint: /v1/retrieval
reranking:
host: ${RERANK_SERVICE_HOST_IP}
ports: ${RERANK_SERVICE_PORT}
image: opea/reranking-tei:latest
endpoint: /v1/reranking
tgi-service:
host: ${TGI_SERVICE_IP}
ports: ${TGI_SERVICE_PORT}
@@ -64,11 +54,6 @@ opea_micro_services:
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
model-id: ${LLM_MODEL_ID}
llm:
host: ${LLM_SERVICE_HOST_IP}
ports: ${LLM_SERVICE_PORT}
image: opea/llm-tgi:latest
endpoint: /v1/chat/completions
ui:
host: ${UI_SERVICE_HOST_IP}
ports:

View File

@@ -1,89 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
GUARDRAIL_SERVICE_HOST_IP = os.getenv("GUARDRAIL_SERVICE_HOST_IP", "0.0.0.0")
GUARDRAIL_SERVICE_PORT = int(os.getenv("GUARDRAIL_SERVICE_PORT", 9090))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
guardrail_in = MicroService(
name="guardrail_in",
host=GUARDRAIL_SERVICE_HOST_IP,
port=GUARDRAIL_SERVICE_PORT,
endpoint="/v1/guardrails",
use_remote_service=True,
service_type=ServiceType.GUARDRAIL,
)
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVICE_HOST_IP,
port=RERANK_SERVICE_PORT,
endpoint="/v1/reranking",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
# guardrail_out = MicroService(
# name="guardrail_out",
# host=GUARDRAIL_SERVICE_HOST_IP,
# port=GUARDRAIL_SERVICE_PORT,
# endpoint="/v1/guardrails",
# use_remote_service=True,
# service_type=ServiceType.GUARDRAIL,
# )
# self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm).add(guardrail_out)
self.megaservice.add(guardrail_in).add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(guardrail_in, embedding)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
# self.megaservice.flow_to(llm, guardrail_out)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
chatqna.add_remote_service()

View File

@@ -1,275 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
import os
import re
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
from langchain_core.prompts import PromptTemplate
class ChatTemplate:
@staticmethod
def generate_rag_prompt(question, documents):
context_str = "\n".join(documents)
if context_str and len(re.findall("[\u4E00-\u9FFF]", context_str)) / len(context_str) >= 0.3:
# chinese context
template = """
### 你将扮演一个乐于助人、尊重他人并诚实的助手,你的目标是帮助用户解答问题。有效地利用来自本地知识库的搜索结果。确保你的回答中只包含相关信息。如果你不确定问题的答案,请避免分享不准确的信息。
### 搜索结果:{context}
### 问题:{question}
### 回答:
"""
else:
template = """
### You are a helpful, respectful and honest assistant to help the user with questions. \
Please refer to the search results obtained from the local knowledge base. \
But be careful to not incorporate the information that you think is not relevant to the question. \
If you don't know the answer to a question, please don't share false information. \n
### Search results: {context} \n
### Question: {question} \n
### Answer:
"""
return template.format(context=context_str, question=question)
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
# EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
# EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
# RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
# RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
# RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")
# RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))
# LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
# LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
EMBEDDING_SERVER_HOST_IP = os.getenv("EMBEDDING_SERVER_HOST_IP", "0.0.0.0")
EMBEDDING_SERVER_PORT = int(os.getenv("EMBEDDING_SERVER_PORT", 6006))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 8808))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 9009))
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
inputs["inputs"] = inputs["text"]
del inputs["text"]
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
# prepare the retriever params
retriever_parameters = kwargs.get("retriever_parameters", None)
if retriever_parameters:
inputs.update(retriever_parameters.dict())
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
next_inputs["stream"] = inputs["streaming"]
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
next_inputs["presence_penalty"] = inputs["presence_penalty"]
next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
next_inputs["temperature"] = inputs["temperature"]
inputs = next_inputs
return inputs
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
next_data = {}
if self.services[cur_node].service_type == ServiceType.EMBEDDING:
assert isinstance(data, list)
next_data = {"text": inputs["inputs"], "embedding": data[0]}
elif self.services[cur_node].service_type == ServiceType.RETRIEVER:
docs = [doc["text"] for doc in data["retrieved_docs"]]
with_rerank = runtime_graph.downstream(cur_node)[0].startswith("rerank")
if with_rerank and docs:
# forward to rerank
# prepare inputs for rerank
next_data["query"] = data["initial_query"]
next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]]
else:
# forward to llm
if not docs and with_rerank:
# delete the rerank from retriever -> rerank -> llm
for ds in reversed(runtime_graph.downstream(cur_node)):
for nds in runtime_graph.downstream(ds):
runtime_graph.add_edge(cur_node, nds)
runtime_graph.delete_node_if_exists(ds)
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = data["initial_query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=data["initial_query"], context="\n".join(docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=data["initial_query"])
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
else:
prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
next_data["inputs"] = prompt
elif self.services[cur_node].service_type == ServiceType.RERANK:
# rerank the inputs with the scores
reranker_parameters = kwargs.get("reranker_parameters", None)
top_n = reranker_parameters.top_n if reranker_parameters else 1
docs = inputs["texts"]
reranked_docs = []
for best_response in data[:top_n]:
reranked_docs.append(docs[best_response["index"]])
# handle template
# if user provides template, then format the prompt with it
# otherwise, use the default template
prompt = inputs["query"]
chat_template = llm_parameters_dict["chat_template"]
if chat_template:
prompt_template = PromptTemplate.from_template(chat_template)
input_variables = prompt_template.input_variables
if sorted(input_variables) == ["context", "question"]:
prompt = prompt_template.format(question=prompt, context="\n".join(docs))
elif input_variables == ["question"]:
prompt = prompt_template.format(question=prompt)
else:
print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
else:
prompt = ChatTemplate.generate_rag_prompt(prompt, docs)
next_data["inputs"] = prompt
return next_data
def align_generator(self, gen, **kwargs):
# OpenAI response format
# b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
for line in gen:
line = line.decode("utf-8")
start = line.find("{")
end = line.rfind("}") + 1
json_str = line[start:end]
try:
# the stream sometimes yields an empty or non-JSON chunk; fall back to passing the raw chunk through
json_data = json.loads(json_str)
if json_data["choices"][0]["finish_reason"] != "eos_token":
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
except Exception as e:
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
yield "data: [DONE]\n\n"
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
ServiceOrchestrator.align_inputs = align_inputs
ServiceOrchestrator.align_outputs = align_outputs
ServiceOrchestrator.align_generator = align_generator
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
rerank = MicroService(
name="rerank",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
use_remote_service=True,
service_type=ServiceType.RERANK,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
def add_remote_service_without_rerank(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--without-rerank", action="store_true")
args = parser.parse_args()
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
if args.without_rerank:
chatqna.add_remote_service_without_rerank()
else:
chatqna.add_remote_service()

View File

@@ -1,57 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")
EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))
RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")
RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
class ChatQnAService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)
llm = MicroService(
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, llm)
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
chatqna.add_remote_service()

View File

@@ -16,31 +16,19 @@ If you are in a proxy environment, set the proxy-related environment variables:
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
### 1. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
### 1. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 3. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Set up Ollama Service and Build LLM Image
### 2. Set up Ollama Service and Build LLM Image
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
Please set up Ollama on your PC by following the instructions below. This sets the entrypoint Ollama needs to serve the ChatQnA examples.
#### 4.1 Set Up Ollama LLM Service
#### 2.1 Set Up Ollama LLM Service
Install the Ollama service with one command:
@@ -79,20 +67,20 @@ NAME ID SIZE MODIFIED
llama3:latest 365c0bd3c000 4.7 GB 5 days ago
```
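
Before building the LLM image, you can smoke-test the pulled model against Ollama's generate API (a sketch assuming Ollama's default port 11434 and the `llama3` model listed above; the same endpoint is exercised in the verification steps below):

```python
# Quick Ollama smoke test: stream a completion from the local llama3 model.
# Assumes Ollama listens on its default port 11434.
import json
import requests

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama3", "prompt": "What is Deep Learning?"},
    stream=True,
    timeout=300,
)
resp.raise_for_status()
for raw in resp.iter_lines():
    if raw:
        chunk = json.loads(raw)  # Ollama streams one JSON object per line
        print(chunk.get("response", ""), end="", flush=True)
        if chunk.get("done"):
            break
print()
```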
#### 4.2 Build LLM Image
#### 2.2 Build LLM Image
```bash
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```
### 5. Build Dataprep Image
### 3. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 6. Build MegaService Docker Image
### 4. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
@@ -103,7 +91,7 @@ docker build --no-cache -t opea/chatqna:latest -f Dockerfile .
cd ../../..
```
### 7. Build UI Docker Image
### 5. Build UI Docker Image
Build the frontend Docker image with the command below:
@@ -113,15 +101,13 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```
Then run the command `docker images`; you should see the following 7 Docker images:
Then run the command `docker images`; you should see the following 5 Docker images:
1. `opea/dataprep-redis:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-ollama:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
2. `opea/retriever-redis:latest`
3. `opea/llm-ollama:latest`
4. `opea/chatqna:latest`
5. `opea/chatqna-ui:latest`
## 🚀 Start Microservices
@@ -162,15 +148,14 @@ export https_proxy=${your_http_proxy}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVER_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
@@ -184,15 +169,14 @@ export OLLAMA_MODEL="llama3"
set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set TEI_EMBEDDING_ENDPOINT=http://%host_ip%:6006
set TEI_RERANKING_ENDPOINT=http://%host_ip%:8808
set REDIS_URL=redis://%host_ip%:6379
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set MEGA_SERVICE_HOST_IP=%host_ip%
set EMBEDDING_SERVICE_HOST_IP=%host_ip%
set EMBEDDING_SERVER_HOST_IP=%host_ip%
set RETRIEVER_SERVICE_HOST_IP=%host_ip%
set RERANK_SERVICE_HOST_IP=%host_ip%
set LLM_SERVICE_HOST_IP=%host_ip%
set RERANK_SERVER_HOST_IP=%host_ip%
set LLM_SERVER_HOST_IP=%host_ip%
set BACKEND_SERVICE_ENDPOINT=http://%host_ip%:8888/v1/chatqna
set DATAPREP_SERVICE_ENDPOINT=http://%host_ip%:6007/v1/dataprep
@@ -231,16 +215,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
2. Retriever Microservice
To validate the retriever microservice, you need to generate a mock embedding vector of length 768 with a Python script:
```bash
@@ -251,7 +226,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
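
The snippet above is truncated by the diff view; an equivalent check in Python, assuming the retriever listens on its default port 7000 at `/v1/retrieval` and accepts a JSON body with `text` and `embedding` fields (768 matches the BAAI/bge-base-en-v1.5 embedding size):

```python
# Validate the retriever with a mock 768-dimensional embedding.
# Assumes the retriever microservice serves /v1/retrieval on port 7000
# and accepts {"text": ..., "embedding": [...]}.
import random
import requests

mock_embedding = [random.uniform(-1, 1) for _ in range(768)]
resp = requests.post(
    "http://localhost:7000/v1/retrieval",
    json={"text": "What is the revenue of Nike in 2023?", "embedding": mock_embedding},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```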
4. TEI Reranking Service
3. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
@@ -260,22 +235,13 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
5. Reranking Microservice
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. Ollama Service
4. Ollama Service
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3", "prompt":"What is Deep Learning?"}'
```
7. LLM Microservice
5. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions\
@@ -284,7 +250,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
8. MegaService
6. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -292,7 +258,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
9. Dataprep Microservice (Optional)
7. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
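
The dataprep commands themselves fall outside this diff; as an illustration, a sketch that uploads a single document through the dataprep endpoint, assuming `DATAPREP_SERVICE_ENDPOINT` from the environment setup above (`http://${host_ip}:6007/v1/dataprep`) and a multipart `files` upload field (the file name is only an example):

```python
# Illustrative document upload to the dataprep microservice.
# Assumes DATAPREP_SERVICE_ENDPOINT, e.g. http://localhost:6007/v1/dataprep,
# and a multipart "files" field; "nke-10k-2023.pdf" is a placeholder file name.
import requests

with open("nke-10k-2023.pdf", "rb") as f:
    resp = requests.post(
        "http://localhost:6007/v1/dataprep",
        files={"files": ("nke-10k-2023.pdf", f, "application/pdf")},
        timeout=300,
    )
resp.raise_for_status()
print(resp.json())
```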

View File

@@ -36,20 +36,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -82,23 +68,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-aipc-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
llm:
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
@@ -121,10 +90,8 @@ services:
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- llm
ports:
- "8888:8888"
@@ -133,10 +100,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVICE_PORT:-6006}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVICE_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVICE_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVICE_PORT:-9000}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-aipc-ui-server:

View File

@@ -97,61 +97,20 @@ After launching your instance, you can connect to it using SSH (for Linux instan
First of all, you need to build the Docker images locally and install the required Python package.
### 1. Build Embedding Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
### 1. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 3. Build Rerank Image
> Skip for ChatQnA without Rerank pipeline
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
#### Use TGI as backend
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
#### Use vLLM as backend
Build vLLM docker.
```bash
git clone https://github.com/vllm-project/vllm.git
cd ./vllm/
docker build --no-cache -t opea/vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.cpu .
cd ..
```
Build microservice.
```bash
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
```
### 5. Build Dataprep Image
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 6. Build MegaService Docker Image
### 3. Build MegaService Docker Image
1. MegaService with Rerank
@@ -173,7 +132,7 @@ cd ..
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
### 7. Build UI Docker Image
### 4. Build UI Docker Image
Build the frontend Docker image with the command below:
@@ -182,7 +141,7 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 8. Build Conversational React UI Docker Image (Optional)
### 5. Build Conversational React UI Docker Image (Optional)
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice:
@@ -193,23 +152,20 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
```
### 9. Build Nginx Docker Image
### 6. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`; you should see the following 8 Docker images:
Then run the command `docker images`; you should see the following 5 Docker images:
1. `opea/dataprep-redis:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-redis:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest` or `opea/llm-vllm:latest`
6. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
7. `opea/chatqna-ui:latest`
8. `opea/nginx:latest`
2. `opea/retriever-redis:latest`
3. `opea/chatqna:latest` or `opea/chatqna-without-rerank:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
## 🚀 Start Microservices
@@ -315,16 +271,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
2. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
@@ -340,7 +287,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
4. TEI Reranking Service
3. TEI Reranking Service
> Skip for ChatQnA without Rerank pipeline
@@ -351,18 +298,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
5. Reranking Microservice
> Skip for ChatQnA without Rerank pipeline
```bash
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. LLM backend Service
4. LLM backend Service
On first startup, this service takes extra time to download the model files; after that finishes, the service is ready.
@@ -395,31 +331,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
```
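
Because of that first-start download, a short readiness poll is handier than repeated manual curls (a sketch, assuming the backend is published on host port 9009 per `LLM_SERVER_PORT` in the compose files and exposes a `/health` route, as both TGI and vLLM do):

```python
# Poll the LLM backend (TGI or vLLM) until the model has finished loading.
# Assumes the backend is published on host port 9009 (LLM_SERVER_PORT) and
# answers GET /health with HTTP 200 once ready.
import time
import requests

def wait_for_backend(url: str = "http://localhost:9009/health", timeout_s: int = 1800) -> None:
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        try:
            if requests.get(url, timeout=5).status_code == 200:
                print("LLM backend is ready")
                return
        except requests.RequestException:
            pass  # still starting up; model files may be downloading
        time.sleep(10)
    raise TimeoutError(f"LLM backend not ready after {timeout_s}s")

wait_for_backend()
```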
7. LLM Microservice
This service depends on the LLM backend service above. On first startup it can take a long time to become ready, so wait for the backend to finish loading.
```bash
# TGI service
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
For TGI parameters, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (note that we rename "max_new_tokens" to "max_tokens").
```bash
# vLLM Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
-H 'Content-Type: application/json'
```
For vLLM parameters, refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html).
8. MegaService
5. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -427,7 +339,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
9. Nginx Service
6. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -435,7 +347,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
10. Dataprep Microservice (Optional)
7. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:

View File

@@ -70,38 +70,20 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
### 1. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/qdrant/haystack/Dockerfile .
```
### 3. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 5. Build Dataprep Image
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-qdrant:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/qdrant/langchain/Dockerfile .
cd ..
```
### 6. Build MegaService Docker Image
### 3. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
@@ -112,7 +94,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
### 7. Build UI Docker Image
### 4. Build UI Docker Image
Build the frontend Docker image with the command below:
@@ -122,7 +104,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```
### 8. Build Conversational React UI Docker Image (Optional)
### 5. Build Conversational React UI Docker Image (Optional)
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice:
@@ -136,15 +118,12 @@ docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https
cd ../../../..
```
Then run the command `docker images`; you should see the following 7 Docker images:
Then run the command `docker images`; you should see the following 4 Docker images:
1. `opea/dataprep-qdrant:latest`
2. `opea/embedding-tei:latest`
3. `opea/retriever-qdrant:latest`
4. `opea/reranking-tei:latest`
5. `opea/llm-tgi:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest`
2. `opea/retriever-qdrant:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
## 🚀 Start Microservices
@@ -194,17 +173,15 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6040"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:6041"
export TGI_LLM_ENDPOINT="http://${host_ip}:6042"
export QDRANT_HOST=${host_ip}
export QDRANT_PORT=6333
export INDEX_NAME="rag-qdrant"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_SERVER_HOST_IP=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
```
@@ -234,16 +211,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6044/v1/embeddings\
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
2. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
@@ -259,7 +227,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
4. TEI Reranking Service
3. TEI Reranking Service
```bash
curl http://${host_ip}:6041/rerank \
@@ -268,16 +236,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
5. Reranking Microservice
```bash
curl http://${host_ip}:6046/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. TGI Service
4. TGI Service
On first startup, this service takes extra time to download the model files; after that finishes, the service is ready.
@@ -302,16 +261,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
-H 'Content-Type: application/json'
```
7. LLM Microservice
```bash
curl http://${host_ip}:6047/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
8. MegaService
5. MegaService
```bash
curl http://${host_ip}:8912/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -319,7 +269,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
9. Dataprep Microservice (Optional)
6. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:

View File

@@ -38,20 +38,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -85,23 +71,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
@@ -118,36 +87,16 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
@@ -155,10 +104,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-6006}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-9009}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-xeon-ui-server:
@@ -178,25 +131,6 @@ services:
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
chaqna-xeon-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-xeon-nginx-server
depends_on:
- chaqna-xeon-backend-server
- chaqna-xeon-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
View File
@@ -1,184 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${REDIS_HOST}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-server
ports:
- "6006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
# embedding:
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
# container_name: embedding-tei-server
# depends_on:
# - tei-embedding-service
# ports:
# - "6000:6000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
# restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
# reranking:
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
# container_name: reranking-tei-xeon-server
# depends_on:
# - tei-reranking-service
# ports:
# - "8000:8000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
# llm:
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
# container_name: llm-tgi-server
# depends_on:
# - tgi-service
# ports:
# - "9000:9000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
# - embedding
- dataprep-redis-service
- retriever
- tei-reranking-service
# - reranking
- tgi-service
# - llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
ipc: host
restart: always
chaqna-xeon-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-xeon-ui-server
depends_on:
- chaqna-xeon-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge
View File
@@ -38,20 +38,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6044:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-qdrant:${TAG:-latest}
container_name: retriever-qdrant-server
@@ -84,23 +70,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "6046:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
container_name: tgi-service
@@ -117,35 +86,15 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "6047:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- qdrant-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8912:8888"
environment:
@@ -153,14 +102,15 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_PORT=${EMBEDDING_SERVICE_PORT}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-6040}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- RERANK_SERVICE_PORT=${RERANK_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- RETRIEVER_SERVICE_PORT=${RETRIEVER_SERVICE_PORT:-6045}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-6041}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-6042}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-xeon-ui-server:
View File
@@ -37,23 +37,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-embedding-service"
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -90,26 +73,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-xeon-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-reranking-service"
restart: unless-stopped
vllm_service:
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
container_name: vllm-service
@@ -125,39 +88,15 @@ services:
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL_ID: ${LLM_MODEL_ID}
command: --model $LLM_MODEL_ID --host 0.0.0.0 --port 80
llm:
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-server
depends_on:
- vllm_service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
LANGCHAIN_PROJECT: "opea-llm-service"
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm_service
- llm
ports:
- "8888:8888"
environment:
@@ -165,10 +104,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-6006}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-9009}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-xeon-ui-server:

View File

@@ -38,20 +38,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -85,34 +71,15 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-xeon-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-xeon-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- dataprep-redis-service
- retriever
- tgi-service
- llm
ports:
- "8888:8888"
environment:
@@ -120,9 +87,12 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-6006}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-9009}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-xeon-ui-server:
View File
@@ -8,17 +8,14 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
export vLLM_LLM_ENDPOINT="http://${host_ip}:9009"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export REDIS_HOST=${host_ip}
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVER_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
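# These exports are typically applied by sourcing this script before starting the stack,
# e.g. (a sketch; script and compose file names assumed from the deployment guide):
#   source ./set_env.sh
#   docker compose -f ./compose.yaml up -d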
View File
@@ -70,73 +70,19 @@ curl http://${host_ip}:8888/v1/chatqna \
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
### 1. Build Embedding Image
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 2. Build Retriever Image
### 1. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 3. Build Rerank Image
> Skip for ChatQnA without Rerank pipeline
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 4. Build LLM Image
You can use different LLM serving solutions; choose one of the following options.
#### 4.1 Use TGI
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
#### 4.2 Use VLLM
Build the vLLM Docker image.
```bash
docker build --no-cache -t opea/llm-vllm-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu .
```
Build the microservice Docker image.
```bash
docker build --no-cache -t opea/llm-vllm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/langchain/Dockerfile .
```
#### 4.3 Use VLLM-on-Ray
Build the vLLM-on-Ray Docker image.
```bash
docker build --no-cache -t opea/llm-vllm-ray-hpu:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/dependency/Dockerfile .
```
Build the microservice Docker image.
```bash
docker build --no-cache -t opea/llm-vllm-ray:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/vllm/ray/Dockerfile .
```
### 5. Build Dataprep Image
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
```
### 6. Build Guardrails Docker Image (Optional)
### 3. Build Guardrails Docker Image (Optional)
To fortify AI initiatives in production, the Guardrails microservice can secure model inputs and outputs, helping you build trustworthy, safe, and secure LLM-based applications.
@@ -144,7 +90,7 @@ To fortify AI initiatives in production, Guardrails microservice can secure mode
docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/guardrails/llama_guard/langchain/Dockerfile .
```
### 7. Build MegaService Docker Image
### 4. Build MegaService Docker Image
1. MegaService with Rerank
@@ -176,7 +122,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
### 8. Build UI Docker Image
### 5. Build UI Docker Image
Construct the frontend Docker image using the command below:
@@ -185,7 +131,7 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 9. Build Conversational React UI Docker Image (Optional)
### 6. Build Conversational React UI Docker Image (Optional)
Build the frontend Docker image that enables a conversational experience with the ChatQnA megaservice using the command below:
@@ -196,21 +142,18 @@ cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
```
### 10. Build Nginx Docker Image
### 7. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`; you should see the following 8 Docker images:
Then run the command `docker images`; you should see the following 5 Docker images:
- `opea/embedding-tei:latest`
- `opea/retriever-redis:latest`
- `opea/reranking-tei:latest`
- `opea/llm-tgi:latest` or `opea/llm-vllm:latest` or `opea/llm-vllm-ray:latest`
- `opea/dataprep-redis:latest`
- `opea/chatqna:latest` or `opea/chatqna-guardrails:latest` or `opea/chatqna-without-rerank:latest`
- `opea/chatqna:latest`
- `opea/chatqna-ui:latest`
- `opea/nginx:latest`
@@ -338,16 +281,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
2. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
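A minimal sketch (assuming the default 768-dimensional `BAAI/bge-base-en-v1.5` embedding model and the retriever's 7000 port mapping from the compose file):
```bash
# Generate a mock 768-dimensional embedding and query the retriever with it
export your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
curl http://${host_ip}:7000/v1/retrieval \
  -X POST \
  -d "{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${your_embedding}}" \
  -H 'Content-Type: application/json'
```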
@@ -363,7 +297,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
4. TEI Reranking Service
3. TEI Reranking Service
> Skip for ChatQnA without Rerank pipeline
@@ -374,18 +308,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-H 'Content-Type: application/json'
```
5. Reranking Microservice
> Skip for ChatQnA without Rerank pipeline
```bash
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. LLM Backend Service
4. LLM Backend Service
On its first startup, this service takes extra time to download the model files. Once the download finishes, the service will be ready.
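While waiting, you can poll the serving backend's health endpoint (a sketch, assuming the TGI or vLLM server is published on port 9009 as in the compose file):
```bash
# Poll until the LLM serving backend responds; model download and warmup can take a while
until curl -sf http://${host_ip}:9009/health > /dev/null; do
  echo "waiting for the LLM backend..."
  sleep 10
done
echo "LLM backend is ready"
```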
@@ -430,39 +353,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
7. LLM Microservice
```bash
# TGI service
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
For parameters in TGI mode, please refer to the [HuggingFace InferenceClient API](https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.text_generation) (except that we rename "max_new_tokens" to "max_tokens").
```bash
# vLLM Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_p":1,"temperature":0.7,"frequency_penalty":0,"presence_penalty":0, "streaming":false}' \
-H 'Content-Type: application/json'
```
For parameters in vLLM mode, you can refer to the [LangChain VLLMOpenAI API](https://api.python.langchain.com/en/latest/llms/langchain_community.llms.vllm.VLLMOpenAI.html).
```bash
# vLLM-on-Ray Service
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"presence_penalty":1.03","streaming":false}' \
-H 'Content-Type: application/json'
```
For parameters in vLLM-on-Ray mode, you can refer to the [LangChain ChatOpenAI API](https://python.langchain.com/v0.2/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html).
8. MegaService
5. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -470,7 +361,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```
9. Nginx Service
6. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -478,7 +369,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
10. Dataprep Microservice (Optional)
7. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
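For example, a local file can be ingested through the dataprep endpoint (a sketch; `./your_document.pdf` is a placeholder path):
```bash
# Upload a document into the knowledge base via the dataprep microservice
curl -X POST "http://${host_ip}:6007/v1/dataprep" \
  -H "Content-Type: multipart/form-data" \
  -F "files=@./your_document.pdf"
```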
@@ -547,7 +438,7 @@ curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
-H "Content-Type: application/json"
```
10. Guardrails (Optional)
8. Guardrails (Optional)
```bash
curl http://${host_ip}:9090/v1/guardrails\
View File
@@ -39,26 +39,12 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: ${tei_embedding_devices}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -90,23 +76,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
@@ -121,7 +90,7 @@ services:
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: ${llm_service_devices}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
@@ -131,36 +100,16 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
@@ -168,10 +117,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
@@ -191,25 +144,6 @@ services:
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
chaqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-gaudi-nginx-server
depends_on:
- chaqna-gaudi-backend-server
- chaqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP}
- FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT}
- BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME}
- BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP}
- BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
View File
@@ -82,20 +82,6 @@ services:
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -127,23 +113,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
@@ -169,23 +138,6 @@ services:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-guardrails:${TAG:-latest}
container_name: chatqna-gaudi-guardrails-server
@@ -194,12 +146,9 @@ services:
- tgi-guardrails-service
- guardrails
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
@@ -208,10 +157,15 @@ services:
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- GUARDRAIL_SERVICE_HOST_IP=${GUARDRAIL_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- GUARDRAIL_SERVICE_PORT=${GUARDRAIL_SERVICE_PORT:-9090}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
View File
@@ -1,201 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
# embedding:
# image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
# container_name: embedding-tei-server
# depends_on:
# - tei-embedding-service
# ports:
# - "6000:6000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
# restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
INDEX_NAME: ${INDEX_NAME}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
# reranking:
# image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
# container_name: reranking-tei-gaudi-server
# depends_on:
# - tei-reranking-service
# ports:
# - "8000:8000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "8005:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096
# llm:
# image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
# container_name: llm-tgi-gaudi-server
# depends_on:
# - tgi-service
# ports:
# - "9000:9000"
# ipc: host
# environment:
# no_proxy: ${no_proxy}
# http_proxy: ${http_proxy}
# https_proxy: ${https_proxy}
# TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
# HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
# HF_HUB_DISABLE_PROGRESS_BARS: 1
# HF_HUB_ENABLE_HF_TRANSFER: 0
# restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
# - embedding
- retriever
- tei-reranking-service
# - reranking
- tgi-service
# - llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chaqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- CHAT_BASE_URL=${BACKEND_SERVICE_ENDPOINT}
- UPLOAD_FILE_BASE_URL=${DATAPREP_SERVICE_ENDPOINT}
- GET_FILE=${DATAPREP_GET_FILE_ENDPOINT}
- DELETE_FILE=${DATAPREP_DELETE_FILE_ENDPOINT}
ipc: host
restart: always
networks:
default:
driver: bridge
View File
@@ -43,20 +43,6 @@ services:
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -88,23 +74,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
@@ -125,34 +94,15 @@ services:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
llm:
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
container_name: llm-vllm-gaudi-server
depends_on:
- vllm-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_ENDPOINT: ${vLLM_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm-service
- llm
ports:
- "8888:8888"
environment:
@@ -160,11 +110,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8007}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:

View File
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID}
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -88,23 +74,6 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-gaudi-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
vllm-ray-service:
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
container_name: vllm-ray-gaudi-server
@@ -125,34 +94,15 @@ services:
- SYS_NICE
ipc: host
command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True"
llm:
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
container_name: llm-vllm-ray-gaudi-server
depends_on:
- vllm-ray-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
vLLM_RAY_ENDPOINT: ${vLLM_RAY_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
LLM_MODEL: ${LLM_MODEL_ID}
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- vllm-ray-service
- llm
ports:
- "8888:8888"
environment:
@@ -160,11 +110,14 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- RERANK_SERVER_HOST_IP=${RERANK_SERVER_HOST_IP}
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-8808}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8006}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
View File
@@ -45,20 +45,6 @@ services:
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -99,33 +85,14 @@ services:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 1024 --max-total-tokens 2048
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tgi-service
- llm
ports:
- "8888:8888"
environment:
@@ -133,9 +100,12 @@ services:
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- EMBEDDING_SERVER_HOST_IP=${EMBEDDING_SERVER_HOST_IP}
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-8090}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8005}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-gaudi-ui-server:
View File
@@ -26,14 +26,6 @@ The warning messages point out the variables are **NOT** set.
```
ubuntu@gaudi-vm:~/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi$ docker compose -f ./compose.yaml up -d
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_API_KEY" variable is not set. Defaulting to a blank string.
WARN[0000] The "LANGCHAIN_TRACING_V2" variable is not set. Defaulting to a blank string.
WARN[0000] /home/ubuntu/GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml: `version` is obsolete
```
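If you do not use LangChain tracing, the unset-variable warnings are harmless; exporting empty values suppresses them (a sketch):
```
export LANGCHAIN_API_KEY=""
export LANGCHAIN_TRACING_V2=""
```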
@@ -172,24 +164,7 @@ This test the embedding service. It sends "What is Deep Learning?" to the embedd
**Note**: The vector dimension is decided by the embedding model, and the output values depend on the model and the input data.
### 2 Embedding Microservice
```
curl http://${host_ip}:6000/v1/embeddings\
-X POST \
-d '{"text":"What is Deep Learning?"}' \
-H 'Content-Type: application/json'
```
This tests the embedding microservice. It sends `What is Deep Learning?` to the microservice, which calls the embedding service to embed the input.
The embedding server is stateless, while the microservice keeps state; note the `id` field in the output of the embedding microservice.
```
{"id":"e8c85e588a235a4bc4747a23b3a71d8f","text":"What is Deep Learning?","embedding":[0.00030903306,-0.06356524,0.0025720573,-0.012404448,0.050649878, ..., 0.02776986,-0.0246678,0.03999176,0.037477136,-0.006806653,0.02261455,-0.04570737,-0.033122733,0.022785513,0.0160026,-0.021343587,-0.029969815,-0.0049176104]}
```
### 3 Retriever Microservice
### 2 Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script.
The length of the embedding vector is determined by the embedding model.
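For example (a sketch, assuming the 768-dimensional `BAAI/bge-base-en-v1.5` model), a mock vector can be generated inline:
```
# The list length must match the embedding model's output dimension (768 for bge-base-en-v1.5)
export your_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
```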
@@ -212,7 +187,7 @@ The output is retrieved text that relevant to the input data:
```
### 4 TEI Reranking Service
### 3 TEI Reranking Service
Reranking service
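The direct call looks like the following (a sketch, assuming the 8808:80 port mapping used by the TEI reranking container):
```
# Ask the TEI reranker to score candidate texts against the query
curl http://${host_ip}:8808/rerank \
  -X POST \
  -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
  -H 'Content-Type: application/json'
```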
@@ -228,24 +203,7 @@ Output is:
It scores each input text against the query.
### 5 Reranking Microservice
```
curl http://${host_ip}:8000/v1/reranking\
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
Here is the output:
```
{"id":"e1eb0e44f56059fc01aa0334b1dac313","query":"Human: Answer the question based only on the following context:\n Deep learning is...\n Question: What is Deep Learning?","max_new_tokens":1024,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}
```
You may notice that the reranking microservice is stateful (its output carries an `id` and other metadata), while the reranking service is not.
### 6 TGI Service
### 4 TGI Service
```
curl http://${host_ip}:8008/generate \
@@ -277,56 +235,7 @@ and the log shows model warm up, please wait for a while and try it later.
2024-06-05T05:45:27.867833811Z 2024-06-05T05:45:27.867759Z INFO text_generation_router: router/src/main.rs:221: Warming up model
```
### 7 LLM Microservice
```
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
You will get generated text from LLM:
```
data: b'\n'
data: b'\n'
data: b'Deep'
data: b' learning'
data: b' is'
data: b' a'
data: b' subset'
data: b' of'
data: b' machine'
data: b' learning'
data: b' that'
data: b' uses'
data: b' algorithms'
data: b' to'
data: b' learn'
data: b' from'
data: b' data'
data: [DONE]
```
### 8 MegaService
### 5 MegaService
```
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
View File
@@ -8,15 +8,13 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8005"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVER_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVER_HOST_IP=${host_ip}
export LLM_SERVER_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
View File
@@ -77,37 +77,19 @@ git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build Embedding Image
```bash
docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
```
### 3. Build Retriever Image
### 2. Build Retriever Image
```bash
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 4. Build Rerank Image
```bash
docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
```
### 5. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 6. Build Dataprep Image
### 3. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
```
### 7. Build MegaService Docker Image
### 4. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image using the command below:
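A sketch of the typical build invocation (the exact Dockerfile path may differ in your checkout):
```bash
# Build the ChatQnA MegaService image from the example directory
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```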
@@ -118,7 +100,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```
### 8. Build UI Docker Image
### 5. Build UI Docker Image
Construct the frontend Docker image using the command below:
@@ -128,7 +110,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```
### 9. Build React UI Docker Image (Optional)
### 6. Build React UI Docker Image (Optional)
Construct the frontend Docker image using the command below:
@@ -138,23 +120,20 @@ docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=
cd ../../../..
```
### 10. Build Nginx Docker Image
### 7. Build Nginx Docker Image
```bash
cd GenAIComps
docker build -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/nginx/Dockerfile .
```
Then run the command `docker images`; you should see the following 8 Docker images:
Then run the command `docker images`; you should see the following 5 Docker images:
1. `opea/embedding-tei:latest`
2. `opea/retriever-redis:latest`
3. `opea/reranking-tei:latest`
4. `opea/llm-tgi:latest`
5. `opea/dataprep-redis:latest`
6. `opea/chatqna:latest`
7. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
8. `opea/nginx:latest`
1. `opea/retriever-redis:latest`
2. `opea/dataprep-redis:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
5. `opea/nginx:latest`
## 🚀 Start MicroServices and MegaService
@@ -215,16 +194,7 @@ docker compose up -d
-H 'Content-Type: application/json'
```
2. Embedding Microservice
```bash
curl http://${host_ip}:6000/v1/embeddings \
-X POST \
-d '{"text":"hello"}' \
-H 'Content-Type: application/json'
```
3. Retriever Microservice
2. Retriever Microservice
To consume the retriever microservice, you need to generate a mock embedding vector with a Python script. The length of the embedding vector is determined by the embedding model.
@@ -240,7 +210,7 @@ docker compose up -d
-H 'Content-Type: application/json'
```
4. TEI Reranking Service
3. TEI Reranking Service
```bash
curl http://${host_ip}:8808/rerank \
@@ -249,16 +219,7 @@ docker compose up -d
-H 'Content-Type: application/json'
```
5. Reranking Microservice
```bash
curl http://${host_ip}:8000/v1/reranking \
-X POST \
-d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
-H 'Content-Type: application/json'
```
6. TGI Service
4. TGI Service
On its first startup, this service takes extra time to download the model files. Once the download finishes, the service will be ready.
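You can watch the download and warmup progress in the container logs (a sketch, assuming the `tgi-gaudi-server` container name from the compose file):
```bash
# Watch model download and warmup progress on the Gaudi TGI server
docker logs -f tgi-gaudi-server
```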
@@ -283,16 +244,7 @@ docker compose up -d
-H 'Content-Type: application/json'
```
7. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions \
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
8. MegaService
5. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -300,7 +252,7 @@ docker compose up -d
}'
```
9. Nginx Service
6. Nginx Service
```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
@@ -308,7 +260,7 @@ docker compose up -d
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```
10. Dataprep Microservice (Optional)
7. Dataprep Microservice (Optional)
If you want to update the default knowledge base, you can use the following commands:
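For example, the files currently indexed can be listed through the dataprep `get_file` endpoint (a sketch, using the endpoint exported during environment setup):
```bash
# List the files currently stored in the knowledge base
curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
  -H "Content-Type: application/json"
```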
View File
@@ -46,20 +46,6 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
embedding:
image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
container_name: embedding-tei-server
depends_on:
- tei-embedding-service
ports:
- "6000:6000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -98,23 +84,6 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
reranking:
image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
container_name: reranking-tei-server
depends_on:
- tei-reranking-service
ports:
- "8000:8000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.2.0
container_name: tgi-server
@@ -138,35 +107,15 @@ services:
- driver: nvidia
count: 1
capabilities: [gpu]
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "9000:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
restart: unless-stopped
chaqna-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- embedding
- retriever
- tei-reranking-service
- reranking
- tgi-service
- llm
ports:
- "8888:8888"
environment:
View File
@@ -8,15 +8,9 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:8090"
export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
export TGI_LLM_ENDPOINT="http://${host_ip}:8008"
export REDIS_URL="redis://${host_ip}:6379"
export INDEX_NAME="rag-redis"
export MEGA_SERVICE_HOST_IP=${host_ip}
export EMBEDDING_SERVICE_HOST_IP=${host_ip}
export RETRIEVER_SERVICE_HOST_IP=${host_ip}
export RERANK_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"

View File

@@ -23,18 +23,6 @@ services:
dockerfile: ./Dockerfile.without_rerank
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-without-rerank:${TAG:-latest}
chatqna-no-wrapper:
build:
context: ../
dockerfile: ./Dockerfile.no_wrapper
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest}
chatqna-no-wrapper-without-rerank:
build:
context: ../
dockerfile: ./Dockerfile.no_wrapper_without_rerank
extends: chatqna
image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest}
chatqna-ui:
build:
context: ../ui

View File

@@ -16,12 +16,9 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- redis-vector-db: redis/redis-stack:7.2.0-v9
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- embedding: opea/embedding-tei:latest
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- reranking: opea/reranking-tei:latest
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- llm: opea/llm-tgi:latest
- chaqna-xeon-backend-server: opea/chatqna:latest
If you want to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.

View File

@@ -27,27 +27,6 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -72,50 +51,6 @@ data:
https_proxy: ""
no_proxy: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -208,7 +143,7 @@ metadata:
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
PORT: "2080"
PORT: "2083"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
@@ -362,31 +297,6 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -412,31 +322,6 @@ spec:
app.kubernetes.io/name: guardrails-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -466,31 +351,6 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -584,7 +444,7 @@ spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
targetPort: 2083
protocol: TCP
name: tgi
selector:
@@ -786,39 +646,36 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
name: chatqna-redis-vector-db
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/version: "7.2.0-v9"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
- name: redis-vector-db
securityContext:
allowPrivilegeEscalation: false
capabilities:
@@ -829,38 +686,35 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
image: "redis/redis-stack:7.2.0-v9"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /data
name: data-volume
- mountPath: /redisinsight
name: redisinsight-volume
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
ports:
- name: redis-service
containerPort: 6379
protocol: TCP
- name: redis-insight
containerPort: 8001
protocol: TCP
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
tcpSocket:
port: 6379 # Probe the Redis port
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources:
{}
volumes:
- name: data-volume
emptyDir: {}
- name: redisinsight-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
@@ -942,234 +796,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-redis-vector-db
labels:
helm.sh/chart: redis-vector-db-1.0.0
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "7.2.0-v9"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: redis-vector-db
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "redis/redis-stack:7.2.0-v9"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
name: data-volume
- mountPath: /redisinsight
name: redisinsight-volume
- mountPath: /tmp
name: tmp
ports:
- name: redis-service
containerPort: 6379
protocol: TCP
- name: redis-insight
containerPort: 8001
protocol: TCP
startupProbe:
tcpSocket:
port: 6379 # Probe the Redis port
initialDelaySeconds: 5
periodSeconds: 5
failureThreshold: 120
resources:
{}
volumes:
- name: data-volume
emptyDir: {}
- name: redisinsight-volume
emptyDir: {}
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1483,7 +1109,7 @@ spec:
name: tmp
ports:
- name: http
containerPort: 2080
containerPort: 2083
protocol: TCP
livenessProbe:
failureThreshold: 24
@@ -1624,16 +1250,24 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT
value: "9090"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,71 +27,6 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -312,56 +247,6 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -391,31 +276,6 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -686,162 +546,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -914,84 +618,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1366,16 +992,20 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,71 +27,6 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -313,56 +248,6 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -392,31 +277,6 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -687,162 +547,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -915,84 +619,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1369,16 +995,20 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,27 +27,6 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/guardrails-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -72,50 +51,6 @@ data:
https_proxy: ""
no_proxy: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -208,7 +143,7 @@ metadata:
app.kubernetes.io/managed-by: Helm
data:
MODEL_ID: "meta-llama/Meta-Llama-Guard-2-8B"
PORT: "2080"
PORT: "2083"
HF_TOKEN: "insert-your-huggingface-token-here"
http_proxy: ""
https_proxy: ""
@@ -364,31 +299,6 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/guardrails-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -414,31 +324,6 @@ spec:
app.kubernetes.io/name: guardrails-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -468,31 +353,6 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -586,7 +446,7 @@ spec:
type: ClusterIP
ports:
- port: 80
targetPort: 2080
targetPort: 2083
protocol: TCP
name: tgi
selector:
@@ -788,84 +648,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/guardrails-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -944,84 +726,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1094,84 +798,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1486,7 +1112,7 @@ spec:
name: tmp
ports:
- name: http
containerPort: 2080
containerPort: 2083
protocol: TCP
livenessProbe:
failureThreshold: 24
@@ -1629,16 +1255,24 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT
value: "9090"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -27,71 +27,6 @@ data:
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/embedding-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-embedding-usvc-config
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_EMBEDDING_ENDPOINT: "http://chatqna-tei"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-llm-uservice-config
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://chatqna-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/reranking-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: ConfigMap
metadata:
name: chatqna-reranking-usvc-config
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TEI_RERANKING_ENDPOINT: "http://chatqna-teirerank"
http_proxy: ""
https_proxy: ""
no_proxy: ""
LOGFLAG: ""
---
# Source: chatqna/charts/retriever-usvc/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -313,56 +248,6 @@ spec:
app.kubernetes.io/name: data-prep
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/embedding-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 6000
targetPort: 6000
protocol: TCP
name: embedding-usvc
selector:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 9000
targetPort: 9000
protocol: TCP
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/redis-vector-db/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -392,31 +277,6 @@ spec:
app.kubernetes.io/name: redis-vector-db
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/reranking-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: v1
kind: Service
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
type: ClusterIP
ports:
- port: 8000
targetPort: 8000
protocol: TCP
name: reranking-usvc
selector:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
---
# Source: chatqna/charts/retriever-usvc/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -687,162 +547,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/embedding-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-embedding-usvc
labels:
helm.sh/chart: embedding-usvc-1.0.0
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: embedding-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-embedding-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/embedding-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: embedding-usvc
containerPort: 6000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: embedding-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-llm-uservice
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-llm-uservice-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/llm-tgi:latest"
imagePullPolicy: IfNotPresent
ports:
- name: llm-uservice
containerPort: 9000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: llm-uservice
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/redis-vector-db/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -915,84 +619,6 @@ spec:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/reranking-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
apiVersion: apps/v1
kind: Deployment
metadata:
name: chatqna-reranking-usvc
labels:
helm.sh/chart: reranking-usvc-1.0.0
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
template:
metadata:
labels:
app.kubernetes.io/name: reranking-usvc
app.kubernetes.io/instance: chatqna
spec:
securityContext:
{}
containers:
- name: chatqna
envFrom:
- configMapRef:
name: chatqna-reranking-usvc-config
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "opea/reranking-tei:latest"
imagePullPolicy: IfNotPresent
ports:
- name: reranking-usvc
containerPort: 8000
protocol: TCP
volumeMounts:
- mountPath: /tmp
name: tmp
livenessProbe:
failureThreshold: 24
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
readinessProbe:
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
startupProbe:
failureThreshold: 120
httpGet:
path: v1/health_check
port: reranking-usvc
initialDelaySeconds: 5
periodSeconds: 5
resources:
{}
volumes:
- name: tmp
emptyDir: {}
---
# Source: chatqna/charts/retriever-usvc/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -1369,16 +995,20 @@ spec:
containers:
- name: chatqna
env:
- name: LLM_SERVICE_HOST_IP
value: chatqna-llm-uservice
- name: RERANK_SERVICE_HOST_IP
value: chatqna-reranking-usvc
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVICE_HOST_IP
value: chatqna-embedding-usvc
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:

View File

@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-guardrails chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi guardrails-tgi"
service_list="chatqna-guardrails chatqna-ui dataprep-redis retriever-redis guardrails-tgi"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -35,17 +35,19 @@ function start_services() {
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8008"
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export GUARDRAIL_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=8008
export GUARDRAIL_SERVICE_PORT=9090
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export GURADRAILS_MODEL_ID="meta-llama/Meta-Llama-Guard-2-8B"
@@ -120,14 +122,6 @@ function validate_microservices() {
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
sleep 1m # the retriever may not respond to curl right away; wait before validating
# retrieval microservice
@@ -147,14 +141,6 @@ function validate_microservices() {
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-gaudi-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# tgi for llm service
validate_services \
"${ip_address}:8008/generate" \
@@ -163,22 +149,6 @@ function validate_microservices() {
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"llm-tgi-gaudi-server" \
'{"query":"What is Deep Learning?"}'
# tgi for guardrails service
validate_services \
"${ip_address}:8088/generate" \
"generated_text" \
"tgi-guardrails" \
"tgi-guardrails-server" \
'{"inputs":"How do you buy a tiger in the US?","parameters":{"max_new_tokens":32}}'
# guardrails microservice
validate_services \
"${ip_address}:9090/v1/guardrails" \
@@ -186,14 +156,13 @@ function validate_microservices() {
"guardrails" \
"guardrails-tgi-gaudi-server" \
'{"text":"How do you buy a tiger in the US?"}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/chatqna" \
"billion" \
"data: " \
"mega-chatqna" \
"chatqna-gaudi-guardrails-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'


@@ -1,251 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-no-wrapper chatqna-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8005"
export REDIS_URL="redis://${ip_address}:6379"
export REDIS_HOST=${ip_address}
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=8005
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file"
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_no_wrapper.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
docker logs tgi-gaudi-server > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 1s
n=$((n+1))
done
}
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then
cd $LOG_PATH
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 1s
}
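# Aside: the tr/sed dance above works because --write-out appends the status
# code to the body behind a known marker. A sketch of the same split done with
# bash parameter expansion alone, against a placeholder URL:
resp=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST \
  -H 'Content-Type: application/json' -d '{"k":"v"}' "http://example.local/api")
status=${resp##*HTTPSTATUS:}   # everything after the marker -> HTTP code
body=${resp%HTTPSTATUS:*}      # everything before the marker -> response body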
function validate_microservices() {
# Check if the microservices are running correctly.
# tei for embedding service
validate_service \
"${ip_address}:8090/embed" \
"[[" \
"tei-embedding" \
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \
"http://${ip_address}:6007/v1/dataprep" \
"Data preparation succeeded" \
"dataprep_upload_file" \
"dataprep-redis-server"
# test /v1/dataprep upload link
validate_service \
"http://${ip_address}:6007/v1/dataprep" \
"Data preparation succeeded" \
"dataprep_upload_link" \
"dataprep-redis-server"
# test /v1/dataprep/get_file
validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \
'{"name":' \
"dataprep_get" \
"dataprep-redis-server"
# test /v1/dataprep/delete_file
validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \
'{"status":true}' \
"dataprep_del" \
"dataprep-redis-server"
# retrieval microservice
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
validate_service \
"${ip_address}:7000/v1/retrieval" \
"retrieved_docs" \
"retrieval-microservice" \
"retriever-redis-server" \
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
# tei for rerank microservice
validate_service \
"${ip_address}:8808/rerank" \
'{"index":1,"score":' \
"tei-rerank" \
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# tgi for llm service
validate_service \
"${ip_address}:8005/generate" \
"generated_text" \
"tgi-llm" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
}
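# Aside: the retrieval payload above is spliced into JSON by hand. With jq
# available (an assumption; it is not used elsewhere in these tests) the
# quoting is less fragile -- a sketch:
test_embedding=$(python3 -c "import random; print([random.uniform(-1, 1) for _ in range(768)])")
jq -n --argjson emb "$test_embedding" \
  '{text: "What is the revenue of Nike in 2023?", embedding: $emb}' |
  curl -s -X POST -H 'Content-Type: application/json' -d @- "${ip_address}:7000/v1/retrieval"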
function validate_megaservice() {
# Curl the Mega Service
validate_service \
"${ip_address}:8888/v1/chatqna" \
"data: " \
"chatqna-megaservice" \
"chatqna-gaudi-backend-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_time=$(date +%s)
start_services
end_time=$(date +%s)
duration=$((end_time-start_time))
echo "Mega service start duration is $duration s"
if [ "${mode}" == "perf" ]; then
python3 $WORKPATH/tests/chatqna_benchmark.py
elif [ "${mode}" == "" ]; then
validate_microservices
validate_megaservice
# validate_frontend
fi
stop_docker
echo y | docker system prune
}
main


@@ -1,256 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-no-wrapper chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
export REDIS_URL="redis://${ip_address}:6379"
export REDIS_HOST=${ip_address}
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=6006
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=9009
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file"
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose -f compose_no_wrapper.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
if grep -q Connected ${LOG_PATH}/tgi_service_start.log; then
break
fi
sleep 1s
n=$((n+1))
done
}
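# Aside: the polling loop above can be condensed with coreutils timeout while
# keeping the same container name and 500-second budget -- a sketch:
timeout 500 bash -c 'until docker logs tgi-service 2>&1 | grep -q Connected; do sleep 1; done'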
function validate_service() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
if [[ $SERVICE_NAME == *"dataprep_upload_file"* ]]; then
cd $LOG_PATH
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'files=@./dataprep_file.txt' -H 'Content-Type: multipart/form-data' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_upload_link"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -F 'link_list=["https://www.ces.tech/"]' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_get"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -H 'Content-Type: application/json' "$URL")
elif [[ $SERVICE_NAME == *"dataprep_del"* ]]; then
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d '{"file_path": "all"}' -H 'Content-Type: application/json' "$URL")
else
HTTP_RESPONSE=$(curl --silent --write-out "HTTPSTATUS:%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
fi
HTTP_STATUS=$(echo $HTTP_RESPONSE | tr -d '\n' | sed -e 's/.*HTTPSTATUS://')
RESPONSE_BODY=$(echo $HTTP_RESPONSE | sed -e 's/HTTPSTATUS\:.*//g')
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
# check response status
if [ "$HTTP_STATUS" -ne "200" ]; then
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
exit 1
else
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
fi
# check response body
if [[ "$RESPONSE_BODY" != *"$EXPECTED_RESULT"* ]]; then
echo "[ $SERVICE_NAME ] Content does not match the expected result: $RESPONSE_BODY"
exit 1
else
echo "[ $SERVICE_NAME ] Content is as expected."
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tei for embedding service
validate_service \
"${ip_address}:6006/embed" \
"[[" \
"tei-embedding" \
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_service \
"http://${ip_address}:6007/v1/dataprep" \
"Data preparation succeeded" \
"dataprep_upload_file" \
"dataprep-redis-server"
# test /v1/dataprep upload link
validate_service \
"http://${ip_address}:6007/v1/dataprep" \
"Data preparation succeeded" \
"dataprep_upload_link" \
"dataprep-redis-server"
# test /v1/dataprep/get_file
validate_service \
"http://${ip_address}:6007/v1/dataprep/get_file" \
'{"name":' \
"dataprep_get" \
"dataprep-redis-server"
# test /v1/dataprep/delete_file
validate_service \
"http://${ip_address}:6007/v1/dataprep/delete_file" \
'{"status":true}' \
"dataprep_del" \
"dataprep-redis-server"
# retrieval microservice
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
validate_service \
"${ip_address}:7000/v1/retrieval" \
"retrieved_docs" \
"retrieval-microservice" \
"retriever-redis-server" \
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
# tei for rerank microservice
validate_service \
"${ip_address}:8808/rerank" \
'{"index":1,"score":' \
"tei-rerank" \
"tei-reranking-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# tgi for llm service
validate_service \
"${ip_address}:9009/generate" \
"generated_text" \
"tgi-llm" \
"tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_service \
"${ip_address}:8888/v1/chatqna" \
"data: " \
"chatqna-megaservice" \
"chatqna-xeon-backend-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'
}
function validate_frontend() {
echo "[ TEST INFO ]: --------- frontend test started ---------"
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
echo "[ TEST INFO ]: --------- conda env activated ---------"
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose stop && docker compose rm -f
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_time=$(date +%s)
start_services
end_time=$(date +%s)
duration=$((end_time-start_time))
echo "Mega service start duration is $duration s" && sleep 1s
if [ "${mode}" == "perf" ]; then
python3 $WORKPATH/tests/chatqna_benchmark.py
elif [ "${mode}" == "" ]; then
validate_microservices
echo "==== microservices validated ===="
validate_megaservice
echo "==== megaservice validated ===="
# validate_frontend
# echo "==== frontend validated ===="
fi
stop_docker
echo y | docker system prune
}
main
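A side note on the cleanup step: "echo y | docker system prune" answers the
confirmation prompt over stdin; the flag form is equivalent and a little clearer:

docker system prune -f   # -f/--force skips the confirmation prompt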


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -33,7 +33,7 @@ function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:8005"
@@ -42,27 +42,22 @@ function start_services() {
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=8005
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6009/v1/dataprep/delete_file"
export llm_service_devices=all
export tei_embedding_devices=all
export FRONTEND_SERVICE_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=chatqna
export BACKEND_SERVICE_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
@@ -128,14 +123,6 @@ function validate_microservices() {
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_service \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding-microservice" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
@@ -184,14 +171,6 @@ function validate_microservices() {
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_service \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank-microservice" \
"reranking-tei-gaudi-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# tgi for llm service
validate_service \
"${ip_address}:8005/generate" \
@@ -200,14 +179,6 @@ function validate_microservices() {
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm-microservice" \
"llm-tgi-gaudi-server" \
'{"query":"What is Deep Learning?"}'
}
function validate_megaservice() {
@@ -264,9 +235,13 @@ function main() {
duration=$((end_time-start_time))
echo "Mega service start duration is $duration s"
if [ "${mode}" == "perf" ]; then
python3 $WORKPATH/tests/chatqna_benchmark.py
elif [ "${mode}" == "" ]; then
validate_microservices
validate_megaservice
validate_frontend
# validate_frontend
fi
stop_docker
echo y | docker system prune


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-tgi nginx"
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -29,38 +29,34 @@ function build_docker_images() {
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
cd $WORKPATH/docker_compose/intel/cpu/xeon
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
export REDIS_URL="redis://${ip_address}:6379"
export REDIS_HOST=${ip_address}
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=6006
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=9009
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file"
export DATAPREP_DELETE_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/delete_file"
export FRONTEND_SERVICE_IP=${host_ip}
export FRONTEND_SERVICE_PORT=5173
export BACKEND_SERVICE_NAME=chatqna
export BACKEND_SERVICE_IP=${host_ip}
export BACKEND_SERVICE_PORT=8888
export NGINX_PORT=80
sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env
# Start Docker Containers
docker compose up -d > ${LOG_PATH}/start_services_with_compose.log
docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 500 ]]; do
docker logs tgi-service > ${LOG_PATH}/tgi_service_start.log
@@ -125,14 +121,6 @@ function validate_microservices() {
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_service \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding-microservice" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
@@ -181,14 +169,6 @@ function validate_microservices() {
"tei-reranking-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_service \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank-microservice" \
"reranking-tei-xeon-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# tgi for llm service
validate_service \
"${ip_address}:9009/generate" \
@@ -197,14 +177,6 @@ function validate_microservices() {
"tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm-microservice" \
"llm-tgi-server" \
'{"query":"What is Deep Learning?"}'
}
function validate_megaservice() {
@@ -249,7 +221,7 @@ function validate_frontend() {
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon/
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose stop && docker compose rm -f
}
@@ -270,8 +242,8 @@ function main() {
echo "==== microservices validated ===="
validate_megaservice
echo "==== megaservice validated ===="
validate_frontend
echo "==== frontend validated ===="
# validate_frontend
# echo "==== frontend validated ===="
fi
stop_docker


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-qdrant embedding-tei retriever-qdrant reranking-tei llm-tgi"
service_list="chatqna chatqna-ui dataprep-qdrant retriever-qdrant"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker images && sleep 1s
@@ -32,21 +32,20 @@ function start_services() {
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6040"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:6041"
export TGI_LLM_ENDPOINT="http://${ip_address}:6042"
export QDRANT_HOST=${ip_address}
export QDRANT_PORT=6333
export INDEX_NAME="rag-qdrant"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_PORT=6044
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export MEGA_SERVICE_PORT=8912
export EMBEDDING_SERVER_PORT=6040
export RETRIEVER_SERVICE_PORT=6045
export RERANK_SERVICE_PORT=6046
export LLM_SERVICE_PORT=6047
export RERANK_SERVER_PORT=6041
export LLM_SERVER_PORT=6042
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6043/v1/dataprep"
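Note that the Qdrant variant serves the megaservice on port 8912 rather than the
8888 used elsewhere, so a manual check targets that port; a sketch, with the
payload borrowed from the other tests:

curl -s "http://${ip_address}:8912/v1/chatqna" \
  -H 'Content-Type: application/json' \
  -d '{"messages": "What is Deep Learning?"}'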
@@ -114,14 +113,6 @@ function validate_microservices() {
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6044/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
# test /v1/dataprep upload file
echo "Deep learning is a subset of machine learning that utilizes neural networks with multiple layers to analyze various levels of abstract data representations. It enables computers to identify patterns and make decisions with minimal human intervention by learning from large amounts of data." > $LOG_PATH/dataprep_file.txt
validate_services \
@@ -154,14 +145,6 @@ function validate_microservices() {
"tei-reranking-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:6046/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-xeon-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# tgi for llm service
validate_services \
"${ip_address}:6042/generate" \
@@ -169,15 +152,6 @@ function validate_microservices() {
"tgi-llm" \
"tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_services \
"${ip_address}:6047/v1/chat/completions" \
"data: " \
"llm" \
"llm-tgi-server" \
'{"query":"Deep Learning"}'
}
function validate_megaservice() {


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-hpu llm-vllm"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -33,17 +33,17 @@ function start_services() {
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export vLLM_LLM_ENDPOINT="http://${ip_address}:8007"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=8007
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
@@ -102,14 +102,6 @@ function validate_microservices() {
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":\[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# retrieval microservice
@@ -129,14 +121,6 @@ function validate_microservices() {
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-gaudi-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# vllm for llm service
validate_services \
"${ip_address}:8007/v1/completions" \
@@ -144,15 +128,6 @@ function validate_microservices() {
"vllm-llm" \
"vllm-gaudi-server" \
'{"model": "Intel/neural-chat-7b-v3-3","prompt": "What is Deep Learning?","max_tokens": 32,"temperature": 0}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"llm-vllm-gaudi-server" \
'{"query":"What is Deep Learning?"}'
}
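# Aside: with the llm wrapper removed, the pipeline talks to vLLM's
# OpenAI-compatible completions route directly; the check above as a
# standalone curl, sketched:
curl -s "http://${ip_address}:8007/v1/completions" \
  -H 'Content-Type: application/json' \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'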
function validate_megaservice() {


@@ -20,7 +20,7 @@ function build_docker_images() {
git clone https://github.com/vllm-project/vllm.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm vllm"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -36,17 +36,17 @@ function start_services() {
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export vLLM_LLM_ENDPOINT="http://${ip_address}:9009"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=6006
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=9009
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
@@ -104,14 +104,6 @@ function validate_microservices() {
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":\[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# retrieval microservice
@@ -131,14 +123,6 @@ function validate_microservices() {
"tei-reranking-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-xeon-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# vllm for llm service
validate_services \
"${ip_address}:9009/v1/completions" \
@@ -146,15 +130,6 @@ function validate_microservices() {
"vllm-llm" \
"vllm-service" \
'{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"llm-vllm-server" \
'{"query":"What is Deep Learning?"}'
}
function validate_megaservice() {
@@ -217,7 +192,7 @@ function main() {
elif [ "${mode}" == "" ]; then
validate_microservices
validate_megaservice
validate_frontend
#validate_frontend
fi
stop_docker


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis embedding-tei retriever-redis reranking-tei llm-vllm-ray-hpu llm-vllm-ray"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -34,17 +34,17 @@ function start_services() {
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TEI_RERANKING_ENDPOINT="http://${ip_address}:8808"
export vLLM_RAY_LLM_ENDPOINT="http://${ip_address}:8006"
export LLM_SERVICE_PORT=9000
export REDIS_URL="redis://${ip_address}:6379"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export RERANK_SERVER_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export RERANK_SERVER_PORT=8808
export LLM_SERVER_PORT=8006
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
@@ -103,14 +103,6 @@ function validate_microservices() {
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_services \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":\[' \
"embedding" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# retrieval microservice
@@ -130,14 +122,6 @@ function validate_microservices() {
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# rerank microservice
validate_services \
"${ip_address}:8000/v1/reranking" \
"Deep learning is..." \
"rerank" \
"reranking-tei-gaudi-server" \
'{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}'
# vllm-on-ray for llm service
validate_services \
"${ip_address}:8006/v1/chat/completions" \
@@ -145,15 +129,6 @@ function validate_microservices() {
"vllm-ray-llm" \
"vllm-ray-gaudi-server" \
'{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
# llm microservice
validate_services \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm" \
"llm-vllm-ray-gaudi-server" \
'{"query":"What is Deep Learning?"}'
}
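# Aside: the Ray-served vLLM exposes the chat-style route rather than plain
# completions; a standalone sketch of the check above:
curl -s "http://${ip_address}:8006/v1/chat/completions" \
  -H 'Content-Type: application/json' \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'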
function validate_megaservice() {


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
service_list="chatqna-without-rerank chatqna-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -34,15 +34,16 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:8090"
export TGI_LLM_ENDPOINT="http://${ip_address}:8005"
export REDIS_URL="redis://${ip_address}:6379"
export REDIS_HOST=${ip_address}
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=8090
export LLM_SERVER_PORT=8005
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6008/v1/dataprep/get_file"
@@ -117,14 +118,6 @@ function validate_microservices() {
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_service \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding-microservice" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
@@ -172,15 +165,6 @@ function validate_microservices() {
"tgi-llm" \
"tgi-gaudi-server" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm-microservice" \
"llm-tgi-gaudi-server" \
'{"query":"What is Deep Learning?"}'
}
function validate_megaservice() {


@@ -19,7 +19,7 @@ function build_docker_images() {
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis embedding-tei retriever-redis llm-tgi"
service_list="chatqna-without-rerank chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
@@ -34,15 +34,16 @@ function start_services() {
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export TEI_EMBEDDING_ENDPOINT="http://${ip_address}:6006"
export TGI_LLM_ENDPOINT="http://${ip_address}:9009"
export REDIS_URL="redis://${ip_address}:6379"
export REDIS_HOST=${ip_address}
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export MEGA_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVICE_HOST_IP=${ip_address}
export EMBEDDING_SERVER_HOST_IP=${ip_address}
export RETRIEVER_SERVICE_HOST_IP=${ip_address}
export LLM_SERVICE_HOST_IP=${ip_address}
export LLM_SERVER_HOST_IP=${ip_address}
export EMBEDDING_SERVER_PORT=6006
export LLM_SERVER_PORT=9009
export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${ip_address}:6007/v1/dataprep"
export DATAPREP_GET_FILE_ENDPOINT="http://${ip_address}:6007/v1/dataprep/get_file"
@@ -116,14 +117,6 @@ function validate_microservices() {
"tei-embedding-server" \
'{"inputs":"What is Deep Learning?"}'
# embedding microservice
validate_service \
"${ip_address}:6000/v1/embeddings" \
'"text":"What is Deep Learning?","embedding":[' \
"embedding-microservice" \
"embedding-tei-server" \
'{"text":"What is Deep Learning?"}'
    sleep 1m # retrieval can't be curled immediately; give indexing time to finish
# test /v1/dataprep upload file
@@ -171,15 +164,6 @@ function validate_microservices() {
"tgi-llm" \
"tgi-service" \
'{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}'
# llm microservice
validate_service \
"${ip_address}:9000/v1/chat/completions" \
"data: " \
"llm-microservice" \
"llm-tgi-server" \
'{"query":"What is Deep Learning?"}'
}
function validate_megaservice() {