Compare commits: helmcharts...reorg_helm (63 commits)
| SHA1 |
| --- |
| 91940b8058 |
| 3744bb8c1b |
| 82801d0121 |
| f7026773b8 |
| edc09ece5c |
| dfed2aead2 |
| 049517f977 |
| ee83a6d5b4 |
| e2bdd19fd4 |
| c9088eb824 |
| 9c3023a12e |
| 7d779513f5 |
| bbc95bb708 |
| dd9623d3d5 |
| 4c27a3d30c |
| 40386d9bd6 |
| fe97e88c7a |
| 11d8b24c8a |
| 4635a927fa |
| 1da44d99a1 |
| e9b164505e |
| 6263b517b9 |
| 2de7c0ba89 |
| 944ae47948 |
| 2d9aeb3715 |
| a0921f127f |
| cf86aceb18 |
| c2b7bd25d9 |
| 78331ee678 |
| 7f7ad0e256 |
| 0306c620b5 |
| 3372b9d480 |
| 5eb3d2869f |
| ced68e1834 |
| bf5c391e47 |
| c65d7d40fb |
| 9d124161e0 |
| 0f5a9c4a5e |
| a65640b4a5 |
| 7197286a14 |
| 960805a57b |
| 002f0e2b11 |
| fde5996192 |
| bc47930ce1 |
| 2332d22950 |
| a2afce1675 |
| 89f4c5fb41 |
| 98f66405ac |
| 90c2d49050 |
| 95b58b51fa |
| d3ce6f5357 |
| a10b4a1f1d |
| 085d859a70 |
| 15cc457cea |
| cfffb4c005 |
| 41955f65ad |
| def39cfcdc |
| 35a4fef70d |
| a3f9811f7e |
| 0eedbbfce0 |
| 9438d392b4 |
| 1929dfd3a0 |
| c7e33647ad |
2  .github/code_spell_ignore.txt (vendored)
@@ -0,0 +1,2 @@
ModelIn
modelin
10  .github/workflows/_example-workflow.yml (vendored)
@@ -40,6 +40,11 @@ on:
default: "main"
required: false
type: string
inject_commit:
default: false
required: false
type: string

jobs:
####################################################################################################
# Image Build
@@ -72,6 +77,10 @@ jobs:
git clone https://github.com/vllm-project/vllm.git
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork && git rev-parse HEAD && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../

@@ -83,6 +92,7 @@ jobs:
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
service_list: ${{ inputs.services }}
registry: ${OPEA_IMAGE_REPO}opea
inject_commit: ${{ inputs.inject_commit }}
tag: ${{ inputs.tag }}

####################################################################################################
8  .github/workflows/_manifest-e2e.yml (vendored)
@@ -90,10 +90,16 @@ jobs:
echo "Validate ${{ inputs.example }} successful!"
else
echo "Validate ${{ inputs.example }} failure!!!"
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
exit 1
fi
fi

- name: Dump logs when e2e test failed
if: failure()
run: |
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE

- name: Kubectl uninstall
if: always()
run: |
8  .github/workflows/_run-docker-compose.yml (vendored)
@@ -119,6 +119,8 @@ jobs:
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}
@@ -139,7 +141,11 @@ jobs:
flag=${flag#test_}
yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
echo $yaml_file
docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
35  .github/workflows/check-online-doc-build.yml (vendored, new file)
@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Check Online Document Building
permissions: {}

on:
  pull_request:
    branches: [main]
    paths:
      - "**.md"
      - "**.rst"

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          path: GenAIExamples

      - name: Checkout docs
        uses: actions/checkout@v4
        with:
          repository: opea-project/docs
          path: docs

      - name: Build Online Document
        shell: bash
        run: |
          echo "build online doc"
          cd docs
          bash scripts/build.sh
@@ -50,6 +50,11 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string

permissions: read-all
jobs:
@@ -101,4 +106,5 @@ jobs:
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit
7  .github/workflows/manual-image-build.yml (vendored)
@@ -30,6 +30,12 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string

jobs:
get-test-matrix:
runs-on: ubuntu-latest
@@ -56,4 +62,5 @@ jobs:
services: ${{ inputs.services }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit
70  .github/workflows/nightly-docker-build-publish.yml (vendored, new file)
@@ -0,0 +1,70 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Nightly build/publish latest docker images

on:
  schedule:
    - cron: "30 13 * * *" # UTC time
  workflow_dispatch:

env:
  EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
  TAG: "latest"
  PUBLISH_TAGS: "latest"

jobs:
  get-build-matrix:
    runs-on: ubuntu-latest
    outputs:
      examples_json: ${{ steps.get-matrix.outputs.examples_json }}
      EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
      TAG: ${{ steps.get-matrix.outputs.TAG }}
      PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
    steps:
      - name: Create Matrix
        id: get-matrix
        run: |
          examples=($(echo ${EXAMPLES} | tr ',' ' '))
          examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
          echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
          echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
          echo "TAG=$TAG" >> $GITHUB_OUTPUT
          echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT

  build:
    needs: get-build-matrix
    strategy:
      matrix:
        example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: gaudi
      example: ${{ matrix.example }}
    secrets: inherit

  get-image-list:
    needs: get-build-matrix
    uses: ./.github/workflows/_get-image-list.yml
    with:
      examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}

  publish:
    needs: [get-build-matrix, get-image-list, build]
    strategy:
      matrix:
        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
    runs-on: "docker-build-gaudi"
    steps:
      - uses: docker/login-action@v3.2.0
        with:
          username: ${{ secrets.DOCKERHUB_USER }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Image Publish
        uses: opea-project/validation/actions/image-publish@main
        with:
          local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
          image_name: opea/${{ matrix.image }}
          publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
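For reference, the "Create Matrix" step's jq pipeline can be sketched locally to see what `examples_json` ends up looking like. This is a minimal illustration only (it assumes bash and jq are installed and uses a shortened example list):

```bash
#!/usr/bin/env bash
# Minimal sketch of the "Create Matrix" step, run outside GitHub Actions.
EXAMPLES="AgentQnA,AudioQnA,ChatQnA"          # shortened list for illustration
examples=($(echo ${EXAMPLES} | tr ',' ' '))   # split the comma-separated list into an array
# Deduplicate and emit a compact JSON array of strings, as consumed by the build matrix:
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "$examples_json"                          # -> ["AgentQnA","AudioQnA","ChatQnA"]
```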
2  .github/workflows/pr-gmc-e2e.yaml (vendored)
@@ -12,7 +12,7 @@ on:
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/**/manifests/**"
- "!**/kubernetes/**/manifest/**"

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
2  .github/workflows/pr-manifest-e2e.yml (vendored)
@@ -10,7 +10,7 @@ on:
paths:
- "**/Dockerfile**"
- "**.py"
- "**/kubernetes/**/manifests/**"
- "**/kubernetes/**/manifest/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"
6  .github/workflows/pr-path-detection.yml (vendored)
@@ -61,14 +61,14 @@ jobs:
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
# echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
echo $url_line
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
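The same link check can be sketched as a standalone script for local use; this is an illustrative reduction of the workflow step above (the file name is a placeholder, and grep/curl are assumed to be available):

```bash
#!/usr/bin/env bash
# Minimal local sketch of the markdown dead-link check used in pr-path-detection.yml.
changed_file="README.md"   # placeholder; the workflow iterates over changed *.md files
# Extract "](http...)" markdown links, skipping links into GenAIExamples/blob/main:
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
for url_line in $url_lines; do
  url=$(echo "$url_line" | cut -d '(' -f2 | cut -d ')' -f1 | sed 's/\.git$//')
  response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") || true
  if [ "$response" -ne 200 ]; then
    echo "Broken link in $changed_file: $url (HTTP $response)"
  fi
done
```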
5  .github/workflows/scripts/get_test_matrix.sh (vendored)
@@ -9,12 +9,15 @@ set -e
changed_files=$changed_files
test_mode=$test_mode
run_matrix="{\"include\":["
hardware_list="xeon gaudi" # current support hardware list

examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
ls -l
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
echo "Test supported hardware list = ${hardware_list}"

run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
@@ -81,17 +81,13 @@ flowchart LR
3. Hierarchical agent can further improve performance.
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.

### Roadmap
## Deployment with docker

- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
- v1.0 or later: agents use open-source llm backend.
- v1.1 or later: add safeguards
1. Build agent docker image

## Getting started
Note: this is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed.

1. Build agent docker image </br>
First, clone the opea GenAIComps repo
First, clone the opea GenAIComps repo.

```
export WORKDIR=<your-work-directory>
@@ -106,35 +102,63 @@ flowchart LR
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
```

2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

3. Set up environment for this example </br>
First, clone this repo
2. Set up environment for this example </br>
First, clone this repo.

```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```

Second, set up env vars
Second, set up env vars.

```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPANAI_API_KEY
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time

# optional: OPANAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```

4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)

First, launch the mega-service.

```
cd docker_compose/intel/cpu/xeon
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```

Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.

```
bash run_ingest_data.sh
```

4. Launch other tools. </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

5. Launch agent services</br>
We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.

To use open-source LLMs on Gaudi2, run commands below.

```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
bash launch_agent_service_tgi_gaudi.sh
```

To use OpenAI models, run commands below.

```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```

@@ -143,10 +167,12 @@ flowchart LR
First look at logs of the agent docker containers:

```
docker logs docgrader-agent-endpoint
# worker agent
docker logs rag-agent-endpoint
```

```
# supervisor agent
docker logs react-agent-endpoint
```

@@ -170,4 +196,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app

## How to register your own tools with agent

You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
3  AgentQnA/docker_compose/intel/cpu/xeon/README.md (new file)
@@ -0,0 +1,3 @@
# Deployment on Xeon

We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
@@ -2,11 +2,10 @@
# SPDX-License-Identifier: Apache-2.0

services:
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
container_name: rag-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
@@ -36,8 +35,9 @@ services:
supervisor-react-agent:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"

@@ -7,7 +7,7 @@ export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
@@ -2,37 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
depends_on:
- tgi-server
container_name: rag-agent-endpoint
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -41,7 +13,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent
strategy: rag_agent_llama
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -66,8 +38,7 @@ services:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- tgi-server
- worker-docgrader-agent
- worker-rag-agent
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -76,7 +47,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
strategy: react_llama
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}

@@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
export temperature=0.01
export max_new_tokens=512
export max_new_tokens=4096

# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,17 +27,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080

docker compose -f compose.yaml up -d

sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"
25  AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh (new file)
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4

docker compose -f tgi_gaudi.yaml up -d

sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
  docker logs tgi-server &> tgi-gaudi-service.log
  n=$((n+1))
  if grep -q Connected tgi-gaudi-service.log; then
    break
  fi
  sleep 5s
done
sleep 5s
echo "Service started successfully"
30  AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml (new file)
@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  tgi-server:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    container_name: tgi-server
    ports:
      - "8085:80"
    volumes:
      - ${HF_CACHE_DIR}:/data
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      PT_HPU_ENABLE_LAZY_COLLECTIVES: true
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
@@ -17,6 +17,12 @@ if [ ! -d "$HF_CACHE_DIR" ]; then
fi
ls $HF_CACHE_DIR

function start_tgi(){
echo "Starting tgi-gaudi server"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh

}

function start_agent_and_api_server() {
echo "Starting CRAG server"
@@ -25,6 +31,7 @@ function start_agent_and_api_server() {
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_agent_service_tgi_gaudi.sh
sleep 10
}

function validate() {
@@ -43,18 +50,22 @@ function validate() {

function validate_agent_service() {
echo "----------------Test agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs docgrader-agent-endpoint
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9095"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
docker logs rag-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
exit 1
fi

local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9090"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs react-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
@@ -64,6 +75,10 @@ function validate_agent_service() {
}

function main() {
echo "==================== Start TGI ===================="
start_tgi
echo "==================== TGI started ===================="

echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="
25
AgentQnA/tests/test.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def generate_answer_agent_api(url, prompt):
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"query": prompt,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
answer = response.json()["text"]
|
||||
return answer
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ip_address = os.getenv("ip_address", "localhost")
|
||||
agent_port = os.getenv("agent_port", "9095")
|
||||
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
|
||||
prompt = "Tell me about Michael Jackson song thriller"
|
||||
answer = generate_answer_agent_api(url, prompt)
|
||||
print(answer)
|
||||
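The validation script shown earlier now calls this helper instead of issuing raw curl requests. A minimal manual invocation looks like the following sketch (it assumes the agent services are already running and the repo is checked out under $WORKDIR):

```bash
# Run the test helper by hand against a running agent endpoint.
export ip_address=localhost    # host running the agent containers
export agent_port=9095         # 9095 = worker RAG agent, 9090 = supervisor ReAct agent
python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py
```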
@@ -19,7 +19,6 @@ function stop_crag() {

function stop_agent_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -28,11 +27,21 @@ function stop_agent_docker() {
done
}

function stop_tgi(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done

}

function stop_retrieval_tool() {
echo "Stopping Retrieval tool"
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -43,25 +52,26 @@ function stop_retrieval_tool() {
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_crag
stop_tgi
stop_agent_docker
stop_retrieval_tool

cd $WORKPATH/tests

echo "=================== #1 Building docker images===================="
bash 1_build_images.sh
bash step1_build_images.sh
echo "=================== #1 Building docker images completed===================="

echo "=================== #2 Start retrieval tool===================="
bash 2_start_retrieval_tool.sh
bash step2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="

echo "=================== #3 Ingest data and validate retrieval===================="
bash 3_ingest_data_and_validate_retrieval.sh
bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="

echo "=================== #4 Start agent and API server===================="
bash 4_launch_and_validate_agent_tgi.sh
bash step4_launch_and_validate_agent_tgi.sh
echo "=================== #4 Agent test passed ===================="

echo "=================== #5 Stop agent and API server===================="
@@ -70,4 +80,6 @@ stop_agent_docker
stop_retrieval_tool
echo "=================== #5 Agent and API server stopped===================="

echo y | docker system prune

echo "ALL DONE!"
@@ -25,7 +25,7 @@ get_billboard_rank_date:
args_schema:
rank:
type: int
description: song name
description: the rank of interest, for example 1 for top 1
date:
type: str
description: date
@@ -12,16 +12,31 @@ def search_knowledge_base(query: str) -> str:
print(url)
proxies = {"http": ""}
payload = {
"text": query,
"messages": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
print(context)
return context
if "documents" in response.json():
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
# print(context)
return context
elif "text" in response.json():
return response.json()["text"]
elif "reranked_docs" in response.json():
docs = response.json()["reranked_docs"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc["text"]
else:
context += "\n" + doc["text"]
# print(context)
return context
else:
return "Error parsing response from the knowledge base."
@@ -36,9 +36,9 @@ Evaluate the performance with the LLM:

```py
# validate the offline model
# python offline_evaluate.py
# python offline_eval.py
# validate the online asr microservice accuracy
python online_evaluate.py
python online_eval.py
```

### Performance Result
@@ -2,4 +2,4 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

python online_evaluate.py
python online_eval.py
@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

@@ -26,7 +26,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
@@ -7,14 +7,14 @@

## Deploy On Xeon
```
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml

@@ -247,7 +247,7 @@ spec:
- envFrom:
- configMapRef:
name: audio-qna-config
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
name: llm-dependency-deploy-demo
securityContext:
capabilities:
8  AvatarChatbot/.gitignore (vendored, new file)
@@ -0,0 +1,8 @@
*.safetensors
*.bin
*.model
*.log
docker_compose/intel/cpu/xeon/data
docker_compose/intel/hpu/gaudi/data
inputs/
outputs/
33  AvatarChatbot/Dockerfile (new file)
@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    git

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt

COPY ./avatarchatbot.py /home/user/avatarchatbot.py

ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

USER user

WORKDIR /home/user

ENTRYPOINT ["python", "avatarchatbot.py"]
105
AvatarChatbot/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# AvatarChatbot Application
|
||||
|
||||
The AvatarChatbot service can be effortlessly deployed on either Intel Gaudi2 or Intel XEON Scalable Processors.
|
||||
|
||||
## AI Avatar Workflow
|
||||
|
||||
The AI Avatar example is implemented using both megaservices and the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different megaservices and microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 100
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 42px
|
||||
---
|
||||
flowchart LR
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef thistle fill:#D8BFD8,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
style AvatarChatbot-Megaservice stroke:#000000
|
||||
|
||||
subgraph AvatarChatbot-Megaservice["AvatarChatbot Megaservice"]
|
||||
direction LR
|
||||
ASR([ASR Microservice]):::blue
|
||||
LLM([LLM Microservice]):::blue
|
||||
TTS([TTS Microservice]):::blue
|
||||
animation([Animation Microservice]):::blue
|
||||
end
|
||||
subgraph UserInterface["User Interface"]
|
||||
direction LR
|
||||
invis1[ ]:::invisible
|
||||
USER1([User Audio Query]):::orchid
|
||||
USER2([User Image/Video Query]):::orchid
|
||||
UI([UI server<br>]):::orchid
|
||||
end
|
||||
GW([AvatarChatbot GateWay<br>]):::orange
|
||||
subgraph .
|
||||
direction LR
|
||||
X([OPEA Microservice]):::blue
|
||||
Y{{Open Source Service}}:::thistle
|
||||
Z([OPEA Gateway]):::orange
|
||||
Z1([UI]):::orchid
|
||||
end
|
||||
|
||||
WHISPER{{Whisper service}}:::thistle
|
||||
TGI{{LLM service}}:::thistle
|
||||
T5{{Speecht5 service}}:::thistle
|
||||
WAV2LIP{{Wav2Lip service}}:::thistle
|
||||
|
||||
%% Connections %%
|
||||
direction LR
|
||||
USER1 -->|1| UI
|
||||
UI -->|2| GW
|
||||
GW <==>|3| AvatarChatbot-Megaservice
|
||||
ASR ==>|4| LLM ==>|5| TTS ==>|6| animation
|
||||
|
||||
direction TB
|
||||
ASR <-.->|3'| WHISPER
|
||||
LLM <-.->|4'| TGI
|
||||
TTS <-.->|5'| T5
|
||||
animation <-.->|6'| WAV2LIP
|
||||
|
||||
USER2 -->|1| UI
|
||||
UI <-.->|6'| WAV2LIP
|
||||
```
|
||||
|
||||
## Deploy AvatarChatbot Service
|
||||
|
||||
The AvatarChatbot service can be deployed on either Intel Gaudi2 AI Accelerator or Intel Xeon Scalable Processor.
|
||||
|
||||
### Deploy AvatarChatbot on Gaudi
|
||||
|
||||
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi, and on setting up an UI for the application.
|
||||
|
||||
### Deploy AvatarChatbot on Xeon
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AvatarChatbot on Xeon.
|
||||
|
||||
## Supported Models
|
||||
|
||||
### ASR
|
||||
|
||||
The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). It also supports all models in the Whisper family, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, `openai/whisper-tiny`, etc.
|
||||
|
||||
To replace the model, please edit the `compose.yaml` and add the `command` line to pass the name of the model you want to use:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
whisper-service:
|
||||
...
|
||||
command: --model_name_or_path openai/whisper-tiny
|
||||
```
|
||||
|
||||
### TTS
|
||||
|
||||
The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). We currently do not support replacing the model. More models under the commercial license will be added in the future.
|
||||
|
||||
### Animation
|
||||
|
||||
The default model is [Rudrabha/Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [TencentARC/GFPGAN](https://github.com/TencentARC/GFPGAN). We currently do not support replacing the model. More models under the commercial license such as [OpenTalker/SadTalker](https://github.com/OpenTalker/SadTalker) will be added in the future.
|
||||
BIN  AvatarChatbot/assets/audio/eg3_ref.wav (new file)
3    AvatarChatbot/assets/audio/sample_minecraft.json (new file)
3    AvatarChatbot/assets/audio/sample_question.json (new file)
4    AvatarChatbot/assets/audio/sample_whoareyou.json (new file)
BIN  AvatarChatbot/assets/img/UI.png (new file, 595 KiB)
BIN  AvatarChatbot/assets/img/avatar1.jpg (new file, 148 KiB)
BIN  AvatarChatbot/assets/img/avatar2.jpg (new file, 158 KiB)
BIN  AvatarChatbot/assets/img/avatar3.png (new file, 2.5 MiB)
BIN  AvatarChatbot/assets/img/avatar4.png (new file, 992 KiB)
BIN  AvatarChatbot/assets/img/avatar5.png (new file, 1.7 MiB)
BIN  AvatarChatbot/assets/img/avatar6.png (new file, 1.6 MiB)
BIN  AvatarChatbot/assets/img/design.png (new file, 169 KiB)
BIN  AvatarChatbot/assets/img/flowchart.png (new file, 121 KiB)
BIN  AvatarChatbot/assets/img/gaudi.png (new file, 47 KiB)
BIN  AvatarChatbot/assets/img/opea_gh_qr.png (new file, 20 KiB)
BIN  AvatarChatbot/assets/img/opea_qr.png (new file, 25 KiB)
BIN  AvatarChatbot/assets/img/xeon.jpg (new file, 22 KiB)
BIN  AvatarChatbot/assets/outputs/result_max_tokens_1024.mp4 (new file)
BIN  AvatarChatbot/assets/outputs/result_max_tokens_64.mp4 (new file)
93
AvatarChatbot/avatarchatbot.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
|
||||
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
|
||||
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
|
||||
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
|
||||
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
|
||||
ANIMATION_SERVICE_HOST_IP = os.getenv("ANIMATION_SERVICE_HOST_IP", "0.0.0.0")
|
||||
ANIMATION_SERVICE_PORT = int(os.getenv("ANIMATION_SERVICE_PORT", 9066))
|
||||
|
||||
|
||||
def check_env_vars(env_var_list):
|
||||
for var in env_var_list:
|
||||
if os.getenv(var) is None:
|
||||
print(f"Error: The environment variable '{var}' is not set.")
|
||||
sys.exit(1) # Exit the program with a non-zero status code
|
||||
print("All environment variables are set.")
|
||||
|
||||
|
||||
class AvatarChatbotService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
asr = MicroService(
|
||||
name="asr",
|
||||
host=ASR_SERVICE_HOST_IP,
|
||||
port=ASR_SERVICE_PORT,
|
||||
endpoint="/v1/audio/transcriptions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ASR,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVICE_HOST_IP,
|
||||
port=LLM_SERVICE_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
tts = MicroService(
|
||||
name="tts",
|
||||
host=TTS_SERVICE_HOST_IP,
|
||||
port=TTS_SERVICE_PORT,
|
||||
endpoint="/v1/audio/speech",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.TTS,
|
||||
)
|
||||
animation = MicroService(
|
||||
name="animation",
|
||||
host=ANIMATION_SERVICE_HOST_IP,
|
||||
port=ANIMATION_SERVICE_PORT,
|
||||
endpoint="/v1/animation",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ANIMATION,
|
||||
)
|
||||
self.megaservice.add(asr).add(llm).add(tts).add(animation)
|
||||
self.megaservice.flow_to(asr, llm)
|
||||
self.megaservice.flow_to(llm, tts)
|
||||
self.megaservice.flow_to(tts, animation)
|
||||
self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
check_env_vars(
|
||||
[
|
||||
"MEGA_SERVICE_HOST_IP",
|
||||
"MEGA_SERVICE_PORT",
|
||||
"ASR_SERVICE_HOST_IP",
|
||||
"ASR_SERVICE_PORT",
|
||||
"LLM_SERVICE_HOST_IP",
|
||||
"LLM_SERVICE_PORT",
|
||||
"TTS_SERVICE_HOST_IP",
|
||||
"TTS_SERVICE_PORT",
|
||||
"ANIMATION_SERVICE_HOST_IP",
|
||||
"ANIMATION_SERVICE_PORT",
|
||||
]
|
||||
)
|
||||
|
||||
avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
avatarchatbot.add_remote_service()
|
||||
210
AvatarChatbot/docker_compose/intel/cpu/xeon/README.md
Normal file
@@ -0,0 +1,210 @@
|
||||
# Build Mega Service of AvatarChatbot on Xeon
|
||||
|
||||
This document outlines the deployment process for a AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server.
|
||||
|
||||
## 🚀 Build Docker images
|
||||
|
||||
### 1. Source Code install GenAIComps
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Animation Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile .
|
||||
|
||||
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/AvatarChatbot/
|
||||
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`, you will have following images ready:
|
||||
|
||||
1. `opea/whisper:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/wav2lip:latest`
|
||||
7. `opea/animation:latest`
|
||||
8. `opea/avatarchatbot:latest`
|
||||
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
Before starting the services with `docker compose`, you have to recheck the following environment variables.
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
```
|
||||
|
||||
- Xeon CPU
|
||||
|
||||
```bash
|
||||
export DEVICE="cpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
export INFERENCE_MODE='wav2lip_only'
|
||||
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
||||
export FACE="assets/img/avatar1.jpg"
|
||||
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
|
||||
export AUDIO='None'
|
||||
export FACESIZE=96
|
||||
export OUTFILE="/outputs/result.mp4"
|
||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
||||
export UPSCALE_FACTOR=1
|
||||
export FPS=10
|
||||
```
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
## 🚀 Test MicroServices
|
||||
|
||||
```bash
|
||||
# whisper service
|
||||
curl http://${host_ip}:7066/v1/asr \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# wav2lip service
|
||||
cd ../../../..
|
||||
curl http://${host_ip}:7860/v1/wav2lip \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_minecraft.json \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# animation microservice
|
||||
curl http://${host_ip}:3008/v1/animation \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_question.json \
|
||||
-H "Content-Type: application/json"
|
||||
|
||||
```
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3009/v1/avatarchatbot \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_whoareyou.json \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
If the megaservice is running properly, you should see the following output:
|
||||
|
||||
```bash
|
||||
"/outputs/result.mp4"
|
||||
```
|
||||
|
||||
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
|
||||
|
||||
## Gradio UI
|
||||
|
||||
```bash
|
||||
cd $WORKPATH/GenAIExamples/AvatarChatbot
|
||||
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
|
||||
```
|
||||
|
||||
The UI can be viewed at http://${host_ip}:7861
|
||||
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
|
||||
In the current version v1.0, you need to set the avatar figure image/video and the DL model choice in the environment variables before starting AvatarChatbot backend service and running the UI. Please just customize the audio question in the UI.
|
||||
\*\* We will enable change of avatar figure between runs in v2.0
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AvatarChatbot/tests
|
||||
export IMAGE_REPO="opea"
|
||||
export IMAGE_TAG="latest"
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
|
||||
test_avatarchatbot_on_xeon.sh
|
||||
```
|
||||
138
AvatarChatbot/docker_compose/intel/cpu/xeon/compose.yaml
Normal file
@@ -0,0 +1,138 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
ports:
|
||||
- "7055:7055"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
wav2lip-service:
|
||||
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
|
||||
container_name: wav2lip-service
|
||||
ports:
|
||||
- "7860:7860"
|
||||
ipc: host
|
||||
volumes:
|
||||
- ${PWD}:/outputs
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
DEVICE: ${DEVICE}
|
||||
INFERENCE_MODE: ${INFERENCE_MODE}
|
||||
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
|
||||
FACE: ${FACE}
|
||||
AUDIO: ${AUDIO}
|
||||
FACESIZE: ${FACESIZE}
|
||||
OUTFILE: ${OUTFILE}
|
||||
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
|
||||
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
|
||||
FPS: ${FPS}
|
||||
WAV2LIP_PORT: ${WAV2LIP_PORT}
|
||||
restart: unless-stopped
|
||||
animation:
|
||||
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
|
||||
container_name: animation-server
|
||||
ports:
|
||||
- "3008:9066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
avatarchatbot-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
|
||||
container_name: avatarchatbot-xeon-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- animation
|
||||
ports:
|
||||
- "3009:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
|
||||
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
220
AvatarChatbot/docker_compose/intel/hpu/gaudi/README.md
Normal file
@@ -0,0 +1,220 @@
|
||||
# Build Mega Service of AvatarChatbot on Gaudi
|
||||
|
||||
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server.
|
||||
|
||||
## 🚀 Build Docker images
|
||||
|
||||
### 1. Install GenAIComps from Source Code
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
```
|
||||
|
||||
### 2. Build ASR Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
|
||||
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
|
||||
```
|
||||
|
||||
### 4. Build TTS Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build Animation Image
|
||||
|
||||
```bash
|
||||
docker build -t opea/wav2lip-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .
|
||||
|
||||
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd GenAIExamples/AvatarChatbot/
|
||||
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
Then run the command `docker images`; you should see the following images ready:
|
||||
|
||||
1. `opea/whisper-gaudi:latest`
|
||||
2. `opea/asr:latest`
|
||||
3. `opea/llm-tgi:latest`
|
||||
4. `opea/speecht5-gaudi:latest`
|
||||
5. `opea/tts:latest`
|
||||
6. `opea/wav2lip-gaudi:latest`
|
||||
7. `opea/animation:latest`
|
||||
8. `opea/avatarchatbot:latest`
|
||||
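If anything looks missing, a quick check like the following can help confirm the list before moving on (a minimal sketch; it assumes the default `opea` registry and `latest` tag used above):

```bash
# List only the images built in the steps above
docker images --format '{{.Repository}}:{{.Tag}}' \
  | grep -E '^opea/(whisper-gaudi|asr|llm-tgi|speecht5-gaudi|tts|wav2lip-gaudi|animation|avatarchatbot):latest$'
```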
|
||||
## 🚀 Set the environment variables
|
||||
|
||||
Before starting the services with `docker compose`, make sure the following environment variables are set correctly.
|
||||
|
||||
```bash
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
```
|
||||
|
||||
- Gaudi2 HPU
|
||||
|
||||
```bash
|
||||
export DEVICE="hpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
export INFERENCE_MODE='wav2lip_only'
|
||||
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
||||
export FACE="assets/img/avatar1.jpg"
|
||||
# export AUDIO='assets/audio/eg3_ref.wav'  # optional audio file path; when AUDIO is 'None', the base64 string in the POST request is used as input
|
||||
export AUDIO='None'
|
||||
export FACESIZE=96
|
||||
export OUTFILE="/outputs/result.mp4"
|
||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
||||
export UPSCALE_FACTOR=1
|
||||
export FPS=10
|
||||
```
|
||||
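The block above runs the animation in `wav2lip_only` mode. If you also want GFPGAN face restoration, the accompanying test script (`tests/test_compose_on_gaudi.sh`) uses the combined mode instead; switching is a single variable change, e.g.:

```bash
# Optional: enable GFPGAN face restoration on top of Wav2Lip
export INFERENCE_MODE='wav2lip+gfpgan'
```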
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AvatarChatbot/docker_compose/intel/hpu/gaudi/
|
||||
docker compose -f compose.yaml up -d
|
||||
```
|
||||
|
||||
## 🚀 Test MicroServices
|
||||
|
||||
```bash
|
||||
# whisper service
|
||||
curl http://${host_ip}:7066/v1/asr \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# asr microservice
|
||||
curl http://${host_ip}:3001/v1/audio/transcriptions \
|
||||
-X POST \
|
||||
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tgi service
|
||||
curl http://${host_ip}:3006/generate \
|
||||
-X POST \
|
||||
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# llm microservice
|
||||
curl http://${host_ip}:3007/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# speecht5 service
|
||||
curl http://${host_ip}:7055/v1/tts \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# tts microservice
|
||||
curl http://${host_ip}:3002/v1/audio/speech \
|
||||
-X POST \
|
||||
-d '{"text": "Who are you?"}' \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# wav2lip service
|
||||
cd ../../../..
|
||||
curl http://${host_ip}:7860/v1/wav2lip \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_minecraft.json \
|
||||
-H 'Content-Type: application/json'
|
||||
|
||||
# animation microservice
|
||||
curl http://${host_ip}:3008/v1/animation \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_question.json \
|
||||
-H "Content-Type: application/json"
|
||||
|
||||
```
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3009/v1/avatarchatbot \
|
||||
-X POST \
|
||||
-d @assets/audio/sample_whoareyou.json \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
If the megaservice is running properly, you should see the following output:
|
||||
|
||||
```bash
|
||||
"/outputs/result.mp4"
|
||||
```
|
||||
|
||||
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
|
||||
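To double-check that the video was actually written, something like the following works (a minimal sketch; `ffprobe` is only available if FFmpeg is installed on the host):

```bash
# Confirm the generated video exists and inspect its streams
ls -lh "${PWD}/result.mp4"
ffprobe -hide_banner "${PWD}/result.mp4"
```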
|
||||
## Gradio UI
|
||||
|
||||
```bash
|
||||
sudo apt update
|
||||
sudo apt install -y yasm pkg-config libx264-dev nasm
|
||||
cd $WORKPATH
|
||||
git clone https://github.com/FFmpeg/FFmpeg.git
|
||||
cd FFmpeg
|
||||
sudo ./configure --enable-gpl --enable-libx264 && sudo make -j$(($(nproc) - 1)) && sudo make install && hash -r
|
||||
pip install gradio==4.38.1 soundfile
|
||||
```
|
||||
|
||||
```bash
|
||||
cd $WORKPATH/GenAIExamples/AvatarChatbot
|
||||
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
|
||||
```
|
||||
|
||||
The UI can be viewed at http://${host_ip}:7861
|
||||
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
|
||||
In the current version (v1.0), the avatar figure (image/video) and the DL model choice must be set via environment variables before starting the AvatarChatbot backend service and running the UI. Only the audio question can be customized in the UI.
|
||||
\*\* Changing the avatar figure between runs will be enabled in v2.0.
|
||||
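Until then, one possible workaround (a sketch; `assets/img/avatar2.jpg` is a hypothetical path, substitute any image in your checkout) is to re-export the avatar variable and recreate the animation containers between runs:

```bash
# Hypothetical example: point FACE at a different avatar, then recreate the affected services
export FACE="assets/img/avatar2.jpg"
cd GenAIExamples/AvatarChatbot/docker_compose/intel/hpu/gaudi/
docker compose up -d --force-recreate wav2lip-service animation
```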
|
||||
## Troubleshooting
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AvatarChatbot/tests
|
||||
export IMAGE_REPO="opea"
|
||||
export IMAGE_TAG="latest"
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
|
||||
|
||||
test_avatarchatbot_on_gaudi.sh
|
||||
```
|
||||
171
AvatarChatbot/docker_compose/intel/hpu/gaudi/compose.yaml
Normal file
@@ -0,0 +1,171 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_MODULES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
asr:
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
container_name: asr-service
|
||||
ports:
|
||||
- "3001:9099"
|
||||
ipc: host
|
||||
environment:
|
||||
ASR_ENDPOINT: ${ASR_ENDPOINT}
|
||||
speecht5-service:
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
container_name: speecht5-service
|
||||
ports:
|
||||
- "7055:7055"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_MODULES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
tts:
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
container_name: tts-service
|
||||
ports:
|
||||
- "3002:9088"
|
||||
ipc: host
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "3006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_MODULES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
container_name: llm-tgi-gaudi-server
|
||||
depends_on:
|
||||
- tgi-service
|
||||
ports:
|
||||
- "3007:9000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
wav2lip-service:
|
||||
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
|
||||
container_name: wav2lip-service
|
||||
ports:
|
||||
- "7860:7860"
|
||||
ipc: host
|
||||
volumes:
|
||||
- ${PWD}:/outputs
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_MODULES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
DEVICE: ${DEVICE}
|
||||
INFERENCE_MODE: ${INFERENCE_MODE}
|
||||
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
|
||||
FACE: ${FACE}
|
||||
AUDIO: ${AUDIO}
|
||||
FACESIZE: ${FACESIZE}
|
||||
OUTFILE: ${OUTFILE}
|
||||
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
|
||||
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
|
||||
FPS: ${FPS}
|
||||
WAV2LIP_PORT: ${WAV2LIP_PORT}
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
animation:
|
||||
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
|
||||
container_name: animation-gaudi-server
|
||||
ports:
|
||||
- "3008:9066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HABANA_VISIBLE_MODULES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
restart: unless-stopped
|
||||
avatarchatbot-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
|
||||
container_name: avatarchatbot-gaudi-backend-server
|
||||
depends_on:
|
||||
- asr
|
||||
- llm
|
||||
- tts
|
||||
- animation
|
||||
ports:
|
||||
- "3009:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
|
||||
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
|
||||
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
|
||||
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
|
||||
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
|
||||
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
|
||||
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
73
AvatarChatbot/docker_image_build/build.yaml
Normal file
@@ -0,0 +1,73 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
avatarchatbot:
|
||||
build:
|
||||
args:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
no_proxy: ${no_proxy}
|
||||
context: ../
|
||||
dockerfile: ./Dockerfile
|
||||
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
|
||||
whisper-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
|
||||
whisper:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/dependency/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
asr:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/asr/whisper/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
|
||||
llm-tgi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/llms/text-generation/tgi/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
|
||||
speecht5-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
|
||||
speecht5:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/dependency/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
|
||||
tts:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
wav2lip-gaudi:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
|
||||
wav2lip:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/animation/wav2lip/dependency/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
|
||||
animation:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/animation/wav2lip/Dockerfile
|
||||
extends: avatarchatbot
|
||||
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
|
||||
147
AvatarChatbot/tests/test_compose_on_gaudi.sh
Executable file
@@ -0,0 +1,147 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
|
||||
rm $LOG_PATH/*.log
|
||||
echo "Log files removed."
|
||||
else
|
||||
echo "No log files to remove."
|
||||
fi
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export DEVICE="hpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
export INFERENCE_MODE='wav2lip+gfpgan'
|
||||
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
||||
export FACE="assets/img/avatar1.jpg"
|
||||
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
|
||||
export AUDIO='None'
|
||||
export FACESIZE=96
|
||||
export OUTFILE="/outputs/result.mp4"
|
||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
||||
export UPSCALE_FACTOR=1
|
||||
export FPS=10
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d
|
||||
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
n=$((n+1))
|
||||
done
|
||||
|
||||
# sleep 5m
|
||||
echo "All services are up and running"
|
||||
sleep 5s
|
||||
}
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
cd $WORKPATH
|
||||
result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
|
||||
echo "result is === $result"
|
||||
if [[ $result == *"mp4"* ]]; then
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
|
||||
docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
|
||||
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
|
||||
docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
|
||||
#function validate_frontend() {
|
||||
|
||||
#}
|
||||
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
docker compose down
|
||||
}
|
||||
|
||||
|
||||
function main() {
|
||||
stop_docker
|
||||
echo y | docker builder prune --all
|
||||
echo y | docker image prune
|
||||
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
start_services
|
||||
# validate_microservices
|
||||
validate_megaservice
|
||||
# validate_frontend
|
||||
|
||||
stop_docker
|
||||
echo y | docker builder prune --all
|
||||
echo y | docker image prune
|
||||
|
||||
}
|
||||
|
||||
|
||||
main
|
||||
142
AvatarChatbot/tests/test_compose_on_xeon.sh
Executable file
@@ -0,0 +1,142 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
|
||||
rm $LOG_PATH/*.log
|
||||
echo "Log files removed."
|
||||
else
|
||||
echo "No log files to remove."
|
||||
fi
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
|
||||
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
|
||||
function start_services() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
|
||||
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
|
||||
export TGI_LLM_ENDPOINT=http://$host_ip:3006
|
||||
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
|
||||
|
||||
export ASR_ENDPOINT=http://$host_ip:7066
|
||||
export TTS_ENDPOINT=http://$host_ip:7055
|
||||
export WAV2LIP_ENDPOINT=http://$host_ip:7860
|
||||
|
||||
export MEGA_SERVICE_HOST_IP=${host_ip}
|
||||
export ASR_SERVICE_HOST_IP=${host_ip}
|
||||
export TTS_SERVICE_HOST_IP=${host_ip}
|
||||
export LLM_SERVICE_HOST_IP=${host_ip}
|
||||
export ANIMATION_SERVICE_HOST_IP=${host_ip}
|
||||
|
||||
export MEGA_SERVICE_PORT=8888
|
||||
export ASR_SERVICE_PORT=3001
|
||||
export TTS_SERVICE_PORT=3002
|
||||
export LLM_SERVICE_PORT=3007
|
||||
export ANIMATION_SERVICE_PORT=3008
|
||||
|
||||
export DEVICE="cpu"
|
||||
export WAV2LIP_PORT=7860
|
||||
export INFERENCE_MODE='wav2lip+gfpgan'
|
||||
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
|
||||
export FACE="assets/img/avatar5.png"
|
||||
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
|
||||
export AUDIO='None'
|
||||
export FACESIZE=96
|
||||
export OUTFILE="/outputs/result.mp4"
|
||||
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
|
||||
export UPSCALE_FACTOR=1
|
||||
export FPS=10
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose up -d
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
|
||||
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
n=$((n+1))
|
||||
done
|
||||
echo "All services are up and running"
|
||||
sleep 5s
|
||||
}
|
||||
|
||||
|
||||
function validate_megaservice() {
|
||||
cd $WORKPATH
|
||||
result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
|
||||
echo "result is === $result"
|
||||
if [[ $result == *"mp4"* ]]; then
|
||||
echo "Result correct."
|
||||
else
|
||||
docker logs whisper-service > $LOG_PATH/whisper-service.log
|
||||
docker logs asr-service > $LOG_PATH/asr-service.log
|
||||
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
|
||||
docker logs tts-service > $LOG_PATH/tts-service.log
|
||||
docker logs tgi-service > $LOG_PATH/tgi-service.log
|
||||
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
|
||||
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
|
||||
docker logs animation-server > $LOG_PATH/animation-server.log
|
||||
|
||||
echo "Result wrong."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
}
|
||||
|
||||
|
||||
#function validate_frontend() {
|
||||
|
||||
#}
|
||||
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/cpu/xeon
|
||||
docker compose down
|
||||
}
|
||||
|
||||
|
||||
function main() {
|
||||
|
||||
stop_docker
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
start_services
|
||||
# validate_microservices
|
||||
validate_megaservice
|
||||
# validate_frontend
|
||||
stop_docker
|
||||
|
||||
echo y | docker builder prune --all
|
||||
echo y | docker image prune
|
||||
|
||||
}
|
||||
|
||||
|
||||
main
|
||||
349
AvatarChatbot/ui/gradio/app_gradio_demo_avatarchatbot.py
Normal file
@@ -0,0 +1,349 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
import aiohttp
|
||||
import docker
|
||||
import ffmpeg
|
||||
import gradio as gr
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
from PIL import Image
|
||||
|
||||
|
||||
# %% Docker Management
|
||||
def update_env_var_in_container(container_name, env_var, new_value):
|
||||
return
|
||||
|
||||
|
||||
# %% AudioQnA functions
|
||||
def preprocess_audio(audio):
|
||||
"""The audio data is a 16-bit integer array with values ranging from -32768 to 32767 and the shape of the audio data array is (samples,)"""
|
||||
sr, y = audio
|
||||
|
||||
# Convert to normalized float32 audio
|
||||
y = y.astype(np.float32)
|
||||
y /= np.max(np.abs(y))
|
||||
|
||||
# Save to memory
|
||||
buf = io.BytesIO()
|
||||
sf.write(buf, y, sr, format="WAV")
|
||||
buf.seek(0) # Reset the buffer position to the beginning
|
||||
|
||||
# Encode the WAV file to base64 string
|
||||
base64_bytes = base64.b64encode(buf.read())
|
||||
base64_string = base64_bytes.decode("utf-8")
|
||||
return base64_string
|
||||
|
||||
|
||||
def base64_to_int16(base64_string):
|
||||
wav_bytes = base64.b64decode(base64_string)
|
||||
buf = io.BytesIO(wav_bytes)
|
||||
y, sr = sf.read(buf, dtype="int16")
|
||||
return sr, y
|
||||
|
||||
|
||||
async def transcribe(audio_input, face_input, model_choice):
|
||||
"""Input: mic audio; Output: ai audio, text, text"""
|
||||
global ai_chatbot_url, chat_history, count
|
||||
chat_history = ""
|
||||
# Preprocess the audio
|
||||
base64bytestr = preprocess_audio(audio_input)
|
||||
|
||||
# Send the audio to the AvatarChatbot backend server endpoint
|
||||
initial_inputs = {"audio": base64bytestr, "max_tokens": 64}
|
||||
|
||||
# TO-DO: update wav2lip-service with the chosen face_input
|
||||
# update_env_var_in_container("wav2lip-service", "DEVICE", "new_device_value")
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
async with session.post(ai_chatbot_url, json=initial_inputs) as response:
|
||||
|
||||
# Check the response status code
|
||||
if response.status == 200:
|
||||
# response_json = await response.json()
|
||||
# # Decode the base64 string
|
||||
# sampling_rate, audio_int16 = base64_to_int16(response_json["byte_str"])
|
||||
# chat_history += f"User: {response_json['query']}\n\n"
|
||||
# chat_ai = response_json["text"]
|
||||
# hitted_ends = [",", ".", "?", "!", "。", ";"]
|
||||
# last_punc_idx = max([chat_ai.rfind(punc) for punc in hitted_ends])
|
||||
# if last_punc_idx != -1:
|
||||
# chat_ai = chat_ai[: last_punc_idx + 1]
|
||||
# chat_history += f"AI: {chat_ai}"
|
||||
# chat_history = chat_history.replace("OPEX", "OPEA")
|
||||
# return (sampling_rate, audio_int16) # handle the response
|
||||
|
||||
result = await response.text()
|
||||
return "docker_compose/intel/hpu/gaudi/result.mp4"
|
||||
else:
|
||||
return {"error": "Failed to transcribe audio", "status_code": response.status_code}
|
||||
|
||||
|
||||
def resize_image(image_pil, size=(720, 720)):
|
||||
"""Resize the image to the specified size."""
|
||||
return image_pil.resize(size, Image.LANCZOS)
|
||||
|
||||
|
||||
def resize_video(video_path, save_path, size=(720, 1280)):
|
||||
"""Resize the video to the specified size, and save to the save path."""
|
||||
ffmpeg.input(video_path).output(save_path, vf=f"scale={size[0]}:{size[1]}").overwrite_output().run()
|
||||
|
||||
|
||||
# %% AI Avatar demo function
|
||||
async def aiavatar_demo(audio_input, face_input, model_choice):
|
||||
"""Input: mic/preloaded audio, avatar file path;
|
||||
Output: ai video"""
|
||||
# Wait for response from AvatarChatbot backend
|
||||
output_video = await transcribe(audio_input, face_input, model_choice) # output video path
|
||||
|
||||
if isinstance(output_video, dict): # in case of an error
|
||||
return None, None
|
||||
else:
|
||||
return output_video
|
||||
|
||||
|
||||
# %% Main
|
||||
if __name__ == "__main__":
|
||||
# HOST_IP = os.getenv("host_ip")
|
||||
HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip()
|
||||
|
||||
# Fetch the AudioQnA backend server
|
||||
ai_chatbot_url = f"http://{HOST_IP}:3009/v1/avatarchatbot"
|
||||
|
||||
# Collect chat history to print in the interface
|
||||
chat_history = ""
|
||||
|
||||
# Prepare 3 image paths and 3 video paths
|
||||
# image_pils = [
|
||||
# Image.open(os.path.join("assets/img/woman1.png")),
|
||||
# Image.open(os.path.join("assets/img/man1.png")),
|
||||
# Image.open(os.path.join("assets/img/woman2.png")),
|
||||
# ]
|
||||
|
||||
# video_paths = [
|
||||
# os.path.join("assets/video/man1.mp4"),
|
||||
# os.path.join("assets/video/woman2.mp4"),
|
||||
# os.path.join("assets/video/man4.mp4"),
|
||||
# ]
|
||||
|
||||
def image_to_base64(image_path):
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode("utf-8")
|
||||
|
||||
# Convert your images to Base64
|
||||
xeon_base64 = image_to_base64("assets/img/xeon.jpg")
|
||||
gaudi_base64 = image_to_base64("assets/img/gaudi.png")
|
||||
|
||||
# List of prerecorded WAV files containing audio questions
|
||||
# audio_filepaths = [
|
||||
# "assets/audio/intel2.wav",
|
||||
# "assets/audio/intel4.wav",
|
||||
# ]
|
||||
# audio_questions = [
|
||||
# "1. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?",
|
||||
# "2. What kinds of Intel AI tools are available to accelerate AI workloads?",
|
||||
# ]
|
||||
|
||||
# Demo frontend
|
||||
demo = gr.Blocks()
|
||||
with demo:
|
||||
# Define processing functions
|
||||
count = 0
|
||||
|
||||
# Make necessary folders:
|
||||
if not os.path.exists("inputs"):
|
||||
os.makedirs("inputs")
|
||||
if not os.path.exists("outputs"):
|
||||
os.makedirs("outputs")
|
||||
|
||||
def initial_process(audio_input, face_input, model_choice):
|
||||
global count
|
||||
start_time = time.time()
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
video_file = loop.run_until_complete(aiavatar_demo(audio_input, face_input, model_choice))
|
||||
count += 1
|
||||
end_time = time.time()
|
||||
return video_file, f"The entire application took {(end_time - start_time):.1f} seconds"
|
||||
|
||||
# def update_selected_image_state(image_index):
|
||||
# image_index = int(image_index)
|
||||
# selected_image_state.value = image_index
|
||||
# # change image_input here
|
||||
# if image_index < len(image_pils):
|
||||
# return f"inputs/face_{image_index}.png"
|
||||
# else:
|
||||
# return f"inputs/video_{image_index - len(image_pils)}.mp4"
|
||||
|
||||
# def update_audio_input(audio_choice):
|
||||
# if audio_choice:
|
||||
# audio_index = int(audio_choice.split(".")[0]) - 1
|
||||
# audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav"
|
||||
# shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio)
|
||||
# return audio_filepath_gradio
|
||||
|
||||
# UI Components
|
||||
# Title & Introduction
|
||||
gr.Markdown("<h1 style='font-size: 36px;'>A PyTorch and OPEA based AI Avatar Audio Chatbot</h1>")
|
||||
with gr.Row():
|
||||
with gr.Column(scale=8):
|
||||
gr.Markdown(
|
||||
"""
|
||||
<p style='font-size: 24px;'>Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and <strong>OPEA (Open Platform for Enterprise AI) v0.8</strong> to provide you with a human-like conversational experience. It's run on Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.<br>
|
||||
Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.</p>
|
||||
"""
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
# with gr.Row():
|
||||
# gr.Markdown(f"""
|
||||
# <img src='data:image/png;base64,{opea_qr_base64}' alt='OPEA QR Code' style='width: 150px; height: auto;'>
|
||||
# """, label="OPEA QR Code")
|
||||
# gr.Markdown(f"""
|
||||
# <img src='data:image/png;base64,{opea_gh_qr_base64}' alt='OPEA GitHub QR Code' style='width: 150px; height: auto;'>
|
||||
# """, label="OPEA GitHub QR Code")
|
||||
with gr.Row():
|
||||
gr.Markdown(
|
||||
f"""
|
||||
<img src='data:image/png;base64,{gaudi_base64}' alt='Intel®Gaudi' style='width: 120px; height: auto;'>""",
|
||||
label="Intel®Gaudi",
|
||||
)
|
||||
gr.Markdown(
|
||||
f"""
|
||||
<img src='data:image/png;base64,{xeon_base64}' alt='Intel®Xeon' style='width: 120px; height: auto;'>""",
|
||||
label="Intel®Xeon",
|
||||
)
|
||||
gr.Markdown("<hr>") # Divider
|
||||
|
||||
# Inputs
|
||||
# Image gallery
|
||||
selected_image_state = gr.State(value=-1)
|
||||
image_clicks = []
|
||||
image_click_buttons = []
|
||||
video_clicks = []
|
||||
video_click_buttons = []
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1):
|
||||
audio_input = gr.Audio(
|
||||
sources=["upload", "microphone"], format="wav", label="🎤 or 📤 for your Input audio!"
|
||||
)
|
||||
# audio_choice = gr.Dropdown(
|
||||
# choices=audio_questions,
|
||||
# label="Choose an audio question",
|
||||
# value=None, # default value
|
||||
# )
|
||||
# Update audio_input when a selection is made from the dropdown
|
||||
# audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input)
|
||||
|
||||
face_input = gr.File(
|
||||
file_count="single",
|
||||
file_types=["image", "video"],
|
||||
label="Choose an avatar or 📤 an image or video!",
|
||||
)
|
||||
model_choice = gr.Dropdown(
|
||||
choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"],
|
||||
label="Choose a DL model",
|
||||
)
|
||||
# with gr.Column(scale=2):
|
||||
# # Display 3 images and buttons
|
||||
# with gr.Row():
|
||||
# for i, image_pil in enumerate(image_pils):
|
||||
# image_pil = resize_image(image_pil)
|
||||
# save_path = f"inputs/face_{int(i)}.png"
|
||||
# image_pil.save(save_path, "PNG")
|
||||
# image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {int(i)+1}"))
|
||||
# with gr.Row():
|
||||
# for i in range(len(image_pils)):
|
||||
# image_click_buttons.append(gr.Button(f"Use Image {i+1}"))
|
||||
|
||||
# # Display 3 videos and buttons
|
||||
# with gr.Row():
|
||||
# for i, video_path in enumerate(video_paths):
|
||||
# save_path = f"inputs/video_{int(i)}.mp4"
|
||||
# resize_video(video_path, save_path)
|
||||
# video_clicks.append(gr.Video(value=save_path, label=f"Video {int(i)+1}"))
|
||||
# with gr.Row():
|
||||
# for i in range(len(video_paths)):
|
||||
# video_click_buttons.append(gr.Button(f"Use Video {int(i)+1}"))
|
||||
|
||||
submit_button = gr.Button("Submit")
|
||||
|
||||
# Outputs
|
||||
gr.Markdown("<hr>") # Divider
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720)
|
||||
video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds")
|
||||
|
||||
# Technical details
|
||||
gr.Markdown("<hr>") # Divider
|
||||
with gr.Row():
|
||||
gr.Markdown(
|
||||
"""
|
||||
<p style='font-size: 24px;'>OPEA megaservice deployed: <br>
|
||||
<ul style='font-size: 24px;'>
|
||||
<li><strong>AvatarChatbot</strong></li>
|
||||
</ul></p>
|
||||
<p style='font-size: 24px;'>OPEA microservices deployed:
|
||||
<ul style='font-size: 24px;'>
|
||||
<li><strong>ASR</strong> (service: opea/whisper-gaudi, model: openai/whisper-small)</li>
|
||||
<li><strong>LLM 'text-generation'</strong> (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)</li>
|
||||
<li><strong>TTS</strong> (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)</li>
|
||||
<li><strong>Animation</strong> (service: opea/animation, model: wav2lip+gfpgan)</li>
|
||||
</ul></p>
|
||||
"""
|
||||
)
|
||||
with gr.Row():
|
||||
gr.Image("assets/img/flowchart.png", label="Megaservice Flowchart")
|
||||
with gr.Row():
|
||||
gr.Markdown(
|
||||
"""
|
||||
<p style='font-size: 24px;'>The AI Avatar Audio Chatbot is powered by the following Intel® AI software:<br>
|
||||
<ul style='font-size: 24px;'>
|
||||
<li><strong>Intel Gaudi Software v1.17.0</strong></li>
|
||||
<li><strong>PyTorch v2.3.1 (Eager mode + torch.compile) </strong></li>
|
||||
<li><strong>HPU Graph</strong></li>
|
||||
<li><strong>Intel Neural Compressor (INC)</strong></li>
|
||||
</ul></p>
|
||||
"""
|
||||
)
|
||||
|
||||
# Disclaimer
|
||||
gr.Markdown("<hr>") # Divider
|
||||
gr.Markdown("<h2 style='font-size: 24px;'>Notices & Disclaimers</h1>")
|
||||
gr.Markdown(
|
||||
"""
|
||||
<p style='font-size: 20px;'>Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.<br></p>
|
||||
<p style='font-size: 20px;'>© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.<br></p>
|
||||
<p style='font-size: 20px;'>You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.<br></p>
|
||||
"""
|
||||
)
|
||||
|
||||
# State transitions
|
||||
# for i in range(len(image_pils)):
|
||||
# image_click_buttons[i].click(
|
||||
# update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input]
|
||||
# )
|
||||
# for i in range(len(video_paths)):
|
||||
# video_click_buttons[i].click(
|
||||
# update_selected_image_state,
|
||||
# inputs=[gr.Number(value=i + len(image_pils), visible=False)],
|
||||
# outputs=[face_input],
|
||||
# )
|
||||
submit_button.click(
|
||||
initial_process,
|
||||
inputs=[audio_input, face_input, model_choice],
|
||||
outputs=[
|
||||
video_output,
|
||||
video_time_text,
|
||||
],
|
||||
)
|
||||
|
||||
demo.queue().launch(server_name="0.0.0.0", server_port=7861)
|
||||
@@ -206,8 +206,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
|
||||
|
||||
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
|
||||
|
||||
### Deploy ChatQnA on Xeon
|
||||
|
||||
@@ -41,11 +41,11 @@ class MultiHop_Evaluator(Evaluator):
|
||||
return []
|
||||
|
||||
def get_retrieved_documents(self, query, arguments):
|
||||
data = {"text": query}
|
||||
data = {"inputs": query}
|
||||
headers = {"Content-Type": "application/json"}
|
||||
response = requests.post(arguments.embedding_endpoint, data=json.dumps(data), headers=headers)
|
||||
response = requests.post(arguments.tei_embedding_endpoint + "/embed", data=json.dumps(data), headers=headers)
|
||||
if response.ok:
|
||||
embedding = response.json()["embedding"]
|
||||
embedding = response.json()[0]
|
||||
else:
|
||||
print(f"Request for embedding failed due to {response.text}.")
|
||||
return []
|
||||
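For context, the updated code path calls the TEI server's `/embed` route directly, which accepts `{"inputs": ...}` and returns a list of embedding vectors (hence `response.json()[0]`). A request against it looks roughly like this (a sketch; the port is an assumption and depends on how the `tei_embedding_endpoint` service is exposed in your deployment):

```bash
# Hypothetical TEI embedding request matching the new tei_embedding_endpoint + "/embed" call
curl http://${host_ip}:6006/embed \
  -X POST \
  -d '{"inputs":"What is Deep Learning?"}' \
  -H 'Content-Type: application/json'
```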
|
||||
@@ -47,6 +47,7 @@ RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
|
||||
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
|
||||
LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3")
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
@@ -61,7 +62,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["model"] = LLM_MODEL
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
@@ -165,7 +166,10 @@ def align_generator(self, gen, **kwargs):
|
||||
try:
|
||||
# sometimes yield empty chunk, do a fallback here
|
||||
json_data = json.loads(json_str)
|
||||
if json_data["choices"][0]["finish_reason"] != "eos_token":
|
||||
if (
|
||||
json_data["choices"][0]["finish_reason"] != "eos_token"
|
||||
and "content" in json_data["choices"][0]["delta"]
|
||||
):
|
||||
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
|
||||
except Exception as e:
|
||||
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
|
||||
|
||||
@@ -2,104 +2,6 @@
|
||||
|
||||
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
|
||||
|
||||
Please follow the instructions below to set up Ollama on your PC. This sets up the entrypoint Ollama needs to work with the ChatQnA examples.
|
||||
|
||||
### Set Up Ollama LLM Service
|
||||
|
||||
#### Install Ollama Service
|
||||
|
||||
Install Ollama service with one command:
|
||||
|
||||
```
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
```
|
||||
|
||||
#### Set Ollama Service Configuration
|
||||
|
||||
The Ollama service configuration file is /etc/systemd/system/ollama.service. Edit the file to set the OLLAMA_HOST environment variable.
|
||||
Replace **<host_ip>** with your host IPv4 address (please use the external public IP). For example, if the host_ip is 10.132.x.y, set `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
|
||||
|
||||
```
|
||||
Environment="OLLAMA_HOST=host_ip:11434"
|
||||
```
|
||||
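If you would rather not edit the unit file in place, an equivalent approach is a systemd drop-in (a sketch; the IP is the same placeholder as above):

```bash
# Create a drop-in override instead of editing ollama.service directly
sudo mkdir -p /etc/systemd/system/ollama.service.d
sudo tee /etc/systemd/system/ollama.service.d/override.conf > /dev/null <<'EOF'
[Service]
Environment="OLLAMA_HOST=10.132.x.y:11434"
EOF
# Then reload and restart the service as shown in the "Restart Ollama services" step below
```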
|
||||
#### Set https_proxy environment for Ollama
|
||||
|
||||
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file.
|
||||
|
||||
```
|
||||
Environment="https_proxy=Your_HTTPS_Proxy"
|
||||
```
|
||||
|
||||
#### Restart Ollama services
|
||||
|
||||
```
|
||||
$ sudo systemctl daemon-reload
|
||||
$ sudo systemctl restart ollama.service
|
||||
```
|
||||
|
||||
#### Check that the service started
|
||||
|
||||
```
|
||||
netstat -tuln | grep 11434
|
||||
```
|
||||
|
||||
The output is:
|
||||
|
||||
```
|
||||
tcp 0 0 10.132.x.y:11434 0.0.0.0:* LISTEN
|
||||
```
|
||||
|
||||
#### Pull Ollama LLM model
|
||||
|
||||
Run the following commands to download the LLM model. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration).
|
||||
|
||||
```
|
||||
export host_ip=<host_ip>
|
||||
export OLLAMA_HOST=http://${host_ip}:11434
|
||||
ollama pull llama3.2
|
||||
```
|
||||
|
||||
After downloading the models, you can list them with `ollama list`.
|
||||
|
||||
The output should be similar to the following:
|
||||
|
||||
```
|
||||
NAME ID SIZE MODIFIED
|
||||
llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
|
||||
```
|
||||
|
||||
### Consume Ollama LLM Service
|
||||
|
||||
Access the Ollama service to verify that it is functioning correctly.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
The output is similar to the following:
|
||||
|
||||
```
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
|
||||
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
|
||||
...
|
||||
```
|
||||
|
||||
## 🚀 Build Docker Images
|
||||
|
||||
First of all, you need to build the Docker images locally and install the required Python package.
|
||||
@@ -122,20 +24,14 @@ export https_proxy="Your_HTTPs_Proxy"
|
||||
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 2 Build LLM Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
### 3. Build Dataprep Image
|
||||
### 2. Build Dataprep Image
|
||||
|
||||
```bash
|
||||
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
|
||||
cd ..
|
||||
```
|
||||
|
||||
### 4. Build MegaService Docker Image
|
||||
### 3. Build MegaService Docker Image
|
||||
|
||||
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image via the command below:
|
||||
|
||||
@@ -146,7 +42,7 @@ cd GenAIExamples/ChatQnA
|
||||
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
|
||||
```
|
||||
|
||||
### 5. Build UI Docker Image
|
||||
### 4. Build UI Docker Image
|
||||
|
||||
Build the frontend Docker image via the command below:
|
||||
|
||||
@@ -155,7 +51,7 @@ cd ~/OPEA/GenAIExamples/ChatQnA/ui
|
||||
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
|
||||
```
|
||||
|
||||
### 6. Build Nginx Docker Image
|
||||
### 5. Build Nginx Docker Image
|
||||
|
||||
```bash
|
||||
cd GenAIComps
|
||||
@@ -166,10 +62,9 @@ Then run the command `docker images`, you will have the following 6 Docker Image
|
||||
|
||||
1. `opea/dataprep-redis:latest`
|
||||
2. `opea/retriever-redis:latest`
|
||||
3. `opea/llm-ollama:latest`
|
||||
4. `opea/chatqna:latest`
|
||||
5. `opea/chatqna-ui:latest`
|
||||
6. `opea/nginx:latest`
|
||||
3. `opea/chatqna:latest`
|
||||
4. `opea/chatqna-ui:latest`
|
||||
5. `opea/nginx:latest`
|
||||
|
||||
## 🚀 Start Microservices
|
||||
|
||||
@@ -195,10 +90,10 @@ For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users,
|
||||
export your_hf_api_token="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
**Append the value of the public IP address to the no_proxy list**
|
||||
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
|
||||
|
||||
```
|
||||
export your_no_proxy=${your_no_proxy},"External_Public_IP"
|
||||
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-aipc-backend-server,tei-embedding-service,retriever,tei-reranking-service,redis-vector-db,dataprep-redis-service
|
||||
```
|
||||
|
||||
- Linux PC
|
||||
@@ -211,7 +106,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
|
||||
export OLLAMA_ENDPOINT=http://${host_ip}:11434
|
||||
export OLLAMA_HOST=${host_ip}
|
||||
export OLLAMA_MODEL="llama3.2"
|
||||
```
|
||||
|
||||
@@ -222,7 +117,7 @@ set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
|
||||
set RERANK_MODEL_ID=BAAI/bge-reranker-base
|
||||
set INDEX_NAME=rag-redis
|
||||
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
|
||||
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
|
||||
set OLLAMA_HOST=host.docker.internal
|
||||
set OLLAMA_MODEL="llama3.2"
|
||||
```
|
||||
|
||||
@@ -277,16 +172,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
|
||||
```
|
||||
|
||||
5. LLM Microservice
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:9000/v1/chat/completions\
|
||||
-X POST \
|
||||
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
|
||||
-H 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
6. MegaService
|
||||
5. MegaService
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
|
||||
@@ -294,7 +180,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
|
||||
}'
|
||||
```
|
||||
|
||||
7. Upload RAG Files through Dataprep Microservice (Optional)
|
||||
6. Upload RAG Files through Dataprep Microservice (Optional)
|
||||
|
||||
To chat with retrieved information, you need to upload a file using Dataprep service.
|
||||
|
||||
@@ -334,4 +220,4 @@ the output is:
|
||||
|
||||
## 🚀 Launch the UI
|
||||
|
||||
To access the frontend, open the following URL in your browser: http://{host_ip}:5173.
|
||||
To access the frontend, open the following URL in your browser: http://{host_ip}:80.
|
||||
|
||||
@@ -72,22 +72,21 @@ services:
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
llm:
|
||||
image: ${REGISTRY:-opea}/llm-ollama
|
||||
container_name: llm-ollama
|
||||
ollama-service:
|
||||
image: ollama/ollama
|
||||
container_name: ollama
|
||||
ports:
|
||||
- "9000:9000"
|
||||
ipc: host
|
||||
- "11434:11434"
|
||||
volumes:
|
||||
- ollama:/root/.ollama
|
||||
entrypoint: ["bash", "-c"]
|
||||
command: ["ollama serve & sleep 10 && ollama run ${OLLAMA_MODEL} & wait"]
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
|
||||
OLLAMA_MODEL: ${OLLAMA_MODEL}
|
||||
chaqna-aipc-backend-server:
|
||||
|
||||
chatqna-aipc-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-aipc-backend-server
|
||||
depends_on:
|
||||
@@ -96,29 +95,29 @@ services:
|
||||
- tei-embedding-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- llm
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-aipc-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=80
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=80
|
||||
- LLM_SERVER_HOST_IP=llm
|
||||
- LLM_SERVER_PORT=9000
|
||||
- LLM_SERVER_HOST_IP=${OLLAMA_HOST}
|
||||
- LLM_SERVER_PORT=11434
|
||||
- LLM_MODEL=${OLLAMA_MODEL}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-aipc-ui-server:
|
||||
chatqna-aipc-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-aipc-ui-server
|
||||
depends_on:
|
||||
- chaqna-aipc-backend-server
|
||||
- chatqna-aipc-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
@@ -127,28 +126,31 @@ services:
- http_proxy=${http_proxy}
ipc: host
restart: always
chaqna-aipc-nginx-server:
chatqna-aipc-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-aipc-nginx-server
container_name: chatqna-aipc-nginx-server
depends_on:
- chaqna-aipc-backend-server
- chaqna-aipc-ui-server
- chatqna-aipc-backend-server
- chatqna-aipc-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_IP=chatqna-aipc-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_IP=chatqna-aipc-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
ipc: host
restart: always

volumes:
ollama:

networks:
default:
driver: bridge
@@ -16,5 +16,5 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
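Once these variables are exported and the Ollama container is up, a quick request against the Ollama endpoint confirms that the configured model is actually being served. This is a sketch added for convenience, not part of the original set_env.sh; it assumes the default Ollama port 11434 and the `llama3.2` model set above.

```bash
# Ask Ollama for a short, non-streaming completion to verify the endpoint and model
curl ${OLLAMA_ENDPOINT}/api/generate \
  -d '{"model": "'"${OLLAMA_MODEL}"'", "prompt": "Hello", "stream": false}'
```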
@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```

@@ -27,6 +25,9 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```

3. Set up other environment variables:
@@ -47,13 +48,13 @@ docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```

In following cases, you could build docker image from source by yourself.
NB: You should build docker image from source by yourself if:

- Failed to download the docker image.
- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
- You can't download the docker image.
- You want to use a specific version of Docker image.

- If you want to use a specific version of Docker image.

Please refer to 'Build Docker Images' in below.
Please refer to ['Build Docker Images'](#🚀-build-docker-images) in below.
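If you stay with the prebuilt images but want reproducible deployments, the same pulls work with an explicit release tag instead of `latest`. The `1.0` tag below is only an illustration; check the tags actually published under the opea organization on Docker Hub before pinning one.

```bash
# Pin a specific published release instead of the moving "latest" tag (tag is illustrative)
docker pull opea/chatqna:1.0
docker pull opea/chatqna-ui:1.0
```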
## QuickStart: 3.Consume the ChatQnA Service
@@ -97,6 +98,11 @@ After launching your instance, you can connect to it using SSH (for Linux instan

First of all, you need to build Docker Images locally and install the python package of it.

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 1. Build Retriever Image

```bash
@@ -189,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```

2. Offline

@@ -203,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
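For the offline path above, the model weights have to be staged under `$model_path` beforehand. One way to do that on a machine that still has connectivity is `huggingface-cli`; this is a sketch added for illustration, not part of the original instructions, and it reuses the mirror endpoint from the online case.

```bash
# Optional mirror, same as the online case; log in with `huggingface-cli login` first if the model requires it
export HF_ENDPOINT="https://hf-mirror.com"
huggingface-cli download Intel/neural-chat-7b-v3-3 --local-dir /path/to/model
```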
### Setup Environment Variables
@@ -213,8 +219,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}

@@ -225,6 +229,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```

3. Set up other environment variables:
@@ -305,7 +311,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
Try the command below to check whether the LLM serving is ready.

```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs tgi-service | grep Connected
```

If the service is ready, you will get the response like below.
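Another quick readiness check, complementary to grepping the logs, is to send a minimal request straight to the TGI endpoint. This is only a sketch added here for convenience; it assumes TGI is published on host port 9009 as in the compose files above.

```bash
# A small generation request; a JSON response with "generated_text" means the model is loaded
curl http://${host_ip}:9009/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 17}}'
```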
@@ -111,7 +111,7 @@ Build frontend Docker image that enables Conversational experience with ChatQnA
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**

```bash
cd GenAIExamples/ChatQnA//ui
cd GenAIExamples/ChatQnA/ui
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .

@@ -167,10 +167,10 @@ export host_ip="External_Public_IP"
export your_hf_api_token="Your_Huggingface_API_Token"
```

**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**

```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-qdrant-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```

```bash
@@ -73,7 +73,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

@@ -112,6 +112,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -72,7 +72,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "6042:80"

@@ -111,6 +111,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -110,6 +110,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm_service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
@@ -57,7 +57,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"

@@ -93,6 +93,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
0
ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh
Normal file → Executable file
@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```

@@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -70,6 +70,11 @@ curl http://${host_ip}:8888/v1/chatqna \

First of all, you need to build Docker Images locally. This step can be ignored after the Docker images published to Docker hub.

```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```

### 1. Build Retriever Image

```bash
@@ -98,7 +103,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```

@@ -118,7 +123,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
@@ -211,8 +216,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}

@@ -223,6 +226,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```

3. Set up other environment variables:
@@ -252,12 +257,6 @@ If use vllm for llm backend.
docker compose -f compose_vllm.yaml up -d
```

If use vllm-on-ray for llm backend.

```bash
docker compose -f compose_vllm_ray.yaml up -d
```

If you want to enable guardrails microservice in the pipeline, please follow the below command instead:

```bash
@@ -315,7 +314,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
Try the command below to check whether the LLM serving is ready.

```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs tgi-service | grep Connected
```

If the service is ready, you will get the response like below.
@@ -346,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```

```bash
#vLLM-on-Ray Service
curl http://${host_ip}:8006/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```

5. MegaService

```bash
@@ -26,25 +26,17 @@ services:
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
@@ -133,6 +125,7 @@ services:
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LLM_MODEL=${LLM_MODEL_ID}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
@@ -65,25 +65,17 @@ services:
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
@@ -176,6 +168,7 @@ services:
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LLM_MODEL=${LLM_MODEL_ID}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}

@@ -86,7 +78,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
ports:
- "8007:80"

@@ -104,7 +96,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server

@@ -128,6 +120,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
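Since the vllm-service above exposes an OpenAI-compatible API on host port 8007, a minimal smoke test after `docker compose up` could look like the following. This is a sketch added for illustration; the port and model variable follow the compose snippet above.

```bash
# Query the vLLM OpenAI-compatible chat endpoint directly
curl http://${host_ip}:8007/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "'"${LLM_MODEL_ID}"'", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```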
@@ -1,171 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
redis-vector-db:
|
||||
image: redis/redis-stack:7.2.0-v9
|
||||
container_name: redis-vector-db
|
||||
ports:
|
||||
- "6379:6379"
|
||||
- "8001:8001"
|
||||
dataprep-redis-service:
|
||||
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
|
||||
container_name: dataprep-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
ports:
|
||||
- "6007:6007"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
ports:
|
||||
- "7000:7000"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
container_name: tei-reranking-gaudi-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
|
||||
vllm-ray-service:
|
||||
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
|
||||
container_name: vllm-ray-gaudi-server
|
||||
ports:
|
||||
- "8006:8000"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
LLM_MODEL_ID: ${LLM_MODEL_ID}
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True"
|
||||
chatqna-gaudi-backend-server:
|
||||
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-backend-server
|
||||
depends_on:
|
||||
- redis-vector-db
|
||||
- tei-embedding-service
|
||||
- retriever
|
||||
- tei-reranking-service
|
||||
- vllm-ray-service
|
||||
ports:
|
||||
- "8888:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=vllm-ray-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-ui-server:
|
||||
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-ui-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
ports:
|
||||
- "5173:5173"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
ipc: host
|
||||
restart: always
|
||||
chatqna-gaudi-nginx-server:
|
||||
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
|
||||
container_name: chatqna-gaudi-nginx-server
|
||||
depends_on:
|
||||
- chatqna-gaudi-backend-server
|
||||
- chatqna-gaudi-ui-server
|
||||
ports:
|
||||
- "${NGINX_PORT:-80}:80"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
|
||||
- FRONTEND_SERVICE_PORT=5173
|
||||
- BACKEND_SERVICE_NAME=chatqna
|
||||
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
|
||||
- BACKEND_SERVICE_PORT=8888
|
||||
- DATAPREP_SERVICE_IP=dataprep-redis-service
|
||||
- DATAPREP_SERVICE_PORT=6007
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
@@ -26,25 +26,17 @@ services:
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/tei-gaudi:latest
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
container_name: tei-embedding-gaudi-server
|
||||
ports:
|
||||
- "8090:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
MAX_WARMUP_SEQUENCE_LENGTH: 512
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
@@ -109,6 +101,7 @@ services:
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
- LLM_MODEL=${LLM_MODEL_ID}
|
||||
- LOGFLAG=${LOGFLAG}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
|
||||
```bash
|
||||
# Example: host_ip="192.168.1.1"
|
||||
export host_ip="External_Public_IP"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
```
|
||||
|
||||
@@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste
|
||||
```bash
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
|
||||
```
|
||||
|
||||
3. Set up other environment variables:
|
||||
@@ -95,9 +95,9 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op

```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
cd ../../..
cd ../..
```

### 5. Build UI Docker Image

@@ -107,7 +107,7 @@ Construct the frontend Docker image using the command below:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
cd ../../../
```

### 6. Build React UI Docker Image (Optional)

@@ -117,7 +117,7 @@ Construct the frontend Docker image using the command below:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
cd ../../../..
cd ../../..
```

### 7. Build Nginx Docker Image
@@ -156,8 +156,6 @@ Change the `xxx_MODEL_ID` below for your needs.
|
||||
```bash
|
||||
# Example: host_ip="192.168.1.1"
|
||||
export host_ip="External_Public_IP"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
|
||||
# Example: NGINX_PORT=80
|
||||
export NGINX_PORT=${your_nginx_port}
|
||||
@@ -168,6 +166,8 @@ Change the `xxx_MODEL_ID` below for your needs.
|
||||
```bash
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
|
||||
```
|
||||
|
||||
3. Set up other environment variables:
|
||||
|
||||
@@ -20,10 +20,10 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_HOST: ${REDIS_HOST}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
TEI_ENDPOINT: http://tei-embedding-service:80
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
tei-embedding-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
@@ -39,13 +39,6 @@ services:
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
retriever:
|
||||
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
|
||||
container_name: retriever-redis-server
|
||||
@@ -58,12 +51,13 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
REDIS_URL: ${REDIS_URL}
|
||||
REDIS_URL: redis://redis-vector-db:6379
|
||||
REDIS_HOST: redis-vector-db
|
||||
INDEX_NAME: ${INDEX_NAME}
|
||||
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
|
||||
restart: unless-stopped
|
||||
tei-reranking-service:
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
image: ghcr.io/huggingface/text-embeddings-inference:1.5
|
||||
container_name: tei-reranking-server
|
||||
ports:
|
||||
- "8808:80"
|
||||
@@ -123,11 +117,14 @@ services:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
|
||||
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
|
||||
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
|
||||
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
|
||||
- MEGA_SERVICE_HOST_IP=chaqna-backend-server
|
||||
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
|
||||
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
|
||||
- RETRIEVER_SERVICE_HOST_IP=retriever
|
||||
- RERANK_SERVER_HOST_IP=tei-reranking-service
|
||||
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
|
||||
- LLM_SERVER_HOST_IP=tgi-service
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
|
||||
ipc: host
|
||||
restart: always
|
||||
chaqna-ui-server:
|
||||
|
||||
@@ -77,24 +77,6 @@ services:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-vllm-hpu:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
llm-vllm-ray:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
llm-vllm-ray-hpu:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
dataprep-redis:
build:
context: GenAIComps

@@ -119,6 +101,12 @@ services:
dockerfile: Dockerfile.cpu
extends: chatqna
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
vllm-hpu:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
extends: chatqna
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
nginx:
build:
context: GenAIComps
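With the `vllm-hpu` service added to build.yaml, the image can also be built on its own once the HabanaAI vllm-fork has been cloned next to the build file, mirroring what the test script below does. A sketch; REGISTRY and TAG default to `opea`/`latest` as elsewhere in these files.

```bash
# Clone the build context expected by the vllm-hpu service, then build only that image
git clone https://github.com/HabanaAI/vllm-fork.git
docker compose -f build.yaml build vllm-hpu --no-cache
```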
@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
- chaqna-xeon-backend-server: opea/chatqna:latest

Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
53
ChatQnA/kubernetes/intel/README_single_node.md
Normal file
@@ -0,0 +1,53 @@
# Deploy ChatQnA in Kubernetes Cluster on Single Node environment (Minikube)

The following instructions are to deploy the ChatQnA example on a single Node using Kubernetes for testing purposes.
## Minikube setup
1. Install [Minikube](https://minikube.sigs.k8s.io/docs/start/) following the quickstart guide
2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/)
3. Build the container images, following the steps under "Build Docker Images" section in the [docker-compose README](../../docker_compose/intel/cpu/xeon/README.md) to checkout [GenAIComps](https://github.com/opea-project/GenAIComps.git) and build other images with your changes for development.
```bash
# Example on building frontend Docker image
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
# etc...
```
The built images should be visible in the local Docker registry. Other images which have not been built with your changes (or not present in your local Docker registry) will be pulled from [docker hub](https://hub.docker.com/u/opea) by Minikube later in step 6.
```bash
docker images | grep opea
# REPOSITORY TAG IMAGE ID CREATED SIZE
# opea/chatqna-ui latest 8f2fa2523b85 6 days ago 1.56GB
# opea/chatqna latest 7f2602a7a266 6 days ago 821MB
# ...
```
4. The built images must be imported into the Minikube registry from the local Docker registry. This can be done using `minikube image load`.
```bash
minikube image load opea/chatqna
minikube image load opea/chatqna-ui
# etc...
```
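To double-check that the images actually made it into the Minikube registry (an optional step, not in the original instructions), `minikube image ls` lists what the cluster can see; the output shown is illustrative.

```bash
# List images available inside the Minikube node
minikube image ls | grep opea
# docker.io/opea/chatqna-ui:latest
# docker.io/opea/chatqna:latest
```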
5. Start the minikube cluster with `minikube start`, check that the minikube container (kicbase) is up with `docker ps`
```bash
docker ps
# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
# de088666cef2 gcr.io/k8s-minikube/kicbase:v0.0.45 "/usr/local/bin/entr…" 2 days ago Up 2 days 127.0.0.1:49157->22/tcp... minikube
```
6. Deploy the ChatQnA application with `kubectl apply -f chatqna.yaml`, check that the opea pods are in a running state with `kubectl get pods`
```bash
kubectl get pods
# NAME READY STATUS RESTARTS AGE
# chatqna-78b4f5865-qbzms 1/1 Running 0 2d3h
# chatqna-chatqna-ui-54c8dfb6cf-fll5g 1/1 Running 0 2d3h
# etc...
```

7. Forward the port of the chatqna service from Minikube to the host, and test the service as you would a normal k8s cluster deployment
```bash
# port-forward to expose the chatqna endpoint from within the minikube cluster
kubectl port-forward svc/chatqna 8888:8888
curl http://localhost:8888/v1/chatqna \
-H 'Content-Type: application/json' \
-d '{"messages": "What is the revenue of Nike in 2023?"}'

# Similarly port-forward to expose the chatqna-ui endpoint and use the UI at <machine-external-ip>:5173 in your browser
kubectl port-forward svc/chatqna-chatqna-ui 5173:5173
```
@@ -1100,7 +1100,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1180,7 +1180,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -1252,18 +1252,12 @@ spec:
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
- name: GUARDRAIL_SERVICE_PORT
|
||||
|
||||
@@ -922,7 +922,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -994,18 +994,12 @@ spec:
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -925,7 +925,7 @@ spec:
|
||||
runAsUser: 1000
|
||||
seccompProfile:
|
||||
type: RuntimeDefault
|
||||
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
imagePullPolicy: IfNotPresent
|
||||
volumeMounts:
|
||||
- mountPath: /data
|
||||
@@ -997,18 +997,12 @@ spec:
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -1257,18 +1257,12 @@ spec:
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
- name: GUARDRAIL_SERVICE_HOST_IP
|
||||
value: chatqna-guardrails-usvc
|
||||
- name: GUARDRAIL_SERVICE_PORT
|
||||
|
||||
@@ -997,18 +997,12 @@ spec:
|
||||
env:
|
||||
- name: LLM_SERVER_HOST_IP
|
||||
value: chatqna-tgi
|
||||
- name: LLM_SERVER_PORT
|
||||
value: "2080"
|
||||
- name: RERANK_SERVER_HOST_IP
|
||||
value: chatqna-teirerank
|
||||
- name: RERANK_SERVER_PORT
|
||||
value: "2082"
|
||||
- name: RETRIEVER_SERVICE_HOST_IP
|
||||
value: chatqna-retriever-usvc
|
||||
- name: EMBEDDING_SERVER_HOST_IP
|
||||
value: chatqna-tei
|
||||
- name: EMBEDDING_SERVER_PORT
|
||||
value: "2081"
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5

docker images && sleep 1s
@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/HabanaAI/vllm-fork.git

echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log

docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -1,183 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
set -e
|
||||
IMAGE_REPO=${IMAGE_REPO:-"opea"}
|
||||
IMAGE_TAG=${IMAGE_TAG:-"latest"}
|
||||
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
|
||||
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
|
||||
export REGISTRY=${IMAGE_REPO}
|
||||
export TAG=${IMAGE_TAG}
|
||||
|
||||
WORKPATH=$(dirname "$PWD")
|
||||
LOG_PATH="$WORKPATH/tests"
|
||||
ip_address=$(hostname -I | awk '{print $1}')
|
||||
|
||||
function build_docker_images() {
|
||||
cd $WORKPATH/docker_image_build
|
||||
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
|
||||
|
||||
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
|
||||
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu nginx"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
|
||||
docker pull ghcr.io/huggingface/tei-gaudi:latest
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
function start_services() {
|
||||
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
|
||||
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
|
||||
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
|
||||
export INDEX_NAME="rag-redis"
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
# Start Docker Containers
|
||||
docker compose -f compose_vllm_ray.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]]; do
|
||||
echo "n=$n"
|
||||
docker logs vllm-ray-gaudi-server > vllm_ray_service_start.log
|
||||
if grep -q "Warmup finished" vllm_ray_service_start.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
n=$((n+1))
|
||||
done
|
||||
}
|
||||
|
||||
function validate_services() {
|
||||
local URL="$1"
|
||||
local EXPECTED_RESULT="$2"
|
||||
local SERVICE_NAME="$3"
|
||||
local DOCKER_NAME="$4"
|
||||
local INPUT_DATA="$5"
|
||||
|
||||
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
|
||||
if [ "$HTTP_STATUS" -eq 200 ]; then
|
||||
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
|
||||
|
||||
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
|
||||
|
||||
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
|
||||
echo "[ $SERVICE_NAME ] Content is as expected."
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
|
||||
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
|
||||
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
|
||||
exit 1
|
||||
fi
|
||||
sleep 1s
|
||||
}
|
||||
|
||||
function validate_microservices() {
|
||||
# Check if the microservices are running correctly.
|
||||
|
||||
# tei for embedding service
|
||||
validate_services \
|
||||
"${ip_address}:8090/embed" \
|
||||
"\[\[" \
|
||||
"tei-embedding" \
|
||||
"tei-embedding-gaudi-server" \
|
||||
'{"inputs":"What is Deep Learning?"}'
|
||||
|
||||
sleep 1m # retrieval can't curl as expected, try to wait for more time
|
||||
|
||||
# retrieval microservice
|
||||
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
|
||||
validate_services \
|
||||
"${ip_address}:7000/v1/retrieval" \
|
||||
" " \
|
||||
"retrieval" \
|
||||
"retriever-redis-server" \
|
||||
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
|
||||
|
||||
# tei for rerank microservice
|
||||
validate_services \
|
||||
"${ip_address}:8808/rerank" \
|
||||
'{"index":1,"score":' \
|
||||
"tei-rerank" \
|
||||
"tei-reranking-gaudi-server" \
|
||||
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
|
||||
|
||||
# vllm-on-ray for llm service
|
||||
validate_services \
|
||||
"${ip_address}:8006/v1/chat/completions" \
|
||||
"content" \
|
||||
"vllm-ray-llm" \
|
||||
"vllm-ray-gaudi-server" \
|
||||
'{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
|
||||
}
|
||||
|
||||
function validate_megaservice() {
|
||||
# Curl the Mega Service
|
||||
validate_services \
|
||||
"${ip_address}:8888/v1/chatqna" \
|
||||
"data: " \
|
||||
"mega-chatqna" \
|
||||
"chatqna-gaudi-backend-server" \
|
||||
'{"messages": "What is the revenue of Nike in 2023?"}'
|
||||
|
||||
}
|
||||
|
||||
function validate_frontend() {
|
||||
cd $WORKPATH/ui/svelte
|
||||
local conda_env_name="OPEA_e2e"
|
||||
export PATH=${HOME}/miniforge3/bin/:$PATH
|
||||
if conda info --envs | grep -q "$conda_env_name"; then
|
||||
echo "$conda_env_name exist!"
|
||||
else
|
||||
conda create -n ${conda_env_name} python=3.12 -y
|
||||
fi
|
||||
source activate ${conda_env_name}
|
||||
|
||||
sed -i "s/localhost/$ip_address/g" playwright.config.ts
|
||||
|
||||
conda install -c conda-forge nodejs -y
|
||||
npm install && npm ci && npx playwright install --with-deps
|
||||
node -v && npm -v && pip list
|
||||
|
||||
exit_status=0
|
||||
npx playwright test || exit_status=$?
|
||||
|
||||
if [ $exit_status -ne 0 ]; then
|
||||
echo "[TEST INFO]: ---------frontend test failed---------"
|
||||
exit $exit_status
|
||||
else
|
||||
echo "[TEST INFO]: ---------frontend test passed---------"
|
||||
fi
|
||||
}
|
||||
|
||||
function stop_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi
|
||||
docker compose -f compose_vllm_ray.yaml down
|
||||
}
|
||||
|
||||
function main() {
|
||||
|
||||
stop_docker
|
||||
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
|
||||
start_time=$(date +%s)
|
||||
start_services
|
||||
end_time=$(date +%s)
|
||||
duration=$((end_time-start_time))
|
||||
echo "Mega service start duration is $duration s"
|
||||
|
||||
validate_microservices
|
||||
validate_megaservice
|
||||
# validate_frontend
|
||||
|
||||
stop_docker
|
||||
echo y | docker system prune
|
||||
|
||||
}
|
||||
|
||||
main
|
||||
@@ -111,7 +111,7 @@ function _cleanup_ns() {

function install_and_validate_chatqna_guardrail() {
echo "Testing manifests chatqna_guardrils"
local ns=${NAMESPACE}-gaurdrails
local ns=${NAMESPACE}
_cleanup_ns $ns
kubectl create namespace $ns
# install guardrail

@@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() {
# Sleep enough time for chatqna_guardrail to be ready
sleep 60
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Waiting for cahtqna_guardrail pod ready done!"
echo "Waiting for chatqna_guardrail pod ready done!"
else
echo "Timeout waiting for chatqna_guardrail pod ready!"
_cleanup_ns $ns
exit 1
fi

@@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() {
validate_chatqna $ns chatqna-guardrails
local ret=$?
if [ $ret -ne 0 ]; then
_cleanup_ns $ns
exit 1
fi
_cleanup_ns $ns
}

if [ $# -eq 0 ]; then

@@ -161,8 +158,7 @@ case "$1" in
if [ $ret -ne 0 ]; then
exit $ret
fi
pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifests
set +e
pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest
install_and_validate_chatqna_guardrail
popd
;;
@@ -40,7 +40,7 @@ function get_end_point() {
function validate_chatqna() {
local ns=$1
local log=$2
max_retry=20
max_retry=10
# make sure microservice retriever-usvc is ready
# try to curl retriever-svc for max_retry times
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
@@ -111,7 +111,7 @@ function _cleanup_ns() {
|
||||
|
||||
function install_and_validate_chatqna_guardrail() {
|
||||
echo "Testing manifests chatqna_guardrils"
|
||||
local ns=${NAMESPACE}-gaurdrails
|
||||
local ns=${NAMESPACE}
|
||||
_cleanup_ns $ns
|
||||
kubectl create namespace $ns
|
||||
# install guardrail
|
||||
@@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() {
|
||||
# Sleep enough time for chatqna_guardrail to be ready
|
||||
sleep 60
|
||||
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
|
||||
echo "Waiting for cahtqna_guardrail pod ready done!"
|
||||
echo "Waiting for chatqna_guardrail pod ready done!"
|
||||
else
|
||||
echo "Timeout waiting for chatqna_guardrail pod ready!"
|
||||
_cleanup_ns $ns
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() {
|
||||
validate_chatqna $ns chatqna-guardrails
|
||||
local ret=$?
|
||||
if [ $ret -ne 0 ]; then
|
||||
_cleanup_ns $ns
|
||||
exit 1
|
||||
fi
|
||||
_cleanup_ns $ns
|
||||
}
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
@@ -161,8 +158,7 @@ case "$1" in
|
||||
if [ $ret -ne 0 ]; then
|
||||
exit $ret
|
||||
fi
|
||||
pushd ChatQnA/kubernetes/intel/cpu/xeon/manifests
|
||||
set +e
|
||||
pushd ChatQnA/kubernetes/intel/cpu/xeon/manifest
|
||||
install_and_validate_chatqna_guardrail
|
||||
popd
|
||||
;;
|
||||
|
||||