Compare commits

...

63 Commits

Author SHA1 Message Date
letonghan
91940b8058 Merge branch 'main' of https://github.com/opea-project/GenAIExamples into reorg_helm_chart 2024-11-11 13:49:52 +08:00
WenjiaoYue
3744bb8c1b Fix docSum ui error in accessing parsed files (#1079)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: Neo Zhang Jianyu <jianyu.zhang@intel.com>
Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
Co-authored-by: XinyaoWa <xinyao.wang@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
2024-11-11 09:10:12 +08:00
chen, suyue
82801d0121 image build bug fix (#1105)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-11-08 23:54:32 +08:00
Wang, Kai Lawrence
f7026773b8 [ChatQnA] Fix the no_proxy setting for gpu example (#1078)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2024-11-08 22:27:51 +08:00
Hoong Tee, Yeoh
edc09ece5c ProductivitySuite: Fix typo in README (#1083)
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
2024-11-08 22:26:32 +08:00
dependabot[bot]
dfed2aead2 Bump gradio from 5.0.0 to 5.5.0 in /MultimodalQnA/ui/gradio (#1080)
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-11-08 22:24:36 +08:00
ZePan110
049517f977 Improve the robustness of links check workflow (#1096)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2024-11-08 22:19:52 +08:00
Neo Zhang Jianyu
ee83a6d5b4 opt CI to skip none MD and RST files (#1098)
Signed-off-by: ZhangJianyu <zhang.jianyu@outlook.com>
2024-11-08 22:07:17 +08:00
WenjiaoYue
e2bdd19fd4 update faqGen ui response (#1091)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-11-08 21:29:52 +08:00
Zhu Yongbo
c9088eb824 Add EdgeCraftRag as a GenAIExample (#1072)
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com>
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: xiguiw <111278656+xiguiw@users.noreply.github.com>
Co-authored-by: lvliang-intel <liang1.lv@intel.com>
2024-11-08 21:07:24 +08:00
XinyaoWa
9c3023a12e Fix faq ut bug (#1097)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-11-08 16:27:00 +08:00
letonghan
7d779513f5 add docsum helm charts
Signed-off-by: letonghan <letong.han@intel.com>
2024-11-08 16:04:29 +08:00
Melanie Hart Buehler
bbc95bb708 MultimodalQnA Image and Audio Support Phase 1 (#1071)
Signed-off-by: Melanie Buehler <melanie.h.buehler@intel.com>
Signed-off-by: okhleif-IL <omar.khleif@intel.com>
Signed-off-by: dmsuehir <dina.s.jones@intel.com>
Co-authored-by: Omar Khleif <omar.khleif@intel.com>
Co-authored-by: dmsuehir <dina.s.jones@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Abolfazl Shahbazi <12436063+ashahba@users.noreply.github.com>
2024-11-08 15:54:49 +08:00
ZePan110
dd9623d3d5 Add new image repo clone. (#1093)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2024-11-08 15:27:42 +08:00
XinyaoWa
4c27a3d30c Align faqgen to form input (#1089)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-08 13:32:26 +08:00
XinyaoWa
40386d9bd6 remove vllm-on-ray (#1084)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-11-08 13:01:48 +08:00
Neo Zhang Jianyu
fe97e88c7a Add CI case to check online doc building, not update online doc (#1087)
Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
2024-11-08 11:57:01 +08:00
Hoong Tee, Yeoh
11d8b24c8a ProductivitySuite: Update TGI CPU image version to 2.4.0 (#1062)
Signed-off-by: Yeoh, Hoong Tee <hoong.tee.yeoh@intel.com>
2024-11-08 09:50:11 +08:00
lvliang-intel
4635a927fa Make embedding run on CPU for aligning with Gaudi performance benchmark (#1057)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-11-07 17:39:34 +08:00
ZePan110
1da44d99a1 Remove debug outputs (#1085)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2024-11-07 14:11:46 +08:00
XinyaoWa
e9b164505e align vllm hpu version to latest vllm-fork (#1061)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-11-07 14:08:56 +08:00
Arthur Leung
6263b517b9 [Doc] Add steps to deploy opea services using minikube (#1058)
Signed-off-by: Arthur Leung <arcyleung@gmail.com>
Co-authored-by: Arthur Leung <arcyleung@gmail.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-07 13:57:34 +08:00
chen, suyue
2de7c0ba89 Enhance CI hardware list detect (#1077)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-11-07 09:38:19 +08:00
Wang, Kai Lawrence
944ae47948 [ChatQnA] Fix the service connection issue on GPU and modify the emb backend (#1059)
Signed-off-by: Wang, Kai Lawrence <kai.lawrence.wang@intel.com>
2024-11-06 10:22:21 +08:00
Neo Zhang Jianyu
2d9aeb3715 fix wrong format which break online doc build (#1073)
Co-authored-by: ZhangJianyu <zhang.jianyu@outlook.com>
2024-11-05 17:01:40 +08:00
xiguiw
a0921f127f [Doc] Fix broken build instruction (#1063)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2024-11-05 13:35:12 +08:00
chen, suyue
cf86aceb18 Update nightly image build jobs (#1070)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-11-05 09:14:44 +08:00
chen, suyue
c2b7bd25d9 Use docker stop instead of docker compose stop to avoid container clean up issue (#1068)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 22:54:19 +08:00
chen, suyue
78331ee678 Add nightly image build and publish action (#1067)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 17:22:56 +08:00
ZePan110
7f7ad0e256 Inject commit for the release docker image (#1060)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2024-11-04 17:08:15 +08:00
lvliang-intel
0306c620b5 Update TGI CPU image to latest official release 2.4.0 (#1035)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 11:28:43 +08:00
lkk
3372b9d480 update accuracy embedding endpoint for no wrapper (#1056)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 09:18:49 +08:00
minmin-intel
5eb3d2869f Update AgentQnA example for v1.1 release (#885)
Signed-off-by: minmin-intel <minmin.hou@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-04 09:17:19 +08:00
Yi Yao
ced68e1834 Add performance benchmark scripts for 4 use cases. (#1052)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-03 12:41:02 +08:00
JoshuaL3000
bf5c391e47 Add Workflow Executor Example (#892)
Signed-off-by: JoshuaL3000 <joshua.jian.ern.liew@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-31 20:50:20 -05:00
XinyaoWa
c65d7d40fb fix vllm output in chatqna (#1038)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-11-01 09:26:57 +08:00
chen, suyue
9d124161e0 update action for CI (#1050)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-10-31 14:54:04 +08:00
chen, suyue
0f5a9c4a5e Fix ChatQnA manifest test issue on Xeon (#1044)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-31 14:23:17 +08:00
rbrugaro
a65640b4a5 Graph rag (#1007)
Signed-off-by: Rita Brugarolas <rita.brugarolas.brufau@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-30 08:52:25 -07:00
lvliang-intel
7197286a14 Fix ChatQnA manifest default port issue (#1033)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-10-30 11:52:04 +08:00
Chun Tao
960805a57b Adding audio and image/video files needed for loading the Gradio UI, and update the UI Python function (#1034)
Signed-off-by: Chun Tao <chun.tao@intel.com>
Signed-off-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Signed-off-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: kevinintel <hanwen.chang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-10-30 10:05:02 +08:00
Louie Tsai
002f0e2b11 Update VisualQnA README.md for its workflow (#912)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-10-30 09:27:22 +08:00
XinyaoWa
fde5996192 fix FaqGen accuracy scripts bug (#1039)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
2024-10-29 16:34:11 +08:00
Lianhao Lu
bc47930ce1 manifest CI: repopulate the failure from inner test script (#1032)
Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
2024-10-28 11:51:24 +08:00
Yao Qing
2332d22950 [Codegen] Replace codegen default Model to Qwen/Qwen2.5-Coder-7B-Instruct. (#1013)
Signed-off-by: Yao, Qing <qing.yao@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-28 09:18:01 +08:00
XinyaoWa
a2afce1675 update codetrans default model (#1015)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-28 09:11:54 +08:00
WenjiaoYue
89f4c5fb41 update upload response format and add streaming method in front_end (#1019)
Signed-off-by: Yue, Wenjiao <wenjiao.yue@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-25 15:46:56 +08:00
lvliang-intel
98f66405ac Update docsum test command line format (#1027)
Signed-off-by: chensuyue <suyue.chen@intel.com>
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-10-25 15:39:05 +08:00
Louie Tsai
90c2d49050 Update CodeTrans README.md for workflow (#908)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-10-25 12:39:18 +08:00
xiguiw
95b58b51fa Fix AIPC docker container network issue (#1021)
Signed-off-by: Wang, Xigui <xigui.wang@intel.com>
2024-10-25 10:46:57 +08:00
chen, suyue
d3ce6f5357 add new secrets for CI test (#1023)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-10-24 18:10:22 +08:00
Louie Tsai
a10b4a1f1d Address request from Issue#971 (#1018) 2024-10-23 23:57:52 -07:00
XinyuYe-Intel
085d859a70 Add example for text2image (#920)
Signed-off-by: Ye, Xinyu <xinyu.ye@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-10-24 11:43:44 +08:00
chen, suyue
15cc457cea fix action path in CI workflow (#1016)
Signed-off-by: chensuyue <suyue.chen@intel.com>
2024-10-23 17:40:08 +08:00
Chun Tao
cfffb4c005 Initiate "AvatarChatbot" (audio) example (#923)
Signed-off-by: Chun Tao <chun.tao@intel.com>
Signed-off-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Signed-off-by: ZePan110 <ze.pan@intel.com>
Signed-off-by: Louie Tsai <louie.tsai@intel.com>
Signed-off-by: chen, suyue <suyue.chen@intel.com>
Co-authored-by: rbrugaro <rita.brugarolas.brufau@intel.com>
Co-authored-by: ZePan110 <ze.pan@intel.com>
Co-authored-by: kevinintel <hanwen.chang@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Louie Tsai <louie.tsai@intel.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-10-23 14:58:17 +08:00
Chun Tao
41955f65ad Add a sample UI image for CodeGen's TGI monitoring (#1009)
Signed-off-by: Chun Tao <chun.tao@intel.com>
2024-10-23 14:38:12 +08:00
RuijingGuo
def39cfcdc setup ollama service in aipc docker compose (#1008)
Signed-off-by: Guo Ruijing <ruijing.guo@intel.com>
2024-10-23 14:22:48 +08:00
Louie Tsai
35a4fef70d Update Translation README.md for workflow (#907)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-10-23 11:35:15 +08:00
Louie Tsai
a3f9811f7e Update DocIndexRetriever README.md for workflow (#939)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-10-22 14:44:36 +08:00
lvliang-intel
0eedbbfce0 Update aipc ollama docker compose and readme (#984)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: chen, suyue <suyue.chen@intel.com>
2024-10-22 10:30:47 +08:00
lvliang-intel
9438d392b4 Update README for some minor issues (#1000)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
2024-10-22 10:30:18 +08:00
Louie Tsai
1929dfd3a0 Update VideoQnA README.md for workflow (#906)
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>
2024-10-21 13:56:45 -07:00
ZePan110
c7e33647ad Fix script name errors. (#997)
Signed-off-by: ZePan110 <ze.pan@intel.com>
2024-10-21 11:44:50 +08:00
424 changed files with 15107 additions and 1390 deletions

View File

@@ -0,0 +1,2 @@
ModelIn
modelin

View File

@@ -40,6 +40,11 @@ on:
default: "main"
required: false
type: string
inject_commit:
default: false
required: false
type: string
jobs:
####################################################################################################
# Image Build
@@ -72,6 +77,10 @@ jobs:
git clone https://github.com/vllm-project/vllm.git
cd vllm && git rev-parse HEAD && cd ../
fi
if [[ $(grep -c "vllm-hpu:" ${docker_compose_path}) != 0 ]]; then
git clone https://github.com/HabanaAI/vllm-fork.git
cd vllm-fork && git rev-parse HEAD && cd ../
fi
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
@@ -83,6 +92,7 @@ jobs:
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
service_list: ${{ inputs.services }}
registry: ${OPEA_IMAGE_REPO}opea
inject_commit: ${{ inputs.inject_commit }}
tag: ${{ inputs.tag }}
####################################################################################################

View File

@@ -90,10 +90,16 @@ jobs:
echo "Validate ${{ inputs.example }} successful!"
else
echo "Validate ${{ inputs.example }} failure!!!"
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
exit 1
fi
fi
- name: Dump logs when e2e test failed
if: failure()
run: |
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
- name: Kubectl uninstall
if: always()
run: |

View File

@@ -119,6 +119,8 @@ jobs:
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
IMAGE_REPO: ${{ inputs.registry }}
IMAGE_TAG: ${{ inputs.tag }}
example: ${{ inputs.example }}
@@ -139,7 +141,11 @@ jobs:
flag=${flag#test_}
yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
echo $yaml_file
docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true

View File

@@ -0,0 +1,35 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Check Online Document Building
permissions: {}
on:
pull_request:
branches: [main]
paths:
- "**.md"
- "**.rst"
jobs:
build:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
path: GenAIExamples
- name: Checkout docs
uses: actions/checkout@v4
with:
repository: opea-project/docs
path: docs
- name: Build Online Document
shell: bash
run: |
echo "build online doc"
cd docs
bash scripts/build.sh

View File

@@ -50,6 +50,11 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string
permissions: read-all
jobs:
@@ -101,4 +106,5 @@ jobs:
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit

View File

@@ -30,6 +30,12 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string
jobs:
get-test-matrix:
runs-on: ubuntu-latest
@@ -56,4 +62,5 @@ jobs:
services: ${{ inputs.services }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit

View File

@@ -0,0 +1,70 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
name: Nightly build/publish latest docker images
on:
schedule:
- cron: "30 13 * * *" # UTC time
workflow_dispatch:
env:
EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
TAG: "latest"
PUBLISH_TAGS: "latest"
jobs:
get-build-matrix:
runs-on: ubuntu-latest
outputs:
examples_json: ${{ steps.get-matrix.outputs.examples_json }}
EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
TAG: ${{ steps.get-matrix.outputs.TAG }}
PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${EXAMPLES} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
echo "TAG=$TAG" >> $GITHUB_OUTPUT
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
build:
needs: get-build-matrix
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: gaudi
example: ${{ matrix.example }}
secrets: inherit
get-image-list:
needs: get-build-matrix
uses: ./.github/workflows/_get-image-list.yml
with:
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
publish:
needs: [get-build-matrix, get-image-list, build]
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-gaudi"
steps:
- uses: docker/login-action@v3.2.0
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
image_name: opea/${{ matrix.image }}
publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
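As a side note on the Create Matrix step above, the comma-separated `EXAMPLES` string is turned into a JSON array for the build matrix. A minimal standalone sketch of that transformation (illustrative `EXAMPLES` value; assumes `jq` is installed):
```bash
# Reproduces the Create Matrix step in isolation (input value is illustrative).
EXAMPLES="ChatQnA,CodeGen,ChatQnA"
examples=($(echo ${EXAMPLES} | tr ',' ' '))
# de-duplicate, quote each name, and collapse into a compact JSON array
printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.'
# prints: ["ChatQnA","CodeGen"]
```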

View File

@@ -12,7 +12,7 @@ on:
- "**/tests/test_gmc**"
- "!**.md"
- "!**.txt"
- "!**/kubernetes/**/manifests/**"
- "!**/kubernetes/**/manifest/**"
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

View File

@@ -10,7 +10,7 @@ on:
paths:
- "**/Dockerfile**"
- "**.py"
- "**/kubernetes/**/manifests/**"
- "**/kubernetes/**/manifest/**"
- "**/tests/test_manifest**"
- "!**.md"
- "!**.txt"

View File

@@ -61,14 +61,14 @@ jobs:
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
if [ -n "$changed_files" ]; then
for changed_file in $changed_files; do
echo $changed_file
# echo $changed_file
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
if [ -n "$url_lines" ]; then
for url_line in $url_lines; do
echo $url_line
# echo $url_line
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
if [ "$response" -ne 200 ]; then
echo "**********Validation failed, try again**********"
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")

View File

@@ -9,12 +9,15 @@ set -e
changed_files=$changed_files
test_mode=$test_mode
run_matrix="{\"include\":["
hardware_list="xeon gaudi" # current support hardware list
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
for example in ${examples}; do
cd $WORKSPACE/$example
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
cd tests
ls -l
hardware_list=$(find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
echo "Test supported hardware list = ${hardware_list}"
run_hardware=""
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
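For clarity, a standalone sketch of what the hardware-detection pipeline above yields, assuming an example's `tests/` folder with the usual compose test scripts (file names are illustrative):
```bash
# Assume the current directory is an example's tests/ folder containing, e.g.:
#   ./test_compose_on_xeon.sh
#   ./test_compose_on_gaudi.sh
find . -type f -name "test_compose*_on_*.sh" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}' | sort -u
# prints:
#   gaudi
#   xeon
```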

View File

@@ -81,17 +81,13 @@ flowchart LR
3. Hierarchical agent can further improve performance.
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
### Roadmap
## Deployment with docker
- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
- v1.0 or later: agents use open-source llm backend.
- v1.1 or later: add safeguards
1. Build agent docker image
## Getting started
Note: this step is optional. The docker images are pulled automatically when you run the docker compose commands; build them manually only if pulling the images fails.
1. Build agent docker image </br>
First, clone the opea GenAIComps repo
First, clone the opea GenAIComps repo.
```
export WORKDIR=<your-work-directory>
@@ -106,35 +102,63 @@ flowchart LR
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
```
2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
3. Set up environment for this example </br>
First, clone this repo
2. Set up environment for this example </br>
First, clone this repo.
```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```
Second, set up env vars
Second, set up env vars.
```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
# optional: OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```
4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
First, launch the mega-service.
```
cd docker_compose/intel/cpu/xeon
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
```
bash run_ingest_data.sh
```
4. Launch other tools. </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```
5. Launch agent services</br>
We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.
To use open-source LLMs on Gaudi2, run commands below.
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
bash launch_agent_service_tgi_gaudi.sh
```
To use OpenAI models, run commands below.
```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```
@@ -143,10 +167,12 @@ flowchart LR
First look at logs of the agent docker containers:
```
docker logs docgrader-agent-endpoint
# worker agent
docker logs rag-agent-endpoint
```
```
# supervisor agent
docker logs react-agent-endpoint
```
@@ -170,4 +196,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app
## How to register your own tools with agent
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
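As a hedged illustration only (the authoritative schema is in the GenAIComps agent README linked above): a custom tool is a Python function plus a YAML entry describing its arguments, dropped into the directory that `TOOLSET_PATH` points at. The `args_schema`/`type`/`description` fields below mirror the tools YAML visible elsewhere in this change set; the `get_weather` tool and its file names are hypothetical.
```bash
cd ${TOOLSET_PATH}

# Hypothetical tool implementation (same call pattern as search_knowledge_base in this change set).
cat >> custom_tools.py << 'EOF'
def get_weather(city: str) -> str:
    """Hypothetical custom tool; replace the body with a real data source."""
    return f"The weather in {city} is sunny."
EOF

# Hypothetical tool spec; only args_schema/type/description are taken from the tools YAML
# in this change set. Check the GenAIComps agent README for the full schema.
cat >> custom_tools.yaml << 'EOF'
get_weather:
  description: get the current weather for a city
  args_schema:
    city:
      type: str
      description: city name, for example Portland
EOF
```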

View File

@@ -0,0 +1,3 @@
# Deployment on Xeon
We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).

View File

@@ -2,11 +2,10 @@
# SPDX-License-Identifier: Apache-2.0
services:
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
container_name: rag-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
@@ -36,8 +35,9 @@ services:
supervisor-react-agent:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"

View File

@@ -7,7 +7,7 @@ export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"

View File

@@ -2,37 +2,9 @@
# SPDX-License-Identifier: Apache-2.0
services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
depends_on:
- tgi-server
container_name: rag-agent-endpoint
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -41,7 +13,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent
strategy: rag_agent_llama
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -66,8 +38,7 @@ services:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- tgi-server
- worker-docgrader-agent
- worker-rag-agent
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -76,7 +47,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
strategy: react_llama
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}

View File

@@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
export temperature=0.01
export max_new_tokens=512
export max_new_tokens=4096
# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,17 +27,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080
docker compose -f compose.yaml up -d
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"

View File

@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# LLM related environment variables
export HF_CACHE_DIR=${HF_CACHE_DIR}
ls $HF_CACHE_DIR
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
docker compose -f tgi_gaudi.yaml up -d
sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"

View File

@@ -0,0 +1,30 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}

View File

@@ -17,6 +17,12 @@ if [ ! -d "$HF_CACHE_DIR" ]; then
fi
ls $HF_CACHE_DIR
function start_tgi(){
echo "Starting tgi-gaudi server"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
}
function start_agent_and_api_server() {
echo "Starting CRAG server"
@@ -25,6 +31,7 @@ function start_agent_and_api_server() {
echo "Starting Agent services"
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_agent_service_tgi_gaudi.sh
sleep 10
}
function validate() {
@@ -43,18 +50,22 @@ function validate() {
function validate_agent_service() {
echo "----------------Test agent ----------------"
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs docgrader-agent-endpoint
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9095"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
docker logs rag-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
exit 1
fi
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
"query": "Tell me about Michael Jackson song thriller"
}')
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
# "query": "Tell me about Michael Jackson song thriller"
# }')
export agent_port="9090"
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
docker logs react-agent-endpoint
if [ "$EXIT_CODE" == "1" ]; then
@@ -64,6 +75,10 @@ function validate_agent_service() {
}
function main() {
echo "==================== Start TGI ===================="
start_tgi
echo "==================== TGI started ===================="
echo "==================== Start agent ===================="
start_agent_and_api_server
echo "==================== Agent started ===================="

25
AgentQnA/tests/test.py Normal file
View File

@@ -0,0 +1,25 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import os
import requests
def generate_answer_agent_api(url, prompt):
proxies = {"http": ""}
payload = {
"query": prompt,
}
response = requests.post(url, json=payload, proxies=proxies)
answer = response.json()["text"]
return answer
if __name__ == "__main__":
ip_address = os.getenv("ip_address", "localhost")
agent_port = os.getenv("agent_port", "9095")
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
prompt = "Tell me about Michael Jackson song thriller"
answer = generate_answer_agent_api(url, prompt)
print(answer)
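A minimal way to exercise this helper from the shell (the environment variable names come straight from the script; host and port values are illustrative):
```bash
# 9095 is the worker RAG agent port and 9090 the supervisor agent port in this example.
export ip_address=localhost
export agent_port=9095
python3 AgentQnA/tests/test.py
```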

View File

@@ -19,7 +19,6 @@ function stop_crag() {
function stop_agent_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -28,11 +27,21 @@ function stop_agent_docker() {
done
}
function stop_tgi(){
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
echo "Stopping container $container_name"
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
done
}
function stop_retrieval_tool() {
echo "Stopping Retrieval tool"
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
# docker compose -f compose.yaml down
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
@@ -43,25 +52,26 @@ function stop_retrieval_tool() {
echo "workpath: $WORKPATH"
echo "=================== Stop containers ===================="
stop_crag
stop_tgi
stop_agent_docker
stop_retrieval_tool
cd $WORKPATH/tests
echo "=================== #1 Building docker images===================="
bash 1_build_images.sh
bash step1_build_images.sh
echo "=================== #1 Building docker images completed===================="
echo "=================== #2 Start retrieval tool===================="
bash 2_start_retrieval_tool.sh
bash step2_start_retrieval_tool.sh
echo "=================== #2 Retrieval tool started===================="
echo "=================== #3 Ingest data and validate retrieval===================="
bash 3_ingest_data_and_validate_retrieval.sh
bash step3_ingest_data_and_validate_retrieval.sh
echo "=================== #3 Data ingestion and validation completed===================="
echo "=================== #4 Start agent and API server===================="
bash 4_launch_and_validate_agent_tgi.sh
bash step4_launch_and_validate_agent_tgi.sh
echo "=================== #4 Agent test passed ===================="
echo "=================== #5 Stop agent and API server===================="
@@ -70,4 +80,6 @@ stop_agent_docker
stop_retrieval_tool
echo "=================== #5 Agent and API server stopped===================="
echo y | docker system prune
echo "ALL DONE!"

View File

@@ -25,7 +25,7 @@ get_billboard_rank_date:
args_schema:
rank:
type: int
description: song name
description: the rank of interest, for example 1 for top 1
date:
type: str
description: date

View File

@@ -12,16 +12,31 @@ def search_knowledge_base(query: str) -> str:
print(url)
proxies = {"http": ""}
payload = {
"text": query,
"messages": query,
}
response = requests.post(url, json=payload, proxies=proxies)
print(response)
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
print(context)
return context
if "documents" in response.json():
docs = response.json()["documents"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc
else:
context += "\n" + doc
# print(context)
return context
elif "text" in response.json():
return response.json()["text"]
elif "reranked_docs" in response.json():
docs = response.json()["reranked_docs"]
context = ""
for i, doc in enumerate(docs):
if i == 0:
context = doc["text"]
else:
context += "\n" + doc["text"]
# print(context)
return context
else:
return "Error parsing response from the knowledge base."

View File

@@ -36,9 +36,9 @@ Evaluate the performance with the LLM:
```py
# validate the offline model
# python offline_evaluate.py
# python offline_eval.py
# validate the online asr microservice accuracy
python online_evaluate.py
python online_eval.py
```
### Performance Result

View File

@@ -2,4 +2,4 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
python online_evaluate.py
python online_eval.py

View File

@@ -41,7 +41,7 @@ services:
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -26,7 +26,7 @@ services:
https_proxy: ${https_proxy}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"

View File

@@ -7,14 +7,14 @@
## Deploy On Xeon
```
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml
```
## Deploy On Gaudi
```
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
kubectl apply -f audioqna.yaml

View File

@@ -247,7 +247,7 @@ spec:
- envFrom:
- configMapRef:
name: audio-qna-config
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
name: llm-dependency-deploy-demo
securityContext:
capabilities:

8
AvatarChatbot/.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
*.safetensors
*.bin
*.model
*.log
docker_compose/intel/cpu/xeon/data
docker_compose/intel/hpu/gaudi/data
inputs/
outputs/

33
AvatarChatbot/Dockerfile Normal file
View File

@@ -0,0 +1,33 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
FROM python:3.11-slim
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
git
RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/
WORKDIR /home/user/
RUN git clone https://github.com/opea-project/GenAIComps.git
WORKDIR /home/user/GenAIComps
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
COPY ./avatarchatbot.py /home/user/avatarchatbot.py
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
USER user
WORKDIR /home/user
ENTRYPOINT ["python", "avatarchatbot.py"]

105
AvatarChatbot/README.md Normal file
View File

@@ -0,0 +1,105 @@
# AvatarChatbot Application
The AvatarChatbot service can be deployed on either Intel Gaudi2 accelerators or Intel Xeon Scalable processors.
## AI Avatar Workflow
The AI Avatar example is implemented using both megaservices and the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different megaservices and microservices for this example.
```mermaid
---
config:
flowchart:
nodeSpacing: 100
rankSpacing: 100
curve: linear
themeVariables:
fontSize: 42px
---
flowchart LR
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef thistle fill:#D8BFD8,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
classDef invisible fill:transparent,stroke:transparent;
style AvatarChatbot-Megaservice stroke:#000000
subgraph AvatarChatbot-Megaservice["AvatarChatbot Megaservice"]
direction LR
ASR([ASR Microservice]):::blue
LLM([LLM Microservice]):::blue
TTS([TTS Microservice]):::blue
animation([Animation Microservice]):::blue
end
subgraph UserInterface["User Interface"]
direction LR
invis1[ ]:::invisible
USER1([User Audio Query]):::orchid
USER2([User Image/Video Query]):::orchid
UI([UI server<br>]):::orchid
end
GW([AvatarChatbot GateWay<br>]):::orange
subgraph .
direction LR
X([OPEA Microservice]):::blue
Y{{Open Source Service}}:::thistle
Z([OPEA Gateway]):::orange
Z1([UI]):::orchid
end
WHISPER{{Whisper service}}:::thistle
TGI{{LLM service}}:::thistle
T5{{Speecht5 service}}:::thistle
WAV2LIP{{Wav2Lip service}}:::thistle
%% Connections %%
direction LR
USER1 -->|1| UI
UI -->|2| GW
GW <==>|3| AvatarChatbot-Megaservice
ASR ==>|4| LLM ==>|5| TTS ==>|6| animation
direction TB
ASR <-.->|3'| WHISPER
LLM <-.->|4'| TGI
TTS <-.->|5'| T5
animation <-.->|6'| WAV2LIP
USER2 -->|1| UI
UI <-.->|6'| WAV2LIP
```
## Deploy AvatarChatbot Service
The AvatarChatbot service can be deployed on either Intel Gaudi2 AI Accelerator or Intel Xeon Scalable Processor.
### Deploy AvatarChatbot on Gaudi
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi and on setting up a UI for the application.
### Deploy AvatarChatbot on Xeon
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AvatarChatbot on Xeon.
## Supported Models
### ASR
The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). It also supports all models in the Whisper family, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, `openai/whisper-tiny`, etc.
To replace the model, please edit the `compose.yaml` and add the `command` line to pass the name of the model you want to use:
```yaml
services:
whisper-service:
...
command: --model_name_or_path openai/whisper-tiny
```
### TTS
The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). We currently do not support replacing the model. More models with commercial-friendly licenses will be added in the future.
### Animation
The default models are [Rudrabha/Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [TencentARC/GFPGAN](https://github.com/TencentARC/GFPGAN). We currently do not support replacing them. More models with commercial-friendly licenses, such as [OpenTalker/SadTalker](https://github.com/OpenTalker/SadTalker), will be added in the future.

Binary files not shown. This change set also adds a number of binary image and media assets (roughly 20 KiB to 2.5 MiB each), and three file diffs are suppressed because one or more lines are too long.

View File

@@ -0,0 +1,93 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import os
import sys
from comps import AvatarChatbotGateway, MicroService, ServiceOrchestrator, ServiceType
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
ASR_SERVICE_HOST_IP = os.getenv("ASR_SERVICE_HOST_IP", "0.0.0.0")
ASR_SERVICE_PORT = int(os.getenv("ASR_SERVICE_PORT", 9099))
LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")
LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))
TTS_SERVICE_HOST_IP = os.getenv("TTS_SERVICE_HOST_IP", "0.0.0.0")
TTS_SERVICE_PORT = int(os.getenv("TTS_SERVICE_PORT", 9088))
ANIMATION_SERVICE_HOST_IP = os.getenv("ANIMATION_SERVICE_HOST_IP", "0.0.0.0")
ANIMATION_SERVICE_PORT = int(os.getenv("ANIMATION_SERVICE_PORT", 9066))
def check_env_vars(env_var_list):
for var in env_var_list:
if os.getenv(var) is None:
print(f"Error: The environment variable '{var}' is not set.")
sys.exit(1) # Exit the program with a non-zero status code
print("All environment variables are set.")
class AvatarChatbotService:
def __init__(self, host="0.0.0.0", port=8000):
self.host = host
self.port = port
self.megaservice = ServiceOrchestrator()
def add_remote_service(self):
asr = MicroService(
name="asr",
host=ASR_SERVICE_HOST_IP,
port=ASR_SERVICE_PORT,
endpoint="/v1/audio/transcriptions",
use_remote_service=True,
service_type=ServiceType.ASR,
)
llm = MicroService(
name="llm",
host=LLM_SERVICE_HOST_IP,
port=LLM_SERVICE_PORT,
endpoint="/v1/chat/completions",
use_remote_service=True,
service_type=ServiceType.LLM,
)
tts = MicroService(
name="tts",
host=TTS_SERVICE_HOST_IP,
port=TTS_SERVICE_PORT,
endpoint="/v1/audio/speech",
use_remote_service=True,
service_type=ServiceType.TTS,
)
animation = MicroService(
name="animation",
host=ANIMATION_SERVICE_HOST_IP,
port=ANIMATION_SERVICE_PORT,
endpoint="/v1/animation",
use_remote_service=True,
service_type=ServiceType.ANIMATION,
)
self.megaservice.add(asr).add(llm).add(tts).add(animation)
self.megaservice.flow_to(asr, llm)
self.megaservice.flow_to(llm, tts)
self.megaservice.flow_to(tts, animation)
self.gateway = AvatarChatbotGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
if __name__ == "__main__":
check_env_vars(
[
"MEGA_SERVICE_HOST_IP",
"MEGA_SERVICE_PORT",
"ASR_SERVICE_HOST_IP",
"ASR_SERVICE_PORT",
"LLM_SERVICE_HOST_IP",
"LLM_SERVICE_PORT",
"TTS_SERVICE_HOST_IP",
"TTS_SERVICE_PORT",
"ANIMATION_SERVICE_HOST_IP",
"ANIMATION_SERVICE_PORT",
]
)
avatarchatbot = AvatarChatbotService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
avatarchatbot.add_remote_service()

View File

@@ -0,0 +1,210 @@
# Build Mega Service of AvatarChatbot on Xeon
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Xeon server.
## 🚀 Build Docker images
### 1. Source Code install GenAIComps
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build ASR Image
```bash
docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
### 3. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 4. Build TTS Image
```bash
docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```
### 5. Build Animation Image
```bash
docker build -t opea/wav2lip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile .
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `avatarchatbot.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
Then run `docker images`; you should have the following images ready (a quick check is shown after the list):
1. `opea/whisper:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5:latest`
5. `opea/tts:latest`
6. `opea/wav2lip:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
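A quick, hedged way to confirm they are all present (image names come from the list above; tags assume `latest`):
```bash
docker images | grep -E "opea/(whisper|asr|llm-tgi|speecht5|tts|wav2lip|animation|avatarchatbot)"
```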
## 🚀 Set the environment variables
Before starting the services with `docker compose`, make sure the following environment variables are set correctly.
```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```
- Xeon CPU
```bash
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional; the base64 string in the POST request is used as input if this is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```
## 🚀 Start the MegaService
```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/cpu/xeon/
docker compose -f compose.yaml up -d
```
## 🚀 Test MicroServices
```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
-X POST \
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# tgi service
curl http://${host_ip}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
curl http://${host_ip}:7055/v1/tts \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
-X POST \
-d @assets/audio/sample_minecraft.json \
-H 'Content-Type: application/json'
# animation microservice
curl http://${host_ip}:3008/v1/animation \
-X POST \
-d @assets/audio/sample_question.json \
-H "Content-Type: application/json"
```
## 🚀 Test MegaService
```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
-X POST \
-d @assets/audio/sample_whoareyou.json \
-H 'Content-Type: application/json'
```
If the megaservice is running properly, you should see the following output:
```bash
"/outputs/result.mp4"
```
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
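A quick check under these assumptions (compose launched from the current directory, default `OUTFILE` as set above):
```bash
ls -lh ./result.mp4
```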
## Gradio UI
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```
The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), you need to set the avatar figure image/video and the DL model choice in the environment variables before starting the AvatarChatbot backend service and running the UI. In the UI itself, please customize only the audio question.
\*\* Changing the avatar figure between runs will be enabled in v2.0.
## Troubleshooting
```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
bash test_avatarchatbot_on_xeon.sh
```

View File

@@ -0,0 +1,138 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "3006:80"
volumes:
- "./data:/data"
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-server
depends_on:
- tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
wav2lip-service:
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
container_name: wav2lip-service
ports:
- "7860:7860"
ipc: host
volumes:
- ${PWD}:/outputs
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
DEVICE: ${DEVICE}
INFERENCE_MODE: ${INFERENCE_MODE}
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
FACE: ${FACE}
AUDIO: ${AUDIO}
FACESIZE: ${FACESIZE}
OUTFILE: ${OUTFILE}
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
FPS: ${FPS}
WAV2LIP_PORT: ${WAV2LIP_PORT}
restart: unless-stopped
animation:
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
container_name: animation-server
ports:
- "3008:9066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
restart: unless-stopped
avatarchatbot-xeon-backend-server:
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
container_name: avatarchatbot-xeon-backend-server
depends_on:
- asr
- llm
- tts
- animation
ports:
- "3009:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -0,0 +1,220 @@
# Build Mega Service of AvatarChatbot on Gaudi
This document outlines the deployment process for an AvatarChatbot application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server.
## 🚀 Build Docker images
### 1. Install GenAIComps from Source Code
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 2. Build ASR Image
```bash
docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
```
### 3. Build LLM Image
```bash
docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
```
### 4. Build TTS Image
```bash
docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu .
docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile .
```
### 5. Build Animation Image
```bash
docker build -t opea/wav2lip-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/dependency/Dockerfile.intel_hpu .
docker build -t opea/animation:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/animation/wav2lip/Dockerfile .
```
### 6. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `audioqna.py` Python script. Build the MegaService Docker image using the command below:
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/AvatarChatbot/
docker build --no-cache -t opea/avatarchatbot:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
Then run the command `docker images`; you should have the following images ready (a quick verification sketch follows the list):
1. `opea/whisper-gaudi:latest`
2. `opea/asr:latest`
3. `opea/llm-tgi:latest`
4. `opea/speecht5-gaudi:latest`
5. `opea/tts:latest`
6. `opea/wav2lip-gaudi:latest`
7. `opea/animation:latest`
8. `opea/avatarchatbot:latest`
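A minimal sketch to confirm that all eight images listed above are present locally:
```bash
# Report FOUND/MISSING for each expected image
for img in whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation avatarchatbot; do
  if docker image inspect "opea/${img}:latest" > /dev/null 2>&1; then
    echo "FOUND   opea/${img}:latest"
  else
    echo "MISSING opea/${img}:latest"
  fi
done
```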
## 🚀 Set the environment variables
Before starting the services with `docker compose`, make sure the following environment variables are set correctly.
```bash
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
```
- Gaudi2 HPU
```bash
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip_only'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
```
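Before bringing the stack up, a quick sanity check that the key endpoints and the Hugging Face token are exported can save a failed startup. A sketch; adjust the patterns to your environment:
```bash
# Print the endpoints and ports the microservices will use; empty values indicate a missed export
env | grep -E 'ENDPOINT|SERVICE_PORT|HOST_IP' | sort
[ -n "$HUGGINGFACEHUB_API_TOKEN" ] && echo "HF token is set" || echo "HF token is MISSING"
```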
## 🚀 Start the MegaService
```bash
cd GenAIExamples/AvatarChatbot/docker_compose/intel/hpu/gaudi/
docker compose -f compose.yaml up -d
```
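The TGI service warms up the model before it can serve requests. One way to wait for readiness, mirroring the approach used in the test script below (which greps the TGI log for `Connected`):
```bash
# Wait until the TGI Gaudi server reports it has loaded the model
until docker logs tgi-gaudi-server 2>&1 | grep -q Connected; do
  echo "Waiting for tgi-gaudi-server to finish warming up..."
  sleep 10
done
docker compose ps
```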
## 🚀 Test MicroServices
```bash
# whisper service
curl http://${host_ip}:7066/v1/asr \
-X POST \
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# asr microservice
curl http://${host_ip}:3001/v1/audio/transcriptions \
-X POST \
-d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' \
-H 'Content-Type: application/json'
# tgi service
curl http://${host_ip}:3006/generate \
-X POST \
-d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
-H 'Content-Type: application/json'
# llm microservice
curl http://${host_ip}:3007/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
-H 'Content-Type: application/json'
# speecht5 service
curl http://${host_ip}:7055/v1/tts \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# tts microservice
curl http://${host_ip}:3002/v1/audio/speech \
-X POST \
-d '{"text": "Who are you?"}' \
-H 'Content-Type: application/json'
# wav2lip service
cd ../../../..
curl http://${host_ip}:7860/v1/wav2lip \
-X POST \
-d @assets/audio/sample_minecraft.json \
-H 'Content-Type: application/json'
# animation microservice
curl http://${host_ip}:3008/v1/animation \
-X POST \
-d @assets/audio/sample_question.json \
-H "Content-Type: application/json"
```
## 🚀 Test MegaService
```bash
curl http://${host_ip}:3009/v1/avatarchatbot \
-X POST \
-d @assets/audio/sample_whoareyou.json \
-H 'Content-Type: application/json'
```
If the megaservice is running properly, you should see the following output:
```bash
"/outputs/result.mp4"
```
The output file will be saved in the current working directory, as `${PWD}` is mapped to `/outputs` inside the wav2lip-service Docker container.
## Gradio UI
```bash
sudo apt update
sudo apt install -y yasm pkg-config libx264-dev nasm
cd $WORKPATH
git clone https://github.com/FFmpeg/FFmpeg.git
cd FFmpeg
sudo ./configure --enable-gpl --enable-libx264 && sudo make -j$(($(nproc) - 1)) && sudo make install && hash -r
pip install gradio==4.38.1 soundfile
```
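To confirm that the locally built FFmpeg is the one on your PATH and that x264 support was compiled in (a quick check; output format may vary by version):
```bash
which ffmpeg
ffmpeg -version | head -n 1
ffmpeg -encoders 2>/dev/null | grep -i libx264
```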
```bash
cd $WORKPATH/GenAIExamples/AvatarChatbot
python3 ui/gradio/app_gradio_demo_avatarchatbot.py
```
The UI can be viewed at http://${host_ip}:7861
<img src="../../../../assets/img/UI.png" alt="UI Example" width="60%">
In the current version (v1.0), you need to set the avatar figure image/video and the DL model choice via environment variables before starting the AvatarChatbot backend service and running the UI. In the UI itself, only the audio question can be customized.
\*\* Changing the avatar figure between runs will be enabled in v2.0.
## Troubleshooting
```bash
cd GenAIExamples/AvatarChatbot/tests
export IMAGE_REPO="opea"
export IMAGE_TAG="latest"
export HUGGINGFACEHUB_API_TOKEN=<your_hf_token>
test_avatarchatbot_on_gaudi.sh
```

View File

@@ -0,0 +1,171 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
version: "3.8"
services:
whisper-service:
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
container_name: whisper-service
ports:
- "7066:7066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
asr:
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
container_name: asr-service
ports:
- "3001:9099"
ipc: host
environment:
ASR_ENDPOINT: ${ASR_ENDPOINT}
speecht5-service:
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
container_name: speecht5-service
ports:
- "7055:7055"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
tts:
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
container_name: tts-service
ports:
- "3002:9088"
ipc: host
environment:
TTS_ENDPOINT: ${TTS_ENDPOINT}
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-gaudi-server
ports:
- "3006:80"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 128 --max-total-tokens 256
llm:
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
container_name: llm-tgi-gaudi-server
depends_on:
- tgi-service
ports:
- "3007:9000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
wav2lip-service:
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
container_name: wav2lip-service
ports:
- "7860:7860"
ipc: host
volumes:
- ${PWD}:/outputs
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
DEVICE: ${DEVICE}
INFERENCE_MODE: ${INFERENCE_MODE}
CHECKPOINT_PATH: ${CHECKPOINT_PATH}
FACE: ${FACE}
AUDIO: ${AUDIO}
FACESIZE: ${FACESIZE}
OUTFILE: ${OUTFILE}
GFPGAN_MODEL_VERSION: ${GFPGAN_MODEL_VERSION}
UPSCALE_FACTOR: ${UPSCALE_FACTOR}
FPS: ${FPS}
WAV2LIP_PORT: ${WAV2LIP_PORT}
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
animation:
image: ${REGISTRY:-opea}/animation:${TAG:-latest}
container_name: animation-gaudi-server
ports:
- "3008:9066"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HABANA_VISIBLE_MODULES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
WAV2LIP_ENDPOINT: ${WAV2LIP_ENDPOINT}
runtime: habana
cap_add:
- SYS_NICE
restart: unless-stopped
avatarchatbot-gaudi-backend-server:
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
container_name: avatarchatbot-gaudi-backend-server
depends_on:
- asr
- llm
- tts
- animation
ports:
- "3009:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- MEGA_SERVICE_PORT=${MEGA_SERVICE_PORT}
- ASR_SERVICE_HOST_IP=${ASR_SERVICE_HOST_IP}
- ASR_SERVICE_PORT=${ASR_SERVICE_PORT}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- LLM_SERVICE_PORT=${LLM_SERVICE_PORT}
- TTS_SERVICE_HOST_IP=${TTS_SERVICE_HOST_IP}
- TTS_SERVICE_PORT=${TTS_SERVICE_PORT}
- ANIMATION_SERVICE_HOST_IP=${ANIMATION_SERVICE_HOST_IP}
- ANIMATION_SERVICE_PORT=${ANIMATION_SERVICE_PORT}
ipc: host
restart: always
networks:
default:
driver: bridge

View File

@@ -0,0 +1,73 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
avatarchatbot:
build:
args:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
no_proxy: ${no_proxy}
context: ../
dockerfile: ./Dockerfile
image: ${REGISTRY:-opea}/avatarchatbot:${TAG:-latest}
whisper-gaudi:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
whisper:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
asr:
build:
context: GenAIComps
dockerfile: comps/asr/whisper/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/asr:${TAG:-latest}
llm-tgi:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/tgi/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
speecht5-gaudi:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
speecht5:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
tts:
build:
context: GenAIComps
dockerfile: comps/tts/speecht5/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
wav2lip-gaudi:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/dependency/Dockerfile.intel_hpu
extends: avatarchatbot
image: ${REGISTRY:-opea}/wav2lip-gaudi:${TAG:-latest}
wav2lip:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/dependency/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/wav2lip:${TAG:-latest}
animation:
build:
context: GenAIComps
dockerfile: comps/animation/wav2lip/Dockerfile
extends: avatarchatbot
image: ${REGISTRY:-opea}/animation:${TAG:-latest}

View File

@@ -0,0 +1,147 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
rm $LOG_PATH/*.log
echo "Log files removed."
else
echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper-gaudi asr llm-tgi speecht5-gaudi tts wav2lip-gaudi animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
export DEVICE="hpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.10/dist-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar1.jpg"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
# Start Docker Containers
docker compose up -d
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-gaudi-server > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
# sleep 5m
echo "All services are up and running"
sleep 5s
}
function validate_megaservice() {
cd $WORKPATH
result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
echo "result is === $result"
if [[ $result == *"mp4"* ]]; then
echo "Result correct."
else
docker logs whisper-service > $LOG_PATH/whisper-service.log
docker logs asr-service > $LOG_PATH/asr-service.log
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
docker logs tts-service > $LOG_PATH/tts-service.log
docker logs tgi-gaudi-server > $LOG_PATH/tgi-gaudi-server.log
docker logs llm-tgi-gaudi-server > $LOG_PATH/llm-tgi-gaudi-server.log
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
docker logs animation-gaudi-server > $LOG_PATH/animation-gaudi-server.log
echo "Result wrong."
exit 1
fi
}
#function validate_frontend() {
#}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose down
}
function main() {
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
}
main

View File

@@ -0,0 +1,142 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
if ls $LOG_PATH/*.log 1> /dev/null 2>&1; then
rm $LOG_PATH/*.log
echo "Log files removed."
else
echo "No log files to remove."
fi
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="avatarchatbot whisper asr llm-tgi speecht5 tts wav2lip animation"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
export HUGGINGFACEHUB_API_TOKEN=$HUGGINGFACEHUB_API_TOKEN
export host_ip=$(hostname -I | awk '{print $1}')
export TGI_LLM_ENDPOINT=http://$host_ip:3006
export LLM_MODEL_ID=Intel/neural-chat-7b-v3-3
export ASR_ENDPOINT=http://$host_ip:7066
export TTS_ENDPOINT=http://$host_ip:7055
export WAV2LIP_ENDPOINT=http://$host_ip:7860
export MEGA_SERVICE_HOST_IP=${host_ip}
export ASR_SERVICE_HOST_IP=${host_ip}
export TTS_SERVICE_HOST_IP=${host_ip}
export LLM_SERVICE_HOST_IP=${host_ip}
export ANIMATION_SERVICE_HOST_IP=${host_ip}
export MEGA_SERVICE_PORT=8888
export ASR_SERVICE_PORT=3001
export TTS_SERVICE_PORT=3002
export LLM_SERVICE_PORT=3007
export ANIMATION_SERVICE_PORT=3008
export DEVICE="cpu"
export WAV2LIP_PORT=7860
export INFERENCE_MODE='wav2lip+gfpgan'
export CHECKPOINT_PATH='/usr/local/lib/python3.11/site-packages/Wav2Lip/checkpoints/wav2lip_gan.pth'
export FACE="assets/img/avatar5.png"
# export AUDIO='assets/audio/eg3_ref.wav' # audio file path is optional, will use base64str in the post request as input if is 'None'
export AUDIO='None'
export FACESIZE=96
export OUTFILE="/outputs/result.mp4"
export GFPGAN_MODEL_VERSION=1.4 # latest version, can roll back to v1.3 if needed
export UPSCALE_FACTOR=1
export FPS=10
# Start Docker Containers
docker compose up -d
n=0
until [[ "$n" -ge 100 ]]; do
docker logs tgi-service > $LOG_PATH/tgi_service_start.log
if grep -q Connected $LOG_PATH/tgi_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
echo "All services are up and running"
sleep 5s
}
function validate_megaservice() {
cd $WORKPATH
result=$(http_proxy="" curl http://${ip_address}:3009/v1/avatarchatbot -X POST -d @assets/audio/sample_whoareyou.json -H 'Content-Type: application/json')
echo "result is === $result"
if [[ $result == *"mp4"* ]]; then
echo "Result correct."
else
docker logs whisper-service > $LOG_PATH/whisper-service.log
docker logs asr-service > $LOG_PATH/asr-service.log
docker logs speecht5-service > $LOG_PATH/speecht5-service.log
docker logs tts-service > $LOG_PATH/tts-service.log
docker logs tgi-service > $LOG_PATH/tgi-service.log
docker logs llm-tgi-server > $LOG_PATH/llm-tgi-server.log
docker logs wav2lip-service > $LOG_PATH/wav2lip-service.log
docker logs animation-server > $LOG_PATH/animation-server.log
echo "Result wrong."
exit 1
fi
}
#function validate_frontend() {
#}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/cpu/xeon
docker compose down
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_services
# validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker builder prune --all
echo y | docker image prune
}
main

View File

@@ -0,0 +1,349 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
import asyncio
import base64
import io
import os
import shutil
import subprocess
import time
import aiohttp
import docker
import ffmpeg
import gradio as gr
import numpy as np
import soundfile as sf
from PIL import Image
# %% Docker Management
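# NOTE: update_env_var_in_container() below is currently a placeholder; changing a running container's
# environment (e.g. switching the wav2lip face or device) is not yet supported. See the TO-DO in transcribe().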
def update_env_var_in_container(container_name, env_var, new_value):
return
# %% AudioQnA functions
def preprocess_audio(audio):
"""The audio data is a 16-bit integer array with values ranging from -32768 to 32767 and the shape of the audio data array is (samples,)"""
sr, y = audio
# Convert to normalized float32 audio
y = y.astype(np.float32)
y /= np.max(np.abs(y))
# Save to memory
buf = io.BytesIO()
sf.write(buf, y, sr, format="WAV")
buf.seek(0) # Reset the buffer position to the beginning
# Encode the WAV file to base64 string
base64_bytes = base64.b64encode(buf.read())
base64_string = base64_bytes.decode("utf-8")
return base64_string
def base64_to_int16(base64_string):
wav_bytes = base64.b64decode(base64_string)
buf = io.BytesIO(wav_bytes)
y, sr = sf.read(buf, dtype="int16")
return sr, y
async def transcribe(audio_input, face_input, model_choice):
"""Input: mic audio; Output: ai audio, text, text"""
global ai_chatbot_url, chat_history, count
chat_history = ""
# Preprocess the audio
base64bytestr = preprocess_audio(audio_input)
# Send the audio to the AvatarChatbot backend server endpoint
initial_inputs = {"audio": base64bytestr, "max_tokens": 64}
# TO-DO: update wav2lip-service with the chosen face_input
# update_env_var_in_container("wav2lip-service", "DEVICE", "new_device_value")
async with aiohttp.ClientSession() as session:
async with session.post(ai_chatbot_url, json=initial_inputs) as response:
# Check the response status code
if response.status == 200:
# response_json = await response.json()
# # Decode the base64 string
# sampling_rate, audio_int16 = base64_to_int16(response_json["byte_str"])
# chat_history += f"User: {response_json['query']}\n\n"
# chat_ai = response_json["text"]
# hitted_ends = [",", ".", "?", "!", "。", ";"]
# last_punc_idx = max([chat_ai.rfind(punc) for punc in hitted_ends])
# if last_punc_idx != -1:
# chat_ai = chat_ai[: last_punc_idx + 1]
# chat_history += f"AI: {chat_ai}"
# chat_history = chat_history.replace("OPEX", "OPEA")
# return (sampling_rate, audio_int16) # handle the response
result = await response.text()
return "docker_compose/intel/hpu/gaudi/result.mp4"
else:
return {"error": "Failed to transcribe audio", "status_code": response.status_code}
def resize_image(image_pil, size=(720, 720)):
"""Resize the image to the specified size."""
return image_pil.resize(size, Image.LANCZOS)
def resize_video(video_path, save_path, size=(720, 1280)):
"""Resize the video to the specified size, and save to the save path."""
ffmpeg.input(video_path).output(save_path, vf=f"scale={size[0]}:{size[1]}").overwrite_output().run()
# %% AI Avatar demo function
async def aiavatar_demo(audio_input, face_input, model_choice):
"""Input: mic/preloaded audio, avatar file path;
Output: ai video"""
# Wait for response from AvatarChatbot backend
output_video = await transcribe(audio_input, face_input, model_choice) # output video path
if isinstance(output_video, dict): # in case of an error
return None, None
else:
return output_video
# %% Main
if __name__ == "__main__":
# HOST_IP = os.getenv("host_ip")
HOST_IP = subprocess.check_output("hostname -I | awk '{print $1}'", shell=True).decode("utf-8").strip()
# Fetch the AudioQnA backend server
ai_chatbot_url = f"http://{HOST_IP}:3009/v1/avatarchatbot"
# Collect chat history to print in the interface
chat_history = ""
# Prepare 3 image paths and 3 video paths
# image_pils = [
# Image.open(os.path.join("assets/img/woman1.png")),
# Image.open(os.path.join("assets/img/man1.png")),
# Image.open(os.path.join("assets/img/woman2.png")),
# ]
# video_paths = [
# os.path.join("assets/video/man1.mp4"),
# os.path.join("assets/video/woman2.mp4"),
# os.path.join("assets/video/man4.mp4"),
# ]
def image_to_base64(image_path):
with open(image_path, "rb") as image_file:
return base64.b64encode(image_file.read()).decode("utf-8")
# Convert your images to Base64
xeon_base64 = image_to_base64("assets/img/xeon.jpg")
gaudi_base64 = image_to_base64("assets/img/gaudi.png")
# List of prerecorded WAV files containing audio questions
# audio_filepaths = [
# "assets/audio/intel2.wav",
# "assets/audio/intel4.wav",
# ]
# audio_questions = [
# "1. What's the objective of the Open Platform for Enterprise AI? How is it helpful to enterprises building AI solutions?",
# "2. What kinds of Intel AI tools are available to accelerate AI workloads?",
# ]
# Demo frontend
demo = gr.Blocks()
with demo:
# Define processing functions
count = 0
# Make necessary folders:
if not os.path.exists("inputs"):
os.makedirs("inputs")
if not os.path.exists("outputs"):
os.makedirs("outputs")
def initial_process(audio_input, face_input, model_choice):
global count
start_time = time.time()
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
video_file = loop.run_until_complete(aiavatar_demo(audio_input, face_input, model_choice))
count += 1
end_time = time.time()
return video_file, f"The entire application took {(end_time - start_time):.1f} seconds"
# def update_selected_image_state(image_index):
# image_index = int(image_index)
# selected_image_state.value = image_index
# # change image_input here
# if image_index < len(image_pils):
# return f"inputs/face_{image_index}.png"
# else:
# return f"inputs/video_{image_index - len(image_pils)}.mp4"
# def update_audio_input(audio_choice):
# if audio_choice:
# audio_index = int(audio_choice.split(".")[0]) - 1
# audio_filepath_gradio = f"inputs/audio_{audio_index:d}.wav"
# shutil.copyfile(audio_filepaths[audio_index], audio_filepath_gradio)
# return audio_filepath_gradio
# UI Components
# Title & Introduction
gr.Markdown("<h1 style='font-size: 36px;'>A PyTorch and OPEA based AI Avatar Audio Chatbot</h1>")
with gr.Row():
with gr.Column(scale=8):
gr.Markdown(
"""
<p style='font-size: 24px;'>Welcome to our AI Avatar Audio Chatbot! This application leverages PyTorch and <strong>OPEA (Open Platform for Enterprise AI) v0.8</strong> to provide you with a human-like conversational experience. It's run on Intel® Gaudi® AI Accelerator and Intel® Xeon® Processor, with hardware and software optimizations.<br>
Please feel free to interact with the AI avatar by choosing your own avatar and talking into the mic.</p>
"""
)
with gr.Column(scale=1):
# with gr.Row():
# gr.Markdown(f"""
# <img src='data:image/png;base64,{opea_qr_base64}' alt='OPEA QR Code' style='width: 150px; height: auto;'>
# """, label="OPEA QR Code")
# gr.Markdown(f"""
# <img src='data:image/png;base64,{opea_gh_qr_base64}' alt='OPEA GitHub QR Code' style='width: 150px; height: auto;'>
# """, label="OPEA GitHub QR Code")
with gr.Row():
gr.Markdown(
f"""
<img src='data:image/png;base64,{gaudi_base64}' alt='Intel®Gaudi' style='width: 120px; height: auto;'>""",
label="Intel®Gaudi",
)
gr.Markdown(
f"""
<img src='data:image/png;base64,{xeon_base64}' alt='Intel®Xeon' style='width: 120px; height: auto;'>""",
label="Intel®Xeon",
)
gr.Markdown("<hr>") # Divider
# Inputs
# Image gallery
selected_image_state = gr.State(value=-1)
image_clicks = []
image_click_buttons = []
video_clicks = []
video_click_buttons = []
with gr.Row():
with gr.Column(scale=1):
audio_input = gr.Audio(
sources=["upload", "microphone"], format="wav", label="🎤 or 📤 for your Input audio!"
)
# audio_choice = gr.Dropdown(
# choices=audio_questions,
# label="Choose an audio question",
# value=None, # default value
# )
# Update audio_input when a selection is made from the dropdown
# audio_choice.change(fn=update_audio_input, inputs=audio_choice, outputs=audio_input)
face_input = gr.File(
file_count="single",
file_types=["image", "video"],
label="Choose an avatar or 📤 an image or video!",
)
model_choice = gr.Dropdown(
choices=["wav2lip", "wav2lip+GAN", "wav2lip+GFPGAN"],
label="Choose a DL model",
)
# with gr.Column(scale=2):
# # Display 3 images and buttons
# with gr.Row():
# for i, image_pil in enumerate(image_pils):
# image_pil = resize_image(image_pil)
# save_path = f"inputs/face_{int(i)}.png"
# image_pil.save(save_path, "PNG")
# image_clicks.append(gr.Image(type="filepath", value=save_path, label=f"Avatar {int(i)+1}"))
# with gr.Row():
# for i in range(len(image_pils)):
# image_click_buttons.append(gr.Button(f"Use Image {i+1}"))
# # Display 3 videos and buttons
# with gr.Row():
# for i, video_path in enumerate(video_paths):
# save_path = f"inputs/video_{int(i)}.mp4"
# resize_video(video_path, save_path)
# video_clicks.append(gr.Video(value=save_path, label=f"Video {int(i)+1}"))
# with gr.Row():
# for i in range(len(video_paths)):
# video_click_buttons.append(gr.Button(f"Use Video {int(i)+1}"))
submit_button = gr.Button("Submit")
# Outputs
gr.Markdown("<hr>") # Divider
with gr.Row():
with gr.Column():
video_output = gr.Video(label="Your AI Avatar video: ", format="mp4", width=1280, height=720)
video_time_text = gr.Textbox(label="Video processing time", value="0.0 seconds")
# Technical details
gr.Markdown("<hr>") # Divider
with gr.Row():
gr.Markdown(
"""
<p style='font-size: 24px;'>OPEA megaservice deployed: <br>
<ul style='font-size: 24px;'>
<li><strong>AvatarChatbot</strong></li>
</ul></p>
<p style='font-size: 24px;'>OPEA microservices deployed:
<ul style='font-size: 24px;'>
<li><strong>ASR</strong> (service: opea/whisper-gaudi, model: openai/whisper-small)</li>
<li><strong>LLM 'text-generation'</strong> (service: opea/llm-tgi, model: Intel/neural-chat-7b-v3-3)</li>
<li><strong>TTS</strong> (service: opea/speecht5-gaudi, model: microsoft/speecht5_tts)</li>
<li><strong>Animation</strong> (service: opea/animation, model: wav2lip+gfpgan)</li>
</ul></p>
"""
)
with gr.Row():
gr.Image("assets/img/flowchart.png", label="Megaservice Flowchart")
with gr.Row():
gr.Markdown(
"""
<p style='font-size: 24px;'>The AI Avatar Audio Chatbot is powered by the following Intel® AI software:<br>
<ul style='font-size: 24px;'>
<li><strong>Intel Gaudi Software v1.17.0</strong></li>
<li><strong>PyTorch v2.3.1 (Eager mode + torch.compile) </strong></li>
<li><strong>HPU Graph</strong></li>
<li><strong>Intel Neural Compressor (INC)</strong></li>
</ul></p>
"""
)
# Disclaimer
gr.Markdown("<hr>") # Divider
gr.Markdown("<h2 style='font-size: 24px;'>Notices & Disclaimers</h1>")
gr.Markdown(
"""
<p style='font-size: 20px;'>Intel is committed to respecting human rights and avoiding complicity in human rights abuses. See Intel's Global Human Rights Principles. Intel's products and software are intended only to be used in applications that do not cause or contribute to a violation of an internationally recognized human right.<br></p>
<p style='font-size: 20px;'>© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. Other names and brands may be claimed as the property of others.<br></p>
<p style='font-size: 20px;'>You may not use or facilitate the use of this document in connection with any infringement or other legal analysis concerning Intel products described herein. You agree to grant Intel a non-exclusive, royalty-free license to any patent claim thereafter drafted which includes subject matter disclosed herein.<br></p>
"""
)
# State transitions
# for i in range(len(image_pils)):
# image_click_buttons[i].click(
# update_selected_image_state, inputs=[gr.Number(value=i, visible=False)], outputs=[face_input]
# )
# for i in range(len(video_paths)):
# video_click_buttons[i].click(
# update_selected_image_state,
# inputs=[gr.Number(value=i + len(image_pils), visible=False)],
# outputs=[face_input],
# )
submit_button.click(
initial_process,
inputs=[audio_input, face_input, model_choice],
outputs=[
video_output,
video_time_text,
],
)
demo.queue().launch(server_name="0.0.0.0", server_port=7861)

View File

@@ -206,8 +206,6 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
docker compose up -d
```
> Notice: Currently only the **Habana Driver 1.16.x** is supported for Gaudi.
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.
### Deploy ChatQnA on Xeon

View File

@@ -41,11 +41,11 @@ class MultiHop_Evaluator(Evaluator):
return []
def get_retrieved_documents(self, query, arguments):
data = {"text": query}
data = {"inputs": query}
headers = {"Content-Type": "application/json"}
response = requests.post(arguments.embedding_endpoint, data=json.dumps(data), headers=headers)
response = requests.post(arguments.tei_embedding_endpoint + "/embed", data=json.dumps(data), headers=headers)
if response.ok:
embedding = response.json()["embedding"]
embedding = response.json()[0]
else:
print(f"Request for embedding failed due to {response.text}.")
return []

View File

@@ -47,6 +47,7 @@ RERANK_SERVER_HOST_IP = os.getenv("RERANK_SERVER_HOST_IP", "0.0.0.0")
RERANK_SERVER_PORT = int(os.getenv("RERANK_SERVER_PORT", 80))
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 80))
LLM_MODEL = os.getenv("LLM_MODEL", "Intel/neural-chat-7b-v3-3")
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
@@ -61,7 +62,7 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
elif self.services[cur_node].service_type == ServiceType.LLM:
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
next_inputs = {}
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
next_inputs["model"] = LLM_MODEL
next_inputs["messages"] = [{"role": "user", "content": inputs["inputs"]}]
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
next_inputs["top_p"] = llm_parameters_dict["top_p"]
@@ -165,7 +166,10 @@ def align_generator(self, gen, **kwargs):
try:
# sometimes yield empty chunk, do a fallback here
json_data = json.loads(json_str)
if json_data["choices"][0]["finish_reason"] != "eos_token":
if (
json_data["choices"][0]["finish_reason"] != "eos_token"
and "content" in json_data["choices"][0]["delta"]
):
yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
except Exception as e:
yield f"data: {repr(json_str.encode('utf-8'))}\n\n"

View File

@@ -2,104 +2,6 @@
This document outlines the deployment process for a ChatQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on AIPC. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`.
## Prerequisites
We use [Ollama](https://ollama.com/) as our LLM service for AIPC.
Please follow the instructions below to set up Ollama on your PC. This sets up the entrypoint needed for Ollama to work with the ChatQnA example.
### Set Up Ollama LLM Service
#### Install Ollama Service
Install Ollama service with one command:
```
curl -fsSL https://ollama.com/install.sh | sh
```
#### Set Ollama Service Configuration
The Ollama service configuration file is /etc/systemd/system/ollama.service. Edit the file to set the OLLAMA_HOST environment variable.
Replace **<host_ip>** with your host IPv4 address (use the external public IP). For example, if the host_ip is 10.132.x.y, set `Environment="OLLAMA_HOST=10.132.x.y:11434"`.
```
Environment="OLLAMA_HOST=host_ip:11434"
```
#### Set https_proxy environment for Ollama
If your system accesses the network through a proxy, add https_proxy to the Ollama service configuration file:
```
Environment="https_proxy=Your_HTTPS_Proxy"
```
#### Restart Ollama services
```
$ sudo systemctl daemon-reload
$ sudo systemctl restart ollama.service
```
#### Check that the service has started
```
netstat -tuln | grep 11434
```
The output is similar to:
```
tcp 0 0 10.132.x.y:11434 0.0.0.0:* LISTEN
```
#### Pull Ollama LLM model
Run the following command to download the LLM model. The <host_ip> is the one set in [Ollama Service Configuration](#Set-Ollama-Service-Configuration).
```
export host_ip=<host_ip>
export OLLAMA_HOST=http://${host_ip}:11434
ollama pull llama3.2
```
After downloading the models, you can list them with `ollama list`.
The output should be similar to the following:
```
NAME ID SIZE MODIFIED
llama3.2:latest a80c4f17acd5 2.0 GB 2 minutes ago
```
### Consume Ollama LLM Service
Access the Ollama service to verify that it is functioning correctly.
```bash
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
```
The output is similar to the following:
```
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.098813868Z","response":"Deep","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.124514468Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.149754216Z","response":" is","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.180420784Z","response":" a","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.229185873Z","response":" subset","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.263956118Z","response":" of","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.289097354Z","response":" machine","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.316838918Z","response":" learning","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.342309506Z","response":" that","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.367221264Z","response":" involves","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.39205893Z","response":" the","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.417933974Z","response":" use","done":false}
{"model":"llama3.2","created_at":"2024-10-12T12:55:28.443110388Z","response":" of","done":false}
...
```
## 🚀 Build Docker Images
First of all, you need to build the Docker images locally and install the required Python packages.
@@ -122,20 +24,14 @@ export https_proxy="Your_HTTPs_Proxy"
docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
```
### 2 Build LLM Image
```bash
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/langchain/Dockerfile .
```
### 3. Build Dataprep Image
### 2. Build Dataprep Image
```bash
docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
cd ..
```
### 4. Build MegaService Docker Image
### 3. Build MegaService Docker Image
To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image with the command below:
@@ -146,7 +42,7 @@ cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
### 5. Build UI Docker Image
### 4. Build UI Docker Image
Build the frontend Docker image with the command below:
@@ -155,7 +51,7 @@ cd ~/OPEA/GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
```
### 6. Build Nginx Docker Image
### 5. Build Nginx Docker Image
```bash
cd GenAIComps
@@ -166,10 +62,9 @@ Then run the command `docker images`, you will have the following 6 Docker Image
1. `opea/dataprep-redis:latest`
2. `opea/retriever-redis:latest`
3. `opea/llm-ollama:latest`
4. `opea/chatqna:latest`
5. `opea/chatqna-ui:latest`
6. `opea/nginx:latest`
3. `opea/chatqna:latest`
4. `opea/chatqna-ui:latest`
5. `opea/nginx:latest`
## 🚀 Start Microservices
@@ -195,10 +90,10 @@ For Linux users, please run `hostname -I | awk '{print $1}'`. For Windows users,
export your_hf_api_token="Your_Huggingface_API_Token"
```
**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-aipc-backend-server,tei-embedding-service,retriever,tei-reranking-service,redis-vector-db,dataprep-redis-service
```
- Linux PC
@@ -211,7 +106,7 @@ export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"
```
@@ -222,7 +117,7 @@ set EMBEDDING_MODEL_ID=BAAI/bge-base-en-v1.5
set RERANK_MODEL_ID=BAAI/bge-reranker-base
set INDEX_NAME=rag-redis
set HUGGINGFACEHUB_API_TOKEN=%your_hf_api_token%
set OLLAMA_ENDPOINT=http://host.docker.internal:11434
set OLLAMA_HOST=host.docker.internal
set OLLAMA_MODEL="llama3.2"
```
@@ -277,16 +172,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
curl http://${host_ip}:11434/api/generate -d '{"model": "llama3.2", "prompt":"What is Deep Learning?"}'
```
5. LLM Microservice
```bash
curl http://${host_ip}:9000/v1/chat/completions\
-X POST \
-d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
-H 'Content-Type: application/json'
```
6. MegaService
5. MegaService
```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
@@ -294,7 +180,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
}'
```
7. Upload RAG Files through Dataprep Microservice (Optional)
6. Upload RAG Files through Dataprep Microservice (Optional)
To chat with retrieved information, you need to upload a file using Dataprep service.
@@ -334,4 +220,4 @@ the output is:
## 🚀 Launch the UI
To access the frontend, open the following URL in your browser: http://{host_ip}:5173.
To access the frontend, open the following URL in your browser: http://{host_ip}:80.

View File

@@ -72,22 +72,21 @@ services:
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
llm:
image: ${REGISTRY:-opea}/llm-ollama
container_name: llm-ollama
ollama-service:
image: ollama/ollama
container_name: ollama
ports:
- "9000:9000"
ipc: host
- "11434:11434"
volumes:
- ollama:/root/.ollama
entrypoint: ["bash", "-c"]
command: ["ollama serve & sleep 10 && ollama run ${OLLAMA_MODEL} & wait"]
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
OLLAMA_ENDPOINT: ${OLLAMA_ENDPOINT}
OLLAMA_MODEL: ${OLLAMA_MODEL}
chaqna-aipc-backend-server:
chatqna-aipc-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-aipc-backend-server
depends_on:
@@ -96,29 +95,29 @@ services:
- tei-embedding-service
- retriever
- tei-reranking-service
- llm
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chaqna-aipc-backend-server
- MEGA_SERVICE_HOST_IP=chatqna-aipc-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=80
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=80
- LLM_SERVER_HOST_IP=llm
- LLM_SERVER_PORT=9000
- LLM_SERVER_HOST_IP=${OLLAMA_HOST}
- LLM_SERVER_PORT=11434
- LLM_MODEL=${OLLAMA_MODEL}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chaqna-aipc-ui-server:
chatqna-aipc-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-aipc-ui-server
depends_on:
- chaqna-aipc-backend-server
- chatqna-aipc-backend-server
ports:
- "5173:5173"
environment:
@@ -127,28 +126,31 @@ services:
- http_proxy=${http_proxy}
ipc: host
restart: always
chaqna-aipc-nginx-server:
chatqna-aipc-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chaqna-aipc-nginx-server
container_name: chatqna-aipc-nginx-server
depends_on:
- chaqna-aipc-backend-server
- chaqna-aipc-ui-server
- chatqna-aipc-backend-server
- chatqna-aipc-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-xeon-ui-server
- FRONTEND_SERVICE_IP=chatqna-aipc-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-xeon-backend-server
- BACKEND_SERVICE_IP=chatqna-aipc-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
ipc: host
restart: always
volumes:
ollama:
networks:
default:
driver: bridge

View File

@@ -16,5 +16,5 @@ export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export INDEX_NAME="rag-redis"
export OLLAMA_ENDPOINT=http://${host_ip}:11434
export OLLAMA_HOST=${host_ip}
export OLLAMA_MODEL="llama3.2"

View File

@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
@@ -27,6 +25,9 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```
3. Set up other environment variables:
@@ -47,13 +48,13 @@ docker pull opea/chatqna:latest
docker pull opea/chatqna-ui:latest
```
In following cases, you could build docker image from source by yourself.
NB: You should build docker image from source by yourself if:
- Failed to download the docker image.
- You are developing off the git main branch (as the container's ports in the repo may be different from the published docker image).
- You can't download the docker image.
- You want to use a specific version of Docker image.
- If you want to use a specific version of Docker image.
Please refer to 'Build Docker Images' in below.
Please refer to ['Build Docker Images'](#🚀-build-docker-images) in below.
## QuickStart: 3.Consume the ChatQnA Service
@@ -97,6 +98,11 @@ After launching your instance, you can connect to it using SSH (for Linux instan
First of all, you need to build the Docker images locally and install the required Python packages.
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Retriever Image
```bash
@@ -189,7 +195,7 @@ For users in China who are unable to download models directly from Huggingface,
export HF_TOKEN=${your_hf_token}
export HF_ENDPOINT="https://hf-mirror.com"
model_name="Intel/neural-chat-7b-v3-3"
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id $model_name
docker run -p 8008:80 -v ./data:/data --name tgi-service -e HF_ENDPOINT=$HF_ENDPOINT -e http_proxy=$http_proxy -e https_proxy=$https_proxy --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id $model_name
```
2. Offline
@@ -203,7 +209,7 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export HF_TOKEN=${your_hf_token}
export model_path="/path/to/model"
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu --model-id /data
docker run -p 8008:80 -v $model_path:/data --name tgi_service --shm-size 1g ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu --model-id /data
```
### Setup Environment Variables
@@ -213,8 +219,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -225,6 +229,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service
```
3. Set up other environment variables:
@@ -305,7 +311,7 @@ For details on how to verify the correctness of the response, refer to [how-to-v
Try the command below to check whether the LLM serving is ready.
```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs tgi-service | grep Connected
```
If the service is ready, you will get the response like below.

View File

@@ -111,7 +111,7 @@ Build frontend Docker image that enables Conversational experience with ChatQnA
**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
```bash
cd GenAIExamples/ChatQnA//ui
cd GenAIExamples/ChatQnA/ui
export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8912/v1/chatqna"
export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6043/v1/dataprep"
docker build --no-cache -t opea/chatqna-conversation-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy --build-arg BACKEND_SERVICE_ENDPOINT=$BACKEND_SERVICE_ENDPOINT --build-arg DATAPREP_SERVICE_ENDPOINT=$DATAPREP_SERVICE_ENDPOINT -f ./docker/Dockerfile.react .
@@ -167,10 +167,10 @@ export host_ip="External_Public_IP"
export your_hf_api_token="Your_Huggingface_API_Token"
```
**Append the value of the public IP address to the no_proxy list**
**Append the value of the public IP address to the no_proxy list if you are in a proxy environment**
```
export your_no_proxy=${your_no_proxy},"External_Public_IP"
export your_no_proxy=${your_no_proxy},"External_Public_IP",chatqna-xeon-ui-server,chatqna-xeon-backend-server,dataprep-qdrant-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```
```bash

View File

@@ -73,7 +73,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -112,6 +112,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always

View File

@@ -72,7 +72,7 @@ services:
HF_HUB_ENABLE_HF_TRANSFER: 0
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "6042:80"
@@ -111,6 +111,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always

View File

@@ -110,6 +110,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm_service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always

View File

@@ -57,7 +57,7 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tgi-service:
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
container_name: tgi-service
ports:
- "9009:80"
@@ -93,6 +93,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always

ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh Normal file → Executable file
View File

View File

@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
@@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```
3. Set up other environment variables:
@@ -70,6 +70,11 @@ curl http://${host_ip}:8888/v1/chatqna \
First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
```bash
git clone https://github.com/opea-project/GenAIComps.git
cd GenAIComps
```
### 1. Build Retriever Image
```bash
@@ -98,7 +103,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
```
@@ -118,7 +123,7 @@ docker build -t opea/guardrails-tgi:latest --build-arg https_proxy=$https_proxy
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna-without-rerank:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile.without_rerank .
```
@@ -211,8 +216,6 @@ For users in China who are unable to download models directly from Huggingface,
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -223,6 +226,8 @@ For users in China who are unable to download models directly from Huggingface,
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-gaudi-ui-server,chatqna-gaudi-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service,vllm_service,guardrails
```
3. Set up other environment variables:
@@ -252,12 +257,6 @@ If use vllm for llm backend.
docker compose -f compose_vllm.yaml up -d
```
If use vllm-on-ray for llm backend.
```bash
docker compose -f compose_vllm_ray.yaml up -d
```
If you want to enable the guardrails microservice in the pipeline, run the command below instead:
```bash
@@ -315,7 +314,7 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
Try the command below to check whether the LLM service is ready.
```bash
docker logs ${CONTAINER_ID} | grep Connected
docker logs tgi-service | grep Connected
```
If the service is ready, you will get a response like the one below.
@@ -346,13 +345,6 @@ For validation details, please refer to [how-to-validate_service](./how_to_valid
}'
```
```bash
#vLLM-on-Ray Service
curl http://${host_ip}:8006/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{"model": "${LLM_MODEL_ID}", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
```
5. MegaService
```bash


@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -133,6 +125,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -65,25 +65,17 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -176,6 +168,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -86,7 +78,7 @@ services:
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
container_name: vllm-gaudi-server
ports:
- "8007:80"
@@ -104,7 +96,7 @@ services:
cap_add:
- SYS_NICE
ipc: host
command: /bin/bash -c "export VLLM_CPU_KVCACHE_SPACE=40 && python3 -m vllm.entrypoints.openai.api_server --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048"
command: --enforce-eager --model $LLM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
@@ -128,6 +120,7 @@ services:
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -1,171 +0,0 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
services:
redis-vector-db:
image: redis/redis-stack:7.2.0-v9
container_name: redis-vector-db
ports:
- "6379:6379"
- "8001:8001"
dataprep-redis-service:
image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
container_name: dataprep-redis-server
depends_on:
- redis-vector-db
- tei-embedding-service
ports:
- "6007:6007"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
depends_on:
- redis-vector-db
ports:
- "7000:7000"
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-ray-service:
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
container_name: vllm-ray-gaudi-server
ports:
- "8006:8000"
volumes:
- "./data:/data"
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
LLM_MODEL_ID: ${LLM_MODEL_ID}
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: /bin/bash -c "ray start --head && python vllm_ray_openai.py --port_number 8000 --model_id_or_path $LLM_MODEL_ID --tensor_parallel_size 2 --enforce_eager True"
chatqna-gaudi-backend-server:
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
container_name: chatqna-gaudi-backend-server
depends_on:
- redis-vector-db
- tei-embedding-service
- retriever
- tei-reranking-service
- vllm-ray-service
ports:
- "8888:8888"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=chatqna-gaudi-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=vllm-ray-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-8000}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always
chatqna-gaudi-ui-server:
image: ${REGISTRY:-opea}/chatqna-ui:${TAG:-latest}
container_name: chatqna-gaudi-ui-server
depends_on:
- chatqna-gaudi-backend-server
ports:
- "5173:5173"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
ipc: host
restart: always
chatqna-gaudi-nginx-server:
image: ${REGISTRY:-opea}/nginx:${TAG:-latest}
container_name: chatqna-gaudi-nginx-server
depends_on:
- chatqna-gaudi-backend-server
- chatqna-gaudi-ui-server
ports:
- "${NGINX_PORT:-80}:80"
environment:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- FRONTEND_SERVICE_IP=chatqna-gaudi-ui-server
- FRONTEND_SERVICE_PORT=5173
- BACKEND_SERVICE_NAME=chatqna
- BACKEND_SERVICE_IP=chatqna-gaudi-backend-server
- BACKEND_SERVICE_PORT=8888
- DATAPREP_SERVICE_IP=dataprep-redis-service
- DATAPREP_SERVICE_PORT=6007
ipc: host
restart: always
networks:
default:
driver: bridge


@@ -26,25 +26,17 @@ services:
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/tei-gaudi:latest
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
container_name: tei-embedding-gaudi-server
ports:
- "8090:80"
volumes:
- "./data:/data"
runtime: habana
cap_add:
- SYS_NICE
ipc: host
shm_size: 1g
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
@@ -109,6 +101,7 @@ services:
- RETRIEVER_SERVICE_HOST_IP=retriever
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
- LLM_MODEL=${LLM_MODEL_ID}
- LOGFLAG=${LOGFLAG}
ipc: host
restart: always


@@ -17,8 +17,6 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
```
@@ -27,6 +25,8 @@ To set up environment variables for deploying ChatQnA services, follow these ste
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```
3. Set up other environment variables:
@@ -95,9 +95,9 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op
```bash
git clone https://github.com/opea-project/GenAIExamples.git
cd GenAIExamples/ChatQnA/docker
cd GenAIExamples/ChatQnA
docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
cd ../../..
cd ../..
```
### 5. Build UI Docker Image
@@ -107,7 +107,7 @@ Construct the frontend Docker image using the command below:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
cd ../../../..
cd ../../../
```
### 6. Build React UI Docker Image (Optional)
@@ -117,7 +117,7 @@ Construct the frontend Docker image using the command below:
```bash
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
cd ../../../..
cd ../../..
```
### 7. Build Nginx Docker Image
@@ -156,8 +156,6 @@ Change the `xxx_MODEL_ID` below for your needs.
```bash
# Example: host_ip="192.168.1.1"
export host_ip="External_Public_IP"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy"
export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
# Example: NGINX_PORT=80
export NGINX_PORT=${your_nginx_port}
@@ -168,6 +166,8 @@ Change the `xxx_MODEL_ID` below for your needs.
```bash
export http_proxy="Your_HTTP_Proxy"
export https_proxy="Your_HTTPs_Proxy"
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
export no_proxy="Your_No_Proxy",chatqna-ui-server,chatqna-backend-server,dataprep-redis-service,tei-embedding-service,retriever,tei-reranking-service,tgi-service
```
3. Set up other environment variables:


@@ -20,10 +20,10 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_HOST: ${REDIS_HOST}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
TEI_ENDPOINT: http://tei-embedding-service:80
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
tei-embedding-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -39,13 +39,6 @@ services:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
container_name: retriever-redis-server
@@ -58,12 +51,13 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
REDIS_URL: ${REDIS_URL}
REDIS_URL: redis://redis-vector-db:6379
REDIS_HOST: redis-vector-db
INDEX_NAME: ${INDEX_NAME}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/text-embeddings-inference:1.5
container_name: tei-reranking-server
ports:
- "8808:80"
@@ -123,11 +117,14 @@ services:
- no_proxy=${no_proxy}
- https_proxy=${https_proxy}
- http_proxy=${http_proxy}
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
- EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
- RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
- RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
- LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
- MEGA_SERVICE_HOST_IP=chaqna-backend-server
- EMBEDDING_SERVER_HOST_IP=tei-embedding-service
- EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
- RETRIEVER_SERVICE_HOST_IP=retriever
- RERANK_SERVER_HOST_IP=tei-reranking-service
- RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
- LLM_SERVER_HOST_IP=tgi-service
- LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
ipc: host
restart: always
chaqna-ui-server:


@@ -77,24 +77,6 @@ services:
dockerfile: comps/llms/text-generation/vllm/langchain/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm:${TAG:-latest}
llm-vllm-hpu:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/langchain/dependency/Dockerfile.intel_hpu
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
llm-vllm-ray:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/ray/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-ray:${TAG:-latest}
llm-vllm-ray-hpu:
build:
context: GenAIComps
dockerfile: comps/llms/text-generation/vllm/ray/dependency/Dockerfile
extends: chatqna
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
dataprep-redis:
build:
context: GenAIComps
@@ -119,6 +101,12 @@ services:
dockerfile: Dockerfile.cpu
extends: chatqna
image: ${REGISTRY:-opea}/vllm:${TAG:-latest}
vllm-hpu:
build:
context: vllm-fork
dockerfile: Dockerfile.hpu
extends: chatqna
image: ${REGISTRY:-opea}/vllm-hpu:${TAG:-latest}
nginx:
build:
context: GenAIComps


@@ -18,7 +18,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
- tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- retriever: opea/retriever-redis:latest
- tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
- tgi-service: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
- chaqna-xeon-backend-server: opea/chatqna:latest
If you want to use the Gaudi accelerator, two alternate images are used for the embedding and LLM services, as sketched below.
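For reference, here is a minimal sketch of pulling the Gaudi counterparts, assuming the `tei-gaudi` and `tgi-gaudi` images referenced elsewhere in this diff; exact tags and service wiring may differ in practice.
```bash
# Hypothetical Gaudi substitutes for the CPU images above; the tags follow the ones
# referenced elsewhere in this diff and may change between releases.
docker pull ghcr.io/huggingface/tei-gaudi:latest   # embedding/rerank, instead of text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5    # LLM serving, instead of text-generation-inference:2.4.0-intel-cpu
```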


@@ -0,0 +1,53 @@
# Deploy ChatQnA in a Kubernetes Cluster on a Single-Node Environment (Minikube)
The following instructions deploy the ChatQnA example on a single node using Kubernetes for testing purposes.
## Minikube setup
1. Install [Minikube](https://minikube.sigs.k8s.io/docs/start/) following the quickstart guide
2. Install [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/)
3. Build the container images by following the "Build Docker Images" section in the [docker-compose README](../../docker_compose/intel/cpu/xeon/README.md) to check out [GenAIComps](https://github.com/opea-project/GenAIComps.git) and build any images that include your changes for development.
```bash
# Example on building frontend Docker image
cd GenAIExamples/ChatQnA/ui
docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
# etc...
```
The built images should be visible in the local Docker registry. Other images that have not been built with your changes (or are not present in your local Docker registry) will be pulled from [Docker Hub](https://hub.docker.com/u/opea) by Minikube later, in step 6.
```bash
docker images | grep opea
# REPOSITORY TAG IMAGE ID CREATED SIZE
# opea/chatqna-ui latest 8f2fa2523b85 6 days ago 1.56GB
# opea/chatqna latest 7f2602a7a266 6 days ago 821MB
# ...
```
4. The built images must be imported into the Minikube registry from the local Docker registry. This can be done using `minikube image load`.
```bash
minikube image load opea/chatqna
minikube image load opea/chatqna-ui
# etc...
```
5. Start the Minikube cluster with `minikube start`, then check that the Minikube container (kicbase) is up with `docker ps`
```bash
docker ps
# CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
# de088666cef2 gcr.io/k8s-minikube/kicbase:v0.0.45 "/usr/local/bin/entr…" 2 days ago Up 2 days 127.0.0.1:49157->22/tcp... minikube
```
6. Deploy the ChatQnA application with `kubectl apply -f chatqna.yaml`, then check that the OPEA pods are in a `Running` state with `kubectl get pods`
```bash
kubectl get pods
# NAME READY STATUS RESTARTS AGE
# chatqna-78b4f5865-qbzms 1/1 Running 0 2d3h
# chatqna-chatqna-ui-54c8dfb6cf-fll5g 1/1 Running 0 2d3h
# etc...
```
7. Forward the port of the chatqna service from Minikube to the host, and test the service as you would a normal k8s cluster deployment
```bash
# port-forward to expose the chatqna endpoint from within the minikube cluster
kubectl port-forward svc/chatqna 8888:8888
curl http://localhost:8888/v1/chatqna \
-H 'Content-Type: application/json' \
-d '{"messages": "What is the revenue of Nike in 2023?"}'
# Similarly port-forward to expose the chatqna-ui endpoint and use the UI at <machine-external-ip>:5173 in your browser
kubectl port-forward svc/chatqna-chatqna-ui 5173:5173
```
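If the pods are still starting, a small readiness gate before the curl can avoid false failures. This is only a sketch, not part of the example above: the `app.kubernetes.io/instance=chatqna` label selector is an assumption, so verify the actual labels with `kubectl get pods --show-labels`.
```bash
# Wait for the ChatQnA pods to become Ready before testing (label selector is assumed).
kubectl wait pod -l app.kubernetes.io/instance=chatqna --for=condition=Ready --timeout=300s
# Then port-forward and query the backend as shown above.
kubectl port-forward svc/chatqna 8888:8888 &
curl http://localhost:8888/v1/chatqna \
  -H 'Content-Type: application/json' \
  -d '{"messages": "What is the revenue of Nike in 2023?"}'
```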


@@ -1100,7 +1100,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -1180,7 +1180,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -1252,18 +1252,12 @@ spec:
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT


@@ -922,7 +922,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -994,18 +994,12 @@ spec:
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:


@@ -925,7 +925,7 @@ spec:
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
@@ -997,18 +997,12 @@ spec:
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:


@@ -1257,18 +1257,12 @@ spec:
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
- name: GUARDRAIL_SERVICE_HOST_IP
value: chatqna-guardrails-usvc
- name: GUARDRAIL_SERVICE_PORT


@@ -997,18 +997,12 @@ spec:
env:
- name: LLM_SERVER_HOST_IP
value: chatqna-tgi
- name: LLM_SERVER_PORT
value: "2080"
- name: RERANK_SERVER_HOST_IP
value: chatqna-teirerank
- name: RERANK_SERVER_PORT
value: "2082"
- name: RETRIEVER_SERVICE_HOST_IP
value: chatqna-retriever-usvc
- name: EMBEDDING_SERVER_HOST_IP
value: chatqna-tei
- name: EMBEDDING_SERVER_PORT
value: "2081"
securityContext:
allowPrivilegeEscalation: false
capabilities:


@@ -22,7 +22,7 @@ function build_docker_images() {
service_list="chatqna chatqna-ui chatqna-conversation-ui dataprep-redis retriever-redis nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.5
docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker images && sleep 1s


@@ -17,9 +17,10 @@ ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
git clone https://github.com/HabanaAI/vllm-fork.git
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-hpu nginx"
service_list="chatqna chatqna-ui dataprep-redis retriever-redis vllm-hpu nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5


@@ -1,183 +0,0 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
set -e
IMAGE_REPO=${IMAGE_REPO:-"opea"}
IMAGE_TAG=${IMAGE_TAG:-"latest"}
echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}"
echo "TAG=IMAGE_TAG=${IMAGE_TAG}"
export REGISTRY=${IMAGE_REPO}
export TAG=${IMAGE_TAG}
WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')
function build_docker_images() {
cd $WORKPATH/docker_image_build
git clone https://github.com/opea-project/GenAIComps.git && cd GenAIComps && git checkout "${opea_branch:-"main"}" && cd ../
echo "Build all the images with --no-cache, check docker_image_build.log for details..."
service_list="chatqna chatqna-ui dataprep-redis retriever-redis llm-vllm-ray-hpu nginx"
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
docker pull ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
docker pull ghcr.io/huggingface/tei-gaudi:latest
docker images && sleep 1s
}
function start_services() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
export RERANK_MODEL_ID="BAAI/bge-reranker-base"
export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
export INDEX_NAME="rag-redis"
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
# Start Docker Containers
docker compose -f compose_vllm_ray.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
n=0
until [[ "$n" -ge 100 ]]; do
echo "n=$n"
docker logs vllm-ray-gaudi-server > vllm_ray_service_start.log
if grep -q "Warmup finished" vllm_ray_service_start.log; then
break
fi
sleep 5s
n=$((n+1))
done
}
function validate_services() {
local URL="$1"
local EXPECTED_RESULT="$2"
local SERVICE_NAME="$3"
local DOCKER_NAME="$4"
local INPUT_DATA="$5"
local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL")
if [ "$HTTP_STATUS" -eq 200 ]; then
echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..."
local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log)
if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then
echo "[ $SERVICE_NAME ] Content is as expected."
else
echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
else
echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS"
docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log
exit 1
fi
sleep 1s
}
function validate_microservices() {
# Check if the microservices are running correctly.
# tei for embedding service
validate_services \
"${ip_address}:8090/embed" \
"\[\[" \
"tei-embedding" \
"tei-embedding-gaudi-server" \
'{"inputs":"What is Deep Learning?"}'
sleep 1m # retrieval can't curl as expected, try to wait for more time
# retrieval microservice
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
validate_services \
"${ip_address}:7000/v1/retrieval" \
" " \
"retrieval" \
"retriever-redis-server" \
"{\"text\":\"What is the revenue of Nike in 2023?\",\"embedding\":${test_embedding}}"
# tei for rerank microservice
validate_services \
"${ip_address}:8808/rerank" \
'{"index":1,"score":' \
"tei-rerank" \
"tei-reranking-gaudi-server" \
'{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}'
# vllm-on-ray for llm service
validate_services \
"${ip_address}:8006/v1/chat/completions" \
"content" \
"vllm-ray-llm" \
"vllm-ray-gaudi-server" \
'{"model": "Intel/neural-chat-7b-v3-3", "messages": [{"role": "user", "content": "What is Deep Learning?"}]}'
}
function validate_megaservice() {
# Curl the Mega Service
validate_services \
"${ip_address}:8888/v1/chatqna" \
"data: " \
"mega-chatqna" \
"chatqna-gaudi-backend-server" \
'{"messages": "What is the revenue of Nike in 2023?"}'
}
function validate_frontend() {
cd $WORKPATH/ui/svelte
local conda_env_name="OPEA_e2e"
export PATH=${HOME}/miniforge3/bin/:$PATH
if conda info --envs | grep -q "$conda_env_name"; then
echo "$conda_env_name exist!"
else
conda create -n ${conda_env_name} python=3.12 -y
fi
source activate ${conda_env_name}
sed -i "s/localhost/$ip_address/g" playwright.config.ts
conda install -c conda-forge nodejs -y
npm install && npm ci && npx playwright install --with-deps
node -v && npm -v && pip list
exit_status=0
npx playwright test || exit_status=$?
if [ $exit_status -ne 0 ]; then
echo "[TEST INFO]: ---------frontend test failed---------"
exit $exit_status
else
echo "[TEST INFO]: ---------frontend test passed---------"
fi
}
function stop_docker() {
cd $WORKPATH/docker_compose/intel/hpu/gaudi
docker compose -f compose_vllm_ray.yaml down
}
function main() {
stop_docker
if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi
start_time=$(date +%s)
start_services
end_time=$(date +%s)
duration=$((end_time-start_time))
echo "Mega service start duration is $duration s"
validate_microservices
validate_megaservice
# validate_frontend
stop_docker
echo y | docker system prune
}
main


@@ -111,7 +111,7 @@ function _cleanup_ns() {
function install_and_validate_chatqna_guardrail() {
echo "Testing manifests chatqna_guardrils"
local ns=${NAMESPACE}-gaurdrails
local ns=${NAMESPACE}
_cleanup_ns $ns
kubectl create namespace $ns
# install guardrail
@@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() {
# Sleep enough time for chatqna_guardrail to be ready
sleep 60
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Waiting for cahtqna_guardrail pod ready done!"
echo "Waiting for chatqna_guardrail pod ready done!"
else
echo "Timeout waiting for chatqna_guardrail pod ready!"
_cleanup_ns $ns
exit 1
fi
@@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() {
validate_chatqna $ns chatqna-guardrails
local ret=$?
if [ $ret -ne 0 ]; then
_cleanup_ns $ns
exit 1
fi
_cleanup_ns $ns
}
if [ $# -eq 0 ]; then
@@ -161,8 +158,7 @@ case "$1" in
if [ $ret -ne 0 ]; then
exit $ret
fi
pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifests
set +e
pushd ChatQnA/kubernetes/intel/hpu/gaudi/manifest
install_and_validate_chatqna_guardrail
popd
;;


@@ -40,7 +40,7 @@ function get_end_point() {
function validate_chatqna() {
local ns=$1
local log=$2
max_retry=20
max_retry=10
# make sure microservice retriever-usvc is ready
# try to curl retriever-svc for max_retry times
test_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
@@ -111,7 +111,7 @@ function _cleanup_ns() {
function install_and_validate_chatqna_guardrail() {
echo "Testing manifests chatqna_guardrils"
local ns=${NAMESPACE}-gaurdrails
local ns=${NAMESPACE}
_cleanup_ns $ns
kubectl create namespace $ns
# install guardrail
@@ -119,10 +119,9 @@ function install_and_validate_chatqna_guardrail() {
# Sleep enough time for chatqna_guardrail to be ready
sleep 60
if kubectl rollout status deployment -n "$ns" --timeout "$ROLLOUT_TIMEOUT_SECONDS"; then
echo "Waiting for cahtqna_guardrail pod ready done!"
echo "Waiting for chatqna_guardrail pod ready done!"
else
echo "Timeout waiting for chatqna_guardrail pod ready!"
_cleanup_ns $ns
exit 1
fi
@@ -130,10 +129,8 @@ function install_and_validate_chatqna_guardrail() {
validate_chatqna $ns chatqna-guardrails
local ret=$?
if [ $ret -ne 0 ]; then
_cleanup_ns $ns
exit 1
fi
_cleanup_ns $ns
}
if [ $# -eq 0 ]; then
@@ -161,8 +158,7 @@ case "$1" in
if [ $ret -ne 0 ]; then
exit $ret
fi
pushd ChatQnA/kubernetes/intel/cpu/xeon/manifests
set +e
pushd ChatQnA/kubernetes/intel/cpu/xeon/manifest
install_and_validate_chatqna_guardrail
popd
;;

Some files were not shown because too many files have changed in this diff.