Compare commits
181 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bbb4e231d0 | ||
|
|
da10068964 | ||
|
|
188b568467 | ||
|
|
9e9af9766f | ||
|
|
cc108b5a18 | ||
|
|
f70d9c3853 | ||
|
|
8808b51e42 | ||
|
|
17d4b0c97f | ||
|
|
3a03d31f8f | ||
|
|
179fd84362 | ||
|
|
9ba034b22d | ||
|
|
c3e6f43ece | ||
|
|
1ac756a1c7 | ||
|
|
56f770cb28 | ||
|
|
0cdeb946e4 | ||
|
|
5648839411 | ||
|
|
eb91d1f054 | ||
|
|
2587179224 | ||
|
|
7e62175c2e | ||
|
|
152adf8012 | ||
|
|
83172e9a99 | ||
|
|
fb514bb8ba | ||
|
|
b1bb6db52d | ||
|
|
7949045176 | ||
|
|
cbe952ec5e | ||
|
|
3b1a9fe9e1 | ||
|
|
e66d7fe381 | ||
|
|
6d3a017609 | ||
|
|
dbf4ba03fa | ||
|
|
4f96d9e605 | ||
|
|
a8f4245384 | ||
|
|
096a37aacc | ||
|
|
6f8fa6a689 | ||
|
|
39f68d5d6b | ||
|
|
00d9bb6128 | ||
|
|
59b624c677 | ||
|
|
2b2c7ee2f5 | ||
|
|
6b9a27dd83 | ||
|
|
5720cd45c0 | ||
|
|
73879d3cec | ||
|
|
7c9ed04132 | ||
|
|
9ff7df9202 | ||
|
|
b5f95f735e | ||
|
|
393367e9f1 | ||
|
|
7adbba6add | ||
|
|
0d52c2f003 | ||
|
|
1ff85f6a85 | ||
|
|
f7a7f8aa3f | ||
|
|
e3187be819 | ||
|
|
abd9d12937 | ||
|
|
a7353bbaa4 | ||
|
|
aa314f6757 | ||
|
|
3744bb8c1b | ||
|
|
82801d0121 | ||
|
|
f7026773b8 | ||
|
|
edc09ece5c | ||
|
|
dfed2aead2 | ||
|
|
049517f977 | ||
|
|
ee83a6d5b4 | ||
|
|
e2bdd19fd4 | ||
|
|
c9088eb824 | ||
|
|
9c3023a12e | ||
|
|
bbc95bb708 | ||
|
|
dd9623d3d5 | ||
|
|
4c27a3d30c | ||
|
|
40386d9bd6 | ||
|
|
fe97e88c7a | ||
|
|
11d8b24c8a | ||
|
|
4635a927fa | ||
|
|
1da44d99a1 | ||
|
|
e9b164505e | ||
|
|
6263b517b9 | ||
|
|
2de7c0ba89 | ||
|
|
944ae47948 | ||
|
|
2d9aeb3715 | ||
|
|
a0921f127f | ||
|
|
cf86aceb18 | ||
|
|
c2b7bd25d9 | ||
|
|
78331ee678 | ||
|
|
7f7ad0e256 | ||
|
|
0306c620b5 | ||
|
|
3372b9d480 | ||
|
|
5eb3d2869f | ||
|
|
ced68e1834 | ||
|
|
bf5c391e47 | ||
|
|
c65d7d40fb | ||
|
|
9d124161e0 | ||
|
|
0f5a9c4a5e | ||
|
|
a65640b4a5 | ||
|
|
7197286a14 | ||
|
|
960805a57b | ||
|
|
002f0e2b11 | ||
|
|
fde5996192 | ||
|
|
bc47930ce1 | ||
|
|
2332d22950 | ||
|
|
a2afce1675 | ||
|
|
89f4c5fb41 | ||
|
|
98f66405ac | ||
|
|
90c2d49050 | ||
|
|
95b58b51fa | ||
|
|
d3ce6f5357 | ||
|
|
a10b4a1f1d | ||
|
|
085d859a70 | ||
|
|
15cc457cea | ||
|
|
cfffb4c005 | ||
|
|
41955f65ad | ||
|
|
def39cfcdc | ||
|
|
35a4fef70d | ||
|
|
a3f9811f7e | ||
|
|
0eedbbfce0 | ||
|
|
9438d392b4 | ||
|
|
1929dfd3a0 | ||
|
|
c7e33647ad | ||
|
|
184e9a43b8 | ||
|
|
658867fce4 | ||
|
|
620ef76d16 | ||
|
|
23b820e740 | ||
|
|
3c164f3aa2 | ||
|
|
7669c42085 | ||
|
|
256b58c07e | ||
|
|
3c3a5bed67 | ||
|
|
37c74b232c | ||
|
|
4a265abb73 | ||
|
|
b0487fe92b | ||
|
|
d486bbbe10 | ||
|
|
b0f7c9cfc2 | ||
|
|
eeced9b31c | ||
|
|
b377c2b8f8 | ||
|
|
5dae713793 | ||
|
|
c930bea172 | ||
|
|
0edff26ee5 | ||
|
|
778afb50ac | ||
|
|
40800b0848 | ||
|
|
f2f6c09a0f | ||
|
|
c6fc92d37c | ||
|
|
c0643b71e8 | ||
|
|
088ab98f31 | ||
|
|
441f8cc6ba | ||
|
|
b056ce6617 | ||
|
|
773c32b38b | ||
|
|
619d941047 | ||
|
|
b71a12d424 | ||
|
|
12469c92d8 | ||
|
|
fbde15b40d | ||
|
|
ae10712fe8 | ||
|
|
373fa88033 | ||
|
|
e2f9037344 | ||
|
|
afc39fa4c0 | ||
|
|
e1c476c185 | ||
|
|
77920613dc | ||
|
|
7dec00176e | ||
|
|
bf28c7f098 | ||
|
|
63bad29794 | ||
|
|
36d3ef2b17 | ||
|
|
0c6b044139 | ||
|
|
d23cd799e9 | ||
|
|
644c3a67ce | ||
|
|
ffecd182db | ||
|
|
d16c80e493 | ||
|
|
2de1bfc5bb | ||
|
|
75df2c9979 | ||
|
|
62e06a0aff | ||
|
|
bd32b03e3c | ||
|
|
9d0b49c2d6 | ||
|
|
75ce2a3ca6 | ||
|
|
99c10933b4 | ||
|
|
8bcd82e82d | ||
|
|
c1038d2193 | ||
|
|
33b9d4e421 | ||
|
|
c9553c6f9a | ||
|
|
3e796ba73d | ||
|
|
5ed776709d | ||
|
|
954a22051b | ||
|
|
6f4b00f829 | ||
|
|
3fb60608b3 | ||
|
|
c35fe0b429 | ||
|
|
28f5e4a268 | ||
|
|
d55a33dda1 | ||
|
|
daf2a4fad7 | ||
|
|
3ce395582b | ||
|
|
7eaab93d0b |
8
.github/CODEOWNERS
vendored
@@ -1,13 +1,17 @@
|
||||
/AgentQnA/ xuhui.ren@intel.com
|
||||
/AgentQnA/ kaokao.lv@intel.com
|
||||
/AudioQnA/ sihan.chen@intel.com
|
||||
/ChatQnA/ liang1.lv@intel.com
|
||||
/CodeGen/ liang1.lv@intel.com
|
||||
/CodeTrans/ sihan.chen@intel.com
|
||||
/DocSum/ letong.han@intel.com
|
||||
/DocIndexRetriever/ xuhui.ren@intel.com chendi.xue@intel.com
|
||||
/DocIndexRetriever/ kaokao.lv@intel.com chendi.xue@intel.com
|
||||
/InstructionTuning xinyu.ye@intel.com
|
||||
/RerankFinetuning xinyu.ye@intel.com
|
||||
/MultimodalQnA tiep.le@intel.com
|
||||
/FaqGen/ xinyao.wang@intel.com
|
||||
/SearchQnA/ sihan.chen@intel.com
|
||||
/Translation/ liang1.lv@intel.com
|
||||
/VisualQnA/ liang1.lv@intel.com
|
||||
/ProductivitySuite/ hoong.tee.yeoh@intel.com
|
||||
/VideoQnA huiling.bao@intel.com
|
||||
/*/ liang1.lv@intel.com
|
||||
|
||||
2
.github/code_spell_ignore.txt
vendored
@@ -0,0 +1,2 @@
|
||||
ModelIn
|
||||
modelin
|
||||
|
||||
2
.github/license_template.txt
vendored
@@ -1,2 +1,2 @@
|
||||
Copyright (C) 2024 Intel Corporation
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
29
.github/workflows/_example-workflow.yml
vendored
@@ -12,6 +12,10 @@ on:
|
||||
example:
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: ""
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
required: false
|
||||
@@ -36,6 +40,11 @@ on:
|
||||
default: "main"
|
||||
required: false
|
||||
type: string
|
||||
inject_commit:
|
||||
default: false
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
####################################################################################################
|
||||
# Image Build
|
||||
@@ -46,33 +55,34 @@ jobs:
|
||||
- name: Clean Up Working Directory
|
||||
run: sudo rm -rf ${{github.workspace}}/*
|
||||
|
||||
- name: Get checkout ref
|
||||
- name: Get Checkout Ref
|
||||
run: |
|
||||
if [ "${{ github.event_name }}" == "pull_request" ] || [ "${{ github.event_name }}" == "pull_request_target" ]; then
|
||||
echo "CHECKOUT_REF=refs/pull/${{ github.event.number }}/merge" >> $GITHUB_ENV
|
||||
else
|
||||
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
|
||||
fi
|
||||
echo "checkout ref ${{ env.CHECKOUT_REF }}"
|
||||
|
||||
- name: Checkout out Repo
|
||||
- name: Checkout out GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: ${{ env.CHECKOUT_REF }}
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Clone required Repo
|
||||
- name: Clone Required Repo
|
||||
run: |
|
||||
cd ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path=${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
if [[ $(grep -c "tei-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/huggingface/tei-gaudi.git
|
||||
fi
|
||||
if [[ $(grep -c "vllm:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/vllm-project/vllm.git
|
||||
cd vllm && git rev-parse HEAD && cd ../
|
||||
fi
|
||||
if [[ $(grep -c "vllm-gaudi:" ${docker_compose_path}) != 0 ]]; then
|
||||
git clone https://github.com/HabanaAI/vllm-fork.git
|
||||
cd vllm-fork && git checkout 3c39626 && cd ../
|
||||
fi
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && cd ../
|
||||
cd GenAIComps && git checkout ${{ inputs.opea_branch }} && git rev-parse HEAD && cd ../
|
||||
|
||||
- name: Build Image
|
||||
if: ${{ fromJSON(inputs.build) }}
|
||||
@@ -80,7 +90,9 @@ jobs:
|
||||
with:
|
||||
work_dir: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build
|
||||
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
|
||||
service_list: ${{ inputs.services }}
|
||||
registry: ${OPEA_IMAGE_REPO}opea
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
tag: ${{ inputs.tag }}
|
||||
|
||||
####################################################################################################
|
||||
@@ -108,7 +120,6 @@ jobs:
|
||||
example: ${{ inputs.example }}
|
||||
hardware: ${{ inputs.node }}
|
||||
tag: ${{ inputs.tag }}
|
||||
context: "CD"
|
||||
secrets: inherit
|
||||
|
||||
####################################################################################################
|
||||
|
||||
2
.github/workflows/_get-test-matrix.yml
vendored
@@ -14,7 +14,7 @@ on:
|
||||
test_mode:
|
||||
required: false
|
||||
type: string
|
||||
default: 'docker_compose'
|
||||
default: 'compose'
|
||||
outputs:
|
||||
run_matrix:
|
||||
description: "The matrix string"
|
||||
|
||||
16
.github/workflows/_manifest-e2e.yml
vendored
@@ -20,11 +20,6 @@ on:
|
||||
description: "Tag to apply to images, default is latest"
|
||||
required: false
|
||||
type: string
|
||||
context:
|
||||
default: "CI"
|
||||
description: "CI or CD"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
manifest-test:
|
||||
@@ -51,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Set variables
|
||||
run: |
|
||||
echo "IMAGE_REPO=$OPEA_IMAGE_REPO" >> $GITHUB_ENV
|
||||
echo "IMAGE_REPO=${OPEA_IMAGE_REPO}opea" >> $GITHUB_ENV
|
||||
echo "IMAGE_TAG=${{ inputs.tag }}" >> $GITHUB_ENV
|
||||
lower_example=$(echo "${{ inputs.example }}" | tr '[:upper:]' '[:lower:]')
|
||||
echo "NAMESPACE=$lower_example-$(tr -dc a-z0-9 </dev/urandom | head -c 16)" >> $GITHUB_ENV
|
||||
@@ -60,7 +55,6 @@ jobs:
|
||||
echo "continue_test=true" >> $GITHUB_ENV
|
||||
echo "should_cleanup=false" >> $GITHUB_ENV
|
||||
echo "skip_validate=true" >> $GITHUB_ENV
|
||||
echo "CONTEXT=${{ inputs.context }}" >> $GITHUB_ENV
|
||||
echo "NAMESPACE=$NAMESPACE"
|
||||
|
||||
- name: Kubectl install
|
||||
@@ -96,10 +90,16 @@ jobs:
|
||||
echo "Validate ${{ inputs.example }} successful!"
|
||||
else
|
||||
echo "Validate ${{ inputs.example }} failure!!!"
|
||||
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
|
||||
echo "Check the logs in 'Dump logs when e2e test failed' step!!!"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
- name: Dump logs when e2e test failed
|
||||
if: failure()
|
||||
run: |
|
||||
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE
|
||||
|
||||
- name: Kubectl uninstall
|
||||
if: always()
|
||||
run: |
|
||||
|
||||
9
.github/workflows/_run-docker-compose.yml
vendored
@@ -118,6 +118,9 @@ jobs:
|
||||
GOOGLE_CSE_ID: ${{ secrets.GOOGLE_CSE_ID }}
|
||||
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
||||
PINECONE_KEY: ${{ secrets.PINECONE_KEY }}
|
||||
PINECONE_KEY_LANGCHAIN_TEST: ${{ secrets.PINECONE_KEY_LANGCHAIN_TEST }}
|
||||
SDK_BASE_URL: ${{ secrets.SDK_BASE_URL }}
|
||||
SERVING_TOKEN: ${{ secrets.SERVING_TOKEN }}
|
||||
IMAGE_REPO: ${{ inputs.registry }}
|
||||
IMAGE_TAG: ${{ inputs.tag }}
|
||||
example: ${{ inputs.example }}
|
||||
@@ -138,7 +141,11 @@ jobs:
|
||||
flag=${flag#test_}
|
||||
yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
|
||||
echo $yaml_file
|
||||
docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
|
||||
container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
|
||||
done
|
||||
docker system prune -f
|
||||
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
|
||||
|
||||
|
||||
35
.github/workflows/check-online-doc-build.yml
vendored
Normal file
@@ -0,0 +1,35 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check Online Document Building
|
||||
permissions: {}
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
paths:
|
||||
- "**.md"
|
||||
- "**.rst"
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
path: GenAIExamples
|
||||
|
||||
- name: Checkout docs
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: opea-project/docs
|
||||
path: docs
|
||||
|
||||
- name: Build Online Document
|
||||
shell: bash
|
||||
run: |
|
||||
echo "build online doc"
|
||||
cd docs
|
||||
bash scripts/build.sh
|
||||
16
.github/workflows/manual-docker-publish.yml
vendored
@@ -11,23 +11,23 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "Translation"
|
||||
description: 'List of examples to publish [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
default: ""
|
||||
description: 'List of examples to publish [AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA]'
|
||||
required: false
|
||||
type: string
|
||||
images:
|
||||
default: "gmcmanager,gmcrouter"
|
||||
description: 'List of images to publish [gmcmanager,gmcrouter, ...]'
|
||||
default: ""
|
||||
description: 'List of images to publish [gmcmanager,gmcrouter]'
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
default: "v0.9"
|
||||
description: "Tag to publish"
|
||||
default: "rc"
|
||||
description: "Tag to publish, like [1.0rc]"
|
||||
required: true
|
||||
type: string
|
||||
publish_tags:
|
||||
default: "latest,v0.9"
|
||||
description: 'Tag list apply to publish images'
|
||||
default: "latest,1.x"
|
||||
description: "Tag list apply to publish images, like [latest,1.0]"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
|
||||
8
.github/workflows/manual-docker-scan.yml
vendored
@@ -11,13 +11,13 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
examples:
|
||||
default: "ChatQnA"
|
||||
description: 'List of examples to scan [AudioQnA,ChatQnA,CodeGen,CodeTrans,DocSum,FaqGen,SearchQnA,Translation]'
|
||||
default: ""
|
||||
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
|
||||
required: false
|
||||
type: string
|
||||
images:
|
||||
default: "gmcmanager,gmcrouter"
|
||||
description: 'List of images to scan [gmcmanager,gmcrouter, ...]'
|
||||
default: ""
|
||||
description: 'List of images to publish "gmcmanager,gmcrouter"'
|
||||
required: false
|
||||
type: string
|
||||
tag:
|
||||
|
||||
@@ -50,6 +50,11 @@ on:
|
||||
description: 'OPEA branch for image build'
|
||||
required: false
|
||||
type: string
|
||||
inject_commit:
|
||||
default: true
|
||||
description: "inject commit to docker images true or false"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
permissions: read-all
|
||||
jobs:
|
||||
@@ -101,4 +106,5 @@ jobs:
|
||||
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
|
||||
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
secrets: inherit
|
||||
|
||||
15
.github/workflows/manual-freeze-tag.yml
vendored
@@ -1,13 +1,13 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Freeze OPEA images release tag in readme on manual event
|
||||
name: Freeze OPEA images release tag
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
default: "latest"
|
||||
default: "1.1.0"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
@@ -23,10 +23,6 @@ jobs:
|
||||
fetch-depth: 0
|
||||
ref: ${{ github.ref }}
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Set up Git
|
||||
run: |
|
||||
git config --global user.name "NeuralChatBot"
|
||||
@@ -35,9 +31,10 @@ jobs:
|
||||
|
||||
- name: Run script
|
||||
run: |
|
||||
find . -name "*.md" | xargs sed -i "s|^docker\ compose|TAG=${{ github.event.inputs.tag }}\ docker\ compose|g"
|
||||
find . -type f -name "*.yaml" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
|
||||
find . -type f -name "*.md" \( -path "*/benchmark/*" -o -path "*/kubernetes/*" \) | xargs sed -i -E 's/(opea\/[A-Za-z0-9\-]*:)latest/\1${{ github.event.inputs.tag }}/g'
|
||||
IFS='.' read -r major minor patch <<< "${{ github.event.inputs.tag }}"
|
||||
echo "VERSION_MAJOR ${major}" > version.txt
|
||||
echo "VERSION_MINOR ${minor}" >> version.txt
|
||||
echo "VERSION_PATCH ${patch}" >> version.txt
|
||||
|
||||
- name: Commit changes
|
||||
run: |
|
||||
|
||||
66
.github/workflows/manual-image-build.yml
vendored
Normal file
@@ -0,0 +1,66 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Build specific images on manual event
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
nodes:
|
||||
default: "gaudi,xeon"
|
||||
description: "Hardware to run test"
|
||||
required: true
|
||||
type: string
|
||||
example:
|
||||
default: "ChatQnA"
|
||||
description: 'Build images belong to which example?'
|
||||
required: true
|
||||
type: string
|
||||
services:
|
||||
default: "chatqna,chatqna-without-rerank"
|
||||
description: 'Service list to build'
|
||||
required: true
|
||||
type: string
|
||||
tag:
|
||||
default: "latest"
|
||||
description: "Tag to apply to images"
|
||||
required: true
|
||||
type: string
|
||||
opea_branch:
|
||||
default: "main"
|
||||
description: 'OPEA branch for image build'
|
||||
required: false
|
||||
type: string
|
||||
inject_commit:
|
||||
default: true
|
||||
description: "inject commit to docker images true or false"
|
||||
required: false
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
get-test-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
nodes: ${{ steps.get-matrix.outputs.nodes }}
|
||||
steps:
|
||||
- name: Create Matrix
|
||||
id: get-matrix
|
||||
run: |
|
||||
nodes=($(echo ${{ inputs.nodes }} | tr ',' ' '))
|
||||
nodes_json=$(printf '%s\n' "${nodes[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "nodes=$nodes_json" >> $GITHUB_OUTPUT
|
||||
|
||||
image-build:
|
||||
needs: get-test-matrix
|
||||
strategy:
|
||||
matrix:
|
||||
node: ${{ fromJson(needs.get-test-matrix.outputs.nodes) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
example: ${{ inputs.example }}
|
||||
services: ${{ inputs.services }}
|
||||
tag: ${{ inputs.tag }}
|
||||
opea_branch: ${{ inputs.opea_branch }}
|
||||
inject_commit: ${{ inputs.inject_commit }}
|
||||
secrets: inherit
|
||||
70
.github/workflows/nightly-docker-build-publish.yml
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Nightly build/publish latest docker images
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "30 13 * * *" # UTC time
|
||||
workflow_dispatch:
|
||||
|
||||
env:
|
||||
EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
|
||||
TAG: "latest"
|
||||
PUBLISH_TAGS: "latest"
|
||||
|
||||
jobs:
|
||||
get-build-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
examples_json: ${{ steps.get-matrix.outputs.examples_json }}
|
||||
EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
|
||||
TAG: ${{ steps.get-matrix.outputs.TAG }}
|
||||
PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
|
||||
steps:
|
||||
- name: Create Matrix
|
||||
id: get-matrix
|
||||
run: |
|
||||
examples=($(echo ${EXAMPLES} | tr ',' ' '))
|
||||
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
|
||||
echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
|
||||
echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
|
||||
echo "TAG=$TAG" >> $GITHUB_OUTPUT
|
||||
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
|
||||
|
||||
build:
|
||||
needs: get-build-matrix
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: gaudi
|
||||
example: ${{ matrix.example }}
|
||||
secrets: inherit
|
||||
|
||||
get-image-list:
|
||||
needs: get-build-matrix
|
||||
uses: ./.github/workflows/_get-image-list.yml
|
||||
with:
|
||||
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}
|
||||
|
||||
publish:
|
||||
needs: [get-build-matrix, get-image-list, build]
|
||||
strategy:
|
||||
matrix:
|
||||
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
|
||||
runs-on: "docker-build-gaudi"
|
||||
steps:
|
||||
- uses: docker/login-action@v3.2.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USER }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Image Publish
|
||||
uses: opea-project/validation/actions/image-publish@main
|
||||
with:
|
||||
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
|
||||
image_name: opea/${{ matrix.image }}
|
||||
publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
|
||||
50
.github/workflows/pr-bum_list_check.yml
vendored
@@ -1,50 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Check Requirements
|
||||
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
check-requirements:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout PR branch
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Save PR requirements
|
||||
run: |
|
||||
find . -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > pr-requirements.txt
|
||||
cat pr-requirements.txt
|
||||
|
||||
- name: Checkout main branch
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
ref: main
|
||||
path: main-branch
|
||||
|
||||
- name: Save main branch requirements
|
||||
run: |
|
||||
find ./main-branch -name "requirements.txt" -exec cat {} \; | \
|
||||
grep -v '^\s*#' | \
|
||||
grep -v '^\s*$' | \
|
||||
grep -v '^\s*-' | \
|
||||
sed 's/^\s*//' | \
|
||||
awk -F'[>=<]' '{print $1}' | \
|
||||
sort -u > main-requirements.txt
|
||||
cat main-requirements.txt
|
||||
|
||||
- name: Compare requirements
|
||||
run: |
|
||||
comm -23 pr-requirements.txt main-requirements.txt > added-packages.txt
|
||||
if [ -s added-packages.txt ]; then
|
||||
echo "New packages found in PR:" && cat added-packages.txt
|
||||
else
|
||||
echo "No new packages found😊."
|
||||
fi
|
||||
2
.github/workflows/pr-gmc-e2e.yaml
vendored
@@ -12,7 +12,7 @@ on:
|
||||
- "**/tests/test_gmc**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
- "!**/kubernetes/**/manifests/**"
|
||||
- "!**/kubernetes/**/manifest/**"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
|
||||
4
.github/workflows/pr-manifest-e2e.yml
vendored
@@ -8,7 +8,9 @@ on:
|
||||
branches: ["main", "*rc"]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/**/manifests/**"
|
||||
- "**/Dockerfile**"
|
||||
- "**.py"
|
||||
- "**/kubernetes/**/manifest/**"
|
||||
- "**/tests/test_manifest**"
|
||||
- "!**.md"
|
||||
- "!**.txt"
|
||||
|
||||
54
.github/workflows/pr-manifest-validate.yml
vendored
@@ -1,54 +0,0 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
name: Manifests Validate
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
|
||||
paths:
|
||||
- "**/kubernetes/manifests/**"
|
||||
- .github/workflows/manifest-validate.yml
|
||||
workflow_dispatch:
|
||||
|
||||
# If there is a new commit, the previous jobs will be canceled
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
MANIFEST_DIR: "manifests"
|
||||
|
||||
jobs:
|
||||
manifests-validate:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout out Repo
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: changed files
|
||||
id: changed_files
|
||||
run: |
|
||||
set -xe
|
||||
changed_folder=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }} | \
|
||||
grep "kubernetes/manifests" | grep -vE '.github|README.md|*.txt|*.sh' | cut -d'/' -f1 | sort -u )
|
||||
echo "changed_folder: $changed_folder"
|
||||
if [ -z "$changed_folder" ]; then
|
||||
echo "No changes in manifests folder"
|
||||
echo "SKIP=true" >> $GITHUB_OUTPUT
|
||||
exit 0
|
||||
fi
|
||||
echo "SKIP=false" >> $GITHUB_OUTPUT
|
||||
for folder in $changed_folder; do
|
||||
folder_str="$folder_str $folder/kubernetes/manifests/"
|
||||
done
|
||||
echo "folder_str=$folder_str"
|
||||
echo "folder_str=$folder_str" >> $GITHUB_ENV
|
||||
|
||||
- uses: docker://ghcr.io/yannh/kubeconform:latest
|
||||
if: steps.changed_files.outputs.SKIP == 'false'
|
||||
with:
|
||||
args: "-summary -output json ${{env.folder_str}}"
|
||||
88
.github/workflows/pr-path-detection.yml
vendored
@@ -50,28 +50,40 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check the Validity of Hyperlinks
|
||||
run: |
|
||||
cd ${{github.workspace}}
|
||||
fail="FALSE"
|
||||
url_lines=$(grep -Eo '\]\(http[s]?://[^)]+\)' --include='*.md' -r .)
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
if [ -n "$changed_files" ]; then
|
||||
for changed_file in $changed_files; do
|
||||
# echo $changed_file
|
||||
url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIExamples/blob/main') || true
|
||||
if [ -n "$url_lines" ]; then
|
||||
for url_line in $url_lines; do
|
||||
# echo $url_line
|
||||
url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//')
|
||||
path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url")|| true
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Invalid link from ${{github.workspace}}/$path: $url"
|
||||
fail="TRUE"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
fi
|
||||
done
|
||||
else
|
||||
echo "No changed .md file."
|
||||
fi
|
||||
|
||||
if [[ "$fail" == "TRUE" ]]; then
|
||||
@@ -89,6 +101,8 @@ jobs:
|
||||
|
||||
- name: Checkout Repo GenAIExamples
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checking Relative Path Validity
|
||||
run: |
|
||||
@@ -102,33 +116,34 @@ jobs:
|
||||
branch="https://github.com/opea-project/GenAIExamples/blob/${{ github.event.pull_request.head.ref }}"
|
||||
fi
|
||||
link_head="https://github.com/opea-project/GenAIExamples/blob/main"
|
||||
|
||||
merged_commit=$(git log -1 --format='%H')
|
||||
changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')"
|
||||
png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http')
|
||||
if [ -n "$png_lines" ]; then
|
||||
for png_line in $png_lines; do
|
||||
refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-)
|
||||
png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1)
|
||||
|
||||
if [[ "${png_path:0:1}" == "/" ]]; then
|
||||
check_path=${{github.workspace}}$png_path
|
||||
elif [[ "${png_path:0:1}" == "#" ]]; then
|
||||
check_path=${{github.workspace}}/$refer_path$png_path
|
||||
check_path=$png_path
|
||||
elif [[ "$png_path" == *#* ]]; then
|
||||
relative_path=$(echo "$png_path" | cut -d '#' -f1)
|
||||
if [ -n "$relative_path" ]; then
|
||||
check_path=$(dirname "$refer_path")/$relative_path
|
||||
png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}')
|
||||
else
|
||||
check_path=$refer_path
|
||||
fi
|
||||
else
|
||||
check_path=${{github.workspace}}/$(dirname "$refer_path")/$png_path
|
||||
check_path=$(dirname "$refer_path")/$png_path
|
||||
fi
|
||||
real_path=$(realpath $check_path)
|
||||
if [ $? -ne 0 ]; then
|
||||
echo "Path $png_path in file ${{github.workspace}}/$refer_path does not exist"
|
||||
fail="TRUE"
|
||||
else
|
||||
url=$link_head$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url")
|
||||
if [ "$response_retry" -eq 200 ]; then
|
||||
echo "*****Retry successfully*****"
|
||||
else
|
||||
echo "Retry failed. Check branch ${{ github.event.pull_request.head.ref }}"
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')
|
||||
|
||||
if [ -e "$check_path" ]; then
|
||||
real_path=$(realpath $check_path)
|
||||
if [[ "$png_line" == *#* ]]; then
|
||||
if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then
|
||||
url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIExamples||')$png_path
|
||||
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev")
|
||||
if [ "$response" -ne 200 ]; then
|
||||
echo "**********Validation failed, try again**********"
|
||||
@@ -140,10 +155,13 @@ jobs:
|
||||
fail="TRUE"
|
||||
fi
|
||||
else
|
||||
echo "Check branch ${{ github.event.pull_request.head.ref }} successfully."
|
||||
echo "Validation succeed $png_line"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
else
|
||||
echo "${{github.workspace}}/$refer_path:$png_path does not exist"
|
||||
fail="TRUE"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
12
.github/workflows/push-image-build.yml
vendored
@@ -8,8 +8,8 @@ on:
|
||||
branches: [ 'main' ]
|
||||
paths:
|
||||
- "**.py"
|
||||
- "**Dockerfile"
|
||||
workflow_dispatch:
|
||||
- "**Dockerfile*"
|
||||
- "**docker_image_build/build.yaml"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-on-push
|
||||
@@ -19,17 +19,15 @@ jobs:
|
||||
job1:
|
||||
uses: ./.github/workflows/_get-test-matrix.yml
|
||||
with:
|
||||
test_mode: "docker_image_build/build.yaml"
|
||||
test_mode: "docker_image_build"
|
||||
|
||||
image-build:
|
||||
needs: job1
|
||||
strategy:
|
||||
matrix:
|
||||
example: ${{ fromJSON(needs.job1.outputs.run_matrix).include.*.example }}
|
||||
node: ["gaudi","xeon"]
|
||||
matrix: ${{ fromJSON(needs.job1.outputs.run_matrix) }}
|
||||
fail-fast: false
|
||||
uses: ./.github/workflows/_example-workflow.yml
|
||||
with:
|
||||
node: ${{ matrix.node }}
|
||||
node: ${{ matrix.hardware }}
|
||||
example: ${{ matrix.example }}
|
||||
secrets: inherit
|
||||
|
||||
10
.github/workflows/scripts/get_test_matrix.sh
vendored
@@ -9,12 +9,20 @@ set -e
|
||||
changed_files=$changed_files
|
||||
test_mode=$test_mode
|
||||
run_matrix="{\"include\":["
|
||||
hardware_list="xeon gaudi" # current support hardware list
|
||||
|
||||
examples=$(printf '%s\n' "${changed_files[@]}" | grep '/' | cut -d'/' -f1 | sort -u)
|
||||
for example in ${examples}; do
|
||||
cd $WORKSPACE/$example
|
||||
if [[ ! $(find . -type f | grep ${test_mode}) ]]; then continue; fi
|
||||
cd tests
|
||||
ls -l
|
||||
if [[ "$test_mode" == "docker_image_build" ]]; then
|
||||
find_name="test_manifest_on_*.sh"
|
||||
else
|
||||
find_name="test_${test_mode}*_on_*.sh"
|
||||
fi
|
||||
hardware_list=$(find . -type f -name "${find_name}" | cut -d/ -f2 | cut -d. -f1 | awk -F'_on_' '{print $2}'| sort -u)
|
||||
echo -e "Test supported hardware list: \n${hardware_list}"
|
||||
|
||||
run_hardware=""
|
||||
if [[ $(printf '%s\n' "${changed_files[@]}" | grep ${example} | cut -d'/' -f2 | grep -E '*.py|Dockerfile*|ui|docker_image_build' ) ]]; then
|
||||
|
||||
2
.gitignore
vendored
@@ -5,4 +5,4 @@
|
||||
**/playwright/.cache/
|
||||
**/test-results/
|
||||
|
||||
__pycache__/
|
||||
__pycache__/
|
||||
|
||||
@@ -18,8 +18,6 @@ repos:
|
||||
SearchQnA/ui/svelte/tsconfig.json|
|
||||
DocSum/ui/svelte/tsconfig.json
|
||||
)$
|
||||
- id: check-yaml
|
||||
args: [--allow-multiple-documents]
|
||||
- id: debug-statements
|
||||
- id: requirements-txt-fixer
|
||||
- id: trailing-whitespace
|
||||
@@ -81,7 +79,7 @@ repos:
|
||||
- id: isort
|
||||
|
||||
- repo: https://github.com/PyCQA/docformatter
|
||||
rev: v1.7.5
|
||||
rev: 06907d0
|
||||
hooks:
|
||||
- id: docformatter
|
||||
args: [
|
||||
|
||||
@@ -1 +1 @@
|
||||
**/kubernetes/
|
||||
**/kubernetes/
|
||||
|
||||
16
.set_env.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
#To anounce the version of the codes, please create a version.txt and have following format.
|
||||
#VERSION_MAJOR 1
|
||||
#VERSION_MINOR 0
|
||||
#VERSION_PATCH 0
|
||||
|
||||
VERSION_FILE="version.txt"
|
||||
if [ -f $VERSION_FILE ]; then
|
||||
VER_OPEA_MAJOR=$(grep "VERSION_MAJOR" $VERSION_FILE | cut -d " " -f 2)
|
||||
VER_OPEA_MINOR=$(grep "VERSION_MINOR" $VERSION_FILE | cut -d " " -f 2)
|
||||
VER_OPEA_PATCH=$(grep "VERSION_PATCH" $VERSION_FILE | cut -d " " -f 2)
|
||||
export TAG=$VER_OPEA_MAJOR.$VER_OPEA_MINOR
|
||||
echo OPEA Version:$TAG
|
||||
fi
|
||||
@@ -5,6 +5,73 @@
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. The architecture diagram is shown below. The supervisor agent interfaces with the user and dispatch tasks to the worker agent and other tools to gather information and come up with answers. The worker agent uses the retrieval tool to generate answers to the queries posted by the supervisor agent. Other tools used by the supervisor agent may include APIs to interface knowledge graphs, SQL databases, external knowledge bases, etc.
|
||||

|
||||
|
||||
The AgentQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph DocIndexRetriever-MegaService["DocIndexRetriever MegaService "]
|
||||
direction LR
|
||||
EM([Embedding MicroService]):::blue
|
||||
RET([Retrieval MicroService]):::blue
|
||||
RER([Rerank MicroService]):::blue
|
||||
end
|
||||
subgraph UserInput[" User Input "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
Ingest([Ingest data]):::orchid
|
||||
end
|
||||
AG_REACT([Agent MicroService - react]):::blue
|
||||
AG_RAG([Agent MicroService - rag]):::blue
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
DP([Data Preparation MicroService]):::blue
|
||||
TEI_RER{{Reranking service<br>}}
|
||||
TEI_EM{{Embedding service <br>}}
|
||||
VDB{{Vector DB<br><br>}}
|
||||
R_RET{{Retriever service <br>}}
|
||||
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Input Query] --> AG_REACT
|
||||
AG_REACT --> AG_RAG
|
||||
AG_RAG --> DocIndexRetriever-MegaService
|
||||
EM ==> RET
|
||||
RET ==> RER
|
||||
Ingest[Ingest data] --> DP
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
AG_RAG <-.-> LLM_gen
|
||||
AG_REACT <-.-> LLM_gen
|
||||
EM <-.-> TEI_EM
|
||||
RET <-.-> R_RET
|
||||
RER <-.-> TEI_RER
|
||||
|
||||
direction TB
|
||||
%% Vector DB interaction
|
||||
R_RET <-.-> VDB
|
||||
DP <-.-> VDB
|
||||
|
||||
|
||||
```
|
||||
|
||||
### Why Agent for question answering?
|
||||
|
||||
1. Improve relevancy of retrieved context.
|
||||
@@ -14,72 +81,122 @@ This example showcases a hierarchical multi-agent system for question-answering
|
||||
3. Hierarchical agent can further improve performance.
|
||||
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.
|
||||
|
||||
### Roadmap
|
||||
## Deployment with docker
|
||||
|
||||
- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
|
||||
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
|
||||
- v1.0 or later: agents use open-source llm backend.
|
||||
- v1.1 or later: add safeguards
|
||||
1. Build agent docker image [Optional]
|
||||
|
||||
## Getting started
|
||||
> [!NOTE]
|
||||
> the step is optional. The docker images will be automatically pulled when running the docker compose commands. This step is only needed if pulling images failed.
|
||||
|
||||
1. Build agent docker image </br>
|
||||
First, clone the opea GenAIComps repo
|
||||
First, clone the opea GenAIComps repo.
|
||||
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
```
|
||||
|
||||
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
|
||||
|
||||
```
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
2. Set up environment for this example </br>
|
||||
|
||||
First, clone this repo.
|
||||
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
|
||||
Then build the agent docker image. Both the supervisor agent and the worker agent will use the same docker image, but when we launch the two agents we will specify different strategies and register different tools.
|
||||
Second, set up env vars.
|
||||
|
||||
```
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# for using open-source llms
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
|
||||
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> #so that no need to redownload every time
|
||||
|
||||
# optional: OPANAI_API_KEY if you want to use OpenAI models
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
2. Launch tool services </br>
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch other tools. </br>
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
|
||||
3. Set up environment for this example </br>
|
||||
First, clone this repo
|
||||
5. Launch agent services</br>
|
||||
We provide two options for `llm_engine` of the agents: 1. open-source LLMs, 2. OpenAI models via API calls.
|
||||
|
||||
Deploy it on Gaudi or Xeon respectively
|
||||
|
||||
::::{tab-set}
|
||||
:::{tab-item} Gaudi
|
||||
:sync: Gaudi
|
||||
|
||||
To use open-source LLMs on Gaudi2, run commands below.
|
||||
|
||||
```
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
```
|
||||
|
||||
Second, set up env vars
|
||||
:::
|
||||
:::{tab-item} Xeon
|
||||
:sync: Xeon
|
||||
|
||||
To use OpenAI models, run commands below.
|
||||
|
||||
```
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# optional: OPANAI_API_KEY
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
4. Launch agent services</br>
|
||||
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
|
||||
To use openai llm, run command below.
|
||||
|
||||
```
|
||||
cd docker_compose/intel/cpu/xeon
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
bash launch_agent_service_openai.sh
|
||||
```
|
||||
|
||||
:::
|
||||
::::
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
```
|
||||
docker logs docgrader-agent-endpoint
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
@@ -88,7 +205,7 @@ You should see something like "HTTP server setup successful" if the docker conta
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
@@ -96,11 +213,11 @@ curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: app
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain#5-customize-agent-strategy).
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
|
||||
100
AgentQnA/docker_compose/intel/cpu/xeon/README.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Single node on-prem deployment with Docker Compose on Xeon Scalable processors
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Xeon. For LLMs, we use OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
|
||||
|
||||
## Deployment with docker
|
||||
|
||||
1. First, clone this repo.
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
2. Set up environment for this example </br>
|
||||
|
||||
```
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
#OPANAI_API_KEY if you want to use OpenAI models
|
||||
export OPENAI_API_KEY=<your-openai-key>
|
||||
```
|
||||
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch Tool service
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
5. Launch `Agent` service
|
||||
|
||||
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and llama3.1-70B-instruct (served by TGI-Gaudi) in Gaudi example. To use openai llm, run command below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
|
||||
bash launch_agent_service_openai.sh
|
||||
```
|
||||
|
||||
6. [Optional] Build `Agent` docker image if pulling images failed.
|
||||
|
||||
```
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
```
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.</p>
|
||||
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
@@ -2,11 +2,10 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
worker-docgrader-agent:
|
||||
worker-rag-agent:
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: docgrader-agent-endpoint
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "9095:9095"
|
||||
@@ -36,8 +35,9 @@ services:
|
||||
supervisor-react-agent:
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- worker-rag-agent
|
||||
volumes:
|
||||
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
ports:
|
||||
- "9090:9090"
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
export ip_address=$(hostname -I | awk '{print $1}')
|
||||
export recursion_limit_worker=12
|
||||
export recursion_limit_supervisor=10
|
||||
export model="gpt-4o-mini-2024-07-18"
|
||||
export temperature=0
|
||||
export max_new_tokens=512
|
||||
export max_new_tokens=4096
|
||||
export OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
|
||||
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
|
||||
105
AgentQnA/docker_compose/intel/hpu/gaudi/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# Single node on-prem deployment AgentQnA on Gaudi
|
||||
|
||||
This example showcases a hierarchical multi-agent system for question-answering applications. We deploy the example on Gaudi using open-source LLMs,
|
||||
For more details, please refer to the deployment guide [here](../../../../README.md).
|
||||
|
||||
## Deployment with docker
|
||||
|
||||
1. First, clone this repo.
|
||||
```
|
||||
export WORKDIR=<your-work-directory>
|
||||
cd $WORKDIR
|
||||
git clone https://github.com/opea-project/GenAIExamples.git
|
||||
```
|
||||
2. Set up environment for this example </br>
|
||||
|
||||
```
|
||||
# Example: host_ip="192.168.1.1" or export host_ip="External_Public_IP"
|
||||
export host_ip=$(hostname -I | awk '{print $1}')
|
||||
# if you are in a proxy environment, also set the proxy-related environment variables
|
||||
export http_proxy="Your_HTTP_Proxy"
|
||||
export https_proxy="Your_HTTPs_Proxy"
|
||||
# Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
|
||||
export no_proxy="Your_No_Proxy"
|
||||
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
# for using open-source llms
|
||||
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
|
||||
# Example export HF_CACHE_DIR=$WORKDIR so that no need to redownload every time
|
||||
export HF_CACHE_DIR=<directory-where-llms-are-downloaded>
|
||||
|
||||
```
|
||||
|
||||
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)
|
||||
|
||||
First, launch the mega-service.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
|
||||
bash launch_retrieval_tool.sh
|
||||
```
|
||||
|
||||
Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.
|
||||
|
||||
```
|
||||
bash run_ingest_data.sh
|
||||
```
|
||||
|
||||
4. Launch Tool service
|
||||
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.
|
||||
```
|
||||
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
|
||||
```
|
||||
5. Launch `Agent` service
|
||||
|
||||
To use open-source LLMs on Gaudi2, run commands below.
|
||||
|
||||
```
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
```
|
||||
|
||||
6. [Optional] Build `Agent` docker image if pulling images failed.
|
||||
|
||||
```
|
||||
git clone https://github.com/opea-project/GenAIComps.git
|
||||
cd GenAIComps
|
||||
docker build -t opea/agent-langchain:latest -f comps/agent/langchain/Dockerfile .
|
||||
```
|
||||
|
||||
## Validate services
|
||||
|
||||
First look at logs of the agent docker containers:
|
||||
|
||||
```
|
||||
# worker agent
|
||||
docker logs rag-agent-endpoint
|
||||
```
|
||||
|
||||
```
|
||||
# supervisor agent
|
||||
docker logs react-agent-endpoint
|
||||
```
|
||||
|
||||
You should see something like "HTTP server setup successful" if the docker containers are started successfully.</p>
|
||||
|
||||
Second, validate worker agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
Third, validate supervisor agent:
|
||||
|
||||
```
|
||||
curl http://${host_ip}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Most recent album by Taylor Swift"
|
||||
}'
|
||||
```
|
||||
|
||||
## How to register your own tools with agent
|
||||
|
||||
You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
|
||||
@@ -2,33 +2,9 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
tgi-server:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.4
|
||||
container_name: tgi-server
|
||||
ports:
|
||||
- "8085:80"
|
||||
volumes:
|
||||
- ${HF_CACHE_DIR}:/data
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
|
||||
worker-docgrader-agent:
|
||||
worker-rag-agent:
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: docgrader-agent-endpoint
|
||||
depends_on:
|
||||
- tgi-server
|
||||
container_name: rag-agent-endpoint
|
||||
volumes:
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
@@ -37,7 +13,7 @@ services:
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: rag_agent
|
||||
strategy: rag_agent_llama
|
||||
recursion_limit: ${recursion_limit_worker}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
@@ -62,8 +38,7 @@ services:
|
||||
image: opea/agent-langchain:latest
|
||||
container_name: react-agent-endpoint
|
||||
depends_on:
|
||||
- tgi-server
|
||||
- worker-docgrader-agent
|
||||
- worker-rag-agent
|
||||
volumes:
|
||||
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
|
||||
- ${TOOLSET_PATH}:/home/user/tools/
|
||||
@@ -72,7 +47,7 @@ services:
|
||||
ipc: host
|
||||
environment:
|
||||
ip_address: ${ip_address}
|
||||
strategy: react_langgraph
|
||||
strategy: react_llama
|
||||
recursion_limit: ${recursion_limit_supervisor}
|
||||
llm_engine: tgi
|
||||
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
WORKPATH=$(dirname "$PWD")/..
|
||||
# export WORKDIR=$WORKPATH/../../
|
||||
echo "WORKDIR=${WORKDIR}"
|
||||
@@ -15,7 +18,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
|
||||
export temperature=0.01
|
||||
export max_new_tokens=512
|
||||
export max_new_tokens=4096
|
||||
|
||||
# agent related environment variables
|
||||
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
|
||||
@@ -27,17 +30,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
|
||||
export CRAG_SERVER=http://${ip_address}:8080
|
||||
|
||||
docker compose -f compose.yaml up -d
|
||||
|
||||
sleep 5s
|
||||
echo "Waiting tgi gaudi ready"
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
|
||||
docker logs tgi-server &> tgi-gaudi-service.log
|
||||
n=$((n+1))
|
||||
if grep -q Connected tgi-gaudi-service.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
done
|
||||
sleep 5s
|
||||
echo "Service started successfully"
|
||||
|
||||
25
AgentQnA/docker_compose/intel/hpu/gaudi/launch_tgi_gaudi.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# LLM related environment variables
|
||||
export HF_CACHE_DIR=${HF_CACHE_DIR}
|
||||
ls $HF_CACHE_DIR
|
||||
export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
|
||||
export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
|
||||
export NUM_SHARDS=4
|
||||
|
||||
docker compose -f tgi_gaudi.yaml up -d
|
||||
|
||||
sleep 5s
|
||||
echo "Waiting tgi gaudi ready"
|
||||
n=0
|
||||
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
|
||||
docker logs tgi-server &> tgi-gaudi-service.log
|
||||
n=$((n+1))
|
||||
if grep -q Connected tgi-gaudi-service.log; then
|
||||
break
|
||||
fi
|
||||
sleep 5s
|
||||
done
|
||||
sleep 5s
|
||||
echo "Service started successfully"
|
||||
30
AgentQnA/docker_compose/intel/hpu/gaudi/tgi_gaudi.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
tgi-server:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-server
|
||||
ports:
|
||||
- "8085:80"
|
||||
volumes:
|
||||
- ${HF_CACHE_DIR}:/data
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
ipc: host
|
||||
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
|
||||
@@ -17,6 +17,12 @@ if [ ! -d "$HF_CACHE_DIR" ]; then
|
||||
fi
|
||||
ls $HF_CACHE_DIR
|
||||
|
||||
function start_tgi(){
|
||||
echo "Starting tgi-gaudi server"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_tgi_gaudi.sh
|
||||
|
||||
}
|
||||
|
||||
function start_agent_and_api_server() {
|
||||
echo "Starting CRAG server"
|
||||
@@ -25,6 +31,7 @@ function start_agent_and_api_server() {
|
||||
echo "Starting Agent services"
|
||||
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
|
||||
bash launch_agent_service_tgi_gaudi.sh
|
||||
sleep 10
|
||||
}
|
||||
|
||||
function validate() {
|
||||
@@ -43,18 +50,22 @@ function validate() {
|
||||
|
||||
function validate_agent_service() {
|
||||
echo "----------------Test agent ----------------"
|
||||
local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Tell me about Michael Jackson song thriller"
|
||||
}')
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs docgrader-agent-endpoint
|
||||
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9095/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
# "query": "Tell me about Michael Jackson song thriller"
|
||||
# }')
|
||||
export agent_port="9095"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "rag-agent-endpoint")
|
||||
docker logs rag-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
"query": "Tell me about Michael Jackson song thriller"
|
||||
}')
|
||||
# local CONTENT=$(http_proxy="" curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json" -d '{
|
||||
# "query": "Tell me about Michael Jackson song thriller"
|
||||
# }')
|
||||
export agent_port="9090"
|
||||
local CONTENT=$(python3 $WORKDIR/GenAIExamples/AgentQnA/tests/test.py)
|
||||
local EXIT_CODE=$(validate "$CONTENT" "Thriller" "react-agent-endpoint")
|
||||
docker logs react-agent-endpoint
|
||||
if [ "$EXIT_CODE" == "1" ]; then
|
||||
@@ -64,6 +75,10 @@ function validate_agent_service() {
|
||||
}
|
||||
|
||||
function main() {
|
||||
echo "==================== Start TGI ===================="
|
||||
start_tgi
|
||||
echo "==================== TGI started ===================="
|
||||
|
||||
echo "==================== Start agent ===================="
|
||||
start_agent_and_api_server
|
||||
echo "==================== Agent started ===================="
|
||||
25
AgentQnA/tests/test.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def generate_answer_agent_api(url, prompt):
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"query": prompt,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
answer = response.json()["text"]
|
||||
return answer
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
ip_address = os.getenv("ip_address", "localhost")
|
||||
agent_port = os.getenv("agent_port", "9095")
|
||||
url = f"http://{ip_address}:{agent_port}/v1/chat/completions"
|
||||
prompt = "Tell me about Michael Jackson song thriller"
|
||||
answer = generate_answer_agent_api(url, prompt)
|
||||
print(answer)
|
||||
@@ -19,7 +19,6 @@ function stop_crag() {
|
||||
|
||||
function stop_agent_docker() {
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
|
||||
# docker compose -f compose.yaml down
|
||||
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
@@ -28,11 +27,21 @@ function stop_agent_docker() {
|
||||
done
|
||||
}
|
||||
|
||||
function stop_tgi(){
|
||||
cd $WORKPATH/docker_compose/intel/hpu/gaudi/
|
||||
container_list=$(cat tgi_gaudi.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
echo "Stopping container $container_name"
|
||||
if [[ ! -z "$cid" ]]; then docker rm $cid -f && sleep 1s; fi
|
||||
done
|
||||
|
||||
}
|
||||
|
||||
function stop_retrieval_tool() {
|
||||
echo "Stopping Retrieval tool"
|
||||
local RETRIEVAL_TOOL_PATH=$WORKPATH/../DocIndexRetriever
|
||||
cd $RETRIEVAL_TOOL_PATH/docker_compose/intel/cpu/xeon/
|
||||
# docker compose -f compose.yaml down
|
||||
container_list=$(cat compose.yaml | grep container_name | cut -d':' -f2)
|
||||
for container_name in $container_list; do
|
||||
cid=$(docker ps -aq --filter "name=$container_name")
|
||||
@@ -43,25 +52,26 @@ function stop_retrieval_tool() {
|
||||
echo "workpath: $WORKPATH"
|
||||
echo "=================== Stop containers ===================="
|
||||
stop_crag
|
||||
stop_tgi
|
||||
stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
|
||||
cd $WORKPATH/tests
|
||||
|
||||
echo "=================== #1 Building docker images===================="
|
||||
bash 1_build_images.sh
|
||||
bash step1_build_images.sh
|
||||
echo "=================== #1 Building docker images completed===================="
|
||||
|
||||
echo "=================== #2 Start retrieval tool===================="
|
||||
bash 2_start_retrieval_tool.sh
|
||||
bash step2_start_retrieval_tool.sh
|
||||
echo "=================== #2 Retrieval tool started===================="
|
||||
|
||||
echo "=================== #3 Ingest data and validate retrieval===================="
|
||||
bash 3_ingest_data_and_validate_retrieval.sh
|
||||
bash step3_ingest_data_and_validate_retrieval.sh
|
||||
echo "=================== #3 Data ingestion and validation completed===================="
|
||||
|
||||
echo "=================== #4 Start agent and API server===================="
|
||||
bash 4_launch_and_validate_agent_tgi.sh
|
||||
bash step4_launch_and_validate_agent_tgi.sh
|
||||
echo "=================== #4 Agent test passed ===================="
|
||||
|
||||
echo "=================== #5 Stop agent and API server===================="
|
||||
@@ -70,4 +80,6 @@ stop_agent_docker
|
||||
stop_retrieval_tool
|
||||
echo "=================== #5 Agent and API server stopped===================="
|
||||
|
||||
echo y | docker system prune
|
||||
|
||||
echo "ALL DONE!"
|
||||
|
||||
@@ -25,7 +25,7 @@ get_billboard_rank_date:
|
||||
args_schema:
|
||||
rank:
|
||||
type: int
|
||||
description: song name
|
||||
description: the rank of interest, for example 1 for top 1
|
||||
date:
|
||||
type: str
|
||||
description: date
|
||||
|
||||
@@ -12,16 +12,31 @@ def search_knowledge_base(query: str) -> str:
|
||||
print(url)
|
||||
proxies = {"http": ""}
|
||||
payload = {
|
||||
"text": query,
|
||||
"messages": query,
|
||||
}
|
||||
response = requests.post(url, json=payload, proxies=proxies)
|
||||
print(response)
|
||||
docs = response.json()["documents"]
|
||||
context = ""
|
||||
for i, doc in enumerate(docs):
|
||||
if i == 0:
|
||||
context = doc
|
||||
else:
|
||||
context += "\n" + doc
|
||||
print(context)
|
||||
return context
|
||||
if "documents" in response.json():
|
||||
docs = response.json()["documents"]
|
||||
context = ""
|
||||
for i, doc in enumerate(docs):
|
||||
if i == 0:
|
||||
context = doc
|
||||
else:
|
||||
context += "\n" + doc
|
||||
# print(context)
|
||||
return context
|
||||
elif "text" in response.json():
|
||||
return response.json()["text"]
|
||||
elif "reranked_docs" in response.json():
|
||||
docs = response.json()["reranked_docs"]
|
||||
context = ""
|
||||
for i, doc in enumerate(docs):
|
||||
if i == 0:
|
||||
context = doc["text"]
|
||||
else:
|
||||
context += "\n" + doc["text"]
|
||||
# print(context)
|
||||
return context
|
||||
else:
|
||||
return "Error parsing response from the knowledge base."
|
||||
|
||||
@@ -18,7 +18,7 @@ WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./audioqna.py /home/user/audioqna.py
|
||||
|
||||
32
AudioQnA/Dockerfile.multilang
Normal file
@@ -0,0 +1,32 @@
|
||||
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
|
||||
libgl1-mesa-glx \
|
||||
libjemalloc-dev \
|
||||
git
|
||||
|
||||
RUN useradd -m -s /bin/bash user && \
|
||||
mkdir -p /home/user && \
|
||||
chown -R user /home/user/
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip setuptools && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./audioqna_multilang.py /home/user/audioqna_multilang.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "audioqna_multilang.py"]
|
||||
@@ -2,6 +2,63 @@
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio files, with the added functionality of Text-to-Speech (TTS) for generating spoken responses. The example showcases how to convert audio input to text using Automatic Speech Recognition (ASR), generate answers to user queries using a language model, and then convert those answers back to speech using Text-to-Speech (TTS).
|
||||
|
||||
The AudioQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 400
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 50px
|
||||
---
|
||||
flowchart LR
|
||||
%% Colors %%
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
style AudioQnA-MegaService stroke:#000000
|
||||
|
||||
%% Subgraphs %%
|
||||
subgraph AudioQnA-MegaService["AudioQnA MegaService "]
|
||||
direction LR
|
||||
ASR([ASR MicroService]):::blue
|
||||
LLM([LLM MicroService]):::blue
|
||||
TTS([TTS MicroService]):::blue
|
||||
end
|
||||
subgraph UserInterface[" User Interface "]
|
||||
direction LR
|
||||
a([User Input Query]):::orchid
|
||||
UI([UI server<br>]):::orchid
|
||||
end
|
||||
|
||||
|
||||
|
||||
WSP_SRV{{whisper service<br>}}
|
||||
SPC_SRV{{speecht5 service <br>}}
|
||||
LLM_gen{{LLM Service <br>}}
|
||||
GW([AudioQnA GateWay<br>]):::orange
|
||||
|
||||
|
||||
%% Questions interaction
|
||||
direction LR
|
||||
a[User Audio Query] --> UI
|
||||
UI --> GW
|
||||
GW <==> AudioQnA-MegaService
|
||||
ASR ==> LLM
|
||||
LLM ==> TTS
|
||||
|
||||
%% Embedding service flow
|
||||
direction LR
|
||||
ASR <-.-> WSP_SRV
|
||||
LLM <-.-> LLM_gen
|
||||
TTS <-.-> SPC_SRV
|
||||
|
||||
```
|
||||
|
||||
## Deploy AudioQnA Service
|
||||
|
||||
The AudioQnA service can be deployed on either Intel Gaudi2 or Intel Xeon Scalable Processor.
|
||||
|
||||
98
AudioQnA/audioqna_multilang.py
Normal file
@@ -0,0 +1,98 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
|
||||
from comps import AudioQnAGateway, MicroService, ServiceOrchestrator, ServiceType
|
||||
|
||||
MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")
|
||||
MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))
|
||||
|
||||
WHISPER_SERVER_HOST_IP = os.getenv("WHISPER_SERVER_HOST_IP", "0.0.0.0")
|
||||
WHISPER_SERVER_PORT = int(os.getenv("WHISPER_SERVER_PORT", 7066))
|
||||
GPT_SOVITS_SERVER_HOST_IP = os.getenv("GPT_SOVITS_SERVER_HOST_IP", "0.0.0.0")
|
||||
GPT_SOVITS_SERVER_PORT = int(os.getenv("GPT_SOVITS_SERVER_PORT", 9088))
|
||||
LLM_SERVER_HOST_IP = os.getenv("LLM_SERVER_HOST_IP", "0.0.0.0")
|
||||
LLM_SERVER_PORT = int(os.getenv("LLM_SERVER_PORT", 8888))
|
||||
|
||||
|
||||
def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
print(inputs)
|
||||
if self.services[cur_node].service_type == ServiceType.ASR:
|
||||
# {'byte_str': 'UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA'}
|
||||
inputs["audio"] = inputs["byte_str"]
|
||||
del inputs["byte_str"]
|
||||
elif self.services[cur_node].service_type == ServiceType.LLM:
|
||||
# convert TGI/vLLM to unified OpenAI /v1/chat/completions format
|
||||
next_inputs = {}
|
||||
next_inputs["model"] = "tgi" # specifically clarify the fake model to make the format unified
|
||||
next_inputs["messages"] = [{"role": "user", "content": inputs["asr_result"]}]
|
||||
next_inputs["max_tokens"] = llm_parameters_dict["max_tokens"]
|
||||
next_inputs["top_p"] = llm_parameters_dict["top_p"]
|
||||
next_inputs["stream"] = inputs["streaming"] # False as default
|
||||
next_inputs["frequency_penalty"] = inputs["frequency_penalty"]
|
||||
# next_inputs["presence_penalty"] = inputs["presence_penalty"]
|
||||
# next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
|
||||
next_inputs["temperature"] = inputs["temperature"]
|
||||
inputs = next_inputs
|
||||
elif self.services[cur_node].service_type == ServiceType.TTS:
|
||||
next_inputs = {}
|
||||
next_inputs["text"] = inputs["choices"][0]["message"]["content"]
|
||||
next_inputs["text_language"] = kwargs["tts_text_language"] if "tts_text_language" in kwargs else "zh"
|
||||
inputs = next_inputs
|
||||
return inputs
|
||||
|
||||
|
||||
def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
|
||||
if self.services[cur_node].service_type == ServiceType.TTS:
|
||||
audio_base64 = base64.b64encode(data).decode("utf-8")
|
||||
return {"byte_str": audio_base64}
|
||||
return data
|
||||
|
||||
|
||||
class AudioQnAService:
|
||||
def __init__(self, host="0.0.0.0", port=8000):
|
||||
self.host = host
|
||||
self.port = port
|
||||
ServiceOrchestrator.align_inputs = align_inputs
|
||||
ServiceOrchestrator.align_outputs = align_outputs
|
||||
self.megaservice = ServiceOrchestrator()
|
||||
|
||||
def add_remote_service(self):
|
||||
asr = MicroService(
|
||||
name="asr",
|
||||
host=WHISPER_SERVER_HOST_IP,
|
||||
port=WHISPER_SERVER_PORT,
|
||||
# endpoint="/v1/audio/transcriptions",
|
||||
endpoint="/v1/asr",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.ASR,
|
||||
)
|
||||
llm = MicroService(
|
||||
name="llm",
|
||||
host=LLM_SERVER_HOST_IP,
|
||||
port=LLM_SERVER_PORT,
|
||||
endpoint="/v1/chat/completions",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.LLM,
|
||||
)
|
||||
tts = MicroService(
|
||||
name="tts",
|
||||
host=GPT_SOVITS_SERVER_HOST_IP,
|
||||
port=GPT_SOVITS_SERVER_PORT,
|
||||
# endpoint="/v1/audio/speech",
|
||||
endpoint="/",
|
||||
use_remote_service=True,
|
||||
service_type=ServiceType.TTS,
|
||||
)
|
||||
self.megaservice.add(asr).add(llm).add(tts)
|
||||
self.megaservice.flow_to(asr, llm)
|
||||
self.megaservice.flow_to(llm, tts)
|
||||
self.gateway = AudioQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
audioqna = AudioQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
|
||||
audioqna.add_remote_service()
|
||||
@@ -1,4 +1,4 @@
|
||||
# AudioQnA accuracy Evaluation
|
||||
# AudioQnA Accuracy
|
||||
|
||||
AudioQnA is an example that demonstrates the integration of Generative AI (GenAI) models for performing question-answering (QnA) on audio scene, which contains Automatic Speech Recognition (ASR) and Text-to-Speech (TTS). The following is the piepline for evaluating the ASR accuracy.
|
||||
|
||||
@@ -14,7 +14,7 @@ We evaluate the WER (Word Error Rate) metric of the ASR microservice.
|
||||
|
||||
### Launch ASR microservice
|
||||
|
||||
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr).
|
||||
Launch the ASR microserice with the following commands. For more details please refer to [doc](https://github.com/opea-project/GenAIComps/tree/main/comps/asr/whisper/README.md).
|
||||
|
||||
```bash
|
||||
git clone https://github.com/opea-project/GenAIComps
|
||||
@@ -36,9 +36,9 @@ Evaluate the performance with the LLM:
|
||||
|
||||
```py
|
||||
# validate the offline model
|
||||
# python offline_evaluate.py
|
||||
# python offline_eval.py
|
||||
# validate the online asr microservice accuracy
|
||||
python online_evaluate.py
|
||||
python online_eval.py
|
||||
```
|
||||
|
||||
### Performance Result
|
||||
|
||||
5
AudioQnA/benchmark/accuracy/run_acc.sh
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
python online_eval.py
|
||||
77
AudioQnA/benchmark/performance/README.md
Normal file
@@ -0,0 +1,77 @@
|
||||
# AudioQnA Benchmarking
|
||||
|
||||
This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance.
|
||||
|
||||
By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community.
|
||||
|
||||
## Purpose
|
||||
|
||||
We aim to run these benchmarks and share them with the OPEA community for three primary reasons:
|
||||
|
||||
- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs.
|
||||
- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case.
|
||||
- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading llms, serving frameworks etc.
|
||||
|
||||
## Metrics
|
||||
|
||||
The benchmark will report the below metrics, including:
|
||||
|
||||
- Number of Concurrent Requests
|
||||
- End-to-End Latency: P50, P90, P99 (in milliseconds)
|
||||
- End-to-End First Token Latency: P50, P90, P99 (in milliseconds)
|
||||
- Average Next Token Latency (in milliseconds)
|
||||
- Average Token Latency (in milliseconds)
|
||||
- Requests Per Second (RPS)
|
||||
- Output Tokens Per Second
|
||||
- Input Tokens Per Second
|
||||
|
||||
Results will be displayed in the terminal and saved as CSV file named `1_stats.csv` for easy export to spreadsheets.
|
||||
|
||||
## Getting Started
|
||||
|
||||
We recommend using Kubernetes to deploy the AudioQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md).
|
||||
|
||||
- Every node has direct internet access
|
||||
- Set up kubectl on the master node with access to the Kubernetes cluster.
|
||||
- Install Python 3.8+ on the master node for running GenAIEval.
|
||||
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods.
|
||||
- Ensure that the container's ulimit can meet the the number of requests.
|
||||
|
||||
```bash
|
||||
# The way to modify the containered ulimit:
|
||||
sudo systemctl edit containerd
|
||||
# Add two lines:
|
||||
[Service]
|
||||
LimitNOFILE=65536:1048576
|
||||
|
||||
sudo systemctl daemon-reload; sudo systemctl restart containerd
|
||||
```
|
||||
|
||||
## Test Steps
|
||||
|
||||
Please deploy AudioQnA service before benchmarking.
|
||||
|
||||
### Run Benchmark Test
|
||||
|
||||
Before the benchmark, we can configure the number of test queries and test output directory by:
|
||||
|
||||
```bash
|
||||
export USER_QUERIES="[128, 128, 128, 128]"
|
||||
export TEST_OUTPUT_DIR="/tmp/benchmark_output"
|
||||
```
|
||||
|
||||
And then run the benchmark by:
|
||||
|
||||
```bash
|
||||
bash benchmark.sh -n <node_count>
|
||||
```
|
||||
|
||||
The argument `-n` refers to the number of test nodes.
|
||||
|
||||
### Data collection
|
||||
|
||||
All the test results will come to this folder `/tmp/benchmark_output` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps.
|
||||
99
AudioQnA/benchmark/performance/benchmark.sh
Normal file
@@ -0,0 +1,99 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
deployment_type="k8s"
|
||||
node_number=1
|
||||
service_port=8888
|
||||
query_per_node=128
|
||||
|
||||
benchmark_tool_path="$(pwd)/GenAIEval"
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]"
|
||||
echo " -d deployment_type AudioQnA deployment type, select between k8s and docker (default: k8s)"
|
||||
echo " -n node_number Test node number, required only for k8s deployment_type, (default: 1)"
|
||||
echo " -i service_ip AudioQnA service ip, required only for docker deployment_type"
|
||||
echo " -p service_port AudioQnA service port, required only for docker deployment_type, (default: 8888)"
|
||||
exit 1
|
||||
}
|
||||
|
||||
while getopts ":d:n:i:p:" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
deployment_type=$OPTARG
|
||||
;;
|
||||
n )
|
||||
node_number=$OPTARG
|
||||
;;
|
||||
i )
|
||||
service_ip=$OPTARG
|
||||
;;
|
||||
p )
|
||||
service_port=$OPTARG
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
usage
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then
|
||||
echo "Error: service_ip is required for docker deployment_type" 1>&2
|
||||
usage
|
||||
fi
|
||||
|
||||
if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then
|
||||
echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2
|
||||
fi
|
||||
|
||||
function main() {
|
||||
if [[ ! -d ${benchmark_tool_path} ]]; then
|
||||
echo "Benchmark tool not found, setting up..."
|
||||
setup_env
|
||||
fi
|
||||
run_benchmark
|
||||
}
|
||||
|
||||
function setup_env() {
|
||||
git clone https://github.com/opea-project/GenAIEval.git
|
||||
pushd ${benchmark_tool_path}
|
||||
python3 -m venv stress_venv
|
||||
source stress_venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
popd
|
||||
}
|
||||
|
||||
function run_benchmark() {
|
||||
source ${benchmark_tool_path}/stress_venv/bin/activate
|
||||
export DEPLOYMENT_TYPE=${deployment_type}
|
||||
export SERVICE_IP=${service_ip:-"None"}
|
||||
export SERVICE_PORT=${service_port:-"None"}
|
||||
if [[ -z $USER_QUERIES ]]; then
|
||||
user_query=$((query_per_node*node_number))
|
||||
export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]"
|
||||
echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}."
|
||||
fi
|
||||
export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//')
|
||||
if [[ -z $WARMUP ]]; then export WARMUP=0; fi
|
||||
if [[ -z $TEST_OUTPUT_DIR ]]; then
|
||||
if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then
|
||||
export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}"
|
||||
else
|
||||
export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker"
|
||||
fi
|
||||
echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}."
|
||||
fi
|
||||
|
||||
envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml
|
||||
cd ${benchmark_tool_path}/evals/benchmark
|
||||
python benchmark.py
|
||||
}
|
||||
|
||||
main
|
||||
52
AudioQnA/benchmark/performance/benchmark.yaml
Normal file
@@ -0,0 +1,52 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
test_suite_config: # Overall configuration settings for the test suite
|
||||
examples: ["audioqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna
|
||||
deployment_type: "k8s" # Default is "k8s", can also be "docker"
|
||||
service_ip: None # Leave as None for k8s, specify for Docker
|
||||
service_port: None # Leave as None for k8s, specify for Docker
|
||||
warm_ups: 0 # Number of test requests for warm-up
|
||||
run_time: 60m # The max total run time for the test suite
|
||||
seed: # The seed for all RNGs
|
||||
user_queries: [1, 2, 4, 8, 16, 32, 64, 128] # Number of test requests at each concurrency level
|
||||
query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by defeult.
|
||||
random_prompt: false # Use random prompts if true, fixed prompts if false
|
||||
collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false
|
||||
data_visualization: false # Generate data visualization if true, do not generate data visualization if false
|
||||
llm_model: "Intel/neural-chat-7b-v3-3" # The LLM model used for the test
|
||||
test_output_dir: "/tmp/benchmark_output" # The directory to store the test output
|
||||
load_shape: # Tenant concurrency pattern
|
||||
name: constant # poisson or constant(locust default load shape)
|
||||
params: # Loadshape-specific parameters
|
||||
constant: # Poisson load shape specific parameters, activate only if load_shape is poisson
|
||||
concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users
|
||||
poisson: # Poisson load shape specific parameters, activate only if load_shape is poisson
|
||||
arrival-rate: 1.0 # Request arrival rate
|
||||
namespace: "" # Fill the user-defined namespace. Otherwise, it will be default.
|
||||
|
||||
test_cases:
|
||||
audioqna:
|
||||
asr:
|
||||
run_test: true
|
||||
service_name: "asr-svc" # Replace with your service name
|
||||
llm:
|
||||
run_test: true
|
||||
service_name: "llm-svc" # Replace with your service name
|
||||
parameters:
|
||||
model_name: "Intel/neural-chat-7b-v3-3"
|
||||
max_new_tokens: 128
|
||||
temperature: 0.01
|
||||
top_k: 10
|
||||
top_p: 0.95
|
||||
repetition_penalty: 1.03
|
||||
streaming: true
|
||||
llmserve:
|
||||
run_test: true
|
||||
service_name: "llm-svc" # Replace with your service name
|
||||
tts:
|
||||
run_test: true
|
||||
service_name: "tts-svc" # Replace with your service name
|
||||
e2e:
|
||||
run_test: true
|
||||
service_name: "audioqna-backend-server-svc" # Replace with your service name
|
||||
@@ -127,9 +127,13 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json'
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -41,7 +41,7 @@ services:
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
services:
|
||||
whisper-service:
|
||||
image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
|
||||
container_name: whisper-service
|
||||
ports:
|
||||
- "7066:7066"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
command: --language "zh"
|
||||
gpt-sovits-service:
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
container_name: gpt-sovits-service
|
||||
ports:
|
||||
- "9880:9880"
|
||||
ipc: host
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
restart: unless-stopped
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu
|
||||
container_name: tgi-service
|
||||
ports:
|
||||
- "3006:80"
|
||||
volumes:
|
||||
- "./data:/data"
|
||||
shm_size: 1g
|
||||
environment:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
|
||||
audioqna-xeon-backend-server:
|
||||
image: ${REGISTRY:-opea}/audioqna-multilang:${TAG:-latest}
|
||||
container_name: audioqna-xeon-backend-server
|
||||
ports:
|
||||
- "3008:8888"
|
||||
environment:
|
||||
- no_proxy=${no_proxy}
|
||||
- https_proxy=${https_proxy}
|
||||
- http_proxy=${http_proxy}
|
||||
- MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
|
||||
- LLM_SERVER_HOST_IP=${LLM_SERVER_HOST_IP}
|
||||
- LLM_SERVER_PORT=${LLM_SERVER_PORT}
|
||||
- LLM_MODEL_ID=${LLM_MODEL_ID}
|
||||
- WHISPER_SERVER_HOST_IP=${WHISPER_SERVER_HOST_IP}
|
||||
- WHISPER_SERVER_PORT=${WHISPER_SERVER_PORT}
|
||||
- GPT_SOVITS_SERVER_HOST_IP=${GPT_SOVITS_SERVER_HOST_IP}
|
||||
- GPT_SOVITS_SERVER_PORT=${GPT_SOVITS_SERVER_PORT}
|
||||
ipc: host
|
||||
restart: always
|
||||
|
||||
networks:
|
||||
default:
|
||||
driver: bridge
|
||||
7
AudioQnA/docker_compose/intel/cpu/xeon/set_env.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
@@ -79,6 +79,8 @@ export LLM_SERVICE_PORT=3007
|
||||
|
||||
## 🚀 Start the MegaService
|
||||
|
||||
> **_NOTE:_** Users will need at least three Gaudi cards for AudioQnA.
|
||||
|
||||
```bash
|
||||
cd GenAIExamples/AudioQnA/docker_compose/intel/hpu/gaudi/
|
||||
docker compose up -d
|
||||
@@ -127,9 +129,13 @@ curl http://${host_ip}:3002/v1/audio/speech \
|
||||
|
||||
## 🚀 Test MegaService
|
||||
|
||||
Test the AudioQnA megaservice by recording a .wav file, encoding the file into the base64 format, and then sending the
|
||||
base64 string to the megaservice endpoint. The megaservice will return a spoken response as a base64 string. To listen
|
||||
to the response, decode the base64 string and save it as a .wav file.
|
||||
|
||||
```bash
|
||||
curl http://${host_ip}:3008/v1/audioqna \
|
||||
-X POST \
|
||||
-d '{"audio": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA", "max_tokens":64}' \
|
||||
-H 'Content-Type: application/json'
|
||||
-H 'Content-Type: application/json' | sed 's/^"//;s/"$//' | base64 -d > output.wav
|
||||
```
|
||||
|
||||
@@ -51,7 +51,7 @@ services:
|
||||
environment:
|
||||
TTS_ENDPOINT: ${TTS_ENDPOINT}
|
||||
tgi-service:
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
container_name: tgi-gaudi-server
|
||||
ports:
|
||||
- "3006:80"
|
||||
@@ -61,11 +61,15 @@ services:
|
||||
no_proxy: ${no_proxy}
|
||||
http_proxy: ${http_proxy}
|
||||
https_proxy: ${https_proxy}
|
||||
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
|
||||
HF_HUB_DISABLE_PROGRESS_BARS: 1
|
||||
HF_HUB_ENABLE_HF_TRANSFER: 0
|
||||
HABANA_VISIBLE_DEVICES: all
|
||||
OMPI_MCA_btl_vader_single_copy_mechanism: none
|
||||
ENABLE_HPU_GRAPH: true
|
||||
LIMIT_HPU_GRAPH: true
|
||||
USE_FLASH_ATTENTION: true
|
||||
FLASH_ATTENTION_RECOMPUTE: true
|
||||
runtime: habana
|
||||
cap_add:
|
||||
- SYS_NICE
|
||||
|
||||
7
AudioQnA/docker_compose/intel/hpu/gaudi/set_env.sh
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
pushd "../../../../../" > /dev/null
|
||||
source .set_env.sh
|
||||
popd > /dev/null
|
||||
@@ -53,3 +53,9 @@ services:
|
||||
dockerfile: comps/tts/speecht5/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/tts:${TAG:-latest}
|
||||
gpt-sovits:
|
||||
build:
|
||||
context: GenAIComps
|
||||
dockerfile: comps/tts/gpt-sovits/Dockerfile
|
||||
extends: audioqna
|
||||
image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
|
||||
|
||||
@@ -7,14 +7,14 @@
|
||||
|
||||
## Deploy On Xeon
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifests
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/cpu/xeon/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
```
|
||||
## Deploy On Gaudi
|
||||
```
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifests
|
||||
cd GenAIExamples/AudioQnA/kubernetes/intel/hpu/gaudi/manifest
|
||||
export HUGGINGFACEHUB_API_TOKEN="YourOwnToken"
|
||||
sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" audioqna.yaml
|
||||
kubectl apply -f audioqna.yaml
|
||||
|
||||
@@ -4,7 +4,7 @@ This document outlines the deployment process for a AudioQnA application utilizi
|
||||
|
||||
The AudioQnA Service leverages a Kubernetes operator called genai-microservices-connector(GMC). GMC supports connecting microservices to create pipelines based on the specification in the pipeline yaml file in addition to allowing the user to dynamically control which model is used in a service such as an LLM or embedder. The underlying pipeline language also supports using external services that may be running in public or private cloud elsewhere.
|
||||
|
||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
|
||||
Install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). Soon as we publish images to Docker Hub, at which point no builds will be required, simplifying install.
|
||||
|
||||
|
||||
The AudioQnA application is defined as a Custom Resource (CR) file that the above GMC operator acts upon. It first checks if the microservices listed in the CR yaml file are running, if not starts them and then proceeds to connect them. When the AudioQnA pipeline is ready, the service endpoint details are returned, letting you use the application. Should you use "kubectl get pods" commands you will see all the component microservices, in particular `asr`, `tts`, and `llm`.
|
||||
@@ -25,7 +25,7 @@ The AudioQnA uses the below prebuilt images if you choose a Xeon deployment
|
||||
Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
|
||||
For Gaudi:
|
||||
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:1.2.1
|
||||
- tgi-service: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
- whisper-gaudi: opea/whisper-gaudi:latest
|
||||
- speecht5-gaudi: opea/speecht5-gaudi:latest
|
||||
|
||||
|
||||
@@ -247,7 +247,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: ghcr.io/huggingface/text-generation-inference:2.2.0
|
||||
image: "ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu"
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
|
||||
@@ -271,7 +271,7 @@ spec:
|
||||
- envFrom:
|
||||
- configMapRef:
|
||||
name: audio-qna-config
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
image: ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
name: llm-dependency-deploy-demo
|
||||
securityContext:
|
||||
capabilities:
|
||||
@@ -303,6 +303,14 @@ spec:
|
||||
value: none
|
||||
- name: PT_HPU_ENABLE_LAZY_COLLECTIVES
|
||||
value: 'true'
|
||||
- name: ENABLE_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: LIMIT_HPU_GRAPH
|
||||
value: 'true'
|
||||
- name: USE_FLASH_ATTENTION
|
||||
value: 'true'
|
||||
- name: FLASH_ATTENTION_RECOMPUTE
|
||||
value: 'true'
|
||||
- name: runtime
|
||||
value: habana
|
||||
- name: HABANA_VISIBLE_DEVICES
|
||||
@@ -315,7 +323,7 @@ spec:
|
||||
volumes:
|
||||
- name: model-volume
|
||||
hostPath:
|
||||
path: /home/sdp/cesg
|
||||
path: /mnt/models
|
||||
type: Directory
|
||||
- name: shm
|
||||
emptyDir:
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="audioqna whisper-gaudi asr llm-tgi speecht5-gaudi tts"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ function validate_megaservice() {
|
||||
#
|
||||
# sed -i "s/localhost/$ip_address/g" playwright.config.ts
|
||||
#
|
||||
## conda install -c conda-forge nodejs -y
|
||||
## conda install -c conda-forge nodejs=22.6.0 -y
|
||||
# npm install && npm ci && npx playwright install --with-deps
|
||||
# node -v && npm -v && pip list
|
||||
#
|
||||
|
||||
@@ -22,7 +22,7 @@ function build_docker_images() {
|
||||
service_list="audioqna whisper asr llm-tgi speecht5 tts"
|
||||
docker compose -f build.yaml build ${service_list} --no-cache > ${LOG_PATH}/docker_image_build.log
|
||||
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.1
|
||||
docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
|
||||
docker images && sleep 1s
|
||||
}
|
||||
|
||||
@@ -90,7 +90,7 @@ function validate_megaservice() {
|
||||
#
|
||||
# sed -i "s/localhost/$ip_address/g" playwright.config.ts
|
||||
#
|
||||
## conda install -c conda-forge nodejs -y
|
||||
## conda install -c conda-forge nodejs=22.6.0 -y
|
||||
# npm install && npm ci && npx playwright install --with-deps
|
||||
# node -v && npm -v && pip list
|
||||
#
|
||||
|
||||
@@ -23,4 +23,4 @@ RUN npm run build
|
||||
EXPOSE 5173
|
||||
|
||||
# Run the front-end application in preview mode
|
||||
CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]
|
||||
CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"]
|
||||
|
||||
@@ -79,4 +79,4 @@ a.btn {
|
||||
|
||||
.w-12\/12 {
|
||||
width: 100%
|
||||
}
|
||||
}
|
||||
|
||||
@@ -89,4 +89,4 @@
|
||||
<stop offset="1" stop-color="#3300FF" stop-opacity="0.2" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 5.8 KiB |
@@ -89,4 +89,4 @@
|
||||
<stop offset="1" stop-color="#f3f4f6" stop-opacity="0" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 5.8 KiB |
@@ -76,4 +76,4 @@
|
||||
<stop offset="1" stop-color="#9CFFED" stop-opacity="0" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB |
@@ -76,4 +76,4 @@
|
||||
<stop offset="1" stop-color="#6141E1" stop-opacity="0" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB |
@@ -89,4 +89,4 @@
|
||||
<stop offset="1" stop-color="#3300FF" stop-opacity="0" />
|
||||
</linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 5.8 KiB |
@@ -3,4 +3,4 @@
|
||||
<path
|
||||
d="M512 1024a512 512 0 1 1 512-512 512 512 0 0 1-512 512z m0-896a384 384 0 1 0 384 384A384 384 0 0 0 512 128z m128 576h-256a64 64 0 0 1-64-64v-256a64 64 0 0 1 64-64h256a64 64 0 0 1 64 64v256a64 64 0 0 1-64 64z"
|
||||
fill="#d81e06" p-id="3104"></path>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 429 B After Width: | Height: | Size: 430 B |
@@ -1 +1 @@
|
||||
<svg t="1713431562066" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="6399" width="32" height="32"><path d="M592 768h-160c-26.6 0-48-21.4-48-48V384h-175.4c-35.6 0-53.4-43-28.2-68.2L484.6 11.4c15-15 39.6-15 54.6 0l304.4 304.4c25.2 25.2 7.4 68.2-28.2 68.2H640v336c0 26.6-21.4 48-48 48z m432-16v224c0 26.6-21.4 48-48 48H48c-26.6 0-48-21.4-48-48V752c0-26.6 21.4-48 48-48h272v16c0 61.8 50.2 112 112 112h160c61.8 0 112-50.2 112-112v-16h272c26.6 0 48 21.4 48 48z m-248 176c0-22-18-40-40-40s-40 18-40 40 18 40 40 40 40-18 40-40z m128 0c0-22-18-40-40-40s-40 18-40 40 18 40 40 40 40-18 40-40z" p-id="6400" fill="#ffffff"></path></svg>
|
||||
<svg t="1713431562066" class="icon" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" p-id="6399" width="32" height="32"><path d="M592 768h-160c-26.6 0-48-21.4-48-48V384h-175.4c-35.6 0-53.4-43-28.2-68.2L484.6 11.4c15-15 39.6-15 54.6 0l304.4 304.4c25.2 25.2 7.4 68.2-28.2 68.2H640v336c0 26.6-21.4 48-48 48z m432-16v224c0 26.6-21.4 48-48 48H48c-26.6 0-48-21.4-48-48V752c0-26.6 21.4-48 48-48h272v16c0 61.8 50.2 112 112 112h160c61.8 0 112-50.2 112-112v-16h272c26.6 0 48 21.4 48 48z m-248 176c0-22-18-40-40-40s-40 18-40 40 18 40 40 40 40-18 40-40z m128 0c0-22-18-40-40-40s-40 18-40 40 18 40 40 40 40-18 40-40z" p-id="6400" fill="#ffffff"></path></svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 669 B After Width: | Height: | Size: 670 B |
@@ -6,4 +6,4 @@
|
||||
<path
|
||||
d="M864 479.776 864 352c0-17.664-14.304-32-32-32s-32 14.336-32 32l0 127.776c0 160.16-129.184 290.464-288 290.464-158.784 0-288-130.304-288-290.464L224 352c0-17.664-14.336-32-32-32s-32 14.336-32 32l0 127.776c0 184.608 140.864 336.48 320 352.832L480 896 288 896c-17.664 0-32 14.304-32 32s14.336 32 32 32l448 0c17.696 0 32-14.304 32-32s-14.304-32-32-32l-192 0 0-63.36C723.136 816.256 864 664.384 864 479.776z"
|
||||
fill="#707070" p-id="2962"></path>
|
||||
</svg>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 844 B After Width: | Height: | Size: 845 B |
|
Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
8
AvatarChatbot/.gitignore
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
*.safetensors
|
||||
*.bin
|
||||
*.model
|
||||
*.log
|
||||
docker_compose/intel/cpu/xeon/data
|
||||
docker_compose/intel/hpu/gaudi/data
|
||||
inputs/
|
||||
outputs/
|
||||
@@ -17,13 +17,12 @@ RUN useradd -m -s /bin/bash user && \
|
||||
|
||||
WORKDIR /home/user/
|
||||
RUN git clone https://github.com/opea-project/GenAIComps.git
|
||||
|
||||
WORKDIR /home/user/GenAIComps
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \
|
||||
pip install --no-cache-dir langchain_core
|
||||
|
||||
COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
|
||||
|
||||
COPY ./avatarchatbot.py /home/user/avatarchatbot.py
|
||||
|
||||
ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
|
||||
|
||||
@@ -31,4 +30,4 @@ USER user
|
||||
|
||||
WORKDIR /home/user
|
||||
|
||||
ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"]
|
||||
ENTRYPOINT ["python", "avatarchatbot.py"]
|
||||
105
AvatarChatbot/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# AvatarChatbot Application
|
||||
|
||||
The AvatarChatbot service can be effortlessly deployed on either Intel Gaudi2 or Intel XEON Scalable Processors.
|
||||
|
||||
## AI Avatar Workflow
|
||||
|
||||
The AI Avatar example is implemented using both megaservices and the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different megaservices and microservices for this example.
|
||||
|
||||
```mermaid
|
||||
---
|
||||
config:
|
||||
flowchart:
|
||||
nodeSpacing: 100
|
||||
rankSpacing: 100
|
||||
curve: linear
|
||||
themeVariables:
|
||||
fontSize: 42px
|
||||
---
|
||||
flowchart LR
|
||||
classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef thistle fill:#D8BFD8,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5
|
||||
classDef invisible fill:transparent,stroke:transparent;
|
||||
style AvatarChatbot-Megaservice stroke:#000000
|
||||
|
||||
subgraph AvatarChatbot-Megaservice["AvatarChatbot Megaservice"]
|
||||
direction LR
|
||||
ASR([ASR Microservice]):::blue
|
||||
LLM([LLM Microservice]):::blue
|
||||
TTS([TTS Microservice]):::blue
|
||||
animation([Animation Microservice]):::blue
|
||||
end
|
||||
subgraph UserInterface["User Interface"]
|
||||
direction LR
|
||||
invis1[ ]:::invisible
|
||||
USER1([User Audio Query]):::orchid
|
||||
USER2([User Image/Video Query]):::orchid
|
||||
UI([UI server<br>]):::orchid
|
||||
end
|
||||
GW([AvatarChatbot GateWay<br>]):::orange
|
||||
subgraph .
|
||||
direction LR
|
||||
X([OPEA Microservice]):::blue
|
||||
Y{{Open Source Service}}:::thistle
|
||||
Z([OPEA Gateway]):::orange
|
||||
Z1([UI]):::orchid
|
||||
end
|
||||
|
||||
WHISPER{{Whisper service}}:::thistle
|
||||
TGI{{LLM service}}:::thistle
|
||||
T5{{Speecht5 service}}:::thistle
|
||||
WAV2LIP{{Wav2Lip service}}:::thistle
|
||||
|
||||
%% Connections %%
|
||||
direction LR
|
||||
USER1 -->|1| UI
|
||||
UI -->|2| GW
|
||||
GW <==>|3| AvatarChatbot-Megaservice
|
||||
ASR ==>|4| LLM ==>|5| TTS ==>|6| animation
|
||||
|
||||
direction TB
|
||||
ASR <-.->|3'| WHISPER
|
||||
LLM <-.->|4'| TGI
|
||||
TTS <-.->|5'| T5
|
||||
animation <-.->|6'| WAV2LIP
|
||||
|
||||
USER2 -->|1| UI
|
||||
UI <-.->|6'| WAV2LIP
|
||||
```
|
||||
|
||||
## Deploy AvatarChatbot Service
|
||||
|
||||
The AvatarChatbot service can be deployed on either Intel Gaudi2 AI Accelerator or Intel Xeon Scalable Processor.
|
||||
|
||||
### Deploy AvatarChatbot on Gaudi
|
||||
|
||||
Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) for instructions on deploying AvatarChatbot on Gaudi, and on setting up an UI for the application.
|
||||
|
||||
### Deploy AvatarChatbot on Xeon
|
||||
|
||||
Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for instructions on deploying AvatarChatbot on Xeon.
|
||||
|
||||
## Supported Models
|
||||
|
||||
### ASR
|
||||
|
||||
The default model is [openai/whisper-small](https://huggingface.co/openai/whisper-small). It also supports all models in the Whisper family, such as `openai/whisper-large-v3`, `openai/whisper-medium`, `openai/whisper-base`, `openai/whisper-tiny`, etc.
|
||||
|
||||
To replace the model, please edit the `compose.yaml` and add the `command` line to pass the name of the model you want to use:
|
||||
|
||||
```yaml
|
||||
services:
|
||||
whisper-service:
|
||||
...
|
||||
command: --model_name_or_path openai/whisper-tiny
|
||||
```
|
||||
|
||||
### TTS
|
||||
|
||||
The default model is [microsoft/SpeechT5](https://huggingface.co/microsoft/speecht5_tts). We currently do not support replacing the model. More models under the commercial license will be added in the future.
|
||||
|
||||
### Animation
|
||||
|
||||
The default model is [Rudrabha/Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [TencentARC/GFPGAN](https://github.com/TencentARC/GFPGAN). We currently do not support replacing the model. More models under the commercial license such as [OpenTalker/SadTalker](https://github.com/OpenTalker/SadTalker) will be added in the future.
|
||||
BIN
AvatarChatbot/assets/audio/eg3_ref.wav
Normal file
3
AvatarChatbot/assets/audio/sample_minecraft.json
Normal file
3
AvatarChatbot/assets/audio/sample_question.json
Normal file
4
AvatarChatbot/assets/audio/sample_whoareyou.json
Normal file
BIN
AvatarChatbot/assets/img/UI.png
Normal file
|
After Width: | Height: | Size: 595 KiB |
BIN
AvatarChatbot/assets/img/avatar1.jpg
Normal file
|
After Width: | Height: | Size: 148 KiB |
BIN
AvatarChatbot/assets/img/avatar2.jpg
Normal file
|
After Width: | Height: | Size: 158 KiB |
BIN
AvatarChatbot/assets/img/avatar3.png
Normal file
|
After Width: | Height: | Size: 2.5 MiB |
BIN
AvatarChatbot/assets/img/avatar4.png
Normal file
|
After Width: | Height: | Size: 992 KiB |
BIN
AvatarChatbot/assets/img/avatar5.png
Normal file
|
After Width: | Height: | Size: 1.7 MiB |
BIN
AvatarChatbot/assets/img/avatar6.png
Normal file
|
After Width: | Height: | Size: 1.6 MiB |
BIN
AvatarChatbot/assets/img/design.png
Normal file
|
After Width: | Height: | Size: 169 KiB |
BIN
AvatarChatbot/assets/img/flowchart.png
Normal file
|
After Width: | Height: | Size: 121 KiB |
BIN
AvatarChatbot/assets/img/gaudi.png
Normal file
|
After Width: | Height: | Size: 47 KiB |
BIN
AvatarChatbot/assets/img/opea_gh_qr.png
Normal file
|
After Width: | Height: | Size: 20 KiB |
BIN
AvatarChatbot/assets/img/opea_qr.png
Normal file
|
After Width: | Height: | Size: 25 KiB |
BIN
AvatarChatbot/assets/img/xeon.jpg
Normal file
|
After Width: | Height: | Size: 22 KiB |